Diffstat (limited to 'kernel')
37 files changed, 2255 insertions, 678 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 9df4501cb921..0a32cb21ec97 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -71,6 +71,7 @@ obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | |||
71 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 71 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
72 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o | 72 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o |
73 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o | 73 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o |
74 | obj-$(CONFIG_GCOV_KERNEL) += gcov/ | ||
74 | obj-$(CONFIG_AUDIT_TREE) += audit_tree.o | 75 | obj-$(CONFIG_AUDIT_TREE) += audit_tree.o |
75 | obj-$(CONFIG_KPROBES) += kprobes.o | 76 | obj-$(CONFIG_KPROBES) += kprobes.o |
76 | obj-$(CONFIG_KGDB) += kgdb.o | 77 | obj-$(CONFIG_KGDB) += kgdb.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3fb789f6df94..3737a682cdf5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -843,6 +843,11 @@ static int parse_cgroupfs_options(char *data, | |||
843 | struct cgroup_sb_opts *opts) | 843 | struct cgroup_sb_opts *opts) |
844 | { | 844 | { |
845 | char *token, *o = data ?: "all"; | 845 | char *token, *o = data ?: "all"; |
846 | unsigned long mask = (unsigned long)-1; | ||
847 | |||
848 | #ifdef CONFIG_CPUSETS | ||
849 | mask = ~(1UL << cpuset_subsys_id); | ||
850 | #endif | ||
846 | 851 | ||
847 | opts->subsys_bits = 0; | 852 | opts->subsys_bits = 0; |
848 | opts->flags = 0; | 853 | opts->flags = 0; |
@@ -887,6 +892,15 @@ static int parse_cgroupfs_options(char *data, | |||
887 | } | 892 | } |
888 | } | 893 | } |
889 | 894 | ||
895 | /* | ||
896 | * Option noprefix was introduced just for backward compatibility | ||
897 | * with the old cpuset, so we allow noprefix only if mounting just | ||
898 | * the cpuset subsystem. | ||
899 | */ | ||
900 | if (test_bit(ROOT_NOPREFIX, &opts->flags) && | ||
901 | (opts->subsys_bits & mask)) | ||
902 | return -EINVAL; | ||
903 | |||
890 | /* We can't have an empty hierarchy */ | 904 | /* We can't have an empty hierarchy */ |
891 | if (!opts->subsys_bits) | 905 | if (!opts->subsys_bits) |
892 | return -EINVAL; | 906 | return -EINVAL; |
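The check added above gates the legacy noprefix option on a subsystem mask. A minimal stand-alone C sketch of the same bitmask logic, with hypothetical bit values standing in for cpuset_subsys_id and the ROOT_NOPREFIX flag:

	#include <stdio.h>

	#define NOPREFIX_FLAG	(1UL << 0)	/* stand-in for test_bit(ROOT_NOPREFIX, &opts->flags) */
	#define CPUSET_BIT	(1UL << 3)	/* hypothetical cpuset_subsys_id bit */

	/* Reject noprefix unless cpuset is the only requested subsystem. */
	static int check_noprefix(unsigned long flags, unsigned long subsys_bits)
	{
		unsigned long mask = ~CPUSET_BIT;

		if ((flags & NOPREFIX_FLAG) && (subsys_bits & mask))
			return -1;	/* -EINVAL in the kernel */
		return 0;
	}

	int main(void)
	{
		printf("%d\n", check_noprefix(NOPREFIX_FLAG, CPUSET_BIT));	/* 0: allowed */
		printf("%d\n", check_noprefix(NOPREFIX_FLAG, CPUSET_BIT | 2));	/* -1: rejected */
		return 0;
	}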
diff --git a/kernel/exit.c b/kernel/exit.c index b6c90b5ef509..628d41f0dd54 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -375,9 +375,8 @@ static void set_special_pids(struct pid *pid) | |||
375 | } | 375 | } |
376 | 376 | ||
377 | /* | 377 | /* |
378 | * Let kernel threads use this to say that they | 378 | * Let kernel threads use this to say that they allow a certain signal. |
379 | * allow a certain signal (since daemonize() will | 379 | * Must not be used if kthread was cloned with CLONE_SIGHAND. |
380 | * have disabled all of them by default). | ||
381 | */ | 380 | */ |
382 | int allow_signal(int sig) | 381 | int allow_signal(int sig) |
383 | { | 382 | { |
@@ -385,14 +384,14 @@ int allow_signal(int sig) | |||
385 | return -EINVAL; | 384 | return -EINVAL; |
386 | 385 | ||
387 | spin_lock_irq(¤t->sighand->siglock); | 386 | spin_lock_irq(¤t->sighand->siglock); |
387 | /* This is only needed for daemonize()'ed kthreads */ | ||
388 | sigdelset(¤t->blocked, sig); | 388 | sigdelset(¤t->blocked, sig); |
389 | if (!current->mm) { | 389 | /* |
390 | /* Kernel threads handle their own signals. | 390 | * Kernel threads handle their own signals. Let the signal code |
391 | Let the signal code know it'll be handled, so | 391 | * know it'll be handled, so that they don't get converted to |
392 | that they don't get converted to SIGKILL or | 392 | * SIGKILL or just silently dropped. |
393 | just silently dropped */ | 393 | */ |
394 | current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; | 394 | current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; |
395 | } | ||
396 | recalc_sigpending(); | 395 | recalc_sigpending(); |
397 | spin_unlock_irq(¤t->sighand->siglock); | 396 | spin_unlock_irq(¤t->sighand->siglock); |
398 | return 0; | 397 | return 0; |
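For context, the typical caller of allow_signal() is a kernel thread that opts in to one specific signal; a hedged sketch of that idiom (the thread body and signal choice are illustrative, not part of this patch):

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/signal.h>

	/* Illustrative kthread body: accept SIGTERM, then poll for it. */
	static int example_thread(void *unused)
	{
		allow_signal(SIGTERM);
		while (!kthread_should_stop()) {
			if (signal_pending(current)) {
				flush_signals(current);
				break;
			}
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}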
@@ -591,7 +590,7 @@ retry: | |||
591 | /* | 590 | /* |
592 | * Search in the siblings | 591 | * Search in the siblings |
593 | */ | 592 | */ |
594 | list_for_each_entry(c, &p->parent->children, sibling) { | 593 | list_for_each_entry(c, &p->real_parent->children, sibling) { |
595 | if (c->mm == mm) | 594 | if (c->mm == mm) |
596 | goto assign_new_owner; | 595 | goto assign_new_owner; |
597 | } | 596 | } |
@@ -758,7 +757,7 @@ static void reparent_thread(struct task_struct *father, struct task_struct *p, | |||
758 | p->exit_signal = SIGCHLD; | 757 | p->exit_signal = SIGCHLD; |
759 | 758 | ||
760 | /* If it has exited notify the new parent about this child's death. */ | 759 | /* If it has exited notify the new parent about this child's death. */ |
761 | if (!p->ptrace && | 760 | if (!task_ptrace(p) && |
762 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { | 761 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
763 | do_notify_parent(p, p->exit_signal); | 762 | do_notify_parent(p, p->exit_signal); |
764 | if (task_detached(p)) { | 763 | if (task_detached(p)) { |
@@ -783,7 +782,7 @@ static void forget_original_parent(struct task_struct *father) | |||
783 | list_for_each_entry_safe(p, n, &father->children, sibling) { | 782 | list_for_each_entry_safe(p, n, &father->children, sibling) { |
784 | p->real_parent = reaper; | 783 | p->real_parent = reaper; |
785 | if (p->parent == father) { | 784 | if (p->parent == father) { |
786 | BUG_ON(p->ptrace); | 785 | BUG_ON(task_ptrace(p)); |
787 | p->parent = p->real_parent; | 786 | p->parent = p->real_parent; |
788 | } | 787 | } |
789 | reparent_thread(father, p, &dead_children); | 788 | reparent_thread(father, p, &dead_children); |
@@ -1081,6 +1080,18 @@ SYSCALL_DEFINE1(exit_group, int, error_code) | |||
1081 | return 0; | 1080 | return 0; |
1082 | } | 1081 | } |
1083 | 1082 | ||
1083 | struct wait_opts { | ||
1084 | enum pid_type wo_type; | ||
1085 | int wo_flags; | ||
1086 | struct pid *wo_pid; | ||
1087 | |||
1088 | struct siginfo __user *wo_info; | ||
1089 | int __user *wo_stat; | ||
1090 | struct rusage __user *wo_rusage; | ||
1091 | |||
1092 | int notask_error; | ||
1093 | }; | ||
1094 | |||
1084 | static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) | 1095 | static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) |
1085 | { | 1096 | { |
1086 | struct pid *pid = NULL; | 1097 | struct pid *pid = NULL; |
@@ -1091,13 +1102,12 @@ static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) | |||
1091 | return pid; | 1102 | return pid; |
1092 | } | 1103 | } |
1093 | 1104 | ||
1094 | static int eligible_child(enum pid_type type, struct pid *pid, int options, | 1105 | static int eligible_child(struct wait_opts *wo, struct task_struct *p) |
1095 | struct task_struct *p) | ||
1096 | { | 1106 | { |
1097 | int err; | 1107 | int err; |
1098 | 1108 | ||
1099 | if (type < PIDTYPE_MAX) { | 1109 | if (wo->wo_type < PIDTYPE_MAX) { |
1100 | if (task_pid_type(p, type) != pid) | 1110 | if (task_pid_type(p, wo->wo_type) != wo->wo_pid) |
1101 | return 0; | 1111 | return 0; |
1102 | } | 1112 | } |
1103 | 1113 | ||
@@ -1106,8 +1116,8 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, | |||
1106 | * set; otherwise, wait for non-clone children *only*. (Note: | 1116 | * set; otherwise, wait for non-clone children *only*. (Note: |
1107 | * A "clone" child here is one that reports to its parent | 1117 | * A "clone" child here is one that reports to its parent |
1108 | * using a signal other than SIGCHLD.) */ | 1118 | * using a signal other than SIGCHLD.) */ |
1109 | if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) | 1119 | if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) |
1110 | && !(options & __WALL)) | 1120 | && !(wo->wo_flags & __WALL)) |
1111 | return 0; | 1121 | return 0; |
1112 | 1122 | ||
1113 | err = security_task_wait(p); | 1123 | err = security_task_wait(p); |
@@ -1117,14 +1127,15 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, | |||
1117 | return 1; | 1127 | return 1; |
1118 | } | 1128 | } |
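The XOR in eligible_child() selects clone children (those reporting with a signal other than SIGCHLD) only when __WCLONE is set; the same predicate in isolation, as a small hedged sketch:

	#include <stdbool.h>

	/*
	 * Without __WALL, a child is eligible when its "is a clone child"
	 * property matches the presence of __WCLONE in the wait flags.
	 */
	static bool exit_signal_matches(bool is_clone_child, bool wclone, bool wall)
	{
		if (wall)
			return true;
		return is_clone_child == wclone;
	}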
1119 | 1129 | ||
1120 | static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, | 1130 | static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, |
1121 | int why, int status, | 1131 | pid_t pid, uid_t uid, int why, int status) |
1122 | struct siginfo __user *infop, | ||
1123 | struct rusage __user *rusagep) | ||
1124 | { | 1132 | { |
1125 | int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; | 1133 | struct siginfo __user *infop; |
1134 | int retval = wo->wo_rusage | ||
1135 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | ||
1126 | 1136 | ||
1127 | put_task_struct(p); | 1137 | put_task_struct(p); |
1138 | infop = wo->wo_info; | ||
1128 | if (!retval) | 1139 | if (!retval) |
1129 | retval = put_user(SIGCHLD, &infop->si_signo); | 1140 | retval = put_user(SIGCHLD, &infop->si_signo); |
1130 | if (!retval) | 1141 | if (!retval) |
@@ -1148,19 +1159,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, | |||
1148 | * the lock and this task is uninteresting. If we return nonzero, we have | 1159 | * the lock and this task is uninteresting. If we return nonzero, we have |
1149 | * released the lock and the system call should return. | 1160 | * released the lock and the system call should return. |
1150 | */ | 1161 | */ |
1151 | static int wait_task_zombie(struct task_struct *p, int options, | 1162 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) |
1152 | struct siginfo __user *infop, | ||
1153 | int __user *stat_addr, struct rusage __user *ru) | ||
1154 | { | 1163 | { |
1155 | unsigned long state; | 1164 | unsigned long state; |
1156 | int retval, status, traced; | 1165 | int retval, status, traced; |
1157 | pid_t pid = task_pid_vnr(p); | 1166 | pid_t pid = task_pid_vnr(p); |
1158 | uid_t uid = __task_cred(p)->uid; | 1167 | uid_t uid = __task_cred(p)->uid; |
1168 | struct siginfo __user *infop; | ||
1159 | 1169 | ||
1160 | if (!likely(options & WEXITED)) | 1170 | if (!likely(wo->wo_flags & WEXITED)) |
1161 | return 0; | 1171 | return 0; |
1162 | 1172 | ||
1163 | if (unlikely(options & WNOWAIT)) { | 1173 | if (unlikely(wo->wo_flags & WNOWAIT)) { |
1164 | int exit_code = p->exit_code; | 1174 | int exit_code = p->exit_code; |
1165 | int why, status; | 1175 | int why, status; |
1166 | 1176 | ||
@@ -1173,8 +1183,7 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1173 | why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; | 1183 | why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; |
1174 | status = exit_code & 0x7f; | 1184 | status = exit_code & 0x7f; |
1175 | } | 1185 | } |
1176 | return wait_noreap_copyout(p, pid, uid, why, | 1186 | return wait_noreap_copyout(wo, p, pid, uid, why, status); |
1177 | status, infop, ru); | ||
1178 | } | 1187 | } |
1179 | 1188 | ||
1180 | /* | 1189 | /* |
@@ -1188,11 +1197,13 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1188 | } | 1197 | } |
1189 | 1198 | ||
1190 | traced = ptrace_reparented(p); | 1199 | traced = ptrace_reparented(p); |
1191 | 1200 | /* | |
1192 | if (likely(!traced)) { | 1201 | * It can be ptraced but not reparented, check |
1202 | * !task_detached() to filter out sub-threads. | ||
1203 | */ | ||
1204 | if (likely(!traced) && likely(!task_detached(p))) { | ||
1193 | struct signal_struct *psig; | 1205 | struct signal_struct *psig; |
1194 | struct signal_struct *sig; | 1206 | struct signal_struct *sig; |
1195 | struct task_cputime cputime; | ||
1196 | 1207 | ||
1197 | /* | 1208 | /* |
1198 | * The resource counters for the group leader are in its | 1209 | * The resource counters for the group leader are in its |
@@ -1205,26 +1216,23 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1205 | * p->signal fields, because they are only touched by | 1216 | * p->signal fields, because they are only touched by |
1206 | * __exit_signal, which runs with tasklist_lock | 1217 | * __exit_signal, which runs with tasklist_lock |
1207 | * write-locked anyway, and so is excluded here. We do | 1218 | * write-locked anyway, and so is excluded here. We do |
1208 | * need to protect the access to p->parent->signal fields, | 1219 | * need to protect the access to parent->signal fields, |
1209 | * as other threads in the parent group can be right | 1220 | * as other threads in the parent group can be right |
1210 | * here reaping other children at the same time. | 1221 | * here reaping other children at the same time. |
1211 | * | ||
1212 | * We use thread_group_cputime() to get times for the thread | ||
1213 | * group, which consolidates times for all threads in the | ||
1214 | * group including the group leader. | ||
1215 | */ | 1222 | */ |
1216 | thread_group_cputime(p, &cputime); | 1223 | spin_lock_irq(&p->real_parent->sighand->siglock); |
1217 | spin_lock_irq(&p->parent->sighand->siglock); | 1224 | psig = p->real_parent->signal; |
1218 | psig = p->parent->signal; | ||
1219 | sig = p->signal; | 1225 | sig = p->signal; |
1220 | psig->cutime = | 1226 | psig->cutime = |
1221 | cputime_add(psig->cutime, | 1227 | cputime_add(psig->cutime, |
1222 | cputime_add(cputime.utime, | 1228 | cputime_add(p->utime, |
1223 | sig->cutime)); | 1229 | cputime_add(sig->utime, |
1230 | sig->cutime))); | ||
1224 | psig->cstime = | 1231 | psig->cstime = |
1225 | cputime_add(psig->cstime, | 1232 | cputime_add(psig->cstime, |
1226 | cputime_add(cputime.stime, | 1233 | cputime_add(p->stime, |
1227 | sig->cstime)); | 1234 | cputime_add(sig->stime, |
1235 | sig->cstime))); | ||
1228 | psig->cgtime = | 1236 | psig->cgtime = |
1229 | cputime_add(psig->cgtime, | 1237 | cputime_add(psig->cgtime, |
1230 | cputime_add(p->gtime, | 1238 | cputime_add(p->gtime, |
@@ -1246,7 +1254,7 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1246 | sig->oublock + sig->coublock; | 1254 | sig->oublock + sig->coublock; |
1247 | task_io_accounting_add(&psig->ioac, &p->ioac); | 1255 | task_io_accounting_add(&psig->ioac, &p->ioac); |
1248 | task_io_accounting_add(&psig->ioac, &sig->ioac); | 1256 | task_io_accounting_add(&psig->ioac, &sig->ioac); |
1249 | spin_unlock_irq(&p->parent->sighand->siglock); | 1257 | spin_unlock_irq(&p->real_parent->sighand->siglock); |
1250 | } | 1258 | } |
1251 | 1259 | ||
1252 | /* | 1260 | /* |
@@ -1255,11 +1263,14 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1255 | */ | 1263 | */ |
1256 | read_unlock(&tasklist_lock); | 1264 | read_unlock(&tasklist_lock); |
1257 | 1265 | ||
1258 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1266 | retval = wo->wo_rusage |
1267 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | ||
1259 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) | 1268 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) |
1260 | ? p->signal->group_exit_code : p->exit_code; | 1269 | ? p->signal->group_exit_code : p->exit_code; |
1261 | if (!retval && stat_addr) | 1270 | if (!retval && wo->wo_stat) |
1262 | retval = put_user(status, stat_addr); | 1271 | retval = put_user(status, wo->wo_stat); |
1272 | |||
1273 | infop = wo->wo_info; | ||
1263 | if (!retval && infop) | 1274 | if (!retval && infop) |
1264 | retval = put_user(SIGCHLD, &infop->si_signo); | 1275 | retval = put_user(SIGCHLD, &infop->si_signo); |
1265 | if (!retval && infop) | 1276 | if (!retval && infop) |
@@ -1327,15 +1338,18 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace) | |||
1327 | * the lock and this task is uninteresting. If we return nonzero, we have | 1338 | * the lock and this task is uninteresting. If we return nonzero, we have |
1328 | * released the lock and the system call should return. | 1339 | * released the lock and the system call should return. |
1329 | */ | 1340 | */ |
1330 | static int wait_task_stopped(int ptrace, struct task_struct *p, | 1341 | static int wait_task_stopped(struct wait_opts *wo, |
1331 | int options, struct siginfo __user *infop, | 1342 | int ptrace, struct task_struct *p) |
1332 | int __user *stat_addr, struct rusage __user *ru) | ||
1333 | { | 1343 | { |
1344 | struct siginfo __user *infop; | ||
1334 | int retval, exit_code, *p_code, why; | 1345 | int retval, exit_code, *p_code, why; |
1335 | uid_t uid = 0; /* unneeded, required by compiler */ | 1346 | uid_t uid = 0; /* unneeded, required by compiler */ |
1336 | pid_t pid; | 1347 | pid_t pid; |
1337 | 1348 | ||
1338 | if (!(options & WUNTRACED)) | 1349 | /* |
1350 | * Traditionally we see ptrace'd stopped tasks regardless of options. | ||
1351 | */ | ||
1352 | if (!ptrace && !(wo->wo_flags & WUNTRACED)) | ||
1339 | return 0; | 1353 | return 0; |
1340 | 1354 | ||
1341 | exit_code = 0; | 1355 | exit_code = 0; |
@@ -1349,7 +1363,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, | |||
1349 | if (!exit_code) | 1363 | if (!exit_code) |
1350 | goto unlock_sig; | 1364 | goto unlock_sig; |
1351 | 1365 | ||
1352 | if (!unlikely(options & WNOWAIT)) | 1366 | if (!unlikely(wo->wo_flags & WNOWAIT)) |
1353 | *p_code = 0; | 1367 | *p_code = 0; |
1354 | 1368 | ||
1355 | /* don't need the RCU readlock here as we're holding a spinlock */ | 1369 | /* don't need the RCU readlock here as we're holding a spinlock */ |
@@ -1371,14 +1385,15 @@ unlock_sig: | |||
1371 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; | 1385 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; |
1372 | read_unlock(&tasklist_lock); | 1386 | read_unlock(&tasklist_lock); |
1373 | 1387 | ||
1374 | if (unlikely(options & WNOWAIT)) | 1388 | if (unlikely(wo->wo_flags & WNOWAIT)) |
1375 | return wait_noreap_copyout(p, pid, uid, | 1389 | return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); |
1376 | why, exit_code, | ||
1377 | infop, ru); | ||
1378 | 1390 | ||
1379 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1391 | retval = wo->wo_rusage |
1380 | if (!retval && stat_addr) | 1392 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1381 | retval = put_user((exit_code << 8) | 0x7f, stat_addr); | 1393 | if (!retval && wo->wo_stat) |
1394 | retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat); | ||
1395 | |||
1396 | infop = wo->wo_info; | ||
1382 | if (!retval && infop) | 1397 | if (!retval && infop) |
1383 | retval = put_user(SIGCHLD, &infop->si_signo); | 1398 | retval = put_user(SIGCHLD, &infop->si_signo); |
1384 | if (!retval && infop) | 1399 | if (!retval && infop) |
@@ -1405,15 +1420,13 @@ unlock_sig: | |||
1405 | * the lock and this task is uninteresting. If we return nonzero, we have | 1420 | * the lock and this task is uninteresting. If we return nonzero, we have |
1406 | * released the lock and the system call should return. | 1421 | * released the lock and the system call should return. |
1407 | */ | 1422 | */ |
1408 | static int wait_task_continued(struct task_struct *p, int options, | 1423 | static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) |
1409 | struct siginfo __user *infop, | ||
1410 | int __user *stat_addr, struct rusage __user *ru) | ||
1411 | { | 1424 | { |
1412 | int retval; | 1425 | int retval; |
1413 | pid_t pid; | 1426 | pid_t pid; |
1414 | uid_t uid; | 1427 | uid_t uid; |
1415 | 1428 | ||
1416 | if (!unlikely(options & WCONTINUED)) | 1429 | if (!unlikely(wo->wo_flags & WCONTINUED)) |
1417 | return 0; | 1430 | return 0; |
1418 | 1431 | ||
1419 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) | 1432 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) |
@@ -1425,7 +1438,7 @@ static int wait_task_continued(struct task_struct *p, int options, | |||
1425 | spin_unlock_irq(&p->sighand->siglock); | 1438 | spin_unlock_irq(&p->sighand->siglock); |
1426 | return 0; | 1439 | return 0; |
1427 | } | 1440 | } |
1428 | if (!unlikely(options & WNOWAIT)) | 1441 | if (!unlikely(wo->wo_flags & WNOWAIT)) |
1429 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; | 1442 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; |
1430 | uid = __task_cred(p)->uid; | 1443 | uid = __task_cred(p)->uid; |
1431 | spin_unlock_irq(&p->sighand->siglock); | 1444 | spin_unlock_irq(&p->sighand->siglock); |
@@ -1434,17 +1447,17 @@ static int wait_task_continued(struct task_struct *p, int options, | |||
1434 | get_task_struct(p); | 1447 | get_task_struct(p); |
1435 | read_unlock(&tasklist_lock); | 1448 | read_unlock(&tasklist_lock); |
1436 | 1449 | ||
1437 | if (!infop) { | 1450 | if (!wo->wo_info) { |
1438 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1451 | retval = wo->wo_rusage |
1452 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | ||
1439 | put_task_struct(p); | 1453 | put_task_struct(p); |
1440 | if (!retval && stat_addr) | 1454 | if (!retval && wo->wo_stat) |
1441 | retval = put_user(0xffff, stat_addr); | 1455 | retval = put_user(0xffff, wo->wo_stat); |
1442 | if (!retval) | 1456 | if (!retval) |
1443 | retval = pid; | 1457 | retval = pid; |
1444 | } else { | 1458 | } else { |
1445 | retval = wait_noreap_copyout(p, pid, uid, | 1459 | retval = wait_noreap_copyout(wo, p, pid, uid, |
1446 | CLD_CONTINUED, SIGCONT, | 1460 | CLD_CONTINUED, SIGCONT); |
1447 | infop, ru); | ||
1448 | BUG_ON(retval == 0); | 1461 | BUG_ON(retval == 0); |
1449 | } | 1462 | } |
1450 | 1463 | ||
@@ -1454,19 +1467,16 @@ static int wait_task_continued(struct task_struct *p, int options, | |||
1454 | /* | 1467 | /* |
1455 | * Consider @p for a wait by @parent. | 1468 | * Consider @p for a wait by @parent. |
1456 | * | 1469 | * |
1457 | * -ECHILD should be in *@notask_error before the first call. | 1470 | * -ECHILD should be in ->notask_error before the first call. |
1458 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. | 1471 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. |
1459 | * Returns zero if the search for a child should continue; | 1472 | * Returns zero if the search for a child should continue; |
1460 | * then *@notask_error is 0 if @p is an eligible child, | 1473 | * then ->notask_error is 0 if @p is an eligible child, |
1461 | * or another error from security_task_wait(), or still -ECHILD. | 1474 | * or another error from security_task_wait(), or still -ECHILD. |
1462 | */ | 1475 | */ |
1463 | static int wait_consider_task(struct task_struct *parent, int ptrace, | 1476 | static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, |
1464 | struct task_struct *p, int *notask_error, | 1477 | int ptrace, struct task_struct *p) |
1465 | enum pid_type type, struct pid *pid, int options, | ||
1466 | struct siginfo __user *infop, | ||
1467 | int __user *stat_addr, struct rusage __user *ru) | ||
1468 | { | 1478 | { |
1469 | int ret = eligible_child(type, pid, options, p); | 1479 | int ret = eligible_child(wo, p); |
1470 | if (!ret) | 1480 | if (!ret) |
1471 | return ret; | 1481 | return ret; |
1472 | 1482 | ||
@@ -1478,17 +1488,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, | |||
1478 | * to look for security policy problems, rather | 1488 | * to look for security policy problems, rather |
1479 | * than for mysterious wait bugs. | 1489 | * than for mysterious wait bugs. |
1480 | */ | 1490 | */ |
1481 | if (*notask_error) | 1491 | if (wo->notask_error) |
1482 | *notask_error = ret; | 1492 | wo->notask_error = ret; |
1483 | return 0; | 1493 | return 0; |
1484 | } | 1494 | } |
1485 | 1495 | ||
1486 | if (likely(!ptrace) && unlikely(p->ptrace)) { | 1496 | if (likely(!ptrace) && unlikely(task_ptrace(p))) { |
1487 | /* | 1497 | /* |
1488 | * This child is hidden by ptrace. | 1498 | * This child is hidden by ptrace. |
1489 | * We aren't allowed to see it now, but eventually we will. | 1499 | * We aren't allowed to see it now, but eventually we will. |
1490 | */ | 1500 | */ |
1491 | *notask_error = 0; | 1501 | wo->notask_error = 0; |
1492 | return 0; | 1502 | return 0; |
1493 | } | 1503 | } |
1494 | 1504 | ||
@@ -1499,34 +1509,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, | |||
1499 | * We don't reap group leaders with subthreads. | 1509 | * We don't reap group leaders with subthreads. |
1500 | */ | 1510 | */ |
1501 | if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) | 1511 | if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) |
1502 | return wait_task_zombie(p, options, infop, stat_addr, ru); | 1512 | return wait_task_zombie(wo, p); |
1503 | 1513 | ||
1504 | /* | 1514 | /* |
1505 | * It's stopped or running now, so it might | 1515 | * It's stopped or running now, so it might |
1506 | * later continue, exit, or stop again. | 1516 | * later continue, exit, or stop again. |
1507 | */ | 1517 | */ |
1508 | *notask_error = 0; | 1518 | wo->notask_error = 0; |
1509 | 1519 | ||
1510 | if (task_stopped_code(p, ptrace)) | 1520 | if (task_stopped_code(p, ptrace)) |
1511 | return wait_task_stopped(ptrace, p, options, | 1521 | return wait_task_stopped(wo, ptrace, p); |
1512 | infop, stat_addr, ru); | ||
1513 | 1522 | ||
1514 | return wait_task_continued(p, options, infop, stat_addr, ru); | 1523 | return wait_task_continued(wo, p); |
1515 | } | 1524 | } |
1516 | 1525 | ||
1517 | /* | 1526 | /* |
1518 | * Do the work of do_wait() for one thread in the group, @tsk. | 1527 | * Do the work of do_wait() for one thread in the group, @tsk. |
1519 | * | 1528 | * |
1520 | * -ECHILD should be in *@notask_error before the first call. | 1529 | * -ECHILD should be in ->notask_error before the first call. |
1521 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. | 1530 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. |
1522 | * Returns zero if the search for a child should continue; then | 1531 | * Returns zero if the search for a child should continue; then |
1523 | * *@notask_error is 0 if there were any eligible children, | 1532 | * ->notask_error is 0 if there were any eligible children, |
1524 | * or another error from security_task_wait(), or still -ECHILD. | 1533 | * or another error from security_task_wait(), or still -ECHILD. |
1525 | */ | 1534 | */ |
1526 | static int do_wait_thread(struct task_struct *tsk, int *notask_error, | 1535 | static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) |
1527 | enum pid_type type, struct pid *pid, int options, | ||
1528 | struct siginfo __user *infop, int __user *stat_addr, | ||
1529 | struct rusage __user *ru) | ||
1530 | { | 1536 | { |
1531 | struct task_struct *p; | 1537 | struct task_struct *p; |
1532 | 1538 | ||
@@ -1535,9 +1541,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error, | |||
1535 | * Do not consider detached threads. | 1541 | * Do not consider detached threads. |
1536 | */ | 1542 | */ |
1537 | if (!task_detached(p)) { | 1543 | if (!task_detached(p)) { |
1538 | int ret = wait_consider_task(tsk, 0, p, notask_error, | 1544 | int ret = wait_consider_task(wo, tsk, 0, p); |
1539 | type, pid, options, | ||
1540 | infop, stat_addr, ru); | ||
1541 | if (ret) | 1545 | if (ret) |
1542 | return ret; | 1546 | return ret; |
1543 | } | 1547 | } |
@@ -1546,22 +1550,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error, | |||
1546 | return 0; | 1550 | return 0; |
1547 | } | 1551 | } |
1548 | 1552 | ||
1549 | static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, | 1553 | static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) |
1550 | enum pid_type type, struct pid *pid, int options, | ||
1551 | struct siginfo __user *infop, int __user *stat_addr, | ||
1552 | struct rusage __user *ru) | ||
1553 | { | 1554 | { |
1554 | struct task_struct *p; | 1555 | struct task_struct *p; |
1555 | 1556 | ||
1556 | /* | ||
1557 | * Traditionally we see ptrace'd stopped tasks regardless of options. | ||
1558 | */ | ||
1559 | options |= WUNTRACED; | ||
1560 | |||
1561 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { | 1557 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { |
1562 | int ret = wait_consider_task(tsk, 1, p, notask_error, | 1558 | int ret = wait_consider_task(wo, tsk, 1, p); |
1563 | type, pid, options, | ||
1564 | infop, stat_addr, ru); | ||
1565 | if (ret) | 1559 | if (ret) |
1566 | return ret; | 1560 | return ret; |
1567 | } | 1561 | } |
@@ -1569,65 +1563,59 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, | |||
1569 | return 0; | 1563 | return 0; |
1570 | } | 1564 | } |
1571 | 1565 | ||
1572 | static long do_wait(enum pid_type type, struct pid *pid, int options, | 1566 | static long do_wait(struct wait_opts *wo) |
1573 | struct siginfo __user *infop, int __user *stat_addr, | ||
1574 | struct rusage __user *ru) | ||
1575 | { | 1567 | { |
1576 | DECLARE_WAITQUEUE(wait, current); | 1568 | DECLARE_WAITQUEUE(wait, current); |
1577 | struct task_struct *tsk; | 1569 | struct task_struct *tsk; |
1578 | int retval; | 1570 | int retval; |
1579 | 1571 | ||
1580 | trace_sched_process_wait(pid); | 1572 | trace_sched_process_wait(wo->wo_pid); |
1581 | 1573 | ||
1582 | add_wait_queue(¤t->signal->wait_chldexit,&wait); | 1574 | add_wait_queue(¤t->signal->wait_chldexit,&wait); |
1583 | repeat: | 1575 | repeat: |
1584 | /* | 1576 | /* |
1585 | * If there is nothing that can match our criteria just get out. | 1577 | * If there is nothing that can match our criteria just get out. |
1586 | * We will clear @retval to zero if we see any child that might later | 1578 | * We will clear ->notask_error to zero if we see any child that |
1587 | * match our criteria, even if we are not able to reap it yet. | 1579 | * might later match our criteria, even if we are not able to reap |
1580 | * it yet. | ||
1588 | */ | 1581 | */ |
1589 | retval = -ECHILD; | 1582 | wo->notask_error = -ECHILD; |
1590 | if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) | 1583 | if ((wo->wo_type < PIDTYPE_MAX) && |
1591 | goto end; | 1584 | (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) |
1585 | goto notask; | ||
1592 | 1586 | ||
1593 | current->state = TASK_INTERRUPTIBLE; | 1587 | set_current_state(TASK_INTERRUPTIBLE); |
1594 | read_lock(&tasklist_lock); | 1588 | read_lock(&tasklist_lock); |
1595 | tsk = current; | 1589 | tsk = current; |
1596 | do { | 1590 | do { |
1597 | int tsk_result = do_wait_thread(tsk, &retval, | 1591 | retval = do_wait_thread(wo, tsk); |
1598 | type, pid, options, | 1592 | if (retval) |
1599 | infop, stat_addr, ru); | 1593 | goto end; |
1600 | if (!tsk_result) | 1594 | |
1601 | tsk_result = ptrace_do_wait(tsk, &retval, | 1595 | retval = ptrace_do_wait(wo, tsk); |
1602 | type, pid, options, | 1596 | if (retval) |
1603 | infop, stat_addr, ru); | ||
1604 | if (tsk_result) { | ||
1605 | /* | ||
1606 | * tasklist_lock is unlocked and we have a final result. | ||
1607 | */ | ||
1608 | retval = tsk_result; | ||
1609 | goto end; | 1597 | goto end; |
1610 | } | ||
1611 | 1598 | ||
1612 | if (options & __WNOTHREAD) | 1599 | if (wo->wo_flags & __WNOTHREAD) |
1613 | break; | 1600 | break; |
1614 | tsk = next_thread(tsk); | 1601 | } while_each_thread(current, tsk); |
1615 | BUG_ON(tsk->signal != current->signal); | ||
1616 | } while (tsk != current); | ||
1617 | read_unlock(&tasklist_lock); | 1602 | read_unlock(&tasklist_lock); |
1618 | 1603 | ||
1619 | if (!retval && !(options & WNOHANG)) { | 1604 | notask: |
1605 | retval = wo->notask_error; | ||
1606 | if (!retval && !(wo->wo_flags & WNOHANG)) { | ||
1620 | retval = -ERESTARTSYS; | 1607 | retval = -ERESTARTSYS; |
1621 | if (!signal_pending(current)) { | 1608 | if (!signal_pending(current)) { |
1622 | schedule(); | 1609 | schedule(); |
1623 | goto repeat; | 1610 | goto repeat; |
1624 | } | 1611 | } |
1625 | } | 1612 | } |
1626 | |||
1627 | end: | 1613 | end: |
1628 | current->state = TASK_RUNNING; | 1614 | __set_current_state(TASK_RUNNING); |
1629 | remove_wait_queue(¤t->signal->wait_chldexit,&wait); | 1615 | remove_wait_queue(¤t->signal->wait_chldexit,&wait); |
1630 | if (infop) { | 1616 | if (wo->wo_info) { |
1617 | struct siginfo __user *infop = wo->wo_info; | ||
1618 | |||
1631 | if (retval > 0) | 1619 | if (retval > 0) |
1632 | retval = 0; | 1620 | retval = 0; |
1633 | else { | 1621 | else { |
@@ -1656,6 +1644,7 @@ end: | |||
1656 | SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | 1644 | SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, |
1657 | infop, int, options, struct rusage __user *, ru) | 1645 | infop, int, options, struct rusage __user *, ru) |
1658 | { | 1646 | { |
1647 | struct wait_opts wo; | ||
1659 | struct pid *pid = NULL; | 1648 | struct pid *pid = NULL; |
1660 | enum pid_type type; | 1649 | enum pid_type type; |
1661 | long ret; | 1650 | long ret; |
@@ -1685,7 +1674,14 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | |||
1685 | 1674 | ||
1686 | if (type < PIDTYPE_MAX) | 1675 | if (type < PIDTYPE_MAX) |
1687 | pid = find_get_pid(upid); | 1676 | pid = find_get_pid(upid); |
1688 | ret = do_wait(type, pid, options, infop, NULL, ru); | 1677 | |
1678 | wo.wo_type = type; | ||
1679 | wo.wo_pid = pid; | ||
1680 | wo.wo_flags = options; | ||
1681 | wo.wo_info = infop; | ||
1682 | wo.wo_stat = NULL; | ||
1683 | wo.wo_rusage = ru; | ||
1684 | ret = do_wait(&wo); | ||
1689 | put_pid(pid); | 1685 | put_pid(pid); |
1690 | 1686 | ||
1691 | /* avoid REGPARM breakage on x86: */ | 1687 | /* avoid REGPARM breakage on x86: */ |
@@ -1696,6 +1692,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | |||
1696 | SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, | 1692 | SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, |
1697 | int, options, struct rusage __user *, ru) | 1693 | int, options, struct rusage __user *, ru) |
1698 | { | 1694 | { |
1695 | struct wait_opts wo; | ||
1699 | struct pid *pid = NULL; | 1696 | struct pid *pid = NULL; |
1700 | enum pid_type type; | 1697 | enum pid_type type; |
1701 | long ret; | 1698 | long ret; |
@@ -1717,7 +1714,13 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, | |||
1717 | pid = find_get_pid(upid); | 1714 | pid = find_get_pid(upid); |
1718 | } | 1715 | } |
1719 | 1716 | ||
1720 | ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); | 1717 | wo.wo_type = type; |
1718 | wo.wo_pid = pid; | ||
1719 | wo.wo_flags = options | WEXITED; | ||
1720 | wo.wo_info = NULL; | ||
1721 | wo.wo_stat = stat_addr; | ||
1722 | wo.wo_rusage = ru; | ||
1723 | ret = do_wait(&wo); | ||
1721 | put_pid(pid); | 1724 | put_pid(pid); |
1722 | 1725 | ||
1723 | /* avoid REGPARM breakage on x86: */ | 1726 | /* avoid REGPARM breakage on x86: */ |
diff --git a/kernel/fork.c b/kernel/fork.c index be022c200da6..467746b3f0aa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1029,7 +1029,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1029 | p->vfork_done = NULL; | 1029 | p->vfork_done = NULL; |
1030 | spin_lock_init(&p->alloc_lock); | 1030 | spin_lock_init(&p->alloc_lock); |
1031 | 1031 | ||
1032 | clear_tsk_thread_flag(p, TIF_SIGPENDING); | ||
1033 | init_sigpending(&p->pending); | 1032 | init_sigpending(&p->pending); |
1034 | 1033 | ||
1035 | p->utime = cputime_zero; | 1034 | p->utime = cputime_zero; |
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig new file mode 100644 index 000000000000..22e9dcfaa3d3 --- /dev/null +++ b/kernel/gcov/Kconfig | |||
@@ -0,0 +1,48 @@ | |||
1 | menu "GCOV-based kernel profiling" | ||
2 | |||
3 | config GCOV_KERNEL | ||
4 | bool "Enable gcov-based kernel profiling" | ||
5 | depends on DEBUG_FS && CONSTRUCTORS | ||
6 | default n | ||
7 | ---help--- | ||
8 | This option enables gcov-based code profiling (e.g. for code coverage | ||
9 | measurements). | ||
10 | |||
11 | If unsure, say N. | ||
12 | |||
13 | Additionally specify CONFIG_GCOV_PROFILE_ALL=y to get profiling data | ||
14 | for the entire kernel. To enable profiling for specific files or | ||
15 | directories, add a line similar to the following to the respective | ||
16 | Makefile: | ||
17 | |||
18 | For a single file (e.g. main.o): | ||
19 | GCOV_PROFILE_main.o := y | ||
20 | |||
21 | For all files in one directory: | ||
22 | GCOV_PROFILE := y | ||
23 | |||
24 | To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL | ||
25 | is specified, use: | ||
26 | |||
27 | GCOV_PROFILE_main.o := n | ||
28 | and: | ||
29 | GCOV_PROFILE := n | ||
30 | |||
31 | Note that the debugfs filesystem has to be mounted to access | ||
32 | profiling data. | ||
33 | |||
34 | config GCOV_PROFILE_ALL | ||
35 | bool "Profile entire Kernel" | ||
36 | depends on GCOV_KERNEL | ||
37 | depends on S390 || X86 | ||
38 | default n | ||
39 | ---help--- | ||
40 | This option activates profiling for the entire kernel. | ||
41 | |||
42 | If unsure, say N. | ||
43 | |||
44 | Note that a kernel compiled with profiling flags will be significantly | ||
45 | larger and run slower. Also be sure to exclude files from profiling | ||
46 | which are not linked to the kernel image to prevent linker errors. | ||
47 | |||
48 | endmenu | ||
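As a usage sketch, enabling full-kernel profiling combines the two new options with the dependencies declared above; an illustrative .config fragment:

	CONFIG_DEBUG_FS=y
	CONFIG_CONSTRUCTORS=y
	CONFIG_GCOV_KERNEL=y
	CONFIG_GCOV_PROFILE_ALL=y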
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile new file mode 100644 index 000000000000..3f761001d517 --- /dev/null +++ b/kernel/gcov/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' | ||
2 | |||
3 | obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o | ||
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c new file mode 100644 index 000000000000..9b22d03cc581 --- /dev/null +++ b/kernel/gcov/base.c | |||
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | * This code maintains a list of active profiling data structures. | ||
3 | * | ||
4 | * Copyright IBM Corp. 2009 | ||
5 | * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com> | ||
6 | * | ||
7 | * Uses gcc-internal data definitions. | ||
8 | * Based on the gcov-kernel patch by: | ||
9 | * Hubertus Franke <frankeh@us.ibm.com> | ||
10 | * Nigel Hinds <nhinds@us.ibm.com> | ||
11 | * Rajan Ravindran <rajancr@us.ibm.com> | ||
12 | * Peter Oberparleiter <oberpar@linux.vnet.ibm.com> | ||
13 | * Paul Larson | ||
14 | */ | ||
15 | |||
16 | #define pr_fmt(fmt) "gcov: " fmt | ||
17 | |||
18 | #include <linux/init.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/mutex.h> | ||
21 | #include "gcov.h" | ||
22 | |||
23 | static struct gcov_info *gcov_info_head; | ||
24 | static int gcov_events_enabled; | ||
25 | static DEFINE_MUTEX(gcov_lock); | ||
26 | |||
27 | /* | ||
28 | * __gcov_init is called by gcc-generated constructor code for each object | ||
29 | * file compiled with -fprofile-arcs. | ||
30 | */ | ||
31 | void __gcov_init(struct gcov_info *info) | ||
32 | { | ||
33 | static unsigned int gcov_version; | ||
34 | |||
35 | mutex_lock(&gcov_lock); | ||
36 | if (gcov_version == 0) { | ||
37 | gcov_version = info->version; | ||
38 | /* | ||
39 | * Printing gcc's version magic may prove useful for debugging | ||
40 | * incompatibility reports. | ||
41 | */ | ||
42 | pr_info("version magic: 0x%x\n", gcov_version); | ||
43 | } | ||
44 | /* | ||
45 | * Add new profiling data structure to list and inform event | ||
46 | * listener. | ||
47 | */ | ||
48 | info->next = gcov_info_head; | ||
49 | gcov_info_head = info; | ||
50 | if (gcov_events_enabled) | ||
51 | gcov_event(GCOV_ADD, info); | ||
52 | mutex_unlock(&gcov_lock); | ||
53 | } | ||
54 | EXPORT_SYMBOL(__gcov_init); | ||
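Conceptually, every object file built with -fprofile-arcs carries a constructor that registers its profiling data; a hedged sketch of the roughly equivalent C (gcc generates this internally, and the names below are illustrative):

	/* Per object file, roughly what the compiler emits: */
	static struct gcov_info this_file_gcov_info = {
		/* version, filename, counter arrays ... filled in by gcc */
	};

	static void __attribute__((constructor)) this_file_gcov_ctor(void)
	{
		__gcov_init(&this_file_gcov_info);
	}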
55 | |||
56 | /* | ||
57 | * These functions may be referenced by gcc-generated profiling code but serve | ||
58 | * no function for kernel profiling. | ||
59 | */ | ||
60 | void __gcov_flush(void) | ||
61 | { | ||
62 | /* Unused. */ | ||
63 | } | ||
64 | EXPORT_SYMBOL(__gcov_flush); | ||
65 | |||
66 | void __gcov_merge_add(gcov_type *counters, unsigned int n_counters) | ||
67 | { | ||
68 | /* Unused. */ | ||
69 | } | ||
70 | EXPORT_SYMBOL(__gcov_merge_add); | ||
71 | |||
72 | void __gcov_merge_single(gcov_type *counters, unsigned int n_counters) | ||
73 | { | ||
74 | /* Unused. */ | ||
75 | } | ||
76 | EXPORT_SYMBOL(__gcov_merge_single); | ||
77 | |||
78 | void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) | ||
79 | { | ||
80 | /* Unused. */ | ||
81 | } | ||
82 | EXPORT_SYMBOL(__gcov_merge_delta); | ||
83 | |||
84 | /** | ||
85 | * gcov_enable_events - enable event reporting through gcov_event() | ||
86 | * | ||
87 | * Turn on reporting of profiling data load/unload-events through the | ||
88 | * gcov_event() callback. Also replay all previous events once. This function | ||
89 | * is needed because some events are potentially generated too early for the | ||
90 | * callback implementation to handle them initially. | ||
91 | */ | ||
92 | void gcov_enable_events(void) | ||
93 | { | ||
94 | struct gcov_info *info; | ||
95 | |||
96 | mutex_lock(&gcov_lock); | ||
97 | gcov_events_enabled = 1; | ||
98 | /* Perform event callback for previously registered entries. */ | ||
99 | for (info = gcov_info_head; info; info = info->next) | ||
100 | gcov_event(GCOV_ADD, info); | ||
101 | mutex_unlock(&gcov_lock); | ||
102 | } | ||
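For reference, a logging-only stub of the gcov_event() callback that these events replay into (the real consumer lives in fs.c and manages debugfs nodes; the enum type name here is an assumption based on the GCOV_ADD/GCOV_REMOVE constants used above):

	/* Stand-in listener, illustrative only. */
	void gcov_event(enum gcov_action action, struct gcov_info *info)
	{
		if (action == GCOV_ADD)
			pr_info("profiling data loaded for %s\n", info->filename);
		else if (action == GCOV_REMOVE)
			pr_info("profiling data unloaded for %s\n", info->filename);
	}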
103 | |||
104 | #ifdef CONFIG_MODULES | ||
105 | static inline int within(void *addr, void *start, unsigned long size) | ||
106 | { | ||
107 | return ((addr >= start) && (addr < start + size)); | ||
108 | } | ||
109 | |||
110 | /* Update list and generate events when modules are unloaded. */ | ||
111 | static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, | ||
112 | void *data) | ||
113 | { | ||
114 | struct module *mod = data; | ||
115 | struct gcov_info *info; | ||
116 | struct gcov_info *prev; | ||
117 | |||
118 | if (event != MODULE_STATE_GOING) | ||
119 | return NOTIFY_OK; | ||
120 | mutex_lock(&gcov_lock); | ||
121 | prev = NULL; | ||
122 | /* Remove entries located in module from linked list. */ | ||
123 | for (info = gcov_info_head; info; info = info->next) { | ||
124 | if (within(info, mod->module_core, mod->core_size)) { | ||
125 | if (prev) | ||
126 | prev->next = info->next; | ||
127 | else | ||
128 | gcov_info_head = info->next; | ||
129 | if (gcov_events_enabled) | ||
130 | gcov_event(GCOV_REMOVE, info); | ||
131 | } else | ||
132 | prev = info; | ||
133 | } | ||
134 | mutex_unlock(&gcov_lock); | ||
135 | |||
136 | return NOTIFY_OK; | ||
137 | } | ||
138 | |||
139 | static struct notifier_block gcov_nb = { | ||
140 | .notifier_call = gcov_module_notifier, | ||
141 | }; | ||
142 | |||
143 | static int __init gcov_init(void) | ||
144 | { | ||
145 | return register_module_notifier(&gcov_nb); | ||
146 | } | ||
147 | device_initcall(gcov_init); | ||
148 | #endif /* CONFIG_MODULES */ | ||
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c new file mode 100644 index 000000000000..ef3c3f88a7a3 --- /dev/null +++ b/kernel/gcov/fs.c | |||
@@ -0,0 +1,673 @@ | |||
1 | /* | ||
2 | * This code exports profiling data as debugfs files to userspace. | ||
3 | * | ||
4 | * Copyright IBM Corp. 2009 | ||
5 | * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com> | ||
6 | * | ||
7 | * Uses gcc-internal data definitions. | ||
8 | * Based on the gcov-kernel patch by: | ||
9 | * Hubertus Franke <frankeh@us.ibm.com> | ||
10 | * Nigel Hinds <nhinds@us.ibm.com> | ||
11 | * Rajan Ravindran <rajancr@us.ibm.com> | ||
12 | * Peter Oberparleiter <oberpar@linux.vnet.ibm.com> | ||
13 | * Paul Larson | ||
14 | * Yi CDL Yang | ||
15 | */ | ||
16 | |||
17 | #define pr_fmt(fmt) "gcov: " fmt | ||
18 | |||
19 | #include <linux/init.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/debugfs.h> | ||
22 | #include <linux/fs.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/mutex.h> | ||
27 | #include <linux/seq_file.h> | ||
28 | #include "gcov.h" | ||
29 | |||
30 | /** | ||
31 | * struct gcov_node - represents a debugfs entry | ||
32 | * @list: list head for child node list | ||
33 | * @children: child nodes | ||
34 | * @all: list head for list of all nodes | ||
35 | * @parent: parent node | ||
36 | * @info: associated profiling data structure if not a directory | ||
37 | * @ghost: when an object file containing profiling data is unloaded we keep a | ||
38 | * copy of the profiling data here to allow collecting coverage data | ||
39 | * for cleanup code. Such a node is called a "ghost". | ||
40 | * @dentry: main debugfs entry, either a directory or data file | ||
41 | * @links: associated symbolic links | ||
42 | * @name: data file basename | ||
43 | * | ||
44 | * struct gcov_node represents an entity within the gcov/ subdirectory | ||
45 | * of debugfs. There are directory and data file nodes. The latter represent | ||
46 | * the actual synthesized data file plus any associated symbolic links which | ||
47 | * are needed by the gcov tool to work correctly. | ||
48 | */ | ||
49 | struct gcov_node { | ||
50 | struct list_head list; | ||
51 | struct list_head children; | ||
52 | struct list_head all; | ||
53 | struct gcov_node *parent; | ||
54 | struct gcov_info *info; | ||
55 | struct gcov_info *ghost; | ||
56 | struct dentry *dentry; | ||
57 | struct dentry **links; | ||
58 | char name[0]; | ||
59 | }; | ||
60 | |||
61 | static const char objtree[] = OBJTREE; | ||
62 | static const char srctree[] = SRCTREE; | ||
63 | static struct gcov_node root_node; | ||
64 | static struct dentry *reset_dentry; | ||
65 | static LIST_HEAD(all_head); | ||
66 | static DEFINE_MUTEX(node_lock); | ||
67 | |||
68 | /* If non-zero, keep copies of profiling data for unloaded modules. */ | ||
69 | static int gcov_persist = 1; | ||
70 | |||
71 | static int __init gcov_persist_setup(char *str) | ||
72 | { | ||
73 | unsigned long val; | ||
74 | |||
75 | if (strict_strtoul(str, 0, &val)) { | ||
76 | pr_warning("invalid gcov_persist parameter '%s'\n", str); | ||
77 | return 0; | ||
78 | } | ||
79 | gcov_persist = val; | ||
80 | pr_info("setting gcov_persist to %d\n", gcov_persist); | ||
81 | |||
82 | return 1; | ||
83 | } | ||
84 | __setup("gcov_persist=", gcov_persist_setup); | ||
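In practice this means booting with gcov_persist=0 on the kernel command line disables the retention of profiling data for unloaded modules described above.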
85 | |||
86 | /* | ||
87 | * seq_file.start() implementation for gcov data files. Note that the | ||
88 | * gcov_iterator interface is designed to be more restrictive than seq_file | ||
89 | * (no start from arbitrary position, etc.), to simplify the iterator | ||
90 | * implementation. | ||
91 | */ | ||
92 | static void *gcov_seq_start(struct seq_file *seq, loff_t *pos) | ||
93 | { | ||
94 | loff_t i; | ||
95 | |||
96 | gcov_iter_start(seq->private); | ||
97 | for (i = 0; i < *pos; i++) { | ||
98 | if (gcov_iter_next(seq->private)) | ||
99 | return NULL; | ||
100 | } | ||
101 | return seq->private; | ||
102 | } | ||
103 | |||
104 | /* seq_file.next() implementation for gcov data files. */ | ||
105 | static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) | ||
106 | { | ||
107 | struct gcov_iterator *iter = data; | ||
108 | |||
109 | if (gcov_iter_next(iter)) | ||
110 | return NULL; | ||
111 | (*pos)++; | ||
112 | |||
113 | return iter; | ||
114 | } | ||
115 | |||
116 | /* seq_file.show() implementation for gcov data files. */ | ||
117 | static int gcov_seq_show(struct seq_file *seq, void *data) | ||
118 | { | ||
119 | struct gcov_iterator *iter = data; | ||
120 | |||
121 | if (gcov_iter_write(iter, seq)) | ||
122 | return -EINVAL; | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | static void gcov_seq_stop(struct seq_file *seq, void *data) | ||
127 | { | ||
128 | /* Unused. */ | ||
129 | } | ||
130 | |||
131 | static const struct seq_operations gcov_seq_ops = { | ||
132 | .start = gcov_seq_start, | ||
133 | .next = gcov_seq_next, | ||
134 | .show = gcov_seq_show, | ||
135 | .stop = gcov_seq_stop, | ||
136 | }; | ||
137 | |||
138 | /* | ||
139 | * Return the profiling data set for a given node. This can either be the | ||
140 | * original profiling data structure or a duplicate (also called "ghost") | ||
141 | * in case the associated object file has been unloaded. | ||
142 | */ | ||
143 | static struct gcov_info *get_node_info(struct gcov_node *node) | ||
144 | { | ||
145 | if (node->info) | ||
146 | return node->info; | ||
147 | |||
148 | return node->ghost; | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * open() implementation for gcov data files. Create a copy of the profiling | ||
153 | * data set and initialize the iterator and seq_file interface. | ||
154 | */ | ||
155 | static int gcov_seq_open(struct inode *inode, struct file *file) | ||
156 | { | ||
157 | struct gcov_node *node = inode->i_private; | ||
158 | struct gcov_iterator *iter; | ||
159 | struct seq_file *seq; | ||
160 | struct gcov_info *info; | ||
161 | int rc = -ENOMEM; | ||
162 | |||
163 | mutex_lock(&node_lock); | ||
164 | /* | ||
165 | * Read from a profiling data copy to minimize reference tracking | ||
166 | * complexity and concurrent access. | ||
167 | */ | ||
168 | info = gcov_info_dup(get_node_info(node)); | ||
169 | if (!info) | ||
170 | goto out_unlock; | ||
171 | iter = gcov_iter_new(info); | ||
172 | if (!iter) | ||
173 | goto err_free_info; | ||
174 | rc = seq_open(file, &gcov_seq_ops); | ||
175 | if (rc) | ||
176 | goto err_free_iter_info; | ||
177 | seq = file->private_data; | ||
178 | seq->private = iter; | ||
179 | out_unlock: | ||
180 | mutex_unlock(&node_lock); | ||
181 | return rc; | ||
182 | |||
183 | err_free_iter_info: | ||
184 | gcov_iter_free(iter); | ||
185 | err_free_info: | ||
186 | gcov_info_free(info); | ||
187 | goto out_unlock; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * release() implementation for gcov data files. Release resources allocated | ||
192 | * by open(). | ||
193 | */ | ||
194 | static int gcov_seq_release(struct inode *inode, struct file *file) | ||
195 | { | ||
196 | struct gcov_iterator *iter; | ||
197 | struct gcov_info *info; | ||
198 | struct seq_file *seq; | ||
199 | |||
200 | seq = file->private_data; | ||
201 | iter = seq->private; | ||
202 | info = gcov_iter_get_info(iter); | ||
203 | gcov_iter_free(iter); | ||
204 | gcov_info_free(info); | ||
205 | seq_release(inode, file); | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Find a node by the associated data file name. Needs to be called with | ||
212 | * node_lock held. | ||
213 | */ | ||
214 | static struct gcov_node *get_node_by_name(const char *name) | ||
215 | { | ||
216 | struct gcov_node *node; | ||
217 | struct gcov_info *info; | ||
218 | |||
219 | list_for_each_entry(node, &all_head, all) { | ||
220 | info = get_node_info(node); | ||
221 | if (info && (strcmp(info->filename, name) == 0)) | ||
222 | return node; | ||
223 | } | ||
224 | |||
225 | return NULL; | ||
226 | } | ||
227 | |||
228 | static void remove_node(struct gcov_node *node); | ||
229 | |||
230 | /* | ||
231 | * write() implementation for gcov data files. Reset profiling data for the | ||
232 | * associated file. If the object file has been unloaded (i.e. this is | ||
233 | * a "ghost" node), remove the debug fs node as well. | ||
234 | */ | ||
235 | static ssize_t gcov_seq_write(struct file *file, const char __user *addr, | ||
236 | size_t len, loff_t *pos) | ||
237 | { | ||
238 | struct seq_file *seq; | ||
239 | struct gcov_info *info; | ||
240 | struct gcov_node *node; | ||
241 | |||
242 | seq = file->private_data; | ||
243 | info = gcov_iter_get_info(seq->private); | ||
244 | mutex_lock(&node_lock); | ||
245 | node = get_node_by_name(info->filename); | ||
246 | if (node) { | ||
247 | /* Reset counts or remove node for unloaded modules. */ | ||
248 | if (node->ghost) | ||
249 | remove_node(node); | ||
250 | else | ||
251 | gcov_info_reset(node->info); | ||
252 | } | ||
253 | /* Reset counts for open file. */ | ||
254 | gcov_info_reset(info); | ||
255 | mutex_unlock(&node_lock); | ||
256 | |||
257 | return len; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Given a string <path> representing a file path of format: | ||
262 | * path/to/file.gcda | ||
263 | * construct and return a new string: | ||
264 | * <dir/>path/to/file.<ext> | ||
265 | */ | ||
266 | static char *link_target(const char *dir, const char *path, const char *ext) | ||
267 | { | ||
268 | char *target; | ||
269 | char *old_ext; | ||
270 | char *copy; | ||
271 | |||
272 | copy = kstrdup(path, GFP_KERNEL); | ||
273 | if (!copy) | ||
274 | return NULL; | ||
275 | old_ext = strrchr(copy, '.'); | ||
276 | if (old_ext) | ||
277 | *old_ext = '\0'; | ||
278 | if (dir) | ||
279 | target = kasprintf(GFP_KERNEL, "%s/%s.%s", dir, copy, ext); | ||
280 | else | ||
281 | target = kasprintf(GFP_KERNEL, "%s.%s", copy, ext); | ||
282 | kfree(copy); | ||
283 | |||
284 | return target; | ||
285 | } | ||
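As a worked example, link_target("/usr/src/linux", "kernel/gcov/fs.gcda", "gcno") would return "/usr/src/linux/kernel/gcov/fs.gcno" (paths chosen purely for illustration).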
286 | |||
287 | /* | ||
288 | * Construct a string representing the symbolic link target for the given | ||
289 | * gcov data file name and link type. Depending on the link type and the | ||
290 | * location of the data file, the link target can either point to a | ||
291 | * subdirectory of srctree, objtree or in an external location. | ||
292 | */ | ||
293 | static char *get_link_target(const char *filename, const struct gcov_link *ext) | ||
294 | { | ||
295 | const char *rel; | ||
296 | char *result; | ||
297 | |||
298 | if (strncmp(filename, objtree, strlen(objtree)) == 0) { | ||
299 | rel = filename + strlen(objtree) + 1; | ||
300 | if (ext->dir == SRC_TREE) | ||
301 | result = link_target(srctree, rel, ext->ext); | ||
302 | else | ||
303 | result = link_target(objtree, rel, ext->ext); | ||
304 | } else { | ||
305 | /* External compilation. */ | ||
306 | result = link_target(NULL, filename, ext->ext); | ||
307 | } | ||
308 | |||
309 | return result; | ||
310 | } | ||
311 | |||
312 | #define SKEW_PREFIX ".tmp_" | ||
313 | |||
314 | /* | ||
315 | * For a filename .tmp_filename.ext return filename.ext. Needed to compensate | ||
316 | * for filename skewing caused by the mod-versioning mechanism. | ||
317 | */ | ||
318 | static const char *deskew(const char *basename) | ||
319 | { | ||
320 | if (strncmp(basename, SKEW_PREFIX, sizeof(SKEW_PREFIX) - 1) == 0) | ||
321 | return basename + sizeof(SKEW_PREFIX) - 1; | ||
322 | return basename; | ||
323 | } | ||
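For example, deskew(".tmp_usb_storage.gcda") returns a pointer to "usb_storage.gcda" (filename illustrative).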
324 | |||
325 | /* | ||
326 | * Create links to additional files (usually .c and .gcno files) which the | ||
327 | * gcov tool expects to find in the same directory as the gcov data file. | ||
328 | */ | ||
329 | static void add_links(struct gcov_node *node, struct dentry *parent) | ||
330 | { | ||
331 | char *basename; | ||
332 | char *target; | ||
333 | int num; | ||
334 | int i; | ||
335 | |||
336 | for (num = 0; gcov_link[num].ext; num++) | ||
337 | /* Nothing. */; | ||
338 | node->links = kcalloc(num, sizeof(struct dentry *), GFP_KERNEL); | ||
339 | if (!node->links) | ||
340 | return; | ||
341 | for (i = 0; i < num; i++) { | ||
342 | target = get_link_target(get_node_info(node)->filename, | ||
343 | &gcov_link[i]); | ||
344 | if (!target) | ||
345 | goto out_err; | ||
346 | basename = strrchr(target, '/'); | ||
347 | if (!basename) | ||
348 | goto out_err; | ||
349 | basename++; | ||
350 | node->links[i] = debugfs_create_symlink(deskew(basename), | ||
351 | parent, target); | ||
352 | if (!node->links[i]) | ||
353 | goto out_err; | ||
354 | kfree(target); | ||
355 | } | ||
356 | |||
357 | return; | ||
358 | out_err: | ||
359 | kfree(target); | ||
360 | while (i-- > 0) | ||
361 | debugfs_remove(node->links[i]); | ||
362 | kfree(node->links); | ||
363 | node->links = NULL; | ||
364 | } | ||
365 | |||
366 | static const struct file_operations gcov_data_fops = { | ||
367 | .open = gcov_seq_open, | ||
368 | .release = gcov_seq_release, | ||
369 | .read = seq_read, | ||
370 | .llseek = seq_lseek, | ||
371 | .write = gcov_seq_write, | ||
372 | }; | ||
373 | |||
374 | /* Basic initialization of a new node. */ | ||
375 | static void init_node(struct gcov_node *node, struct gcov_info *info, | ||
376 | const char *name, struct gcov_node *parent) | ||
377 | { | ||
378 | INIT_LIST_HEAD(&node->list); | ||
379 | INIT_LIST_HEAD(&node->children); | ||
380 | INIT_LIST_HEAD(&node->all); | ||
381 | node->info = info; | ||
382 | node->parent = parent; | ||
383 | if (name) | ||
384 | strcpy(node->name, name); | ||
385 | } | ||
386 | |||
387 | /* | ||
388 | * Create a new node and associated debugfs entry. Needs to be called with | ||
389 | * node_lock held. | ||
390 | */ | ||
391 | static struct gcov_node *new_node(struct gcov_node *parent, | ||
392 | struct gcov_info *info, const char *name) | ||
393 | { | ||
394 | struct gcov_node *node; | ||
395 | |||
396 | node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL); | ||
397 | if (!node) { | ||
398 | pr_warning("out of memory\n"); | ||
399 | return NULL; | ||
400 | } | ||
401 | init_node(node, info, name, parent); | ||
402 | /* Differentiate between gcov data file nodes and directory nodes. */ | ||
403 | if (info) { | ||
404 | node->dentry = debugfs_create_file(deskew(node->name), 0600, | ||
405 | parent->dentry, node, &gcov_data_fops); | ||
406 | } else | ||
407 | node->dentry = debugfs_create_dir(node->name, parent->dentry); | ||
408 | if (!node->dentry) { | ||
409 | pr_warning("could not create file\n"); | ||
410 | kfree(node); | ||
411 | return NULL; | ||
412 | } | ||
413 | if (info) | ||
414 | add_links(node, parent->dentry); | ||
415 | list_add(&node->list, &parent->children); | ||
416 | list_add(&node->all, &all_head); | ||
417 | |||
418 | return node; | ||
419 | } | ||
420 | |||
421 | /* Remove symbolic links associated with node. */ | ||
422 | static void remove_links(struct gcov_node *node) | ||
423 | { | ||
424 | int i; | ||
425 | |||
426 | if (!node->links) | ||
427 | return; | ||
428 | for (i = 0; gcov_link[i].ext; i++) | ||
429 | debugfs_remove(node->links[i]); | ||
430 | kfree(node->links); | ||
431 | node->links = NULL; | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * Remove node from all lists and debugfs and release associated resources. | ||
436 | * Needs to be called with node_lock held. | ||
437 | */ | ||
438 | static void release_node(struct gcov_node *node) | ||
439 | { | ||
440 | list_del(&node->list); | ||
441 | list_del(&node->all); | ||
442 | debugfs_remove(node->dentry); | ||
443 | remove_links(node); | ||
444 | if (node->ghost) | ||
445 | gcov_info_free(node->ghost); | ||
446 | kfree(node); | ||
447 | } | ||
448 | |||
449 | /* Release node and empty parents. Needs to be called with node_lock held. */ | ||
450 | static void remove_node(struct gcov_node *node) | ||
451 | { | ||
452 | struct gcov_node *parent; | ||
453 | |||
454 | while ((node != &root_node) && list_empty(&node->children)) { | ||
455 | parent = node->parent; | ||
456 | release_node(node); | ||
457 | node = parent; | ||
458 | } | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Find child node with given basename. Needs to be called with node_lock | ||
463 | * held. | ||
464 | */ | ||
465 | static struct gcov_node *get_child_by_name(struct gcov_node *parent, | ||
466 | const char *name) | ||
467 | { | ||
468 | struct gcov_node *node; | ||
469 | |||
470 | list_for_each_entry(node, &parent->children, list) { | ||
471 | if (strcmp(node->name, name) == 0) | ||
472 | return node; | ||
473 | } | ||
474 | |||
475 | return NULL; | ||
476 | } | ||
477 | |||
478 | /* | ||
479 | * write() implementation for reset file. Reset all profiling data to zero | ||
480 | * and remove ghost nodes. | ||
481 | */ | ||
482 | static ssize_t reset_write(struct file *file, const char __user *addr, | ||
483 | size_t len, loff_t *pos) | ||
484 | { | ||
485 | struct gcov_node *node; | ||
486 | |||
487 | mutex_lock(&node_lock); | ||
488 | restart: | ||
489 | list_for_each_entry(node, &all_head, all) { | ||
490 | if (node->info) | ||
491 | gcov_info_reset(node->info); | ||
492 | else if (list_empty(&node->children)) { | ||
493 | remove_node(node); | ||
494 | /* Several nodes may have gone - restart loop. */ | ||
495 | goto restart; | ||
496 | } | ||
497 | } | ||
498 | mutex_unlock(&node_lock); | ||
499 | |||
500 | return len; | ||
501 | } | ||
502 | |||
503 | /* read() implementation for reset file. Unused. */ | ||
504 | static ssize_t reset_read(struct file *file, char __user *addr, size_t len, | ||
505 | loff_t *pos) | ||
506 | { | ||
507 | /* Allow read operation so that a recursive copy won't fail. */ | ||
508 | return 0; | ||
509 | } | ||
510 | |||
511 | static const struct file_operations gcov_reset_fops = { | ||
512 | .write = reset_write, | ||
513 | .read = reset_read, | ||
514 | }; | ||
515 | |||
516 | /* | ||
517 | * Create a node for a given profiling data set and add it to all lists and | ||
518 | * debugfs. Needs to be called with node_lock held. | ||
519 | */ | ||
520 | static void add_node(struct gcov_info *info) | ||
521 | { | ||
522 | char *filename; | ||
523 | char *curr; | ||
524 | char *next; | ||
525 | struct gcov_node *parent; | ||
526 | struct gcov_node *node; | ||
527 | |||
528 | filename = kstrdup(info->filename, GFP_KERNEL); | ||
529 | if (!filename) | ||
530 | return; | ||
531 | parent = &root_node; | ||
532 | /* Create directory nodes along the path. */ | ||
533 | for (curr = filename; (next = strchr(curr, '/')); curr = next + 1) { | ||
534 | if (curr == next) | ||
535 | continue; | ||
536 | *next = 0; | ||
537 | if (strcmp(curr, ".") == 0) | ||
538 | continue; | ||
539 | if (strcmp(curr, "..") == 0) { | ||
540 | if (!parent->parent) | ||
541 | goto err_remove; | ||
542 | parent = parent->parent; | ||
543 | continue; | ||
544 | } | ||
545 | node = get_child_by_name(parent, curr); | ||
546 | if (!node) { | ||
547 | node = new_node(parent, NULL, curr); | ||
548 | if (!node) | ||
549 | goto err_remove; | ||
550 | } | ||
551 | parent = node; | ||
552 | } | ||
553 | /* Create file node. */ | ||
554 | node = new_node(parent, info, curr); | ||
555 | if (!node) | ||
556 | goto err_remove; | ||
557 | out: | ||
558 | kfree(filename); | ||
559 | return; | ||
560 | |||
561 | err_remove: | ||
562 | remove_node(parent); | ||
563 | goto out; | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * The profiling data set associated with this node is being unloaded. Store a | ||
568 | * copy of the profiling data and turn this node into a "ghost". | ||
569 | */ | ||
570 | static int ghost_node(struct gcov_node *node) | ||
571 | { | ||
572 | node->ghost = gcov_info_dup(node->info); | ||
573 | if (!node->ghost) { | ||
574 | pr_warning("could not save data for '%s' (out of memory)\n", | ||
575 | node->info->filename); | ||
576 | return -ENOMEM; | ||
577 | } | ||
578 | node->info = NULL; | ||
579 | |||
580 | return 0; | ||
581 | } | ||
582 | |||
583 | /* | ||
584 | * Profiling data for this node has been loaded again. Add profiling data | ||
585 | * from previous instantiation and turn this node into a regular node. | ||
586 | */ | ||
587 | static void revive_node(struct gcov_node *node, struct gcov_info *info) | ||
588 | { | ||
589 | if (gcov_info_is_compatible(node->ghost, info)) | ||
590 | gcov_info_add(info, node->ghost); | ||
591 | else { | ||
592 | pr_warning("discarding saved data for '%s' (version changed)\n", | ||
593 | info->filename); | ||
594 | } | ||
595 | gcov_info_free(node->ghost); | ||
596 | node->ghost = NULL; | ||
597 | node->info = info; | ||
598 | } | ||
599 | |||
600 | /* | ||
601 | * Callback to create/remove profiling files when code compiled with | ||
602 | * -fprofile-arcs is loaded/unloaded. | ||
603 | */ | ||
604 | void gcov_event(enum gcov_action action, struct gcov_info *info) | ||
605 | { | ||
606 | struct gcov_node *node; | ||
607 | |||
608 | mutex_lock(&node_lock); | ||
609 | node = get_node_by_name(info->filename); | ||
610 | switch (action) { | ||
611 | case GCOV_ADD: | ||
612 | /* Add new node or revive ghost. */ | ||
613 | if (!node) { | ||
614 | add_node(info); | ||
615 | break; | ||
616 | } | ||
617 | if (gcov_persist) | ||
618 | revive_node(node, info); | ||
619 | else { | ||
620 | pr_warning("could not add '%s' (already exists)\n", | ||
621 | info->filename); | ||
622 | } | ||
623 | break; | ||
624 | case GCOV_REMOVE: | ||
625 | /* Remove node or turn into ghost. */ | ||
626 | if (!node) { | ||
627 | pr_warning("could not remove '%s' (not found)\n", | ||
628 | info->filename); | ||
629 | break; | ||
630 | } | ||
631 | if (gcov_persist) { | ||
632 | if (!ghost_node(node)) | ||
633 | break; | ||
634 | } | ||
635 | remove_node(node); | ||
636 | break; | ||
637 | } | ||
638 | mutex_unlock(&node_lock); | ||
639 | } | ||
640 | |||
641 | /* Create debugfs entries. */ | ||
642 | static __init int gcov_fs_init(void) | ||
643 | { | ||
644 | int rc = -EIO; | ||
645 | |||
646 | init_node(&root_node, NULL, NULL, NULL); | ||
647 | /* | ||
648 | * /sys/kernel/debug/gcov will be parent for the reset control file | ||
649 | * and all profiling files. | ||
650 | */ | ||
651 | root_node.dentry = debugfs_create_dir("gcov", NULL); | ||
652 | if (!root_node.dentry) | ||
653 | goto err_remove; | ||
654 | /* | ||
655 | * Create reset file which resets all profiling counts when written | ||
656 | * to. | ||
657 | */ | ||
658 | reset_dentry = debugfs_create_file("reset", 0600, root_node.dentry, | ||
659 | NULL, &gcov_reset_fops); | ||
660 | if (!reset_dentry) | ||
661 | goto err_remove; | ||
662 | /* Replay previous events to get our fs hierarchy up-to-date. */ | ||
663 | gcov_enable_events(); | ||
664 | return 0; | ||
665 | |||
666 | err_remove: | ||
667 | pr_err("init failed\n"); | ||
668 | if (root_node.dentry) | ||
669 | debugfs_remove(root_node.dentry); | ||
670 | |||
671 | return rc; | ||
672 | } | ||
673 | device_initcall(gcov_fs_init); | ||
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c new file mode 100644 index 000000000000..ae5bb4260033 --- /dev/null +++ b/kernel/gcov/gcc_3_4.c | |||
@@ -0,0 +1,447 @@ | |||
1 | /* | ||
2 | * This code provides functions to handle gcc's profiling data format | ||
3 | * introduced with gcc 3.4. Future versions of gcc may change the gcov | ||
4 | * format (as happened before), so all format-specific information needs | ||
5 | * to be kept modular and easily exchangeable. | ||
6 | * | ||
7 | * This file is based on gcc-internal definitions. Functions and data | ||
8 | * structures are defined to be compatible with gcc counterparts. | ||
9 | * For a better understanding, refer to gcc source: gcc/gcov-io.h. | ||
10 | * | ||
11 | * Copyright IBM Corp. 2009 | ||
12 | * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com> | ||
13 | * | ||
14 | * Uses gcc-internal data definitions. | ||
15 | */ | ||
16 | |||
17 | #include <linux/errno.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include "gcov.h" | ||
23 | |||
24 | /* Symbolic links to be created for each profiling data file. */ | ||
25 | const struct gcov_link gcov_link[] = { | ||
26 | { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ | ||
27 | { 0, NULL}, | ||
28 | }; | ||
29 | |||
30 | /* | ||
31 | * Determine whether a counter is active. Based on gcc magic. Doesn't change | ||
32 | * at run-time. | ||
33 | */ | ||
34 | static int counter_active(struct gcov_info *info, unsigned int type) | ||
35 | { | ||
36 | return (1 << type) & info->ctr_mask; | ||
37 | } | ||
38 | |||
39 | /* Determine number of active counters. Based on gcc magic. */ | ||
40 | static unsigned int num_counter_active(struct gcov_info *info) | ||
41 | { | ||
42 | unsigned int i; | ||
43 | unsigned int result = 0; | ||
44 | |||
45 | for (i = 0; i < GCOV_COUNTERS; i++) { | ||
46 | if (counter_active(info, i)) | ||
47 | result++; | ||
48 | } | ||
49 | return result; | ||
50 | } | ||
51 | |||
52 | /** | ||
53 | * gcov_info_reset - reset profiling data to zero | ||
54 | * @info: profiling data set | ||
55 | */ | ||
56 | void gcov_info_reset(struct gcov_info *info) | ||
57 | { | ||
58 | unsigned int active = num_counter_active(info); | ||
59 | unsigned int i; | ||
60 | |||
61 | for (i = 0; i < active; i++) { | ||
62 | memset(info->counts[i].values, 0, | ||
63 | info->counts[i].num * sizeof(gcov_type)); | ||
64 | } | ||
65 | } | ||
66 | |||
67 | /** | ||
68 | * gcov_info_is_compatible - check if profiling data can be added | ||
69 | * @info1: first profiling data set | ||
70 | * @info2: second profiling data set | ||
71 | * | ||
72 | * Returns non-zero if profiling data can be added, zero otherwise. | ||
73 | */ | ||
74 | int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2) | ||
75 | { | ||
76 | return (info1->stamp == info2->stamp); | ||
77 | } | ||
78 | |||
79 | /** | ||
80 | * gcov_info_add - add up profiling data | ||
81 | * @dest: profiling data set to which data is added | ||
82 | * @source: profiling data set which is added | ||
83 | * | ||
84 | * Adds profiling counts of @source to @dest. | ||
85 | */ | ||
86 | void gcov_info_add(struct gcov_info *dest, struct gcov_info *source) | ||
87 | { | ||
88 | unsigned int i; | ||
89 | unsigned int j; | ||
90 | |||
91 | for (i = 0; i < num_counter_active(dest); i++) { | ||
92 | for (j = 0; j < dest->counts[i].num; j++) { | ||
93 | dest->counts[i].values[j] += | ||
94 | source->counts[i].values[j]; | ||
95 | } | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /* Get size of function info entry. Based on gcc magic. */ | ||
100 | static size_t get_fn_size(struct gcov_info *info) | ||
101 | { | ||
102 | size_t size; | ||
103 | |||
104 | size = sizeof(struct gcov_fn_info) + num_counter_active(info) * | ||
105 | sizeof(unsigned int); | ||
106 | if (__alignof__(struct gcov_fn_info) > sizeof(unsigned int)) | ||
107 | size = ALIGN(size, __alignof__(struct gcov_fn_info)); | ||
108 | return size; | ||
109 | } | ||
110 | |||
111 | /* Get address of function info entry. Based on gcc magic. */ | ||
112 | static struct gcov_fn_info *get_fn_info(struct gcov_info *info, unsigned int fn) | ||
113 | { | ||
114 | return (struct gcov_fn_info *) | ||
115 | ((char *) info->functions + fn * get_fn_size(info)); | ||
116 | } | ||
117 | |||
118 | /** | ||
119 | * gcov_info_dup - duplicate profiling data set | ||
120 | * @info: profiling data set to duplicate | ||
121 | * | ||
122 | * Return newly allocated duplicate on success, %NULL on error. | ||
123 | */ | ||
124 | struct gcov_info *gcov_info_dup(struct gcov_info *info) | ||
125 | { | ||
126 | struct gcov_info *dup; | ||
127 | unsigned int i; | ||
128 | unsigned int active; | ||
129 | |||
130 | /* Duplicate gcov_info. */ | ||
131 | active = num_counter_active(info); | ||
132 | dup = kzalloc(sizeof(struct gcov_info) + | ||
133 | sizeof(struct gcov_ctr_info) * active, GFP_KERNEL); | ||
134 | if (!dup) | ||
135 | return NULL; | ||
136 | dup->version = info->version; | ||
137 | dup->stamp = info->stamp; | ||
138 | dup->n_functions = info->n_functions; | ||
139 | dup->ctr_mask = info->ctr_mask; | ||
140 | /* Duplicate filename. */ | ||
141 | dup->filename = kstrdup(info->filename, GFP_KERNEL); | ||
142 | if (!dup->filename) | ||
143 | goto err_free; | ||
144 | /* Duplicate table of functions. */ | ||
145 | dup->functions = kmemdup(info->functions, info->n_functions * | ||
146 | get_fn_size(info), GFP_KERNEL); | ||
147 | if (!dup->functions) | ||
148 | goto err_free; | ||
149 | /* Duplicate counter arrays. */ | ||
150 | for (i = 0; i < active; i++) { | ||
151 | struct gcov_ctr_info *ctr = &info->counts[i]; | ||
152 | size_t size = ctr->num * sizeof(gcov_type); | ||
153 | |||
154 | dup->counts[i].num = ctr->num; | ||
155 | dup->counts[i].merge = ctr->merge; | ||
156 | dup->counts[i].values = vmalloc(size); | ||
157 | if (!dup->counts[i].values) | ||
158 | goto err_free; | ||
159 | memcpy(dup->counts[i].values, ctr->values, size); | ||
160 | } | ||
161 | return dup; | ||
162 | |||
163 | err_free: | ||
164 | gcov_info_free(dup); | ||
165 | return NULL; | ||
166 | } | ||
167 | |||
168 | /** | ||
169 | * gcov_info_free - release memory for profiling data set duplicate | ||
170 | * @info: profiling data set duplicate to free | ||
171 | */ | ||
172 | void gcov_info_free(struct gcov_info *info) | ||
173 | { | ||
174 | unsigned int active = num_counter_active(info); | ||
175 | unsigned int i; | ||
176 | |||
177 | for (i = 0; i < active; i++) | ||
178 | vfree(info->counts[i].values); | ||
179 | kfree(info->functions); | ||
180 | kfree(info->filename); | ||
181 | kfree(info); | ||
182 | } | ||
183 | |||
184 | /** | ||
185 | * struct type_info - iterator helper array | ||
186 | * @ctr_type: counter type | ||
187 | * @offset: index of the first value of the current function for this type | ||
188 | * | ||
189 | * This array is needed to convert the in-memory data format into the in-file | ||
190 | * data format: | ||
191 | * | ||
192 | * In-memory: | ||
193 | * for each counter type | ||
194 | * for each function | ||
195 | * values | ||
196 | * | ||
197 | * In-file: | ||
198 | * for each function | ||
199 | * for each counter type | ||
200 | * values | ||
201 | * | ||
202 | * See gcc source gcc/gcov-io.h for more information on data organization. | ||
203 | */ | ||
204 | struct type_info { | ||
205 | int ctr_type; | ||
206 | unsigned int offset; | ||
207 | }; | ||
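A minimal userspace sketch of the reordering this helper array enables (counts and values invented; in this toy example every function has the same number of values for each active counter type):

    #include <stdio.h>

    int main(void)
    {
            /* Invented example: 2 counter types, 2 functions;
             * function 0 has 2 values per type, function 1 has 1. */
            int n_ctrs[2] = { 2, 1 };            /* values per function */
            long values[2][3] = {                /* in-memory: per type */
                    { 10, 11, 12 },              /* type 0: fn0, fn0, fn1 */
                    { 20, 21, 22 },              /* type 1: fn0, fn0, fn1 */
            };
            unsigned int offset[2] = { 0, 0 };   /* like type_info.offset */

            /* In-file order: for each function, for each type, values. */
            for (int fn = 0; fn < 2; fn++) {
                    for (int type = 0; type < 2; type++) {
                            for (int i = 0; i < n_ctrs[fn]; i++)
                                    printf("%ld ", values[type][offset[type] + i]);
                            offset[type] += n_ctrs[fn];
                    }
            }
            printf("\n");   /* prints: 10 11 20 21 12 22 */
            return 0;
    }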
208 | |||
209 | /** | ||
210 | * struct gcov_iterator - specifies current file position in logical records | ||
211 | * @info: associated profiling data | ||
212 | * @record: record type | ||
213 | * @function: function number | ||
214 | * @type: counter type | ||
215 | * @count: index into values array | ||
216 | * @num_types: number of counter types | ||
217 | * @type_info: helper array to get values-array offset for current function | ||
218 | */ | ||
219 | struct gcov_iterator { | ||
220 | struct gcov_info *info; | ||
221 | |||
222 | int record; | ||
223 | unsigned int function; | ||
224 | unsigned int type; | ||
225 | unsigned int count; | ||
226 | |||
227 | int num_types; | ||
228 | struct type_info type_info[0]; | ||
229 | }; | ||
230 | |||
231 | static struct gcov_fn_info *get_func(struct gcov_iterator *iter) | ||
232 | { | ||
233 | return get_fn_info(iter->info, iter->function); | ||
234 | } | ||
235 | |||
236 | static struct type_info *get_type(struct gcov_iterator *iter) | ||
237 | { | ||
238 | return &iter->type_info[iter->type]; | ||
239 | } | ||
240 | |||
241 | /** | ||
242 | * gcov_iter_new - allocate and initialize profiling data iterator | ||
243 | * @info: profiling data set to be iterated | ||
244 | * | ||
245 | * Return file iterator on success, %NULL otherwise. | ||
246 | */ | ||
247 | struct gcov_iterator *gcov_iter_new(struct gcov_info *info) | ||
248 | { | ||
249 | struct gcov_iterator *iter; | ||
250 | |||
251 | iter = kzalloc(sizeof(struct gcov_iterator) + | ||
252 | num_counter_active(info) * sizeof(struct type_info), | ||
253 | GFP_KERNEL); | ||
254 | if (iter) | ||
255 | iter->info = info; | ||
256 | |||
257 | return iter; | ||
258 | } | ||
259 | |||
260 | /** | ||
261 | * gcov_iter_free - release memory for iterator | ||
262 | * @iter: file iterator to free | ||
263 | */ | ||
264 | void gcov_iter_free(struct gcov_iterator *iter) | ||
265 | { | ||
266 | kfree(iter); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * gcov_iter_get_info - return profiling data set for given file iterator | ||
271 | * @iter: file iterator | ||
272 | */ | ||
273 | struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) | ||
274 | { | ||
275 | return iter->info; | ||
276 | } | ||
277 | |||
278 | /** | ||
279 | * gcov_iter_start - reset file iterator to starting position | ||
280 | * @iter: file iterator | ||
281 | */ | ||
282 | void gcov_iter_start(struct gcov_iterator *iter) | ||
283 | { | ||
284 | int i; | ||
285 | |||
286 | iter->record = 0; | ||
287 | iter->function = 0; | ||
288 | iter->type = 0; | ||
289 | iter->count = 0; | ||
290 | iter->num_types = 0; | ||
291 | for (i = 0; i < GCOV_COUNTERS; i++) { | ||
292 | if (counter_active(iter->info, i)) { | ||
293 | iter->type_info[iter->num_types].ctr_type = i; | ||
294 | iter->type_info[iter->num_types++].offset = 0; | ||
295 | } | ||
296 | } | ||
297 | } | ||
298 | |||
299 | /* Mapping of logical record number to actual file content. */ | ||
300 | #define RECORD_FILE_MAGIC 0 | ||
301 | #define RECORD_GCOV_VERSION 1 | ||
302 | #define RECORD_TIME_STAMP 2 | ||
303 | #define RECORD_FUNCTION_TAG 3 | ||
304 | #define RECORD_FUNCTION_TAG_LEN 4 | ||
305 | #define RECORD_FUNCTION_IDENT 5 | ||
306 | #define RECORD_FUNCTION_CHECK 6 | ||
307 | #define RECORD_COUNT_TAG 7 | ||
308 | #define RECORD_COUNT_LEN 8 | ||
309 | #define RECORD_COUNT 9 | ||
310 | |||
311 | /** | ||
312 | * gcov_iter_next - advance file iterator to next logical record | ||
313 | * @iter: file iterator | ||
314 | * | ||
315 | * Return zero if new position is valid, non-zero if iterator has reached end. | ||
316 | */ | ||
317 | int gcov_iter_next(struct gcov_iterator *iter) | ||
318 | { | ||
319 | switch (iter->record) { | ||
320 | case RECORD_FILE_MAGIC: | ||
321 | case RECORD_GCOV_VERSION: | ||
322 | case RECORD_FUNCTION_TAG: | ||
323 | case RECORD_FUNCTION_TAG_LEN: | ||
324 | case RECORD_FUNCTION_IDENT: | ||
325 | case RECORD_COUNT_TAG: | ||
326 | /* Advance to next record */ | ||
327 | iter->record++; | ||
328 | break; | ||
329 | case RECORD_COUNT: | ||
330 | /* Advance to next count */ | ||
331 | iter->count++; | ||
332 | /* fall through */ | ||
333 | case RECORD_COUNT_LEN: | ||
334 | if (iter->count < get_func(iter)->n_ctrs[iter->type]) { | ||
335 | iter->record = RECORD_COUNT; | ||
336 | break; | ||
337 | } | ||
338 | /* Advance to next counter type */ | ||
339 | get_type(iter)->offset += iter->count; | ||
340 | iter->count = 0; | ||
341 | iter->type++; | ||
342 | /* fall through */ | ||
343 | case RECORD_FUNCTION_CHECK: | ||
344 | if (iter->type < iter->num_types) { | ||
345 | iter->record = RECORD_COUNT_TAG; | ||
346 | break; | ||
347 | } | ||
348 | /* Advance to next function */ | ||
349 | iter->type = 0; | ||
350 | iter->function++; | ||
351 | /* fall through */ | ||
352 | case RECORD_TIME_STAMP: | ||
353 | if (iter->function < iter->info->n_functions) | ||
354 | iter->record = RECORD_FUNCTION_TAG; | ||
355 | else | ||
356 | iter->record = -1; | ||
357 | break; | ||
358 | } | ||
359 | /* Check for EOF. */ | ||
360 | if (iter->record == -1) | ||
361 | return -EINVAL; | ||
362 | else | ||
363 | return 0; | ||
364 | } | ||
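A hedged userspace sketch of the record grammar this state machine walks (record names abbreviated, sizes invented): the three header records come first, then per function the four function records, followed by one count block per active counter type:

    #include <stdio.h>

    int main(void)
    {
            int n_functions = 2, num_types = 1, n_ctrs = 3;

            printf("magic version stamp ");
            for (int fn = 0; fn < n_functions; fn++) {
                    printf("fn_tag fn_tag_len fn_ident fn_check ");
                    for (int type = 0; type < num_types; type++) {
                            printf("cnt_tag cnt_len ");
                            for (int c = 0; c < n_ctrs; c++)
                                    printf("count ");
                    }
            }
            printf("\n");
            return 0;
    }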
365 | |||
366 | /** | ||
367 | * seq_write_gcov_u32 - write 32 bit number in gcov format to seq_file | ||
368 | * @seq: seq_file handle | ||
369 | * @v: value to be stored | ||
370 | * | ||
371 | * Number format defined by gcc: numbers are recorded in the 32 bit | ||
372 | * unsigned binary form of the endianness of the machine generating the | ||
373 | * file. | ||
374 | */ | ||
375 | static int seq_write_gcov_u32(struct seq_file *seq, u32 v) | ||
376 | { | ||
377 | return seq_write(seq, &v, sizeof(v)); | ||
378 | } | ||
379 | |||
380 | /** | ||
381 | * seq_write_gcov_u64 - write 64 bit number in gcov format to seq_file | ||
382 | * @seq: seq_file handle | ||
383 | * @v: value to be stored | ||
384 | * | ||
385 | * Number format defined by gcc: numbers are recorded in the 32 bit | ||
386 | * unsigned binary form of the endianness of the machine generating the | ||
387 | * file. 64 bit numbers are stored as two 32 bit numbers, the low part | ||
388 | * first. | ||
389 | */ | ||
390 | static int seq_write_gcov_u64(struct seq_file *seq, u64 v) | ||
391 | { | ||
392 | u32 data[2]; | ||
393 | |||
394 | data[0] = (v & 0xffffffffUL); | ||
395 | data[1] = (v >> 32); | ||
396 | return seq_write(seq, data, sizeof(data)); | ||
397 | } | ||
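Worked example, as a hedged userspace check: on a little-endian machine, v = 0x0000000100000002 leaves the buffer as the byte stream 02 00 00 00 01 00 00 00, i.e. the low 32-bit word first, each word in native byte order:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            uint64_t v = 0x0000000100000002ULL;
            uint32_t data[2];
            unsigned char bytes[8];

            data[0] = (uint32_t)(v & 0xffffffffUL);  /* low part first */
            data[1] = (uint32_t)(v >> 32);
            memcpy(bytes, data, sizeof(bytes));

            for (int i = 0; i < 8; i++)
                    printf("%02x ", bytes[i]);
            printf("\n");   /* 02 00 00 00 01 00 00 00 on little-endian */
            return 0;
    }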
398 | |||
399 | /** | ||
400 | * gcov_iter_write - write data for current pos to seq_file | ||
401 | * @iter: file iterator | ||
402 | * @seq: seq_file handle | ||
403 | * | ||
404 | * Return zero on success, non-zero otherwise. | ||
405 | */ | ||
406 | int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) | ||
407 | { | ||
408 | int rc = -EINVAL; | ||
409 | |||
410 | switch (iter->record) { | ||
411 | case RECORD_FILE_MAGIC: | ||
412 | rc = seq_write_gcov_u32(seq, GCOV_DATA_MAGIC); | ||
413 | break; | ||
414 | case RECORD_GCOV_VERSION: | ||
415 | rc = seq_write_gcov_u32(seq, iter->info->version); | ||
416 | break; | ||
417 | case RECORD_TIME_STAMP: | ||
418 | rc = seq_write_gcov_u32(seq, iter->info->stamp); | ||
419 | break; | ||
420 | case RECORD_FUNCTION_TAG: | ||
421 | rc = seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION); | ||
422 | break; | ||
423 | case RECORD_FUNCTION_TAG_LEN: | ||
424 | rc = seq_write_gcov_u32(seq, 2); | ||
425 | break; | ||
426 | case RECORD_FUNCTION_IDENT: | ||
427 | rc = seq_write_gcov_u32(seq, get_func(iter)->ident); | ||
428 | break; | ||
429 | case RECORD_FUNCTION_CHECK: | ||
430 | rc = seq_write_gcov_u32(seq, get_func(iter)->checksum); | ||
431 | break; | ||
432 | case RECORD_COUNT_TAG: | ||
433 | rc = seq_write_gcov_u32(seq, | ||
434 | GCOV_TAG_FOR_COUNTER(get_type(iter)->ctr_type)); | ||
435 | break; | ||
436 | case RECORD_COUNT_LEN: | ||
437 | rc = seq_write_gcov_u32(seq, | ||
438 | get_func(iter)->n_ctrs[iter->type] * 2); | ||
439 | break; | ||
440 | case RECORD_COUNT: | ||
441 | rc = seq_write_gcov_u64(seq, | ||
442 | iter->info->counts[iter->type]. | ||
443 | values[iter->count + get_type(iter)->offset]); | ||
444 | break; | ||
445 | } | ||
446 | return rc; | ||
447 | } | ||
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h new file mode 100644 index 000000000000..060073ebf7a6 --- /dev/null +++ b/kernel/gcov/gcov.h | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * Profiling infrastructure declarations. | ||
3 | * | ||
4 | * This file is based on gcc-internal definitions. Data structures are | ||
5 | * defined to be compatible with gcc counterparts. For a better | ||
6 | * understanding, refer to gcc source: gcc/gcov-io.h. | ||
7 | * | ||
8 | * Copyright IBM Corp. 2009 | ||
9 | * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com> | ||
10 | * | ||
11 | * Uses gcc-internal data definitions. | ||
12 | */ | ||
13 | |||
14 | #ifndef GCOV_H | ||
15 | #define GCOV_H GCOV_H | ||
16 | |||
17 | #include <linux/types.h> | ||
18 | |||
19 | /* | ||
20 | * Profiling data types used for gcc 3.4 and above - these are defined by | ||
21 | * gcc and need to be kept as close to the original definition as possible to | ||
22 | * remain compatible. | ||
23 | */ | ||
24 | #define GCOV_COUNTERS 5 | ||
25 | #define GCOV_DATA_MAGIC ((unsigned int) 0x67636461) | ||
26 | #define GCOV_TAG_FUNCTION ((unsigned int) 0x01000000) | ||
27 | #define GCOV_TAG_COUNTER_BASE ((unsigned int) 0x01a10000) | ||
28 | #define GCOV_TAG_FOR_COUNTER(count) \ | ||
29 | (GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17)) | ||
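Plugging counter types into GCOV_TAG_FOR_COUNTER() shows the 17-bit spacing of the tag values; a standalone sketch reusing the two macros above:

    #include <stdio.h>

    #define GCOV_TAG_COUNTER_BASE ((unsigned int) 0x01a10000)
    #define GCOV_TAG_FOR_COUNTER(count) \
            (GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17))

    int main(void)
    {
            /* Prints 0x01a10000, 0x01a30000, 0x01a50000, ... */
            for (unsigned int type = 0; type < 5; type++)
                    printf("type %u -> tag 0x%08x\n",
                           type, GCOV_TAG_FOR_COUNTER(type));
            return 0;
    }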
30 | |||
31 | #if BITS_PER_LONG >= 64 | ||
32 | typedef long gcov_type; | ||
33 | #else | ||
34 | typedef long long gcov_type; | ||
35 | #endif | ||
36 | |||
37 | /** | ||
38 | * struct gcov_fn_info - profiling meta data per function | ||
39 | * @ident: object file-unique function identifier | ||
40 | * @checksum: function checksum | ||
41 | * @n_ctrs: number of values per counter type belonging to this function | ||
42 | * | ||
43 | * This data is generated by gcc during compilation and doesn't change | ||
44 | * at run-time. | ||
45 | */ | ||
46 | struct gcov_fn_info { | ||
47 | unsigned int ident; | ||
48 | unsigned int checksum; | ||
49 | unsigned int n_ctrs[0]; | ||
50 | }; | ||
51 | |||
52 | /** | ||
53 | * struct gcov_ctr_info - profiling data per counter type | ||
54 | * @num: number of counter values for this type | ||
55 | * @values: array of counter values for this type | ||
56 | * @merge: merge function for counter values of this type (unused) | ||
57 | * | ||
58 | * This data is generated by gcc during compilation and doesn't change | ||
59 | * at run-time with the exception of the values array. | ||
60 | */ | ||
61 | struct gcov_ctr_info { | ||
62 | unsigned int num; | ||
63 | gcov_type *values; | ||
64 | void (*merge)(gcov_type *, unsigned int); | ||
65 | }; | ||
66 | |||
67 | /** | ||
68 | * struct gcov_info - profiling data per object file | ||
69 | * @version: gcov version magic indicating the gcc version used for compilation | ||
70 | * @next: list head for a singly-linked list | ||
71 | * @stamp: time stamp | ||
72 | * @filename: name of the associated gcov data file | ||
73 | * @n_functions: number of instrumented functions | ||
74 | * @functions: function data | ||
75 | * @ctr_mask: mask specifying which counter types are active | ||
76 | * @counts: counter data per counter type | ||
77 | * | ||
78 | * This data is generated by gcc during compilation and doesn't change | ||
79 | * at run-time with the exception of the next pointer. | ||
80 | */ | ||
81 | struct gcov_info { | ||
82 | unsigned int version; | ||
83 | struct gcov_info *next; | ||
84 | unsigned int stamp; | ||
85 | const char *filename; | ||
86 | unsigned int n_functions; | ||
87 | const struct gcov_fn_info *functions; | ||
88 | unsigned int ctr_mask; | ||
89 | struct gcov_ctr_info counts[0]; | ||
90 | }; | ||
91 | |||
92 | /* Base interface. */ | ||
93 | enum gcov_action { | ||
94 | GCOV_ADD, | ||
95 | GCOV_REMOVE, | ||
96 | }; | ||
97 | |||
98 | void gcov_event(enum gcov_action action, struct gcov_info *info); | ||
99 | void gcov_enable_events(void); | ||
100 | |||
101 | /* Iterator control. */ | ||
102 | struct seq_file; | ||
103 | struct gcov_iterator; | ||
104 | |||
105 | struct gcov_iterator *gcov_iter_new(struct gcov_info *info); | ||
106 | void gcov_iter_free(struct gcov_iterator *iter); | ||
107 | void gcov_iter_start(struct gcov_iterator *iter); | ||
108 | int gcov_iter_next(struct gcov_iterator *iter); | ||
109 | int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq); | ||
110 | struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter); | ||
111 | |||
112 | /* gcov_info control. */ | ||
113 | void gcov_info_reset(struct gcov_info *info); | ||
114 | int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2); | ||
115 | void gcov_info_add(struct gcov_info *dest, struct gcov_info *source); | ||
116 | struct gcov_info *gcov_info_dup(struct gcov_info *info); | ||
117 | void gcov_info_free(struct gcov_info *info); | ||
118 | |||
119 | struct gcov_link { | ||
120 | enum { | ||
121 | OBJ_TREE, | ||
122 | SRC_TREE, | ||
123 | } dir; | ||
124 | const char *ext; | ||
125 | }; | ||
126 | extern const struct gcov_link gcov_link[]; | ||
127 | |||
128 | #endif /* GCOV_H */ | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index aaf5c9d05770..50da67672901 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -856,7 +856,7 @@ EXPORT_SYMBOL(free_irq); | |||
856 | * still called in hard interrupt context and has to check | 856 | * still called in hard interrupt context and has to check |
857 | * whether the interrupt originates from the device. If yes it | 857 | * whether the interrupt originates from the device. If yes it |
858 | * needs to disable the interrupt on the device and return | 858 | * needs to disable the interrupt on the device and return |
859 | * IRQ_THREAD_WAKE which will wake up the handler thread and run | 859 | * IRQ_WAKE_THREAD which will wake up the handler thread and run |
860 | * @thread_fn. This split handler design is necessary to support | 860 | * @thread_fn. This split handler design is necessary to support |
861 | * shared interrupts. | 861 | * shared interrupts. |
862 | * | 862 | * |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 7fa441333529..9b1a7de26979 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -27,7 +27,6 @@ struct kthread_create_info | |||
27 | /* Information passed to kthread() from kthreadd. */ | 27 | /* Information passed to kthread() from kthreadd. */ |
28 | int (*threadfn)(void *data); | 28 | int (*threadfn)(void *data); |
29 | void *data; | 29 | void *data; |
30 | struct completion started; | ||
31 | 30 | ||
32 | /* Result passed back to kthread_create() from kthreadd. */ | 31 | /* Result passed back to kthread_create() from kthreadd. */ |
33 | struct task_struct *result; | 32 | struct task_struct *result; |
@@ -36,17 +35,13 @@ struct kthread_create_info | |||
36 | struct list_head list; | 35 | struct list_head list; |
37 | }; | 36 | }; |
38 | 37 | ||
39 | struct kthread_stop_info | 38 | struct kthread { |
40 | { | 39 | int should_stop; |
41 | struct task_struct *k; | 40 | struct completion exited; |
42 | int err; | ||
43 | struct completion done; | ||
44 | }; | 41 | }; |
45 | 42 | ||
46 | /* Thread stopping is done by setting this var: lock serializes | 43 | #define to_kthread(tsk) \
47 | * multiple kthread_stop calls. */ | 44 | container_of((tsk)->vfork_done, struct kthread, exited) |
48 | static DEFINE_MUTEX(kthread_stop_lock); | ||
49 | static struct kthread_stop_info kthread_stop_info; | ||
50 | 45 | ||
51 | /** | 46 | /** |
52 | * kthread_should_stop - should this kthread return now? | 47 | * kthread_should_stop - should this kthread return now? |
@@ -57,36 +52,35 @@ static struct kthread_stop_info kthread_stop_info; | |||
57 | */ | 52 | */ |
58 | int kthread_should_stop(void) | 53 | int kthread_should_stop(void) |
59 | { | 54 | { |
60 | return (kthread_stop_info.k == current); | 55 | return to_kthread(current)->should_stop; |
61 | } | 56 | } |
62 | EXPORT_SYMBOL(kthread_should_stop); | 57 | EXPORT_SYMBOL(kthread_should_stop); |
63 | 58 | ||
64 | static int kthread(void *_create) | 59 | static int kthread(void *_create) |
65 | { | 60 | { |
61 | /* Copy data: it's on kthread's stack */ | ||
66 | struct kthread_create_info *create = _create; | 62 | struct kthread_create_info *create = _create; |
67 | int (*threadfn)(void *data); | 63 | int (*threadfn)(void *data) = create->threadfn; |
68 | void *data; | 64 | void *data = create->data; |
69 | int ret = -EINTR; | 65 | struct kthread self; |
66 | int ret; | ||
70 | 67 | ||
71 | /* Copy data: it's on kthread's stack */ | 68 | self.should_stop = 0; |
72 | threadfn = create->threadfn; | 69 | init_completion(&self.exited); |
73 | data = create->data; | 70 | current->vfork_done = &self.exited; |
74 | 71 | ||
75 | /* OK, tell user we're spawned, wait for stop or wakeup */ | 72 | /* OK, tell user we're spawned, wait for stop or wakeup */ |
76 | __set_current_state(TASK_UNINTERRUPTIBLE); | 73 | __set_current_state(TASK_UNINTERRUPTIBLE); |
77 | create->result = current; | 74 | create->result = current; |
78 | complete(&create->started); | 75 | complete(&create->done); |
79 | schedule(); | 76 | schedule(); |
80 | 77 | ||
81 | if (!kthread_should_stop()) | 78 | ret = -EINTR; |
79 | if (!self.should_stop) | ||
82 | ret = threadfn(data); | 80 | ret = threadfn(data); |
83 | 81 | ||
84 | /* It might have exited on its own, w/o kthread_stop. Check. */ | 82 | /* we can't just return, we must preserve "self" on stack */ |
85 | if (kthread_should_stop()) { | 83 | do_exit(ret); |
86 | kthread_stop_info.err = ret; | ||
87 | complete(&kthread_stop_info.done); | ||
88 | } | ||
89 | return 0; | ||
90 | } | 84 | } |
91 | 85 | ||
92 | static void create_kthread(struct kthread_create_info *create) | 86 | static void create_kthread(struct kthread_create_info *create) |
@@ -95,11 +89,10 @@ static void create_kthread(struct kthread_create_info *create) | |||
95 | 89 | ||
96 | /* We want our own signal handler (we take no signals by default). */ | 90 | /* We want our own signal handler (we take no signals by default). */ |
97 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 91 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
98 | if (pid < 0) | 92 | if (pid < 0) { |
99 | create->result = ERR_PTR(pid); | 93 | create->result = ERR_PTR(pid); |
100 | else | 94 | complete(&create->done); |
101 | wait_for_completion(&create->started); | 95 | } |
102 | complete(&create->done); | ||
103 | } | 96 | } |
104 | 97 | ||
105 | /** | 98 | /** |
@@ -130,7 +123,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
130 | 123 | ||
131 | create.threadfn = threadfn; | 124 | create.threadfn = threadfn; |
132 | create.data = data; | 125 | create.data = data; |
133 | init_completion(&create.started); | ||
134 | init_completion(&create.done); | 126 | init_completion(&create.done); |
135 | 127 | ||
136 | spin_lock(&kthread_create_lock); | 128 | spin_lock(&kthread_create_lock); |
@@ -198,30 +190,22 @@ EXPORT_SYMBOL(kthread_bind); | |||
198 | */ | 190 | */ |
199 | int kthread_stop(struct task_struct *k) | 191 | int kthread_stop(struct task_struct *k) |
200 | { | 192 | { |
193 | struct kthread *kthread; | ||
201 | int ret; | 194 | int ret; |
202 | 195 | ||
203 | mutex_lock(&kthread_stop_lock); | ||
204 | |||
205 | /* It could exit after stop_info.k set, but before wake_up_process. */ | ||
206 | get_task_struct(k); | ||
207 | |||
208 | trace_sched_kthread_stop(k); | 196 | trace_sched_kthread_stop(k); |
197 | get_task_struct(k); | ||
209 | 198 | ||
210 | /* Must init completion *before* thread sees kthread_stop_info.k */ | 199 | kthread = to_kthread(k); |
211 | init_completion(&kthread_stop_info.done); | 200 | barrier(); /* it might have exited */ |
212 | smp_wmb(); | 201 | if (k->vfork_done != NULL) { |
202 | kthread->should_stop = 1; | ||
203 | wake_up_process(k); | ||
204 | wait_for_completion(&kthread->exited); | ||
205 | } | ||
206 | ret = k->exit_code; | ||
213 | 207 | ||
214 | /* Now set kthread_should_stop() to true, and wake it up. */ | ||
215 | kthread_stop_info.k = k; | ||
216 | wake_up_process(k); | ||
217 | put_task_struct(k); | 208 | put_task_struct(k); |
218 | |||
219 | /* Once it dies, reset stop ptr, gather result and we're done. */ | ||
220 | wait_for_completion(&kthread_stop_info.done); | ||
221 | kthread_stop_info.k = NULL; | ||
222 | ret = kthread_stop_info.err; | ||
223 | mutex_unlock(&kthread_stop_lock); | ||
224 | |||
225 | trace_sched_kthread_stop_ret(ret); | 209 | trace_sched_kthread_stop_ret(ret); |
226 | 210 | ||
227 | return ret; | 211 | return ret; |
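The to_kthread() helper introduced above leans on container_of() to get from the task's vfork_done pointer back to the struct kthread living on the thread's stack. A hedged userspace demonstration of that pointer arithmetic, with simplified stand-in types:

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct kthread {
            int should_stop;
            int exited;             /* stands in for struct completion */
    };

    int main(void)
    {
            struct kthread self = { .should_stop = 1, .exited = 0 };
            int *vfork_done = &self.exited;  /* what the task would store */

            /* Recover the enclosing struct kthread from the member. */
            struct kthread *k = container_of(vfork_done, struct kthread,
                                             exited);
            printf("should_stop = %d\n", k->should_stop);  /* prints 1 */
            return 0;
    }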
diff --git a/kernel/module.c b/kernel/module.c index 215aaab09e91..38928fcaff2b 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2216,6 +2216,10 @@ static noinline struct module *load_module(void __user *umod, | |||
2216 | mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, | 2216 | mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, |
2217 | "__kcrctab_unused_gpl"); | 2217 | "__kcrctab_unused_gpl"); |
2218 | #endif | 2218 | #endif |
2219 | #ifdef CONFIG_CONSTRUCTORS | ||
2220 | mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", | ||
2221 | sizeof(*mod->ctors), &mod->num_ctors); | ||
2222 | #endif | ||
2219 | 2223 | ||
2220 | #ifdef CONFIG_MARKERS | 2224 | #ifdef CONFIG_MARKERS |
2221 | mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", | 2225 | mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", |
@@ -2389,6 +2393,17 @@ static noinline struct module *load_module(void __user *umod, | |||
2389 | goto free_hdr; | 2393 | goto free_hdr; |
2390 | } | 2394 | } |
2391 | 2395 | ||
2396 | /* Call module constructors. */ | ||
2397 | static void do_mod_ctors(struct module *mod) | ||
2398 | { | ||
2399 | #ifdef CONFIG_CONSTRUCTORS | ||
2400 | unsigned long i; | ||
2401 | |||
2402 | for (i = 0; i < mod->num_ctors; i++) | ||
2403 | mod->ctors[i](); | ||
2404 | #endif | ||
2405 | } | ||
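With CONFIG_CONSTRUCTORS set, gcc-emitted constructors (gcov's -fprofile-arcs instrumentation registers its data this way) land in the module's .ctors section, and do_mod_ctors() above runs them before the init callback. A hedged module sketch with invented names:

    #include <linux/module.h>
    #include <linux/kernel.h>

    /* gcc places this in .ctors; do_mod_ctors() calls it at load time,
     * before example_init() below. */
    static void __attribute__((constructor)) example_ctor(void)
    {
            pr_info("constructor ran before module_init\n");
    }

    static int __init example_init(void)
    {
            pr_info("module_init runs after all constructors\n");
            return 0;
    }

    static void __exit example_exit(void)
    {
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");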
2406 | |||
2392 | /* This is where the real work happens */ | 2407 | /* This is where the real work happens */ |
2393 | SYSCALL_DEFINE3(init_module, void __user *, umod, | 2408 | SYSCALL_DEFINE3(init_module, void __user *, umod, |
2394 | unsigned long, len, const char __user *, uargs) | 2409 | unsigned long, len, const char __user *, uargs) |
@@ -2417,6 +2432,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
2417 | blocking_notifier_call_chain(&module_notify_list, | 2432 | blocking_notifier_call_chain(&module_notify_list, |
2418 | MODULE_STATE_COMING, mod); | 2433 | MODULE_STATE_COMING, mod); |
2419 | 2434 | ||
2435 | do_mod_ctors(mod); | ||
2420 | /* Start the module */ | 2436 | /* Start the module */ |
2421 | if (mod->init != NULL) | 2437 | if (mod->init != NULL) |
2422 | ret = do_one_initcall(mod->init); | 2438 | ret = do_one_initcall(mod->init); |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 63598dca2d0c..09b4ff9711b2 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -26,19 +26,14 @@ static struct kmem_cache *nsproxy_cachep; | |||
26 | 26 | ||
27 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | 27 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); |
28 | 28 | ||
29 | /* | 29 | static inline struct nsproxy *create_nsproxy(void) |
30 | * creates a copy of "orig" with refcount 1. | ||
31 | */ | ||
32 | static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | ||
33 | { | 30 | { |
34 | struct nsproxy *ns; | 31 | struct nsproxy *nsproxy; |
35 | 32 | ||
36 | ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); | 33 | nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); |
37 | if (ns) { | 34 | if (nsproxy) |
38 | memcpy(ns, orig, sizeof(struct nsproxy)); | 35 | atomic_set(&nsproxy->count, 1); |
39 | atomic_set(&ns->count, 1); | 36 | return nsproxy; |
40 | } | ||
41 | return ns; | ||
42 | } | 37 | } |
43 | 38 | ||
44 | /* | 39 | /* |
@@ -52,7 +47,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
52 | struct nsproxy *new_nsp; | 47 | struct nsproxy *new_nsp; |
53 | int err; | 48 | int err; |
54 | 49 | ||
55 | new_nsp = clone_nsproxy(tsk->nsproxy); | 50 | new_nsp = create_nsproxy(); |
56 | if (!new_nsp) | 51 | if (!new_nsp) |
57 | return ERR_PTR(-ENOMEM); | 52 | return ERR_PTR(-ENOMEM); |
58 | 53 | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 29b685f551aa..1a933a221ea4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -124,7 +124,7 @@ void perf_enable(void) | |||
124 | 124 | ||
125 | static void get_ctx(struct perf_counter_context *ctx) | 125 | static void get_ctx(struct perf_counter_context *ctx) |
126 | { | 126 | { |
127 | atomic_inc(&ctx->refcount); | 127 | WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); |
128 | } | 128 | } |
129 | 129 | ||
130 | static void free_ctx(struct rcu_head *head) | 130 | static void free_ctx(struct rcu_head *head) |
@@ -175,6 +175,11 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
175 | spin_unlock_irqrestore(&ctx->lock, *flags); | 175 | spin_unlock_irqrestore(&ctx->lock, *flags); |
176 | goto retry; | 176 | goto retry; |
177 | } | 177 | } |
178 | |||
179 | if (!atomic_inc_not_zero(&ctx->refcount)) { | ||
180 | spin_unlock_irqrestore(&ctx->lock, *flags); | ||
181 | ctx = NULL; | ||
182 | } | ||
178 | } | 183 | } |
179 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
180 | return ctx; | 185 | return ctx; |
@@ -193,7 +198,6 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta | |||
193 | ctx = perf_lock_task_context(task, &flags); | 198 | ctx = perf_lock_task_context(task, &flags); |
194 | if (ctx) { | 199 | if (ctx) { |
195 | ++ctx->pin_count; | 200 | ++ctx->pin_count; |
196 | get_ctx(ctx); | ||
197 | spin_unlock_irqrestore(&ctx->lock, flags); | 201 | spin_unlock_irqrestore(&ctx->lock, flags); |
198 | } | 202 | } |
199 | return ctx; | 203 | return ctx; |
@@ -1283,7 +1287,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) | |||
1283 | if (!interrupts) { | 1287 | if (!interrupts) { |
1284 | perf_disable(); | 1288 | perf_disable(); |
1285 | counter->pmu->disable(counter); | 1289 | counter->pmu->disable(counter); |
1286 | atomic_set(&hwc->period_left, 0); | 1290 | atomic64_set(&hwc->period_left, 0); |
1287 | counter->pmu->enable(counter); | 1291 | counter->pmu->enable(counter); |
1288 | perf_enable(); | 1292 | perf_enable(); |
1289 | } | 1293 | } |
@@ -1459,11 +1463,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) | |||
1459 | put_ctx(parent_ctx); | 1463 | put_ctx(parent_ctx); |
1460 | ctx->parent_ctx = NULL; /* no longer a clone */ | 1464 | ctx->parent_ctx = NULL; /* no longer a clone */ |
1461 | } | 1465 | } |
1462 | /* | ||
1463 | * Get an extra reference before dropping the lock so that | ||
1464 | * this context won't get freed if the task exits. | ||
1465 | */ | ||
1466 | get_ctx(ctx); | ||
1467 | spin_unlock_irqrestore(&ctx->lock, flags); | 1466 | spin_unlock_irqrestore(&ctx->lock, flags); |
1468 | } | 1467 | } |
1469 | 1468 | ||
@@ -1553,7 +1552,7 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1553 | static ssize_t | 1552 | static ssize_t |
1554 | perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) | 1553 | perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) |
1555 | { | 1554 | { |
1556 | u64 values[3]; | 1555 | u64 values[4]; |
1557 | int n; | 1556 | int n; |
1558 | 1557 | ||
1559 | /* | 1558 | /* |
@@ -1620,22 +1619,6 @@ static void perf_counter_reset(struct perf_counter *counter) | |||
1620 | perf_counter_update_userpage(counter); | 1619 | perf_counter_update_userpage(counter); |
1621 | } | 1620 | } |
1622 | 1621 | ||
1623 | static void perf_counter_for_each_sibling(struct perf_counter *counter, | ||
1624 | void (*func)(struct perf_counter *)) | ||
1625 | { | ||
1626 | struct perf_counter_context *ctx = counter->ctx; | ||
1627 | struct perf_counter *sibling; | ||
1628 | |||
1629 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1630 | mutex_lock(&ctx->mutex); | ||
1631 | counter = counter->group_leader; | ||
1632 | |||
1633 | func(counter); | ||
1634 | list_for_each_entry(sibling, &counter->sibling_list, list_entry) | ||
1635 | func(sibling); | ||
1636 | mutex_unlock(&ctx->mutex); | ||
1637 | } | ||
1638 | |||
1639 | /* | 1622 | /* |
1640 | * Holding the top-level counter's child_mutex means that any | 1623 | * Holding the top-level counter's child_mutex means that any |
1641 | * descendant process that has inherited this counter will block | 1624 | * descendant process that has inherited this counter will block |
@@ -1658,14 +1641,18 @@ static void perf_counter_for_each_child(struct perf_counter *counter, | |||
1658 | static void perf_counter_for_each(struct perf_counter *counter, | 1641 | static void perf_counter_for_each(struct perf_counter *counter, |
1659 | void (*func)(struct perf_counter *)) | 1642 | void (*func)(struct perf_counter *)) |
1660 | { | 1643 | { |
1661 | struct perf_counter *child; | 1644 | struct perf_counter_context *ctx = counter->ctx; |
1645 | struct perf_counter *sibling; | ||
1662 | 1646 | ||
1663 | WARN_ON_ONCE(counter->ctx->parent_ctx); | 1647 | WARN_ON_ONCE(ctx->parent_ctx); |
1664 | mutex_lock(&counter->child_mutex); | 1648 | mutex_lock(&ctx->mutex); |
1665 | perf_counter_for_each_sibling(counter, func); | 1649 | counter = counter->group_leader; |
1666 | list_for_each_entry(child, &counter->child_list, child_list) | 1650 | |
1667 | perf_counter_for_each_sibling(child, func); | 1651 | perf_counter_for_each_child(counter, func); |
1668 | mutex_unlock(&counter->child_mutex); | 1652 | func(counter); |
1653 | list_for_each_entry(sibling, &counter->sibling_list, list_entry) | ||
1654 | perf_counter_for_each_child(counter, func); | ||
1655 | mutex_unlock(&ctx->mutex); | ||
1669 | } | 1656 | } |
1670 | 1657 | ||
1671 | static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) | 1658 | static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) |
@@ -1806,6 +1793,12 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1806 | struct perf_mmap_data *data; | 1793 | struct perf_mmap_data *data; |
1807 | int ret = VM_FAULT_SIGBUS; | 1794 | int ret = VM_FAULT_SIGBUS; |
1808 | 1795 | ||
1796 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
1797 | if (vmf->pgoff == 0) | ||
1798 | ret = 0; | ||
1799 | return ret; | ||
1800 | } | ||
1801 | |||
1809 | rcu_read_lock(); | 1802 | rcu_read_lock(); |
1810 | data = rcu_dereference(counter->data); | 1803 | data = rcu_dereference(counter->data); |
1811 | if (!data) | 1804 | if (!data) |
@@ -1819,9 +1812,16 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1819 | if ((unsigned)nr > data->nr_pages) | 1812 | if ((unsigned)nr > data->nr_pages) |
1820 | goto unlock; | 1813 | goto unlock; |
1821 | 1814 | ||
1815 | if (vmf->flags & FAULT_FLAG_WRITE) | ||
1816 | goto unlock; | ||
1817 | |||
1822 | vmf->page = virt_to_page(data->data_pages[nr]); | 1818 | vmf->page = virt_to_page(data->data_pages[nr]); |
1823 | } | 1819 | } |
1820 | |||
1824 | get_page(vmf->page); | 1821 | get_page(vmf->page); |
1822 | vmf->page->mapping = vma->vm_file->f_mapping; | ||
1823 | vmf->page->index = vmf->pgoff; | ||
1824 | |||
1825 | ret = 0; | 1825 | ret = 0; |
1826 | unlock: | 1826 | unlock: |
1827 | rcu_read_unlock(); | 1827 | rcu_read_unlock(); |
@@ -1874,6 +1874,14 @@ fail: | |||
1874 | return -ENOMEM; | 1874 | return -ENOMEM; |
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | static void perf_mmap_free_page(unsigned long addr) | ||
1878 | { | ||
1879 | struct page *page = virt_to_page(addr); | ||
1880 | |||
1881 | page->mapping = NULL; | ||
1882 | __free_page(page); | ||
1883 | } | ||
1884 | |||
1877 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | 1885 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) |
1878 | { | 1886 | { |
1879 | struct perf_mmap_data *data; | 1887 | struct perf_mmap_data *data; |
@@ -1881,9 +1889,10 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head) | |||
1881 | 1889 | ||
1882 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | 1890 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); |
1883 | 1891 | ||
1884 | free_page((unsigned long)data->user_page); | 1892 | perf_mmap_free_page((unsigned long)data->user_page); |
1885 | for (i = 0; i < data->nr_pages; i++) | 1893 | for (i = 0; i < data->nr_pages; i++) |
1886 | free_page((unsigned long)data->data_pages[i]); | 1894 | perf_mmap_free_page((unsigned long)data->data_pages[i]); |
1895 | |||
1887 | kfree(data); | 1896 | kfree(data); |
1888 | } | 1897 | } |
1889 | 1898 | ||
@@ -1920,9 +1929,10 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
1920 | } | 1929 | } |
1921 | 1930 | ||
1922 | static struct vm_operations_struct perf_mmap_vmops = { | 1931 | static struct vm_operations_struct perf_mmap_vmops = { |
1923 | .open = perf_mmap_open, | 1932 | .open = perf_mmap_open, |
1924 | .close = perf_mmap_close, | 1933 | .close = perf_mmap_close, |
1925 | .fault = perf_mmap_fault, | 1934 | .fault = perf_mmap_fault, |
1935 | .page_mkwrite = perf_mmap_fault, | ||
1926 | }; | 1936 | }; |
1927 | 1937 | ||
1928 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) | 1938 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -1936,7 +1946,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
1936 | long user_extra, extra; | 1946 | long user_extra, extra; |
1937 | int ret = 0; | 1947 | int ret = 0; |
1938 | 1948 | ||
1939 | if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) | 1949 | if (!(vma->vm_flags & VM_SHARED)) |
1940 | return -EINVAL; | 1950 | return -EINVAL; |
1941 | 1951 | ||
1942 | vma_size = vma->vm_end - vma->vm_start; | 1952 | vma_size = vma->vm_end - vma->vm_start; |
@@ -1995,10 +2005,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
1995 | atomic_long_add(user_extra, &user->locked_vm); | 2005 | atomic_long_add(user_extra, &user->locked_vm); |
1996 | vma->vm_mm->locked_vm += extra; | 2006 | vma->vm_mm->locked_vm += extra; |
1997 | counter->data->nr_locked = extra; | 2007 | counter->data->nr_locked = extra; |
2008 | if (vma->vm_flags & VM_WRITE) | ||
2009 | counter->data->writable = 1; | ||
2010 | |||
1998 | unlock: | 2011 | unlock: |
1999 | mutex_unlock(&counter->mmap_mutex); | 2012 | mutex_unlock(&counter->mmap_mutex); |
2000 | 2013 | ||
2001 | vma->vm_flags &= ~VM_MAYWRITE; | ||
2002 | vma->vm_flags |= VM_RESERVED; | 2014 | vma->vm_flags |= VM_RESERVED; |
2003 | vma->vm_ops = &perf_mmap_vmops; | 2015 | vma->vm_ops = &perf_mmap_vmops; |
2004 | 2016 | ||
@@ -2175,11 +2187,38 @@ struct perf_output_handle { | |||
2175 | unsigned long head; | 2187 | unsigned long head; |
2176 | unsigned long offset; | 2188 | unsigned long offset; |
2177 | int nmi; | 2189 | int nmi; |
2178 | int overflow; | 2190 | int sample; |
2179 | int locked; | 2191 | int locked; |
2180 | unsigned long flags; | 2192 | unsigned long flags; |
2181 | }; | 2193 | }; |
2182 | 2194 | ||
2195 | static bool perf_output_space(struct perf_mmap_data *data, | ||
2196 | unsigned int offset, unsigned int head) | ||
2197 | { | ||
2198 | unsigned long tail; | ||
2199 | unsigned long mask; | ||
2200 | |||
2201 | if (!data->writable) | ||
2202 | return true; | ||
2203 | |||
2204 | mask = (data->nr_pages << PAGE_SHIFT) - 1; | ||
2205 | /* | ||
2206 | * Userspace could choose to issue a mb() before updating the tail | ||
2207 | * pointer, so that all reads are completed before the write is | ||
2208 | * issued. | ||
2209 | */ | ||
2210 | tail = ACCESS_ONCE(data->user_page->data_tail); | ||
2211 | smp_rmb(); | ||
2212 | |||
2213 | offset = (offset - tail) & mask; | ||
2214 | head = (head - tail) & mask; | ||
2215 | |||
2216 | if ((int)(head - offset) < 0) | ||
2217 | return false; | ||
2218 | |||
2219 | return true; | ||
2220 | } | ||
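The masked subtractions in perf_output_space() above measure producer and consumer positions relative to the user-visible tail, so wraparound falls out of the modular arithmetic. A worked hedged example assuming a single 4096-byte data page:

    #include <stdio.h>

    int main(void)
    {
            unsigned long mask = 4096 - 1;      /* one page of data */
            unsigned long tail = 4000;          /* consumer position */
            unsigned int offset = 4090;         /* producer start */
            unsigned int head = 4090 + 100;     /* wraps past page end */

            /* Distances from the tail, modulo the buffer size. */
            offset = (offset - tail) & mask;    /* 90 */
            head = (head - tail) & mask;        /* 190 */

            /* Write fits: head stays ahead of offset, so the reserved
             * region does not overrun unread data. Prints "fits: 1". */
            printf("fits: %d\n", (int)(head - offset) >= 0);
            return 0;
    }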
2221 | |||
2183 | static void perf_output_wakeup(struct perf_output_handle *handle) | 2222 | static void perf_output_wakeup(struct perf_output_handle *handle) |
2184 | { | 2223 | { |
2185 | atomic_set(&handle->data->poll, POLL_IN); | 2224 | atomic_set(&handle->data->poll, POLL_IN); |
@@ -2270,12 +2309,57 @@ out: | |||
2270 | local_irq_restore(handle->flags); | 2309 | local_irq_restore(handle->flags); |
2271 | } | 2310 | } |
2272 | 2311 | ||
2312 | static void perf_output_copy(struct perf_output_handle *handle, | ||
2313 | const void *buf, unsigned int len) | ||
2314 | { | ||
2315 | unsigned int pages_mask; | ||
2316 | unsigned int offset; | ||
2317 | unsigned int size; | ||
2318 | void **pages; | ||
2319 | |||
2320 | offset = handle->offset; | ||
2321 | pages_mask = handle->data->nr_pages - 1; | ||
2322 | pages = handle->data->data_pages; | ||
2323 | |||
2324 | do { | ||
2325 | unsigned int page_offset; | ||
2326 | int nr; | ||
2327 | |||
2328 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
2329 | page_offset = offset & (PAGE_SIZE - 1); | ||
2330 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
2331 | |||
2332 | memcpy(pages[nr] + page_offset, buf, size); | ||
2333 | |||
2334 | len -= size; | ||
2335 | buf += size; | ||
2336 | offset += size; | ||
2337 | } while (len); | ||
2338 | |||
2339 | handle->offset = offset; | ||
2340 | |||
2341 | /* | ||
2342 | * Check we didn't copy past our reservation window, taking the | ||
2343 | * possible unsigned int wrap into account. | ||
2344 | */ | ||
2345 | WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); | ||
2346 | } | ||
2347 | |||
2348 | #define perf_output_put(handle, x) \ | ||
2349 | perf_output_copy((handle), &(x), sizeof(x)) | ||
2350 | |||
2273 | static int perf_output_begin(struct perf_output_handle *handle, | 2351 | static int perf_output_begin(struct perf_output_handle *handle, |
2274 | struct perf_counter *counter, unsigned int size, | 2352 | struct perf_counter *counter, unsigned int size, |
2275 | int nmi, int overflow) | 2353 | int nmi, int sample) |
2276 | { | 2354 | { |
2277 | struct perf_mmap_data *data; | 2355 | struct perf_mmap_data *data; |
2278 | unsigned int offset, head; | 2356 | unsigned int offset, head; |
2357 | int have_lost; | ||
2358 | struct { | ||
2359 | struct perf_event_header header; | ||
2360 | u64 id; | ||
2361 | u64 lost; | ||
2362 | } lost_event; | ||
2279 | 2363 | ||
2280 | /* | 2364 | /* |
2281 | * For inherited counters we send all the output towards the parent. | 2365 | * For inherited counters we send all the output towards the parent. |
@@ -2288,19 +2372,25 @@ static int perf_output_begin(struct perf_output_handle *handle, | |||
2288 | if (!data) | 2372 | if (!data) |
2289 | goto out; | 2373 | goto out; |
2290 | 2374 | ||
2291 | handle->data = data; | 2375 | handle->data = data; |
2292 | handle->counter = counter; | 2376 | handle->counter = counter; |
2293 | handle->nmi = nmi; | 2377 | handle->nmi = nmi; |
2294 | handle->overflow = overflow; | 2378 | handle->sample = sample; |
2295 | 2379 | ||
2296 | if (!data->nr_pages) | 2380 | if (!data->nr_pages) |
2297 | goto fail; | 2381 | goto fail; |
2298 | 2382 | ||
2383 | have_lost = atomic_read(&data->lost); | ||
2384 | if (have_lost) | ||
2385 | size += sizeof(lost_event); | ||
2386 | |||
2299 | perf_output_lock(handle); | 2387 | perf_output_lock(handle); |
2300 | 2388 | ||
2301 | do { | 2389 | do { |
2302 | offset = head = atomic_long_read(&data->head); | 2390 | offset = head = atomic_long_read(&data->head); |
2303 | head += size; | 2391 | head += size; |
2392 | if (unlikely(!perf_output_space(data, offset, head))) | ||
2393 | goto fail; | ||
2304 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); | 2394 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); |
2305 | 2395 | ||
2306 | handle->offset = offset; | 2396 | handle->offset = offset; |
@@ -2309,55 +2399,27 @@ static int perf_output_begin(struct perf_output_handle *handle, | |||
2309 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) | 2399 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) |
2310 | atomic_set(&data->wakeup, 1); | 2400 | atomic_set(&data->wakeup, 1); |
2311 | 2401 | ||
2402 | if (have_lost) { | ||
2403 | lost_event.header.type = PERF_EVENT_LOST; | ||
2404 | lost_event.header.misc = 0; | ||
2405 | lost_event.header.size = sizeof(lost_event); | ||
2406 | lost_event.id = counter->id; | ||
2407 | lost_event.lost = atomic_xchg(&data->lost, 0); | ||
2408 | |||
2409 | perf_output_put(handle, lost_event); | ||
2410 | } | ||
2411 | |||
2312 | return 0; | 2412 | return 0; |
2313 | 2413 | ||
2314 | fail: | 2414 | fail: |
2315 | perf_output_wakeup(handle); | 2415 | atomic_inc(&data->lost); |
2416 | perf_output_unlock(handle); | ||
2316 | out: | 2417 | out: |
2317 | rcu_read_unlock(); | 2418 | rcu_read_unlock(); |
2318 | 2419 | ||
2319 | return -ENOSPC; | 2420 | return -ENOSPC; |
2320 | } | 2421 | } |
2321 | 2422 | ||
2322 | static void perf_output_copy(struct perf_output_handle *handle, | ||
2323 | const void *buf, unsigned int len) | ||
2324 | { | ||
2325 | unsigned int pages_mask; | ||
2326 | unsigned int offset; | ||
2327 | unsigned int size; | ||
2328 | void **pages; | ||
2329 | |||
2330 | offset = handle->offset; | ||
2331 | pages_mask = handle->data->nr_pages - 1; | ||
2332 | pages = handle->data->data_pages; | ||
2333 | |||
2334 | do { | ||
2335 | unsigned int page_offset; | ||
2336 | int nr; | ||
2337 | |||
2338 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
2339 | page_offset = offset & (PAGE_SIZE - 1); | ||
2340 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
2341 | |||
2342 | memcpy(pages[nr] + page_offset, buf, size); | ||
2343 | |||
2344 | len -= size; | ||
2345 | buf += size; | ||
2346 | offset += size; | ||
2347 | } while (len); | ||
2348 | |||
2349 | handle->offset = offset; | ||
2350 | |||
2351 | /* | ||
2352 | * Check we didn't copy past our reservation window, taking the | ||
2353 | * possible unsigned int wrap into account. | ||
2354 | */ | ||
2355 | WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); | ||
2356 | } | ||
2357 | |||
2358 | #define perf_output_put(handle, x) \ | ||
2359 | perf_output_copy((handle), &(x), sizeof(x)) | ||
2360 | |||
2361 | static void perf_output_end(struct perf_output_handle *handle) | 2423 | static void perf_output_end(struct perf_output_handle *handle) |
2362 | { | 2424 | { |
2363 | struct perf_counter *counter = handle->counter; | 2425 | struct perf_counter *counter = handle->counter; |
@@ -2365,7 +2427,7 @@ static void perf_output_end(struct perf_output_handle *handle) | |||
2365 | 2427 | ||
2366 | int wakeup_events = counter->attr.wakeup_events; | 2428 | int wakeup_events = counter->attr.wakeup_events; |
2367 | 2429 | ||
2368 | if (handle->overflow && wakeup_events) { | 2430 | if (handle->sample && wakeup_events) { |
2369 | int events = atomic_inc_return(&data->events); | 2431 | int events = atomic_inc_return(&data->events); |
2370 | if (events >= wakeup_events) { | 2432 | if (events >= wakeup_events) { |
2371 | atomic_sub(wakeup_events, &data->events); | 2433 | atomic_sub(wakeup_events, &data->events); |
@@ -2970,7 +3032,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) | |||
2970 | } | 3032 | } |
2971 | 3033 | ||
2972 | /* | 3034 | /* |
2973 | * Generic counter overflow handling. | 3035 | * Generic counter overflow handling, sampling. |
2974 | */ | 3036 | */ |
2975 | 3037 | ||
2976 | int perf_counter_overflow(struct perf_counter *counter, int nmi, | 3038 | int perf_counter_overflow(struct perf_counter *counter, int nmi, |
@@ -3109,20 +3171,15 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | |||
3109 | } | 3171 | } |
3110 | 3172 | ||
3111 | static void perf_swcounter_overflow(struct perf_counter *counter, | 3173 | static void perf_swcounter_overflow(struct perf_counter *counter, |
3112 | int nmi, struct pt_regs *regs, u64 addr) | 3174 | int nmi, struct perf_sample_data *data) |
3113 | { | 3175 | { |
3114 | struct perf_sample_data data = { | 3176 | data->period = counter->hw.last_period; |
3115 | .regs = regs, | ||
3116 | .addr = addr, | ||
3117 | .period = counter->hw.last_period, | ||
3118 | }; | ||
3119 | 3177 | ||
3120 | perf_swcounter_update(counter); | 3178 | perf_swcounter_update(counter); |
3121 | perf_swcounter_set_period(counter); | 3179 | perf_swcounter_set_period(counter); |
3122 | if (perf_counter_overflow(counter, nmi, &data)) | 3180 | if (perf_counter_overflow(counter, nmi, data)) |
3123 | /* soft-disable the counter */ | 3181 | /* soft-disable the counter */ |
3124 | ; | 3182 | ; |
3125 | |||
3126 | } | 3183 | } |
3127 | 3184 | ||
3128 | static int perf_swcounter_is_counting(struct perf_counter *counter) | 3185 | static int perf_swcounter_is_counting(struct perf_counter *counter) |
@@ -3187,18 +3244,18 @@ static int perf_swcounter_match(struct perf_counter *counter, | |||
3187 | } | 3244 | } |
3188 | 3245 | ||
3189 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, | 3246 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, |
3190 | int nmi, struct pt_regs *regs, u64 addr) | 3247 | int nmi, struct perf_sample_data *data) |
3191 | { | 3248 | { |
3192 | int neg = atomic64_add_negative(nr, &counter->hw.count); | 3249 | int neg = atomic64_add_negative(nr, &counter->hw.count); |
3193 | 3250 | ||
3194 | if (counter->hw.sample_period && !neg && regs) | 3251 | if (counter->hw.sample_period && !neg && data->regs) |
3195 | perf_swcounter_overflow(counter, nmi, regs, addr); | 3252 | perf_swcounter_overflow(counter, nmi, data); |
3196 | } | 3253 | } |
3197 | 3254 | ||
3198 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | 3255 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, |
3199 | enum perf_type_id type, u32 event, | 3256 | enum perf_type_id type, |
3200 | u64 nr, int nmi, struct pt_regs *regs, | 3257 | u32 event, u64 nr, int nmi, |
3201 | u64 addr) | 3258 | struct perf_sample_data *data) |
3202 | { | 3259 | { |
3203 | struct perf_counter *counter; | 3260 | struct perf_counter *counter; |
3204 | 3261 | ||
@@ -3207,8 +3264,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | |||
3207 | 3264 | ||
3208 | rcu_read_lock(); | 3265 | rcu_read_lock(); |
3209 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | 3266 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { |
3210 | if (perf_swcounter_match(counter, type, event, regs)) | 3267 | if (perf_swcounter_match(counter, type, event, data->regs)) |
3211 | perf_swcounter_add(counter, nr, nmi, regs, addr); | 3268 | perf_swcounter_add(counter, nr, nmi, data); |
3212 | } | 3269 | } |
3213 | rcu_read_unlock(); | 3270 | rcu_read_unlock(); |
3214 | } | 3271 | } |
@@ -3227,9 +3284,9 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) | |||
3227 | return &cpuctx->recursion[0]; | 3284 | return &cpuctx->recursion[0]; |
3228 | } | 3285 | } |
3229 | 3286 | ||
3230 | static void __perf_swcounter_event(enum perf_type_id type, u32 event, | 3287 | static void do_perf_swcounter_event(enum perf_type_id type, u32 event, |
3231 | u64 nr, int nmi, struct pt_regs *regs, | 3288 | u64 nr, int nmi, |
3232 | u64 addr) | 3289 | struct perf_sample_data *data) |
3233 | { | 3290 | { |
3234 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 3291 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); |
3235 | int *recursion = perf_swcounter_recursion_context(cpuctx); | 3292 | int *recursion = perf_swcounter_recursion_context(cpuctx); |
@@ -3242,7 +3299,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event, | |||
3242 | barrier(); | 3299 | barrier(); |
3243 | 3300 | ||
3244 | perf_swcounter_ctx_event(&cpuctx->ctx, type, event, | 3301 | perf_swcounter_ctx_event(&cpuctx->ctx, type, event, |
3245 | nr, nmi, regs, addr); | 3302 | nr, nmi, data); |
3246 | rcu_read_lock(); | 3303 | rcu_read_lock(); |
3247 | /* | 3304 | /* |
3248 | * doesn't really matter which of the child contexts the | 3305 | * doesn't really matter which of the child contexts the |
@@ -3250,7 +3307,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event, | |||
3250 | */ | 3307 | */ |
3251 | ctx = rcu_dereference(current->perf_counter_ctxp); | 3308 | ctx = rcu_dereference(current->perf_counter_ctxp); |
3252 | if (ctx) | 3309 | if (ctx) |
3253 | perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr); | 3310 | perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data); |
3254 | rcu_read_unlock(); | 3311 | rcu_read_unlock(); |
3255 | 3312 | ||
3256 | barrier(); | 3313 | barrier(); |
@@ -3263,7 +3320,12 @@ out: | |||
3263 | void | 3320 | void |
3264 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) | 3321 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) |
3265 | { | 3322 | { |
3266 | __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); | 3323 | struct perf_sample_data data = { |
3324 | .regs = regs, | ||
3325 | .addr = addr, | ||
3326 | }; | ||
3327 | |||
3328 | do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data); | ||
3267 | } | 3329 | } |
3268 | 3330 | ||
3269 | static void perf_swcounter_read(struct perf_counter *counter) | 3331 | static void perf_swcounter_read(struct perf_counter *counter) |
@@ -3404,36 +3466,18 @@ static const struct pmu perf_ops_task_clock = { | |||
3404 | .read = task_clock_perf_counter_read, | 3466 | .read = task_clock_perf_counter_read, |
3405 | }; | 3467 | }; |
3406 | 3468 | ||
3407 | /* | ||
3408 | * Software counter: cpu migrations | ||
3409 | */ | ||
3410 | void perf_counter_task_migration(struct task_struct *task, int cpu) | ||
3411 | { | ||
3412 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3413 | struct perf_counter_context *ctx; | ||
3414 | |||
3415 | perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, | ||
3416 | PERF_COUNT_SW_CPU_MIGRATIONS, | ||
3417 | 1, 1, NULL, 0); | ||
3418 | |||
3419 | ctx = perf_pin_task_context(task); | ||
3420 | if (ctx) { | ||
3421 | perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, | ||
3422 | PERF_COUNT_SW_CPU_MIGRATIONS, | ||
3423 | 1, 1, NULL, 0); | ||
3424 | perf_unpin_context(ctx); | ||
3425 | } | ||
3426 | } | ||
3427 | |||
3428 | #ifdef CONFIG_EVENT_PROFILE | 3469 | #ifdef CONFIG_EVENT_PROFILE |
3429 | void perf_tpcounter_event(int event_id) | 3470 | void perf_tpcounter_event(int event_id) |
3430 | { | 3471 | { |
3431 | struct pt_regs *regs = get_irq_regs(); | 3472 | struct perf_sample_data data = { |
3473 | .regs = get_irq_regs(), | ||
3474 | .addr = 0, | ||
3475 | }; | ||
3432 | 3476 | ||
3433 | if (!regs) | 3477 | if (!data.regs) |
3434 | regs = task_pt_regs(current); | 3478 | data.regs = task_pt_regs(current); |
3435 | 3479 | ||
3436 | __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); | 3480 | do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); |
3437 | } | 3481 | } |
3438 | EXPORT_SYMBOL_GPL(perf_tpcounter_event); | 3482 | EXPORT_SYMBOL_GPL(perf_tpcounter_event); |
3439 | 3483 | ||
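A note on the lost-event scheme introduced above: a writer that cannot reserve space only bumps data->lost; the next writer that does succeed reserves sizeof(lost_event) extra bytes and prepends a PERF_EVENT_LOST record whose count comes from atomic_xchg(), so each drop is reported exactly once. A userspace sketch of that handoff (C11 atomics stand in for the kernel's atomic_t):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong lost;       /* drops seen since the last LOST record */

/* Writer path: record the event, or account a drop. */
static void emit(int have_space)
{
        if (!have_space) {
                atomic_fetch_add(&lost, 1);     /* like atomic_inc(&data->lost) */
                return;
        }
        /* like atomic_xchg(&data->lost, 0): claim the pending count once */
        unsigned long n = atomic_exchange(&lost, 0);
        if (n)
                printf("LOST record: %lu events\n", n);
        printf("sample record\n");
}

int main(void)
{
        emit(0);        /* buffer full: dropped */
        emit(0);        /* buffer full: dropped */
        emit(1);        /* prints LOST(2), then the sample */
        return 0;
}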
diff --git a/kernel/pid.c b/kernel/pid.c index b2e5f78fd281..31310b5d3f50 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -378,26 +378,15 @@ EXPORT_SYMBOL(pid_task); | |||
378 | /* | 378 | /* |
379 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. | 379 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. |
380 | */ | 380 | */ |
381 | struct task_struct *find_task_by_pid_type_ns(int type, int nr, | 381 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
382 | struct pid_namespace *ns) | ||
383 | { | 382 | { |
384 | return pid_task(find_pid_ns(nr, ns), type); | 383 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
385 | } | 384 | } |
386 | 385 | ||
387 | EXPORT_SYMBOL(find_task_by_pid_type_ns); | ||
388 | |||
389 | struct task_struct *find_task_by_vpid(pid_t vnr) | 386 | struct task_struct *find_task_by_vpid(pid_t vnr) |
390 | { | 387 | { |
391 | return find_task_by_pid_type_ns(PIDTYPE_PID, vnr, | 388 | return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); |
392 | current->nsproxy->pid_ns); | ||
393 | } | ||
394 | EXPORT_SYMBOL(find_task_by_vpid); | ||
395 | |||
396 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | ||
397 | { | ||
398 | return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns); | ||
399 | } | 389 | } |
400 | EXPORT_SYMBOL(find_task_by_pid_ns); | ||
401 | 390 | ||
402 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | 391 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) |
403 | { | 392 | { |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 2d1001b4858d..821722ae58a7 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -67,9 +67,10 @@ err_alloc: | |||
67 | return NULL; | 67 | return NULL; |
68 | } | 68 | } |
69 | 69 | ||
70 | static struct pid_namespace *create_pid_namespace(unsigned int level) | 70 | static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) |
71 | { | 71 | { |
72 | struct pid_namespace *ns; | 72 | struct pid_namespace *ns; |
73 | unsigned int level = parent_pid_ns->level + 1; | ||
73 | int i; | 74 | int i; |
74 | 75 | ||
75 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); | 76 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); |
@@ -86,6 +87,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level) | |||
86 | 87 | ||
87 | kref_init(&ns->kref); | 88 | kref_init(&ns->kref); |
88 | ns->level = level; | 89 | ns->level = level; |
90 | ns->parent = get_pid_ns(parent_pid_ns); | ||
89 | 91 | ||
90 | set_bit(0, ns->pidmap[0].page); | 92 | set_bit(0, ns->pidmap[0].page); |
91 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 93 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
@@ -114,25 +116,11 @@ static void destroy_pid_namespace(struct pid_namespace *ns) | |||
114 | 116 | ||
115 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | 117 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) |
116 | { | 118 | { |
117 | struct pid_namespace *new_ns; | ||
118 | |||
119 | BUG_ON(!old_ns); | ||
120 | new_ns = get_pid_ns(old_ns); | ||
121 | if (!(flags & CLONE_NEWPID)) | 119 | if (!(flags & CLONE_NEWPID)) |
122 | goto out; | 120 | return get_pid_ns(old_ns); |
123 | |||
124 | new_ns = ERR_PTR(-EINVAL); | ||
125 | if (flags & CLONE_THREAD) | 121 | if (flags & CLONE_THREAD) |
126 | goto out_put; | 122 | return ERR_PTR(-EINVAL); |
127 | 123 | return create_pid_namespace(old_ns); | |
128 | new_ns = create_pid_namespace(old_ns->level + 1); | ||
129 | if (!IS_ERR(new_ns)) | ||
130 | new_ns->parent = get_pid_ns(old_ns); | ||
131 | |||
132 | out_put: | ||
133 | put_pid_ns(old_ns); | ||
134 | out: | ||
135 | return new_ns; | ||
136 | } | 124 | } |
137 | 125 | ||
138 | void free_pid_ns(struct kref *kref) | 126 | void free_pid_ns(struct kref *kref) |
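The simplification works because create_pid_namespace() now owns the whole parent relationship: it derives the child's level and pins the parent with get_pid_ns(). A toy userspace model of that refcounted chain (struct ns and its helpers are illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

/* Toy analogue of struct pid_namespace: a refcounted parent chain. */
struct ns {
        unsigned level;
        int refs;
        struct ns *parent;
};

static struct ns *get_ns(struct ns *n) { n->refs++; return n; }

static void put_ns(struct ns *n)
{
        while (n && --n->refs == 0) {
                struct ns *parent = n->parent;
                free(n);        /* dropping a ns releases its parent ref too */
                n = parent;
        }
}

static struct ns *create_ns(struct ns *parent)
{
        struct ns *n = calloc(1, sizeof(*n));

        n->refs = 1;
        n->level = parent->level + 1;   /* child is one level deeper */
        n->parent = get_ns(parent);     /* child pins its parent */
        return n;
}

/* Like the simplified copy_pid_ns(): share unless a new ns is asked for. */
static struct ns *copy_ns(int clone_newpid, struct ns *old)
{
        if (!clone_newpid)
                return get_ns(old);
        return create_ns(old);
}

int main(void)
{
        struct ns init = { .level = 0, .refs = 1, .parent = NULL };
        struct ns *child = copy_ns(1, &init);

        printf("child level=%u, init refs=%d\n", child->level, init.refs);
        put_ns(child);  /* frees the child and drops its parent reference */
        printf("init refs=%d\n", init.refs);
        return 0;
}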
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f6d8b8cb5e34..61c78b2c07ba 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -167,67 +167,82 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
167 | int ptrace_attach(struct task_struct *task) | 167 | int ptrace_attach(struct task_struct *task) |
168 | { | 168 | { |
169 | int retval; | 169 | int retval; |
170 | unsigned long flags; | ||
171 | 170 | ||
172 | audit_ptrace(task); | 171 | audit_ptrace(task); |
173 | 172 | ||
174 | retval = -EPERM; | 173 | retval = -EPERM; |
174 | if (unlikely(task->flags & PF_KTHREAD)) | ||
175 | goto out; | ||
175 | if (same_thread_group(task, current)) | 176 | if (same_thread_group(task, current)) |
176 | goto out; | 177 | goto out; |
177 | 178 | ||
178 | /* Protect the target's credential calculations against our | 179 | /* |
180 | * Protect exec's credential calculations against our | ||
179 | * interference; SUID, SGID and LSM creds get determined differently | 181 | * interference; SUID, SGID and LSM creds get determined differently |
180 | * under ptrace. | 182 | * under ptrace. |
181 | */ | 183 | */ |
182 | retval = mutex_lock_interruptible(&task->cred_guard_mutex); | 184 | retval = mutex_lock_interruptible(&task->cred_guard_mutex); |
183 | if (retval < 0) | 185 | if (retval < 0) |
184 | goto out; | 186 | goto out; |
185 | 187 | ||
186 | retval = -EPERM; | ||
187 | repeat: | ||
188 | /* | ||
189 | * Nasty, nasty. | ||
190 | * | ||
191 | * We want to hold both the task-lock and the | ||
192 | * tasklist_lock for writing at the same time. | ||
193 | * But that's against the rules (tasklist_lock | ||
194 | * is taken for reading by interrupts on other | ||
195 | * cpu's that may have task_lock). | ||
196 | */ | ||
197 | task_lock(task); | 188 | task_lock(task); |
198 | if (!write_trylock_irqsave(&tasklist_lock, flags)) { | ||
199 | task_unlock(task); | ||
200 | do { | ||
201 | cpu_relax(); | ||
202 | } while (!write_can_lock(&tasklist_lock)); | ||
203 | goto repeat; | ||
204 | } | ||
205 | |||
206 | if (!task->mm) | ||
207 | goto bad; | ||
208 | /* the same process cannot be attached many times */ | ||
209 | if (task->ptrace & PT_PTRACED) | ||
210 | goto bad; | ||
211 | retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); | 189 | retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); |
190 | task_unlock(task); | ||
212 | if (retval) | 191 | if (retval) |
213 | goto bad; | 192 | goto unlock_creds; |
214 | 193 | ||
215 | /* Go */ | 194 | write_lock_irq(&tasklist_lock); |
216 | task->ptrace |= PT_PTRACED; | 195 | retval = -EPERM; |
196 | if (unlikely(task->exit_state)) | ||
197 | goto unlock_tasklist; | ||
198 | if (task->ptrace) | ||
199 | goto unlock_tasklist; | ||
200 | |||
201 | task->ptrace = PT_PTRACED; | ||
217 | if (capable(CAP_SYS_PTRACE)) | 202 | if (capable(CAP_SYS_PTRACE)) |
218 | task->ptrace |= PT_PTRACE_CAP; | 203 | task->ptrace |= PT_PTRACE_CAP; |
219 | 204 | ||
220 | __ptrace_link(task, current); | 205 | __ptrace_link(task, current); |
221 | |||
222 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); | 206 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); |
223 | bad: | 207 | |
224 | write_unlock_irqrestore(&tasklist_lock, flags); | 208 | retval = 0; |
225 | task_unlock(task); | 209 | unlock_tasklist: |
210 | write_unlock_irq(&tasklist_lock); | ||
211 | unlock_creds: | ||
226 | mutex_unlock(&task->cred_guard_mutex); | 212 | mutex_unlock(&task->cred_guard_mutex); |
227 | out: | 213 | out: |
228 | return retval; | 214 | return retval; |
229 | } | 215 | } |
230 | 216 | ||
217 | /** | ||
218 | * ptrace_traceme -- helper for PTRACE_TRACEME | ||
219 | * | ||
220 | * Performs checks and sets PT_PTRACED. | ||
221 | * Should be used by all ptrace implementations for PTRACE_TRACEME. | ||
222 | */ | ||
223 | int ptrace_traceme(void) | ||
224 | { | ||
225 | int ret = -EPERM; | ||
226 | |||
227 | write_lock_irq(&tasklist_lock); | ||
228 | /* Are we already being traced? */ | ||
229 | if (!current->ptrace) { | ||
230 | ret = security_ptrace_traceme(current->parent); | ||
231 | /* | ||
232 | * Check PF_EXITING to ensure ->real_parent has not passed | ||
233 | * exit_ptrace(). Otherwise we don't report the error but | ||
234 | * pretend ->real_parent untraces us right after return. | ||
235 | */ | ||
236 | if (!ret && !(current->real_parent->flags & PF_EXITING)) { | ||
237 | current->ptrace = PT_PTRACED; | ||
238 | __ptrace_link(current, current->real_parent); | ||
239 | } | ||
240 | } | ||
241 | write_unlock_irq(&tasklist_lock); | ||
242 | |||
243 | return ret; | ||
244 | } | ||
245 | |||
231 | /* | 246 | /* |
232 | * Called with irqs disabled, returns true if children should reap themselves. | 247 | * Called with irqs disabled, returns true if children should reap themselves. | ||
233 | */ | 248 | */ |
@@ -409,37 +424,33 @@ static int ptrace_setoptions(struct task_struct *child, long data) | |||
409 | 424 | ||
410 | static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) | 425 | static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) |
411 | { | 426 | { |
427 | unsigned long flags; | ||
412 | int error = -ESRCH; | 428 | int error = -ESRCH; |
413 | 429 | ||
414 | read_lock(&tasklist_lock); | 430 | if (lock_task_sighand(child, &flags)) { |
415 | if (likely(child->sighand != NULL)) { | ||
416 | error = -EINVAL; | 431 | error = -EINVAL; |
417 | spin_lock_irq(&child->sighand->siglock); | ||
418 | if (likely(child->last_siginfo != NULL)) { | 432 | if (likely(child->last_siginfo != NULL)) { |
419 | *info = *child->last_siginfo; | 433 | *info = *child->last_siginfo; |
420 | error = 0; | 434 | error = 0; |
421 | } | 435 | } |
422 | spin_unlock_irq(&child->sighand->siglock); | 436 | unlock_task_sighand(child, &flags); |
423 | } | 437 | } |
424 | read_unlock(&tasklist_lock); | ||
425 | return error; | 438 | return error; |
426 | } | 439 | } |
427 | 440 | ||
428 | static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) | 441 | static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) |
429 | { | 442 | { |
443 | unsigned long flags; | ||
430 | int error = -ESRCH; | 444 | int error = -ESRCH; |
431 | 445 | ||
432 | read_lock(&tasklist_lock); | 446 | if (lock_task_sighand(child, &flags)) { |
433 | if (likely(child->sighand != NULL)) { | ||
434 | error = -EINVAL; | 447 | error = -EINVAL; |
435 | spin_lock_irq(&child->sighand->siglock); | ||
436 | if (likely(child->last_siginfo != NULL)) { | 448 | if (likely(child->last_siginfo != NULL)) { |
437 | *child->last_siginfo = *info; | 449 | *child->last_siginfo = *info; |
438 | error = 0; | 450 | error = 0; |
439 | } | 451 | } |
440 | spin_unlock_irq(&child->sighand->siglock); | 452 | unlock_task_sighand(child, &flags); |
441 | } | 453 | } |
442 | read_unlock(&tasklist_lock); | ||
443 | return error; | 454 | return error; |
444 | } | 455 | } |
445 | 456 | ||
@@ -566,72 +577,16 @@ int ptrace_request(struct task_struct *child, long request, | |||
566 | return ret; | 577 | return ret; |
567 | } | 578 | } |
568 | 579 | ||
569 | /** | 580 | static struct task_struct *ptrace_get_task_struct(pid_t pid) |
570 | * ptrace_traceme -- helper for PTRACE_TRACEME | ||
571 | * | ||
572 | * Performs checks and sets PT_PTRACED. | ||
573 | * Should be used by all ptrace implementations for PTRACE_TRACEME. | ||
574 | */ | ||
575 | int ptrace_traceme(void) | ||
576 | { | ||
577 | int ret = -EPERM; | ||
578 | |||
579 | /* | ||
580 | * Are we already being traced? | ||
581 | */ | ||
582 | repeat: | ||
583 | task_lock(current); | ||
584 | if (!(current->ptrace & PT_PTRACED)) { | ||
585 | /* | ||
586 | * See ptrace_attach() comments about the locking here. | ||
587 | */ | ||
588 | unsigned long flags; | ||
589 | if (!write_trylock_irqsave(&tasklist_lock, flags)) { | ||
590 | task_unlock(current); | ||
591 | do { | ||
592 | cpu_relax(); | ||
593 | } while (!write_can_lock(&tasklist_lock)); | ||
594 | goto repeat; | ||
595 | } | ||
596 | |||
597 | ret = security_ptrace_traceme(current->parent); | ||
598 | |||
599 | /* | ||
600 | * Check PF_EXITING to ensure ->real_parent has not passed | ||
601 | * exit_ptrace(). Otherwise we don't report the error but | ||
602 | * pretend ->real_parent untraces us right after return. | ||
603 | */ | ||
604 | if (!ret && !(current->real_parent->flags & PF_EXITING)) { | ||
605 | current->ptrace |= PT_PTRACED; | ||
606 | __ptrace_link(current, current->real_parent); | ||
607 | } | ||
608 | |||
609 | write_unlock_irqrestore(&tasklist_lock, flags); | ||
610 | } | ||
611 | task_unlock(current); | ||
612 | return ret; | ||
613 | } | ||
614 | |||
615 | /** | ||
616 | * ptrace_get_task_struct -- grab a task struct reference for ptrace | ||
617 | * @pid: process id to grab a task_struct reference of | ||
618 | * | ||
619 | * This function is a helper for ptrace implementations. It checks | ||
620 | * permissions and then grabs a task struct for use of the actual | ||
621 | * ptrace implementation. | ||
622 | * | ||
623 | * Returns the task_struct for @pid or an ERR_PTR() on failure. | ||
624 | */ | ||
625 | struct task_struct *ptrace_get_task_struct(pid_t pid) | ||
626 | { | 581 | { |
627 | struct task_struct *child; | 582 | struct task_struct *child; |
628 | 583 | ||
629 | read_lock(&tasklist_lock); | 584 | rcu_read_lock(); |
630 | child = find_task_by_vpid(pid); | 585 | child = find_task_by_vpid(pid); |
631 | if (child) | 586 | if (child) |
632 | get_task_struct(child); | 587 | get_task_struct(child); |
588 | rcu_read_unlock(); | ||
633 | 589 | ||
634 | read_unlock(&tasklist_lock); | ||
635 | if (!child) | 590 | if (!child) |
636 | return ERR_PTR(-ESRCH); | 591 | return ERR_PTR(-ESRCH); |
637 | return child; | 592 | return child; |
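The attach rework above trades the old write_trylock()/cpu_relax() spin for a strict lock sequence: cred_guard_mutex first, then a brief permission check under task_lock() that is released before tasklist_lock is taken for writing. A pthread-based sketch of that ordering (the locks, flags and return codes here are all illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cred_guard = PTHREAD_MUTEX_INITIALIZER;  /* ~cred_guard_mutex */
static pthread_mutex_t task_lock  = PTHREAD_MUTEX_INITIALIZER;  /* ~task_lock() */
static pthread_rwlock_t tasklist  = PTHREAD_RWLOCK_INITIALIZER; /* ~tasklist_lock */

static int may_attach = 1, traced = 0;

static int attach(void)
{
        int ret = -1;

        /* 1: serialize against exec's credential changes */
        pthread_mutex_lock(&cred_guard);

        /* 2: permission check under the task lock only, then drop it */
        pthread_mutex_lock(&task_lock);
        int ok = may_attach;
        pthread_mutex_unlock(&task_lock);
        if (!ok)
                goto unlock_creds;

        /* 3: state change under the tasklist write lock, never nested
         * inside the task lock, so no trylock/cpu_relax() retry loop */
        pthread_rwlock_wrlock(&tasklist);
        if (!traced) {
                traced = 1;
                ret = 0;
        }
        pthread_rwlock_unlock(&tasklist);
unlock_creds:
        pthread_mutex_unlock(&cred_guard);
        return ret;
}

int main(void)
{
        printf("attach: %d, traced: %d\n", attach(), traced);
        return 0;
}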
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index bf8e7534c803..e1338f074314 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -18,7 +18,7 @@ | |||
18 | void res_counter_init(struct res_counter *counter, struct res_counter *parent) | 18 | void res_counter_init(struct res_counter *counter, struct res_counter *parent) |
19 | { | 19 | { |
20 | spin_lock_init(&counter->lock); | 20 | spin_lock_init(&counter->lock); |
21 | counter->limit = (unsigned long long)LLONG_MAX; | 21 | counter->limit = RESOURCE_MAX; |
22 | counter->parent = parent; | 22 | counter->parent = parent; |
23 | } | 23 | } |
24 | 24 | ||
@@ -133,6 +133,16 @@ int res_counter_memparse_write_strategy(const char *buf, | |||
133 | unsigned long long *res) | 133 | unsigned long long *res) |
134 | { | 134 | { |
135 | char *end; | 135 | char *end; |
136 | |||
137 | /* return RESOURCE_MAX(unlimited) if "-1" is specified */ | ||
138 | if (*buf == '-') { | ||
139 | *res = simple_strtoull(buf + 1, &end, 10); | ||
140 | if (*res != 1 || *end != '\0') | ||
141 | return -EINVAL; | ||
142 | *res = RESOURCE_MAX; | ||
143 | return 0; | ||
144 | } | ||
145 | |||
136 | /* FIXME - make memparse() take const char* args */ | 146 | /* FIXME - make memparse() take const char* args */ |
137 | *res = memparse((char *)buf, &end); | 147 | *res = memparse((char *)buf, &end); |
138 | if (*end != '\0') | 148 | if (*end != '\0') |
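The new branch above accepts exactly the string "-1" as "unlimited" and maps it to RESOURCE_MAX; anything else starting with '-' is rejected. A userspace sketch of the same parse (RESOURCE_MAX's value is a stand-in here, and plain strtoull() replaces the kernel's suffix-aware memparse()):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define RESOURCE_MAX (~0ULL)    /* stand-in for the kernel's "unlimited" value */

/* Parse a limit string: "-1" means unlimited, otherwise a plain number. */
static int parse_limit(const char *buf, unsigned long long *res)
{
        char *end;

        if (*buf == '-') {
                *res = strtoull(buf + 1, &end, 10);
                if (*res != 1 || *end != '\0')
                        return -EINVAL; /* only exactly "-1" is accepted */
                *res = RESOURCE_MAX;
                return 0;
        }
        *res = strtoull(buf, &end, 10);
        return (*end == '\0') ? 0 : -EINVAL;
}

int main(void)
{
        unsigned long long v;

        if (parse_limit("-1", &v) == 0)
                printf("unlimited: %llu\n", v);
        if (parse_limit("-2", &v) != 0)
                printf("-2 rejected\n");
        return 0;
}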
diff --git a/kernel/sched.c b/kernel/sched.c index 8fb88a906aaa..7c9098d186e6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1978 | if (task_hot(p, old_rq->clock, NULL)) | 1978 | if (task_hot(p, old_rq->clock, NULL)) |
1979 | schedstat_inc(p, se.nr_forced2_migrations); | 1979 | schedstat_inc(p, se.nr_forced2_migrations); |
1980 | #endif | 1980 | #endif |
1981 | perf_counter_task_migration(p, new_cpu); | 1981 | perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
1982 | 1, 1, NULL, 0); | ||
1982 | } | 1983 | } |
1983 | p->se.vruntime -= old_cfsrq->min_vruntime - | 1984 | p->se.vruntime -= old_cfsrq->min_vruntime - |
1984 | new_cfsrq->min_vruntime; | 1985 | new_cfsrq->min_vruntime; |
@@ -7045,7 +7046,7 @@ static int migration_thread(void *data) | |||
7045 | 7046 | ||
7046 | if (cpu_is_offline(cpu)) { | 7047 | if (cpu_is_offline(cpu)) { |
7047 | spin_unlock_irq(&rq->lock); | 7048 | spin_unlock_irq(&rq->lock); |
7048 | goto wait_to_die; | 7049 | break; |
7049 | } | 7050 | } |
7050 | 7051 | ||
7051 | if (rq->active_balance) { | 7052 | if (rq->active_balance) { |
@@ -7071,16 +7072,7 @@ static int migration_thread(void *data) | |||
7071 | complete(&req->done); | 7072 | complete(&req->done); |
7072 | } | 7073 | } |
7073 | __set_current_state(TASK_RUNNING); | 7074 | __set_current_state(TASK_RUNNING); |
7074 | return 0; | ||
7075 | 7075 | ||
7076 | wait_to_die: | ||
7077 | /* Wait for kthread_stop */ | ||
7078 | set_current_state(TASK_INTERRUPTIBLE); | ||
7079 | while (!kthread_should_stop()) { | ||
7080 | schedule(); | ||
7081 | set_current_state(TASK_INTERRUPTIBLE); | ||
7082 | } | ||
7083 | __set_current_state(TASK_RUNNING); | ||
7084 | return 0; | 7076 | return 0; |
7085 | } | 7077 | } |
7086 | 7078 | ||
@@ -7494,6 +7486,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7494 | rq = task_rq_lock(p, &flags); | 7486 | rq = task_rq_lock(p, &flags); |
7495 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | 7487 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
7496 | task_rq_unlock(rq, &flags); | 7488 | task_rq_unlock(rq, &flags); |
7489 | get_task_struct(p); | ||
7497 | cpu_rq(cpu)->migration_thread = p; | 7490 | cpu_rq(cpu)->migration_thread = p; |
7498 | break; | 7491 | break; |
7499 | 7492 | ||
@@ -7524,6 +7517,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7524 | kthread_bind(cpu_rq(cpu)->migration_thread, | 7517 | kthread_bind(cpu_rq(cpu)->migration_thread, |
7525 | cpumask_any(cpu_online_mask)); | 7518 | cpumask_any(cpu_online_mask)); |
7526 | kthread_stop(cpu_rq(cpu)->migration_thread); | 7519 | kthread_stop(cpu_rq(cpu)->migration_thread); |
7520 | put_task_struct(cpu_rq(cpu)->migration_thread); | ||
7527 | cpu_rq(cpu)->migration_thread = NULL; | 7521 | cpu_rq(cpu)->migration_thread = NULL; |
7528 | break; | 7522 | break; |
7529 | 7523 | ||
@@ -7533,6 +7527,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7533 | migrate_live_tasks(cpu); | 7527 | migrate_live_tasks(cpu); |
7534 | rq = cpu_rq(cpu); | 7528 | rq = cpu_rq(cpu); |
7535 | kthread_stop(rq->migration_thread); | 7529 | kthread_stop(rq->migration_thread); |
7530 | put_task_struct(rq->migration_thread); | ||
7536 | rq->migration_thread = NULL; | 7531 | rq->migration_thread = NULL; |
7537 | /* Idle task back to normal (off runqueue, low prio) */ | 7532 | /* Idle task back to normal (off runqueue, low prio) */ |
7538 | spin_lock_irq(&rq->lock); | 7533 | spin_lock_irq(&rq->lock); |
@@ -7828,7 +7823,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7828 | free_rootdomain(old_rd); | 7823 | free_rootdomain(old_rd); |
7829 | } | 7824 | } |
7830 | 7825 | ||
7831 | static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) | 7826 | static int init_rootdomain(struct root_domain *rd, bool bootmem) |
7832 | { | 7827 | { |
7833 | gfp_t gfp = GFP_KERNEL; | 7828 | gfp_t gfp = GFP_KERNEL; |
7834 | 7829 | ||
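The migration-thread changes above pair up: the thread now simply breaks out of its loop when its CPU is offline, and migration_call() pins the cached task_struct with get_task_struct(), dropping the reference only after kthread_stop(), so the struct stays valid even if the thread has already exited by the time it is stopped. A toy refcount model of that discipline (struct task here is illustrative):

#include <stdio.h>
#include <stdlib.h>

/* Toy task with a reference count, standing in for task_struct. */
struct task {
        int refs;
        const char *name;
};

static struct task *get_task(struct task *t) { t->refs++; return t; }

static void put_task(struct task *t)
{
        if (--t->refs == 0) {
                printf("freeing %s\n", t->name);
                free(t);
        }
}

int main(void)
{
        struct task *t = malloc(sizeof(*t));
        struct task *cached;

        t->refs = 1;                    /* the thread's own reference */
        t->name = "migration/0";

        cached = get_task(t);           /* like get_task_struct() before caching */
        put_task(t);                    /* thread exits: its own ref is gone... */
        printf("still valid: %s\n", cached->name);      /* ...pointer survives */
        put_task(cached);               /* like put_task_struct() after kthread_stop() */
        return 0;
}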
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 7deffc9f0e5f..e6c251790dde 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
@@ -152,7 +152,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
152 | * | 152 | * |
153 | * Returns: -ENOMEM if memory fails. | 153 | * Returns: -ENOMEM if memory fails. |
154 | */ | 154 | */ |
155 | int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) | 155 | int cpupri_init(struct cpupri *cp, bool bootmem) |
156 | { | 156 | { |
157 | gfp_t gfp = GFP_KERNEL; | 157 | gfp_t gfp = GFP_KERNEL; |
158 | int i; | 158 | int i; |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 467ca72f1657..70c7e0b79946 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -162,7 +162,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
162 | { | 162 | { |
163 | s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, | 163 | s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, |
164 | spread, rq0_min_vruntime, spread0; | 164 | spread, rq0_min_vruntime, spread0; |
165 | struct rq *rq = &per_cpu(runqueues, cpu); | 165 | struct rq *rq = cpu_rq(cpu); |
166 | struct sched_entity *last; | 166 | struct sched_entity *last; |
167 | unsigned long flags; | 167 | unsigned long flags; |
168 | 168 | ||
@@ -191,7 +191,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
191 | if (last) | 191 | if (last) |
192 | max_vruntime = last->vruntime; | 192 | max_vruntime = last->vruntime; |
193 | min_vruntime = cfs_rq->min_vruntime; | 193 | min_vruntime = cfs_rq->min_vruntime; |
194 | rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; | 194 | rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; |
195 | spin_unlock_irqrestore(&rq->lock, flags); | 195 | spin_unlock_irqrestore(&rq->lock, flags); |
196 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", | 196 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", |
197 | SPLIT_NS(MIN_vruntime)); | 197 | SPLIT_NS(MIN_vruntime)); |
@@ -248,7 +248,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |||
248 | 248 | ||
249 | static void print_cpu(struct seq_file *m, int cpu) | 249 | static void print_cpu(struct seq_file *m, int cpu) |
250 | { | 250 | { |
251 | struct rq *rq = &per_cpu(runqueues, cpu); | 251 | struct rq *rq = cpu_rq(cpu); |
252 | 252 | ||
253 | #ifdef CONFIG_X86 | 253 | #ifdef CONFIG_X86 |
254 | { | 254 | { |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5f9650e8fe75..ba7fd6e9556f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -430,12 +430,13 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
430 | 430 | ||
431 | for_each_sched_entity(se) { | 431 | for_each_sched_entity(se) { |
432 | struct load_weight *load; | 432 | struct load_weight *load; |
433 | struct load_weight lw; | ||
433 | 434 | ||
434 | cfs_rq = cfs_rq_of(se); | 435 | cfs_rq = cfs_rq_of(se); |
435 | load = &cfs_rq->load; | 436 | load = &cfs_rq->load; |
436 | 437 | ||
437 | if (unlikely(!se->on_rq)) { | 438 | if (unlikely(!se->on_rq)) { |
438 | struct load_weight lw = cfs_rq->load; | 439 | lw = cfs_rq->load; |
439 | 440 | ||
440 | update_load_add(&lw, se->load.weight); | 441 | update_load_add(&lw, se->load.weight); |
441 | load = &lw; | 442 | load = &lw; |
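This hunk fixes a lifetime bug: lw used to be declared inside the if block while load = &lw escaped it, so later dereferences of load read a dead object; hoisting the declaration into the loop body keeps it alive for the whole iteration. A minimal standalone illustration of the pattern and the fix:

#include <stdio.h>

struct load_weight { unsigned long weight; };

int main(void)
{
        struct load_weight cfs_load = { .weight = 100 };
        struct load_weight *load = &cfs_load;
        struct load_weight lw;          /* fixed: lives as long as 'load' */
        int on_rq = 0;

        if (!on_rq) {
                /* buggy form: 'struct load_weight lw = cfs_load;' here would
                 * die at the closing brace while 'load' still points at it */
                lw = cfs_load;
                lw.weight += 50;        /* like update_load_add() */
                load = &lw;
        }
        printf("effective weight: %lu\n", load->weight);        /* 150 */
        return 0;
}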
diff --git a/kernel/signal.c b/kernel/signal.c index d81f4952eebb..ccf1ceedaebe 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1410,7 +1410,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1410 | /* do_notify_parent_cldstop should have been called instead. */ | 1410 | /* do_notify_parent_cldstop should have been called instead. */ |
1411 | BUG_ON(task_is_stopped_or_traced(tsk)); | 1411 | BUG_ON(task_is_stopped_or_traced(tsk)); |
1412 | 1412 | ||
1413 | BUG_ON(!tsk->ptrace && | 1413 | BUG_ON(!task_ptrace(tsk) && |
1414 | (tsk->group_leader != tsk || !thread_group_empty(tsk))); | 1414 | (tsk->group_leader != tsk || !thread_group_empty(tsk))); |
1415 | 1415 | ||
1416 | info.si_signo = sig; | 1416 | info.si_signo = sig; |
@@ -1449,7 +1449,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1449 | 1449 | ||
1450 | psig = tsk->parent->sighand; | 1450 | psig = tsk->parent->sighand; |
1451 | spin_lock_irqsave(&psig->siglock, flags); | 1451 | spin_lock_irqsave(&psig->siglock, flags); |
1452 | if (!tsk->ptrace && sig == SIGCHLD && | 1452 | if (!task_ptrace(tsk) && sig == SIGCHLD && |
1453 | (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || | 1453 | (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || |
1454 | (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { | 1454 | (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { |
1455 | /* | 1455 | /* |
@@ -1486,7 +1486,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1486 | struct task_struct *parent; | 1486 | struct task_struct *parent; |
1487 | struct sighand_struct *sighand; | 1487 | struct sighand_struct *sighand; |
1488 | 1488 | ||
1489 | if (tsk->ptrace & PT_PTRACED) | 1489 | if (task_ptrace(tsk)) |
1490 | parent = tsk->parent; | 1490 | parent = tsk->parent; |
1491 | else { | 1491 | else { |
1492 | tsk = tsk->group_leader; | 1492 | tsk = tsk->group_leader; |
@@ -1499,7 +1499,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1499 | * see comment in do_notify_parent() about the following 3 lines | 1499 | * see comment in do_notify_parent() about the following 3 lines | ||
1500 | */ | 1500 | */ |
1501 | rcu_read_lock(); | 1501 | rcu_read_lock(); |
1502 | info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); | 1502 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); |
1503 | info.si_uid = __task_cred(tsk)->uid; | 1503 | info.si_uid = __task_cred(tsk)->uid; |
1504 | rcu_read_unlock(); | 1504 | rcu_read_unlock(); |
1505 | 1505 | ||
@@ -1535,7 +1535,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1535 | 1535 | ||
1536 | static inline int may_ptrace_stop(void) | 1536 | static inline int may_ptrace_stop(void) |
1537 | { | 1537 | { |
1538 | if (!likely(current->ptrace & PT_PTRACED)) | 1538 | if (!likely(task_ptrace(current))) |
1539 | return 0; | 1539 | return 0; |
1540 | /* | 1540 | /* |
1541 | * Are we in the middle of do_coredump? | 1541 | * Are we in the middle of do_coredump? |
@@ -1753,7 +1753,7 @@ static int do_signal_stop(int signr) | |||
1753 | static int ptrace_signal(int signr, siginfo_t *info, | 1753 | static int ptrace_signal(int signr, siginfo_t *info, |
1754 | struct pt_regs *regs, void *cookie) | 1754 | struct pt_regs *regs, void *cookie) |
1755 | { | 1755 | { |
1756 | if (!(current->ptrace & PT_PTRACED)) | 1756 | if (!task_ptrace(current)) |
1757 | return signr; | 1757 | return signr; |
1758 | 1758 | ||
1759 | ptrace_signal_deliver(regs, cookie); | 1759 | ptrace_signal_deliver(regs, cookie); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index b41fb710e114..3a94905fa5d2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -213,6 +213,7 @@ restart: | |||
213 | do { | 213 | do { |
214 | if (pending & 1) { | 214 | if (pending & 1) { |
215 | int prev_count = preempt_count(); | 215 | int prev_count = preempt_count(); |
216 | kstat_incr_softirqs_this_cpu(h - softirq_vec); | ||
216 | 217 | ||
217 | trace_softirq_entry(h, softirq_vec); | 218 | trace_softirq_entry(h, softirq_vec); |
218 | h->action(h); | 219 | h->action(h); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ab462b9968d5..62e4ff9968b5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -2283,7 +2283,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | |||
2283 | void *data) | 2283 | void *data) |
2284 | { | 2284 | { |
2285 | #define TMPBUFLEN 21 | 2285 | #define TMPBUFLEN 21 |
2286 | int *i, vleft, first=1, neg, val; | 2286 | int *i, vleft, first = 1, neg; |
2287 | unsigned long lval; | 2287 | unsigned long lval; |
2288 | size_t left, len; | 2288 | size_t left, len; |
2289 | 2289 | ||
@@ -2336,8 +2336,6 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | |||
2336 | len = p-buf; | 2336 | len = p-buf; |
2337 | if ((len < left) && *p && !isspace(*p)) | 2337 | if ((len < left) && *p && !isspace(*p)) |
2338 | break; | 2338 | break; |
2339 | if (neg) | ||
2340 | val = -val; | ||
2341 | s += len; | 2339 | s += len; |
2342 | left -= len; | 2340 | left -= len; |
2343 | 2341 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2aff39c6f10c..e0f59a21c061 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -222,6 +222,15 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
222 | 222 | ||
223 | cpu = smp_processor_id(); | 223 | cpu = smp_processor_id(); |
224 | ts = &per_cpu(tick_cpu_sched, cpu); | 224 | ts = &per_cpu(tick_cpu_sched, cpu); |
225 | |||
226 | /* | ||
227 | * A call to tick_nohz_start_idle() stops last_update_time from being | ||
228 | * updated. Thus, it must not be made when we are called from | ||
229 | * irq_exit() and the prior state was not idle. | ||
230 | */ | ||
231 | if (!inidle && !ts->inidle) | ||
232 | goto end; | ||
233 | |||
225 | now = tick_nohz_start_idle(ts); | 234 | now = tick_nohz_start_idle(ts); |
226 | 235 | ||
227 | /* | 236 | /* |
@@ -239,9 +248,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
239 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 248 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
240 | goto end; | 249 | goto end; |
241 | 250 | ||
242 | if (!inidle && !ts->inidle) | ||
243 | goto end; | ||
244 | |||
245 | ts->inidle = 1; | 251 | ts->inidle = 1; |
246 | 252 | ||
247 | if (need_resched()) | 253 | if (need_resched()) |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 61071fecc82e..1551f47e7669 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER | |||
18 | config HAVE_FUNCTION_GRAPH_TRACER | 18 | config HAVE_FUNCTION_GRAPH_TRACER |
19 | bool | 19 | bool |
20 | 20 | ||
21 | config HAVE_FUNCTION_GRAPH_FP_TEST | ||
22 | bool | ||
23 | help | ||
24 | An arch may pass in a unique value (frame pointer) to both the | ||
25 | entry and exit of a function. On exit, the value is compared | ||
26 | and, if it does not match, the kernel panics. | ||
27 | |||
21 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 28 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
22 | bool | 29 | bool |
23 | help | 30 | help |
@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER | |||
121 | bool "Kernel Function Graph Tracer" | 128 | bool "Kernel Function Graph Tracer" |
122 | depends on HAVE_FUNCTION_GRAPH_TRACER | 129 | depends on HAVE_FUNCTION_GRAPH_TRACER |
123 | depends on FUNCTION_TRACER | 130 | depends on FUNCTION_TRACER |
131 | depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE | ||
124 | default y | 132 | default y |
125 | help | 133 | help |
126 | Enable the kernel to trace a function at both its return | 134 | Enable the kernel to trace a function at both its return |
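One way to picture the HAVE_FUNCTION_GRAPH_FP_TEST help text above is a shadow stack: the arch records the frame pointer when a traced function is entered and must see the identical value when the return trampoline fires. A hedged userspace sketch (the real check lives in arch code and panics instead of merely reporting):

#include <stdio.h>

/* Toy shadow stack: each traced call records its frame pointer at entry
 * and must see the identical value again at exit. */
static unsigned long shadow[64];
static int depth;

static void trace_enter(unsigned long fp)
{
        shadow[depth++] = fp;
}

static void trace_exit(unsigned long fp)
{
        /* the kernel's version panics on a mismatch; we only report it */
        if (depth <= 0 || shadow[--depth] != fp)
                fprintf(stderr, "frame pointer mismatch!\n");
}

static int work(void)
{
        unsigned long fp = 0xbeef;      /* stand-in for the real frame pointer */

        trace_enter(fp);
        /* ... traced function body ... */
        trace_exit(fp);
        return 42;
}

int main(void)
{
        printf("%d\n", work());
        return 0;
}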
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bb60732ade0c..3718d55fb4c3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command) | |||
1224 | return; | 1224 | return; |
1225 | 1225 | ||
1226 | ftrace_start_up--; | 1226 | ftrace_start_up--; |
1227 | /* | ||
1228 | * Just warn in case of unbalance, no need to kill ftrace, it's not | ||
1229 | * critical but the ftrace_call callers may be never nopped again after | ||
1230 | * further ftrace uses. | ||
1231 | */ | ||
1232 | WARN_ON_ONCE(ftrace_start_up < 0); | ||
1233 | |||
1227 | if (!ftrace_start_up) | 1234 | if (!ftrace_start_up) |
1228 | command |= FTRACE_DISABLE_CALLS; | 1235 | command |= FTRACE_DISABLE_CALLS; |
1229 | 1236 | ||
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 86cdf671d7e2..1edaa9516e81 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c | |||
@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr) | |||
186 | int cpu; | 186 | int cpu; |
187 | kmemtrace_array = tr; | 187 | kmemtrace_array = tr; |
188 | 188 | ||
189 | for_each_cpu_mask(cpu, cpu_possible_map) | 189 | for_each_cpu(cpu, cpu_possible_mask) |
190 | tracing_reset(tr, cpu); | 190 | tracing_reset(tr, cpu); |
191 | 191 | ||
192 | kmemtrace_start_probes(); | 192 | kmemtrace_start_probes(); |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dc4dc70171ce..04dac2638258 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on); | |||
206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
207 | #define RB_ALIGNMENT 4U | 207 | #define RB_ALIGNMENT 4U |
208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | ||
209 | 210 | ||
210 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 211 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
211 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 212 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu { | |||
415 | unsigned long overrun; | 416 | unsigned long overrun; |
416 | unsigned long read; | 417 | unsigned long read; |
417 | local_t entries; | 418 | local_t entries; |
419 | local_t committing; | ||
420 | local_t commits; | ||
418 | u64 write_stamp; | 421 | u64 write_stamp; |
419 | u64 read_stamp; | 422 | u64 read_stamp; |
420 | atomic_t record_disabled; | 423 | atomic_t record_disabled; |
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
618 | kfree(cpu_buffer); | 621 | kfree(cpu_buffer); |
619 | } | 622 | } |
620 | 623 | ||
621 | /* | ||
622 | * Causes compile errors if the struct buffer_page gets bigger | ||
623 | * than the struct page. | ||
624 | */ | ||
625 | extern int ring_buffer_page_too_big(void); | ||
626 | |||
627 | #ifdef CONFIG_HOTPLUG_CPU | 624 | #ifdef CONFIG_HOTPLUG_CPU |
628 | static int rb_cpu_notify(struct notifier_block *self, | 625 | static int rb_cpu_notify(struct notifier_block *self, |
629 | unsigned long action, void *hcpu); | 626 | unsigned long action, void *hcpu); |
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
646 | int bsize; | 643 | int bsize; |
647 | int cpu; | 644 | int cpu; |
648 | 645 | ||
649 | /* Paranoid! Optimizes out when all is well */ | ||
650 | if (sizeof(struct buffer_page) > sizeof(struct page)) | ||
651 | ring_buffer_page_too_big(); | ||
652 | |||
653 | |||
654 | /* keep it in its own cache line */ | 646 | /* keep it in its own cache line */ |
655 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), | 647 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), |
656 | GFP_KERNEL); | 648 | GFP_KERNEL); |
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
666 | buffer->reader_lock_key = key; | 658 | buffer->reader_lock_key = key; |
667 | 659 | ||
668 | /* need at least two pages */ | 660 | /* need at least two pages */ |
669 | if (buffer->pages == 1) | 661 | if (buffer->pages < 2) |
670 | buffer->pages++; | 662 | buffer->pages = 2; |
671 | 663 | ||
672 | /* | 664 | /* |
673 | * In case of non-hotplug cpu, if the ring-buffer is allocated | 665 | * In case of non-hotplug cpu, if the ring-buffer is allocated |
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event) | |||
1011 | { | 1003 | { |
1012 | unsigned long addr = (unsigned long)event; | 1004 | unsigned long addr = (unsigned long)event; |
1013 | 1005 | ||
1014 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 1006 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; |
1015 | } | 1007 | } |
1016 | 1008 | ||
1017 | static inline int | 1009 | static inline int |
1018 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1010 | rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1019 | struct ring_buffer_event *event) | 1011 | struct ring_buffer_event *event) |
1020 | { | 1012 | { |
1021 | unsigned long addr = (unsigned long)event; | 1013 | unsigned long addr = (unsigned long)event; |
1022 | unsigned long index; | 1014 | unsigned long index; |
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1029 | } | 1021 | } |
1030 | 1022 | ||
1031 | static void | 1023 | static void |
1032 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | ||
1033 | struct ring_buffer_event *event) | ||
1034 | { | ||
1035 | unsigned long addr = (unsigned long)event; | ||
1036 | unsigned long index; | ||
1037 | |||
1038 | index = rb_event_index(event); | ||
1039 | addr &= PAGE_MASK; | ||
1040 | |||
1041 | while (cpu_buffer->commit_page->page != (void *)addr) { | ||
1042 | if (RB_WARN_ON(cpu_buffer, | ||
1043 | cpu_buffer->commit_page == cpu_buffer->tail_page)) | ||
1044 | return; | ||
1045 | cpu_buffer->commit_page->page->commit = | ||
1046 | cpu_buffer->commit_page->write; | ||
1047 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | ||
1048 | cpu_buffer->write_stamp = | ||
1049 | cpu_buffer->commit_page->page->time_stamp; | ||
1050 | } | ||
1051 | |||
1052 | /* Now set the commit to the event's index */ | ||
1053 | local_set(&cpu_buffer->commit_page->page->commit, index); | ||
1054 | } | ||
1055 | |||
1056 | static void | ||
1057 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1024 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
1058 | { | 1025 | { |
1059 | /* | 1026 | /* |
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
1171 | return length; | 1138 | return length; |
1172 | } | 1139 | } |
1173 | 1140 | ||
1141 | static inline void | ||
1142 | rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | ||
1143 | struct buffer_page *tail_page, | ||
1144 | unsigned long tail, unsigned long length) | ||
1145 | { | ||
1146 | struct ring_buffer_event *event; | ||
1147 | |||
1148 | /* | ||
1149 | * Only the event that crossed the page boundary | ||
1150 | * must fill the old tail_page with padding. | ||
1151 | */ | ||
1152 | if (tail >= BUF_PAGE_SIZE) { | ||
1153 | local_sub(length, &tail_page->write); | ||
1154 | return; | ||
1155 | } | ||
1156 | |||
1157 | event = __rb_page_index(tail_page, tail); | ||
1158 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1159 | |||
1160 | /* | ||
1161 | * If this event is bigger than the minimum size, then | ||
1162 | * we need to be careful that we don't subtract the | ||
1163 | * write counter enough to allow another writer to slip | ||
1164 | * in on this page. | ||
1165 | * We put in a discarded commit instead, to make sure | ||
1166 | * that this space is not used again. | ||
1167 | * | ||
1168 | * If we are less than the minimum size, we don't need to | ||
1169 | * worry about it. | ||
1170 | */ | ||
1171 | if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { | ||
1172 | /* No room for any events */ | ||
1173 | |||
1174 | /* Mark the rest of the page with padding */ | ||
1175 | rb_event_set_padding(event); | ||
1176 | |||
1177 | /* Set the write back to the previous setting */ | ||
1178 | local_sub(length, &tail_page->write); | ||
1179 | return; | ||
1180 | } | ||
1181 | |||
1182 | /* Put in a discarded event */ | ||
1183 | event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; | ||
1184 | event->type_len = RINGBUF_TYPE_PADDING; | ||
1185 | /* time delta must be non-zero */ | ||
1186 | event->time_delta = 1; | ||
1187 | /* Account for this as an entry */ | ||
1188 | local_inc(&tail_page->entries); | ||
1189 | local_inc(&cpu_buffer->entries); | ||
1190 | |||
1191 | /* Set write to end of buffer */ | ||
1192 | length = (tail + length) - BUF_PAGE_SIZE; | ||
1193 | local_sub(length, &tail_page->write); | ||
1194 | } | ||
1174 | 1195 | ||
1175 | static struct ring_buffer_event * | 1196 | static struct ring_buffer_event * |
1176 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1197 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
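rb_reset_tail() above distinguishes three cases by where the crossing event began: past the page end (just rewind write), within RB_EVNT_MIN_SIZE of the end (pad with no length word), or earlier (write a discarded padding event whose array[0] carries the leftover length). A tiny arithmetic check of the last case (the page payload size here is illustrative):

#include <stdio.h>

#define BUF_PAGE_SIZE    4080U  /* illustrative page payload size */
#define RB_EVNT_HDR_SIZE 4U     /* event header, before array[0] */
#define RB_EVNT_MIN_SIZE 8U     /* two 32-bit words */

int main(void)
{
        unsigned tail = 4000;   /* where the crossing event began */

        if (tail > BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE) {
                puts("no room: pad the rest, no length word");
        } else {
                /* length stored in the padding event's array[0] */
                unsigned pad = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
                printf("discarded padding covers %u bytes\n", pad);     /* 76 */
        }
        return 0;
}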
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1180 | { | 1201 | { |
1181 | struct buffer_page *next_page, *head_page, *reader_page; | 1202 | struct buffer_page *next_page, *head_page, *reader_page; |
1182 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1203 | struct ring_buffer *buffer = cpu_buffer->buffer; |
1183 | struct ring_buffer_event *event; | ||
1184 | bool lock_taken = false; | 1204 | bool lock_taken = false; |
1185 | unsigned long flags; | 1205 | unsigned long flags; |
1186 | 1206 | ||
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1265 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1285 | cpu_buffer->tail_page->page->time_stamp = *ts; |
1266 | } | 1286 | } |
1267 | 1287 | ||
1268 | /* | 1288 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1269 | * The actual tail page has moved forward. | ||
1270 | */ | ||
1271 | if (tail < BUF_PAGE_SIZE) { | ||
1272 | /* Mark the rest of the page with padding */ | ||
1273 | event = __rb_page_index(tail_page, tail); | ||
1274 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1275 | rb_event_set_padding(event); | ||
1276 | } | ||
1277 | |||
1278 | /* Set the write back to the previous setting */ | ||
1279 | local_sub(length, &tail_page->write); | ||
1280 | |||
1281 | /* | ||
1282 | * If this was a commit entry that failed, | ||
1283 | * increment that too | ||
1284 | */ | ||
1285 | if (tail_page == cpu_buffer->commit_page && | ||
1286 | tail == rb_commit_index(cpu_buffer)) { | ||
1287 | rb_set_commit_to_write(cpu_buffer); | ||
1288 | } | ||
1289 | 1289 | ||
1290 | __raw_spin_unlock(&cpu_buffer->lock); | 1290 | __raw_spin_unlock(&cpu_buffer->lock); |
1291 | local_irq_restore(flags); | 1291 | local_irq_restore(flags); |
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1295 | 1295 | ||
1296 | out_reset: | 1296 | out_reset: |
1297 | /* reset write */ | 1297 | /* reset write */ |
1298 | local_sub(length, &tail_page->write); | 1298 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1299 | 1299 | ||
1300 | if (likely(lock_taken)) | 1300 | if (likely(lock_taken)) |
1301 | __raw_spin_unlock(&cpu_buffer->lock); | 1301 | __raw_spin_unlock(&cpu_buffer->lock); |
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1325 | 1325 | ||
1326 | /* We reserved something on the buffer */ | 1326 | /* We reserved something on the buffer */ |
1327 | 1327 | ||
1328 | if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) | ||
1329 | return NULL; | ||
1330 | |||
1331 | event = __rb_page_index(tail_page, tail); | 1328 | event = __rb_page_index(tail_page, tail); |
1332 | kmemcheck_annotate_bitfield(event, bitfield); | 1329 | kmemcheck_annotate_bitfield(event, bitfield); |
1333 | rb_update_event(event, type, length); | 1330 | rb_update_event(event, type, length); |
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1337 | local_inc(&tail_page->entries); | 1334 | local_inc(&tail_page->entries); |
1338 | 1335 | ||
1339 | /* | 1336 | /* |
1340 | * If this is a commit and the tail is zero, then update | 1337 | * If this is the first commit on the page, then update |
1341 | * this page's time stamp. | 1338 | * its timestamp. |
1342 | */ | 1339 | */ |
1343 | if (!tail && rb_is_commit(cpu_buffer, event)) | 1340 | if (!tail) |
1344 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1341 | tail_page->page->time_stamp = *ts; |
1345 | 1342 | ||
1346 | return event; | 1343 | return event; |
1347 | } | 1344 | } |
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1410 | return -EAGAIN; | 1407 | return -EAGAIN; |
1411 | 1408 | ||
1412 | /* Only a committed time event can update the write stamp */ | 1409 | /* Only a committed time event can update the write stamp */ | ||
1413 | if (rb_is_commit(cpu_buffer, event)) { | 1410 | if (rb_event_is_commit(cpu_buffer, event)) { |
1414 | /* | 1411 | /* |
1415 | * If this is the first on the page, then we need to | 1412 | * If this is the first on the page, then it was |
1416 | * update the page itself, and just put in a zero. | 1413 | * updated with the page itself. Try to discard it |
1414 | * and, if we can't, just make it zero. | ||
1417 | */ | 1415 | */ |
1418 | if (rb_event_index(event)) { | 1416 | if (rb_event_index(event)) { |
1419 | event->time_delta = *delta & TS_MASK; | 1417 | event->time_delta = *delta & TS_MASK; |
1420 | event->array[0] = *delta >> TS_SHIFT; | 1418 | event->array[0] = *delta >> TS_SHIFT; |
1421 | } else { | 1419 | } else { |
1422 | cpu_buffer->commit_page->page->time_stamp = *ts; | ||
1423 | /* try to discard, since we do not need this */ | 1420 | /* try to discard, since we do not need this */ |
1424 | if (!rb_try_to_discard(cpu_buffer, event)) { | 1421 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1425 | /* nope, just zero it */ | 1422 | /* nope, just zero it */ |
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1445 | return ret; | 1442 | return ret; |
1446 | } | 1443 | } |
1447 | 1444 | ||
1445 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
1446 | { | ||
1447 | local_inc(&cpu_buffer->committing); | ||
1448 | local_inc(&cpu_buffer->commits); | ||
1449 | } | ||
1450 | |||
1451 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
1452 | { | ||
1453 | unsigned long commits; | ||
1454 | |||
1455 | if (RB_WARN_ON(cpu_buffer, | ||
1456 | !local_read(&cpu_buffer->committing))) | ||
1457 | return; | ||
1458 | |||
1459 | again: | ||
1460 | commits = local_read(&cpu_buffer->commits); | ||
1461 | /* synchronize with interrupts */ | ||
1462 | barrier(); | ||
1463 | if (local_read(&cpu_buffer->committing) == 1) | ||
1464 | rb_set_commit_to_write(cpu_buffer); | ||
1465 | |||
1466 | local_dec(&cpu_buffer->committing); | ||
1467 | |||
1468 | /* synchronize with interrupts */ | ||
1469 | barrier(); | ||
1470 | |||
1471 | /* | ||
1472 | * Need to account for interrupts coming in between the | ||
1473 | * updating of the commit page and the clearing of the | ||
1474 | * committing counter. | ||
1475 | */ | ||
1476 | if (unlikely(local_read(&cpu_buffer->commits) != commits) && | ||
1477 | !local_read(&cpu_buffer->committing)) { | ||
1478 | local_inc(&cpu_buffer->committing); | ||
1479 | goto again; | ||
1480 | } | ||
1481 | } | ||
1482 | |||
1448 | static struct ring_buffer_event * | 1483 | static struct ring_buffer_event * |
1449 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 1484 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, |
1450 | unsigned long length) | 1485 | unsigned long length) |
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1454 | int commit = 0; | 1489 | int commit = 0; |
1455 | int nr_loops = 0; | 1490 | int nr_loops = 0; |
1456 | 1491 | ||
1492 | rb_start_commit(cpu_buffer); | ||
1493 | |||
1457 | length = rb_calculate_event_length(length); | 1494 | length = rb_calculate_event_length(length); |
1458 | again: | 1495 | again: |
1459 | /* | 1496 | /* |
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1466 | * Bail! | 1503 | * Bail! |
1467 | */ | 1504 | */ |
1468 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 1505 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
1469 | return NULL; | 1506 | goto out_fail; |
1470 | 1507 | ||
1471 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 1508 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
1472 | 1509 | ||
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1497 | 1534 | ||
1498 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 1535 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); |
1499 | if (commit == -EBUSY) | 1536 | if (commit == -EBUSY) |
1500 | return NULL; | 1537 | goto out_fail; |
1501 | 1538 | ||
1502 | if (commit == -EAGAIN) | 1539 | if (commit == -EAGAIN) |
1503 | goto again; | 1540 | goto again; |
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1511 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 1548 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
1512 | goto again; | 1549 | goto again; |
1513 | 1550 | ||
1514 | if (!event) { | 1551 | if (!event) |
1515 | if (unlikely(commit)) | 1552 | goto out_fail; |
1516 | /* | ||
1517 | * Ouch! We needed a timestamp and it was commited. But | ||
1518 | * we didn't get our event reserved. | ||
1519 | */ | ||
1520 | rb_set_commit_to_write(cpu_buffer); | ||
1521 | return NULL; | ||
1522 | } | ||
1523 | 1553 | ||
1524 | /* | 1554 | if (!rb_event_is_commit(cpu_buffer, event)) |
1525 | * If the timestamp was commited, make the commit our entry | ||
1526 | * now so that we will update it when needed. | ||
1527 | */ | ||
1528 | if (unlikely(commit)) | ||
1529 | rb_set_commit_event(cpu_buffer, event); | ||
1530 | else if (!rb_is_commit(cpu_buffer, event)) | ||
1531 | delta = 0; | 1555 | delta = 0; |
1532 | 1556 | ||
1533 | event->time_delta = delta; | 1557 | event->time_delta = delta; |
1534 | 1558 | ||
1535 | return event; | 1559 | return event; |
1560 | |||
1561 | out_fail: | ||
1562 | rb_end_commit(cpu_buffer); | ||
1563 | return NULL; | ||
1536 | } | 1564 | } |
1537 | 1565 | ||
1538 | #define TRACE_RECURSIVE_DEPTH 16 | 1566 | #define TRACE_RECURSIVE_DEPTH 16 |
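With the counter opened at function entry, rb_reserve_next_event() can no longer bail with a bare return NULL: every failure path must balance rb_start_commit() with rb_end_commit(), which the single out_fail label now guarantees. A schematic of the pairing, with stub types and a hypothetical try_reserve() step standing in for the real reservation logic:

    struct cpu_buf { int committing; };

    static void start_commit(struct cpu_buf *c) { c->committing++; }
    static void end_commit(struct cpu_buf *c)   { c->committing--; }

    /* hypothetical allocation step; NULL models any of the bail-outs */
    static void *try_reserve(struct cpu_buf *c) { (void)c; return 0; }

    static void *reserve_next(struct cpu_buf *c)
    {
        void *event;

        start_commit(c);          /* opens the commit window */
        event = try_reserve(c);
        if (!event)
            goto out_fail;
        return event;             /* rb_commit() later closes the window */

    out_fail:
        end_commit(c);            /* every failure path balances the start */
        return 0;
    }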
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1642 | { | 1670 | { |
1643 | local_inc(&cpu_buffer->entries); | 1671 | local_inc(&cpu_buffer->entries); |
1644 | 1672 | ||
1645 | /* Only process further if we own the commit */ | 1673 | /* |
1646 | if (!rb_is_commit(cpu_buffer, event)) | 1674 | * The first event in the commit queue updates the |
1647 | return; | 1675 | * time stamp. |
1648 | 1676 | */ | |
1649 | cpu_buffer->write_stamp += event->time_delta; | 1677 | if (rb_event_is_commit(cpu_buffer, event)) |
1678 | cpu_buffer->write_stamp += event->time_delta; | ||
1650 | 1679 | ||
1651 | rb_set_commit_to_write(cpu_buffer); | 1680 | rb_end_commit(cpu_buffer); |
1652 | } | 1681 | } |
1653 | 1682 | ||
1654 | /** | 1683 | /** |
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1737 | /* The event is discarded regardless */ | 1766 | /* The event is discarded regardless */ |
1738 | rb_event_discard(event); | 1767 | rb_event_discard(event); |
1739 | 1768 | ||
1769 | cpu = smp_processor_id(); | ||
1770 | cpu_buffer = buffer->buffers[cpu]; | ||
1771 | |||
1740 | /* | 1772 | /* |
1741 | * This must only be called if the event has not been | 1773 | * This must only be called if the event has not been |
1742 | * committed yet. Thus we can assume that preemption | 1774 | * committed yet. Thus we can assume that preemption |
1743 | * is still disabled. | 1775 | * is still disabled. |
1744 | */ | 1776 | */ |
1745 | RB_WARN_ON(buffer, preemptible()); | 1777 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); |
1746 | |||
1747 | cpu = smp_processor_id(); | ||
1748 | cpu_buffer = buffer->buffers[cpu]; | ||
1749 | 1778 | ||
1750 | if (!rb_try_to_discard(cpu_buffer, event)) | 1779 | if (!rb_try_to_discard(cpu_buffer, event)) |
1751 | goto out; | 1780 | goto out; |
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1756 | */ | 1785 | */ |
1757 | local_inc(&cpu_buffer->entries); | 1786 | local_inc(&cpu_buffer->entries); |
1758 | out: | 1787 | out: |
1759 | /* | 1788 | rb_end_commit(cpu_buffer); |
1760 | * If a write came in and pushed the tail page | ||
1761 | * we still need to update the commit pointer | ||
1762 | * if we were the commit. | ||
1763 | */ | ||
1764 | if (rb_is_commit(cpu_buffer, event)) | ||
1765 | rb_set_commit_to_write(cpu_buffer); | ||
1766 | 1789 | ||
1767 | trace_recursive_unlock(); | 1790 | trace_recursive_unlock(); |
1768 | 1791 | ||
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2446 | } | 2469 | } |
2447 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); | 2470 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); |
2448 | 2471 | ||
2472 | static inline int rb_ok_to_lock(void) | ||
2473 | { | ||
2474 | /* | ||
2475 | * If a die/oops in NMI context dumps out the content of the | ||
2476 | * ring buffer, do not grab locks; we also permanently disable | ||
2477 | * the ring buffer. A one-time deal is all you get from reading | ||
2478 | * the ring buffer from an NMI. | ||
2479 | */ | ||
2480 | if (likely(!in_nmi() && !oops_in_progress)) | ||
2481 | return 1; | ||
2482 | |||
2483 | tracing_off_permanent(); | ||
2484 | return 0; | ||
2485 | } | ||
2486 | |||
2449 | /** | 2487 | /** |
2450 | * ring_buffer_peek - peek at the next event to be read | 2488 | * ring_buffer_peek - peek at the next event to be read |
2451 | * @buffer: The ring buffer to read | 2489 | * @buffer: The ring buffer to read |
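rb_ok_to_lock() exists because a CPU that took reader_lock and was then interrupted by an NMI can never run again to release it, so an NMI-context reader spinning on that lock would deadlock; the readers below therefore take the lock only when it is safe and accept a racy read otherwise. A sketch of the pattern, using a pthread mutex as a stand-in for the spinlock and a hypothetical in_nmi_context flag in place of in_nmi() || oops_in_progress:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t reader_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool in_nmi_context;    /* assumption: set by surrounding code */

    static int ok_to_lock(void)
    {
        /*
         * A lock holder interrupted by the NMI can never run again to
         * release the lock, so an NMI-context reader must not block.
         */
        return !in_nmi_context;
    }

    static void peek(void)
    {
        int dolock = ok_to_lock();

        if (dolock)
            pthread_mutex_lock(&reader_lock);
        /* ... read the buffer: racy without the lock, but good enough
         * for a one-shot dump from a dying system ... */
        if (dolock)
            pthread_mutex_unlock(&reader_lock);
    }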
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2461 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2499 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2462 | struct ring_buffer_event *event; | 2500 | struct ring_buffer_event *event; |
2463 | unsigned long flags; | 2501 | unsigned long flags; |
2502 | int dolock; | ||
2464 | 2503 | ||
2465 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2504 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2466 | return NULL; | 2505 | return NULL; |
2467 | 2506 | ||
2507 | dolock = rb_ok_to_lock(); | ||
2468 | again: | 2508 | again: |
2469 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2509 | local_irq_save(flags); |
2510 | if (dolock) | ||
2511 | spin_lock(&cpu_buffer->reader_lock); | ||
2470 | event = rb_buffer_peek(buffer, cpu, ts); | 2512 | event = rb_buffer_peek(buffer, cpu, ts); |
2471 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2513 | if (dolock) |
2514 | spin_unlock(&cpu_buffer->reader_lock); | ||
2515 | local_irq_restore(flags); | ||
2472 | 2516 | ||
2473 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 2517 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2474 | cpu_relax(); | 2518 | cpu_relax(); |
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2520 | struct ring_buffer_per_cpu *cpu_buffer; | 2564 | struct ring_buffer_per_cpu *cpu_buffer; |
2521 | struct ring_buffer_event *event = NULL; | 2565 | struct ring_buffer_event *event = NULL; |
2522 | unsigned long flags; | 2566 | unsigned long flags; |
2567 | int dolock; | ||
2568 | |||
2569 | dolock = rb_ok_to_lock(); | ||
2523 | 2570 | ||
2524 | again: | 2571 | again: |
2525 | /* might be called in atomic */ | 2572 | /* might be called in atomic */ |
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2529 | goto out; | 2576 | goto out; |
2530 | 2577 | ||
2531 | cpu_buffer = buffer->buffers[cpu]; | 2578 | cpu_buffer = buffer->buffers[cpu]; |
2532 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2579 | local_irq_save(flags); |
2580 | if (dolock) | ||
2581 | spin_lock(&cpu_buffer->reader_lock); | ||
2533 | 2582 | ||
2534 | event = rb_buffer_peek(buffer, cpu, ts); | 2583 | event = rb_buffer_peek(buffer, cpu, ts); |
2535 | if (!event) | 2584 | if (!event) |
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2538 | rb_advance_reader(cpu_buffer); | 2587 | rb_advance_reader(cpu_buffer); |
2539 | 2588 | ||
2540 | out_unlock: | 2589 | out_unlock: |
2541 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2590 | if (dolock) |
2591 | spin_unlock(&cpu_buffer->reader_lock); | ||
2592 | local_irq_restore(flags); | ||
2542 | 2593 | ||
2543 | out: | 2594 | out: |
2544 | preempt_enable(); | 2595 | preempt_enable(); |
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2680 | cpu_buffer->overrun = 0; | 2731 | cpu_buffer->overrun = 0; |
2681 | cpu_buffer->read = 0; | 2732 | cpu_buffer->read = 0; |
2682 | local_set(&cpu_buffer->entries, 0); | 2733 | local_set(&cpu_buffer->entries, 0); |
2734 | local_set(&cpu_buffer->committing, 0); | ||
2735 | local_set(&cpu_buffer->commits, 0); | ||
2683 | 2736 | ||
2684 | cpu_buffer->write_stamp = 0; | 2737 | cpu_buffer->write_stamp = 0; |
2685 | cpu_buffer->read_stamp = 0; | 2738 | cpu_buffer->read_stamp = 0; |
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); | |||
2734 | int ring_buffer_empty(struct ring_buffer *buffer) | 2787 | int ring_buffer_empty(struct ring_buffer *buffer) |
2735 | { | 2788 | { |
2736 | struct ring_buffer_per_cpu *cpu_buffer; | 2789 | struct ring_buffer_per_cpu *cpu_buffer; |
2790 | unsigned long flags; | ||
2791 | int dolock; | ||
2737 | int cpu; | 2792 | int cpu; |
2793 | int ret; | ||
2794 | |||
2795 | dolock = rb_ok_to_lock(); | ||
2738 | 2796 | ||
2739 | /* yes this is racy, but if you don't like the race, lock the buffer */ | 2797 | /* yes this is racy, but if you don't like the race, lock the buffer */ |
2740 | for_each_buffer_cpu(buffer, cpu) { | 2798 | for_each_buffer_cpu(buffer, cpu) { |
2741 | cpu_buffer = buffer->buffers[cpu]; | 2799 | cpu_buffer = buffer->buffers[cpu]; |
2742 | if (!rb_per_cpu_empty(cpu_buffer)) | 2800 | local_irq_save(flags); |
2801 | if (dolock) | ||
2802 | spin_lock(&cpu_buffer->reader_lock); | ||
2803 | ret = rb_per_cpu_empty(cpu_buffer); | ||
2804 | if (dolock) | ||
2805 | spin_unlock(&cpu_buffer->reader_lock); | ||
2806 | local_irq_restore(flags); | ||
2807 | |||
2808 | if (!ret) | ||
2743 | return 0; | 2809 | return 0; |
2744 | } | 2810 | } |
2745 | 2811 | ||
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); | |||
2755 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 2821 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
2756 | { | 2822 | { |
2757 | struct ring_buffer_per_cpu *cpu_buffer; | 2823 | struct ring_buffer_per_cpu *cpu_buffer; |
2824 | unsigned long flags; | ||
2825 | int dolock; | ||
2758 | int ret; | 2826 | int ret; |
2759 | 2827 | ||
2760 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2828 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2761 | return 1; | 2829 | return 1; |
2762 | 2830 | ||
2831 | dolock = rb_ok_to_lock(); | ||
2832 | |||
2763 | cpu_buffer = buffer->buffers[cpu]; | 2833 | cpu_buffer = buffer->buffers[cpu]; |
2834 | local_irq_save(flags); | ||
2835 | if (dolock) | ||
2836 | spin_lock(&cpu_buffer->reader_lock); | ||
2764 | ret = rb_per_cpu_empty(cpu_buffer); | 2837 | ret = rb_per_cpu_empty(cpu_buffer); |
2765 | 2838 | if (dolock) | |
2839 | spin_unlock(&cpu_buffer->reader_lock); | ||
2840 | local_irq_restore(flags); | ||
2766 | 2841 | ||
2767 | return ret; | 2842 | return ret; |
2768 | } | 2843 | } |
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3108 | switch (action) { | 3183 | switch (action) { |
3109 | case CPU_UP_PREPARE: | 3184 | case CPU_UP_PREPARE: |
3110 | case CPU_UP_PREPARE_FROZEN: | 3185 | case CPU_UP_PREPARE_FROZEN: |
3111 | if (cpu_isset(cpu, *buffer->cpumask)) | 3186 | if (cpumask_test_cpu(cpu, buffer->cpumask)) |
3112 | return NOTIFY_OK; | 3187 | return NOTIFY_OK; |
3113 | 3188 | ||
3114 | buffer->buffers[cpu] = | 3189 | buffer->buffers[cpu] = |
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3119 | return NOTIFY_OK; | 3194 | return NOTIFY_OK; |
3120 | } | 3195 | } |
3121 | smp_wmb(); | 3196 | smp_wmb(); |
3122 | cpu_set(cpu, *buffer->cpumask); | 3197 | cpumask_set_cpu(cpu, buffer->cpumask); |
3123 | break; | 3198 | break; |
3124 | case CPU_DOWN_PREPARE: | 3199 | case CPU_DOWN_PREPARE: |
3125 | case CPU_DOWN_PREPARE_FROZEN: | 3200 | case CPU_DOWN_PREPARE_FROZEN: |
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 8d68e149a8b3..573d3cc762c3 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu) | |||
102 | event = (void *)&rpage->data[i]; | 102 | event = (void *)&rpage->data[i]; |
103 | switch (event->type_len) { | 103 | switch (event->type_len) { |
104 | case RINGBUF_TYPE_PADDING: | 104 | case RINGBUF_TYPE_PADDING: |
105 | /* We don't expect any padding */ | 105 | /* failed writes may be discarded events */ |
106 | KILL_TEST(); | 106 | if (!event->time_delta) |
107 | KILL_TEST(); | ||
108 | inc = event->array[0] + 4; | ||
107 | break; | 109 | break; |
108 | case RINGBUF_TYPE_TIME_EXTEND: | 110 | case RINGBUF_TYPE_TIME_EXTEND: |
109 | inc = 8; | 111 | inc = 8; |
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu) | |||
119 | KILL_TEST(); | 121 | KILL_TEST(); |
120 | break; | 122 | break; |
121 | } | 123 | } |
122 | inc = event->array[0]; | 124 | inc = event->array[0] + 4; |
123 | break; | 125 | break; |
124 | default: | 126 | default: |
125 | entry = ring_buffer_event_data(event); | 127 | entry = ring_buffer_event_data(event); |
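The + 4 added to array[0] accounts for the 4-byte event header that the old increments forgot, and the padding case now distinguishes discarded events (non-zero time_delta, payload length kept in array[0]) from genuine end-of-page padding. A schematic size computation; the field widths and constants here are assumptions inferred from the benchmark's arithmetic, not a verbatim copy of the kernel's event layout:

    #include <stddef.h>
    #include <stdint.h>

    /* schematic header: 4 bytes of type_len (5 bits) + time_delta (27) */
    struct rb_event {
        uint32_t type_len : 5, time_delta : 27;
        uint32_t array[];
    };

    enum { TYPE_PADDING = 29, TYPE_TIME_EXTEND = 30 };

    /* bytes this event occupies on the page; 0 means stop walking */
    static size_t event_size(const struct rb_event *ev)
    {
        switch (ev->type_len) {
        case TYPE_PADDING:
            /* discarded event: payload length kept in array[0];
             * zero time_delta means padding to the end of the page */
            return ev->time_delta ? ev->array[0] + 4 : 0;
        case TYPE_TIME_EXTEND:
            return 8;                     /* matches `inc = 8` above */
        default:                          /* data event */
            return ev->type_len ? ev->type_len * 4 + 4
                                : ev->array[0] + 4;
        }
    }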
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void) | |||
201 | * Hammer the buffer for 10 secs (this may | 203 | * Hammer the buffer for 10 secs (this may |
202 | * make the system stall) | 204 | * make the system stall) |
203 | */ | 205 | */ |
204 | pr_info("Starting ring buffer hammer\n"); | 206 | trace_printk("Starting ring buffer hammer\n"); |
205 | do_gettimeofday(&start_tv); | 207 | do_gettimeofday(&start_tv); |
206 | do { | 208 | do { |
207 | struct ring_buffer_event *event; | 209 | struct ring_buffer_event *event; |
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void) | |||
237 | #endif | 239 | #endif |
238 | 240 | ||
239 | } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); | 241 | } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); |
240 | pr_info("End ring buffer hammer\n"); | 242 | trace_printk("End ring buffer hammer\n"); |
241 | 243 | ||
242 | if (consumer) { | 244 | if (consumer) { |
243 | /* Init both completions here to avoid races */ | 245 | /* Init both completions here to avoid races */ |
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void) | |||
260 | overruns = ring_buffer_overruns(buffer); | 262 | overruns = ring_buffer_overruns(buffer); |
261 | 263 | ||
262 | if (kill_test) | 264 | if (kill_test) |
263 | pr_info("ERROR!\n"); | 265 | trace_printk("ERROR!\n"); |
264 | pr_info("Time: %lld (usecs)\n", time); | 266 | trace_printk("Time: %lld (usecs)\n", time); |
265 | pr_info("Overruns: %lld\n", overruns); | 267 | trace_printk("Overruns: %lld\n", overruns); |
266 | if (disable_reader) | 268 | if (disable_reader) |
267 | pr_info("Read: (reader disabled)\n"); | 269 | trace_printk("Read: (reader disabled)\n"); |
268 | else | 270 | else |
269 | pr_info("Read: %ld (by %s)\n", read, | 271 | trace_printk("Read: %ld (by %s)\n", read, |
270 | read_events ? "events" : "pages"); | 272 | read_events ? "events" : "pages"); |
271 | pr_info("Entries: %lld\n", entries); | 273 | trace_printk("Entries: %lld\n", entries); |
272 | pr_info("Total: %lld\n", entries + overruns + read); | 274 | trace_printk("Total: %lld\n", entries + overruns + read); |
273 | pr_info("Missed: %ld\n", missed); | 275 | trace_printk("Missed: %ld\n", missed); |
274 | pr_info("Hit: %ld\n", hit); | 276 | trace_printk("Hit: %ld\n", hit); |
275 | 277 | ||
276 | /* Convert time from usecs to millisecs */ | 278 | /* Convert time from usecs to millisecs */ |
277 | do_div(time, USEC_PER_MSEC); | 279 | do_div(time, USEC_PER_MSEC); |
278 | if (time) | 280 | if (time) |
279 | hit /= (long)time; | 281 | hit /= (long)time; |
280 | else | 282 | else |
281 | pr_info("TIME IS ZERO??\n"); | 283 | trace_printk("TIME IS ZERO??\n"); |
282 | 284 | ||
283 | pr_info("Entries per millisec: %ld\n", hit); | 285 | trace_printk("Entries per millisec: %ld\n", hit); |
284 | 286 | ||
285 | if (hit) { | 287 | if (hit) { |
286 | /* Calculate the average time in nanosecs */ | 288 | /* Calculate the average time in nanosecs */ |
287 | avg = NSEC_PER_MSEC / hit; | 289 | avg = NSEC_PER_MSEC / hit; |
288 | pr_info("%ld ns per entry\n", avg); | 290 | trace_printk("%ld ns per entry\n", avg); |
289 | } | 291 | } |
290 | 292 | ||
291 | if (missed) { | 293 | if (missed) { |
292 | if (time) | 294 | if (time) |
293 | missed /= (long)time; | 295 | missed /= (long)time; |
294 | 296 | ||
295 | pr_info("Total iterations per millisec: %ld\n", hit + missed); | 297 | trace_printk("Total iterations per millisec: %ld\n", |
298 | hit + missed); | ||
296 | 299 | ||
297 | /* it is possible that hit + missed will overflow and be zero */ | 300 | /* it is possible that hit + missed will overflow and be zero */ |
298 | if (!(hit + missed)) { | 301 | if (!(hit + missed)) { |
299 | pr_info("hit + missed overflowed and totalled zero!\n"); | 302 | trace_printk("hit + missed overflowed and totalled zero!\n"); |
300 | hit--; /* make it non zero */ | 303 | hit--; /* make it non zero */ |
301 | } | 304 | } |
302 | 305 | ||
303 | /* Calculate the average time in nanosecs */ | 306 | /* Calculate the average time in nanosecs */ |
304 | avg = NSEC_PER_MSEC / (hit + missed); | 307 | avg = NSEC_PER_MSEC / (hit + missed); |
305 | pr_info("%ld ns per entry\n", avg); | 308 | trace_printk("%ld ns per entry\n", avg); |
306 | } | 309 | } |
307 | } | 310 | } |
308 | 311 | ||
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg) | |||
353 | 356 | ||
354 | ring_buffer_producer(); | 357 | ring_buffer_producer(); |
355 | 358 | ||
356 | pr_info("Sleeping for 10 secs\n"); | 359 | trace_printk("Sleeping for 10 secs\n"); |
357 | set_current_state(TASK_INTERRUPTIBLE); | 360 | set_current_state(TASK_INTERRUPTIBLE); |
358 | schedule_timeout(HZ * SLEEP_TIME); | 361 | schedule_timeout(HZ * SLEEP_TIME); |
359 | __set_current_state(TASK_RUNNING); | 362 | __set_current_state(TASK_RUNNING); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c1878bfb2e1e..076fa6f0ee48 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2191 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) | 2191 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) |
2192 | return -ENOMEM; | 2192 | return -ENOMEM; |
2193 | 2193 | ||
2194 | mutex_lock(&tracing_cpumask_update_lock); | ||
2195 | err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); | 2194 | err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); |
2196 | if (err) | 2195 | if (err) |
2197 | goto err_unlock; | 2196 | goto err_unlock; |
2198 | 2197 | ||
2198 | mutex_lock(&tracing_cpumask_update_lock); | ||
2199 | |||
2199 | local_irq_disable(); | 2200 | local_irq_disable(); |
2200 | __raw_spin_lock(&ftrace_max_lock); | 2201 | __raw_spin_lock(&ftrace_max_lock); |
2201 | for_each_tracing_cpu(cpu) { | 2202 | for_each_tracing_cpu(cpu) { |
@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2223 | return count; | 2224 | return count; |
2224 | 2225 | ||
2225 | err_unlock: | 2226 | err_unlock: |
2226 | mutex_unlock(&tracing_cpumask_update_lock); | 2227 | free_cpumask_var(tracing_cpumask_new); |
2227 | free_cpumask_var(tracing_cpumask); | ||
2228 | 2228 | ||
2229 | return err; | 2229 | return err; |
2230 | } | 2230 | } |
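Moving the mutex_lock() to after cpumask_parse_user() fixes two error-path bugs at once: err_unlock used to unlock a mutex that was not yet held when parsing failed, and it freed the live tracing_cpumask instead of the temporary tracing_cpumask_new. A schematic of the corrected shape, with userspace stand-ins for the locking and allocation:

    #include <errno.h>
    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;
    static char current_mask[64];

    static long write_mask(const char *buf, size_t count)
    {
        char *tmp = malloc(sizeof(current_mask));
        if (!tmp)
            return -ENOMEM;

        /* fallible parse first, before the lock is taken */
        if (count >= sizeof(current_mask)) {
            free(tmp);        /* free the temporary; nothing to unlock */
            return -EINVAL;
        }
        memcpy(tmp, buf, count);
        tmp[count] = '\0';

        pthread_mutex_lock(&update_lock);
        strcpy(current_mask, tmp);
        pthread_mutex_unlock(&update_lock);

        free(tmp);
        return (long)count;
    }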
@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
3626 | struct trace_seq *s; | 3626 | struct trace_seq *s; |
3627 | unsigned long cnt; | 3627 | unsigned long cnt; |
3628 | 3628 | ||
3629 | s = kmalloc(sizeof(*s), GFP_ATOMIC); | 3629 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
3630 | if (!s) | 3630 | if (!s) |
3631 | return ENOMEM; | 3631 | return ENOMEM; |
3632 | 3632 | ||
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index db6e54bdb596..936c621bbf46 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -27,8 +27,6 @@ | |||
27 | #include "trace.h" | 27 | #include "trace.h" |
28 | #include "trace_output.h" | 28 | #include "trace_output.h" |
29 | 29 | ||
30 | static DEFINE_MUTEX(filter_mutex); | ||
31 | |||
32 | enum filter_op_ids | 30 | enum filter_op_ids |
33 | { | 31 | { |
34 | OP_OR, | 32 | OP_OR, |
@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event, | |||
178 | static int filter_pred_strloc(struct filter_pred *pred, void *event, | 176 | static int filter_pred_strloc(struct filter_pred *pred, void *event, |
179 | int val1, int val2) | 177 | int val1, int val2) |
180 | { | 178 | { |
181 | int str_loc = *(int *)(event + pred->offset); | 179 | unsigned short str_loc = *(unsigned short *)(event + pred->offset); |
182 | char *addr = (char *)(event + str_loc); | 180 | char *addr = (char *)(event + str_loc); |
183 | int cmp, match; | 181 | int cmp, match; |
184 | 182 | ||
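The dynamic-string location is stored in the event record as a 16-bit offset; reading it through an int also pulled in whatever field is packed beside it, corrupting the offset. A sketch of the corrected access, with the width assumed from the cast in the fix:

    #include <stdint.h>
    #include <string.h>

    /* the string's offset within the event is a 16-bit field */
    static const char *event_string(const void *event, size_t field_offset)
    {
        uint16_t loc;

        memcpy(&loc, (const char *)event + field_offset, sizeof(loc));
        return (const char *)event + loc;
    }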
@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) | |||
294 | { | 292 | { |
295 | struct event_filter *filter = call->filter; | 293 | struct event_filter *filter = call->filter; |
296 | 294 | ||
297 | mutex_lock(&filter_mutex); | 295 | mutex_lock(&event_mutex); |
298 | if (filter->filter_string) | 296 | if (filter->filter_string) |
299 | trace_seq_printf(s, "%s\n", filter->filter_string); | 297 | trace_seq_printf(s, "%s\n", filter->filter_string); |
300 | else | 298 | else |
301 | trace_seq_printf(s, "none\n"); | 299 | trace_seq_printf(s, "none\n"); |
302 | mutex_unlock(&filter_mutex); | 300 | mutex_unlock(&event_mutex); |
303 | } | 301 | } |
304 | 302 | ||
305 | void print_subsystem_event_filter(struct event_subsystem *system, | 303 | void print_subsystem_event_filter(struct event_subsystem *system, |
@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
307 | { | 305 | { |
308 | struct event_filter *filter = system->filter; | 306 | struct event_filter *filter = system->filter; |
309 | 307 | ||
310 | mutex_lock(&filter_mutex); | 308 | mutex_lock(&event_mutex); |
311 | if (filter->filter_string) | 309 | if (filter->filter_string) |
312 | trace_seq_printf(s, "%s\n", filter->filter_string); | 310 | trace_seq_printf(s, "%s\n", filter->filter_string); |
313 | else | 311 | else |
314 | trace_seq_printf(s, "none\n"); | 312 | trace_seq_printf(s, "none\n"); |
315 | mutex_unlock(&filter_mutex); | 313 | mutex_unlock(&event_mutex); |
316 | } | 314 | } |
317 | 315 | ||
318 | static struct ftrace_event_field * | 316 | static struct ftrace_event_field * |
@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call) | |||
381 | filter_free_pred(filter->preds[i]); | 379 | filter_free_pred(filter->preds[i]); |
382 | } | 380 | } |
383 | kfree(filter->preds); | 381 | kfree(filter->preds); |
382 | kfree(filter->filter_string); | ||
384 | kfree(filter); | 383 | kfree(filter); |
385 | call->filter = NULL; | 384 | call->filter = NULL; |
386 | } | 385 | } |
@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) | |||
433 | filter->n_preds = 0; | 432 | filter->n_preds = 0; |
434 | } | 433 | } |
435 | 434 | ||
436 | mutex_lock(&event_mutex); | ||
437 | list_for_each_entry(call, &ftrace_events, list) { | 435 | list_for_each_entry(call, &ftrace_events, list) { |
438 | if (!call->define_fields) | 436 | if (!call->define_fields) |
439 | continue; | 437 | continue; |
@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) | |||
443 | remove_filter_string(call->filter); | 441 | remove_filter_string(call->filter); |
444 | } | 442 | } |
445 | } | 443 | } |
446 | mutex_unlock(&event_mutex); | ||
447 | } | 444 | } |
448 | 445 | ||
449 | static int filter_add_pred_fn(struct filter_parse_state *ps, | 446 | static int filter_add_pred_fn(struct filter_parse_state *ps, |
@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
546 | filter_pred_fn_t fn; | 543 | filter_pred_fn_t fn; |
547 | unsigned long long val; | 544 | unsigned long long val; |
548 | int string_type; | 545 | int string_type; |
546 | int ret; | ||
549 | 547 | ||
550 | pred->fn = filter_pred_none; | 548 | pred->fn = filter_pred_none; |
551 | 549 | ||
@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
581 | pred->not = 1; | 579 | pred->not = 1; |
582 | return filter_add_pred_fn(ps, call, pred, fn); | 580 | return filter_add_pred_fn(ps, call, pred, fn); |
583 | } else { | 581 | } else { |
584 | if (strict_strtoull(pred->str_val, 0, &val)) { | 582 | if (field->is_signed) |
583 | ret = strict_strtoll(pred->str_val, 0, &val); | ||
584 | else | ||
585 | ret = strict_strtoull(pred->str_val, 0, &val); | ||
586 | if (ret) { | ||
585 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); | 587 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); |
586 | return -EINVAL; | 588 | return -EINVAL; |
587 | } | 589 | } |
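Signed fields must go through a signed parser, since a value such as "-1" fails (or wraps) under strict_strtoull(). A userspace sketch of the split, with strtoll()/strtoull() standing in for strict_strtoll()/strict_strtoull():

    #include <errno.h>
    #include <stdlib.h>

    static int parse_field_value(const char *s, int is_signed,
                                 unsigned long long *out)
    {
        char *end;

        errno = 0;
        if (is_signed)     /* "-1" must go through the signed parser */
            *out = (unsigned long long)strtoll(s, &end, 0);
        else
            *out = strtoull(s, &end, 0);
        return (errno || end == s || *end != '\0') ? -EINVAL : 0;
    }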
@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, | |||
625 | filter->preds[filter->n_preds] = pred; | 627 | filter->preds[filter->n_preds] = pred; |
626 | filter->n_preds++; | 628 | filter->n_preds++; |
627 | 629 | ||
628 | mutex_lock(&event_mutex); | ||
629 | list_for_each_entry(call, &ftrace_events, list) { | 630 | list_for_each_entry(call, &ftrace_events, list) { |
630 | 631 | ||
631 | if (!call->define_fields) | 632 | if (!call->define_fields) |
@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, | |||
636 | 637 | ||
637 | err = filter_add_pred(ps, call, pred); | 638 | err = filter_add_pred(ps, call, pred); |
638 | if (err) { | 639 | if (err) { |
639 | mutex_unlock(&event_mutex); | ||
640 | filter_free_subsystem_preds(system); | 640 | filter_free_subsystem_preds(system); |
641 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); | 641 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); |
642 | goto out; | 642 | goto out; |
643 | } | 643 | } |
644 | replace_filter_string(call->filter, filter_string); | 644 | replace_filter_string(call->filter, filter_string); |
645 | } | 645 | } |
646 | mutex_unlock(&event_mutex); | ||
647 | out: | 646 | out: |
648 | return err; | 647 | return err; |
649 | } | 648 | } |
@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | |||
1070 | 1069 | ||
1071 | struct filter_parse_state *ps; | 1070 | struct filter_parse_state *ps; |
1072 | 1071 | ||
1073 | mutex_lock(&filter_mutex); | 1072 | mutex_lock(&event_mutex); |
1074 | 1073 | ||
1075 | if (!strcmp(strstrip(filter_string), "0")) { | 1074 | if (!strcmp(strstrip(filter_string), "0")) { |
1076 | filter_disable_preds(call); | 1075 | filter_disable_preds(call); |
1077 | remove_filter_string(call->filter); | 1076 | remove_filter_string(call->filter); |
1078 | mutex_unlock(&filter_mutex); | 1077 | mutex_unlock(&event_mutex); |
1079 | return 0; | 1078 | return 0; |
1080 | } | 1079 | } |
1081 | 1080 | ||
@@ -1103,7 +1102,7 @@ out: | |||
1103 | postfix_clear(ps); | 1102 | postfix_clear(ps); |
1104 | kfree(ps); | 1103 | kfree(ps); |
1105 | out_unlock: | 1104 | out_unlock: |
1106 | mutex_unlock(&filter_mutex); | 1105 | mutex_unlock(&event_mutex); |
1107 | 1106 | ||
1108 | return err; | 1107 | return err; |
1109 | } | 1108 | } |
@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1115 | 1114 | ||
1116 | struct filter_parse_state *ps; | 1115 | struct filter_parse_state *ps; |
1117 | 1116 | ||
1118 | mutex_lock(&filter_mutex); | 1117 | mutex_lock(&event_mutex); |
1119 | 1118 | ||
1120 | if (!strcmp(strstrip(filter_string), "0")) { | 1119 | if (!strcmp(strstrip(filter_string), "0")) { |
1121 | filter_free_subsystem_preds(system); | 1120 | filter_free_subsystem_preds(system); |
1122 | remove_filter_string(system->filter); | 1121 | remove_filter_string(system->filter); |
1123 | mutex_unlock(&filter_mutex); | 1122 | mutex_unlock(&event_mutex); |
1124 | return 0; | 1123 | return 0; |
1125 | } | 1124 | } |
1126 | 1125 | ||
@@ -1148,7 +1147,7 @@ out: | |||
1148 | postfix_clear(ps); | 1147 | postfix_clear(ps); |
1149 | kfree(ps); | 1148 | kfree(ps); |
1150 | out_unlock: | 1149 | out_unlock: |
1151 | mutex_unlock(&filter_mutex); | 1150 | mutex_unlock(&event_mutex); |
1152 | 1151 | ||
1153 | return err; | 1152 | return err; |
1154 | } | 1153 | } |
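Dropping the private filter_mutex in favor of the existing event_mutex removes the lock nesting the subsystem helpers used to create: filter_free_subsystem_preds() and filter_add_subsystem_pred() no longer take event_mutex themselves, because every entry point now holds it for them. A schematic of the consolidated discipline; the function names below are illustrative:

    #include <pthread.h>

    static pthread_mutex_t event_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* helper: caller must already hold event_mutex */
    static void free_subsystem_preds_locked(void)
    {
        /* walk ftrace_events and drop each call's predicates */
    }

    static int apply_subsystem_filter(const char *filter_string)
    {
        pthread_mutex_lock(&event_mutex);
        if (filter_string[0] == '0' && !filter_string[1]) {
            free_subsystem_preds_locked();   /* no nested lock taken */
            pthread_mutex_unlock(&event_mutex);
            return 0;
        }
        /* ... parse filter_string and install new predicates ... */
        pthread_mutex_unlock(&event_mutex);
        return 0;
    }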
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c9a0b7df44ff..90f134764837 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void) | |||
193 | static void tracing_stop_function_trace(void) | 193 | static void tracing_stop_function_trace(void) |
194 | { | 194 | { |
195 | ftrace_function_enabled = 0; | 195 | ftrace_function_enabled = 0; |
196 | /* OK if they are not registered */ | 196 | |
197 | unregister_ftrace_function(&trace_stack_ops); | 197 | if (func_flags.val & TRACE_FUNC_OPT_STACK) |
198 | unregister_ftrace_function(&trace_ops); | 198 | unregister_ftrace_function(&trace_stack_ops); |
199 | else | ||
200 | unregister_ftrace_function(&trace_ops); | ||
199 | } | 201 | } |
200 | 202 | ||
201 | static int func_set_flag(u32 old_flags, u32 bit, int set) | 203 | static int func_set_flag(u32 old_flags, u32 bit, int set) |
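Stopping the tracer now unregisters only the ftrace_ops that was actually registered, chosen by the same stack-trace flag consulted at start time; unconditionally unregistering both relied on the unregister path tolerating ops it never saw. A schematic of the flag-driven pairing, with stub types in place of the real ftrace_ops:

    struct ops_stub { const char *name; };

    #define TRACE_FUNC_OPT_STACK 0x1

    static struct ops_stub trace_ops       = { "function" };
    static struct ops_stub trace_stack_ops = { "function+stack" };
    static unsigned int func_flags_val;    /* mirrors func_flags.val */

    static void unregister_ops(struct ops_stub *ops) { (void)ops; }

    static void stop_function_trace(void)
    {
        /* unregister exactly the ops that start time registered */
        if (func_flags_val & TRACE_FUNC_OPT_STACK)
            unregister_ops(&trace_stack_ops);
        else
            unregister_ops(&trace_ops);
    }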
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8b592418d8b2..d2249abafb53 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = { | |||
57 | 57 | ||
58 | /* Add a function return address to the trace stack on thread info.*/ | 58 | /* Add a function return address to the trace stack on thread info.*/ |
59 | int | 59 | int |
60 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | 60 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, |
61 | unsigned long frame_pointer) | ||
61 | { | 62 | { |
62 | unsigned long long calltime; | 63 | unsigned long long calltime; |
63 | int index; | 64 | int index; |
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | |||
85 | current->ret_stack[index].func = func; | 86 | current->ret_stack[index].func = func; |
86 | current->ret_stack[index].calltime = calltime; | 87 | current->ret_stack[index].calltime = calltime; |
87 | current->ret_stack[index].subtime = 0; | 88 | current->ret_stack[index].subtime = 0; |
89 | current->ret_stack[index].fp = frame_pointer; | ||
88 | *depth = index; | 90 | *depth = index; |
89 | 91 | ||
90 | return 0; | 92 | return 0; |
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | |||
92 | 94 | ||
93 | /* Retrieve a function return address to the trace stack on thread info.*/ | 95 | /* Retrieve a function return address to the trace stack on thread info.*/ |
94 | static void | 96 | static void |
95 | ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | 97 | ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, |
98 | unsigned long frame_pointer) | ||
96 | { | 99 | { |
97 | int index; | 100 | int index; |
98 | 101 | ||
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | |||
106 | return; | 109 | return; |
107 | } | 110 | } |
108 | 111 | ||
112 | #ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST | ||
113 | /* | ||
114 | * The arch may choose to record the frame pointer used | ||
115 | * and check it here to make sure that it is what we expect it | ||
116 | * to be. If gcc does not set the placeholder of the return | ||
117 | * address in the frame pointer, and does a copy instead, then | ||
118 | * the function graph trace will fail. This test detects this | ||
119 | * case. | ||
120 | * | ||
121 | * Currently, x86_32 with optimize for size (-Os) makes the latest | ||
122 | * gcc do the above. | ||
123 | */ | ||
124 | if (unlikely(current->ret_stack[index].fp != frame_pointer)) { | ||
125 | ftrace_graph_stop(); | ||
126 | WARN(1, "Bad frame pointer: expected %lx, received %lx\n" | ||
127 | " from func %pF return to %lx\n", | ||
128 | current->ret_stack[index].fp, | ||
129 | frame_pointer, | ||
130 | (void *)current->ret_stack[index].func, | ||
131 | current->ret_stack[index].ret); | ||
132 | *ret = (unsigned long)panic; | ||
133 | return; | ||
134 | } | ||
135 | #endif | ||
136 | |||
109 | *ret = current->ret_stack[index].ret; | 137 | *ret = current->ret_stack[index].ret; |
110 | trace->func = current->ret_stack[index].func; | 138 | trace->func = current->ret_stack[index].func; |
111 | trace->calltime = current->ret_stack[index].calltime; | 139 | trace->calltime = current->ret_stack[index].calltime; |
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | |||
117 | * Send the trace to the ring-buffer. | 145 | * Send the trace to the ring-buffer. |
118 | * @return the original return address. | 146 | * @return the original return address. |
119 | */ | 147 | */ |
120 | unsigned long ftrace_return_to_handler(void) | 148 | unsigned long ftrace_return_to_handler(unsigned long frame_pointer) |
121 | { | 149 | { |
122 | struct ftrace_graph_ret trace; | 150 | struct ftrace_graph_ret trace; |
123 | unsigned long ret; | 151 | unsigned long ret; |
124 | 152 | ||
125 | ftrace_pop_return_trace(&trace, &ret); | 153 | ftrace_pop_return_trace(&trace, &ret, frame_pointer); |
126 | trace.rettime = trace_clock_local(); | 154 | trace.rettime = trace_clock_local(); |
127 | ftrace_graph_return(&trace); | 155 | ftrace_graph_return(&trace); |
128 | barrier(); | 156 | barrier(); |
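The frame-pointer test turns the return stack into a verified shadow stack: the push records the frame pointer seen at function entry, and the pop refuses to hand back a return address if the frame pointer no longer matches, since a mismatch means gcc relocated the saved return slot (as x86_32 -Os builds do). A userspace sketch of the idea, with abort() standing in for ftrace_graph_stop() plus the panic return, and no depth checking:

    #include <stdio.h>
    #include <stdlib.h>

    struct ret_entry {
        unsigned long ret;    /* original return address */
        unsigned long fp;     /* frame pointer seen at function entry */
    };

    static struct ret_entry shadow[32];
    static int depth = -1;

    static void push_return(unsigned long ret, unsigned long fp)
    {
        shadow[++depth] = (struct ret_entry){ .ret = ret, .fp = fp };
    }

    static unsigned long pop_return(unsigned long fp)
    {
        if (shadow[depth].fp != fp) {
            /* the saved return slot moved; the shadow stack is no
             * longer trustworthy, so stop rather than return to a
             * stale address */
            fprintf(stderr, "bad frame pointer: expected %lx, got %lx\n",
                    shadow[depth].fp, fp);
            abort();
        }
        return shadow[depth--].ret;
    }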
diff --git a/kernel/utsname.c b/kernel/utsname.c index 815237a55af8..8a82b4b8ea52 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -15,6 +15,16 @@ | |||
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | 17 | ||
18 | static struct uts_namespace *create_uts_ns(void) | ||
19 | { | ||
20 | struct uts_namespace *uts_ns; | ||
21 | |||
22 | uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); | ||
23 | if (uts_ns) | ||
24 | kref_init(&uts_ns->kref); | ||
25 | return uts_ns; | ||
26 | } | ||
27 | |||
18 | /* | 28 | /* |
19 | * Clone a new ns copying an original utsname, setting refcount to 1 | 29 | * Clone a new ns copying an original utsname, setting refcount to 1 |
20 | * @old_ns: namespace to clone | 30 | * @old_ns: namespace to clone |
@@ -24,14 +34,13 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
24 | { | 34 | { |
25 | struct uts_namespace *ns; | 35 | struct uts_namespace *ns; |
26 | 36 | ||
27 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); | 37 | ns = create_uts_ns(); |
28 | if (!ns) | 38 | if (!ns) |
29 | return ERR_PTR(-ENOMEM); | 39 | return ERR_PTR(-ENOMEM); |
30 | 40 | ||
31 | down_read(&uts_sem); | 41 | down_read(&uts_sem); |
32 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 42 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
33 | up_read(&uts_sem); | 43 | up_read(&uts_sem); |
34 | kref_init(&ns->kref); | ||
35 | return ns; | 44 | return ns; |
36 | } | 45 | } |
37 | 46 | ||
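Folding kref_init() into create_uts_ns() means every allocation site gets an initialized refcount for free, so a future caller of the helper cannot repeat the old pattern of allocating first and forgetting to initialize. A minimal userspace sketch, with malloc() and a plain counter standing in for kmalloc() and struct kref:

    #include <stdlib.h>

    struct uts_ns {
        int refcount;              /* stand-in for struct kref */
        char name[65];
    };

    static struct uts_ns *create_uts_ns(void)
    {
        struct uts_ns *ns = malloc(sizeof(*ns));

        if (ns)
            ns->refcount = 1;      /* kref_init() happens at allocation */
        return ns;
    }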