aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cgroup.c14
-rw-r--r--kernel/exit.c293
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/gcov/Kconfig48
-rw-r--r--kernel/gcov/Makefile3
-rw-r--r--kernel/gcov/base.c148
-rw-r--r--kernel/gcov/fs.c673
-rw-r--r--kernel/gcov/gcc_3_4.c447
-rw-r--r--kernel/gcov/gcov.h128
-rw-r--r--kernel/irq/manage.c2
-rw-r--r--kernel/kthread.c80
-rw-r--r--kernel/module.c16
-rw-r--r--kernel/nsproxy.c19
-rw-r--r--kernel/perf_counter.c312
-rw-r--r--kernel/pid.c17
-rw-r--r--kernel/pid_namespace.c24
-rw-r--r--kernel/ptrace.c161
-rw-r--r--kernel/res_counter.c12
-rw-r--r--kernel/sched.c19
-rw-r--r--kernel/sched_cpupri.c2
-rw-r--r--kernel/sched_debug.c6
-rw-r--r--kernel/sched_fair.c3
-rw-r--r--kernel/signal.c12
-rw-r--r--kernel/softirq.c1
-rw-r--r--kernel/sysctl.c4
-rw-r--r--kernel/time/tick-sched.c12
-rw-r--r--kernel/trace/Kconfig8
-rw-r--r--kernel/trace/ftrace.c7
-rw-r--r--kernel/trace/kmemtrace.c2
-rw-r--r--kernel/trace/ring_buffer.c311
-rw-r--r--kernel/trace/ring_buffer_benchmark.c45
-rw-r--r--kernel/trace/trace.c8
-rw-r--r--kernel/trace/trace_events_filter.c37
-rw-r--r--kernel/trace/trace_functions.c8
-rw-r--r--kernel/trace/trace_functions_graph.c36
-rw-r--r--kernel/utsname.c13
37 files changed, 2255 insertions, 678 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 9df4501cb921..0a32cb21ec97 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
71obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o 71obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
72obj-$(CONFIG_AUDIT) += audit.o auditfilter.o 72obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
73obj-$(CONFIG_AUDITSYSCALL) += auditsc.o 73obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
74obj-$(CONFIG_GCOV_KERNEL) += gcov/
74obj-$(CONFIG_AUDIT_TREE) += audit_tree.o 75obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
75obj-$(CONFIG_KPROBES) += kprobes.o 76obj-$(CONFIG_KPROBES) += kprobes.o
76obj-$(CONFIG_KGDB) += kgdb.o 77obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3fb789f6df94..3737a682cdf5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -843,6 +843,11 @@ static int parse_cgroupfs_options(char *data,
843 struct cgroup_sb_opts *opts) 843 struct cgroup_sb_opts *opts)
844{ 844{
845 char *token, *o = data ?: "all"; 845 char *token, *o = data ?: "all";
846 unsigned long mask = (unsigned long)-1;
847
848#ifdef CONFIG_CPUSETS
849 mask = ~(1UL << cpuset_subsys_id);
850#endif
846 851
847 opts->subsys_bits = 0; 852 opts->subsys_bits = 0;
848 opts->flags = 0; 853 opts->flags = 0;
@@ -887,6 +892,15 @@ static int parse_cgroupfs_options(char *data,
887 } 892 }
888 } 893 }
889 894
895 /*
896 * Option noprefix was introduced just for backward compatibility
897 * with the old cpuset, so we allow noprefix only if mounting just
898 * the cpuset subsystem.
899 */
900 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
901 (opts->subsys_bits & mask))
902 return -EINVAL;
903
890 /* We can't have an empty hierarchy */ 904 /* We can't have an empty hierarchy */
891 if (!opts->subsys_bits) 905 if (!opts->subsys_bits)
892 return -EINVAL; 906 return -EINVAL;
diff --git a/kernel/exit.c b/kernel/exit.c
index b6c90b5ef509..628d41f0dd54 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -375,9 +375,8 @@ static void set_special_pids(struct pid *pid)
375} 375}
376 376
377/* 377/*
378 * Let kernel threads use this to say that they 378 * Let kernel threads use this to say that they allow a certain signal.
379 * allow a certain signal (since daemonize() will 379 * Must not be used if kthread was cloned with CLONE_SIGHAND.
380 * have disabled all of them by default).
381 */ 380 */
382int allow_signal(int sig) 381int allow_signal(int sig)
383{ 382{
@@ -385,14 +384,14 @@ int allow_signal(int sig)
385 return -EINVAL; 384 return -EINVAL;
386 385
387 spin_lock_irq(&current->sighand->siglock); 386 spin_lock_irq(&current->sighand->siglock);
387 /* This is only needed for daemonize()'ed kthreads */
388 sigdelset(&current->blocked, sig); 388 sigdelset(&current->blocked, sig);
389 if (!current->mm) { 389 /*
390 /* Kernel threads handle their own signals. 390 * Kernel threads handle their own signals. Let the signal code
391 Let the signal code know it'll be handled, so 391 * know it'll be handled, so that they don't get converted to
392 that they don't get converted to SIGKILL or 392 * SIGKILL or just silently dropped.
393 just silently dropped */ 393 */
394 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; 394 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
395 }
396 recalc_sigpending(); 395 recalc_sigpending();
397 spin_unlock_irq(&current->sighand->siglock); 396 spin_unlock_irq(&current->sighand->siglock);
398 return 0; 397 return 0;
@@ -591,7 +590,7 @@ retry:
591 /* 590 /*
592 * Search in the siblings 591 * Search in the siblings
593 */ 592 */
594 list_for_each_entry(c, &p->parent->children, sibling) { 593 list_for_each_entry(c, &p->real_parent->children, sibling) {
595 if (c->mm == mm) 594 if (c->mm == mm)
596 goto assign_new_owner; 595 goto assign_new_owner;
597 } 596 }
@@ -758,7 +757,7 @@ static void reparent_thread(struct task_struct *father, struct task_struct *p,
758 p->exit_signal = SIGCHLD; 757 p->exit_signal = SIGCHLD;
759 758
760 /* If it has exited notify the new parent about this child's death. */ 759 /* If it has exited notify the new parent about this child's death. */
761 if (!p->ptrace && 760 if (!task_ptrace(p) &&
762 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { 761 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
763 do_notify_parent(p, p->exit_signal); 762 do_notify_parent(p, p->exit_signal);
764 if (task_detached(p)) { 763 if (task_detached(p)) {
@@ -783,7 +782,7 @@ static void forget_original_parent(struct task_struct *father)
783 list_for_each_entry_safe(p, n, &father->children, sibling) { 782 list_for_each_entry_safe(p, n, &father->children, sibling) {
784 p->real_parent = reaper; 783 p->real_parent = reaper;
785 if (p->parent == father) { 784 if (p->parent == father) {
786 BUG_ON(p->ptrace); 785 BUG_ON(task_ptrace(p));
787 p->parent = p->real_parent; 786 p->parent = p->real_parent;
788 } 787 }
789 reparent_thread(father, p, &dead_children); 788 reparent_thread(father, p, &dead_children);
@@ -1081,6 +1080,18 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
1081 return 0; 1080 return 0;
1082} 1081}
1083 1082
1083struct wait_opts {
1084 enum pid_type wo_type;
1085 int wo_flags;
1086 struct pid *wo_pid;
1087
1088 struct siginfo __user *wo_info;
1089 int __user *wo_stat;
1090 struct rusage __user *wo_rusage;
1091
1092 int notask_error;
1093};
1094
1084static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) 1095static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1085{ 1096{
1086 struct pid *pid = NULL; 1097 struct pid *pid = NULL;
@@ -1091,13 +1102,12 @@ static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1091 return pid; 1102 return pid;
1092} 1103}
1093 1104
1094static int eligible_child(enum pid_type type, struct pid *pid, int options, 1105static int eligible_child(struct wait_opts *wo, struct task_struct *p)
1095 struct task_struct *p)
1096{ 1106{
1097 int err; 1107 int err;
1098 1108
1099 if (type < PIDTYPE_MAX) { 1109 if (wo->wo_type < PIDTYPE_MAX) {
1100 if (task_pid_type(p, type) != pid) 1110 if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
1101 return 0; 1111 return 0;
1102 } 1112 }
1103 1113
@@ -1106,8 +1116,8 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1106 * set; otherwise, wait for non-clone children *only*. (Note: 1116 * set; otherwise, wait for non-clone children *only*. (Note:
1107 * A "clone" child here is one that reports to its parent 1117 * A "clone" child here is one that reports to its parent
1108 * using a signal other than SIGCHLD.) */ 1118 * using a signal other than SIGCHLD.) */
1109 if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) 1119 if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
1110 && !(options & __WALL)) 1120 && !(wo->wo_flags & __WALL))
1111 return 0; 1121 return 0;
1112 1122
1113 err = security_task_wait(p); 1123 err = security_task_wait(p);
@@ -1117,14 +1127,15 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1117 return 1; 1127 return 1;
1118} 1128}
1119 1129
1120static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, 1130static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
1121 int why, int status, 1131 pid_t pid, uid_t uid, int why, int status)
1122 struct siginfo __user *infop,
1123 struct rusage __user *rusagep)
1124{ 1132{
1125 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1133 struct siginfo __user *infop;
1134 int retval = wo->wo_rusage
1135 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1126 1136
1127 put_task_struct(p); 1137 put_task_struct(p);
1138 infop = wo->wo_info;
1128 if (!retval) 1139 if (!retval)
1129 retval = put_user(SIGCHLD, &infop->si_signo); 1140 retval = put_user(SIGCHLD, &infop->si_signo);
1130 if (!retval) 1141 if (!retval)
@@ -1148,19 +1159,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1148 * the lock and this task is uninteresting. If we return nonzero, we have 1159 * the lock and this task is uninteresting. If we return nonzero, we have
1149 * released the lock and the system call should return. 1160 * released the lock and the system call should return.
1150 */ 1161 */
1151static int wait_task_zombie(struct task_struct *p, int options, 1162static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1152 struct siginfo __user *infop,
1153 int __user *stat_addr, struct rusage __user *ru)
1154{ 1163{
1155 unsigned long state; 1164 unsigned long state;
1156 int retval, status, traced; 1165 int retval, status, traced;
1157 pid_t pid = task_pid_vnr(p); 1166 pid_t pid = task_pid_vnr(p);
1158 uid_t uid = __task_cred(p)->uid; 1167 uid_t uid = __task_cred(p)->uid;
1168 struct siginfo __user *infop;
1159 1169
1160 if (!likely(options & WEXITED)) 1170 if (!likely(wo->wo_flags & WEXITED))
1161 return 0; 1171 return 0;
1162 1172
1163 if (unlikely(options & WNOWAIT)) { 1173 if (unlikely(wo->wo_flags & WNOWAIT)) {
1164 int exit_code = p->exit_code; 1174 int exit_code = p->exit_code;
1165 int why, status; 1175 int why, status;
1166 1176
@@ -1173,8 +1183,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1173 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; 1183 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
1174 status = exit_code & 0x7f; 1184 status = exit_code & 0x7f;
1175 } 1185 }
1176 return wait_noreap_copyout(p, pid, uid, why, 1186 return wait_noreap_copyout(wo, p, pid, uid, why, status);
1177 status, infop, ru);
1178 } 1187 }
1179 1188
1180 /* 1189 /*
@@ -1188,11 +1197,13 @@ static int wait_task_zombie(struct task_struct *p, int options,
1188 } 1197 }
1189 1198
1190 traced = ptrace_reparented(p); 1199 traced = ptrace_reparented(p);
1191 1200 /*
1192 if (likely(!traced)) { 1201 * It can be ptraced but not reparented, check
1202 * !task_detached() to filter out sub-threads.
1203 */
1204 if (likely(!traced) && likely(!task_detached(p))) {
1193 struct signal_struct *psig; 1205 struct signal_struct *psig;
1194 struct signal_struct *sig; 1206 struct signal_struct *sig;
1195 struct task_cputime cputime;
1196 1207
1197 /* 1208 /*
1198 * The resource counters for the group leader are in its 1209 * The resource counters for the group leader are in its
@@ -1205,26 +1216,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
1205 * p->signal fields, because they are only touched by 1216 * p->signal fields, because they are only touched by
1206 * __exit_signal, which runs with tasklist_lock 1217 * __exit_signal, which runs with tasklist_lock
1207 * write-locked anyway, and so is excluded here. We do 1218 * write-locked anyway, and so is excluded here. We do
1208 * need to protect the access to p->parent->signal fields, 1219 * need to protect the access to parent->signal fields,
1209 * as other threads in the parent group can be right 1220 * as other threads in the parent group can be right
1210 * here reaping other children at the same time. 1221 * here reaping other children at the same time.
1211 *
1212 * We use thread_group_cputime() to get times for the thread
1213 * group, which consolidates times for all threads in the
1214 * group including the group leader.
1215 */ 1222 */
1216 thread_group_cputime(p, &cputime); 1223 spin_lock_irq(&p->real_parent->sighand->siglock);
1217 spin_lock_irq(&p->parent->sighand->siglock); 1224 psig = p->real_parent->signal;
1218 psig = p->parent->signal;
1219 sig = p->signal; 1225 sig = p->signal;
1220 psig->cutime = 1226 psig->cutime =
1221 cputime_add(psig->cutime, 1227 cputime_add(psig->cutime,
1222 cputime_add(cputime.utime, 1228 cputime_add(p->utime,
1223 sig->cutime)); 1229 cputime_add(sig->utime,
1230 sig->cutime)));
1224 psig->cstime = 1231 psig->cstime =
1225 cputime_add(psig->cstime, 1232 cputime_add(psig->cstime,
1226 cputime_add(cputime.stime, 1233 cputime_add(p->stime,
1227 sig->cstime)); 1234 cputime_add(sig->stime,
1235 sig->cstime)));
1228 psig->cgtime = 1236 psig->cgtime =
1229 cputime_add(psig->cgtime, 1237 cputime_add(psig->cgtime,
1230 cputime_add(p->gtime, 1238 cputime_add(p->gtime,
@@ -1246,7 +1254,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1246 sig->oublock + sig->coublock; 1254 sig->oublock + sig->coublock;
1247 task_io_accounting_add(&psig->ioac, &p->ioac); 1255 task_io_accounting_add(&psig->ioac, &p->ioac);
1248 task_io_accounting_add(&psig->ioac, &sig->ioac); 1256 task_io_accounting_add(&psig->ioac, &sig->ioac);
1249 spin_unlock_irq(&p->parent->sighand->siglock); 1257 spin_unlock_irq(&p->real_parent->sighand->siglock);
1250 } 1258 }
1251 1259
1252 /* 1260 /*
@@ -1255,11 +1263,14 @@ static int wait_task_zombie(struct task_struct *p, int options,
1255 */ 1263 */
1256 read_unlock(&tasklist_lock); 1264 read_unlock(&tasklist_lock);
1257 1265
1258 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1266 retval = wo->wo_rusage
1267 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1259 status = (p->signal->flags & SIGNAL_GROUP_EXIT) 1268 status = (p->signal->flags & SIGNAL_GROUP_EXIT)
1260 ? p->signal->group_exit_code : p->exit_code; 1269 ? p->signal->group_exit_code : p->exit_code;
1261 if (!retval && stat_addr) 1270 if (!retval && wo->wo_stat)
1262 retval = put_user(status, stat_addr); 1271 retval = put_user(status, wo->wo_stat);
1272
1273 infop = wo->wo_info;
1263 if (!retval && infop) 1274 if (!retval && infop)
1264 retval = put_user(SIGCHLD, &infop->si_signo); 1275 retval = put_user(SIGCHLD, &infop->si_signo);
1265 if (!retval && infop) 1276 if (!retval && infop)
@@ -1327,15 +1338,18 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
1327 * the lock and this task is uninteresting. If we return nonzero, we have 1338 * the lock and this task is uninteresting. If we return nonzero, we have
1328 * released the lock and the system call should return. 1339 * released the lock and the system call should return.
1329 */ 1340 */
1330static int wait_task_stopped(int ptrace, struct task_struct *p, 1341static int wait_task_stopped(struct wait_opts *wo,
1331 int options, struct siginfo __user *infop, 1342 int ptrace, struct task_struct *p)
1332 int __user *stat_addr, struct rusage __user *ru)
1333{ 1343{
1344 struct siginfo __user *infop;
1334 int retval, exit_code, *p_code, why; 1345 int retval, exit_code, *p_code, why;
1335 uid_t uid = 0; /* unneeded, required by compiler */ 1346 uid_t uid = 0; /* unneeded, required by compiler */
1336 pid_t pid; 1347 pid_t pid;
1337 1348
1338 if (!(options & WUNTRACED)) 1349 /*
1350 * Traditionally we see ptrace'd stopped tasks regardless of options.
1351 */
1352 if (!ptrace && !(wo->wo_flags & WUNTRACED))
1339 return 0; 1353 return 0;
1340 1354
1341 exit_code = 0; 1355 exit_code = 0;
@@ -1349,7 +1363,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
1349 if (!exit_code) 1363 if (!exit_code)
1350 goto unlock_sig; 1364 goto unlock_sig;
1351 1365
1352 if (!unlikely(options & WNOWAIT)) 1366 if (!unlikely(wo->wo_flags & WNOWAIT))
1353 *p_code = 0; 1367 *p_code = 0;
1354 1368
1355 /* don't need the RCU readlock here as we're holding a spinlock */ 1369 /* don't need the RCU readlock here as we're holding a spinlock */
@@ -1371,14 +1385,15 @@ unlock_sig:
1371 why = ptrace ? CLD_TRAPPED : CLD_STOPPED; 1385 why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
1372 read_unlock(&tasklist_lock); 1386 read_unlock(&tasklist_lock);
1373 1387
1374 if (unlikely(options & WNOWAIT)) 1388 if (unlikely(wo->wo_flags & WNOWAIT))
1375 return wait_noreap_copyout(p, pid, uid, 1389 return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
1376 why, exit_code,
1377 infop, ru);
1378 1390
1379 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1391 retval = wo->wo_rusage
1380 if (!retval && stat_addr) 1392 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1381 retval = put_user((exit_code << 8) | 0x7f, stat_addr); 1393 if (!retval && wo->wo_stat)
1394 retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
1395
1396 infop = wo->wo_info;
1382 if (!retval && infop) 1397 if (!retval && infop)
1383 retval = put_user(SIGCHLD, &infop->si_signo); 1398 retval = put_user(SIGCHLD, &infop->si_signo);
1384 if (!retval && infop) 1399 if (!retval && infop)
@@ -1405,15 +1420,13 @@ unlock_sig:
1405 * the lock and this task is uninteresting. If we return nonzero, we have 1420 * the lock and this task is uninteresting. If we return nonzero, we have
1406 * released the lock and the system call should return. 1421 * released the lock and the system call should return.
1407 */ 1422 */
1408static int wait_task_continued(struct task_struct *p, int options, 1423static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1409 struct siginfo __user *infop,
1410 int __user *stat_addr, struct rusage __user *ru)
1411{ 1424{
1412 int retval; 1425 int retval;
1413 pid_t pid; 1426 pid_t pid;
1414 uid_t uid; 1427 uid_t uid;
1415 1428
1416 if (!unlikely(options & WCONTINUED)) 1429 if (!unlikely(wo->wo_flags & WCONTINUED))
1417 return 0; 1430 return 0;
1418 1431
1419 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1432 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
@@ -1425,7 +1438,7 @@ static int wait_task_continued(struct task_struct *p, int options,
1425 spin_unlock_irq(&p->sighand->siglock); 1438 spin_unlock_irq(&p->sighand->siglock);
1426 return 0; 1439 return 0;
1427 } 1440 }
1428 if (!unlikely(options & WNOWAIT)) 1441 if (!unlikely(wo->wo_flags & WNOWAIT))
1429 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1442 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1430 uid = __task_cred(p)->uid; 1443 uid = __task_cred(p)->uid;
1431 spin_unlock_irq(&p->sighand->siglock); 1444 spin_unlock_irq(&p->sighand->siglock);
@@ -1434,17 +1447,17 @@ static int wait_task_continued(struct task_struct *p, int options,
1434 get_task_struct(p); 1447 get_task_struct(p);
1435 read_unlock(&tasklist_lock); 1448 read_unlock(&tasklist_lock);
1436 1449
1437 if (!infop) { 1450 if (!wo->wo_info) {
1438 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1451 retval = wo->wo_rusage
1452 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1439 put_task_struct(p); 1453 put_task_struct(p);
1440 if (!retval && stat_addr) 1454 if (!retval && wo->wo_stat)
1441 retval = put_user(0xffff, stat_addr); 1455 retval = put_user(0xffff, wo->wo_stat);
1442 if (!retval) 1456 if (!retval)
1443 retval = pid; 1457 retval = pid;
1444 } else { 1458 } else {
1445 retval = wait_noreap_copyout(p, pid, uid, 1459 retval = wait_noreap_copyout(wo, p, pid, uid,
1446 CLD_CONTINUED, SIGCONT, 1460 CLD_CONTINUED, SIGCONT);
1447 infop, ru);
1448 BUG_ON(retval == 0); 1461 BUG_ON(retval == 0);
1449 } 1462 }
1450 1463
@@ -1454,19 +1467,16 @@ static int wait_task_continued(struct task_struct *p, int options,
1454/* 1467/*
1455 * Consider @p for a wait by @parent. 1468 * Consider @p for a wait by @parent.
1456 * 1469 *
1457 * -ECHILD should be in *@notask_error before the first call. 1470 * -ECHILD should be in ->notask_error before the first call.
1458 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1471 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1459 * Returns zero if the search for a child should continue; 1472 * Returns zero if the search for a child should continue;
1460 * then *@notask_error is 0 if @p is an eligible child, 1473 * then ->notask_error is 0 if @p is an eligible child,
1461 * or another error from security_task_wait(), or still -ECHILD. 1474 * or another error from security_task_wait(), or still -ECHILD.
1462 */ 1475 */
1463static int wait_consider_task(struct task_struct *parent, int ptrace, 1476static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
1464 struct task_struct *p, int *notask_error, 1477 int ptrace, struct task_struct *p)
1465 enum pid_type type, struct pid *pid, int options,
1466 struct siginfo __user *infop,
1467 int __user *stat_addr, struct rusage __user *ru)
1468{ 1478{
1469 int ret = eligible_child(type, pid, options, p); 1479 int ret = eligible_child(wo, p);
1470 if (!ret) 1480 if (!ret)
1471 return ret; 1481 return ret;
1472 1482
@@ -1478,17 +1488,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1478 * to look for security policy problems, rather 1488 * to look for security policy problems, rather
1479 * than for mysterious wait bugs. 1489 * than for mysterious wait bugs.
1480 */ 1490 */
1481 if (*notask_error) 1491 if (wo->notask_error)
1482 *notask_error = ret; 1492 wo->notask_error = ret;
1483 return 0; 1493 return 0;
1484 } 1494 }
1485 1495
1486 if (likely(!ptrace) && unlikely(p->ptrace)) { 1496 if (likely(!ptrace) && unlikely(task_ptrace(p))) {
1487 /* 1497 /*
1488 * This child is hidden by ptrace. 1498 * This child is hidden by ptrace.
1489 * We aren't allowed to see it now, but eventually we will. 1499 * We aren't allowed to see it now, but eventually we will.
1490 */ 1500 */
1491 *notask_error = 0; 1501 wo->notask_error = 0;
1492 return 0; 1502 return 0;
1493 } 1503 }
1494 1504
@@ -1499,34 +1509,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1499 * We don't reap group leaders with subthreads. 1509 * We don't reap group leaders with subthreads.
1500 */ 1510 */
1501 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) 1511 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
1502 return wait_task_zombie(p, options, infop, stat_addr, ru); 1512 return wait_task_zombie(wo, p);
1503 1513
1504 /* 1514 /*
1505 * It's stopped or running now, so it might 1515 * It's stopped or running now, so it might
1506 * later continue, exit, or stop again. 1516 * later continue, exit, or stop again.
1507 */ 1517 */
1508 *notask_error = 0; 1518 wo->notask_error = 0;
1509 1519
1510 if (task_stopped_code(p, ptrace)) 1520 if (task_stopped_code(p, ptrace))
1511 return wait_task_stopped(ptrace, p, options, 1521 return wait_task_stopped(wo, ptrace, p);
1512 infop, stat_addr, ru);
1513 1522
1514 return wait_task_continued(p, options, infop, stat_addr, ru); 1523 return wait_task_continued(wo, p);
1515} 1524}
1516 1525
1517/* 1526/*
1518 * Do the work of do_wait() for one thread in the group, @tsk. 1527 * Do the work of do_wait() for one thread in the group, @tsk.
1519 * 1528 *
1520 * -ECHILD should be in *@notask_error before the first call. 1529 * -ECHILD should be in ->notask_error before the first call.
1521 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1530 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1522 * Returns zero if the search for a child should continue; then 1531 * Returns zero if the search for a child should continue; then
1523 * *@notask_error is 0 if there were any eligible children, 1532 * ->notask_error is 0 if there were any eligible children,
1524 * or another error from security_task_wait(), or still -ECHILD. 1533 * or another error from security_task_wait(), or still -ECHILD.
1525 */ 1534 */
1526static int do_wait_thread(struct task_struct *tsk, int *notask_error, 1535static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1527 enum pid_type type, struct pid *pid, int options,
1528 struct siginfo __user *infop, int __user *stat_addr,
1529 struct rusage __user *ru)
1530{ 1536{
1531 struct task_struct *p; 1537 struct task_struct *p;
1532 1538
@@ -1535,9 +1541,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1535 * Do not consider detached threads. 1541 * Do not consider detached threads.
1536 */ 1542 */
1537 if (!task_detached(p)) { 1543 if (!task_detached(p)) {
1538 int ret = wait_consider_task(tsk, 0, p, notask_error, 1544 int ret = wait_consider_task(wo, tsk, 0, p);
1539 type, pid, options,
1540 infop, stat_addr, ru);
1541 if (ret) 1545 if (ret)
1542 return ret; 1546 return ret;
1543 } 1547 }
@@ -1546,22 +1550,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1546 return 0; 1550 return 0;
1547} 1551}
1548 1552
1549static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, 1553static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1550 enum pid_type type, struct pid *pid, int options,
1551 struct siginfo __user *infop, int __user *stat_addr,
1552 struct rusage __user *ru)
1553{ 1554{
1554 struct task_struct *p; 1555 struct task_struct *p;
1555 1556
1556 /*
1557 * Traditionally we see ptrace'd stopped tasks regardless of options.
1558 */
1559 options |= WUNTRACED;
1560
1561 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1557 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1562 int ret = wait_consider_task(tsk, 1, p, notask_error, 1558 int ret = wait_consider_task(wo, tsk, 1, p);
1563 type, pid, options,
1564 infop, stat_addr, ru);
1565 if (ret) 1559 if (ret)
1566 return ret; 1560 return ret;
1567 } 1561 }
@@ -1569,65 +1563,59 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
1569 return 0; 1563 return 0;
1570} 1564}
1571 1565
1572static long do_wait(enum pid_type type, struct pid *pid, int options, 1566static long do_wait(struct wait_opts *wo)
1573 struct siginfo __user *infop, int __user *stat_addr,
1574 struct rusage __user *ru)
1575{ 1567{
1576 DECLARE_WAITQUEUE(wait, current); 1568 DECLARE_WAITQUEUE(wait, current);
1577 struct task_struct *tsk; 1569 struct task_struct *tsk;
1578 int retval; 1570 int retval;
1579 1571
1580 trace_sched_process_wait(pid); 1572 trace_sched_process_wait(wo->wo_pid);
1581 1573
1582 add_wait_queue(&current->signal->wait_chldexit,&wait); 1574 add_wait_queue(&current->signal->wait_chldexit,&wait);
1583repeat: 1575repeat:
1584 /* 1576 /*
1585 * If there is nothing that can match our critiera just get out. 1577 * If there is nothing that can match our critiera just get out.
1586 * We will clear @retval to zero if we see any child that might later 1578 * We will clear ->notask_error to zero if we see any child that
1587 * match our criteria, even if we are not able to reap it yet. 1579 * might later match our criteria, even if we are not able to reap
1580 * it yet.
1588 */ 1581 */
1589 retval = -ECHILD; 1582 wo->notask_error = -ECHILD;
1590 if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) 1583 if ((wo->wo_type < PIDTYPE_MAX) &&
1591 goto end; 1584 (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
1585 goto notask;
1592 1586
1593 current->state = TASK_INTERRUPTIBLE; 1587 set_current_state(TASK_INTERRUPTIBLE);
1594 read_lock(&tasklist_lock); 1588 read_lock(&tasklist_lock);
1595 tsk = current; 1589 tsk = current;
1596 do { 1590 do {
1597 int tsk_result = do_wait_thread(tsk, &retval, 1591 retval = do_wait_thread(wo, tsk);
1598 type, pid, options, 1592 if (retval)
1599 infop, stat_addr, ru); 1593 goto end;
1600 if (!tsk_result) 1594
1601 tsk_result = ptrace_do_wait(tsk, &retval, 1595 retval = ptrace_do_wait(wo, tsk);
1602 type, pid, options, 1596 if (retval)
1603 infop, stat_addr, ru);
1604 if (tsk_result) {
1605 /*
1606 * tasklist_lock is unlocked and we have a final result.
1607 */
1608 retval = tsk_result;
1609 goto end; 1597 goto end;
1610 }
1611 1598
1612 if (options & __WNOTHREAD) 1599 if (wo->wo_flags & __WNOTHREAD)
1613 break; 1600 break;
1614 tsk = next_thread(tsk); 1601 } while_each_thread(current, tsk);
1615 BUG_ON(tsk->signal != current->signal);
1616 } while (tsk != current);
1617 read_unlock(&tasklist_lock); 1602 read_unlock(&tasklist_lock);
1618 1603
1619 if (!retval && !(options & WNOHANG)) { 1604notask:
1605 retval = wo->notask_error;
1606 if (!retval && !(wo->wo_flags & WNOHANG)) {
1620 retval = -ERESTARTSYS; 1607 retval = -ERESTARTSYS;
1621 if (!signal_pending(current)) { 1608 if (!signal_pending(current)) {
1622 schedule(); 1609 schedule();
1623 goto repeat; 1610 goto repeat;
1624 } 1611 }
1625 } 1612 }
1626
1627end: 1613end:
1628 current->state = TASK_RUNNING; 1614 __set_current_state(TASK_RUNNING);
1629 remove_wait_queue(&current->signal->wait_chldexit,&wait); 1615 remove_wait_queue(&current->signal->wait_chldexit,&wait);
1630 if (infop) { 1616 if (wo->wo_info) {
1617 struct siginfo __user *infop = wo->wo_info;
1618
1631 if (retval > 0) 1619 if (retval > 0)
1632 retval = 0; 1620 retval = 0;
1633 else { 1621 else {
@@ -1656,6 +1644,7 @@ end:
1656SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, 1644SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1657 infop, int, options, struct rusage __user *, ru) 1645 infop, int, options, struct rusage __user *, ru)
1658{ 1646{
1647 struct wait_opts wo;
1659 struct pid *pid = NULL; 1648 struct pid *pid = NULL;
1660 enum pid_type type; 1649 enum pid_type type;
1661 long ret; 1650 long ret;
@@ -1685,7 +1674,14 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1685 1674
1686 if (type < PIDTYPE_MAX) 1675 if (type < PIDTYPE_MAX)
1687 pid = find_get_pid(upid); 1676 pid = find_get_pid(upid);
1688 ret = do_wait(type, pid, options, infop, NULL, ru); 1677
1678 wo.wo_type = type;
1679 wo.wo_pid = pid;
1680 wo.wo_flags = options;
1681 wo.wo_info = infop;
1682 wo.wo_stat = NULL;
1683 wo.wo_rusage = ru;
1684 ret = do_wait(&wo);
1689 put_pid(pid); 1685 put_pid(pid);
1690 1686
1691 /* avoid REGPARM breakage on x86: */ 1687 /* avoid REGPARM breakage on x86: */
@@ -1696,6 +1692,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1696SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, 1692SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1697 int, options, struct rusage __user *, ru) 1693 int, options, struct rusage __user *, ru)
1698{ 1694{
1695 struct wait_opts wo;
1699 struct pid *pid = NULL; 1696 struct pid *pid = NULL;
1700 enum pid_type type; 1697 enum pid_type type;
1701 long ret; 1698 long ret;
@@ -1717,7 +1714,13 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1717 pid = find_get_pid(upid); 1714 pid = find_get_pid(upid);
1718 } 1715 }
1719 1716
1720 ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); 1717 wo.wo_type = type;
1718 wo.wo_pid = pid;
1719 wo.wo_flags = options | WEXITED;
1720 wo.wo_info = NULL;
1721 wo.wo_stat = stat_addr;
1722 wo.wo_rusage = ru;
1723 ret = do_wait(&wo);
1721 put_pid(pid); 1724 put_pid(pid);
1722 1725
1723 /* avoid REGPARM breakage on x86: */ 1726 /* avoid REGPARM breakage on x86: */
diff --git a/kernel/fork.c b/kernel/fork.c
index be022c200da6..467746b3f0aa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1029,7 +1029,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1029 p->vfork_done = NULL; 1029 p->vfork_done = NULL;
1030 spin_lock_init(&p->alloc_lock); 1030 spin_lock_init(&p->alloc_lock);
1031 1031
1032 clear_tsk_thread_flag(p, TIF_SIGPENDING);
1033 init_sigpending(&p->pending); 1032 init_sigpending(&p->pending);
1034 1033
1035 p->utime = cputime_zero; 1034 p->utime = cputime_zero;
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
new file mode 100644
index 000000000000..22e9dcfaa3d3
--- /dev/null
+++ b/kernel/gcov/Kconfig
@@ -0,0 +1,48 @@
1menu "GCOV-based kernel profiling"
2
3config GCOV_KERNEL
4 bool "Enable gcov-based kernel profiling"
5 depends on DEBUG_FS && CONSTRUCTORS
6 default n
7 ---help---
8 This option enables gcov-based code profiling (e.g. for code coverage
9 measurements).
10
11 If unsure, say N.
12
13 Additionally specify CONFIG_GCOV_PROFILE_ALL=y to get profiling data
14 for the entire kernel. To enable profiling for specific files or
15 directories, add a line similar to the following to the respective
16 Makefile:
17
18 For a single file (e.g. main.o):
19 GCOV_PROFILE_main.o := y
20
21 For all files in one directory:
22 GCOV_PROFILE := y
23
24 To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
25 is specified, use:
26
27 GCOV_PROFILE_main.o := n
28 and:
29 GCOV_PROFILE := n
30
31 Note that the debugfs filesystem has to be mounted to access
32 profiling data.
33
34config GCOV_PROFILE_ALL
35 bool "Profile entire Kernel"
36 depends on GCOV_KERNEL
37 depends on S390 || X86
38 default n
39 ---help---
40 This options activates profiling for the entire kernel.
41
42 If unsure, say N.
43
44 Note that a kernel compiled with profiling flags will be significantly
45 larger and run slower. Also be sure to exclude files from profiling
46 which are not linked to the kernel image to prevent linker errors.
47
48endmenu
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
new file mode 100644
index 000000000000..3f761001d517
--- /dev/null
+++ b/kernel/gcov/Makefile
@@ -0,0 +1,3 @@
1EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
2
3obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
new file mode 100644
index 000000000000..9b22d03cc581
--- /dev/null
+++ b/kernel/gcov/base.c
@@ -0,0 +1,148 @@
1/*
2 * This code maintains a list of active profiling data structures.
3 *
4 * Copyright IBM Corp. 2009
5 * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
6 *
7 * Uses gcc-internal data definitions.
8 * Based on the gcov-kernel patch by:
9 * Hubertus Franke <frankeh@us.ibm.com>
10 * Nigel Hinds <nhinds@us.ibm.com>
11 * Rajan Ravindran <rajancr@us.ibm.com>
12 * Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
13 * Paul Larson
14 */
15
16#define pr_fmt(fmt) "gcov: " fmt
17
18#include <linux/init.h>
19#include <linux/module.h>
20#include <linux/mutex.h>
21#include "gcov.h"
22
23static struct gcov_info *gcov_info_head;
24static int gcov_events_enabled;
25static DEFINE_MUTEX(gcov_lock);
26
27/*
28 * __gcov_init is called by gcc-generated constructor code for each object
29 * file compiled with -fprofile-arcs.
30 */
31void __gcov_init(struct gcov_info *info)
32{
33 static unsigned int gcov_version;
34
35 mutex_lock(&gcov_lock);
36 if (gcov_version == 0) {
37 gcov_version = info->version;
38 /*
39 * Printing gcc's version magic may prove useful for debugging
40 * incompatibility reports.
41 */
42 pr_info("version magic: 0x%x\n", gcov_version);
43 }
44 /*
45 * Add new profiling data structure to list and inform event
46 * listener.
47 */
48 info->next = gcov_info_head;
49 gcov_info_head = info;
50 if (gcov_events_enabled)
51 gcov_event(GCOV_ADD, info);
52 mutex_unlock(&gcov_lock);
53}
54EXPORT_SYMBOL(__gcov_init);
55
56/*
57 * These functions may be referenced by gcc-generated profiling code but serve
58 * no function for kernel profiling.
59 */
60void __gcov_flush(void)
61{
62 /* Unused. */
63}
64EXPORT_SYMBOL(__gcov_flush);
65
66void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
67{
68 /* Unused. */
69}
70EXPORT_SYMBOL(__gcov_merge_add);
71
72void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
73{
74 /* Unused. */
75}
76EXPORT_SYMBOL(__gcov_merge_single);
77
78void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
79{
80 /* Unused. */
81}
82EXPORT_SYMBOL(__gcov_merge_delta);
83
84/**
85 * gcov_enable_events - enable event reporting through gcov_event()
86 *
87 * Turn on reporting of profiling data load/unload-events through the
88 * gcov_event() callback. Also replay all previous events once. This function
89 * is needed because some events are potentially generated too early for the
90 * callback implementation to handle them initially.
91 */
92void gcov_enable_events(void)
93{
94 struct gcov_info *info;
95
96 mutex_lock(&gcov_lock);
97 gcov_events_enabled = 1;
98 /* Perform event callback for previously registered entries. */
99 for (info = gcov_info_head; info; info = info->next)
100 gcov_event(GCOV_ADD, info);
101 mutex_unlock(&gcov_lock);
102}
103
104#ifdef CONFIG_MODULES
105static inline int within(void *addr, void *start, unsigned long size)
106{
107 return ((addr >= start) && (addr < start + size));
108}
109
110/* Update list and generate events when modules are unloaded. */
111static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
112 void *data)
113{
114 struct module *mod = data;
115 struct gcov_info *info;
116 struct gcov_info *prev;
117
118 if (event != MODULE_STATE_GOING)
119 return NOTIFY_OK;
120 mutex_lock(&gcov_lock);
121 prev = NULL;
122 /* Remove entries located in module from linked list. */
123 for (info = gcov_info_head; info; info = info->next) {
124 if (within(info, mod->module_core, mod->core_size)) {
125 if (prev)
126 prev->next = info->next;
127 else
128 gcov_info_head = info->next;
129 if (gcov_events_enabled)
130 gcov_event(GCOV_REMOVE, info);
131 } else
132 prev = info;
133 }
134 mutex_unlock(&gcov_lock);
135
136 return NOTIFY_OK;
137}
138
139static struct notifier_block gcov_nb = {
140 .notifier_call = gcov_module_notifier,
141};
142
143static int __init gcov_init(void)
144{
145 return register_module_notifier(&gcov_nb);
146}
147device_initcall(gcov_init);
148#endif /* CONFIG_MODULES */
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
new file mode 100644
index 000000000000..ef3c3f88a7a3
--- /dev/null
+++ b/kernel/gcov/fs.c
@@ -0,0 +1,673 @@
1/*
2 * This code exports profiling data as debugfs files to userspace.
3 *
4 * Copyright IBM Corp. 2009
5 * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
6 *
7 * Uses gcc-internal data definitions.
8 * Based on the gcov-kernel patch by:
9 * Hubertus Franke <frankeh@us.ibm.com>
10 * Nigel Hinds <nhinds@us.ibm.com>
11 * Rajan Ravindran <rajancr@us.ibm.com>
12 * Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
13 * Paul Larson
14 * Yi CDL Yang
15 */
16
17#define pr_fmt(fmt) "gcov: " fmt
18
19#include <linux/init.h>
20#include <linux/module.h>
21#include <linux/debugfs.h>
22#include <linux/fs.h>
23#include <linux/list.h>
24#include <linux/string.h>
25#include <linux/slab.h>
26#include <linux/mutex.h>
27#include <linux/seq_file.h>
28#include "gcov.h"
29
30/**
31 * struct gcov_node - represents a debugfs entry
32 * @list: list head for child node list
33 * @children: child nodes
34 * @all: list head for list of all nodes
35 * @parent: parent node
36 * @info: associated profiling data structure if not a directory
37 * @ghost: when an object file containing profiling data is unloaded we keep a
38 * copy of the profiling data here to allow collecting coverage data
39 * for cleanup code. Such a node is called a "ghost".
40 * @dentry: main debugfs entry, either a directory or data file
41 * @links: associated symbolic links
42 * @name: data file basename
43 *
44 * struct gcov_node represents an entity within the gcov/ subdirectory
45 * of debugfs. There are directory and data file nodes. The latter represent
46 * the actual synthesized data file plus any associated symbolic links which
47 * are needed by the gcov tool to work correctly.
48 */
49struct gcov_node {
50 struct list_head list;
51 struct list_head children;
52 struct list_head all;
53 struct gcov_node *parent;
54 struct gcov_info *info;
55 struct gcov_info *ghost;
56 struct dentry *dentry;
57 struct dentry **links;
58 char name[0];
59};
60
61static const char objtree[] = OBJTREE;
62static const char srctree[] = SRCTREE;
63static struct gcov_node root_node;
64static struct dentry *reset_dentry;
65static LIST_HEAD(all_head);
66static DEFINE_MUTEX(node_lock);
67
68/* If non-zero, keep copies of profiling data for unloaded modules. */
69static int gcov_persist = 1;
70
71static int __init gcov_persist_setup(char *str)
72{
73 unsigned long val;
74
75 if (strict_strtoul(str, 0, &val)) {
76 pr_warning("invalid gcov_persist parameter '%s'\n", str);
77 return 0;
78 }
79 gcov_persist = val;
80 pr_info("setting gcov_persist to %d\n", gcov_persist);
81
82 return 1;
83}
84__setup("gcov_persist=", gcov_persist_setup);
85
86/*
87 * seq_file.start() implementation for gcov data files. Note that the
88 * gcov_iterator interface is designed to be more restrictive than seq_file
89 * (no start from arbitrary position, etc.), to simplify the iterator
90 * implementation.
91 */
92static void *gcov_seq_start(struct seq_file *seq, loff_t *pos)
93{
94 loff_t i;
95
96 gcov_iter_start(seq->private);
97 for (i = 0; i < *pos; i++) {
98 if (gcov_iter_next(seq->private))
99 return NULL;
100 }
101 return seq->private;
102}
103
104/* seq_file.next() implementation for gcov data files. */
105static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos)
106{
107 struct gcov_iterator *iter = data;
108
109 if (gcov_iter_next(iter))
110 return NULL;
111 (*pos)++;
112
113 return iter;
114}
115
116/* seq_file.show() implementation for gcov data files. */
117static int gcov_seq_show(struct seq_file *seq, void *data)
118{
119 struct gcov_iterator *iter = data;
120
121 if (gcov_iter_write(iter, seq))
122 return -EINVAL;
123 return 0;
124}
125
126static void gcov_seq_stop(struct seq_file *seq, void *data)
127{
128 /* Unused. */
129}
130
131static const struct seq_operations gcov_seq_ops = {
132 .start = gcov_seq_start,
133 .next = gcov_seq_next,
134 .show = gcov_seq_show,
135 .stop = gcov_seq_stop,
136};
137
138/*
139 * Return the profiling data set for a given node. This can either be the
140 * original profiling data structure or a duplicate (also called "ghost")
141 * in case the associated object file has been unloaded.
142 */
143static struct gcov_info *get_node_info(struct gcov_node *node)
144{
145 if (node->info)
146 return node->info;
147
148 return node->ghost;
149}
150
151/*
152 * open() implementation for gcov data files. Create a copy of the profiling
153 * data set and initialize the iterator and seq_file interface.
154 */
155static int gcov_seq_open(struct inode *inode, struct file *file)
156{
157 struct gcov_node *node = inode->i_private;
158 struct gcov_iterator *iter;
159 struct seq_file *seq;
160 struct gcov_info *info;
161 int rc = -ENOMEM;
162
163 mutex_lock(&node_lock);
164 /*
165 * Read from a profiling data copy to minimize reference tracking
166 * complexity and concurrent access.
167 */
168 info = gcov_info_dup(get_node_info(node));
169 if (!info)
170 goto out_unlock;
171 iter = gcov_iter_new(info);
172 if (!iter)
173 goto err_free_info;
174 rc = seq_open(file, &gcov_seq_ops);
175 if (rc)
176 goto err_free_iter_info;
177 seq = file->private_data;
178 seq->private = iter;
179out_unlock:
180 mutex_unlock(&node_lock);
181 return rc;
182
183err_free_iter_info:
184 gcov_iter_free(iter);
185err_free_info:
186 gcov_info_free(info);
187 goto out_unlock;
188}
189
190/*
191 * release() implementation for gcov data files. Release resources allocated
192 * by open().
193 */
194static int gcov_seq_release(struct inode *inode, struct file *file)
195{
196 struct gcov_iterator *iter;
197 struct gcov_info *info;
198 struct seq_file *seq;
199
200 seq = file->private_data;
201 iter = seq->private;
202 info = gcov_iter_get_info(iter);
203 gcov_iter_free(iter);
204 gcov_info_free(info);
205 seq_release(inode, file);
206
207 return 0;
208}
209
210/*
211 * Find a node by the associated data file name. Needs to be called with
212 * node_lock held.
213 */
214static struct gcov_node *get_node_by_name(const char *name)
215{
216 struct gcov_node *node;
217 struct gcov_info *info;
218
219 list_for_each_entry(node, &all_head, all) {
220 info = get_node_info(node);
221 if (info && (strcmp(info->filename, name) == 0))
222 return node;
223 }
224
225 return NULL;
226}
227
228static void remove_node(struct gcov_node *node);
229
230/*
231 * write() implementation for gcov data files. Reset profiling data for the
232 * associated file. If the object file has been unloaded (i.e. this is
233 * a "ghost" node), remove the debug fs node as well.
234 */
235static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
236 size_t len, loff_t *pos)
237{
238 struct seq_file *seq;
239 struct gcov_info *info;
240 struct gcov_node *node;
241
242 seq = file->private_data;
243 info = gcov_iter_get_info(seq->private);
244 mutex_lock(&node_lock);
245 node = get_node_by_name(info->filename);
246 if (node) {
247 /* Reset counts or remove node for unloaded modules. */
248 if (node->ghost)
249 remove_node(node);
250 else
251 gcov_info_reset(node->info);
252 }
253 /* Reset counts for open file. */
254 gcov_info_reset(info);
255 mutex_unlock(&node_lock);
256
257 return len;
258}
259
260/*
261 * Given a string <path> representing a file path of format:
262 * path/to/file.gcda
263 * construct and return a new string:
264 * <dir/>path/to/file.<ext>
265 */
266static char *link_target(const char *dir, const char *path, const char *ext)
267{
268 char *target;
269 char *old_ext;
270 char *copy;
271
272 copy = kstrdup(path, GFP_KERNEL);
273 if (!copy)
274 return NULL;
275 old_ext = strrchr(copy, '.');
276 if (old_ext)
277 *old_ext = '\0';
278 if (dir)
279 target = kasprintf(GFP_KERNEL, "%s/%s.%s", dir, copy, ext);
280 else
281 target = kasprintf(GFP_KERNEL, "%s.%s", copy, ext);
282 kfree(copy);
283
284 return target;
285}
286
287/*
288 * Construct a string representing the symbolic link target for the given
289 * gcov data file name and link type. Depending on the link type and the
290 * location of the data file, the link target can either point to a
291 * subdirectory of srctree, objtree or in an external location.
292 */
293static char *get_link_target(const char *filename, const struct gcov_link *ext)
294{
295 const char *rel;
296 char *result;
297
298 if (strncmp(filename, objtree, strlen(objtree)) == 0) {
299 rel = filename + strlen(objtree) + 1;
300 if (ext->dir == SRC_TREE)
301 result = link_target(srctree, rel, ext->ext);
302 else
303 result = link_target(objtree, rel, ext->ext);
304 } else {
305 /* External compilation. */
306 result = link_target(NULL, filename, ext->ext);
307 }
308
309 return result;
310}
311
312#define SKEW_PREFIX ".tmp_"
313
314/*
315 * For a filename .tmp_filename.ext return filename.ext. Needed to compensate
316 * for filename skewing caused by the mod-versioning mechanism.
317 */
318static const char *deskew(const char *basename)
319{
320 if (strncmp(basename, SKEW_PREFIX, sizeof(SKEW_PREFIX) - 1) == 0)
321 return basename + sizeof(SKEW_PREFIX) - 1;
322 return basename;
323}
324
325/*
326 * Create links to additional files (usually .c and .gcno files) which the
327 * gcov tool expects to find in the same directory as the gcov data file.
328 */
329static void add_links(struct gcov_node *node, struct dentry *parent)
330{
331 char *basename;
332 char *target;
333 int num;
334 int i;
335
336 for (num = 0; gcov_link[num].ext; num++)
337 /* Nothing. */;
338 node->links = kcalloc(num, sizeof(struct dentry *), GFP_KERNEL);
339 if (!node->links)
340 return;
341 for (i = 0; i < num; i++) {
342 target = get_link_target(get_node_info(node)->filename,
343 &gcov_link[i]);
344 if (!target)
345 goto out_err;
346 basename = strrchr(target, '/');
347 if (!basename)
348 goto out_err;
349 basename++;
350 node->links[i] = debugfs_create_symlink(deskew(basename),
351 parent, target);
352 if (!node->links[i])
353 goto out_err;
354 kfree(target);
355 }
356
357 return;
358out_err:
359 kfree(target);
360 while (i-- > 0)
361 debugfs_remove(node->links[i]);
362 kfree(node->links);
363 node->links = NULL;
364}
365
366static const struct file_operations gcov_data_fops = {
367 .open = gcov_seq_open,
368 .release = gcov_seq_release,
369 .read = seq_read,
370 .llseek = seq_lseek,
371 .write = gcov_seq_write,
372};
373
374/* Basic initialization of a new node. */
375static void init_node(struct gcov_node *node, struct gcov_info *info,
376 const char *name, struct gcov_node *parent)
377{
378 INIT_LIST_HEAD(&node->list);
379 INIT_LIST_HEAD(&node->children);
380 INIT_LIST_HEAD(&node->all);
381 node->info = info;
382 node->parent = parent;
383 if (name)
384 strcpy(node->name, name);
385}
386
387/*
388 * Create a new node and associated debugfs entry. Needs to be called with
389 * node_lock held.
390 */
391static struct gcov_node *new_node(struct gcov_node *parent,
392 struct gcov_info *info, const char *name)
393{
394 struct gcov_node *node;
395
396 node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL);
397 if (!node) {
398 pr_warning("out of memory\n");
399 return NULL;
400 }
401 init_node(node, info, name, parent);
402 /* Differentiate between gcov data file nodes and directory nodes. */
403 if (info) {
404 node->dentry = debugfs_create_file(deskew(node->name), 0600,
405 parent->dentry, node, &gcov_data_fops);
406 } else
407 node->dentry = debugfs_create_dir(node->name, parent->dentry);
408 if (!node->dentry) {
409 pr_warning("could not create file\n");
410 kfree(node);
411 return NULL;
412 }
413 if (info)
414 add_links(node, parent->dentry);
415 list_add(&node->list, &parent->children);
416 list_add(&node->all, &all_head);
417
418 return node;
419}
420
421/* Remove symbolic links associated with node. */
422static void remove_links(struct gcov_node *node)
423{
424 int i;
425
426 if (!node->links)
427 return;
428 for (i = 0; gcov_link[i].ext; i++)
429 debugfs_remove(node->links[i]);
430 kfree(node->links);
431 node->links = NULL;
432}
433
434/*
435 * Remove node from all lists and debugfs and release associated resources.
436 * Needs to be called with node_lock held.
437 */
438static void release_node(struct gcov_node *node)
439{
440 list_del(&node->list);
441 list_del(&node->all);
442 debugfs_remove(node->dentry);
443 remove_links(node);
444 if (node->ghost)
445 gcov_info_free(node->ghost);
446 kfree(node);
447}
448
449/* Release node and empty parents. Needs to be called with node_lock held. */
450static void remove_node(struct gcov_node *node)
451{
452 struct gcov_node *parent;
453
454 while ((node != &root_node) && list_empty(&node->children)) {
455 parent = node->parent;
456 release_node(node);
457 node = parent;
458 }
459}
460
461/*
462 * Find child node with given basename. Needs to be called with node_lock
463 * held.
464 */
465static struct gcov_node *get_child_by_name(struct gcov_node *parent,
466 const char *name)
467{
468 struct gcov_node *node;
469
470 list_for_each_entry(node, &parent->children, list) {
471 if (strcmp(node->name, name) == 0)
472 return node;
473 }
474
475 return NULL;
476}
477
478/*
479 * write() implementation for reset file. Reset all profiling data to zero
480 * and remove ghost nodes.
481 */
482static ssize_t reset_write(struct file *file, const char __user *addr,
483 size_t len, loff_t *pos)
484{
485 struct gcov_node *node;
486
487 mutex_lock(&node_lock);
488restart:
489 list_for_each_entry(node, &all_head, all) {
490 if (node->info)
491 gcov_info_reset(node->info);
492 else if (list_empty(&node->children)) {
493 remove_node(node);
494 /* Several nodes may have gone - restart loop. */
495 goto restart;
496 }
497 }
498 mutex_unlock(&node_lock);
499
500 return len;
501}
502
503/* read() implementation for reset file. Unused. */
504static ssize_t reset_read(struct file *file, char __user *addr, size_t len,
505 loff_t *pos)
506{
507 /* Allow read operation so that a recursive copy won't fail. */
508 return 0;
509}
510
511static const struct file_operations gcov_reset_fops = {
512 .write = reset_write,
513 .read = reset_read,
514};
515
516/*
517 * Create a node for a given profiling data set and add it to all lists and
518 * debugfs. Needs to be called with node_lock held.
519 */
520static void add_node(struct gcov_info *info)
521{
522 char *filename;
523 char *curr;
524 char *next;
525 struct gcov_node *parent;
526 struct gcov_node *node;
527
528 filename = kstrdup(info->filename, GFP_KERNEL);
529 if (!filename)
530 return;
531 parent = &root_node;
532 /* Create directory nodes along the path. */
533 for (curr = filename; (next = strchr(curr, '/')); curr = next + 1) {
534 if (curr == next)
535 continue;
536 *next = 0;
537 if (strcmp(curr, ".") == 0)
538 continue;
539 if (strcmp(curr, "..") == 0) {
540 if (!parent->parent)
541 goto err_remove;
542 parent = parent->parent;
543 continue;
544 }
545 node = get_child_by_name(parent, curr);
546 if (!node) {
547 node = new_node(parent, NULL, curr);
548 if (!node)
549 goto err_remove;
550 }
551 parent = node;
552 }
553 /* Create file node. */
554 node = new_node(parent, info, curr);
555 if (!node)
556 goto err_remove;
557out:
558 kfree(filename);
559 return;
560
561err_remove:
562 remove_node(parent);
563 goto out;
564}
565
566/*
567 * The profiling data set associated with this node is being unloaded. Store a
568 * copy of the profiling data and turn this node into a "ghost".
569 */
570static int ghost_node(struct gcov_node *node)
571{
572 node->ghost = gcov_info_dup(node->info);
573 if (!node->ghost) {
574 pr_warning("could not save data for '%s' (out of memory)\n",
575 node->info->filename);
576 return -ENOMEM;
577 }
578 node->info = NULL;
579
580 return 0;
581}
582
583/*
584 * Profiling data for this node has been loaded again. Add profiling data
585 * from previous instantiation and turn this node into a regular node.
586 */
587static void revive_node(struct gcov_node *node, struct gcov_info *info)
588{
589 if (gcov_info_is_compatible(node->ghost, info))
590 gcov_info_add(info, node->ghost);
591 else {
592 pr_warning("discarding saved data for '%s' (version changed)\n",
593 info->filename);
594 }
595 gcov_info_free(node->ghost);
596 node->ghost = NULL;
597 node->info = info;
598}
599
600/*
601 * Callback to create/remove profiling files when code compiled with
602 * -fprofile-arcs is loaded/unloaded.
603 */
604void gcov_event(enum gcov_action action, struct gcov_info *info)
605{
606 struct gcov_node *node;
607
608 mutex_lock(&node_lock);
609 node = get_node_by_name(info->filename);
610 switch (action) {
611 case GCOV_ADD:
612 /* Add new node or revive ghost. */
613 if (!node) {
614 add_node(info);
615 break;
616 }
617 if (gcov_persist)
618 revive_node(node, info);
619 else {
620 pr_warning("could not add '%s' (already exists)\n",
621 info->filename);
622 }
623 break;
624 case GCOV_REMOVE:
625 /* Remove node or turn into ghost. */
626 if (!node) {
627 pr_warning("could not remove '%s' (not found)\n",
628 info->filename);
629 break;
630 }
631 if (gcov_persist) {
632 if (!ghost_node(node))
633 break;
634 }
635 remove_node(node);
636 break;
637 }
638 mutex_unlock(&node_lock);
639}
640
641/* Create debugfs entries. */
642static __init int gcov_fs_init(void)
643{
644 int rc = -EIO;
645
646 init_node(&root_node, NULL, NULL, NULL);
647 /*
648 * /sys/kernel/debug/gcov will be parent for the reset control file
649 * and all profiling files.
650 */
651 root_node.dentry = debugfs_create_dir("gcov", NULL);
652 if (!root_node.dentry)
653 goto err_remove;
654 /*
655 * Create reset file which resets all profiling counts when written
656 * to.
657 */
658 reset_dentry = debugfs_create_file("reset", 0600, root_node.dentry,
659 NULL, &gcov_reset_fops);
660 if (!reset_dentry)
661 goto err_remove;
662 /* Replay previous events to get our fs hierarchy up-to-date. */
663 gcov_enable_events();
664 return 0;
665
666err_remove:
667 pr_err("init failed\n");
668 if (root_node.dentry)
669 debugfs_remove(root_node.dentry);
670
671 return rc;
672}
673device_initcall(gcov_fs_init);
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
new file mode 100644
index 000000000000..ae5bb4260033
--- /dev/null
+++ b/kernel/gcov/gcc_3_4.c
@@ -0,0 +1,447 @@
1/*
2 * This code provides functions to handle gcc's profiling data format
3 * introduced with gcc 3.4. Future versions of gcc may change the gcov
4 * format (as happened before), so all format-specific information needs
5 * to be kept modular and easily exchangeable.
6 *
7 * This file is based on gcc-internal definitions. Functions and data
8 * structures are defined to be compatible with gcc counterparts.
9 * For a better understanding, refer to gcc source: gcc/gcov-io.h.
10 *
11 * Copyright IBM Corp. 2009
12 * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
13 *
14 * Uses gcc-internal data definitions.
15 */
16
17#include <linux/errno.h>
18#include <linux/slab.h>
19#include <linux/string.h>
20#include <linux/seq_file.h>
21#include <linux/vmalloc.h>
22#include "gcov.h"
23
24/* Symbolic links to be created for each profiling data file. */
25const struct gcov_link gcov_link[] = {
26 { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */
27 { 0, NULL},
28};
29
30/*
31 * Determine whether a counter is active. Based on gcc magic. Doesn't change
32 * at run-time.
33 */
34static int counter_active(struct gcov_info *info, unsigned int type)
35{
36 return (1 << type) & info->ctr_mask;
37}
38
39/* Determine number of active counters. Based on gcc magic. */
40static unsigned int num_counter_active(struct gcov_info *info)
41{
42 unsigned int i;
43 unsigned int result = 0;
44
45 for (i = 0; i < GCOV_COUNTERS; i++) {
46 if (counter_active(info, i))
47 result++;
48 }
49 return result;
50}
51
52/**
53 * gcov_info_reset - reset profiling data to zero
54 * @info: profiling data set
55 */
56void gcov_info_reset(struct gcov_info *info)
57{
58 unsigned int active = num_counter_active(info);
59 unsigned int i;
60
61 for (i = 0; i < active; i++) {
62 memset(info->counts[i].values, 0,
63 info->counts[i].num * sizeof(gcov_type));
64 }
65}
66
67/**
68 * gcov_info_is_compatible - check if profiling data can be added
69 * @info1: first profiling data set
70 * @info2: second profiling data set
71 *
72 * Returns non-zero if profiling data can be added, zero otherwise.
73 */
74int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2)
75{
76 return (info1->stamp == info2->stamp);
77}
78
79/**
80 * gcov_info_add - add up profiling data
81 * @dest: profiling data set to which data is added
82 * @source: profiling data set which is added
83 *
84 * Adds profiling counts of @source to @dest.
85 */
86void gcov_info_add(struct gcov_info *dest, struct gcov_info *source)
87{
88 unsigned int i;
89 unsigned int j;
90
91 for (i = 0; i < num_counter_active(dest); i++) {
92 for (j = 0; j < dest->counts[i].num; j++) {
93 dest->counts[i].values[j] +=
94 source->counts[i].values[j];
95 }
96 }
97}
98
99/* Get size of function info entry. Based on gcc magic. */
100static size_t get_fn_size(struct gcov_info *info)
101{
102 size_t size;
103
104 size = sizeof(struct gcov_fn_info) + num_counter_active(info) *
105 sizeof(unsigned int);
106 if (__alignof__(struct gcov_fn_info) > sizeof(unsigned int))
107 size = ALIGN(size, __alignof__(struct gcov_fn_info));
108 return size;
109}
110
111/* Get address of function info entry. Based on gcc magic. */
112static struct gcov_fn_info *get_fn_info(struct gcov_info *info, unsigned int fn)
113{
114 return (struct gcov_fn_info *)
115 ((char *) info->functions + fn * get_fn_size(info));
116}
117
118/**
119 * gcov_info_dup - duplicate profiling data set
120 * @info: profiling data set to duplicate
121 *
122 * Return newly allocated duplicate on success, %NULL on error.
123 */
124struct gcov_info *gcov_info_dup(struct gcov_info *info)
125{
126 struct gcov_info *dup;
127 unsigned int i;
128 unsigned int active;
129
130 /* Duplicate gcov_info. */
131 active = num_counter_active(info);
132 dup = kzalloc(sizeof(struct gcov_info) +
133 sizeof(struct gcov_ctr_info) * active, GFP_KERNEL);
134 if (!dup)
135 return NULL;
136 dup->version = info->version;
137 dup->stamp = info->stamp;
138 dup->n_functions = info->n_functions;
139 dup->ctr_mask = info->ctr_mask;
140 /* Duplicate filename. */
141 dup->filename = kstrdup(info->filename, GFP_KERNEL);
142 if (!dup->filename)
143 goto err_free;
144 /* Duplicate table of functions. */
145 dup->functions = kmemdup(info->functions, info->n_functions *
146 get_fn_size(info), GFP_KERNEL);
147 if (!dup->functions)
148 goto err_free;
149 /* Duplicate counter arrays. */
150 for (i = 0; i < active ; i++) {
151 struct gcov_ctr_info *ctr = &info->counts[i];
152 size_t size = ctr->num * sizeof(gcov_type);
153
154 dup->counts[i].num = ctr->num;
155 dup->counts[i].merge = ctr->merge;
156 dup->counts[i].values = vmalloc(size);
157 if (!dup->counts[i].values)
158 goto err_free;
159 memcpy(dup->counts[i].values, ctr->values, size);
160 }
161 return dup;
162
163err_free:
164 gcov_info_free(dup);
165 return NULL;
166}
167
168/**
169 * gcov_info_free - release memory for profiling data set duplicate
170 * @info: profiling data set duplicate to free
171 */
172void gcov_info_free(struct gcov_info *info)
173{
174 unsigned int active = num_counter_active(info);
175 unsigned int i;
176
177 for (i = 0; i < active ; i++)
178 vfree(info->counts[i].values);
179 kfree(info->functions);
180 kfree(info->filename);
181 kfree(info);
182}
183
184/**
185 * struct type_info - iterator helper array
186 * @ctr_type: counter type
187 * @offset: index of the first value of the current function for this type
188 *
189 * This array is needed to convert the in-memory data format into the in-file
190 * data format:
191 *
192 * In-memory:
193 * for each counter type
194 * for each function
195 * values
196 *
197 * In-file:
198 * for each function
199 * for each counter type
200 * values
201 *
202 * See gcc source gcc/gcov-io.h for more information on data organization.
203 */
204struct type_info {
205 int ctr_type;
206 unsigned int offset;
207};
208
209/**
210 * struct gcov_iterator - specifies current file position in logical records
211 * @info: associated profiling data
212 * @record: record type
213 * @function: function number
214 * @type: counter type
215 * @count: index into values array
216 * @num_types: number of counter types
217 * @type_info: helper array to get values-array offset for current function
218 */
219struct gcov_iterator {
220 struct gcov_info *info;
221
222 int record;
223 unsigned int function;
224 unsigned int type;
225 unsigned int count;
226
227 int num_types;
228 struct type_info type_info[0];
229};
230
231static struct gcov_fn_info *get_func(struct gcov_iterator *iter)
232{
233 return get_fn_info(iter->info, iter->function);
234}
235
236static struct type_info *get_type(struct gcov_iterator *iter)
237{
238 return &iter->type_info[iter->type];
239}
240
241/**
242 * gcov_iter_new - allocate and initialize profiling data iterator
243 * @info: profiling data set to be iterated
244 *
245 * Return file iterator on success, %NULL otherwise.
246 */
247struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
248{
249 struct gcov_iterator *iter;
250
251 iter = kzalloc(sizeof(struct gcov_iterator) +
252 num_counter_active(info) * sizeof(struct type_info),
253 GFP_KERNEL);
254 if (iter)
255 iter->info = info;
256
257 return iter;
258}
259
260/**
261 * gcov_iter_free - release memory for iterator
262 * @iter: file iterator to free
263 */
264void gcov_iter_free(struct gcov_iterator *iter)
265{
266 kfree(iter);
267}
268
269/**
270 * gcov_iter_get_info - return profiling data set for given file iterator
271 * @iter: file iterator
272 */
273struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
274{
275 return iter->info;
276}
277
278/**
279 * gcov_iter_start - reset file iterator to starting position
280 * @iter: file iterator
281 */
282void gcov_iter_start(struct gcov_iterator *iter)
283{
284 int i;
285
286 iter->record = 0;
287 iter->function = 0;
288 iter->type = 0;
289 iter->count = 0;
290 iter->num_types = 0;
291 for (i = 0; i < GCOV_COUNTERS; i++) {
292 if (counter_active(iter->info, i)) {
293 iter->type_info[iter->num_types].ctr_type = i;
294 iter->type_info[iter->num_types++].offset = 0;
295 }
296 }
297}
298
299/* Mapping of logical record number to actual file content. */
300#define RECORD_FILE_MAGIC 0
301#define RECORD_GCOV_VERSION 1
302#define RECORD_TIME_STAMP 2
303#define RECORD_FUNCTION_TAG 3
304#define RECORD_FUNCTON_TAG_LEN 4
305#define RECORD_FUNCTION_IDENT 5
306#define RECORD_FUNCTION_CHECK 6
307#define RECORD_COUNT_TAG 7
308#define RECORD_COUNT_LEN 8
309#define RECORD_COUNT 9
310
311/**
312 * gcov_iter_next - advance file iterator to next logical record
313 * @iter: file iterator
314 *
315 * Return zero if new position is valid, non-zero if iterator has reached end.
316 */
317int gcov_iter_next(struct gcov_iterator *iter)
318{
319 switch (iter->record) {
320 case RECORD_FILE_MAGIC:
321 case RECORD_GCOV_VERSION:
322 case RECORD_FUNCTION_TAG:
323 case RECORD_FUNCTON_TAG_LEN:
324 case RECORD_FUNCTION_IDENT:
325 case RECORD_COUNT_TAG:
326 /* Advance to next record */
327 iter->record++;
328 break;
329 case RECORD_COUNT:
330 /* Advance to next count */
331 iter->count++;
332 /* fall through */
333 case RECORD_COUNT_LEN:
334 if (iter->count < get_func(iter)->n_ctrs[iter->type]) {
335 iter->record = 9;
336 break;
337 }
338 /* Advance to next counter type */
339 get_type(iter)->offset += iter->count;
340 iter->count = 0;
341 iter->type++;
342 /* fall through */
343 case RECORD_FUNCTION_CHECK:
344 if (iter->type < iter->num_types) {
345 iter->record = 7;
346 break;
347 }
348 /* Advance to next function */
349 iter->type = 0;
350 iter->function++;
351 /* fall through */
352 case RECORD_TIME_STAMP:
353 if (iter->function < iter->info->n_functions)
354 iter->record = 3;
355 else
356 iter->record = -1;
357 break;
358 }
359 /* Check for EOF. */
360 if (iter->record == -1)
361 return -EINVAL;
362 else
363 return 0;
364}
365
366/**
367 * seq_write_gcov_u32 - write 32 bit number in gcov format to seq_file
368 * @seq: seq_file handle
369 * @v: value to be stored
370 *
371 * Number format defined by gcc: numbers are recorded in the 32 bit
372 * unsigned binary form of the endianness of the machine generating the
373 * file.
374 */
375static int seq_write_gcov_u32(struct seq_file *seq, u32 v)
376{
377 return seq_write(seq, &v, sizeof(v));
378}
379
380/**
381 * seq_write_gcov_u64 - write 64 bit number in gcov format to seq_file
382 * @seq: seq_file handle
383 * @v: value to be stored
384 *
385 * Number format defined by gcc: numbers are recorded in the 32 bit
386 * unsigned binary form of the endianness of the machine generating the
387 * file. 64 bit numbers are stored as two 32 bit numbers, the low part
388 * first.
389 */
390static int seq_write_gcov_u64(struct seq_file *seq, u64 v)
391{
392 u32 data[2];
393
394 data[0] = (v & 0xffffffffUL);
395 data[1] = (v >> 32);
396 return seq_write(seq, data, sizeof(data));
397}
398
399/**
400 * gcov_iter_write - write data for current pos to seq_file
401 * @iter: file iterator
402 * @seq: seq_file handle
403 *
404 * Return zero on success, non-zero otherwise.
405 */
406int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
407{
408 int rc = -EINVAL;
409
410 switch (iter->record) {
411 case RECORD_FILE_MAGIC:
412 rc = seq_write_gcov_u32(seq, GCOV_DATA_MAGIC);
413 break;
414 case RECORD_GCOV_VERSION:
415 rc = seq_write_gcov_u32(seq, iter->info->version);
416 break;
417 case RECORD_TIME_STAMP:
418 rc = seq_write_gcov_u32(seq, iter->info->stamp);
419 break;
420 case RECORD_FUNCTION_TAG:
421 rc = seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION);
422 break;
423 case RECORD_FUNCTON_TAG_LEN:
424 rc = seq_write_gcov_u32(seq, 2);
425 break;
426 case RECORD_FUNCTION_IDENT:
427 rc = seq_write_gcov_u32(seq, get_func(iter)->ident);
428 break;
429 case RECORD_FUNCTION_CHECK:
430 rc = seq_write_gcov_u32(seq, get_func(iter)->checksum);
431 break;
432 case RECORD_COUNT_TAG:
433 rc = seq_write_gcov_u32(seq,
434 GCOV_TAG_FOR_COUNTER(get_type(iter)->ctr_type));
435 break;
436 case RECORD_COUNT_LEN:
437 rc = seq_write_gcov_u32(seq,
438 get_func(iter)->n_ctrs[iter->type] * 2);
439 break;
440 case RECORD_COUNT:
441 rc = seq_write_gcov_u64(seq,
442 iter->info->counts[iter->type].
443 values[iter->count + get_type(iter)->offset]);
444 break;
445 }
446 return rc;
447}
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h
new file mode 100644
index 000000000000..060073ebf7a6
--- /dev/null
+++ b/kernel/gcov/gcov.h
@@ -0,0 +1,128 @@
1/*
2 * Profiling infrastructure declarations.
3 *
4 * This file is based on gcc-internal definitions. Data structures are
5 * defined to be compatible with gcc counterparts. For a better
6 * understanding, refer to gcc source: gcc/gcov-io.h.
7 *
8 * Copyright IBM Corp. 2009
9 * Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
10 *
11 * Uses gcc-internal data definitions.
12 */
13
14#ifndef GCOV_H
15#define GCOV_H GCOV_H
16
17#include <linux/types.h>
18
19/*
20 * Profiling data types used for gcc 3.4 and above - these are defined by
21 * gcc and need to be kept as close to the original definition as possible to
22 * remain compatible.
23 */
24#define GCOV_COUNTERS 5
25#define GCOV_DATA_MAGIC ((unsigned int) 0x67636461)
26#define GCOV_TAG_FUNCTION ((unsigned int) 0x01000000)
27#define GCOV_TAG_COUNTER_BASE ((unsigned int) 0x01a10000)
28#define GCOV_TAG_FOR_COUNTER(count) \
29 (GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17))
30
31#if BITS_PER_LONG >= 64
32typedef long gcov_type;
33#else
34typedef long long gcov_type;
35#endif
36
37/**
38 * struct gcov_fn_info - profiling meta data per function
39 * @ident: object file-unique function identifier
40 * @checksum: function checksum
41 * @n_ctrs: number of values per counter type belonging to this function
42 *
43 * This data is generated by gcc during compilation and doesn't change
44 * at run-time.
45 */
46struct gcov_fn_info {
47 unsigned int ident;
48 unsigned int checksum;
49 unsigned int n_ctrs[0];
50};
51
52/**
53 * struct gcov_ctr_info - profiling data per counter type
54 * @num: number of counter values for this type
55 * @values: array of counter values for this type
56 * @merge: merge function for counter values of this type (unused)
57 *
58 * This data is generated by gcc during compilation and doesn't change
59 * at run-time with the exception of the values array.
60 */
61struct gcov_ctr_info {
62 unsigned int num;
63 gcov_type *values;
64 void (*merge)(gcov_type *, unsigned int);
65};
66
67/**
68 * struct gcov_info - profiling data per object file
69 * @version: gcov version magic indicating the gcc version used for compilation
70 * @next: list head for a singly-linked list
71 * @stamp: time stamp
72 * @filename: name of the associated gcov data file
73 * @n_functions: number of instrumented functions
74 * @functions: function data
75 * @ctr_mask: mask specifying which counter types are active
76 * @counts: counter data per counter type
77 *
78 * This data is generated by gcc during compilation and doesn't change
79 * at run-time with the exception of the next pointer.
80 */
81struct gcov_info {
82 unsigned int version;
83 struct gcov_info *next;
84 unsigned int stamp;
85 const char *filename;
86 unsigned int n_functions;
87 const struct gcov_fn_info *functions;
88 unsigned int ctr_mask;
89 struct gcov_ctr_info counts[0];
90};
91
92/* Base interface. */
93enum gcov_action {
94 GCOV_ADD,
95 GCOV_REMOVE,
96};
97
98void gcov_event(enum gcov_action action, struct gcov_info *info);
99void gcov_enable_events(void);
100
101/* Iterator control. */
102struct seq_file;
103struct gcov_iterator;
104
105struct gcov_iterator *gcov_iter_new(struct gcov_info *info);
106void gcov_iter_free(struct gcov_iterator *iter);
107void gcov_iter_start(struct gcov_iterator *iter);
108int gcov_iter_next(struct gcov_iterator *iter);
109int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq);
110struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter);
111
112/* gcov_info control. */
113void gcov_info_reset(struct gcov_info *info);
114int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2);
115void gcov_info_add(struct gcov_info *dest, struct gcov_info *source);
116struct gcov_info *gcov_info_dup(struct gcov_info *info);
117void gcov_info_free(struct gcov_info *info);
118
119struct gcov_link {
120 enum {
121 OBJ_TREE,
122 SRC_TREE,
123 } dir;
124 const char *ext;
125};
126extern const struct gcov_link gcov_link[];
127
128#endif /* GCOV_H */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index aaf5c9d05770..50da67672901 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -856,7 +856,7 @@ EXPORT_SYMBOL(free_irq);
856 * still called in hard interrupt context and has to check 856 * still called in hard interrupt context and has to check
857 * whether the interrupt originates from the device. If yes it 857 * whether the interrupt originates from the device. If yes it
858 * needs to disable the interrupt on the device and return 858 * needs to disable the interrupt on the device and return
859 * IRQ_THREAD_WAKE which will wake up the handler thread and run 859 * IRQ_WAKE_THREAD which will wake up the handler thread and run
860 * @thread_fn. This split handler design is necessary to support 860 * @thread_fn. This split handler design is necessary to support
861 * shared interrupts. 861 * shared interrupts.
862 * 862 *
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 7fa441333529..9b1a7de26979 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -27,7 +27,6 @@ struct kthread_create_info
27 /* Information passed to kthread() from kthreadd. */ 27 /* Information passed to kthread() from kthreadd. */
28 int (*threadfn)(void *data); 28 int (*threadfn)(void *data);
29 void *data; 29 void *data;
30 struct completion started;
31 30
32 /* Result passed back to kthread_create() from kthreadd. */ 31 /* Result passed back to kthread_create() from kthreadd. */
33 struct task_struct *result; 32 struct task_struct *result;
@@ -36,17 +35,13 @@ struct kthread_create_info
36 struct list_head list; 35 struct list_head list;
37}; 36};
38 37
39struct kthread_stop_info 38struct kthread {
40{ 39 int should_stop;
41 struct task_struct *k; 40 struct completion exited;
42 int err;
43 struct completion done;
44}; 41};
45 42
46/* Thread stopping is done by setthing this var: lock serializes 43#define to_kthread(tsk) \
47 * multiple kthread_stop calls. */ 44 container_of((tsk)->vfork_done, struct kthread, exited)
48static DEFINE_MUTEX(kthread_stop_lock);
49static struct kthread_stop_info kthread_stop_info;
50 45
51/** 46/**
52 * kthread_should_stop - should this kthread return now? 47 * kthread_should_stop - should this kthread return now?
@@ -57,36 +52,35 @@ static struct kthread_stop_info kthread_stop_info;
57 */ 52 */
58int kthread_should_stop(void) 53int kthread_should_stop(void)
59{ 54{
60 return (kthread_stop_info.k == current); 55 return to_kthread(current)->should_stop;
61} 56}
62EXPORT_SYMBOL(kthread_should_stop); 57EXPORT_SYMBOL(kthread_should_stop);
63 58
64static int kthread(void *_create) 59static int kthread(void *_create)
65{ 60{
61 /* Copy data: it's on kthread's stack */
66 struct kthread_create_info *create = _create; 62 struct kthread_create_info *create = _create;
67 int (*threadfn)(void *data); 63 int (*threadfn)(void *data) = create->threadfn;
68 void *data; 64 void *data = create->data;
69 int ret = -EINTR; 65 struct kthread self;
66 int ret;
70 67
71 /* Copy data: it's on kthread's stack */ 68 self.should_stop = 0;
72 threadfn = create->threadfn; 69 init_completion(&self.exited);
73 data = create->data; 70 current->vfork_done = &self.exited;
74 71
75 /* OK, tell user we're spawned, wait for stop or wakeup */ 72 /* OK, tell user we're spawned, wait for stop or wakeup */
76 __set_current_state(TASK_UNINTERRUPTIBLE); 73 __set_current_state(TASK_UNINTERRUPTIBLE);
77 create->result = current; 74 create->result = current;
78 complete(&create->started); 75 complete(&create->done);
79 schedule(); 76 schedule();
80 77
81 if (!kthread_should_stop()) 78 ret = -EINTR;
79 if (!self.should_stop)
82 ret = threadfn(data); 80 ret = threadfn(data);
83 81
84 /* It might have exited on its own, w/o kthread_stop. Check. */ 82 /* we can't just return, we must preserve "self" on stack */
85 if (kthread_should_stop()) { 83 do_exit(ret);
86 kthread_stop_info.err = ret;
87 complete(&kthread_stop_info.done);
88 }
89 return 0;
90} 84}
91 85
92static void create_kthread(struct kthread_create_info *create) 86static void create_kthread(struct kthread_create_info *create)
@@ -95,11 +89,10 @@ static void create_kthread(struct kthread_create_info *create)
95 89
96 /* We want our own signal handler (we take no signals by default). */ 90 /* We want our own signal handler (we take no signals by default). */
97 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); 91 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
98 if (pid < 0) 92 if (pid < 0) {
99 create->result = ERR_PTR(pid); 93 create->result = ERR_PTR(pid);
100 else 94 complete(&create->done);
101 wait_for_completion(&create->started); 95 }
102 complete(&create->done);
103} 96}
104 97
105/** 98/**
@@ -130,7 +123,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
130 123
131 create.threadfn = threadfn; 124 create.threadfn = threadfn;
132 create.data = data; 125 create.data = data;
133 init_completion(&create.started);
134 init_completion(&create.done); 126 init_completion(&create.done);
135 127
136 spin_lock(&kthread_create_lock); 128 spin_lock(&kthread_create_lock);
@@ -198,30 +190,22 @@ EXPORT_SYMBOL(kthread_bind);
198 */ 190 */
199int kthread_stop(struct task_struct *k) 191int kthread_stop(struct task_struct *k)
200{ 192{
193 struct kthread *kthread;
201 int ret; 194 int ret;
202 195
203 mutex_lock(&kthread_stop_lock);
204
205 /* It could exit after stop_info.k set, but before wake_up_process. */
206 get_task_struct(k);
207
208 trace_sched_kthread_stop(k); 196 trace_sched_kthread_stop(k);
197 get_task_struct(k);
209 198
210 /* Must init completion *before* thread sees kthread_stop_info.k */ 199 kthread = to_kthread(k);
211 init_completion(&kthread_stop_info.done); 200 barrier(); /* it might have exited */
212 smp_wmb(); 201 if (k->vfork_done != NULL) {
202 kthread->should_stop = 1;
203 wake_up_process(k);
204 wait_for_completion(&kthread->exited);
205 }
206 ret = k->exit_code;
213 207
214 /* Now set kthread_should_stop() to true, and wake it up. */
215 kthread_stop_info.k = k;
216 wake_up_process(k);
217 put_task_struct(k); 208 put_task_struct(k);
218
219 /* Once it dies, reset stop ptr, gather result and we're done. */
220 wait_for_completion(&kthread_stop_info.done);
221 kthread_stop_info.k = NULL;
222 ret = kthread_stop_info.err;
223 mutex_unlock(&kthread_stop_lock);
224
225 trace_sched_kthread_stop_ret(ret); 209 trace_sched_kthread_stop_ret(ret);
226 210
227 return ret; 211 return ret;
diff --git a/kernel/module.c b/kernel/module.c
index 215aaab09e91..38928fcaff2b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2216,6 +2216,10 @@ static noinline struct module *load_module(void __user *umod,
2216 mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, 2216 mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
2217 "__kcrctab_unused_gpl"); 2217 "__kcrctab_unused_gpl");
2218#endif 2218#endif
2219#ifdef CONFIG_CONSTRUCTORS
2220 mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
2221 sizeof(*mod->ctors), &mod->num_ctors);
2222#endif
2219 2223
2220#ifdef CONFIG_MARKERS 2224#ifdef CONFIG_MARKERS
2221 mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", 2225 mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
@@ -2389,6 +2393,17 @@ static noinline struct module *load_module(void __user *umod,
2389 goto free_hdr; 2393 goto free_hdr;
2390} 2394}
2391 2395
2396/* Call module constructors. */
2397static void do_mod_ctors(struct module *mod)
2398{
2399#ifdef CONFIG_CONSTRUCTORS
2400 unsigned long i;
2401
2402 for (i = 0; i < mod->num_ctors; i++)
2403 mod->ctors[i]();
2404#endif
2405}
2406
2392/* This is where the real work happens */ 2407/* This is where the real work happens */
2393SYSCALL_DEFINE3(init_module, void __user *, umod, 2408SYSCALL_DEFINE3(init_module, void __user *, umod,
2394 unsigned long, len, const char __user *, uargs) 2409 unsigned long, len, const char __user *, uargs)
@@ -2417,6 +2432,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2417 blocking_notifier_call_chain(&module_notify_list, 2432 blocking_notifier_call_chain(&module_notify_list,
2418 MODULE_STATE_COMING, mod); 2433 MODULE_STATE_COMING, mod);
2419 2434
2435 do_mod_ctors(mod);
2420 /* Start the module */ 2436 /* Start the module */
2421 if (mod->init != NULL) 2437 if (mod->init != NULL)
2422 ret = do_one_initcall(mod->init); 2438 ret = do_one_initcall(mod->init);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 63598dca2d0c..09b4ff9711b2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,14 @@ static struct kmem_cache *nsproxy_cachep;
26 26
27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); 27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
28 28
29/* 29static inline struct nsproxy *create_nsproxy(void)
30 * creates a copy of "orig" with refcount 1.
31 */
32static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
33{ 30{
34 struct nsproxy *ns; 31 struct nsproxy *nsproxy;
35 32
36 ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); 33 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
37 if (ns) { 34 if (nsproxy)
38 memcpy(ns, orig, sizeof(struct nsproxy)); 35 atomic_set(&nsproxy->count, 1);
39 atomic_set(&ns->count, 1); 36 return nsproxy;
40 }
41 return ns;
42} 37}
43 38
44/* 39/*
@@ -52,7 +47,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
52 struct nsproxy *new_nsp; 47 struct nsproxy *new_nsp;
53 int err; 48 int err;
54 49
55 new_nsp = clone_nsproxy(tsk->nsproxy); 50 new_nsp = create_nsproxy();
56 if (!new_nsp) 51 if (!new_nsp)
57 return ERR_PTR(-ENOMEM); 52 return ERR_PTR(-ENOMEM);
58 53
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 29b685f551aa..1a933a221ea4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -124,7 +124,7 @@ void perf_enable(void)
124 124
125static void get_ctx(struct perf_counter_context *ctx) 125static void get_ctx(struct perf_counter_context *ctx)
126{ 126{
127 atomic_inc(&ctx->refcount); 127 WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
128} 128}
129 129
130static void free_ctx(struct rcu_head *head) 130static void free_ctx(struct rcu_head *head)
@@ -175,6 +175,11 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
175 spin_unlock_irqrestore(&ctx->lock, *flags); 175 spin_unlock_irqrestore(&ctx->lock, *flags);
176 goto retry; 176 goto retry;
177 } 177 }
178
179 if (!atomic_inc_not_zero(&ctx->refcount)) {
180 spin_unlock_irqrestore(&ctx->lock, *flags);
181 ctx = NULL;
182 }
178 } 183 }
179 rcu_read_unlock(); 184 rcu_read_unlock();
180 return ctx; 185 return ctx;
@@ -193,7 +198,6 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta
193 ctx = perf_lock_task_context(task, &flags); 198 ctx = perf_lock_task_context(task, &flags);
194 if (ctx) { 199 if (ctx) {
195 ++ctx->pin_count; 200 ++ctx->pin_count;
196 get_ctx(ctx);
197 spin_unlock_irqrestore(&ctx->lock, flags); 201 spin_unlock_irqrestore(&ctx->lock, flags);
198 } 202 }
199 return ctx; 203 return ctx;
@@ -1283,7 +1287,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
1283 if (!interrupts) { 1287 if (!interrupts) {
1284 perf_disable(); 1288 perf_disable();
1285 counter->pmu->disable(counter); 1289 counter->pmu->disable(counter);
1286 atomic_set(&hwc->period_left, 0); 1290 atomic64_set(&hwc->period_left, 0);
1287 counter->pmu->enable(counter); 1291 counter->pmu->enable(counter);
1288 perf_enable(); 1292 perf_enable();
1289 } 1293 }
@@ -1459,11 +1463,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
1459 put_ctx(parent_ctx); 1463 put_ctx(parent_ctx);
1460 ctx->parent_ctx = NULL; /* no longer a clone */ 1464 ctx->parent_ctx = NULL; /* no longer a clone */
1461 } 1465 }
1462 /*
1463 * Get an extra reference before dropping the lock so that
1464 * this context won't get freed if the task exits.
1465 */
1466 get_ctx(ctx);
1467 spin_unlock_irqrestore(&ctx->lock, flags); 1466 spin_unlock_irqrestore(&ctx->lock, flags);
1468 } 1467 }
1469 1468
@@ -1553,7 +1552,7 @@ static int perf_release(struct inode *inode, struct file *file)
1553static ssize_t 1552static ssize_t
1554perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) 1553perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1555{ 1554{
1556 u64 values[3]; 1555 u64 values[4];
1557 int n; 1556 int n;
1558 1557
1559 /* 1558 /*
@@ -1620,22 +1619,6 @@ static void perf_counter_reset(struct perf_counter *counter)
1620 perf_counter_update_userpage(counter); 1619 perf_counter_update_userpage(counter);
1621} 1620}
1622 1621
1623static void perf_counter_for_each_sibling(struct perf_counter *counter,
1624 void (*func)(struct perf_counter *))
1625{
1626 struct perf_counter_context *ctx = counter->ctx;
1627 struct perf_counter *sibling;
1628
1629 WARN_ON_ONCE(ctx->parent_ctx);
1630 mutex_lock(&ctx->mutex);
1631 counter = counter->group_leader;
1632
1633 func(counter);
1634 list_for_each_entry(sibling, &counter->sibling_list, list_entry)
1635 func(sibling);
1636 mutex_unlock(&ctx->mutex);
1637}
1638
1639/* 1622/*
1640 * Holding the top-level counter's child_mutex means that any 1623 * Holding the top-level counter's child_mutex means that any
1641 * descendant process that has inherited this counter will block 1624 * descendant process that has inherited this counter will block
@@ -1658,14 +1641,18 @@ static void perf_counter_for_each_child(struct perf_counter *counter,
1658static void perf_counter_for_each(struct perf_counter *counter, 1641static void perf_counter_for_each(struct perf_counter *counter,
1659 void (*func)(struct perf_counter *)) 1642 void (*func)(struct perf_counter *))
1660{ 1643{
1661 struct perf_counter *child; 1644 struct perf_counter_context *ctx = counter->ctx;
1645 struct perf_counter *sibling;
1662 1646
1663 WARN_ON_ONCE(counter->ctx->parent_ctx); 1647 WARN_ON_ONCE(ctx->parent_ctx);
1664 mutex_lock(&counter->child_mutex); 1648 mutex_lock(&ctx->mutex);
1665 perf_counter_for_each_sibling(counter, func); 1649 counter = counter->group_leader;
1666 list_for_each_entry(child, &counter->child_list, child_list) 1650
1667 perf_counter_for_each_sibling(child, func); 1651 perf_counter_for_each_child(counter, func);
1668 mutex_unlock(&counter->child_mutex); 1652 func(counter);
1653 list_for_each_entry(sibling, &counter->sibling_list, list_entry)
1654 perf_counter_for_each_child(counter, func);
1655 mutex_unlock(&ctx->mutex);
1669} 1656}
1670 1657
1671static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) 1658static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
@@ -1806,6 +1793,12 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1806 struct perf_mmap_data *data; 1793 struct perf_mmap_data *data;
1807 int ret = VM_FAULT_SIGBUS; 1794 int ret = VM_FAULT_SIGBUS;
1808 1795
1796 if (vmf->flags & FAULT_FLAG_MKWRITE) {
1797 if (vmf->pgoff == 0)
1798 ret = 0;
1799 return ret;
1800 }
1801
1809 rcu_read_lock(); 1802 rcu_read_lock();
1810 data = rcu_dereference(counter->data); 1803 data = rcu_dereference(counter->data);
1811 if (!data) 1804 if (!data)
@@ -1819,9 +1812,16 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1819 if ((unsigned)nr > data->nr_pages) 1812 if ((unsigned)nr > data->nr_pages)
1820 goto unlock; 1813 goto unlock;
1821 1814
1815 if (vmf->flags & FAULT_FLAG_WRITE)
1816 goto unlock;
1817
1822 vmf->page = virt_to_page(data->data_pages[nr]); 1818 vmf->page = virt_to_page(data->data_pages[nr]);
1823 } 1819 }
1820
1824 get_page(vmf->page); 1821 get_page(vmf->page);
1822 vmf->page->mapping = vma->vm_file->f_mapping;
1823 vmf->page->index = vmf->pgoff;
1824
1825 ret = 0; 1825 ret = 0;
1826unlock: 1826unlock:
1827 rcu_read_unlock(); 1827 rcu_read_unlock();
@@ -1874,6 +1874,14 @@ fail:
1874 return -ENOMEM; 1874 return -ENOMEM;
1875} 1875}
1876 1876
1877static void perf_mmap_free_page(unsigned long addr)
1878{
1879 struct page *page = virt_to_page(addr);
1880
1881 page->mapping = NULL;
1882 __free_page(page);
1883}
1884
1877static void __perf_mmap_data_free(struct rcu_head *rcu_head) 1885static void __perf_mmap_data_free(struct rcu_head *rcu_head)
1878{ 1886{
1879 struct perf_mmap_data *data; 1887 struct perf_mmap_data *data;
@@ -1881,9 +1889,10 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head)
1881 1889
1882 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 1890 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
1883 1891
1884 free_page((unsigned long)data->user_page); 1892 perf_mmap_free_page((unsigned long)data->user_page);
1885 for (i = 0; i < data->nr_pages; i++) 1893 for (i = 0; i < data->nr_pages; i++)
1886 free_page((unsigned long)data->data_pages[i]); 1894 perf_mmap_free_page((unsigned long)data->data_pages[i]);
1895
1887 kfree(data); 1896 kfree(data);
1888} 1897}
1889 1898
@@ -1920,9 +1929,10 @@ static void perf_mmap_close(struct vm_area_struct *vma)
1920} 1929}
1921 1930
1922static struct vm_operations_struct perf_mmap_vmops = { 1931static struct vm_operations_struct perf_mmap_vmops = {
1923 .open = perf_mmap_open, 1932 .open = perf_mmap_open,
1924 .close = perf_mmap_close, 1933 .close = perf_mmap_close,
1925 .fault = perf_mmap_fault, 1934 .fault = perf_mmap_fault,
1935 .page_mkwrite = perf_mmap_fault,
1926}; 1936};
1927 1937
1928static int perf_mmap(struct file *file, struct vm_area_struct *vma) 1938static int perf_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1936,7 +1946,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
1936 long user_extra, extra; 1946 long user_extra, extra;
1937 int ret = 0; 1947 int ret = 0;
1938 1948
1939 if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) 1949 if (!(vma->vm_flags & VM_SHARED))
1940 return -EINVAL; 1950 return -EINVAL;
1941 1951
1942 vma_size = vma->vm_end - vma->vm_start; 1952 vma_size = vma->vm_end - vma->vm_start;
@@ -1995,10 +2005,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
1995 atomic_long_add(user_extra, &user->locked_vm); 2005 atomic_long_add(user_extra, &user->locked_vm);
1996 vma->vm_mm->locked_vm += extra; 2006 vma->vm_mm->locked_vm += extra;
1997 counter->data->nr_locked = extra; 2007 counter->data->nr_locked = extra;
2008 if (vma->vm_flags & VM_WRITE)
2009 counter->data->writable = 1;
2010
1998unlock: 2011unlock:
1999 mutex_unlock(&counter->mmap_mutex); 2012 mutex_unlock(&counter->mmap_mutex);
2000 2013
2001 vma->vm_flags &= ~VM_MAYWRITE;
2002 vma->vm_flags |= VM_RESERVED; 2014 vma->vm_flags |= VM_RESERVED;
2003 vma->vm_ops = &perf_mmap_vmops; 2015 vma->vm_ops = &perf_mmap_vmops;
2004 2016
@@ -2175,11 +2187,38 @@ struct perf_output_handle {
2175 unsigned long head; 2187 unsigned long head;
2176 unsigned long offset; 2188 unsigned long offset;
2177 int nmi; 2189 int nmi;
2178 int overflow; 2190 int sample;
2179 int locked; 2191 int locked;
2180 unsigned long flags; 2192 unsigned long flags;
2181}; 2193};
2182 2194
2195static bool perf_output_space(struct perf_mmap_data *data,
2196 unsigned int offset, unsigned int head)
2197{
2198 unsigned long tail;
2199 unsigned long mask;
2200
2201 if (!data->writable)
2202 return true;
2203
2204 mask = (data->nr_pages << PAGE_SHIFT) - 1;
2205 /*
2206 * Userspace could choose to issue a mb() before updating the tail
2207 * pointer. So that all reads will be completed before the write is
2208 * issued.
2209 */
2210 tail = ACCESS_ONCE(data->user_page->data_tail);
2211 smp_rmb();
2212
2213 offset = (offset - tail) & mask;
2214 head = (head - tail) & mask;
2215
2216 if ((int)(head - offset) < 0)
2217 return false;
2218
2219 return true;
2220}
2221
2183static void perf_output_wakeup(struct perf_output_handle *handle) 2222static void perf_output_wakeup(struct perf_output_handle *handle)
2184{ 2223{
2185 atomic_set(&handle->data->poll, POLL_IN); 2224 atomic_set(&handle->data->poll, POLL_IN);
@@ -2270,12 +2309,57 @@ out:
2270 local_irq_restore(handle->flags); 2309 local_irq_restore(handle->flags);
2271} 2310}
2272 2311
2312static void perf_output_copy(struct perf_output_handle *handle,
2313 const void *buf, unsigned int len)
2314{
2315 unsigned int pages_mask;
2316 unsigned int offset;
2317 unsigned int size;
2318 void **pages;
2319
2320 offset = handle->offset;
2321 pages_mask = handle->data->nr_pages - 1;
2322 pages = handle->data->data_pages;
2323
2324 do {
2325 unsigned int page_offset;
2326 int nr;
2327
2328 nr = (offset >> PAGE_SHIFT) & pages_mask;
2329 page_offset = offset & (PAGE_SIZE - 1);
2330 size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
2331
2332 memcpy(pages[nr] + page_offset, buf, size);
2333
2334 len -= size;
2335 buf += size;
2336 offset += size;
2337 } while (len);
2338
2339 handle->offset = offset;
2340
2341 /*
2342 * Check we didn't copy past our reservation window, taking the
2343 * possible unsigned int wrap into account.
2344 */
2345 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
2346}
2347
2348#define perf_output_put(handle, x) \
2349 perf_output_copy((handle), &(x), sizeof(x))
2350
2273static int perf_output_begin(struct perf_output_handle *handle, 2351static int perf_output_begin(struct perf_output_handle *handle,
2274 struct perf_counter *counter, unsigned int size, 2352 struct perf_counter *counter, unsigned int size,
2275 int nmi, int overflow) 2353 int nmi, int sample)
2276{ 2354{
2277 struct perf_mmap_data *data; 2355 struct perf_mmap_data *data;
2278 unsigned int offset, head; 2356 unsigned int offset, head;
2357 int have_lost;
2358 struct {
2359 struct perf_event_header header;
2360 u64 id;
2361 u64 lost;
2362 } lost_event;
2279 2363
2280 /* 2364 /*
2281 * For inherited counters we send all the output towards the parent. 2365 * For inherited counters we send all the output towards the parent.
@@ -2288,19 +2372,25 @@ static int perf_output_begin(struct perf_output_handle *handle,
2288 if (!data) 2372 if (!data)
2289 goto out; 2373 goto out;
2290 2374
2291 handle->data = data; 2375 handle->data = data;
2292 handle->counter = counter; 2376 handle->counter = counter;
2293 handle->nmi = nmi; 2377 handle->nmi = nmi;
2294 handle->overflow = overflow; 2378 handle->sample = sample;
2295 2379
2296 if (!data->nr_pages) 2380 if (!data->nr_pages)
2297 goto fail; 2381 goto fail;
2298 2382
2383 have_lost = atomic_read(&data->lost);
2384 if (have_lost)
2385 size += sizeof(lost_event);
2386
2299 perf_output_lock(handle); 2387 perf_output_lock(handle);
2300 2388
2301 do { 2389 do {
2302 offset = head = atomic_long_read(&data->head); 2390 offset = head = atomic_long_read(&data->head);
2303 head += size; 2391 head += size;
2392 if (unlikely(!perf_output_space(data, offset, head)))
2393 goto fail;
2304 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); 2394 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
2305 2395
2306 handle->offset = offset; 2396 handle->offset = offset;
@@ -2309,55 +2399,27 @@ static int perf_output_begin(struct perf_output_handle *handle,
2309 if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) 2399 if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
2310 atomic_set(&data->wakeup, 1); 2400 atomic_set(&data->wakeup, 1);
2311 2401
2402 if (have_lost) {
2403 lost_event.header.type = PERF_EVENT_LOST;
2404 lost_event.header.misc = 0;
2405 lost_event.header.size = sizeof(lost_event);
2406 lost_event.id = counter->id;
2407 lost_event.lost = atomic_xchg(&data->lost, 0);
2408
2409 perf_output_put(handle, lost_event);
2410 }
2411
2312 return 0; 2412 return 0;
2313 2413
2314fail: 2414fail:
2315 perf_output_wakeup(handle); 2415 atomic_inc(&data->lost);
2416 perf_output_unlock(handle);
2316out: 2417out:
2317 rcu_read_unlock(); 2418 rcu_read_unlock();
2318 2419
2319 return -ENOSPC; 2420 return -ENOSPC;
2320} 2421}
2321 2422
2322static void perf_output_copy(struct perf_output_handle *handle,
2323 const void *buf, unsigned int len)
2324{
2325 unsigned int pages_mask;
2326 unsigned int offset;
2327 unsigned int size;
2328 void **pages;
2329
2330 offset = handle->offset;
2331 pages_mask = handle->data->nr_pages - 1;
2332 pages = handle->data->data_pages;
2333
2334 do {
2335 unsigned int page_offset;
2336 int nr;
2337
2338 nr = (offset >> PAGE_SHIFT) & pages_mask;
2339 page_offset = offset & (PAGE_SIZE - 1);
2340 size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
2341
2342 memcpy(pages[nr] + page_offset, buf, size);
2343
2344 len -= size;
2345 buf += size;
2346 offset += size;
2347 } while (len);
2348
2349 handle->offset = offset;
2350
2351 /*
2352 * Check we didn't copy past our reservation window, taking the
2353 * possible unsigned int wrap into account.
2354 */
2355 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
2356}
2357
2358#define perf_output_put(handle, x) \
2359 perf_output_copy((handle), &(x), sizeof(x))
2360
2361static void perf_output_end(struct perf_output_handle *handle) 2423static void perf_output_end(struct perf_output_handle *handle)
2362{ 2424{
2363 struct perf_counter *counter = handle->counter; 2425 struct perf_counter *counter = handle->counter;
@@ -2365,7 +2427,7 @@ static void perf_output_end(struct perf_output_handle *handle)
2365 2427
2366 int wakeup_events = counter->attr.wakeup_events; 2428 int wakeup_events = counter->attr.wakeup_events;
2367 2429
2368 if (handle->overflow && wakeup_events) { 2430 if (handle->sample && wakeup_events) {
2369 int events = atomic_inc_return(&data->events); 2431 int events = atomic_inc_return(&data->events);
2370 if (events >= wakeup_events) { 2432 if (events >= wakeup_events) {
2371 atomic_sub(wakeup_events, &data->events); 2433 atomic_sub(wakeup_events, &data->events);
@@ -2970,7 +3032,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
2970} 3032}
2971 3033
2972/* 3034/*
2973 * Generic counter overflow handling. 3035 * Generic counter overflow handling, sampling.
2974 */ 3036 */
2975 3037
2976int perf_counter_overflow(struct perf_counter *counter, int nmi, 3038int perf_counter_overflow(struct perf_counter *counter, int nmi,
@@ -3109,20 +3171,15 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3109} 3171}
3110 3172
3111static void perf_swcounter_overflow(struct perf_counter *counter, 3173static void perf_swcounter_overflow(struct perf_counter *counter,
3112 int nmi, struct pt_regs *regs, u64 addr) 3174 int nmi, struct perf_sample_data *data)
3113{ 3175{
3114 struct perf_sample_data data = { 3176 data->period = counter->hw.last_period;
3115 .regs = regs,
3116 .addr = addr,
3117 .period = counter->hw.last_period,
3118 };
3119 3177
3120 perf_swcounter_update(counter); 3178 perf_swcounter_update(counter);
3121 perf_swcounter_set_period(counter); 3179 perf_swcounter_set_period(counter);
3122 if (perf_counter_overflow(counter, nmi, &data)) 3180 if (perf_counter_overflow(counter, nmi, data))
3123 /* soft-disable the counter */ 3181 /* soft-disable the counter */
3124 ; 3182 ;
3125
3126} 3183}
3127 3184
3128static int perf_swcounter_is_counting(struct perf_counter *counter) 3185static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3187,18 +3244,18 @@ static int perf_swcounter_match(struct perf_counter *counter,
3187} 3244}
3188 3245
3189static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 3246static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3190 int nmi, struct pt_regs *regs, u64 addr) 3247 int nmi, struct perf_sample_data *data)
3191{ 3248{
3192 int neg = atomic64_add_negative(nr, &counter->hw.count); 3249 int neg = atomic64_add_negative(nr, &counter->hw.count);
3193 3250
3194 if (counter->hw.sample_period && !neg && regs) 3251 if (counter->hw.sample_period && !neg && data->regs)
3195 perf_swcounter_overflow(counter, nmi, regs, addr); 3252 perf_swcounter_overflow(counter, nmi, data);
3196} 3253}
3197 3254
3198static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, 3255static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3199 enum perf_type_id type, u32 event, 3256 enum perf_type_id type,
3200 u64 nr, int nmi, struct pt_regs *regs, 3257 u32 event, u64 nr, int nmi,
3201 u64 addr) 3258 struct perf_sample_data *data)
3202{ 3259{
3203 struct perf_counter *counter; 3260 struct perf_counter *counter;
3204 3261
@@ -3207,8 +3264,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3207 3264
3208 rcu_read_lock(); 3265 rcu_read_lock();
3209 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 3266 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
3210 if (perf_swcounter_match(counter, type, event, regs)) 3267 if (perf_swcounter_match(counter, type, event, data->regs))
3211 perf_swcounter_add(counter, nr, nmi, regs, addr); 3268 perf_swcounter_add(counter, nr, nmi, data);
3212 } 3269 }
3213 rcu_read_unlock(); 3270 rcu_read_unlock();
3214} 3271}
@@ -3227,9 +3284,9 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
3227 return &cpuctx->recursion[0]; 3284 return &cpuctx->recursion[0];
3228} 3285}
3229 3286
3230static void __perf_swcounter_event(enum perf_type_id type, u32 event, 3287static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3231 u64 nr, int nmi, struct pt_regs *regs, 3288 u64 nr, int nmi,
3232 u64 addr) 3289 struct perf_sample_data *data)
3233{ 3290{
3234 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3291 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3235 int *recursion = perf_swcounter_recursion_context(cpuctx); 3292 int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -3242,7 +3299,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event,
3242 barrier(); 3299 barrier();
3243 3300
3244 perf_swcounter_ctx_event(&cpuctx->ctx, type, event, 3301 perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
3245 nr, nmi, regs, addr); 3302 nr, nmi, data);
3246 rcu_read_lock(); 3303 rcu_read_lock();
3247 /* 3304 /*
3248 * doesn't really matter which of the child contexts the 3305 * doesn't really matter which of the child contexts the
@@ -3250,7 +3307,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event,
3250 */ 3307 */
3251 ctx = rcu_dereference(current->perf_counter_ctxp); 3308 ctx = rcu_dereference(current->perf_counter_ctxp);
3252 if (ctx) 3309 if (ctx)
3253 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr); 3310 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data);
3254 rcu_read_unlock(); 3311 rcu_read_unlock();
3255 3312
3256 barrier(); 3313 barrier();
@@ -3263,7 +3320,12 @@ out:
3263void 3320void
3264perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 3321perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
3265{ 3322{
3266 __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); 3323 struct perf_sample_data data = {
3324 .regs = regs,
3325 .addr = addr,
3326 };
3327
3328 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data);
3267} 3329}
3268 3330
3269static void perf_swcounter_read(struct perf_counter *counter) 3331static void perf_swcounter_read(struct perf_counter *counter)
@@ -3404,36 +3466,18 @@ static const struct pmu perf_ops_task_clock = {
3404 .read = task_clock_perf_counter_read, 3466 .read = task_clock_perf_counter_read,
3405}; 3467};
3406 3468
3407/*
3408 * Software counter: cpu migrations
3409 */
3410void perf_counter_task_migration(struct task_struct *task, int cpu)
3411{
3412 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
3413 struct perf_counter_context *ctx;
3414
3415 perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
3416 PERF_COUNT_SW_CPU_MIGRATIONS,
3417 1, 1, NULL, 0);
3418
3419 ctx = perf_pin_task_context(task);
3420 if (ctx) {
3421 perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
3422 PERF_COUNT_SW_CPU_MIGRATIONS,
3423 1, 1, NULL, 0);
3424 perf_unpin_context(ctx);
3425 }
3426}
3427
3428#ifdef CONFIG_EVENT_PROFILE 3469#ifdef CONFIG_EVENT_PROFILE
3429void perf_tpcounter_event(int event_id) 3470void perf_tpcounter_event(int event_id)
3430{ 3471{
3431 struct pt_regs *regs = get_irq_regs(); 3472 struct perf_sample_data data = {
3473 .regs = get_irq_regs();
3474 .addr = 0,
3475 };
3432 3476
3433 if (!regs) 3477 if (!data.regs)
3434 regs = task_pt_regs(current); 3478 data.regs = task_pt_regs(current);
3435 3479
3436 __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); 3480 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
3437} 3481}
3438EXPORT_SYMBOL_GPL(perf_tpcounter_event); 3482EXPORT_SYMBOL_GPL(perf_tpcounter_event);
3439 3483
diff --git a/kernel/pid.c b/kernel/pid.c
index b2e5f78fd281..31310b5d3f50 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -378,26 +378,15 @@ EXPORT_SYMBOL(pid_task);
378/* 378/*
379 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 379 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
380 */ 380 */
381struct task_struct *find_task_by_pid_type_ns(int type, int nr, 381struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
382 struct pid_namespace *ns)
383{ 382{
384 return pid_task(find_pid_ns(nr, ns), type); 383 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
385} 384}
386 385
387EXPORT_SYMBOL(find_task_by_pid_type_ns);
388
389struct task_struct *find_task_by_vpid(pid_t vnr) 386struct task_struct *find_task_by_vpid(pid_t vnr)
390{ 387{
391 return find_task_by_pid_type_ns(PIDTYPE_PID, vnr, 388 return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
392 current->nsproxy->pid_ns);
393}
394EXPORT_SYMBOL(find_task_by_vpid);
395
396struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
397{
398 return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
399} 389}
400EXPORT_SYMBOL(find_task_by_pid_ns);
401 390
402struct pid *get_task_pid(struct task_struct *task, enum pid_type type) 391struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
403{ 392{
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 2d1001b4858d..821722ae58a7 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -67,9 +67,10 @@ err_alloc:
67 return NULL; 67 return NULL;
68} 68}
69 69
70static struct pid_namespace *create_pid_namespace(unsigned int level) 70static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
71{ 71{
72 struct pid_namespace *ns; 72 struct pid_namespace *ns;
73 unsigned int level = parent_pid_ns->level + 1;
73 int i; 74 int i;
74 75
75 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 76 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
@@ -86,6 +87,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
86 87
87 kref_init(&ns->kref); 88 kref_init(&ns->kref);
88 ns->level = level; 89 ns->level = level;
90 ns->parent = get_pid_ns(parent_pid_ns);
89 91
90 set_bit(0, ns->pidmap[0].page); 92 set_bit(0, ns->pidmap[0].page);
91 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); 93 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -114,25 +116,11 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
114 116
115struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) 117struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
116{ 118{
117 struct pid_namespace *new_ns;
118
119 BUG_ON(!old_ns);
120 new_ns = get_pid_ns(old_ns);
121 if (!(flags & CLONE_NEWPID)) 119 if (!(flags & CLONE_NEWPID))
122 goto out; 120 return get_pid_ns(old_ns);
123
124 new_ns = ERR_PTR(-EINVAL);
125 if (flags & CLONE_THREAD) 121 if (flags & CLONE_THREAD)
126 goto out_put; 122 return ERR_PTR(-EINVAL);
127 123 return create_pid_namespace(old_ns);
128 new_ns = create_pid_namespace(old_ns->level + 1);
129 if (!IS_ERR(new_ns))
130 new_ns->parent = get_pid_ns(old_ns);
131
132out_put:
133 put_pid_ns(old_ns);
134out:
135 return new_ns;
136} 124}
137 125
138void free_pid_ns(struct kref *kref) 126void free_pid_ns(struct kref *kref)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f6d8b8cb5e34..61c78b2c07ba 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -167,67 +167,82 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
167int ptrace_attach(struct task_struct *task) 167int ptrace_attach(struct task_struct *task)
168{ 168{
169 int retval; 169 int retval;
170 unsigned long flags;
171 170
172 audit_ptrace(task); 171 audit_ptrace(task);
173 172
174 retval = -EPERM; 173 retval = -EPERM;
174 if (unlikely(task->flags & PF_KTHREAD))
175 goto out;
175 if (same_thread_group(task, current)) 176 if (same_thread_group(task, current))
176 goto out; 177 goto out;
177 178
178 /* Protect the target's credential calculations against our 179 /*
180 * Protect exec's credential calculations against our interference;
179 * interference; SUID, SGID and LSM creds get determined differently 181 * interference; SUID, SGID and LSM creds get determined differently
180 * under ptrace. 182 * under ptrace.
181 */ 183 */
182 retval = mutex_lock_interruptible(&task->cred_guard_mutex); 184 retval = mutex_lock_interruptible(&task->cred_guard_mutex);
183 if (retval < 0) 185 if (retval < 0)
184 goto out; 186 goto out;
185 187
186 retval = -EPERM;
187repeat:
188 /*
189 * Nasty, nasty.
190 *
191 * We want to hold both the task-lock and the
192 * tasklist_lock for writing at the same time.
193 * But that's against the rules (tasklist_lock
194 * is taken for reading by interrupts on other
195 * cpu's that may have task_lock).
196 */
197 task_lock(task); 188 task_lock(task);
198 if (!write_trylock_irqsave(&tasklist_lock, flags)) {
199 task_unlock(task);
200 do {
201 cpu_relax();
202 } while (!write_can_lock(&tasklist_lock));
203 goto repeat;
204 }
205
206 if (!task->mm)
207 goto bad;
208 /* the same process cannot be attached many times */
209 if (task->ptrace & PT_PTRACED)
210 goto bad;
211 retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); 189 retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
190 task_unlock(task);
212 if (retval) 191 if (retval)
213 goto bad; 192 goto unlock_creds;
214 193
215 /* Go */ 194 write_lock_irq(&tasklist_lock);
216 task->ptrace |= PT_PTRACED; 195 retval = -EPERM;
196 if (unlikely(task->exit_state))
197 goto unlock_tasklist;
198 if (task->ptrace)
199 goto unlock_tasklist;
200
201 task->ptrace = PT_PTRACED;
217 if (capable(CAP_SYS_PTRACE)) 202 if (capable(CAP_SYS_PTRACE))
218 task->ptrace |= PT_PTRACE_CAP; 203 task->ptrace |= PT_PTRACE_CAP;
219 204
220 __ptrace_link(task, current); 205 __ptrace_link(task, current);
221
222 send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); 206 send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
223bad: 207
224 write_unlock_irqrestore(&tasklist_lock, flags); 208 retval = 0;
225 task_unlock(task); 209unlock_tasklist:
210 write_unlock_irq(&tasklist_lock);
211unlock_creds:
226 mutex_unlock(&task->cred_guard_mutex); 212 mutex_unlock(&task->cred_guard_mutex);
227out: 213out:
228 return retval; 214 return retval;
229} 215}
230 216
217/**
218 * ptrace_traceme -- helper for PTRACE_TRACEME
219 *
220 * Performs checks and sets PT_PTRACED.
221 * Should be used by all ptrace implementations for PTRACE_TRACEME.
222 */
223int ptrace_traceme(void)
224{
225 int ret = -EPERM;
226
227 write_lock_irq(&tasklist_lock);
228 /* Are we already being traced? */
229 if (!current->ptrace) {
230 ret = security_ptrace_traceme(current->parent);
231 /*
232 * Check PF_EXITING to ensure ->real_parent has not passed
233 * exit_ptrace(). Otherwise we don't report the error but
234 * pretend ->real_parent untraces us right after return.
235 */
236 if (!ret && !(current->real_parent->flags & PF_EXITING)) {
237 current->ptrace = PT_PTRACED;
238 __ptrace_link(current, current->real_parent);
239 }
240 }
241 write_unlock_irq(&tasklist_lock);
242
243 return ret;
244}
245
231/* 246/*
232 * Called with irqs disabled, returns true if childs should reap themselves. 247 * Called with irqs disabled, returns true if childs should reap themselves.
233 */ 248 */
@@ -409,37 +424,33 @@ static int ptrace_setoptions(struct task_struct *child, long data)
409 424
410static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) 425static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
411{ 426{
427 unsigned long flags;
412 int error = -ESRCH; 428 int error = -ESRCH;
413 429
414 read_lock(&tasklist_lock); 430 if (lock_task_sighand(child, &flags)) {
415 if (likely(child->sighand != NULL)) {
416 error = -EINVAL; 431 error = -EINVAL;
417 spin_lock_irq(&child->sighand->siglock);
418 if (likely(child->last_siginfo != NULL)) { 432 if (likely(child->last_siginfo != NULL)) {
419 *info = *child->last_siginfo; 433 *info = *child->last_siginfo;
420 error = 0; 434 error = 0;
421 } 435 }
422 spin_unlock_irq(&child->sighand->siglock); 436 unlock_task_sighand(child, &flags);
423 } 437 }
424 read_unlock(&tasklist_lock);
425 return error; 438 return error;
426} 439}
427 440
428static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) 441static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
429{ 442{
443 unsigned long flags;
430 int error = -ESRCH; 444 int error = -ESRCH;
431 445
432 read_lock(&tasklist_lock); 446 if (lock_task_sighand(child, &flags)) {
433 if (likely(child->sighand != NULL)) {
434 error = -EINVAL; 447 error = -EINVAL;
435 spin_lock_irq(&child->sighand->siglock);
436 if (likely(child->last_siginfo != NULL)) { 448 if (likely(child->last_siginfo != NULL)) {
437 *child->last_siginfo = *info; 449 *child->last_siginfo = *info;
438 error = 0; 450 error = 0;
439 } 451 }
440 spin_unlock_irq(&child->sighand->siglock); 452 unlock_task_sighand(child, &flags);
441 } 453 }
442 read_unlock(&tasklist_lock);
443 return error; 454 return error;
444} 455}
445 456
@@ -566,72 +577,16 @@ int ptrace_request(struct task_struct *child, long request,
566 return ret; 577 return ret;
567} 578}
568 579
569/** 580static struct task_struct *ptrace_get_task_struct(pid_t pid)
570 * ptrace_traceme -- helper for PTRACE_TRACEME
571 *
572 * Performs checks and sets PT_PTRACED.
573 * Should be used by all ptrace implementations for PTRACE_TRACEME.
574 */
575int ptrace_traceme(void)
576{
577 int ret = -EPERM;
578
579 /*
580 * Are we already being traced?
581 */
582repeat:
583 task_lock(current);
584 if (!(current->ptrace & PT_PTRACED)) {
585 /*
586 * See ptrace_attach() comments about the locking here.
587 */
588 unsigned long flags;
589 if (!write_trylock_irqsave(&tasklist_lock, flags)) {
590 task_unlock(current);
591 do {
592 cpu_relax();
593 } while (!write_can_lock(&tasklist_lock));
594 goto repeat;
595 }
596
597 ret = security_ptrace_traceme(current->parent);
598
599 /*
600 * Check PF_EXITING to ensure ->real_parent has not passed
601 * exit_ptrace(). Otherwise we don't report the error but
602 * pretend ->real_parent untraces us right after return.
603 */
604 if (!ret && !(current->real_parent->flags & PF_EXITING)) {
605 current->ptrace |= PT_PTRACED;
606 __ptrace_link(current, current->real_parent);
607 }
608
609 write_unlock_irqrestore(&tasklist_lock, flags);
610 }
611 task_unlock(current);
612 return ret;
613}
614
615/**
616 * ptrace_get_task_struct -- grab a task struct reference for ptrace
617 * @pid: process id to grab a task_struct reference of
618 *
619 * This function is a helper for ptrace implementations. It checks
620 * permissions and then grabs a task struct for use of the actual
621 * ptrace implementation.
622 *
623 * Returns the task_struct for @pid or an ERR_PTR() on failure.
624 */
625struct task_struct *ptrace_get_task_struct(pid_t pid)
626{ 581{
627 struct task_struct *child; 582 struct task_struct *child;
628 583
629 read_lock(&tasklist_lock); 584 rcu_read_lock();
630 child = find_task_by_vpid(pid); 585 child = find_task_by_vpid(pid);
631 if (child) 586 if (child)
632 get_task_struct(child); 587 get_task_struct(child);
588 rcu_read_unlock();
633 589
634 read_unlock(&tasklist_lock);
635 if (!child) 590 if (!child)
636 return ERR_PTR(-ESRCH); 591 return ERR_PTR(-ESRCH);
637 return child; 592 return child;
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bf8e7534c803..e1338f074314 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -18,7 +18,7 @@
18void res_counter_init(struct res_counter *counter, struct res_counter *parent) 18void res_counter_init(struct res_counter *counter, struct res_counter *parent)
19{ 19{
20 spin_lock_init(&counter->lock); 20 spin_lock_init(&counter->lock);
21 counter->limit = (unsigned long long)LLONG_MAX; 21 counter->limit = RESOURCE_MAX;
22 counter->parent = parent; 22 counter->parent = parent;
23} 23}
24 24
@@ -133,6 +133,16 @@ int res_counter_memparse_write_strategy(const char *buf,
133 unsigned long long *res) 133 unsigned long long *res)
134{ 134{
135 char *end; 135 char *end;
136
137 /* return RESOURCE_MAX(unlimited) if "-1" is specified */
138 if (*buf == '-') {
139 *res = simple_strtoull(buf + 1, &end, 10);
140 if (*res != 1 || *end != '\0')
141 return -EINVAL;
142 *res = RESOURCE_MAX;
143 return 0;
144 }
145
136 /* FIXME - make memparse() take const char* args */ 146 /* FIXME - make memparse() take const char* args */
137 *res = memparse((char *)buf, &end); 147 *res = memparse((char *)buf, &end);
138 if (*end != '\0') 148 if (*end != '\0')
diff --git a/kernel/sched.c b/kernel/sched.c
index 8fb88a906aaa..7c9098d186e6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1978 if (task_hot(p, old_rq->clock, NULL)) 1978 if (task_hot(p, old_rq->clock, NULL))
1979 schedstat_inc(p, se.nr_forced2_migrations); 1979 schedstat_inc(p, se.nr_forced2_migrations);
1980#endif 1980#endif
1981 perf_counter_task_migration(p, new_cpu); 1981 perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
1982 1, 1, NULL, 0);
1982 } 1983 }
1983 p->se.vruntime -= old_cfsrq->min_vruntime - 1984 p->se.vruntime -= old_cfsrq->min_vruntime -
1984 new_cfsrq->min_vruntime; 1985 new_cfsrq->min_vruntime;
@@ -7045,7 +7046,7 @@ static int migration_thread(void *data)
7045 7046
7046 if (cpu_is_offline(cpu)) { 7047 if (cpu_is_offline(cpu)) {
7047 spin_unlock_irq(&rq->lock); 7048 spin_unlock_irq(&rq->lock);
7048 goto wait_to_die; 7049 break;
7049 } 7050 }
7050 7051
7051 if (rq->active_balance) { 7052 if (rq->active_balance) {
@@ -7071,16 +7072,7 @@ static int migration_thread(void *data)
7071 complete(&req->done); 7072 complete(&req->done);
7072 } 7073 }
7073 __set_current_state(TASK_RUNNING); 7074 __set_current_state(TASK_RUNNING);
7074 return 0;
7075 7075
7076wait_to_die:
7077 /* Wait for kthread_stop */
7078 set_current_state(TASK_INTERRUPTIBLE);
7079 while (!kthread_should_stop()) {
7080 schedule();
7081 set_current_state(TASK_INTERRUPTIBLE);
7082 }
7083 __set_current_state(TASK_RUNNING);
7084 return 0; 7076 return 0;
7085} 7077}
7086 7078
@@ -7494,6 +7486,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7494 rq = task_rq_lock(p, &flags); 7486 rq = task_rq_lock(p, &flags);
7495 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); 7487 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
7496 task_rq_unlock(rq, &flags); 7488 task_rq_unlock(rq, &flags);
7489 get_task_struct(p);
7497 cpu_rq(cpu)->migration_thread = p; 7490 cpu_rq(cpu)->migration_thread = p;
7498 break; 7491 break;
7499 7492
@@ -7524,6 +7517,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7524 kthread_bind(cpu_rq(cpu)->migration_thread, 7517 kthread_bind(cpu_rq(cpu)->migration_thread,
7525 cpumask_any(cpu_online_mask)); 7518 cpumask_any(cpu_online_mask));
7526 kthread_stop(cpu_rq(cpu)->migration_thread); 7519 kthread_stop(cpu_rq(cpu)->migration_thread);
7520 put_task_struct(cpu_rq(cpu)->migration_thread);
7527 cpu_rq(cpu)->migration_thread = NULL; 7521 cpu_rq(cpu)->migration_thread = NULL;
7528 break; 7522 break;
7529 7523
@@ -7533,6 +7527,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7533 migrate_live_tasks(cpu); 7527 migrate_live_tasks(cpu);
7534 rq = cpu_rq(cpu); 7528 rq = cpu_rq(cpu);
7535 kthread_stop(rq->migration_thread); 7529 kthread_stop(rq->migration_thread);
7530 put_task_struct(rq->migration_thread);
7536 rq->migration_thread = NULL; 7531 rq->migration_thread = NULL;
7537 /* Idle task back to normal (off runqueue, low prio) */ 7532 /* Idle task back to normal (off runqueue, low prio) */
7538 spin_lock_irq(&rq->lock); 7533 spin_lock_irq(&rq->lock);
@@ -7828,7 +7823,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
7828 free_rootdomain(old_rd); 7823 free_rootdomain(old_rd);
7829} 7824}
7830 7825
7831static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) 7826static int init_rootdomain(struct root_domain *rd, bool bootmem)
7832{ 7827{
7833 gfp_t gfp = GFP_KERNEL; 7828 gfp_t gfp = GFP_KERNEL;
7834 7829
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 7deffc9f0e5f..e6c251790dde 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -152,7 +152,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
152 * 152 *
153 * Returns: -ENOMEM if memory fails. 153 * Returns: -ENOMEM if memory fails.
154 */ 154 */
155int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) 155int cpupri_init(struct cpupri *cp, bool bootmem)
156{ 156{
157 gfp_t gfp = GFP_KERNEL; 157 gfp_t gfp = GFP_KERNEL;
158 int i; 158 int i;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 467ca72f1657..70c7e0b79946 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -162,7 +162,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
162{ 162{
163 s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, 163 s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
164 spread, rq0_min_vruntime, spread0; 164 spread, rq0_min_vruntime, spread0;
165 struct rq *rq = &per_cpu(runqueues, cpu); 165 struct rq *rq = cpu_rq(cpu);
166 struct sched_entity *last; 166 struct sched_entity *last;
167 unsigned long flags; 167 unsigned long flags;
168 168
@@ -191,7 +191,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
191 if (last) 191 if (last)
192 max_vruntime = last->vruntime; 192 max_vruntime = last->vruntime;
193 min_vruntime = cfs_rq->min_vruntime; 193 min_vruntime = cfs_rq->min_vruntime;
194 rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; 194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
195 spin_unlock_irqrestore(&rq->lock, flags); 195 spin_unlock_irqrestore(&rq->lock, flags);
196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", 196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
197 SPLIT_NS(MIN_vruntime)); 197 SPLIT_NS(MIN_vruntime));
@@ -248,7 +248,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
248 248
249static void print_cpu(struct seq_file *m, int cpu) 249static void print_cpu(struct seq_file *m, int cpu)
250{ 250{
251 struct rq *rq = &per_cpu(runqueues, cpu); 251 struct rq *rq = cpu_rq(cpu);
252 252
253#ifdef CONFIG_X86 253#ifdef CONFIG_X86
254 { 254 {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5f9650e8fe75..ba7fd6e9556f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -430,12 +430,13 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
430 430
431 for_each_sched_entity(se) { 431 for_each_sched_entity(se) {
432 struct load_weight *load; 432 struct load_weight *load;
433 struct load_weight lw;
433 434
434 cfs_rq = cfs_rq_of(se); 435 cfs_rq = cfs_rq_of(se);
435 load = &cfs_rq->load; 436 load = &cfs_rq->load;
436 437
437 if (unlikely(!se->on_rq)) { 438 if (unlikely(!se->on_rq)) {
438 struct load_weight lw = cfs_rq->load; 439 lw = cfs_rq->load;
439 440
440 update_load_add(&lw, se->load.weight); 441 update_load_add(&lw, se->load.weight);
441 load = &lw; 442 load = &lw;
diff --git a/kernel/signal.c b/kernel/signal.c
index d81f4952eebb..ccf1ceedaebe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1410,7 +1410,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1410 /* do_notify_parent_cldstop should have been called instead. */ 1410 /* do_notify_parent_cldstop should have been called instead. */
1411 BUG_ON(task_is_stopped_or_traced(tsk)); 1411 BUG_ON(task_is_stopped_or_traced(tsk));
1412 1412
1413 BUG_ON(!tsk->ptrace && 1413 BUG_ON(!task_ptrace(tsk) &&
1414 (tsk->group_leader != tsk || !thread_group_empty(tsk))); 1414 (tsk->group_leader != tsk || !thread_group_empty(tsk)));
1415 1415
1416 info.si_signo = sig; 1416 info.si_signo = sig;
@@ -1449,7 +1449,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1449 1449
1450 psig = tsk->parent->sighand; 1450 psig = tsk->parent->sighand;
1451 spin_lock_irqsave(&psig->siglock, flags); 1451 spin_lock_irqsave(&psig->siglock, flags);
1452 if (!tsk->ptrace && sig == SIGCHLD && 1452 if (!task_ptrace(tsk) && sig == SIGCHLD &&
1453 (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || 1453 (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
1454 (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { 1454 (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
1455 /* 1455 /*
@@ -1486,7 +1486,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1486 struct task_struct *parent; 1486 struct task_struct *parent;
1487 struct sighand_struct *sighand; 1487 struct sighand_struct *sighand;
1488 1488
1489 if (tsk->ptrace & PT_PTRACED) 1489 if (task_ptrace(tsk))
1490 parent = tsk->parent; 1490 parent = tsk->parent;
1491 else { 1491 else {
1492 tsk = tsk->group_leader; 1492 tsk = tsk->group_leader;
@@ -1499,7 +1499,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1499 * see comment in do_notify_parent() abot the following 3 lines 1499 * see comment in do_notify_parent() abot the following 3 lines
1500 */ 1500 */
1501 rcu_read_lock(); 1501 rcu_read_lock();
1502 info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); 1502 info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
1503 info.si_uid = __task_cred(tsk)->uid; 1503 info.si_uid = __task_cred(tsk)->uid;
1504 rcu_read_unlock(); 1504 rcu_read_unlock();
1505 1505
@@ -1535,7 +1535,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1535 1535
1536static inline int may_ptrace_stop(void) 1536static inline int may_ptrace_stop(void)
1537{ 1537{
1538 if (!likely(current->ptrace & PT_PTRACED)) 1538 if (!likely(task_ptrace(current)))
1539 return 0; 1539 return 0;
1540 /* 1540 /*
1541 * Are we in the middle of do_coredump? 1541 * Are we in the middle of do_coredump?
@@ -1753,7 +1753,7 @@ static int do_signal_stop(int signr)
1753static int ptrace_signal(int signr, siginfo_t *info, 1753static int ptrace_signal(int signr, siginfo_t *info,
1754 struct pt_regs *regs, void *cookie) 1754 struct pt_regs *regs, void *cookie)
1755{ 1755{
1756 if (!(current->ptrace & PT_PTRACED)) 1756 if (!task_ptrace(current))
1757 return signr; 1757 return signr;
1758 1758
1759 ptrace_signal_deliver(regs, cookie); 1759 ptrace_signal_deliver(regs, cookie);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b41fb710e114..3a94905fa5d2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -213,6 +213,7 @@ restart:
213 do { 213 do {
214 if (pending & 1) { 214 if (pending & 1) {
215 int prev_count = preempt_count(); 215 int prev_count = preempt_count();
216 kstat_incr_softirqs_this_cpu(h - softirq_vec);
216 217
217 trace_softirq_entry(h, softirq_vec); 218 trace_softirq_entry(h, softirq_vec);
218 h->action(h); 219 h->action(h);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ab462b9968d5..62e4ff9968b5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2283,7 +2283,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2283 void *data) 2283 void *data)
2284{ 2284{
2285#define TMPBUFLEN 21 2285#define TMPBUFLEN 21
2286 int *i, vleft, first=1, neg, val; 2286 int *i, vleft, first = 1, neg;
2287 unsigned long lval; 2287 unsigned long lval;
2288 size_t left, len; 2288 size_t left, len;
2289 2289
@@ -2336,8 +2336,6 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2336 len = p-buf; 2336 len = p-buf;
2337 if ((len < left) && *p && !isspace(*p)) 2337 if ((len < left) && *p && !isspace(*p))
2338 break; 2338 break;
2339 if (neg)
2340 val = -val;
2341 s += len; 2339 s += len;
2342 left -= len; 2340 left -= len;
2343 2341
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2aff39c6f10c..e0f59a21c061 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -222,6 +222,15 @@ void tick_nohz_stop_sched_tick(int inidle)
222 222
223 cpu = smp_processor_id(); 223 cpu = smp_processor_id();
224 ts = &per_cpu(tick_cpu_sched, cpu); 224 ts = &per_cpu(tick_cpu_sched, cpu);
225
226 /*
227 * Call to tick_nohz_start_idle stops the last_update_time from being
228 * updated. Thus, it must not be called in the event we are called from
229 * irq_exit() with the prior state different than idle.
230 */
231 if (!inidle && !ts->inidle)
232 goto end;
233
225 now = tick_nohz_start_idle(ts); 234 now = tick_nohz_start_idle(ts);
226 235
227 /* 236 /*
@@ -239,9 +248,6 @@ void tick_nohz_stop_sched_tick(int inidle)
239 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 248 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
240 goto end; 249 goto end;
241 250
242 if (!inidle && !ts->inidle)
243 goto end;
244
245 ts->inidle = 1; 251 ts->inidle = 1;
246 252
247 if (need_resched()) 253 if (need_resched())
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61071fecc82e..1551f47e7669 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
18config HAVE_FUNCTION_GRAPH_TRACER 18config HAVE_FUNCTION_GRAPH_TRACER
19 bool 19 bool
20 20
21config HAVE_FUNCTION_GRAPH_FP_TEST
22 bool
23 help
24 An arch may pass in a unique value (frame pointer) to both the
25 entering and exiting of a function. On exit, the value is compared
26 and if it does not match, then it will panic the kernel.
27
21config HAVE_FUNCTION_TRACE_MCOUNT_TEST 28config HAVE_FUNCTION_TRACE_MCOUNT_TEST
22 bool 29 bool
23 help 30 help
@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER
121 bool "Kernel Function Graph Tracer" 128 bool "Kernel Function Graph Tracer"
122 depends on HAVE_FUNCTION_GRAPH_TRACER 129 depends on HAVE_FUNCTION_GRAPH_TRACER
123 depends on FUNCTION_TRACER 130 depends on FUNCTION_TRACER
131 depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
124 default y 132 default y
125 help 133 help
126 Enable the kernel to trace a function at both its return 134 Enable the kernel to trace a function at both its return
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb60732ade0c..3718d55fb4c3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command)
1224 return; 1224 return;
1225 1225
1226 ftrace_start_up--; 1226 ftrace_start_up--;
1227 /*
1228 * Just warn in case of unbalance, no need to kill ftrace, it's not
1229 * critical but the ftrace_call callers may be never nopped again after
1230 * further ftrace uses.
1231 */
1232 WARN_ON_ONCE(ftrace_start_up < 0);
1233
1227 if (!ftrace_start_up) 1234 if (!ftrace_start_up)
1228 command |= FTRACE_DISABLE_CALLS; 1235 command |= FTRACE_DISABLE_CALLS;
1229 1236
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 86cdf671d7e2..1edaa9516e81 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr)
186 int cpu; 186 int cpu;
187 kmemtrace_array = tr; 187 kmemtrace_array = tr;
188 188
189 for_each_cpu_mask(cpu, cpu_possible_map) 189 for_each_cpu(cpu, cpu_possible_mask)
190 tracing_reset(tr, cpu); 190 tracing_reset(tr, cpu);
191 191
192 kmemtrace_start_probes(); 192 kmemtrace_start_probes();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..04dac2638258 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
207#define RB_ALIGNMENT 4U 207#define RB_ALIGNMENT 4U
208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
209 210
210/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 211/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
211#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 212#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu {
415 unsigned long overrun; 416 unsigned long overrun;
416 unsigned long read; 417 unsigned long read;
417 local_t entries; 418 local_t entries;
419 local_t committing;
420 local_t commits;
418 u64 write_stamp; 421 u64 write_stamp;
419 u64 read_stamp; 422 u64 read_stamp;
420 atomic_t record_disabled; 423 atomic_t record_disabled;
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
618 kfree(cpu_buffer); 621 kfree(cpu_buffer);
619} 622}
620 623
621/*
622 * Causes compile errors if the struct buffer_page gets bigger
623 * than the struct page.
624 */
625extern int ring_buffer_page_too_big(void);
626
627#ifdef CONFIG_HOTPLUG_CPU 624#ifdef CONFIG_HOTPLUG_CPU
628static int rb_cpu_notify(struct notifier_block *self, 625static int rb_cpu_notify(struct notifier_block *self,
629 unsigned long action, void *hcpu); 626 unsigned long action, void *hcpu);
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
646 int bsize; 643 int bsize;
647 int cpu; 644 int cpu;
648 645
649 /* Paranoid! Optimizes out when all is well */
650 if (sizeof(struct buffer_page) > sizeof(struct page))
651 ring_buffer_page_too_big();
652
653
654 /* keep it in its own cache line */ 646 /* keep it in its own cache line */
655 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 647 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
656 GFP_KERNEL); 648 GFP_KERNEL);
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
666 buffer->reader_lock_key = key; 658 buffer->reader_lock_key = key;
667 659
668 /* need at least two pages */ 660 /* need at least two pages */
669 if (buffer->pages == 1) 661 if (buffer->pages < 2)
670 buffer->pages++; 662 buffer->pages = 2;
671 663
672 /* 664 /*
673 * In case of non-hotplug cpu, if the ring-buffer is allocated 665 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event)
1011{ 1003{
1012 unsigned long addr = (unsigned long)event; 1004 unsigned long addr = (unsigned long)event;
1013 1005
1014 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 1006 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1015} 1007}
1016 1008
1017static inline int 1009static inline int
1018rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 1010rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1019 struct ring_buffer_event *event) 1011 struct ring_buffer_event *event)
1020{ 1012{
1021 unsigned long addr = (unsigned long)event; 1013 unsigned long addr = (unsigned long)event;
1022 unsigned long index; 1014 unsigned long index;
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1029} 1021}
1030 1022
1031static void 1023static void
1032rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
1033 struct ring_buffer_event *event)
1034{
1035 unsigned long addr = (unsigned long)event;
1036 unsigned long index;
1037
1038 index = rb_event_index(event);
1039 addr &= PAGE_MASK;
1040
1041 while (cpu_buffer->commit_page->page != (void *)addr) {
1042 if (RB_WARN_ON(cpu_buffer,
1043 cpu_buffer->commit_page == cpu_buffer->tail_page))
1044 return;
1045 cpu_buffer->commit_page->page->commit =
1046 cpu_buffer->commit_page->write;
1047 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1048 cpu_buffer->write_stamp =
1049 cpu_buffer->commit_page->page->time_stamp;
1050 }
1051
1052 /* Now set the commit to the event's index */
1053 local_set(&cpu_buffer->commit_page->page->commit, index);
1054}
1055
1056static void
1057rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 1024rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1058{ 1025{
1059 /* 1026 /*
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length)
1171 return length; 1138 return length;
1172} 1139}
1173 1140
1141static inline void
1142rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1143 struct buffer_page *tail_page,
1144 unsigned long tail, unsigned long length)
1145{
1146 struct ring_buffer_event *event;
1147
1148 /*
1149 * Only the event that crossed the page boundary
1150 * must fill the old tail_page with padding.
1151 */
1152 if (tail >= BUF_PAGE_SIZE) {
1153 local_sub(length, &tail_page->write);
1154 return;
1155 }
1156
1157 event = __rb_page_index(tail_page, tail);
1158 kmemcheck_annotate_bitfield(event, bitfield);
1159
1160 /*
1161 * If this event is bigger than the minimum size, then
1162 * we need to be careful that we don't subtract the
1163 * write counter enough to allow another writer to slip
1164 * in on this page.
1165 * We put in a discarded commit instead, to make sure
1166 * that this space is not used again.
1167 *
1168 * If we are less than the minimum size, we don't need to
1169 * worry about it.
1170 */
1171 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
1172 /* No room for any events */
1173
1174 /* Mark the rest of the page with padding */
1175 rb_event_set_padding(event);
1176
1177 /* Set the write back to the previous setting */
1178 local_sub(length, &tail_page->write);
1179 return;
1180 }
1181
1182 /* Put in a discarded event */
1183 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
1184 event->type_len = RINGBUF_TYPE_PADDING;
1185 /* time delta must be non zero */
1186 event->time_delta = 1;
1187 /* Account for this as an entry */
1188 local_inc(&tail_page->entries);
1189 local_inc(&cpu_buffer->entries);
1190
1191 /* Set write to end of buffer */
1192 length = (tail + length) - BUF_PAGE_SIZE;
1193 local_sub(length, &tail_page->write);
1194}
1174 1195
1175static struct ring_buffer_event * 1196static struct ring_buffer_event *
1176rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1197rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1180{ 1201{
1181 struct buffer_page *next_page, *head_page, *reader_page; 1202 struct buffer_page *next_page, *head_page, *reader_page;
1182 struct ring_buffer *buffer = cpu_buffer->buffer; 1203 struct ring_buffer *buffer = cpu_buffer->buffer;
1183 struct ring_buffer_event *event;
1184 bool lock_taken = false; 1204 bool lock_taken = false;
1185 unsigned long flags; 1205 unsigned long flags;
1186 1206
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1265 cpu_buffer->tail_page->page->time_stamp = *ts; 1285 cpu_buffer->tail_page->page->time_stamp = *ts;
1266 } 1286 }
1267 1287
1268 /* 1288 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1269 * The actual tail page has moved forward.
1270 */
1271 if (tail < BUF_PAGE_SIZE) {
1272 /* Mark the rest of the page with padding */
1273 event = __rb_page_index(tail_page, tail);
1274 kmemcheck_annotate_bitfield(event, bitfield);
1275 rb_event_set_padding(event);
1276 }
1277
1278 /* Set the write back to the previous setting */
1279 local_sub(length, &tail_page->write);
1280
1281 /*
1282 * If this was a commit entry that failed,
1283 * increment that too
1284 */
1285 if (tail_page == cpu_buffer->commit_page &&
1286 tail == rb_commit_index(cpu_buffer)) {
1287 rb_set_commit_to_write(cpu_buffer);
1288 }
1289 1289
1290 __raw_spin_unlock(&cpu_buffer->lock); 1290 __raw_spin_unlock(&cpu_buffer->lock);
1291 local_irq_restore(flags); 1291 local_irq_restore(flags);
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1295 1295
1296 out_reset: 1296 out_reset:
1297 /* reset write */ 1297 /* reset write */
1298 local_sub(length, &tail_page->write); 1298 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1299 1299
1300 if (likely(lock_taken)) 1300 if (likely(lock_taken))
1301 __raw_spin_unlock(&cpu_buffer->lock); 1301 __raw_spin_unlock(&cpu_buffer->lock);
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1325 1325
1326 /* We reserved something on the buffer */ 1326 /* We reserved something on the buffer */
1327 1327
1328 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1329 return NULL;
1330
1331 event = __rb_page_index(tail_page, tail); 1328 event = __rb_page_index(tail_page, tail);
1332 kmemcheck_annotate_bitfield(event, bitfield); 1329 kmemcheck_annotate_bitfield(event, bitfield);
1333 rb_update_event(event, type, length); 1330 rb_update_event(event, type, length);
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1337 local_inc(&tail_page->entries); 1334 local_inc(&tail_page->entries);
1338 1335
1339 /* 1336 /*
1340 * If this is a commit and the tail is zero, then update 1337 * If this is the first commit on the page, then update
1341 * this page's time stamp. 1338 * its timestamp.
1342 */ 1339 */
1343 if (!tail && rb_is_commit(cpu_buffer, event)) 1340 if (!tail)
1344 cpu_buffer->commit_page->page->time_stamp = *ts; 1341 tail_page->page->time_stamp = *ts;
1345 1342
1346 return event; 1343 return event;
1347} 1344}
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1410 return -EAGAIN; 1407 return -EAGAIN;
1411 1408
1412 /* Only a commited time event can update the write stamp */ 1409 /* Only a commited time event can update the write stamp */
1413 if (rb_is_commit(cpu_buffer, event)) { 1410 if (rb_event_is_commit(cpu_buffer, event)) {
1414 /* 1411 /*
1415 * If this is the first on the page, then we need to 1412 * If this is the first on the page, then it was
1416 * update the page itself, and just put in a zero. 1413 * updated with the page itself. Try to discard it
1414 * and if we can't just make it zero.
1417 */ 1415 */
1418 if (rb_event_index(event)) { 1416 if (rb_event_index(event)) {
1419 event->time_delta = *delta & TS_MASK; 1417 event->time_delta = *delta & TS_MASK;
1420 event->array[0] = *delta >> TS_SHIFT; 1418 event->array[0] = *delta >> TS_SHIFT;
1421 } else { 1419 } else {
1422 cpu_buffer->commit_page->page->time_stamp = *ts;
1423 /* try to discard, since we do not need this */ 1420 /* try to discard, since we do not need this */
1424 if (!rb_try_to_discard(cpu_buffer, event)) { 1421 if (!rb_try_to_discard(cpu_buffer, event)) {
1425 /* nope, just zero it */ 1422 /* nope, just zero it */
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1445 return ret; 1442 return ret;
1446} 1443}
1447 1444
1445static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
1446{
1447 local_inc(&cpu_buffer->committing);
1448 local_inc(&cpu_buffer->commits);
1449}
1450
1451static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
1452{
1453 unsigned long commits;
1454
1455 if (RB_WARN_ON(cpu_buffer,
1456 !local_read(&cpu_buffer->committing)))
1457 return;
1458
1459 again:
1460 commits = local_read(&cpu_buffer->commits);
1461 /* synchronize with interrupts */
1462 barrier();
1463 if (local_read(&cpu_buffer->committing) == 1)
1464 rb_set_commit_to_write(cpu_buffer);
1465
1466 local_dec(&cpu_buffer->committing);
1467
1468 /* synchronize with interrupts */
1469 barrier();
1470
1471 /*
1472 * Need to account for interrupts coming in between the
1473 * updating of the commit page and the clearing of the
1474 * committing counter.
1475 */
1476 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
1477 !local_read(&cpu_buffer->committing)) {
1478 local_inc(&cpu_buffer->committing);
1479 goto again;
1480 }
1481}
1482
1448static struct ring_buffer_event * 1483static struct ring_buffer_event *
1449rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 1484rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1450 unsigned long length) 1485 unsigned long length)
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1454 int commit = 0; 1489 int commit = 0;
1455 int nr_loops = 0; 1490 int nr_loops = 0;
1456 1491
1492 rb_start_commit(cpu_buffer);
1493
1457 length = rb_calculate_event_length(length); 1494 length = rb_calculate_event_length(length);
1458 again: 1495 again:
1459 /* 1496 /*
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1466 * Bail! 1503 * Bail!
1467 */ 1504 */
1468 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 1505 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1469 return NULL; 1506 goto out_fail;
1470 1507
1471 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); 1508 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
1472 1509
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1497 1534
1498 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 1535 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1499 if (commit == -EBUSY) 1536 if (commit == -EBUSY)
1500 return NULL; 1537 goto out_fail;
1501 1538
1502 if (commit == -EAGAIN) 1539 if (commit == -EAGAIN)
1503 goto again; 1540 goto again;
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1511 if (unlikely(PTR_ERR(event) == -EAGAIN)) 1548 if (unlikely(PTR_ERR(event) == -EAGAIN))
1512 goto again; 1549 goto again;
1513 1550
1514 if (!event) { 1551 if (!event)
1515 if (unlikely(commit)) 1552 goto out_fail;
1516 /*
1517 * Ouch! We needed a timestamp and it was commited. But
1518 * we didn't get our event reserved.
1519 */
1520 rb_set_commit_to_write(cpu_buffer);
1521 return NULL;
1522 }
1523 1553
1524 /* 1554 if (!rb_event_is_commit(cpu_buffer, event))
1525 * If the timestamp was commited, make the commit our entry
1526 * now so that we will update it when needed.
1527 */
1528 if (unlikely(commit))
1529 rb_set_commit_event(cpu_buffer, event);
1530 else if (!rb_is_commit(cpu_buffer, event))
1531 delta = 0; 1555 delta = 0;
1532 1556
1533 event->time_delta = delta; 1557 event->time_delta = delta;
1534 1558
1535 return event; 1559 return event;
1560
1561 out_fail:
1562 rb_end_commit(cpu_buffer);
1563 return NULL;
1536} 1564}
1537 1565
1538#define TRACE_RECURSIVE_DEPTH 16 1566#define TRACE_RECURSIVE_DEPTH 16
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1642{ 1670{
1643 local_inc(&cpu_buffer->entries); 1671 local_inc(&cpu_buffer->entries);
1644 1672
1645 /* Only process further if we own the commit */ 1673 /*
1646 if (!rb_is_commit(cpu_buffer, event)) 1674 * The event first in the commit queue updates the
1647 return; 1675 * time stamp.
1648 1676 */
1649 cpu_buffer->write_stamp += event->time_delta; 1677 if (rb_event_is_commit(cpu_buffer, event))
1678 cpu_buffer->write_stamp += event->time_delta;
1650 1679
1651 rb_set_commit_to_write(cpu_buffer); 1680 rb_end_commit(cpu_buffer);
1652} 1681}
1653 1682
1654/** 1683/**
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1737 /* The event is discarded regardless */ 1766 /* The event is discarded regardless */
1738 rb_event_discard(event); 1767 rb_event_discard(event);
1739 1768
1769 cpu = smp_processor_id();
1770 cpu_buffer = buffer->buffers[cpu];
1771
1740 /* 1772 /*
1741 * This must only be called if the event has not been 1773 * This must only be called if the event has not been
1742 * committed yet. Thus we can assume that preemption 1774 * committed yet. Thus we can assume that preemption
1743 * is still disabled. 1775 * is still disabled.
1744 */ 1776 */
1745 RB_WARN_ON(buffer, preemptible()); 1777 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1746
1747 cpu = smp_processor_id();
1748 cpu_buffer = buffer->buffers[cpu];
1749 1778
1750 if (!rb_try_to_discard(cpu_buffer, event)) 1779 if (!rb_try_to_discard(cpu_buffer, event))
1751 goto out; 1780 goto out;
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1756 */ 1785 */
1757 local_inc(&cpu_buffer->entries); 1786 local_inc(&cpu_buffer->entries);
1758 out: 1787 out:
1759 /* 1788 rb_end_commit(cpu_buffer);
1760 * If a write came in and pushed the tail page
1761 * we still need to update the commit pointer
1762 * if we were the commit.
1763 */
1764 if (rb_is_commit(cpu_buffer, event))
1765 rb_set_commit_to_write(cpu_buffer);
1766 1789
1767 trace_recursive_unlock(); 1790 trace_recursive_unlock();
1768 1791
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2446} 2469}
2447EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); 2470EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
2448 2471
2472static inline int rb_ok_to_lock(void)
2473{
2474 /*
2475 * If an NMI die dumps out the content of the ring buffer
2476 * do not grab locks. We also permanently disable the ring
2477 * buffer too. A one time deal is all you get from reading
2478 * the ring buffer from an NMI.
2479 */
2480 if (likely(!in_nmi() && !oops_in_progress))
2481 return 1;
2482
2483 tracing_off_permanent();
2484 return 0;
2485}
2486
2449/** 2487/**
2450 * ring_buffer_peek - peek at the next event to be read 2488 * ring_buffer_peek - peek at the next event to be read
2451 * @buffer: The ring buffer to read 2489 * @buffer: The ring buffer to read
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2461 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2499 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2462 struct ring_buffer_event *event; 2500 struct ring_buffer_event *event;
2463 unsigned long flags; 2501 unsigned long flags;
2502 int dolock;
2464 2503
2465 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2504 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2466 return NULL; 2505 return NULL;
2467 2506
2507 dolock = rb_ok_to_lock();
2468 again: 2508 again:
2469 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2509 local_irq_save(flags);
2510 if (dolock)
2511 spin_lock(&cpu_buffer->reader_lock);
2470 event = rb_buffer_peek(buffer, cpu, ts); 2512 event = rb_buffer_peek(buffer, cpu, ts);
2471 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2513 if (dolock)
2514 spin_unlock(&cpu_buffer->reader_lock);
2515 local_irq_restore(flags);
2472 2516
2473 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 2517 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2474 cpu_relax(); 2518 cpu_relax();
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2520 struct ring_buffer_per_cpu *cpu_buffer; 2564 struct ring_buffer_per_cpu *cpu_buffer;
2521 struct ring_buffer_event *event = NULL; 2565 struct ring_buffer_event *event = NULL;
2522 unsigned long flags; 2566 unsigned long flags;
2567 int dolock;
2568
2569 dolock = rb_ok_to_lock();
2523 2570
2524 again: 2571 again:
2525 /* might be called in atomic */ 2572 /* might be called in atomic */
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2529 goto out; 2576 goto out;
2530 2577
2531 cpu_buffer = buffer->buffers[cpu]; 2578 cpu_buffer = buffer->buffers[cpu];
2532 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2579 local_irq_save(flags);
2580 if (dolock)
2581 spin_lock(&cpu_buffer->reader_lock);
2533 2582
2534 event = rb_buffer_peek(buffer, cpu, ts); 2583 event = rb_buffer_peek(buffer, cpu, ts);
2535 if (!event) 2584 if (!event)
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2538 rb_advance_reader(cpu_buffer); 2587 rb_advance_reader(cpu_buffer);
2539 2588
2540 out_unlock: 2589 out_unlock:
2541 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2590 if (dolock)
2591 spin_unlock(&cpu_buffer->reader_lock);
2592 local_irq_restore(flags);
2542 2593
2543 out: 2594 out:
2544 preempt_enable(); 2595 preempt_enable();
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2680 cpu_buffer->overrun = 0; 2731 cpu_buffer->overrun = 0;
2681 cpu_buffer->read = 0; 2732 cpu_buffer->read = 0;
2682 local_set(&cpu_buffer->entries, 0); 2733 local_set(&cpu_buffer->entries, 0);
2734 local_set(&cpu_buffer->committing, 0);
2735 local_set(&cpu_buffer->commits, 0);
2683 2736
2684 cpu_buffer->write_stamp = 0; 2737 cpu_buffer->write_stamp = 0;
2685 cpu_buffer->read_stamp = 0; 2738 cpu_buffer->read_stamp = 0;
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
2734int ring_buffer_empty(struct ring_buffer *buffer) 2787int ring_buffer_empty(struct ring_buffer *buffer)
2735{ 2788{
2736 struct ring_buffer_per_cpu *cpu_buffer; 2789 struct ring_buffer_per_cpu *cpu_buffer;
2790 unsigned long flags;
2791 int dolock;
2737 int cpu; 2792 int cpu;
2793 int ret;
2794
2795 dolock = rb_ok_to_lock();
2738 2796
2739 /* yes this is racy, but if you don't like the race, lock the buffer */ 2797 /* yes this is racy, but if you don't like the race, lock the buffer */
2740 for_each_buffer_cpu(buffer, cpu) { 2798 for_each_buffer_cpu(buffer, cpu) {
2741 cpu_buffer = buffer->buffers[cpu]; 2799 cpu_buffer = buffer->buffers[cpu];
2742 if (!rb_per_cpu_empty(cpu_buffer)) 2800 local_irq_save(flags);
2801 if (dolock)
2802 spin_lock(&cpu_buffer->reader_lock);
2803 ret = rb_per_cpu_empty(cpu_buffer);
2804 if (dolock)
2805 spin_unlock(&cpu_buffer->reader_lock);
2806 local_irq_restore(flags);
2807
2808 if (!ret)
2743 return 0; 2809 return 0;
2744 } 2810 }
2745 2811
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
2755int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) 2821int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2756{ 2822{
2757 struct ring_buffer_per_cpu *cpu_buffer; 2823 struct ring_buffer_per_cpu *cpu_buffer;
2824 unsigned long flags;
2825 int dolock;
2758 int ret; 2826 int ret;
2759 2827
2760 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2828 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2761 return 1; 2829 return 1;
2762 2830
2831 dolock = rb_ok_to_lock();
2832
2763 cpu_buffer = buffer->buffers[cpu]; 2833 cpu_buffer = buffer->buffers[cpu];
2834 local_irq_save(flags);
2835 if (dolock)
2836 spin_lock(&cpu_buffer->reader_lock);
2764 ret = rb_per_cpu_empty(cpu_buffer); 2837 ret = rb_per_cpu_empty(cpu_buffer);
2765 2838 if (dolock)
2839 spin_unlock(&cpu_buffer->reader_lock);
2840 local_irq_restore(flags);
2766 2841
2767 return ret; 2842 return ret;
2768} 2843}
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self,
3108 switch (action) { 3183 switch (action) {
3109 case CPU_UP_PREPARE: 3184 case CPU_UP_PREPARE:
3110 case CPU_UP_PREPARE_FROZEN: 3185 case CPU_UP_PREPARE_FROZEN:
3111 if (cpu_isset(cpu, *buffer->cpumask)) 3186 if (cpumask_test_cpu(cpu, buffer->cpumask))
3112 return NOTIFY_OK; 3187 return NOTIFY_OK;
3113 3188
3114 buffer->buffers[cpu] = 3189 buffer->buffers[cpu] =
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self,
3119 return NOTIFY_OK; 3194 return NOTIFY_OK;
3120 } 3195 }
3121 smp_wmb(); 3196 smp_wmb();
3122 cpu_set(cpu, *buffer->cpumask); 3197 cpumask_set_cpu(cpu, buffer->cpumask);
3123 break; 3198 break;
3124 case CPU_DOWN_PREPARE: 3199 case CPU_DOWN_PREPARE:
3125 case CPU_DOWN_PREPARE_FROZEN: 3200 case CPU_DOWN_PREPARE_FROZEN:
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 8d68e149a8b3..573d3cc762c3 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu)
102 event = (void *)&rpage->data[i]; 102 event = (void *)&rpage->data[i];
103 switch (event->type_len) { 103 switch (event->type_len) {
104 case RINGBUF_TYPE_PADDING: 104 case RINGBUF_TYPE_PADDING:
105 /* We don't expect any padding */ 105 /* failed writes may be discarded events */
106 KILL_TEST(); 106 if (!event->time_delta)
107 KILL_TEST();
108 inc = event->array[0] + 4;
107 break; 109 break;
108 case RINGBUF_TYPE_TIME_EXTEND: 110 case RINGBUF_TYPE_TIME_EXTEND:
109 inc = 8; 111 inc = 8;
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu)
119 KILL_TEST(); 121 KILL_TEST();
120 break; 122 break;
121 } 123 }
122 inc = event->array[0]; 124 inc = event->array[0] + 4;
123 break; 125 break;
124 default: 126 default:
125 entry = ring_buffer_event_data(event); 127 entry = ring_buffer_event_data(event);
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void)
201 * Hammer the buffer for 10 secs (this may 203 * Hammer the buffer for 10 secs (this may
202 * make the system stall) 204 * make the system stall)
203 */ 205 */
204 pr_info("Starting ring buffer hammer\n"); 206 trace_printk("Starting ring buffer hammer\n");
205 do_gettimeofday(&start_tv); 207 do_gettimeofday(&start_tv);
206 do { 208 do {
207 struct ring_buffer_event *event; 209 struct ring_buffer_event *event;
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void)
237#endif 239#endif
238 240
239 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); 241 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
240 pr_info("End ring buffer hammer\n"); 242 trace_printk("End ring buffer hammer\n");
241 243
242 if (consumer) { 244 if (consumer) {
243 /* Init both completions here to avoid races */ 245 /* Init both completions here to avoid races */
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void)
260 overruns = ring_buffer_overruns(buffer); 262 overruns = ring_buffer_overruns(buffer);
261 263
262 if (kill_test) 264 if (kill_test)
263 pr_info("ERROR!\n"); 265 trace_printk("ERROR!\n");
264 pr_info("Time: %lld (usecs)\n", time); 266 trace_printk("Time: %lld (usecs)\n", time);
265 pr_info("Overruns: %lld\n", overruns); 267 trace_printk("Overruns: %lld\n", overruns);
266 if (disable_reader) 268 if (disable_reader)
267 pr_info("Read: (reader disabled)\n"); 269 trace_printk("Read: (reader disabled)\n");
268 else 270 else
269 pr_info("Read: %ld (by %s)\n", read, 271 trace_printk("Read: %ld (by %s)\n", read,
270 read_events ? "events" : "pages"); 272 read_events ? "events" : "pages");
271 pr_info("Entries: %lld\n", entries); 273 trace_printk("Entries: %lld\n", entries);
272 pr_info("Total: %lld\n", entries + overruns + read); 274 trace_printk("Total: %lld\n", entries + overruns + read);
273 pr_info("Missed: %ld\n", missed); 275 trace_printk("Missed: %ld\n", missed);
274 pr_info("Hit: %ld\n", hit); 276 trace_printk("Hit: %ld\n", hit);
275 277
276 /* Convert time from usecs to millisecs */ 278 /* Convert time from usecs to millisecs */
277 do_div(time, USEC_PER_MSEC); 279 do_div(time, USEC_PER_MSEC);
278 if (time) 280 if (time)
279 hit /= (long)time; 281 hit /= (long)time;
280 else 282 else
281 pr_info("TIME IS ZERO??\n"); 283 trace_printk("TIME IS ZERO??\n");
282 284
283 pr_info("Entries per millisec: %ld\n", hit); 285 trace_printk("Entries per millisec: %ld\n", hit);
284 286
285 if (hit) { 287 if (hit) {
286 /* Calculate the average time in nanosecs */ 288 /* Calculate the average time in nanosecs */
287 avg = NSEC_PER_MSEC / hit; 289 avg = NSEC_PER_MSEC / hit;
288 pr_info("%ld ns per entry\n", avg); 290 trace_printk("%ld ns per entry\n", avg);
289 } 291 }
290 292
291 if (missed) { 293 if (missed) {
292 if (time) 294 if (time)
293 missed /= (long)time; 295 missed /= (long)time;
294 296
295 pr_info("Total iterations per millisec: %ld\n", hit + missed); 297 trace_printk("Total iterations per millisec: %ld\n",
298 hit + missed);
296 299
297 /* it is possible that hit + missed will overflow and be zero */ 300 /* it is possible that hit + missed will overflow and be zero */
298 if (!(hit + missed)) { 301 if (!(hit + missed)) {
299 pr_info("hit + missed overflowed and totalled zero!\n"); 302 trace_printk("hit + missed overflowed and totalled zero!\n");
300 hit--; /* make it non zero */ 303 hit--; /* make it non zero */
301 } 304 }
302 305
303 /* Caculate the average time in nanosecs */ 306 /* Caculate the average time in nanosecs */
304 avg = NSEC_PER_MSEC / (hit + missed); 307 avg = NSEC_PER_MSEC / (hit + missed);
305 pr_info("%ld ns per entry\n", avg); 308 trace_printk("%ld ns per entry\n", avg);
306 } 309 }
307} 310}
308 311
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg)
353 356
354 ring_buffer_producer(); 357 ring_buffer_producer();
355 358
356 pr_info("Sleeping for 10 secs\n"); 359 trace_printk("Sleeping for 10 secs\n");
357 set_current_state(TASK_INTERRUPTIBLE); 360 set_current_state(TASK_INTERRUPTIBLE);
358 schedule_timeout(HZ * SLEEP_TIME); 361 schedule_timeout(HZ * SLEEP_TIME);
359 __set_current_state(TASK_RUNNING); 362 __set_current_state(TASK_RUNNING);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1878bfb2e1e..076fa6f0ee48 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2191 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 2191 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2192 return -ENOMEM; 2192 return -ENOMEM;
2193 2193
2194 mutex_lock(&tracing_cpumask_update_lock);
2195 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 2194 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2196 if (err) 2195 if (err)
2197 goto err_unlock; 2196 goto err_unlock;
2198 2197
2198 mutex_lock(&tracing_cpumask_update_lock);
2199
2199 local_irq_disable(); 2200 local_irq_disable();
2200 __raw_spin_lock(&ftrace_max_lock); 2201 __raw_spin_lock(&ftrace_max_lock);
2201 for_each_tracing_cpu(cpu) { 2202 for_each_tracing_cpu(cpu) {
@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2223 return count; 2224 return count;
2224 2225
2225err_unlock: 2226err_unlock:
2226 mutex_unlock(&tracing_cpumask_update_lock); 2227 free_cpumask_var(tracing_cpumask_new);
2227 free_cpumask_var(tracing_cpumask);
2228 2228
2229 return err; 2229 return err;
2230} 2230}
@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3626 struct trace_seq *s; 3626 struct trace_seq *s;
3627 unsigned long cnt; 3627 unsigned long cnt;
3628 3628
3629 s = kmalloc(sizeof(*s), GFP_ATOMIC); 3629 s = kmalloc(sizeof(*s), GFP_KERNEL);
3630 if (!s) 3630 if (!s)
3631 return ENOMEM; 3631 return ENOMEM;
3632 3632
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index db6e54bdb596..936c621bbf46 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,8 +27,6 @@
27#include "trace.h" 27#include "trace.h"
28#include "trace_output.h" 28#include "trace_output.h"
29 29
30static DEFINE_MUTEX(filter_mutex);
31
32enum filter_op_ids 30enum filter_op_ids
33{ 31{
34 OP_OR, 32 OP_OR,
@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
178static int filter_pred_strloc(struct filter_pred *pred, void *event, 176static int filter_pred_strloc(struct filter_pred *pred, void *event,
179 int val1, int val2) 177 int val1, int val2)
180{ 178{
181 int str_loc = *(int *)(event + pred->offset); 179 unsigned short str_loc = *(unsigned short *)(event + pred->offset);
182 char *addr = (char *)(event + str_loc); 180 char *addr = (char *)(event + str_loc);
183 int cmp, match; 181 int cmp, match;
184 182
@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
294{ 292{
295 struct event_filter *filter = call->filter; 293 struct event_filter *filter = call->filter;
296 294
297 mutex_lock(&filter_mutex); 295 mutex_lock(&event_mutex);
298 if (filter->filter_string) 296 if (filter->filter_string)
299 trace_seq_printf(s, "%s\n", filter->filter_string); 297 trace_seq_printf(s, "%s\n", filter->filter_string);
300 else 298 else
301 trace_seq_printf(s, "none\n"); 299 trace_seq_printf(s, "none\n");
302 mutex_unlock(&filter_mutex); 300 mutex_unlock(&event_mutex);
303} 301}
304 302
305void print_subsystem_event_filter(struct event_subsystem *system, 303void print_subsystem_event_filter(struct event_subsystem *system,
@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system,
307{ 305{
308 struct event_filter *filter = system->filter; 306 struct event_filter *filter = system->filter;
309 307
310 mutex_lock(&filter_mutex); 308 mutex_lock(&event_mutex);
311 if (filter->filter_string) 309 if (filter->filter_string)
312 trace_seq_printf(s, "%s\n", filter->filter_string); 310 trace_seq_printf(s, "%s\n", filter->filter_string);
313 else 311 else
314 trace_seq_printf(s, "none\n"); 312 trace_seq_printf(s, "none\n");
315 mutex_unlock(&filter_mutex); 313 mutex_unlock(&event_mutex);
316} 314}
317 315
318static struct ftrace_event_field * 316static struct ftrace_event_field *
@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call)
381 filter_free_pred(filter->preds[i]); 379 filter_free_pred(filter->preds[i]);
382 } 380 }
383 kfree(filter->preds); 381 kfree(filter->preds);
382 kfree(filter->filter_string);
384 kfree(filter); 383 kfree(filter);
385 call->filter = NULL; 384 call->filter = NULL;
386} 385}
@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
433 filter->n_preds = 0; 432 filter->n_preds = 0;
434 } 433 }
435 434
436 mutex_lock(&event_mutex);
437 list_for_each_entry(call, &ftrace_events, list) { 435 list_for_each_entry(call, &ftrace_events, list) {
438 if (!call->define_fields) 436 if (!call->define_fields)
439 continue; 437 continue;
@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
443 remove_filter_string(call->filter); 441 remove_filter_string(call->filter);
444 } 442 }
445 } 443 }
446 mutex_unlock(&event_mutex);
447} 444}
448 445
449static int filter_add_pred_fn(struct filter_parse_state *ps, 446static int filter_add_pred_fn(struct filter_parse_state *ps,
@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
546 filter_pred_fn_t fn; 543 filter_pred_fn_t fn;
547 unsigned long long val; 544 unsigned long long val;
548 int string_type; 545 int string_type;
546 int ret;
549 547
550 pred->fn = filter_pred_none; 548 pred->fn = filter_pred_none;
551 549
@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps,
581 pred->not = 1; 579 pred->not = 1;
582 return filter_add_pred_fn(ps, call, pred, fn); 580 return filter_add_pred_fn(ps, call, pred, fn);
583 } else { 581 } else {
584 if (strict_strtoull(pred->str_val, 0, &val)) { 582 if (field->is_signed)
583 ret = strict_strtoll(pred->str_val, 0, &val);
584 else
585 ret = strict_strtoull(pred->str_val, 0, &val);
586 if (ret) {
585 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 587 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
586 return -EINVAL; 588 return -EINVAL;
587 } 589 }
@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
625 filter->preds[filter->n_preds] = pred; 627 filter->preds[filter->n_preds] = pred;
626 filter->n_preds++; 628 filter->n_preds++;
627 629
628 mutex_lock(&event_mutex);
629 list_for_each_entry(call, &ftrace_events, list) { 630 list_for_each_entry(call, &ftrace_events, list) {
630 631
631 if (!call->define_fields) 632 if (!call->define_fields)
@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
636 637
637 err = filter_add_pred(ps, call, pred); 638 err = filter_add_pred(ps, call, pred);
638 if (err) { 639 if (err) {
639 mutex_unlock(&event_mutex);
640 filter_free_subsystem_preds(system); 640 filter_free_subsystem_preds(system);
641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
642 goto out; 642 goto out;
643 } 643 }
644 replace_filter_string(call->filter, filter_string); 644 replace_filter_string(call->filter, filter_string);
645 } 645 }
646 mutex_unlock(&event_mutex);
647out: 646out:
648 return err; 647 return err;
649} 648}
@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1070 1069
1071 struct filter_parse_state *ps; 1070 struct filter_parse_state *ps;
1072 1071
1073 mutex_lock(&filter_mutex); 1072 mutex_lock(&event_mutex);
1074 1073
1075 if (!strcmp(strstrip(filter_string), "0")) { 1074 if (!strcmp(strstrip(filter_string), "0")) {
1076 filter_disable_preds(call); 1075 filter_disable_preds(call);
1077 remove_filter_string(call->filter); 1076 remove_filter_string(call->filter);
1078 mutex_unlock(&filter_mutex); 1077 mutex_unlock(&event_mutex);
1079 return 0; 1078 return 0;
1080 } 1079 }
1081 1080
@@ -1103,7 +1102,7 @@ out:
1103 postfix_clear(ps); 1102 postfix_clear(ps);
1104 kfree(ps); 1103 kfree(ps);
1105out_unlock: 1104out_unlock:
1106 mutex_unlock(&filter_mutex); 1105 mutex_unlock(&event_mutex);
1107 1106
1108 return err; 1107 return err;
1109} 1108}
@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1115 1114
1116 struct filter_parse_state *ps; 1115 struct filter_parse_state *ps;
1117 1116
1118 mutex_lock(&filter_mutex); 1117 mutex_lock(&event_mutex);
1119 1118
1120 if (!strcmp(strstrip(filter_string), "0")) { 1119 if (!strcmp(strstrip(filter_string), "0")) {
1121 filter_free_subsystem_preds(system); 1120 filter_free_subsystem_preds(system);
1122 remove_filter_string(system->filter); 1121 remove_filter_string(system->filter);
1123 mutex_unlock(&filter_mutex); 1122 mutex_unlock(&event_mutex);
1124 return 0; 1123 return 0;
1125 } 1124 }
1126 1125
@@ -1148,7 +1147,7 @@ out:
1148 postfix_clear(ps); 1147 postfix_clear(ps);
1149 kfree(ps); 1148 kfree(ps);
1150out_unlock: 1149out_unlock:
1151 mutex_unlock(&filter_mutex); 1150 mutex_unlock(&event_mutex);
1152 1151
1153 return err; 1152 return err;
1154} 1153}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c9a0b7df44ff..90f134764837 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void)
193static void tracing_stop_function_trace(void) 193static void tracing_stop_function_trace(void)
194{ 194{
195 ftrace_function_enabled = 0; 195 ftrace_function_enabled = 0;
196 /* OK if they are not registered */ 196
197 unregister_ftrace_function(&trace_stack_ops); 197 if (func_flags.val & TRACE_FUNC_OPT_STACK)
198 unregister_ftrace_function(&trace_ops); 198 unregister_ftrace_function(&trace_stack_ops);
199 else
200 unregister_ftrace_function(&trace_ops);
199} 201}
200 202
201static int func_set_flag(u32 old_flags, u32 bit, int set) 203static int func_set_flag(u32 old_flags, u32 bit, int set)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b592418d8b2..d2249abafb53 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = {
57 57
58/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
59int 59int
60ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) 60ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
61 unsigned long frame_pointer)
61{ 62{
62 unsigned long long calltime; 63 unsigned long long calltime;
63 int index; 64 int index;
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
85 current->ret_stack[index].func = func; 86 current->ret_stack[index].func = func;
86 current->ret_stack[index].calltime = calltime; 87 current->ret_stack[index].calltime = calltime;
87 current->ret_stack[index].subtime = 0; 88 current->ret_stack[index].subtime = 0;
89 current->ret_stack[index].fp = frame_pointer;
88 *depth = index; 90 *depth = index;
89 91
90 return 0; 92 return 0;
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
92 94
93/* Retrieve a function return address to the trace stack on thread info.*/ 95/* Retrieve a function return address to the trace stack on thread info.*/
94static void 96static void
95ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) 97ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
98 unsigned long frame_pointer)
96{ 99{
97 int index; 100 int index;
98 101
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
106 return; 109 return;
107 } 110 }
108 111
112#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
113 /*
114 * The arch may choose to record the frame pointer used
115 * and check it here to make sure that it is what we expect it
116 * to be. If gcc does not set the place holder of the return
117 * address in the frame pointer, and does a copy instead, then
118 * the function graph trace will fail. This test detects this
119 * case.
120 *
121 * Currently, x86_32 with optimize for size (-Os) makes the latest
122 * gcc do the above.
123 */
124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
125 ftrace_graph_stop();
126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
127 " from func %pF return to %lx\n",
128 current->ret_stack[index].fp,
129 frame_pointer,
130 (void *)current->ret_stack[index].func,
131 current->ret_stack[index].ret);
132 *ret = (unsigned long)panic;
133 return;
134 }
135#endif
136
109 *ret = current->ret_stack[index].ret; 137 *ret = current->ret_stack[index].ret;
110 trace->func = current->ret_stack[index].func; 138 trace->func = current->ret_stack[index].func;
111 trace->calltime = current->ret_stack[index].calltime; 139 trace->calltime = current->ret_stack[index].calltime;
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
117 * Send the trace to the ring-buffer. 145 * Send the trace to the ring-buffer.
118 * @return the original return address. 146 * @return the original return address.
119 */ 147 */
120unsigned long ftrace_return_to_handler(void) 148unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
121{ 149{
122 struct ftrace_graph_ret trace; 150 struct ftrace_graph_ret trace;
123 unsigned long ret; 151 unsigned long ret;
124 152
125 ftrace_pop_return_trace(&trace, &ret); 153 ftrace_pop_return_trace(&trace, &ret, frame_pointer);
126 trace.rettime = trace_clock_local(); 154 trace.rettime = trace_clock_local();
127 ftrace_graph_return(&trace); 155 ftrace_graph_return(&trace);
128 barrier(); 156 barrier();
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 815237a55af8..8a82b4b8ea52 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,16 @@
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17 17
18static struct uts_namespace *create_uts_ns(void)
19{
20 struct uts_namespace *uts_ns;
21
22 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
23 if (uts_ns)
24 kref_init(&uts_ns->kref);
25 return uts_ns;
26}
27
18/* 28/*
19 * Clone a new ns copying an original utsname, setting refcount to 1 29 * Clone a new ns copying an original utsname, setting refcount to 1
20 * @old_ns: namespace to clone 30 * @old_ns: namespace to clone
@@ -24,14 +34,13 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
24{ 34{
25 struct uts_namespace *ns; 35 struct uts_namespace *ns;
26 36
27 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); 37 ns = create_uts_ns();
28 if (!ns) 38 if (!ns)
29 return ERR_PTR(-ENOMEM); 39 return ERR_PTR(-ENOMEM);
30 40
31 down_read(&uts_sem); 41 down_read(&uts_sem);
32 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 42 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
33 up_read(&uts_sem); 43 up_read(&uts_sem);
34 kref_init(&ns->kref);
35 return ns; 44 return ns;
36} 45}
37 46