Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                |    4
-rw-r--r--  kernel/cpu.c                   |    1
-rw-r--r--  kernel/cpuset.c                |    4
-rw-r--r--  kernel/exec_domain.c           |    2
-rw-r--r--  kernel/exit.c                  |  451
-rw-r--r--  kernel/fork.c                  |    6
-rw-r--r--  kernel/hrtimer.c               |    2
-rw-r--r--  kernel/irq/manage.c            |   39
-rw-r--r--  kernel/kmod.c                  |    2
-rw-r--r--  kernel/kthread.c               |    2
-rw-r--r--  kernel/module.c                |  336
-rw-r--r--  kernel/pid.c                   |    1
-rw-r--r--  kernel/power/Kconfig           |    2
-rw-r--r--  kernel/power/disk.c            |   50
-rw-r--r--  kernel/power/main.c            |   16
-rw-r--r--  kernel/power/process.c         |   97
-rw-r--r--  kernel/power/user.c            |   71
-rw-r--r--  kernel/profile.c               |    6
-rw-r--r--  kernel/ptrace.c                |   37
-rw-r--r--  kernel/rcuclassic.c            |   34
-rw-r--r--  kernel/rcupdate.c              |   71
-rw-r--r--  kernel/rcupreempt.c            |  418
-rw-r--r--  kernel/rcupreempt_trace.c      |    1
-rw-r--r--  kernel/rcutorture.c            |  174
-rw-r--r--  kernel/rtmutex-tester.c        |    7
-rw-r--r--  kernel/sched.c                 |    8
-rw-r--r--  kernel/smp.c                   |  383
-rw-r--r--  kernel/softirq.c               |    4
-rw-r--r--  kernel/sys_ni.c                |    1
-rw-r--r--  kernel/sysctl.c                |   17
-rw-r--r--  kernel/time/clocksource.c      |    8
-rw-r--r--  kernel/time/tick-broadcast.c   |    2
32 files changed, 1721 insertions, 536 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f6328e16dfdd..985ddb7da4d0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
-CFLAGS_REMOVE_sched.o = -mno-spe
-
 ifdef CONFIG_FTRACE
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@@ -39,6 +38,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
 obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b11f06dc149a..cfb1d43ab801 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -299,6 +299,7 @@ int __ref cpu_down(unsigned int cpu)
 	cpu_maps_update_done();
 	return err;
 }
+EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /* Requires cpu_add_remove_lock to be held */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..d2cc67dac8b1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -679,7 +679,9 @@ restart:
 				if (apn == b->pn) {
 					cpus_or(*dp, *dp, b->cpus_allowed);
 					b->pn = -1;
-					update_domain_attr(dattr, b);
+					if (dattr)
+						update_domain_attr(dattr
+								   + nslot, b);
 				}
 			}
 			nslot++;
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index a9e6bad9f706..c1ef192aa655 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -65,7 +65,7 @@ lookup_exec_domain(u_long personality)
 		goto out;
 	}
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	read_unlock(&exec_domains_lock);
 	request_module("personality-%ld", pers);
 	read_lock(&exec_domains_lock);
diff --git a/kernel/exit.c b/kernel/exit.c
index ceb258782835..93d2711b9381 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p)
 		__get_cpu_var(process_counts)--;
 	}
 	list_del_rcu(&p->thread_group);
-	remove_parent(p);
+	list_del_init(&p->sibling);
 }
 
 /*
@@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	put_task_struct(container_of(rhp, struct task_struct, rcu));
 }
 
+/*
+ * Do final ptrace-related cleanup of a zombie being reaped.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_release_task(struct task_struct *p)
+{
+	BUG_ON(!list_empty(&p->ptraced));
+	ptrace_unlink(p);
+	BUG_ON(!list_empty(&p->ptrace_entry));
+}
+
 void release_task(struct task_struct * p)
 {
 	struct task_struct *leader;
@@ -160,8 +172,7 @@ repeat:
 	atomic_dec(&p->user->processes);
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
-	ptrace_unlink(p);
-	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
+	ptrace_release_task(p);
 	__exit_signal(p);
 
 	/*
@@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void)
 
 	ptrace_unlink(current);
 	/* Reparent to init */
-	remove_parent(current);
 	current->real_parent = current->parent = kthreadd_task;
-	add_parent(current);
+	list_move_tail(&current->sibling, &current->real_parent->children);
 
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
 	current->exit_signal = SIGCHLD;
@@ -692,37 +702,97 @@ static void exit_mm(struct task_struct * tsk)
 	mmput(mm);
 }
 
-static void
-reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
-{
-	if (p->pdeath_signal)
-		/* We already hold the tasklist_lock here. */
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
-	/* Move the child from its dying parent to the new one. */
-	if (unlikely(traced)) {
-		/* Preserve ptrace links if someone else is tracing this child. */
-		list_del_init(&p->ptrace_list);
-		if (ptrace_reparented(p))
-			list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
-	} else {
-		/* If this child is being traced, then we're the one tracing it
-		 * anyway, so let go of it.
-		 */
-		p->ptrace = 0;
-		remove_parent(p);
-		p->parent = p->real_parent;
-		add_parent(p);
-
-		if (task_is_traced(p)) {
-			/*
-			 * If it was at a trace stop, turn it into
-			 * a normal stop since it's no longer being
-			 * traced.
-			 */
-			ptrace_untrace(p);
-		}
-	}
+/*
+ * Return nonzero if @parent's children should reap themselves.
+ *
+ * Called with write_lock_irq(&tasklist_lock) held.
+ */
+static int ignoring_children(struct task_struct *parent)
+{
+	int ret;
+	struct sighand_struct *psig = parent->sighand;
+	unsigned long flags;
+	spin_lock_irqsave(&psig->siglock, flags);
+	ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
+	       (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
+	spin_unlock_irqrestore(&psig->siglock, flags);
+	return ret;
+}
+
+/*
+ * Detach all tasks we were using ptrace on.
+ * Any that need to be release_task'd are put on the @dead list.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
+{
+	struct task_struct *p, *n;
+	int ign = -1;
+
+	list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+		__ptrace_unlink(p);
+
+		if (p->exit_state != EXIT_ZOMBIE)
+			continue;
+
+		/*
+		 * If it's a zombie, our attachedness prevented normal
+		 * parent notification or self-reaping.  Do notification
+		 * now if it would have happened earlier.  If it should
+		 * reap itself, add it to the @dead list.  We can't call
+		 * release_task() here because we already hold tasklist_lock.
+		 *
+		 * If it's our own child, there is no notification to do.
+		 * But if our normal children self-reap, then this child
+		 * was prevented by ptrace and we must reap it now.
+		 */
+		if (!task_detached(p) && thread_group_empty(p)) {
+			if (!same_thread_group(p->real_parent, parent))
+				do_notify_parent(p, p->exit_signal);
+			else {
+				if (ign < 0)
+					ign = ignoring_children(parent);
+				if (ign)
+					p->exit_signal = -1;
+			}
+		}
+
+		if (task_detached(p)) {
+			/*
+			 * Mark it as in the process of being reaped.
+			 */
+			p->exit_state = EXIT_DEAD;
+			list_add(&p->ptrace_entry, dead);
+		}
+	}
+}
+
+/*
+ * Finish up exit-time ptrace cleanup.
+ *
+ * Called without locks.
+ */
+static void ptrace_exit_finish(struct task_struct *parent,
+			       struct list_head *dead)
+{
+	struct task_struct *p, *n;
+
+	BUG_ON(!list_empty(&parent->ptraced));
+
+	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+		list_del_init(&p->ptrace_entry);
+		release_task(p);
+	}
+}
+
+static void reparent_thread(struct task_struct *p, struct task_struct *father)
+{
+	if (p->pdeath_signal)
+		/* We already hold the tasklist_lock here. */
+		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+	list_move_tail(&p->sibling, &p->real_parent->children);
 
 	/* If this is a threaded reparent there is no need to
 	 * notify anyone anything has happened.
@@ -737,7 +807,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	/* If we'd notified the old parent about this child's death,
 	 * also notify the new parent.
 	 */
-	if (!traced && p->exit_state == EXIT_ZOMBIE &&
+	if (!ptrace_reparented(p) &&
+	    p->exit_state == EXIT_ZOMBIE &&
 	    !task_detached(p) && thread_group_empty(p))
 		do_notify_parent(p, p->exit_signal);
 
@@ -754,12 +825,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 static void forget_original_parent(struct task_struct *father)
 {
 	struct task_struct *p, *n, *reaper = father;
-	struct list_head ptrace_dead;
-
-	INIT_LIST_HEAD(&ptrace_dead);
+	LIST_HEAD(ptrace_dead);
 
 	write_lock_irq(&tasklist_lock);
 
+	/*
+	 * First clean up ptrace if we were using it.
+	 */
+	ptrace_exit(father, &ptrace_dead);
+
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
@@ -768,58 +842,19 @@ static void forget_original_parent(struct task_struct *father)
 		}
 	} while (reaper->flags & PF_EXITING);
 
-	/*
-	 * There are only two places where our children can be:
-	 *
-	 * - in our child list
-	 * - in our ptraced child list
-	 *
-	 * Search them and reparent children.
-	 */
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
-		int ptrace;
-
-		ptrace = p->ptrace;
-
-		/* if father isn't the real parent, then ptrace must be enabled */
-		BUG_ON(father != p->real_parent && !ptrace);
-
-		if (father == p->real_parent) {
-			/* reparent with a reaper, real father it's us */
-			p->real_parent = reaper;
-			reparent_thread(p, father, 0);
-		} else {
-			/* reparent ptraced task to its real parent */
-			__ptrace_unlink (p);
-			if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) &&
-						thread_group_empty(p))
-				do_notify_parent(p, p->exit_signal);
-		}
-
-		/*
-		 * if the ptraced child is a detached zombie we must collect
-		 * it before we exit, or it will remain zombie forever since
-		 * we prevented it from self-reap itself while it was being
-		 * traced by us, to be able to see it in wait4.
-		 */
-		if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p)))
-			list_add(&p->ptrace_list, &ptrace_dead);
-	}
-
-	list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
 		p->real_parent = reaper;
-		reparent_thread(p, father, 1);
+		if (p->parent == father) {
+			BUG_ON(p->ptrace);
+			p->parent = p->real_parent;
+		}
+		reparent_thread(p, father);
 	}
 
 	write_unlock_irq(&tasklist_lock);
 	BUG_ON(!list_empty(&father->children));
-	BUG_ON(!list_empty(&father->ptrace_children));
-
-	list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
-		list_del_init(&p->ptrace_list);
-		release_task(p);
-	}
 
+	ptrace_exit_finish(father, &ptrace_dead);
 }
 
 /*
@@ -1180,13 +1215,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
 		return 0;
 	}
 
-	/*
-	 * Do not consider detached threads that are
-	 * not ptraced:
-	 */
-	if (task_detached(p) && !p->ptrace)
-		return 0;
-
 	/* Wait for all children (clone and not) if __WALL is set;
 	 * otherwise, wait for clone children *only* if __WCLONE is
 	 * set; otherwise, wait for non-clone children *only*. (Note:
@@ -1197,14 +1225,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
 		return 0;
 
 	err = security_task_wait(p);
-	if (likely(!err))
-		return 1;
+	if (err)
+		return err;
 
-	if (type != PIDTYPE_PID)
-		return 0;
-	/* This child was explicitly requested, abort */
-	read_unlock(&tasklist_lock);
-	return err;
+	return 1;
 }
 
 static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
@@ -1238,7 +1262,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_zombie(struct task_struct *p, int noreap,
+static int wait_task_zombie(struct task_struct *p, int options,
 			    struct siginfo __user *infop,
 			    int __user *stat_addr, struct rusage __user *ru)
 {
@@ -1246,7 +1270,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
 
-	if (unlikely(noreap)) {
+	if (!likely(options & WEXITED))
+		return 0;
+
+	if (unlikely(options & WNOWAIT)) {
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1396,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_stopped(struct task_struct *p,
-			     int noreap, struct siginfo __user *infop,
+static int wait_task_stopped(int ptrace, struct task_struct *p,
+			     int options, struct siginfo __user *infop,
 			     int __user *stat_addr, struct rusage __user *ru)
 {
 	int retval, exit_code, why;
 	uid_t uid = 0;	/* unneeded, required by compiler */
 	pid_t pid;
 
+	if (!(options & WUNTRACED))
+		return 0;
+
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 
 	if (unlikely(!task_is_stopped_or_traced(p)))
 		goto unlock_sig;
 
-	if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0)
+	if (!ptrace && p->signal->group_stop_count > 0)
 		/*
 		 * A group stop is in progress and this is the group leader.
 		 * We won't report until all threads have stopped.
@@ -1421,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p,
 	if (!exit_code)
 		goto unlock_sig;
 
-	if (!noreap)
+	if (!unlikely(options & WNOWAIT))
 		p->exit_code = 0;
 
 	uid = p->uid;
@@ -1439,10 +1469,10 @@ unlock_sig:
 	 */
 	get_task_struct(p);
 	pid = task_pid_vnr(p);
-	why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 
-	if (unlikely(noreap))
+	if (unlikely(options & WNOWAIT))
 		return wait_noreap_copyout(p, pid, uid,
 					   why, exit_code,
 					   infop, ru);
@@ -1476,7 +1506,7 @@ unlock_sig:
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_continued(struct task_struct *p, int noreap,
+static int wait_task_continued(struct task_struct *p, int options,
 			       struct siginfo __user *infop,
 			       int __user *stat_addr, struct rusage __user *ru)
 {
@@ -1484,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 	pid_t pid;
 	uid_t uid;
 
+	if (!unlikely(options & WCONTINUED))
+		return 0;
+
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
 		return 0;
 
@@ -1493,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 		spin_unlock_irq(&p->sighand->siglock);
 		return 0;
 	}
-	if (!noreap)
+	if (!unlikely(options & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	spin_unlock_irq(&p->sighand->siglock);
 
@@ -1519,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 	return retval;
 }
 
+/*
+ * Consider @p for a wait by @parent.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue;
+ * then *@notask_error is 0 if @p is an eligible child,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int wait_consider_task(struct task_struct *parent, int ptrace,
+			      struct task_struct *p, int *notask_error,
+			      enum pid_type type, struct pid *pid, int options,
+			      struct siginfo __user *infop,
+			      int __user *stat_addr, struct rusage __user *ru)
+{
+	int ret = eligible_child(type, pid, options, p);
+	if (!ret)
+		return ret;
+
+	if (unlikely(ret < 0)) {
+		/*
+		 * If we have not yet seen any eligible child,
+		 * then let this error code replace -ECHILD.
+		 * A permission error will give the user a clue
+		 * to look for security policy problems, rather
+		 * than for mysterious wait bugs.
+		 */
+		if (*notask_error)
+			*notask_error = ret;
+	}
+
+	if (likely(!ptrace) && unlikely(p->ptrace)) {
+		/*
+		 * This child is hidden by ptrace.
+		 * We aren't allowed to see it now, but eventually we will.
+		 */
+		*notask_error = 0;
+		return 0;
+	}
+
+	if (p->exit_state == EXIT_DEAD)
+		return 0;
+
+	/*
+	 * We don't reap group leaders with subthreads.
+	 */
+	if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
+		return wait_task_zombie(p, options, infop, stat_addr, ru);
+
+	/*
+	 * It's stopped or running now, so it might
+	 * later continue, exit, or stop again.
+	 */
+	*notask_error = 0;
+
+	if (task_is_stopped_or_traced(p))
+		return wait_task_stopped(ptrace, p, options,
+					 infop, stat_addr, ru);
+
+	return wait_task_continued(p, options, infop, stat_addr, ru);
+}
+
+/*
+ * Do the work of do_wait() for one thread in the group, @tsk.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue; then
+ * *@notask_error is 0 if there were any eligible children,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int do_wait_thread(struct task_struct *tsk, int *notask_error,
+			  enum pid_type type, struct pid *pid, int options,
+			  struct siginfo __user *infop, int __user *stat_addr,
+			  struct rusage __user *ru)
+{
+	struct task_struct *p;
+
+	list_for_each_entry(p, &tsk->children, sibling) {
+		/*
+		 * Do not consider detached threads.
+		 */
+		if (!task_detached(p)) {
+			int ret = wait_consider_task(tsk, 0, p, notask_error,
+						     type, pid, options,
+						     infop, stat_addr, ru);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
+			  enum pid_type type, struct pid *pid, int options,
+			  struct siginfo __user *infop, int __user *stat_addr,
+			  struct rusage __user *ru)
+{
+	struct task_struct *p;
+
+	/*
+	 * Traditionally we see ptrace'd stopped tasks regardless of options.
+	 */
+	options |= WUNTRACED;
+
+	list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
+		int ret = wait_consider_task(tsk, 1, p, notask_error,
+					     type, pid, options,
+					     infop, stat_addr, ru);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static long do_wait(enum pid_type type, struct pid *pid, int options,
 		    struct siginfo __user *infop, int __user *stat_addr,
 		    struct rusage __user *ru)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
-	int flag, retval;
+	int retval;
 
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
-	/* If there is nothing that can match our critier just get out */
+	/*
+	 * If there is nothing that can match our critiera just get out.
+	 * We will clear @retval to zero if we see any child that might later
+	 * match our criteria, even if we are not able to reap it yet.
+	 */
 	retval = -ECHILD;
 	if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
 		goto end;
 
-	/*
-	 * We will set this flag if we see any child that might later
-	 * match our criteria, even if we are not able to reap it yet.
-	 */
-	flag = retval = 0;
 	current->state = TASK_INTERRUPTIBLE;
 	read_lock(&tasklist_lock);
 	tsk = current;
 	do {
-		struct task_struct *p;
-
-		list_for_each_entry(p, &tsk->children, sibling) {
-			int ret = eligible_child(type, pid, options, p);
-			if (!ret)
-				continue;
-
-			if (unlikely(ret < 0)) {
-				retval = ret;
-			} else if (task_is_stopped_or_traced(p)) {
-				/*
-				 * It's stopped now, so it might later
-				 * continue, exit, or stop again.
-				 */
-				flag = 1;
-				if (!(p->ptrace & PT_PTRACED) &&
-				    !(options & WUNTRACED))
-					continue;
-
-				retval = wait_task_stopped(p,
-						(options & WNOWAIT), infop,
-						stat_addr, ru);
-			} else if (p->exit_state == EXIT_ZOMBIE &&
-					!delay_group_leader(p)) {
-				/*
-				 * We don't reap group leaders with subthreads.
-				 */
-				if (!likely(options & WEXITED))
-					continue;
-				retval = wait_task_zombie(p,
-						(options & WNOWAIT), infop,
-						stat_addr, ru);
-			} else if (p->exit_state != EXIT_DEAD) {
-				/*
-				 * It's running now, so it might later
-				 * exit, stop, or stop and then continue.
-				 */
-				flag = 1;
-				if (!unlikely(options & WCONTINUED))
-					continue;
-				retval = wait_task_continued(p,
-						(options & WNOWAIT), infop,
-						stat_addr, ru);
-			}
-			if (retval != 0) /* tasklist_lock released */
-				goto end;
-		}
-		if (!flag) {
-			list_for_each_entry(p, &tsk->ptrace_children,
-					    ptrace_list) {
-				flag = eligible_child(type, pid, options, p);
-				if (!flag)
-					continue;
-				if (likely(flag > 0))
-					break;
-				retval = flag;
-				goto end;
-			}
-		}
+		int tsk_result = do_wait_thread(tsk, &retval,
+						type, pid, options,
+						infop, stat_addr, ru);
+		if (!tsk_result)
+			tsk_result = ptrace_do_wait(tsk, &retval,
+						    type, pid, options,
+						    infop, stat_addr, ru);
+		if (tsk_result) {
+			/*
+			 * tasklist_lock is unlocked and we have a final result.
+			 */
+			retval = tsk_result;
+			goto end;
 		}
+
 		if (options & __WNOTHREAD)
 			break;
 		tsk = next_thread(tsk);
@@ -1609,16 +1714,14 @@ repeat:
 	} while (tsk != current);
 	read_unlock(&tasklist_lock);
 
-	if (flag) {
-		if (options & WNOHANG)
-			goto end;
+	if (!retval && !(options & WNOHANG)) {
 		retval = -ERESTARTSYS;
-		if (signal_pending(current))
-			goto end;
-		schedule();
-		goto repeat;
+		if (!signal_pending(current)) {
+			schedule();
+			goto repeat;
+		}
 	}
-	retval = -ECHILD;
+
 end:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&current->signal->wait_chldexit,&wait);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4bd2f516401f..adefc1131f27 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
-	INIT_LIST_HEAD(&p->ptrace_children);
-	INIT_LIST_HEAD(&p->ptrace_list);
+	INIT_LIST_HEAD(&p->ptrace_entry);
+	INIT_LIST_HEAD(&p->ptraced);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
@@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		add_parent(p);
+		list_add_tail(&p->sibling, &p->real_parent->children);
 		if (unlikely(p->ptrace & PT_PTRACED))
 			__ptrace_link(p, current->parent);
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2913a8bff612..b8e4dce80a74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -622,7 +622,7 @@ static void retrigger_next_event(void *arg)
 void clock_was_set(void)
 {
 	/* Retrigger the CPU local events everywhere */
-	on_each_cpu(retrigger_next_event, NULL, 0, 1);
+	on_each_cpu(retrigger_next_event, NULL, 1);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be36010..3cfc0fefb5ee 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -217,6 +217,17 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
+int set_irq_wake_real(unsigned int irq, unsigned int on)
+{
+	struct irq_desc *desc = irq_desc + irq;
+	int ret = -ENXIO;
+
+	if (desc->chip->set_wake)
+		ret = desc->chip->set_wake(irq, on);
+
+	return ret;
+}
+
 /**
  * set_irq_wake - control irq power management wakeup
  * @irq: interrupt to control
@@ -233,30 +244,34 @@ int set_irq_wake(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
-	int ret = -ENXIO;
-	int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
+	int ret = 0;
 
 	/* wakeup-capable irqs can be shared between drivers that
 	 * don't need to have the same sleep mode behaviors.
 	 */
 	spin_lock_irqsave(&desc->lock, flags);
 	if (on) {
-		if (desc->wake_depth++ == 0)
-			desc->status |= IRQ_WAKEUP;
-		else
-			set_wake = NULL;
+		if (desc->wake_depth++ == 0) {
+			ret = set_irq_wake_real(irq, on);
+			if (ret)
+				desc->wake_depth = 0;
+			else
+				desc->status |= IRQ_WAKEUP;
+		}
 	} else {
 		if (desc->wake_depth == 0) {
 			printk(KERN_WARNING "Unbalanced IRQ %d "
 			       "wake disable\n", irq);
 			WARN_ON(1);
-		} else if (--desc->wake_depth == 0)
-			desc->status &= ~IRQ_WAKEUP;
-		else
-			set_wake = NULL;
+		} else if (--desc->wake_depth == 0) {
+			ret = set_irq_wake_real(irq, on);
+			if (ret)
+				desc->wake_depth = 1;
+			else
+				desc->status &= ~IRQ_WAKEUP;
+		}
 	}
-	if (set_wake)
-		ret = desc->chip->set_wake(irq, on);
+
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8df97d3dfda8..90d7af1c1655 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -42,7 +42,7 @@ extern int max_threads;
 
 static struct workqueue_struct *khelper_wq;
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 
 /*
 	modprobe_path is set via /proc/sys.
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 97747cdd37c9..ac3fb7326641 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -235,7 +235,7 @@ int kthreadd(void *unused)
 	set_user_nice(tsk, KTHREAD_NICE_LEVEL);
 	set_cpus_allowed(tsk, CPU_MASK_ALL);
 
-	current->flags |= PF_NOFREEZE;
+	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/module.c b/kernel/module.c
index 5f80478b746d..d8b5605132a0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -70,6 +70,9 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
+/* Bounds of module allocation, for speeding __module_text_address */
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
 int register_module_notifier(struct notifier_block * nb)
 {
 	return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -134,17 +137,19 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_gpl[];
 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
-extern const struct kernel_symbol __start___ksymtab_unused[];
-extern const struct kernel_symbol __stop___ksymtab_unused[];
-extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
-extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
 extern const unsigned long __start___kcrctab[];
 extern const unsigned long __start___kcrctab_gpl[];
 extern const unsigned long __start___kcrctab_gpl_future[];
+#ifdef CONFIG_UNUSED_SYMBOLS
+extern const struct kernel_symbol __start___ksymtab_unused[];
+extern const struct kernel_symbol __stop___ksymtab_unused[];
+extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
+extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
 extern const unsigned long __start___kcrctab_unused[];
 extern const unsigned long __start___kcrctab_unused_gpl[];
+#endif
 
 #ifndef CONFIG_MODVERSIONS
 #define symversion(base, idx) NULL
@@ -152,156 +157,186 @@ extern const unsigned long __start___kcrctab_unused_gpl[];
 #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
 #endif
 
-/* lookup symbol in given range of kernel_symbols */
-static const struct kernel_symbol *lookup_symbol(const char *name,
-	const struct kernel_symbol *start,
-	const struct kernel_symbol *stop)
-{
-	const struct kernel_symbol *ks = start;
-	for (; ks < stop; ks++)
-		if (strcmp(ks->name, name) == 0)
-			return ks;
-	return NULL;
-}
-
-static bool always_ok(bool gplok, bool warn, const char *name)
-{
-	return true;
-}
-
-static bool printk_unused_warning(bool gplok, bool warn, const char *name)
-{
-	if (warn) {
-		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
-		       "however this module is using it.\n", name);
-		printk(KERN_WARNING
-		       "This symbol will go away in the future.\n");
-		printk(KERN_WARNING
-		       "Please evalute if this is the right api to use and if "
-		       "it really is, submit a report the linux kernel "
-		       "mailinglist together with submitting your code for "
-		       "inclusion.\n");
-	}
-	return true;
-}
-
-static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name)
-{
-	if (!gplok)
-		return false;
-	return printk_unused_warning(gplok, warn, name);
-}
-
-static bool gpl_only(bool gplok, bool warn, const char *name)
-{
-	return gplok;
-}
-
-static bool warn_if_not_gpl(bool gplok, bool warn, const char *name)
-{
-	if (!gplok && warn) {
-		printk(KERN_WARNING "Symbol %s is being used "
-		       "by a non-GPL module, which will not "
-		       "be allowed in the future\n", name);
-		printk(KERN_WARNING "Please see the file "
-		       "Documentation/feature-removal-schedule.txt "
-		       "in the kernel source tree for more details.\n");
-	}
-	return true;
-}
-
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
 	const unsigned long *crcs;
-	bool (*check)(bool gplok, bool warn, const char *name);
+	enum {
+		NOT_GPL_ONLY,
+		GPL_ONLY,
+		WILL_BE_GPL_ONLY,
+	} licence;
+	bool unused;
 };
 
-/* Look through this array of symbol tables for a symbol match which
- * passes the check function. */
-static const struct kernel_symbol *search_symarrays(const struct symsearch *arr,
-						    unsigned int num,
-						    const char *name,
-						    bool gplok,
-						    bool warn,
-						    const unsigned long **crc)
+static bool each_symbol_in_section(const struct symsearch *arr,
+				   unsigned int arrsize,
+				   struct module *owner,
+				   bool (*fn)(const struct symsearch *syms,
+					      struct module *owner,
+					      unsigned int symnum, void *data),
+				   void *data)
 {
-	unsigned int i;
-	const struct kernel_symbol *ks;
+	unsigned int i, j;
 
-	for (i = 0; i < num; i++) {
-		ks = lookup_symbol(name, arr[i].start, arr[i].stop);
-		if (!ks || !arr[i].check(gplok, warn, name))
-			continue;
-
-		if (crc)
-			*crc = symversion(arr[i].crcs, ks - arr[i].start);
-		return ks;
+	for (j = 0; j < arrsize; j++) {
+		for (i = 0; i < arr[j].stop - arr[j].start; i++)
+			if (fn(&arr[j], owner, i, data))
+				return true;
 	}
-	return NULL;
+
+	return false;
 }
 
-/* Find a symbol, return value, (optional) crc and (optional) module
- * which owns it */
-static unsigned long find_symbol(const char *name,
-				 struct module **owner,
-				 const unsigned long **crc,
-				 bool gplok,
-				 bool warn)
+/* Returns true as soon as fn returns true, otherwise false. */
+static bool each_symbol(bool (*fn)(const struct symsearch *arr,
+				   struct module *owner,
+				   unsigned int symnum, void *data),
+			void *data)
 {
 	struct module *mod;
-	const struct kernel_symbol *ks;
 	const struct symsearch arr[] = {
 		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
-		  always_ok },
+		  NOT_GPL_ONLY, false },
 		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
-		  __start___kcrctab_gpl, gpl_only },
+		  __start___kcrctab_gpl,
+		  GPL_ONLY, false },
 		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
-		  __start___kcrctab_gpl_future, warn_if_not_gpl },
+		  __start___kcrctab_gpl_future,
+		  WILL_BE_GPL_ONLY, false },
+#ifdef CONFIG_UNUSED_SYMBOLS
 		{ __start___ksymtab_unused, __stop___ksymtab_unused,
-		  __start___kcrctab_unused, printk_unused_warning },
+		  __start___kcrctab_unused,
+		  NOT_GPL_ONLY, true },
 		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
-		  __start___kcrctab_unused_gpl, gpl_only_unused_warning },
+		  __start___kcrctab_unused_gpl,
+		  GPL_ONLY, true },
+#endif
 	};
 
-	/* Core kernel first. */
-	ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc);
-	if (ks) {
-		if (owner)
-			*owner = NULL;
-		return ks->value;
-	}
+	if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
+		return true;
 
-	/* Now try modules. */
 	list_for_each_entry(mod, &modules, list) {
 		struct symsearch arr[] = {
 			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
-			  always_ok },
+			  NOT_GPL_ONLY, false },
 			{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
-			  mod->gpl_crcs, gpl_only },
+			  mod->gpl_crcs,
+			  GPL_ONLY, false },
 			{ mod->gpl_future_syms,
 			  mod->gpl_future_syms + mod->num_gpl_future_syms,
-			  mod->gpl_future_crcs, warn_if_not_gpl },
+			  mod->gpl_future_crcs,
+			  WILL_BE_GPL_ONLY, false },
+#ifdef CONFIG_UNUSED_SYMBOLS
 			{ mod->unused_syms,
 			  mod->unused_syms + mod->num_unused_syms,
-			  mod->unused_crcs, printk_unused_warning },
+			  mod->unused_crcs,
+			  NOT_GPL_ONLY, true },
 			{ mod->unused_gpl_syms,
 			  mod->unused_gpl_syms + mod->num_unused_gpl_syms,
-			  mod->unused_gpl_crcs, gpl_only_unused_warning },
+			  mod->unused_gpl_crcs,
+			  GPL_ONLY, true },
+#endif
 		};
 
-		ks = search_symarrays(arr, ARRAY_SIZE(arr),
-				      name, gplok, warn, crc);
-		if (ks) {
-			if (owner)
-				*owner = mod;
-			return ks->value;
+		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
+			return true;
+	}
+	return false;
+}
+
+struct find_symbol_arg {
+	/* Input */
+	const char *name;
+	bool gplok;
+	bool warn;
+
+	/* Output */
+	struct module *owner;
+	const unsigned long *crc;
+	unsigned long value;
+};
+
+static bool find_symbol_in_section(const struct symsearch *syms,
+				   struct module *owner,
+				   unsigned int symnum, void *data)
+{
+	struct find_symbol_arg *fsa = data;
+
+	if (strcmp(syms->start[symnum].name, fsa->name) != 0)
+		return false;
+
+	if (!fsa->gplok) {
+		if (syms->licence == GPL_ONLY)
+			return false;
+		if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
+			printk(KERN_WARNING "Symbol %s is being used "
+			       "by a non-GPL module, which will not "
+			       "be allowed in the future\n", fsa->name);
+			printk(KERN_WARNING "Please see the file "
+			       "Documentation/feature-removal-schedule.txt "
+			       "in the kernel source tree for more details.\n");
 		}
 	}
 
+#ifdef CONFIG_UNUSED_SYMBOLS
+	if (syms->unused && fsa->warn) {
+		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
+		       "however this module is using it.\n", fsa->name);
+		printk(KERN_WARNING
+		       "This symbol will go away in the future.\n");
+		printk(KERN_WARNING
+		       "Please evalute if this is the right api to use and if "
+		       "it really is, submit a report the linux kernel "
+		       "mailinglist together with submitting your code for "
+		       "inclusion.\n");
+	}
+#endif
+
+	fsa->owner = owner;
+	fsa->crc = symversion(syms->crcs, symnum);
+	fsa->value = syms->start[symnum].value;
+	return true;
+}
+
+/* Find a symbol, return value, (optional) crc and (optional) module
+ * which owns it */
+static unsigned long find_symbol(const char *name,
+				 struct module **owner,
+				 const unsigned long **crc,
+				 bool gplok,
+				 bool warn)
+{
+	struct find_symbol_arg fsa;
+
+	fsa.name = name;
+	fsa.gplok = gplok;
+	fsa.warn = warn;
+
+	if (each_symbol(find_symbol_in_section, &fsa)) {
+		if (owner)
+			*owner = fsa.owner;
+		if (crc)
+			*crc = fsa.crc;
+		return fsa.value;
+	}
+
 	DEBUGP("Failed to find symbol %s\n", name);
 	return -ENOENT;
 }
 
+/* lookup symbol in given range of kernel_symbols */
+static const struct kernel_symbol *lookup_symbol(const char *name,
+	const struct kernel_symbol *start,
+	const struct kernel_symbol *stop)
+{
+	const struct kernel_symbol *ks = start;
+	for (; ks < stop; ks++)
+		if (strcmp(ks->name, name) == 0)
+			return ks;
+	return NULL;
+}
+
 /* Search for module by name: must hold module_mutex. */
 static struct module *find_module(const char *name)
 {
@@ -639,8 +674,8 @@ static int __try_stop_module(void *_sref)
 {
 	struct stopref *sref = _sref;
 
-	/* If it's not unused, quit unless we are told to block. */
-	if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) {
+	/* If it's not unused, quit unless we're forcing. */
+	if (module_refcount(sref->mod) != 0) {
 		if (!(*sref->forced = try_force_unload(sref->flags)))
 			return -EWOULDBLOCK;
 	}
@@ -652,9 +687,16 @@ static int __try_stop_module(void *_sref)
 
 static int try_stop_module(struct module *mod, int flags, int *forced)
 {
-	struct stopref sref = { mod, flags, forced };
+	if (flags & O_NONBLOCK) {
+		struct stopref sref = { mod, flags, forced };
 
-	return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+		return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+	} else {
+		/* We don't need to stop the machine for this. */
+		mod->state = MODULE_STATE_GOING;
+		synchronize_sched();
+		return 0;
+	}
 }
 
 unsigned int module_refcount(struct module *mod)
@@ -1445,8 +1487,10 @@ static int verify_export_symbols(struct module *mod)
 		{ mod->syms, mod->num_syms },
 		{ mod->gpl_syms, mod->num_gpl_syms },
 		{ mod->gpl_future_syms, mod->num_gpl_future_syms },
+#ifdef CONFIG_UNUSED_SYMBOLS
 		{ mod->unused_syms, mod->num_unused_syms },
 		{ mod->unused_gpl_syms, mod->num_unused_gpl_syms },
+#endif
 	};
 
 	for (i = 0; i < ARRAY_SIZE(arr); i++) {
@@ -1526,7 +1570,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
 }
 
 /* Update size with this section: return offset. */
-static long get_offset(unsigned long *size, Elf_Shdr *sechdr)
+static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
 {
 	long ret;
 
@@ -1738,6 +1782,20 @@ static inline void add_kallsyms(struct module *mod,
 }
 #endif /* CONFIG_KALLSYMS */
 
+static void *module_alloc_update_bounds(unsigned long size)
+{
+	void *ret = module_alloc(size);
+
+	if (ret) {
+		/* Update module bounds. */
+		if ((unsigned long)ret < module_addr_min)
+			module_addr_min = (unsigned long)ret;
+		if ((unsigned long)ret + size > module_addr_max)
+			module_addr_max = (unsigned long)ret + size;
+	}
+	return ret;
+}
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
 static struct module *load_module(void __user *umod,
@@ -1764,10 +1822,12 @@ static struct module *load_module(void __user *umod,
 	unsigned int gplfutureindex;
 	unsigned int gplfuturecrcindex;
 	unsigned int unwindex = 0;
+#ifdef CONFIG_UNUSED_SYMBOLS
 	unsigned int unusedindex;
 	unsigned int unusedcrcindex;
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
+#endif
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
 	struct module *mod;
@@ -1850,13 +1910,15 @@ static struct module *load_module(void __user *umod,
 	exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
 	gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
 	gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
-	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
-	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
 	crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
 	gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
 	gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
+#ifdef CONFIG_UNUSED_SYMBOLS
+	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
+	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
 	unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
 	unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
+#endif
 	setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
 	exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
 	obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
@@ -1935,7 +1997,7 @@ static struct module *load_module(void __user *umod,
 	layout_sections(mod, hdr, sechdrs, secstrings);
 
 	/* Do the allocs. */
-	ptr = module_alloc(mod->core_size);
+	ptr = module_alloc_update_bounds(mod->core_size);
 	if (!ptr) {
 		err = -ENOMEM;
 		goto free_percpu;
@@ -1943,7 +2005,7 @@ static struct module *load_module(void __user *umod,
 	memset(ptr, 0, mod->core_size);
 	mod->module_core = ptr;
 
-	ptr = module_alloc(mod->init_size);
+	ptr = module_alloc_update_bounds(mod->init_size);
 	if (!ptr && mod->init_size) {
 		err = -ENOMEM;
 		goto free_core;
@@ -2018,14 +2080,15 @@ static struct module *load_module(void __user *umod,
 	mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
 	mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
 					sizeof(*mod->gpl_future_syms);
-	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
-					sizeof(*mod->unused_syms);
-	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
-					sizeof(*mod->unused_gpl_syms);
 	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
 	if (gplfuturecrcindex)
 		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
 
+#ifdef CONFIG_UNUSED_SYMBOLS
+	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
+					sizeof(*mod->unused_syms);
+	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
+					sizeof(*mod->unused_gpl_syms);
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)
 		mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
@@ -2033,13 +2096,17 @@ static struct module *load_module(void __user *umod,
 	if (unusedgplcrcindex)
 		mod->unused_gpl_crcs
 			= (void *)sechdrs[unusedgplcrcindex].sh_addr;
+#endif
 
 #ifdef CONFIG_MODVERSIONS
-	if ((mod->num_syms && !crcindex) ||
-	    (mod->num_gpl_syms && !gplcrcindex) ||
-	    (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
-	    (mod->num_unused_syms && !unusedcrcindex) ||
-	    (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
+	if ((mod->num_syms && !crcindex)
+	    || (mod->num_gpl_syms && !gplcrcindex)
+	    || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+#ifdef CONFIG_UNUSED_SYMBOLS
+	    || (mod->num_unused_syms && !unusedcrcindex)
+	    || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+#endif
+		) {
 		printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
 		err = try_to_force_load(mod, "nocrc");
 		if (err)
@@ -2512,7 +2579,7 @@ static int m_show(struct seq_file *m, void *p)
 	struct module *mod = list_entry(p, struct module, list);
 	char buf[8];
 
-	seq_printf(m, "%s %lu",
+	seq_printf(m, "%s %u",
 		   mod->name, mod->init_size + mod->core_size);
 	print_unload_info(m, mod);
 
@@ -2595,6 +2662,9 @@ struct module *__module_text_address(unsigned long addr)
2595{ 2662{
2596 struct module *mod; 2663 struct module *mod;
2597 2664
2665 if (addr < module_addr_min || addr > module_addr_max)
2666 return NULL;
2667
2598 list_for_each_entry(mod, &modules, list) 2668 list_for_each_entry(mod, &modules, list)
2599 if (within(addr, mod->module_init, mod->init_text_size) 2669 if (within(addr, mod->module_init, mod->init_text_size)
2600 || within(addr, mod->module_core, mod->core_text_size)) 2670 || within(addr, mod->module_core, mod->core_text_size))
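
The module.c hunks above track a min/max module address range (module_addr_min/module_addr_max, kept up to date by the new module_alloc_update_bounds()) so that __module_text_address() can reject most addresses before walking the module list. A minimal user-space sketch of the same idea follows; the names and sample ranges are illustrative, not the kernel's.

    #include <stdio.h>
    #include <stddef.h>

    struct range { unsigned long start, size; };

    /* Bounds updated on every allocation, analogous to what
     * module_alloc_update_bounds() does for module_addr_min/max above. */
    static unsigned long addr_min = ~0UL, addr_max;

    static struct range ranges[8];
    static size_t nr_ranges;

    static void add_range(unsigned long start, unsigned long size)
    {
        ranges[nr_ranges++] = (struct range){ start, size };
        if (start < addr_min)
            addr_min = start;
        if (start + size > addr_max)
            addr_max = start + size;
    }

    /* Cheap rejection first; the linear scan only runs for plausible addresses. */
    static const struct range *lookup(unsigned long addr)
    {
        size_t i;

        if (addr < addr_min || addr > addr_max)
            return NULL;
        for (i = 0; i < nr_ranges; i++)
            if (addr - ranges[i].start < ranges[i].size)
                return &ranges[i];
        return NULL;
    }

    int main(void)
    {
        add_range(0x1000, 0x100);
        add_range(0x8000, 0x200);
        printf("hit=%d miss=%d\n", lookup(0x1080) != NULL, lookup(0x4000) != NULL);
        return 0;
    }

The early return mirrors the new bounds check added to __module_text_address(); the list walk itself is unchanged.
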
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d493..30bd5d4b2ac7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/rculist.h>
33#include <linux/bootmem.h> 34#include <linux/bootmem.h>
34#include <linux/hash.h> 35#include <linux/hash.h>
35#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b45da40e8d25..59dfdf1e1d20 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -82,7 +82,7 @@ config PM_SLEEP_SMP
82 82
83config PM_SLEEP 83config PM_SLEEP
84 bool 84 bool
85 depends on SUSPEND || HIBERNATION 85 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
86 default y 86 default y
87 87
88config SUSPEND 88config SUSPEND
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 14a656cdc652..f011e0870b52 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -180,6 +180,17 @@ static void platform_restore_cleanup(int platform_mode)
180} 180}
181 181
182/** 182/**
183 * platform_recover - recover the platform from a failure to suspend
184 * devices.
185 */
186
187static void platform_recover(int platform_mode)
188{
189 if (platform_mode && hibernation_ops && hibernation_ops->recover)
190 hibernation_ops->recover();
191}
192
193/**
183 * create_image - freeze devices that need to be frozen with interrupts 194 * create_image - freeze devices that need to be frozen with interrupts
184 * off, create the hibernation image and thaw those devices. Control 195 * off, create the hibernation image and thaw those devices. Control
185 * reappears in this routine after a restore. 196 * reappears in this routine after a restore.
@@ -193,6 +204,7 @@ static int create_image(int platform_mode)
193 if (error) 204 if (error)
194 return error; 205 return error;
195 206
207 device_pm_lock();
196 local_irq_disable(); 208 local_irq_disable();
197 /* At this point, device_suspend() has been called, but *not* 209 /* At this point, device_suspend() has been called, but *not*
198 * device_power_down(). We *must* call device_power_down() now. 210 * device_power_down(). We *must* call device_power_down() now.
@@ -224,9 +236,11 @@ static int create_image(int platform_mode)
224 /* NOTE: device_power_up() is just a resume() for devices 236 /* NOTE: device_power_up() is just a resume() for devices
225 * that suspended with irqs off ... no overall powerup. 237 * that suspended with irqs off ... no overall powerup.
226 */ 238 */
227 device_power_up(); 239 device_power_up(in_suspend ?
240 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
228 Enable_irqs: 241 Enable_irqs:
229 local_irq_enable(); 242 local_irq_enable();
243 device_pm_unlock();
230 return error; 244 return error;
231} 245}
232 246
@@ -255,10 +269,10 @@ int hibernation_snapshot(int platform_mode)
255 suspend_console(); 269 suspend_console();
256 error = device_suspend(PMSG_FREEZE); 270 error = device_suspend(PMSG_FREEZE);
257 if (error) 271 if (error)
258 goto Resume_console; 272 goto Recover_platform;
259 273
260 if (hibernation_test(TEST_DEVICES)) 274 if (hibernation_test(TEST_DEVICES))
261 goto Resume_devices; 275 goto Recover_platform;
262 276
263 error = platform_pre_snapshot(platform_mode); 277 error = platform_pre_snapshot(platform_mode);
264 if (error || hibernation_test(TEST_PLATFORM)) 278 if (error || hibernation_test(TEST_PLATFORM))
@@ -280,12 +294,16 @@ int hibernation_snapshot(int platform_mode)
280 Finish: 294 Finish:
281 platform_finish(platform_mode); 295 platform_finish(platform_mode);
282 Resume_devices: 296 Resume_devices:
283 device_resume(); 297 device_resume(in_suspend ?
284 Resume_console: 298 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
285 resume_console(); 299 resume_console();
286 Close: 300 Close:
287 platform_end(platform_mode); 301 platform_end(platform_mode);
288 return error; 302 return error;
303
304 Recover_platform:
305 platform_recover(platform_mode);
306 goto Resume_devices;
289} 307}
290 308
291/** 309/**
@@ -300,8 +318,9 @@ static int resume_target_kernel(void)
300{ 318{
301 int error; 319 int error;
302 320
321 device_pm_lock();
303 local_irq_disable(); 322 local_irq_disable();
304 error = device_power_down(PMSG_PRETHAW); 323 error = device_power_down(PMSG_QUIESCE);
305 if (error) { 324 if (error) {
306 printk(KERN_ERR "PM: Some devices failed to power down, " 325 printk(KERN_ERR "PM: Some devices failed to power down, "
307 "aborting resume\n"); 326 "aborting resume\n");
@@ -329,9 +348,10 @@ static int resume_target_kernel(void)
329 swsusp_free(); 348 swsusp_free();
330 restore_processor_state(); 349 restore_processor_state();
331 touch_softlockup_watchdog(); 350 touch_softlockup_watchdog();
332 device_power_up(); 351 device_power_up(PMSG_RECOVER);
333 Enable_irqs: 352 Enable_irqs:
334 local_irq_enable(); 353 local_irq_enable();
354 device_pm_unlock();
335 return error; 355 return error;
336} 356}
337 357
@@ -350,7 +370,7 @@ int hibernation_restore(int platform_mode)
350 370
351 pm_prepare_console(); 371 pm_prepare_console();
352 suspend_console(); 372 suspend_console();
353 error = device_suspend(PMSG_PRETHAW); 373 error = device_suspend(PMSG_QUIESCE);
354 if (error) 374 if (error)
355 goto Finish; 375 goto Finish;
356 376
@@ -362,7 +382,7 @@ int hibernation_restore(int platform_mode)
362 enable_nonboot_cpus(); 382 enable_nonboot_cpus();
363 } 383 }
364 platform_restore_cleanup(platform_mode); 384 platform_restore_cleanup(platform_mode);
365 device_resume(); 385 device_resume(PMSG_RECOVER);
366 Finish: 386 Finish:
367 resume_console(); 387 resume_console();
368 pm_restore_console(); 388 pm_restore_console();
@@ -392,8 +412,11 @@ int hibernation_platform_enter(void)
392 412
393 suspend_console(); 413 suspend_console();
394 error = device_suspend(PMSG_HIBERNATE); 414 error = device_suspend(PMSG_HIBERNATE);
395 if (error) 415 if (error) {
396 goto Resume_console; 416 if (hibernation_ops->recover)
417 hibernation_ops->recover();
418 goto Resume_devices;
419 }
397 420
398 error = hibernation_ops->prepare(); 421 error = hibernation_ops->prepare();
399 if (error) 422 if (error)
@@ -403,6 +426,7 @@ int hibernation_platform_enter(void)
403 if (error) 426 if (error)
404 goto Finish; 427 goto Finish;
405 428
429 device_pm_lock();
406 local_irq_disable(); 430 local_irq_disable();
407 error = device_power_down(PMSG_HIBERNATE); 431 error = device_power_down(PMSG_HIBERNATE);
408 if (!error) { 432 if (!error) {
@@ -411,6 +435,7 @@ int hibernation_platform_enter(void)
411 while (1); 435 while (1);
412 } 436 }
413 local_irq_enable(); 437 local_irq_enable();
438 device_pm_unlock();
414 439
415 /* 440 /*
416 * We don't need to reenable the nonboot CPUs or resume consoles, since 441 * We don't need to reenable the nonboot CPUs or resume consoles, since
@@ -419,8 +444,7 @@ int hibernation_platform_enter(void)
419 Finish: 444 Finish:
420 hibernation_ops->finish(); 445 hibernation_ops->finish();
421 Resume_devices: 446 Resume_devices:
422 device_resume(); 447 device_resume(PMSG_RESTORE);
423 Resume_console:
424 resume_console(); 448 resume_console();
425 Close: 449 Close:
426 hibernation_ops->end(); 450 hibernation_ops->end();
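
The disk.c hunks above thread a pm_message_t through device_power_up()/device_resume(): PMSG_RECOVER when the snapshot attempt failed, PMSG_THAW when it succeeded and the image is about to be written, PMSG_RESTORE when control comes back in the restored kernel. A small stand-alone sketch of that three-way choice, with assumed enum values rather than the kernel's pm_message_t:

    #include <stdio.h>

    /* Assumed stand-ins for the PMSG_* events used in the hunks above. */
    enum pm_event { PMSG_THAW, PMSG_RECOVER, PMSG_RESTORE };

    static const char *pm_event_name(enum pm_event e)
    {
        switch (e) {
        case PMSG_THAW:    return "THAW";
        case PMSG_RECOVER: return "RECOVER";
        case PMSG_RESTORE: return "RESTORE";
        }
        return "?";
    }

    /* Mirrors: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW)
     *                                    : PMSG_RESTORE);                     */
    static enum pm_event resume_event(int in_suspend, int error)
    {
        return in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE;
    }

    int main(void)
    {
        printf("%s %s %s\n",
               pm_event_name(resume_event(1, 0)),   /* image created, ok  */
               pm_event_name(resume_event(1, -1)),  /* snapshot failed    */
               pm_event_name(resume_event(0, 0)));  /* back from restore  */
        return 0;
    }
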
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6a6d5eb3524e..3398f4651aa1 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state)
228{ 228{
229 int error = 0; 229 int error = 0;
230 230
231 device_pm_lock();
231 arch_suspend_disable_irqs(); 232 arch_suspend_disable_irqs();
232 BUG_ON(!irqs_disabled()); 233 BUG_ON(!irqs_disabled());
233 234
@@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state)
239 if (!suspend_test(TEST_CORE)) 240 if (!suspend_test(TEST_CORE))
240 error = suspend_ops->enter(state); 241 error = suspend_ops->enter(state);
241 242
242 device_power_up(); 243 device_power_up(PMSG_RESUME);
243 Done: 244 Done:
244 arch_suspend_enable_irqs(); 245 arch_suspend_enable_irqs();
245 BUG_ON(irqs_disabled()); 246 BUG_ON(irqs_disabled());
247 device_pm_unlock();
246 return error; 248 return error;
247} 249}
248 250
@@ -267,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state)
267 error = device_suspend(PMSG_SUSPEND); 269 error = device_suspend(PMSG_SUSPEND);
268 if (error) { 270 if (error) {
269 printk(KERN_ERR "PM: Some devices failed to suspend\n"); 271 printk(KERN_ERR "PM: Some devices failed to suspend\n");
270 goto Resume_console; 272 goto Recover_platform;
271 } 273 }
272 274
273 if (suspend_test(TEST_DEVICES)) 275 if (suspend_test(TEST_DEVICES))
274 goto Resume_devices; 276 goto Recover_platform;
275 277
276 if (suspend_ops->prepare) { 278 if (suspend_ops->prepare) {
277 error = suspend_ops->prepare(); 279 error = suspend_ops->prepare();
@@ -291,13 +293,17 @@ int suspend_devices_and_enter(suspend_state_t state)
291 if (suspend_ops->finish) 293 if (suspend_ops->finish)
292 suspend_ops->finish(); 294 suspend_ops->finish();
293 Resume_devices: 295 Resume_devices:
294 device_resume(); 296 device_resume(PMSG_RESUME);
295 Resume_console:
296 resume_console(); 297 resume_console();
297 Close: 298 Close:
298 if (suspend_ops->end) 299 if (suspend_ops->end)
299 suspend_ops->end(); 300 suspend_ops->end();
300 return error; 301 return error;
302
303 Recover_platform:
304 if (suspend_ops->recover)
305 suspend_ops->recover();
306 goto Resume_devices;
301} 307}
302 308
303/** 309/**
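
Both suspend_devices_and_enter() and hibernation_snapshot() now route early device_suspend() failures through a Recover_platform label that calls the platform's ->recover() hook before falling back into the normal resume path. Roughly, as a compilable user-space sketch; the helpers here are stand-ins, not kernel APIs:

    #include <stdio.h>

    /* Hypothetical stand-ins for the steps in the hunks above. */
    static int suspend_devices(void)   { return -1; /* simulate a failure */ }
    static void resume_devices(void)   { puts("resume devices"); }
    static void platform_recover(void) { puts("platform ->recover()"); }

    static int enter_state(void)
    {
        int error;

        error = suspend_devices();
        if (error)
            goto Recover_platform;

        /* ... enter the sleep state here ... */

     Resume_devices:
        resume_devices();
        return error;

     Recover_platform:
        platform_recover();
        goto Resume_devices;
    }

    int main(void)
    {
        printf("enter_state() = %d\n", enter_state());
        return 0;
    }

The point of the extra label is that recovery runs exactly once, and only on the failure path, while the resume/cleanup code stays shared.
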
diff --git a/kernel/power/process.c b/kernel/power/process.c
index f1d0b345c9ba..5fb87652f214 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -19,9 +19,6 @@
19 */ 19 */
20#define TIMEOUT (20 * HZ) 20#define TIMEOUT (20 * HZ)
21 21
22#define FREEZER_KERNEL_THREADS 0
23#define FREEZER_USER_SPACE 1
24
25static inline int freezeable(struct task_struct * p) 22static inline int freezeable(struct task_struct * p)
26{ 23{
27 if ((p == current) || 24 if ((p == current) ||
@@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p)
84 spin_unlock_irqrestore(&p->sighand->siglock, flags); 81 spin_unlock_irqrestore(&p->sighand->siglock, flags);
85} 82}
86 83
87static int has_mm(struct task_struct *p) 84static inline bool should_send_signal(struct task_struct *p)
88{ 85{
89 return (p->mm && !(p->flags & PF_BORROWED_MM)); 86 return !(p->flags & PF_FREEZER_NOSIG);
90} 87}
91 88
92/** 89/**
93 * freeze_task - send a freeze request to given task 90 * freeze_task - send a freeze request to given task
94 * @p: task to send the request to 91 * @p: task to send the request to
95 * @with_mm_only: if set, the request will only be sent if the task has its 92 * @sig_only: if set, the request will only be sent if the task has the
96 * own mm 93 * PF_FREEZER_NOSIG flag unset
97 * Return value: 0, if @with_mm_only is set and the task has no mm of its 94 * Return value: 'false', if @sig_only is set and the task has
98 * own or the task is frozen, 1, otherwise 95 * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
99 * 96 *
 100 * The freeze request is sent by seting the task's TIF_FREEZE flag and 97 * The freeze request is sent by setting the task's TIF_FREEZE flag and
101 * either sending a fake signal to it or waking it up, depending on whether 98 * either sending a fake signal to it or waking it up, depending on whether
102 * or not it has its own mm (ie. it is a user land task). If @with_mm_only 99 * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
103 * is set and the task has no mm of its own (ie. it is a kernel thread), 100 * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
104 * its TIF_FREEZE flag should not be set. 101 * TIF_FREEZE flag will not be set.
105 *
106 * The task_lock() is necessary to prevent races with exit_mm() or
107 * use_mm()/unuse_mm() from occuring.
108 */ 102 */
109static int freeze_task(struct task_struct *p, int with_mm_only) 103static bool freeze_task(struct task_struct *p, bool sig_only)
110{ 104{
111 int ret = 1; 105 /*
106 * We first check if the task is freezing and next if it has already
107 * been frozen to avoid the race with frozen_process() which first marks
108 * the task as frozen and next clears its TIF_FREEZE.
109 */
110 if (!freezing(p)) {
111 rmb();
112 if (frozen(p))
113 return false;
112 114
113 task_lock(p); 115 if (!sig_only || should_send_signal(p))
114 if (freezing(p)) { 116 set_freeze_flag(p);
115 if (has_mm(p)) { 117 else
116 if (!signal_pending(p)) 118 return false;
117 fake_signal_wake_up(p); 119 }
118 } else { 120
119 if (with_mm_only) 121 if (should_send_signal(p)) {
120 ret = 0; 122 if (!signal_pending(p))
121 else 123 fake_signal_wake_up(p);
122 wake_up_state(p, TASK_INTERRUPTIBLE); 124 } else if (sig_only) {
123 } 125 return false;
124 } else { 126 } else {
125 rmb(); 127 wake_up_state(p, TASK_INTERRUPTIBLE);
126 if (frozen(p)) {
127 ret = 0;
128 } else {
129 if (has_mm(p)) {
130 set_freeze_flag(p);
131 fake_signal_wake_up(p);
132 } else {
133 if (with_mm_only) {
134 ret = 0;
135 } else {
136 set_freeze_flag(p);
137 wake_up_state(p, TASK_INTERRUPTIBLE);
138 }
139 }
140 }
141 } 128 }
142 task_unlock(p); 129
143 return ret; 130 return true;
144} 131}
145 132
146static void cancel_freezing(struct task_struct *p) 133static void cancel_freezing(struct task_struct *p)
@@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p)
156 } 143 }
157} 144}
158 145
159static int try_to_freeze_tasks(int freeze_user_space) 146static int try_to_freeze_tasks(bool sig_only)
160{ 147{
161 struct task_struct *g, *p; 148 struct task_struct *g, *p;
162 unsigned long end_time; 149 unsigned long end_time;
@@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space)
175 if (frozen(p) || !freezeable(p)) 162 if (frozen(p) || !freezeable(p))
176 continue; 163 continue;
177 164
178 if (!freeze_task(p, freeze_user_space)) 165 if (!freeze_task(p, sig_only))
179 continue; 166 continue;
180 167
181 /* 168 /*
@@ -235,13 +222,13 @@ int freeze_processes(void)
235 int error; 222 int error;
236 223
237 printk("Freezing user space processes ... "); 224 printk("Freezing user space processes ... ");
238 error = try_to_freeze_tasks(FREEZER_USER_SPACE); 225 error = try_to_freeze_tasks(true);
239 if (error) 226 if (error)
240 goto Exit; 227 goto Exit;
241 printk("done.\n"); 228 printk("done.\n");
242 229
243 printk("Freezing remaining freezable tasks ... "); 230 printk("Freezing remaining freezable tasks ... ");
244 error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); 231 error = try_to_freeze_tasks(false);
245 if (error) 232 if (error)
246 goto Exit; 233 goto Exit;
247 printk("done."); 234 printk("done.");
@@ -251,7 +238,7 @@ int freeze_processes(void)
251 return error; 238 return error;
252} 239}
253 240
254static void thaw_tasks(int thaw_user_space) 241static void thaw_tasks(bool nosig_only)
255{ 242{
256 struct task_struct *g, *p; 243 struct task_struct *g, *p;
257 244
@@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space)
260 if (!freezeable(p)) 247 if (!freezeable(p))
261 continue; 248 continue;
262 249
263 if (!p->mm == thaw_user_space) 250 if (nosig_only && should_send_signal(p))
264 continue; 251 continue;
265 252
266 thaw_process(p); 253 thaw_process(p);
@@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space)
271void thaw_processes(void) 258void thaw_processes(void)
272{ 259{
273 printk("Restarting tasks ... "); 260 printk("Restarting tasks ... ");
274 thaw_tasks(FREEZER_KERNEL_THREADS); 261 thaw_tasks(true);
275 thaw_tasks(FREEZER_USER_SPACE); 262 thaw_tasks(false);
276 schedule(); 263 schedule();
277 printk("done.\n"); 264 printk("done.\n");
278} 265}
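
The rewritten freeze_task() above keys everything off PF_FREEZER_NOSIG instead of whether the task owns an mm: a sig_only pass skips PF_FREEZER_NOSIG tasks entirely, signal-capable tasks get a fake signal, and the rest are simply woken. A simplified, lock-free user-space model of that decision; the struct fields are stand-ins for the real task flags:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified task model; PF_FREEZER_NOSIG is the only flag we care about. */
    #define PF_FREEZER_NOSIG 0x1

    struct task {
        unsigned flags;
        bool freezing;
        bool frozen;
        bool woken;                     /* stands in for signal/wake-up */
    };

    static bool should_send_signal(struct task *p)
    {
        return !(p->flags & PF_FREEZER_NOSIG);
    }

    /* Roughly the decision structure of the new freeze_task(): returns true
     * if a freeze request was (or already had been) issued to this task. */
    static bool freeze_task(struct task *p, bool sig_only)
    {
        if (!p->freezing) {
            if (p->frozen)
                return false;
            if (!sig_only || should_send_signal(p))
                p->freezing = true;     /* set_freeze_flag() */
            else
                return false;
        }

        if (should_send_signal(p))
            p->woken = true;            /* fake_signal_wake_up() */
        else if (sig_only)
            return false;
        else
            p->woken = true;            /* wake_up_state() */

        return true;
    }

    int main(void)
    {
        struct task user = { 0 }, kthread = { PF_FREEZER_NOSIG };

        printf("user, sig_only=1:    %d\n", freeze_task(&user, true));
        printf("kthread, sig_only=1: %d\n", freeze_task(&kthread, true));
        printf("kthread, sig_only=0: %d\n", freeze_task(&kthread, false));
        return 0;
    }
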
diff --git a/kernel/power/user.c b/kernel/power/user.c
index f5512cb3aa86..a6332a313262 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,6 +23,7 @@
23#include <linux/console.h> 23#include <linux/console.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/freezer.h> 25#include <linux/freezer.h>
26#include <linux/smp_lock.h>
26 27
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28 29
@@ -69,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp)
69 struct snapshot_data *data; 70 struct snapshot_data *data;
70 int error; 71 int error;
71 72
72 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) 73 mutex_lock(&pm_mutex);
73 return -EBUSY; 74
75 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
76 error = -EBUSY;
77 goto Unlock;
78 }
74 79
75 if ((filp->f_flags & O_ACCMODE) == O_RDWR) { 80 if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
76 atomic_inc(&snapshot_device_available); 81 atomic_inc(&snapshot_device_available);
77 return -ENOSYS; 82 error = -ENOSYS;
83 goto Unlock;
78 } 84 }
79 if(create_basic_memory_bitmaps()) { 85 if(create_basic_memory_bitmaps()) {
80 atomic_inc(&snapshot_device_available); 86 atomic_inc(&snapshot_device_available);
81 return -ENOMEM; 87 error = -ENOMEM;
88 goto Unlock;
82 } 89 }
83 nonseekable_open(inode, filp); 90 nonseekable_open(inode, filp);
84 data = &snapshot_state; 91 data = &snapshot_state;
@@ -98,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp)
98 if (error) 105 if (error)
99 pm_notifier_call_chain(PM_POST_HIBERNATION); 106 pm_notifier_call_chain(PM_POST_HIBERNATION);
100 } 107 }
101 if (error) { 108 if (error)
102 atomic_inc(&snapshot_device_available); 109 atomic_inc(&snapshot_device_available);
103 return error;
104 }
105 data->frozen = 0; 110 data->frozen = 0;
106 data->ready = 0; 111 data->ready = 0;
107 data->platform_support = 0; 112 data->platform_support = 0;
108 113
109 return 0; 114 Unlock:
115 mutex_unlock(&pm_mutex);
116
117 return error;
110} 118}
111 119
112static int snapshot_release(struct inode *inode, struct file *filp) 120static int snapshot_release(struct inode *inode, struct file *filp)
113{ 121{
114 struct snapshot_data *data; 122 struct snapshot_data *data;
115 123
124 mutex_lock(&pm_mutex);
125
116 swsusp_free(); 126 swsusp_free();
117 free_basic_memory_bitmaps(); 127 free_basic_memory_bitmaps();
118 data = filp->private_data; 128 data = filp->private_data;
119 free_all_swap_pages(data->swap); 129 free_all_swap_pages(data->swap);
120 if (data->frozen) { 130 if (data->frozen)
121 mutex_lock(&pm_mutex);
122 thaw_processes(); 131 thaw_processes();
123 mutex_unlock(&pm_mutex);
124 }
125 pm_notifier_call_chain(data->mode == O_WRONLY ? 132 pm_notifier_call_chain(data->mode == O_WRONLY ?
126 PM_POST_HIBERNATION : PM_POST_RESTORE); 133 PM_POST_HIBERNATION : PM_POST_RESTORE);
127 atomic_inc(&snapshot_device_available); 134 atomic_inc(&snapshot_device_available);
135
136 mutex_unlock(&pm_mutex);
137
128 return 0; 138 return 0;
129} 139}
130 140
@@ -134,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
134 struct snapshot_data *data; 144 struct snapshot_data *data;
135 ssize_t res; 145 ssize_t res;
136 146
147 mutex_lock(&pm_mutex);
148
137 data = filp->private_data; 149 data = filp->private_data;
138 if (!data->ready) 150 if (!data->ready) {
139 return -ENODATA; 151 res = -ENODATA;
152 goto Unlock;
153 }
140 res = snapshot_read_next(&data->handle, count); 154 res = snapshot_read_next(&data->handle, count);
141 if (res > 0) { 155 if (res > 0) {
142 if (copy_to_user(buf, data_of(data->handle), res)) 156 if (copy_to_user(buf, data_of(data->handle), res))
@@ -144,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
144 else 158 else
145 *offp = data->handle.offset; 159 *offp = data->handle.offset;
146 } 160 }
161
162 Unlock:
163 mutex_unlock(&pm_mutex);
164
147 return res; 165 return res;
148} 166}
149 167
@@ -153,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
153 struct snapshot_data *data; 171 struct snapshot_data *data;
154 ssize_t res; 172 ssize_t res;
155 173
174 mutex_lock(&pm_mutex);
175
156 data = filp->private_data; 176 data = filp->private_data;
157 res = snapshot_write_next(&data->handle, count); 177 res = snapshot_write_next(&data->handle, count);
158 if (res > 0) { 178 if (res > 0) {
@@ -161,11 +181,14 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
161 else 181 else
162 *offp = data->handle.offset; 182 *offp = data->handle.offset;
163 } 183 }
184
185 mutex_unlock(&pm_mutex);
186
164 return res; 187 return res;
165} 188}
166 189
167static int snapshot_ioctl(struct inode *inode, struct file *filp, 190static long snapshot_ioctl(struct file *filp, unsigned int cmd,
168 unsigned int cmd, unsigned long arg) 191 unsigned long arg)
169{ 192{
170 int error = 0; 193 int error = 0;
171 struct snapshot_data *data; 194 struct snapshot_data *data;
@@ -179,6 +202,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
179 if (!capable(CAP_SYS_ADMIN)) 202 if (!capable(CAP_SYS_ADMIN))
180 return -EPERM; 203 return -EPERM;
181 204
205 if (!mutex_trylock(&pm_mutex))
206 return -EBUSY;
207
182 data = filp->private_data; 208 data = filp->private_data;
183 209
184 switch (cmd) { 210 switch (cmd) {
@@ -186,7 +212,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
186 case SNAPSHOT_FREEZE: 212 case SNAPSHOT_FREEZE:
187 if (data->frozen) 213 if (data->frozen)
188 break; 214 break;
189 mutex_lock(&pm_mutex);
190 printk("Syncing filesystems ... "); 215 printk("Syncing filesystems ... ");
191 sys_sync(); 216 sys_sync();
192 printk("done.\n"); 217 printk("done.\n");
@@ -194,7 +219,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
194 error = freeze_processes(); 219 error = freeze_processes();
195 if (error) 220 if (error)
196 thaw_processes(); 221 thaw_processes();
197 mutex_unlock(&pm_mutex);
198 if (!error) 222 if (!error)
199 data->frozen = 1; 223 data->frozen = 1;
200 break; 224 break;
@@ -202,9 +226,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
202 case SNAPSHOT_UNFREEZE: 226 case SNAPSHOT_UNFREEZE:
203 if (!data->frozen || data->ready) 227 if (!data->frozen || data->ready)
204 break; 228 break;
205 mutex_lock(&pm_mutex);
206 thaw_processes(); 229 thaw_processes();
207 mutex_unlock(&pm_mutex);
208 data->frozen = 0; 230 data->frozen = 0;
209 break; 231 break;
210 232
@@ -307,16 +329,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
307 error = -EPERM; 329 error = -EPERM;
308 break; 330 break;
309 } 331 }
310 if (!mutex_trylock(&pm_mutex)) {
311 error = -EBUSY;
312 break;
313 }
314 /* 332 /*
315 * Tasks are frozen and the notifiers have been called with 333 * Tasks are frozen and the notifiers have been called with
316 * PM_HIBERNATION_PREPARE 334 * PM_HIBERNATION_PREPARE
317 */ 335 */
318 error = suspend_devices_and_enter(PM_SUSPEND_MEM); 336 error = suspend_devices_and_enter(PM_SUSPEND_MEM);
319 mutex_unlock(&pm_mutex);
320 break; 337 break;
321 338
322 case SNAPSHOT_PLATFORM_SUPPORT: 339 case SNAPSHOT_PLATFORM_SUPPORT:
@@ -390,6 +407,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
390 407
391 } 408 }
392 409
410 mutex_unlock(&pm_mutex);
411
393 return error; 412 return error;
394} 413}
395 414
@@ -399,7 +418,7 @@ static const struct file_operations snapshot_fops = {
399 .read = snapshot_read, 418 .read = snapshot_read,
400 .write = snapshot_write, 419 .write = snapshot_write,
401 .llseek = no_llseek, 420 .llseek = no_llseek,
402 .ioctl = snapshot_ioctl, 421 .unlocked_ioctl = snapshot_ioctl,
403}; 422};
404 423
405static struct miscdevice snapshot_device = { 424static struct miscdevice snapshot_device = {
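
snapshot_ioctl() is converted from ->ioctl to ->unlocked_ioctl, so it no longer runs under the BKL; instead the whole handler is serialized by pm_mutex, taken with a trylock so a concurrent caller gets -EBUSY rather than blocking. The same shape in a plain pthread sketch (command numbers and names are made up):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t pm_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Fail fast with EBUSY if another operation already holds the lock,
     * then hold it across the whole command, as the converted handler does. */
    static int snapshot_cmd(int cmd)
    {
        int error = 0;

        if (pthread_mutex_trylock(&pm_mutex) != 0)
            return -EBUSY;

        switch (cmd) {
        case 0:
            /* ... freeze ... */
            break;
        default:
            error = -ENOTTY;
            break;
        }

        pthread_mutex_unlock(&pm_mutex);
        return error;
    }

    int main(void)
    {
        printf("cmd 0 -> %d\n", snapshot_cmd(0));
        printf("cmd 9 -> %d\n", snapshot_cmd(9));
        return 0;
    }

Build with -pthread; holding the one mutex for the whole call is what lets the per-command mutex_lock()/mutex_unlock() pairs above be removed.
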
diff --git a/kernel/profile.c b/kernel/profile.c
index ae7ead82cbc9..58926411eb2a 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -252,7 +252,7 @@ static void profile_flip_buffers(void)
252 mutex_lock(&profile_flip_mutex); 252 mutex_lock(&profile_flip_mutex);
253 j = per_cpu(cpu_profile_flip, get_cpu()); 253 j = per_cpu(cpu_profile_flip, get_cpu());
254 put_cpu(); 254 put_cpu();
255 on_each_cpu(__profile_flip_buffers, NULL, 0, 1); 255 on_each_cpu(__profile_flip_buffers, NULL, 1);
256 for_each_online_cpu(cpu) { 256 for_each_online_cpu(cpu) {
257 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; 257 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
258 for (i = 0; i < NR_PROFILE_HIT; ++i) { 258 for (i = 0; i < NR_PROFILE_HIT; ++i) {
@@ -275,7 +275,7 @@ static void profile_discard_flip_buffers(void)
275 mutex_lock(&profile_flip_mutex); 275 mutex_lock(&profile_flip_mutex);
276 i = per_cpu(cpu_profile_flip, get_cpu()); 276 i = per_cpu(cpu_profile_flip, get_cpu());
277 put_cpu(); 277 put_cpu();
278 on_each_cpu(__profile_flip_buffers, NULL, 0, 1); 278 on_each_cpu(__profile_flip_buffers, NULL, 1);
279 for_each_online_cpu(cpu) { 279 for_each_online_cpu(cpu) {
280 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; 280 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
281 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); 281 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
@@ -558,7 +558,7 @@ static int __init create_hash_tables(void)
558out_cleanup: 558out_cleanup:
559 prof_on = 0; 559 prof_on = 0;
560 smp_mb(); 560 smp_mb();
561 on_each_cpu(profile_nop, NULL, 0, 1); 561 on_each_cpu(profile_nop, NULL, 1);
562 for_each_online_cpu(cpu) { 562 for_each_online_cpu(cpu) {
563 struct page *page; 563 struct page *page;
564 564
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e337390fce01..8392a9da6450 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -33,13 +33,9 @@
33 */ 33 */
34void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) 34void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
35{ 35{
36 BUG_ON(!list_empty(&child->ptrace_list)); 36 BUG_ON(!list_empty(&child->ptrace_entry));
37 if (child->parent == new_parent) 37 list_add(&child->ptrace_entry, &new_parent->ptraced);
38 return;
39 list_add(&child->ptrace_list, &child->parent->ptrace_children);
40 remove_parent(child);
41 child->parent = new_parent; 38 child->parent = new_parent;
42 add_parent(child);
43} 39}
44 40
45/* 41/*
@@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child)
73 BUG_ON(!child->ptrace); 69 BUG_ON(!child->ptrace);
74 70
75 child->ptrace = 0; 71 child->ptrace = 0;
76 if (ptrace_reparented(child)) { 72 child->parent = child->real_parent;
77 list_del_init(&child->ptrace_list); 73 list_del_init(&child->ptrace_entry);
78 remove_parent(child);
79 child->parent = child->real_parent;
80 add_parent(child);
81 }
82 74
83 if (task_is_traced(child)) 75 if (task_is_traced(child))
84 ptrace_untrace(child); 76 ptrace_untrace(child);
@@ -492,15 +484,34 @@ int ptrace_traceme(void)
492 /* 484 /*
493 * Are we already being traced? 485 * Are we already being traced?
494 */ 486 */
487repeat:
495 task_lock(current); 488 task_lock(current);
496 if (!(current->ptrace & PT_PTRACED)) { 489 if (!(current->ptrace & PT_PTRACED)) {
490 /*
491 * See ptrace_attach() comments about the locking here.
492 */
493 unsigned long flags;
494 if (!write_trylock_irqsave(&tasklist_lock, flags)) {
495 task_unlock(current);
496 do {
497 cpu_relax();
498 } while (!write_can_lock(&tasklist_lock));
499 goto repeat;
500 }
501
497 ret = security_ptrace(current->parent, current, 502 ret = security_ptrace(current->parent, current,
498 PTRACE_MODE_ATTACH); 503 PTRACE_MODE_ATTACH);
504
499 /* 505 /*
500 * Set the ptrace bit in the process ptrace flags. 506 * Set the ptrace bit in the process ptrace flags.
507 * Then link us on our parent's ptraced list.
501 */ 508 */
502 if (!ret) 509 if (!ret) {
503 current->ptrace |= PT_PTRACED; 510 current->ptrace |= PT_PTRACED;
511 __ptrace_link(current, current->real_parent);
512 }
513
514 write_unlock_irqrestore(&tasklist_lock, flags);
504 } 515 }
505 task_unlock(current); 516 task_unlock(current);
506 return ret; 517 return ret;
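
ptrace_traceme() now needs tasklist_lock for writing while already holding task_lock(current); to avoid blocking in the wrong lock order it uses write_trylock_irqsave() and, on failure, drops task_lock, spins with cpu_relax() until the lock looks free, and retries. A user-space sketch of that back-off loop with two pthread mutexes (names are illustrative):

    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static pthread_mutex_t task_lock     = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t tasklist_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Take task_lock first, then try for tasklist_lock; if that fails, drop
     * everything, back off, and start over, the same shape as the
     * write_trylock_irqsave() loop added to ptrace_traceme(). */
    static void link_self(void)
    {
    repeat:
        pthread_mutex_lock(&task_lock);
        if (pthread_mutex_trylock(&tasklist_lock) != 0) {
            pthread_mutex_unlock(&task_lock);
            sched_yield();      /* stands in for the cpu_relax() spin */
            goto repeat;
        }

        /* ... both locks held: update the ptrace linkage ... */

        pthread_mutex_unlock(&tasklist_lock);
        pthread_mutex_unlock(&task_lock);
    }

    int main(void)
    {
        link_self();
        puts("linked");
        return 0;
    }
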
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 65c0906080ef..16eeeaa9d618 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -387,6 +387,10 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); 387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); 388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); 389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
390
391 local_irq_disable();
392 this_rdp->qlen += rdp->qlen;
393 local_irq_enable();
390} 394}
391 395
392static void rcu_offline_cpu(int cpu) 396static void rcu_offline_cpu(int cpu)
@@ -516,10 +520,38 @@ void rcu_check_callbacks(int cpu, int user)
516 if (user || 520 if (user ||
517 (idle_cpu(cpu) && !in_softirq() && 521 (idle_cpu(cpu) && !in_softirq() &&
518 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 522 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
523
524 /*
525 * Get here if this CPU took its interrupt from user
526 * mode or from the idle loop, and if this is not a
527 * nested interrupt. In this case, the CPU is in
528 * a quiescent state, so count it.
529 *
530 * Also do a memory barrier. This is needed to handle
531 * the case where writes from a preempt-disable section
532 * of code get reordered into schedule() by this CPU's
533 * write buffer. The memory barrier makes sure that
 534 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
535 * by other CPUs to happen after any such write.
536 */
537
538 smp_mb(); /* See above block comment. */
519 rcu_qsctr_inc(cpu); 539 rcu_qsctr_inc(cpu);
520 rcu_bh_qsctr_inc(cpu); 540 rcu_bh_qsctr_inc(cpu);
521 } else if (!in_softirq()) 541
542 } else if (!in_softirq()) {
543
544 /*
545 * Get here if this CPU did not take its interrupt from
546 * softirq, in other words, if it is not interrupting
547 * a rcu_bh read-side critical section. This is an _bh
548 * critical section, so count it. The memory barrier
549 * is needed for the same reason as is the above one.
550 */
551
552 smp_mb(); /* See above block comment. */
522 rcu_bh_qsctr_inc(cpu); 553 rcu_bh_qsctr_inc(cpu);
554 }
523 raise_rcu_softirq(); 555 raise_rcu_softirq();
524} 556}
525 557
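
The enlarged rcu_check_callbacks() above counts a quiescent state when the tick arrived from user mode, or from the idle loop outside softirq with no nested hardirq (which is what hardirq_count() <= (1 << HARDIRQ_SHIFT) expresses), and adds an smp_mb() so preempt-disabled writes cannot appear to follow the counter increment. The context test alone, as an assumed, simplified user-space predicate:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified view of the context checks in rcu_check_callbacks(). */
    struct ctx {
        bool user;
        bool idle;
        bool in_softirq;
        int  hardirq_nesting;   /* 1 == just this timer interrupt */
    };

    static bool counts_as_quiescent(const struct ctx *c)
    {
        return c->user ||
               (c->idle && !c->in_softirq && c->hardirq_nesting <= 1);
    }

    int main(void)
    {
        struct ctx from_user = { .user = true };
        struct ctx from_idle = { .idle = true, .hardirq_nesting = 1 };
        struct ctx nested    = { .idle = true, .hardirq_nesting = 2 };

        printf("%d %d %d\n", counts_as_quiescent(&from_user),
               counts_as_quiescent(&from_idle), counts_as_quiescent(&nested));
        return 0;
    }
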
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c09605f8d16c..f14f372cf6f5 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -39,16 +39,16 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <asm/atomic.h> 40#include <asm/atomic.h>
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/completion.h>
43#include <linux/percpu.h> 42#include <linux/percpu.h>
44#include <linux/notifier.h> 43#include <linux/notifier.h>
45#include <linux/cpu.h> 44#include <linux/cpu.h>
46#include <linux/mutex.h> 45#include <linux/mutex.h>
47#include <linux/module.h> 46#include <linux/module.h>
48 47
49struct rcu_synchronize { 48enum rcu_barrier {
50 struct rcu_head head; 49 RCU_BARRIER_STD,
51 struct completion completion; 50 RCU_BARRIER_BH,
51 RCU_BARRIER_SCHED,
52}; 52};
53 53
54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
60 * Awaken the corresponding synchronize_rcu() instance now that a 60 * Awaken the corresponding synchronize_rcu() instance now that a
61 * grace period has elapsed. 61 * grace period has elapsed.
62 */ 62 */
63static void wakeme_after_rcu(struct rcu_head *head) 63void wakeme_after_rcu(struct rcu_head *head)
64{ 64{
65 struct rcu_synchronize *rcu; 65 struct rcu_synchronize *rcu;
66 66
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
78 * and may be nested. 78 * and may be nested.
79 */ 79 */
80void synchronize_rcu(void) 80synchronize_rcu_xxx(synchronize_rcu, call_rcu)
81{
82 struct rcu_synchronize rcu;
83
84 init_completion(&rcu.completion);
85 /* Will wake me after RCU finished */
86 call_rcu(&rcu.head, wakeme_after_rcu);
87
88 /* Wait for it */
89 wait_for_completion(&rcu.completion);
90}
91EXPORT_SYMBOL_GPL(synchronize_rcu); 81EXPORT_SYMBOL_GPL(synchronize_rcu);
92 82
93static void rcu_barrier_callback(struct rcu_head *notused) 83static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
99/* 89/*
100 * Called with preemption disabled, and from cross-cpu IRQ context. 90 * Called with preemption disabled, and from cross-cpu IRQ context.
101 */ 91 */
102static void rcu_barrier_func(void *notused) 92static void rcu_barrier_func(void *type)
103{ 93{
104 int cpu = smp_processor_id(); 94 int cpu = smp_processor_id();
105 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 95 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
106 96
107 atomic_inc(&rcu_barrier_cpu_count); 97 atomic_inc(&rcu_barrier_cpu_count);
108 call_rcu(head, rcu_barrier_callback); 98 switch ((enum rcu_barrier)type) {
99 case RCU_BARRIER_STD:
100 call_rcu(head, rcu_barrier_callback);
101 break;
102 case RCU_BARRIER_BH:
103 call_rcu_bh(head, rcu_barrier_callback);
104 break;
105 case RCU_BARRIER_SCHED:
106 call_rcu_sched(head, rcu_barrier_callback);
107 break;
108 }
109} 109}
110 110
111/** 111/*
112 * rcu_barrier - Wait until all the in-flight RCUs are complete. 112 * Orchestrate the specified type of RCU barrier, waiting for all
113 * RCU callbacks of the specified type to complete.
113 */ 114 */
114void rcu_barrier(void) 115static void _rcu_barrier(enum rcu_barrier type)
115{ 116{
116 BUG_ON(in_interrupt()); 117 BUG_ON(in_interrupt());
117 /* Take cpucontrol mutex to protect against CPU hotplug */ 118 /* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
127 * until all the callbacks are queued. 128 * until all the callbacks are queued.
128 */ 129 */
129 rcu_read_lock(); 130 rcu_read_lock();
130 on_each_cpu(rcu_barrier_func, NULL, 0, 1); 131 on_each_cpu(rcu_barrier_func, (void *)type, 1);
131 rcu_read_unlock(); 132 rcu_read_unlock();
132 wait_for_completion(&rcu_barrier_completion); 133 wait_for_completion(&rcu_barrier_completion);
133 mutex_unlock(&rcu_barrier_mutex); 134 mutex_unlock(&rcu_barrier_mutex);
134} 135}
136
137/**
138 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
139 */
140void rcu_barrier(void)
141{
142 _rcu_barrier(RCU_BARRIER_STD);
143}
135EXPORT_SYMBOL_GPL(rcu_barrier); 144EXPORT_SYMBOL_GPL(rcu_barrier);
136 145
146/**
147 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
148 */
149void rcu_barrier_bh(void)
150{
151 _rcu_barrier(RCU_BARRIER_BH);
152}
153EXPORT_SYMBOL_GPL(rcu_barrier_bh);
154
155/**
156 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
157 */
158void rcu_barrier_sched(void)
159{
160 _rcu_barrier(RCU_BARRIER_SCHED);
161}
162EXPORT_SYMBOL_GPL(rcu_barrier_sched);
163
137void __init rcu_init(void) 164void __init rcu_init(void)
138{ 165{
139 __rcu_init(); 166 __rcu_init();
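
rcu_barrier(), rcu_barrier_bh() and rcu_barrier_sched() are now thin wrappers around one _rcu_barrier() routine that passes the barrier type through on_each_cpu() so each CPU queues its sentinel callback with the matching call_rcu variant. The wrapper-plus-dispatch shape, reduced to a runnable sketch (the queue_* helpers are placeholders, not kernel APIs):

    #include <stdio.h>

    /* Mirrors the enum rcu_barrier introduced above. */
    enum barrier_type { BARRIER_STD, BARRIER_BH, BARRIER_SCHED };

    static void queue_std(void)   { puts("queue via call_rcu()"); }
    static void queue_bh(void)    { puts("queue via call_rcu_bh()"); }
    static void queue_sched(void) { puts("queue via call_rcu_sched()"); }

    /* One core routine parameterised by type, with thin public wrappers,
     * the same shape as _rcu_barrier()/rcu_barrier{,_bh,_sched}(). */
    static void _barrier(enum barrier_type type)
    {
        switch (type) {
        case BARRIER_STD:   queue_std();   break;
        case BARRIER_BH:    queue_bh();    break;
        case BARRIER_SCHED: queue_sched(); break;
        }
        /* ... then wait for all queued callbacks to complete ... */
    }

    static void barrier(void)       { _barrier(BARRIER_STD); }
    static void barrier_bh(void)    { _barrier(BARRIER_BH); }
    static void barrier_sched(void) { _barrier(BARRIER_SCHED); }

    int main(void)
    {
        barrier();
        barrier_bh();
        barrier_sched();
        return 0;
    }
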
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 9bf445664457..6f62b77d93c4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/bitops.h> 47#include <linux/bitops.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kthread.h>
49#include <linux/completion.h> 50#include <linux/completion.h>
50#include <linux/moduleparam.h> 51#include <linux/moduleparam.h>
51#include <linux/percpu.h> 52#include <linux/percpu.h>
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
82 spinlock_t lock; /* Protect rcu_data fields. */ 82 spinlock_t lock; /* Protect rcu_data fields. */
83 long completed; /* Number of last completed batch. */ 83 long completed; /* Number of last completed batch. */
84 int waitlistcount; 84 int waitlistcount;
85 struct tasklet_struct rcu_tasklet;
86 struct rcu_head *nextlist; 85 struct rcu_head *nextlist;
87 struct rcu_head **nexttail; 86 struct rcu_head **nexttail;
88 struct rcu_head *waitlist[GP_STAGES]; 87 struct rcu_head *waitlist[GP_STAGES];
89 struct rcu_head **waittail[GP_STAGES]; 88 struct rcu_head **waittail[GP_STAGES];
90 struct rcu_head *donelist; 89 struct rcu_head *donelist; /* from waitlist & waitschedlist */
91 struct rcu_head **donetail; 90 struct rcu_head **donetail;
92 long rcu_flipctr[2]; 91 long rcu_flipctr[2];
92 struct rcu_head *nextschedlist;
93 struct rcu_head **nextschedtail;
94 struct rcu_head *waitschedlist;
95 struct rcu_head **waitschedtail;
96 int rcu_sched_sleeping;
93#ifdef CONFIG_RCU_TRACE 97#ifdef CONFIG_RCU_TRACE
94 struct rcupreempt_trace trace; 98 struct rcupreempt_trace trace;
95#endif /* #ifdef CONFIG_RCU_TRACE */ 99#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
131 rcu_try_flip_waitmb_state, 135 rcu_try_flip_waitmb_state,
132}; 136};
133 137
138/*
139 * States for rcu_ctrlblk.rcu_sched_sleep.
140 */
141
142enum rcu_sched_sleep_states {
143 rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
144 rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
145 rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
146};
147
134struct rcu_ctrlblk { 148struct rcu_ctrlblk {
135 spinlock_t fliplock; /* Protect state-machine transitions. */ 149 spinlock_t fliplock; /* Protect state-machine transitions. */
136 long completed; /* Number of last completed batch. */ 150 long completed; /* Number of last completed batch. */
137 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of 151 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
138 the rcu state machine */ 152 the rcu state machine */
153 spinlock_t schedlock; /* Protect rcu_sched sleep state. */
154 enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
155 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
139}; 156};
140 157
141static DEFINE_PER_CPU(struct rcu_data, rcu_data); 158static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
143 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 160 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
144 .completed = 0, 161 .completed = 0,
145 .rcu_try_flip_state = rcu_try_flip_idle_state, 162 .rcu_try_flip_state = rcu_try_flip_idle_state,
163 .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
164 .sched_sleep = rcu_sched_not_sleeping,
165 .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
146}; 166};
147 167
168static struct task_struct *rcu_sched_grace_period_task;
148 169
149#ifdef CONFIG_RCU_TRACE 170#ifdef CONFIG_RCU_TRACE
150static char *rcu_try_flip_state_names[] = 171static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
207 */ 228 */
208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); 229#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
209 230
231#define RCU_SCHED_BATCH_TIME (HZ / 50)
232
210/* 233/*
211 * Return the number of RCU batches processed thus far. Useful 234 * Return the number of RCU batches processed thus far. Useful
212 * for debug and statistics. 235 * for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
411 } 434 }
412} 435}
413 436
414#ifdef CONFIG_NO_HZ 437DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
438 .dynticks = 1,
439};
415 440
416DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; 441#ifdef CONFIG_NO_HZ
417static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
418static DEFINE_PER_CPU(int, rcu_update_flag); 442static DEFINE_PER_CPU(int, rcu_update_flag);
419 443
420/** 444/**
421 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. 445 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
422 * 446 *
423 * If the CPU was idle with dynamic ticks active, this updates the 447 * If the CPU was idle with dynamic ticks active, this updates the
424 * dynticks_progress_counter to let the RCU handling know that the 448 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
425 * CPU is active. 449 * CPU is active.
426 */ 450 */
427void rcu_irq_enter(void) 451void rcu_irq_enter(void)
428{ 452{
429 int cpu = smp_processor_id(); 453 int cpu = smp_processor_id();
454 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
430 455
431 if (per_cpu(rcu_update_flag, cpu)) 456 if (per_cpu(rcu_update_flag, cpu))
432 per_cpu(rcu_update_flag, cpu)++; 457 per_cpu(rcu_update_flag, cpu)++;
433 458
434 /* 459 /*
435 * Only update if we are coming from a stopped ticks mode 460 * Only update if we are coming from a stopped ticks mode
436 * (dynticks_progress_counter is even). 461 * (rcu_dyntick_sched.dynticks is even).
437 */ 462 */
438 if (!in_interrupt() && 463 if (!in_interrupt() &&
439 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { 464 (rdssp->dynticks & 0x1) == 0) {
440 /* 465 /*
441 * The following might seem like we could have a race 466 * The following might seem like we could have a race
442 * with NMI/SMIs. But this really isn't a problem. 467 * with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
459 * RCU read-side critical sections on this CPU would 484 * RCU read-side critical sections on this CPU would
460 * have already completed. 485 * have already completed.
461 */ 486 */
462 per_cpu(dynticks_progress_counter, cpu)++; 487 rdssp->dynticks++;
463 /* 488 /*
464 * The following memory barrier ensures that any 489 * The following memory barrier ensures that any
465 * rcu_read_lock() primitives in the irq handler 490 * rcu_read_lock() primitives in the irq handler
466 * are seen by other CPUs to follow the above 491 * are seen by other CPUs to follow the above
467 * increment to dynticks_progress_counter. This is 492 * increment to rcu_dyntick_sched.dynticks. This is
468 * required in order for other CPUs to correctly 493 * required in order for other CPUs to correctly
469 * determine when it is safe to advance the RCU 494 * determine when it is safe to advance the RCU
470 * grace-period state machine. 495 * grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
472 smp_mb(); /* see above block comment. */ 497 smp_mb(); /* see above block comment. */
473 /* 498 /*
474 * Since we can't determine the dynamic tick mode from 499 * Since we can't determine the dynamic tick mode from
475 * the dynticks_progress_counter after this routine, 500 * the rcu_dyntick_sched.dynticks after this routine,
476 * we use a second flag to acknowledge that we came 501 * we use a second flag to acknowledge that we came
477 * from an idle state with ticks stopped. 502 * from an idle state with ticks stopped.
478 */ 503 */
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
480 /* 505 /*
481 * If we take an NMI/SMI now, they will also increment 506 * If we take an NMI/SMI now, they will also increment
482 * the rcu_update_flag, and will not update the 507 * the rcu_update_flag, and will not update the
483 * dynticks_progress_counter on exit. That is for 508 * rcu_dyntick_sched.dynticks on exit. That is for
484 * this IRQ to do. 509 * this IRQ to do.
485 */ 510 */
486 } 511 }
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
490 * rcu_irq_exit - Called from exiting Hard irq context. 515 * rcu_irq_exit - Called from exiting Hard irq context.
491 * 516 *
492 * If the CPU was idle with dynamic ticks active, update the 517 * If the CPU was idle with dynamic ticks active, update the
 493 * dynticks_progress_counter to let the RCU handling be 518 * rcu_dyntick_sched.dynticks to let the RCU handling be
494 * aware that the CPU is going back to idle with no ticks. 519 * aware that the CPU is going back to idle with no ticks.
495 */ 520 */
496void rcu_irq_exit(void) 521void rcu_irq_exit(void)
497{ 522{
498 int cpu = smp_processor_id(); 523 int cpu = smp_processor_id();
524 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
499 525
500 /* 526 /*
501 * rcu_update_flag is set if we interrupted the CPU 527 * rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
503 * Once this occurs, we keep track of interrupt nesting 529 * Once this occurs, we keep track of interrupt nesting
504 * because a NMI/SMI could also come in, and we still 530 * because a NMI/SMI could also come in, and we still
505 * only want the IRQ that started the increment of the 531 * only want the IRQ that started the increment of the
506 * dynticks_progress_counter to be the one that modifies 532 * rcu_dyntick_sched.dynticks to be the one that modifies
507 * it on exit. 533 * it on exit.
508 */ 534 */
509 if (per_cpu(rcu_update_flag, cpu)) { 535 if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
515 541
516 /* 542 /*
517 * If an NMI/SMI happens now we are still 543 * If an NMI/SMI happens now we are still
518 * protected by the dynticks_progress_counter being odd. 544 * protected by the rcu_dyntick_sched.dynticks being odd.
519 */ 545 */
520 546
521 /* 547 /*
522 * The following memory barrier ensures that any 548 * The following memory barrier ensures that any
523 * rcu_read_unlock() primitives in the irq handler 549 * rcu_read_unlock() primitives in the irq handler
 524 * are seen by other CPUs to precede the following 550 * are seen by other CPUs to precede the following
525 * increment to dynticks_progress_counter. This 551 * increment to rcu_dyntick_sched.dynticks. This
526 * is required in order for other CPUs to determine 552 * is required in order for other CPUs to determine
527 * when it is safe to advance the RCU grace-period 553 * when it is safe to advance the RCU grace-period
528 * state machine. 554 * state machine.
529 */ 555 */
530 smp_mb(); /* see above block comment. */ 556 smp_mb(); /* see above block comment. */
531 per_cpu(dynticks_progress_counter, cpu)++; 557 rdssp->dynticks++;
532 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); 558 WARN_ON(rdssp->dynticks & 0x1);
533 } 559 }
534} 560}
535 561
536static void dyntick_save_progress_counter(int cpu) 562static void dyntick_save_progress_counter(int cpu)
537{ 563{
538 per_cpu(rcu_dyntick_snapshot, cpu) = 564 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
539 per_cpu(dynticks_progress_counter, cpu); 565
566 rdssp->dynticks_snap = rdssp->dynticks;
540} 567}
541 568
542static inline int 569static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
544{ 571{
545 long curr; 572 long curr;
546 long snap; 573 long snap;
574 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
547 575
548 curr = per_cpu(dynticks_progress_counter, cpu); 576 curr = rdssp->dynticks;
549 snap = per_cpu(rcu_dyntick_snapshot, cpu); 577 snap = rdssp->dynticks_snap;
550 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 578 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
551 579
552 /* 580 /*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
567 * that this CPU already acknowledged the counter. 595 * that this CPU already acknowledged the counter.
568 */ 596 */
569 597
570 if ((curr - snap) > 2 || (snap & 0x1) == 0) 598 if ((curr - snap) > 2 || (curr & 0x1) == 0)
571 return 0; 599 return 0;
572 600
573 /* We need this CPU to explicitly acknowledge the counter flip. */ 601 /* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
580{ 608{
581 long curr; 609 long curr;
582 long snap; 610 long snap;
611 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
583 612
584 curr = per_cpu(dynticks_progress_counter, cpu); 613 curr = rdssp->dynticks;
585 snap = per_cpu(rcu_dyntick_snapshot, cpu); 614 snap = rdssp->dynticks_snap;
586 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 615 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
587 616
588 /* 617 /*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
609 return 1; 638 return 1;
610} 639}
611 640
641static void dyntick_save_progress_counter_sched(int cpu)
642{
643 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
644
645 rdssp->sched_dynticks_snap = rdssp->dynticks;
646}
647
648static int rcu_qsctr_inc_needed_dyntick(int cpu)
649{
650 long curr;
651 long snap;
652 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
653
654 curr = rdssp->dynticks;
655 snap = rdssp->sched_dynticks_snap;
656 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
657
658 /*
659 * If the CPU remained in dynticks mode for the entire time
660 * and didn't take any interrupts, NMIs, SMIs, or whatever,
661 * then it cannot be in the middle of an rcu_read_lock(), so
662 * the next rcu_read_lock() it executes must use the new value
663 * of the counter. Therefore, this CPU has been in a quiescent
664 * state the entire time, and we don't need to wait for it.
665 */
666
667 if ((curr == snap) && ((curr & 0x1) == 0))
668 return 0;
669
670 /*
671 * If the CPU passed through or entered a dynticks idle phase with
672 * no active irq handlers, then, as above, this CPU has already
673 * passed through a quiescent state.
674 */
675
676 if ((curr - snap) > 2 || (snap & 0x1) == 0)
677 return 0;
678
679 /* We need this CPU to go through a quiescent state. */
680
681 return 1;
682}
683
612#else /* !CONFIG_NO_HZ */ 684#else /* !CONFIG_NO_HZ */
613 685
614# define dyntick_save_progress_counter(cpu) do { } while (0) 686# define dyntick_save_progress_counter(cpu) do { } while (0)
615# define rcu_try_flip_waitack_needed(cpu) (1) 687# define rcu_try_flip_waitack_needed(cpu) (1)
616# define rcu_try_flip_waitmb_needed(cpu) (1) 688# define rcu_try_flip_waitmb_needed(cpu) (1)
689
690# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
691# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
617 692
618#endif /* CONFIG_NO_HZ */ 693#endif /* CONFIG_NO_HZ */
619 694
695static void save_qsctr_sched(int cpu)
696{
697 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
698
699 rdssp->sched_qs_snap = rdssp->sched_qs;
700}
701
702static inline int rcu_qsctr_inc_needed(int cpu)
703{
704 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
705
706 /*
707 * If there has been a quiescent state, no more need to wait
708 * on this CPU.
709 */
710
711 if (rdssp->sched_qs != rdssp->sched_qs_snap) {
712 smp_mb(); /* force ordering with cpu entering schedule(). */
713 return 0;
714 }
715
716 /* We need this CPU to go through a quiescent state. */
717
718 return 1;
719}
720
620/* 721/*
621 * Get here when RCU is idle. Decide whether we need to 722 * Get here when RCU is idle. Decide whether we need to
622 * move out of idle state, and return non-zero if so. 723 * move out of idle state, and return non-zero if so.
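
The new *_sched dynticks helpers above reuse the existing convention: rcu_dyntick_sched.dynticks is even while the CPU sits in dynticks-idle and odd while it is active, so comparing a saved snapshot against the current value tells the grace-period code whether it still needs a quiescent state from that CPU. A compilable model of that snapshot-and-compare test (struct and field names are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    /* Even while the CPU is in dynticks-idle, odd while it is active;
     * bumped on every transition and on irq entry/exit. */
    struct dyntick_state {
        long dynticks;
        long snap;
    };

    static void save_snapshot(struct dyntick_state *s)
    {
        s->snap = s->dynticks;
    }

    /* true == we still have to wait for this CPU to pass a quiescent state. */
    static bool still_needed(const struct dyntick_state *s)
    {
        long curr = s->dynticks, snap = s->snap;

        /* Idle the whole time: counter unchanged and even. */
        if (curr == snap && (curr & 0x1) == 0)
            return false;

        /* Passed through (or left) dynticks idle since the snapshot. */
        if (curr - snap > 2 || (snap & 0x1) == 0)
            return false;

        return true;
    }

    int main(void)
    {
        struct dyntick_state s = { .dynticks = 4 };   /* even: dynticks idle */

        save_snapshot(&s);
        printf("%d ", still_needed(&s));  /* 0: idle the whole time        */

        s.dynticks = 5;                   /* left idle after the snapshot  */
        printf("%d ", still_needed(&s));  /* 0: was idle at snapshot time  */

        save_snapshot(&s);                /* active at snapshot time       */
        printf("%d\n", still_needed(&s)); /* 1: still have to wait         */
        return 0;
    }
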
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
819 unsigned long flags; 920 unsigned long flags;
820 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 921 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
821 922
923 /*
924 * If this CPU took its interrupt from user mode or from the
925 * idle loop, and this is not a nested interrupt, then
 926 * this CPU has to have exited all prior preempt-disable
927 * sections of code. So increment the counter to note this.
928 *
929 * The memory barrier is needed to handle the case where
930 * writes from a preempt-disable section of code get reordered
931 * into schedule() by this CPU's write buffer. So the memory
932 * barrier makes sure that the rcu_qsctr_inc() is seen by other
933 * CPUs to happen after any such write.
934 */
935
936 if (user ||
937 (idle_cpu(cpu) && !in_softirq() &&
938 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
939 smp_mb(); /* Guard against aggressive schedule(). */
940 rcu_qsctr_inc(cpu);
941 }
942
822 rcu_check_mb(cpu); 943 rcu_check_mb(cpu);
823 if (rcu_ctrlblk.completed == rdp->completed) 944 if (rcu_ctrlblk.completed == rdp->completed)
824 rcu_try_flip(); 945 rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
869 struct rcu_head *list = NULL; 990 struct rcu_head *list = NULL;
870 unsigned long flags; 991 unsigned long flags;
871 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 992 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
993 struct rcu_head *schedlist = NULL;
994 struct rcu_head **schedtail = &schedlist;
872 struct rcu_head **tail = &list; 995 struct rcu_head **tail = &list;
873 996
874 /* 997 /*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
882 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], 1005 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
883 list, tail); 1006 list, tail);
884 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); 1007 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
1008 rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
1009 schedlist, schedtail);
1010 rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
1011 schedlist, schedtail);
1012 rdp->rcu_sched_sleeping = 0;
885 spin_unlock_irqrestore(&rdp->lock, flags); 1013 spin_unlock_irqrestore(&rdp->lock, flags);
886 rdp->waitlistcount = 0; 1014 rdp->waitlistcount = 0;
887 1015
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
916 * fix. 1044 * fix.
917 */ 1045 */
918 1046
919 local_irq_save(flags); 1047 local_irq_save(flags); /* disable preempt till we know what lock. */
920 rdp = RCU_DATA_ME(); 1048 rdp = RCU_DATA_ME();
921 spin_lock(&rdp->lock); 1049 spin_lock(&rdp->lock);
922 *rdp->nexttail = list; 1050 *rdp->nexttail = list;
923 if (list) 1051 if (list)
924 rdp->nexttail = tail; 1052 rdp->nexttail = tail;
1053 *rdp->nextschedtail = schedlist;
1054 if (schedlist)
1055 rdp->nextschedtail = schedtail;
925 spin_unlock_irqrestore(&rdp->lock, flags); 1056 spin_unlock_irqrestore(&rdp->lock, flags);
926} 1057}
927 1058
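
rcu_offline_cpu() above drains the dying CPU's new nextschedlist/waitschedlist the same way as its other callback lists: each list is a head pointer plus a pointer to the final ->next field, which makes both enqueue and wholesale splicing O(1). A self-contained sketch of that list shape:

    #include <stdio.h>

    struct cb {
        struct cb *next;
        int id;
    };

    /* Callback list kept as head plus a pointer to the last ->next field,
     * which is how the rcu_data lists above get O(1) enqueue and splice. */
    struct cb_list {
        struct cb *head;
        struct cb **tail;
    };

    static void list_init(struct cb_list *l)
    {
        l->head = NULL;
        l->tail = &l->head;
    }

    static void enqueue(struct cb_list *l, struct cb *c)
    {
        c->next = NULL;
        *l->tail = c;
        l->tail = &c->next;
    }

    /* Move everything from @src onto the end of @dst (cf. the offline path
     * handing a dead CPU's lists to a surviving CPU). */
    static void splice(struct cb_list *dst, struct cb_list *src)
    {
        if (!src->head)
            return;
        *dst->tail = src->head;
        dst->tail = src->tail;
        list_init(src);
    }

    int main(void)
    {
        struct cb_list a, b;
        struct cb c1 = { .id = 1 }, c2 = { .id = 2 }, c3 = { .id = 3 };
        struct cb *p;

        list_init(&a);
        list_init(&b);
        enqueue(&a, &c1);
        enqueue(&b, &c2);
        enqueue(&b, &c3);
        splice(&a, &b);

        for (p = a.head; p; p = p->next)
            printf("%d ", p->id);
        printf("\n");               /* 1 2 3 */
        return 0;
    }
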
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
936void __cpuinit rcu_online_cpu(int cpu) 1067void __cpuinit rcu_online_cpu(int cpu)
937{ 1068{
938 unsigned long flags; 1069 unsigned long flags;
1070 struct rcu_data *rdp;
939 1071
940 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); 1072 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
941 cpu_set(cpu, rcu_cpu_online_map); 1073 cpu_set(cpu, rcu_cpu_online_map);
942 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); 1074 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
1075
1076 /*
1077 * The rcu_sched grace-period processing might have bypassed
1078 * this CPU, given that it was not in the rcu_cpu_online_map
1079 * when the grace-period scan started. This means that the
1080 * grace-period task might sleep. So make sure that if this
1081 * should happen, the first callback posted to this CPU will
1082 * wake up the grace-period task if need be.
1083 */
1084
1085 rdp = RCU_DATA_CPU(cpu);
1086 spin_lock_irqsave(&rdp->lock, flags);
1087 rdp->rcu_sched_sleeping = 1;
1088 spin_unlock_irqrestore(&rdp->lock, flags);
943} 1089}
944 1090
945static void rcu_process_callbacks(struct softirq_action *unused) 1091static void rcu_process_callbacks(struct softirq_action *unused)
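
The hunks below add a kthread-based grace-period engine for call_rcu_sched(): the poster wakes the thread only when rcu_ctrlblk.sched_sleep says it really went to sleep, and the thread announces rcu_sched_sleep_prep and re-checks for work before sleeping so a concurrently posted callback cannot be missed. A condensed pthread model of that handshake, using one flag instead of the three-state enum:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t schedlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  sched_wq  = PTHREAD_COND_INITIALIZER;
    static bool gp_sleeping;        /* cf. rcu_ctrlblk.sched_sleep */
    static bool work_pending;

    /* Poster side: queue work and wake the grace-period thread only if it
     * actually went to sleep, the shape of call_rcu_sched()'s wake_gp path. */
    static void post_callback(void)
    {
        bool wake = false;

        pthread_mutex_lock(&schedlock);
        work_pending = true;
        if (gp_sleeping) {
            gp_sleeping = false;
            wake = true;
        }
        pthread_mutex_unlock(&schedlock);

        if (wake)
            pthread_cond_signal(&sched_wq);
    }

    /* Grace-period thread: re-check for work under the lock before sleeping,
     * so a concurrent post_callback() cannot be lost. */
    static void *gp_thread(void *arg)
    {
        pthread_mutex_lock(&schedlock);
        while (!work_pending) {
            gp_sleeping = true;
            pthread_cond_wait(&sched_wq, &schedlock);
        }
        work_pending = false;
        pthread_mutex_unlock(&schedlock);
        puts("grace period processed");
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, gp_thread, NULL);
        post_callback();
        pthread_join(t, NULL);
        return 0;
    }
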
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
982 *rdp->nexttail = head; 1128 *rdp->nexttail = head;
983 rdp->nexttail = &head->next; 1129 rdp->nexttail = &head->next;
984 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); 1130 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
985 spin_unlock(&rdp->lock); 1131 spin_unlock_irqrestore(&rdp->lock, flags);
986 local_irq_restore(flags);
987} 1132}
988EXPORT_SYMBOL_GPL(call_rcu); 1133EXPORT_SYMBOL_GPL(call_rcu);
989 1134
1135void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1136{
1137 unsigned long flags;
1138 struct rcu_data *rdp;
1139 int wake_gp = 0;
1140
1141 head->func = func;
1142 head->next = NULL;
1143 local_irq_save(flags);
1144 rdp = RCU_DATA_ME();
1145 spin_lock(&rdp->lock);
1146 *rdp->nextschedtail = head;
1147 rdp->nextschedtail = &head->next;
1148 if (rdp->rcu_sched_sleeping) {
1149
1150 /* Grace-period processing might be sleeping... */
1151
1152 rdp->rcu_sched_sleeping = 0;
1153 wake_gp = 1;
1154 }
1155 spin_unlock_irqrestore(&rdp->lock, flags);
1156 if (wake_gp) {
1157
1158 /* Wake up grace-period processing, unless someone beat us. */
1159
1160 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1161 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
1162 wake_gp = 0;
1163 rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
1164 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1165 if (wake_gp)
1166 wake_up_interruptible(&rcu_ctrlblk.sched_wq);
1167 }
1168}
1169EXPORT_SYMBOL_GPL(call_rcu_sched);
1170
990/* 1171/*
991 * Wait until all currently running preempt_disable() code segments 1172 * Wait until all currently running preempt_disable() code segments
992 * (including hardware-irq-disable segments) complete. Note that 1173 * (including hardware-irq-disable segments) complete. Note that
993 * in -rt this does -not- necessarily result in all currently executing 1174 * in -rt this does -not- necessarily result in all currently executing
994 * interrupt -handlers- having completed. 1175 * interrupt -handlers- having completed.
995 */ 1176 */
996void __synchronize_sched(void) 1177synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
1178EXPORT_SYMBOL_GPL(__synchronize_sched);
1179
1180/*
1181 * kthread function that manages call_rcu_sched grace periods.
1182 */
1183static int rcu_sched_grace_period(void *arg)
997{ 1184{
998 cpumask_t oldmask; 1185 int couldsleep; /* might sleep after current pass. */
1186 int couldsleepnext = 0; /* might sleep after next pass. */
999 int cpu; 1187 int cpu;
1188 unsigned long flags;
1189 struct rcu_data *rdp;
1190 int ret;
1000 1191
1001 if (sched_getaffinity(0, &oldmask) < 0) 1192 /*
1002 oldmask = cpu_possible_map; 1193 * Each pass through the following loop handles one
1003 for_each_online_cpu(cpu) { 1194 * rcu_sched grace period cycle.
1004 sched_setaffinity(0, &cpumask_of_cpu(cpu)); 1195 */
1005 schedule(); 1196 do {
1006 } 1197 /* Save each CPU's current state. */
1007 sched_setaffinity(0, &oldmask); 1198
1199 for_each_online_cpu(cpu) {
1200 dyntick_save_progress_counter_sched(cpu);
1201 save_qsctr_sched(cpu);
1202 }
1203
1204 /*
1205 * Sleep for about an RCU grace-period's worth to
1206 * allow better batching and to consume less CPU.
1207 */
1208 schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
1209
1210 /*
1211 * If there was nothing to do last time, prepare to
1212 * sleep at the end of the current grace period cycle.
1213 */
1214 couldsleep = couldsleepnext;
1215 couldsleepnext = 1;
1216 if (couldsleep) {
1217 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1218 rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
1219 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1220 }
1221
1222 /*
1223 * Wait on each CPU in turn to have either visited
1224 * a quiescent state or been in dynticks-idle mode.
1225 */
1226 for_each_online_cpu(cpu) {
1227 while (rcu_qsctr_inc_needed(cpu) &&
1228 rcu_qsctr_inc_needed_dyntick(cpu)) {
1229 /* resched_cpu(cpu); @@@ */
1230 schedule_timeout_interruptible(1);
1231 }
1232 }
1233
1234 /* Advance callbacks for each CPU. */
1235
1236 for_each_online_cpu(cpu) {
1237
1238 rdp = RCU_DATA_CPU(cpu);
1239 spin_lock_irqsave(&rdp->lock, flags);
1240
1241 /*
1242 * We are running on this CPU irq-disabled, so no
1243 * CPU can go offline until we re-enable irqs.
1244 * The current CPU might have already gone
1245 * offline (between the for_each_offline_cpu and
1246 * the spin_lock_irqsave), but in that case all its
1247 * callback lists will be empty, so no harm done.
1248 *
1249 * Advance the callbacks! We share normal RCU's
1250 * donelist, since callbacks are invoked the
1251 * same way in either case.
1252 */
1253 if (rdp->waitschedlist != NULL) {
1254 *rdp->donetail = rdp->waitschedlist;
1255 rdp->donetail = rdp->waitschedtail;
1256
1257 /*
1258 * Next rcu_check_callbacks() will
1259 * do the required raise_softirq().
1260 */
1261 }
1262 if (rdp->nextschedlist != NULL) {
1263 rdp->waitschedlist = rdp->nextschedlist;
1264 rdp->waitschedtail = rdp->nextschedtail;
1265 couldsleep = 0;
1266 couldsleepnext = 0;
1267 } else {
1268 rdp->waitschedlist = NULL;
1269 rdp->waitschedtail = &rdp->waitschedlist;
1270 }
1271 rdp->nextschedlist = NULL;
1272 rdp->nextschedtail = &rdp->nextschedlist;
1273
1274 /* Mark sleep intention. */
1275
1276 rdp->rcu_sched_sleeping = couldsleep;
1277
1278 spin_unlock_irqrestore(&rdp->lock, flags);
1279 }
1280
1281 /* If we saw callbacks on the last scan, go deal with them. */
1282
1283 if (!couldsleep)
1284 continue;
1285
1286 /* Attempt to block... */
1287
1288 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1289 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
1290
1291 /*
1292 * Someone posted a callback after we scanned.
1293 * Go take care of it.
1294 */
1295 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1296 couldsleepnext = 0;
1297 continue;
1298 }
1299
1300 /* Block until the next person posts a callback. */
1301
1302 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1303 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1304 ret = 0;
1305 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1306 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1307 ret);
1308
1309 /*
1310 * Signals would prevent us from sleeping, and we cannot
1311 * do much with them in any case. So flush them.
1312 */
1313 if (ret)
1314 flush_signals(current);
1315 couldsleepnext = 0;
1316
1317 } while (!kthread_should_stop());
1318
1319 return (0);
1008} 1320}
1009EXPORT_SYMBOL_GPL(__synchronize_sched);
1010 1321
1011/* 1322/*
1012 * Check to see if any future RCU-related work will need to be done 1323 * Check to see if any future RCU-related work will need to be done
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
1023 1334
1024 return (rdp->donelist != NULL || 1335 return (rdp->donelist != NULL ||
1025 !!rdp->waitlistcount || 1336 !!rdp->waitlistcount ||
1026 rdp->nextlist != NULL); 1337 rdp->nextlist != NULL ||
1338 rdp->nextschedlist != NULL ||
1339 rdp->waitschedlist != NULL);
1027} 1340}
1028 1341
1029int rcu_pending(int cpu) 1342int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
1034 1347
1035 if (rdp->donelist != NULL || 1348 if (rdp->donelist != NULL ||
1036 !!rdp->waitlistcount || 1349 !!rdp->waitlistcount ||
1037 rdp->nextlist != NULL) 1350 rdp->nextlist != NULL ||
1351 rdp->nextschedlist != NULL ||
1352 rdp->waitschedlist != NULL)
1038 return 1; 1353 return 1;
1039 1354
1040 /* The RCU core needs an acknowledgement from this CPU. */ 1355 /* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
1101 rdp->donetail = &rdp->donelist; 1416 rdp->donetail = &rdp->donelist;
1102 rdp->rcu_flipctr[0] = 0; 1417 rdp->rcu_flipctr[0] = 0;
1103 rdp->rcu_flipctr[1] = 0; 1418 rdp->rcu_flipctr[1] = 0;
1419 rdp->nextschedlist = NULL;
1420 rdp->nextschedtail = &rdp->nextschedlist;
1421 rdp->waitschedlist = NULL;
1422 rdp->waitschedtail = &rdp->waitschedlist;
1423 rdp->rcu_sched_sleeping = 0;
1104 } 1424 }
1105 register_cpu_notifier(&rcu_nb); 1425 register_cpu_notifier(&rcu_nb);
1106 1426
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
1123} 1443}
1124 1444
1125/* 1445/*
1126 * Deprecated, use synchronize_rcu() or synchronize_sched() instead. 1446 * Late-boot-time RCU initialization that must wait until after scheduler
1447 * has been initialized.
1127 */ 1448 */
1128void synchronize_kernel(void) 1449void __init rcu_init_sched(void)
1129{ 1450{
1130 synchronize_rcu(); 1451 rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
1452 NULL,
1453 "rcu_sched_grace_period");
1454 WARN_ON(IS_ERR(rcu_sched_grace_period_task));
1131} 1455}
1132 1456
1133#ifdef CONFIG_RCU_TRACE 1457#ifdef CONFIG_RCU_TRACE
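For orientation, the new call_rcu_sched() interface added above takes the same arguments as call_rcu(), but its grace period waits for all in-flight preempt_disable()/irq-disabled sections rather than rcu_read_lock() sections. A minimal usage sketch, assuming a caller-defined struct foo and foo_release() callback (neither is part of this patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

static void foo_release(struct rcu_head *head)
{
	/* Runs after every preempt-disabled section in flight has completed. */
	kfree(container_of(head, struct foo, rcu));
}

static void foo_retire(struct foo *fp)
{
	/* Queue fp for freeing after an rcu_sched grace period. */
	call_rcu_sched(&fp->rcu, foo_release);
}

Blocking callers can instead use synchronize_sched(), which this patch now builds on top of call_rcu_sched() via the synchronize_rcu_xxx() helper shown above.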
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 49ac4947af24..5edf82c34bbc 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -38,7 +38,6 @@
38#include <linux/moduleparam.h> 38#include <linux/moduleparam.h>
39#include <linux/percpu.h> 39#include <linux/percpu.h>
40#include <linux/notifier.h> 40#include <linux/notifier.h>
41#include <linux/rcupdate.h>
42#include <linux/cpu.h> 41#include <linux/cpu.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <linux/rcupreempt_trace.h> 43#include <linux/rcupreempt_trace.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 33acc424667e..90b5b123f7a1 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -57,7 +57,9 @@ static int stat_interval; /* Interval between stats, in seconds. */
57 /* Defaults to "only at end of test". */ 57 /* Defaults to "only at end of test". */
58static int verbose; /* Print more debug info. */ 58static int verbose; /* Print more debug info. */
59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ 59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
60static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ 60static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
61static int stutter = 5; /* Start/stop testing interval (in sec) */
62static int irqreader = 1; /* RCU readers from irq (timers). */
61static char *torture_type = "rcu"; /* What RCU implementation to torture. */ 63static char *torture_type = "rcu"; /* What RCU implementation to torture. */
62 64
63module_param(nreaders, int, 0444); 65module_param(nreaders, int, 0444);
@@ -72,6 +74,10 @@ module_param(test_no_idle_hz, bool, 0444);
72MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); 74MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
73module_param(shuffle_interval, int, 0444); 75module_param(shuffle_interval, int, 0444);
74MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); 76MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
77module_param(stutter, int, 0444);
78MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
79module_param(irqreader, int, 0444);
80MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
75module_param(torture_type, charp, 0444); 81module_param(torture_type, charp, 0444);
76MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); 82MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
77 83
@@ -91,6 +97,7 @@ static struct task_struct **fakewriter_tasks;
91static struct task_struct **reader_tasks; 97static struct task_struct **reader_tasks;
92static struct task_struct *stats_task; 98static struct task_struct *stats_task;
93static struct task_struct *shuffler_task; 99static struct task_struct *shuffler_task;
100static struct task_struct *stutter_task;
94 101
95#define RCU_TORTURE_PIPE_LEN 10 102#define RCU_TORTURE_PIPE_LEN 10
96 103
@@ -117,8 +124,18 @@ static atomic_t n_rcu_torture_alloc_fail;
117static atomic_t n_rcu_torture_free; 124static atomic_t n_rcu_torture_free;
118static atomic_t n_rcu_torture_mberror; 125static atomic_t n_rcu_torture_mberror;
119static atomic_t n_rcu_torture_error; 126static atomic_t n_rcu_torture_error;
127static long n_rcu_torture_timers = 0;
120static struct list_head rcu_torture_removed; 128static struct list_head rcu_torture_removed;
121 129
130static int stutter_pause_test = 0;
131
132#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
133#define RCUTORTURE_RUNNABLE_INIT 1
134#else
135#define RCUTORTURE_RUNNABLE_INIT 0
136#endif
137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
138
122/* 139/*
123 * Allocate an element from the rcu_tortures pool. 140 * Allocate an element from the rcu_tortures pool.
124 */ 141 */
@@ -179,6 +196,16 @@ rcu_random(struct rcu_random_state *rrsp)
179 return swahw32(rrsp->rrs_state); 196 return swahw32(rrsp->rrs_state);
180} 197}
181 198
199static void
200rcu_stutter_wait(void)
201{
202 while (stutter_pause_test || !rcutorture_runnable)
203 if (rcutorture_runnable)
204 schedule_timeout_interruptible(1);
205 else
206 schedule_timeout_interruptible(round_jiffies_relative(HZ));
207}
208
182/* 209/*
183 * Operations vector for selecting different types of tests. 210 * Operations vector for selecting different types of tests.
184 */ 211 */
@@ -192,7 +219,9 @@ struct rcu_torture_ops {
192 int (*completed)(void); 219 int (*completed)(void);
193 void (*deferredfree)(struct rcu_torture *p); 220 void (*deferredfree)(struct rcu_torture *p);
194 void (*sync)(void); 221 void (*sync)(void);
222 void (*cb_barrier)(void);
195 int (*stats)(char *page); 223 int (*stats)(char *page);
224 int irqcapable;
196 char *name; 225 char *name;
197}; 226};
198static struct rcu_torture_ops *cur_ops = NULL; 227static struct rcu_torture_ops *cur_ops = NULL;
@@ -265,7 +294,9 @@ static struct rcu_torture_ops rcu_ops = {
265 .completed = rcu_torture_completed, 294 .completed = rcu_torture_completed,
266 .deferredfree = rcu_torture_deferred_free, 295 .deferredfree = rcu_torture_deferred_free,
267 .sync = synchronize_rcu, 296 .sync = synchronize_rcu,
297 .cb_barrier = rcu_barrier,
268 .stats = NULL, 298 .stats = NULL,
299 .irqcapable = 1,
269 .name = "rcu" 300 .name = "rcu"
270}; 301};
271 302
@@ -304,7 +335,9 @@ static struct rcu_torture_ops rcu_sync_ops = {
304 .completed = rcu_torture_completed, 335 .completed = rcu_torture_completed,
305 .deferredfree = rcu_sync_torture_deferred_free, 336 .deferredfree = rcu_sync_torture_deferred_free,
306 .sync = synchronize_rcu, 337 .sync = synchronize_rcu,
338 .cb_barrier = NULL,
307 .stats = NULL, 339 .stats = NULL,
340 .irqcapable = 1,
308 .name = "rcu_sync" 341 .name = "rcu_sync"
309}; 342};
310 343
@@ -364,7 +397,9 @@ static struct rcu_torture_ops rcu_bh_ops = {
364 .completed = rcu_bh_torture_completed, 397 .completed = rcu_bh_torture_completed,
365 .deferredfree = rcu_bh_torture_deferred_free, 398 .deferredfree = rcu_bh_torture_deferred_free,
366 .sync = rcu_bh_torture_synchronize, 399 .sync = rcu_bh_torture_synchronize,
400 .cb_barrier = rcu_barrier_bh,
367 .stats = NULL, 401 .stats = NULL,
402 .irqcapable = 1,
368 .name = "rcu_bh" 403 .name = "rcu_bh"
369}; 404};
370 405
@@ -377,7 +412,9 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
377 .completed = rcu_bh_torture_completed, 412 .completed = rcu_bh_torture_completed,
378 .deferredfree = rcu_sync_torture_deferred_free, 413 .deferredfree = rcu_sync_torture_deferred_free,
379 .sync = rcu_bh_torture_synchronize, 414 .sync = rcu_bh_torture_synchronize,
415 .cb_barrier = NULL,
380 .stats = NULL, 416 .stats = NULL,
417 .irqcapable = 1,
381 .name = "rcu_bh_sync" 418 .name = "rcu_bh_sync"
382}; 419};
383 420
@@ -458,6 +495,7 @@ static struct rcu_torture_ops srcu_ops = {
458 .completed = srcu_torture_completed, 495 .completed = srcu_torture_completed,
459 .deferredfree = rcu_sync_torture_deferred_free, 496 .deferredfree = rcu_sync_torture_deferred_free,
460 .sync = srcu_torture_synchronize, 497 .sync = srcu_torture_synchronize,
498 .cb_barrier = NULL,
461 .stats = srcu_torture_stats, 499 .stats = srcu_torture_stats,
462 .name = "srcu" 500 .name = "srcu"
463}; 501};
@@ -482,6 +520,11 @@ static int sched_torture_completed(void)
482 return 0; 520 return 0;
483} 521}
484 522
523static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
524{
525 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
526}
527
485static void sched_torture_synchronize(void) 528static void sched_torture_synchronize(void)
486{ 529{
487 synchronize_sched(); 530 synchronize_sched();
@@ -494,12 +537,28 @@ static struct rcu_torture_ops sched_ops = {
494 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 537 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
495 .readunlock = sched_torture_read_unlock, 538 .readunlock = sched_torture_read_unlock,
496 .completed = sched_torture_completed, 539 .completed = sched_torture_completed,
497 .deferredfree = rcu_sync_torture_deferred_free, 540 .deferredfree = rcu_sched_torture_deferred_free,
498 .sync = sched_torture_synchronize, 541 .sync = sched_torture_synchronize,
542 .cb_barrier = rcu_barrier_sched,
499 .stats = NULL, 543 .stats = NULL,
544 .irqcapable = 1,
500 .name = "sched" 545 .name = "sched"
501}; 546};
502 547
548static struct rcu_torture_ops sched_ops_sync = {
549 .init = rcu_sync_torture_init,
550 .cleanup = NULL,
551 .readlock = sched_torture_read_lock,
552 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
553 .readunlock = sched_torture_read_unlock,
554 .completed = sched_torture_completed,
555 .deferredfree = rcu_sync_torture_deferred_free,
556 .sync = sched_torture_synchronize,
557 .cb_barrier = NULL,
558 .stats = NULL,
559 .name = "sched_sync"
560};
561
503/* 562/*
504 * RCU torture writer kthread. Repeatedly substitutes a new structure 563 * RCU torture writer kthread. Repeatedly substitutes a new structure
505 * for that pointed to by rcu_torture_current, freeing the old structure 564 * for that pointed to by rcu_torture_current, freeing the old structure
@@ -537,6 +596,7 @@ rcu_torture_writer(void *arg)
537 } 596 }
538 rcu_torture_current_version++; 597 rcu_torture_current_version++;
539 oldbatch = cur_ops->completed(); 598 oldbatch = cur_ops->completed();
599 rcu_stutter_wait();
540 } while (!kthread_should_stop() && !fullstop); 600 } while (!kthread_should_stop() && !fullstop);
541 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 601 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
542 while (!kthread_should_stop()) 602 while (!kthread_should_stop())
@@ -560,6 +620,7 @@ rcu_torture_fakewriter(void *arg)
560 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 620 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
561 udelay(rcu_random(&rand) & 0x3ff); 621 udelay(rcu_random(&rand) & 0x3ff);
562 cur_ops->sync(); 622 cur_ops->sync();
623 rcu_stutter_wait();
563 } while (!kthread_should_stop() && !fullstop); 624 } while (!kthread_should_stop() && !fullstop);
564 625
565 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 626 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
@@ -569,6 +630,52 @@ rcu_torture_fakewriter(void *arg)
569} 630}
570 631
571/* 632/*
633 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
634 * incrementing the corresponding element of the pipeline array. The
635 * counter in the element should never be greater than 1, otherwise, the
636 * RCU implementation is broken.
637 */
638static void rcu_torture_timer(unsigned long unused)
639{
640 int idx;
641 int completed;
642 static DEFINE_RCU_RANDOM(rand);
643 static DEFINE_SPINLOCK(rand_lock);
644 struct rcu_torture *p;
645 int pipe_count;
646
647 idx = cur_ops->readlock();
648 completed = cur_ops->completed();
649 p = rcu_dereference(rcu_torture_current);
650 if (p == NULL) {
651 /* Leave because rcu_torture_writer is not yet underway */
652 cur_ops->readunlock(idx);
653 return;
654 }
655 if (p->rtort_mbtest == 0)
656 atomic_inc(&n_rcu_torture_mberror);
657 spin_lock(&rand_lock);
658 cur_ops->readdelay(&rand);
659 n_rcu_torture_timers++;
660 spin_unlock(&rand_lock);
661 preempt_disable();
662 pipe_count = p->rtort_pipe_count;
663 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
664 /* Should not happen, but... */
665 pipe_count = RCU_TORTURE_PIPE_LEN;
666 }
667 ++__get_cpu_var(rcu_torture_count)[pipe_count];
668 completed = cur_ops->completed() - completed;
669 if (completed > RCU_TORTURE_PIPE_LEN) {
670 /* Should not happen, but... */
671 completed = RCU_TORTURE_PIPE_LEN;
672 }
673 ++__get_cpu_var(rcu_torture_batch)[completed];
674 preempt_enable();
675 cur_ops->readunlock(idx);
676}
677
678/*
572 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, 679 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
573 * incrementing the corresponding element of the pipeline array. The 680 * incrementing the corresponding element of the pipeline array. The
574 * counter in the element should never be greater than 1, otherwise, the 681 * counter in the element should never be greater than 1, otherwise, the
@@ -582,11 +689,18 @@ rcu_torture_reader(void *arg)
582 DEFINE_RCU_RANDOM(rand); 689 DEFINE_RCU_RANDOM(rand);
583 struct rcu_torture *p; 690 struct rcu_torture *p;
584 int pipe_count; 691 int pipe_count;
692 struct timer_list t;
585 693
586 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 694 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
587 set_user_nice(current, 19); 695 set_user_nice(current, 19);
696 if (irqreader && cur_ops->irqcapable)
697 setup_timer_on_stack(&t, rcu_torture_timer, 0);
588 698
589 do { 699 do {
700 if (irqreader && cur_ops->irqcapable) {
701 if (!timer_pending(&t))
702 mod_timer(&t, 1);
703 }
590 idx = cur_ops->readlock(); 704 idx = cur_ops->readlock();
591 completed = cur_ops->completed(); 705 completed = cur_ops->completed();
592 p = rcu_dereference(rcu_torture_current); 706 p = rcu_dereference(rcu_torture_current);
@@ -615,8 +729,11 @@ rcu_torture_reader(void *arg)
615 preempt_enable(); 729 preempt_enable();
616 cur_ops->readunlock(idx); 730 cur_ops->readunlock(idx);
617 schedule(); 731 schedule();
732 rcu_stutter_wait();
618 } while (!kthread_should_stop() && !fullstop); 733 } while (!kthread_should_stop() && !fullstop);
619 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 734 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
735 if (irqreader && cur_ops->irqcapable)
736 del_timer_sync(&t);
620 while (!kthread_should_stop()) 737 while (!kthread_should_stop())
621 schedule_timeout_uninterruptible(1); 738 schedule_timeout_uninterruptible(1);
622 return 0; 739 return 0;
@@ -647,20 +764,22 @@ rcu_torture_printk(char *page)
647 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 764 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
648 cnt += sprintf(&page[cnt], 765 cnt += sprintf(&page[cnt],
649 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " 766 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
650 "rtmbe: %d", 767 "rtmbe: %d nt: %ld",
651 rcu_torture_current, 768 rcu_torture_current,
652 rcu_torture_current_version, 769 rcu_torture_current_version,
653 list_empty(&rcu_torture_freelist), 770 list_empty(&rcu_torture_freelist),
654 atomic_read(&n_rcu_torture_alloc), 771 atomic_read(&n_rcu_torture_alloc),
655 atomic_read(&n_rcu_torture_alloc_fail), 772 atomic_read(&n_rcu_torture_alloc_fail),
656 atomic_read(&n_rcu_torture_free), 773 atomic_read(&n_rcu_torture_free),
657 atomic_read(&n_rcu_torture_mberror)); 774 atomic_read(&n_rcu_torture_mberror),
775 n_rcu_torture_timers);
658 if (atomic_read(&n_rcu_torture_mberror) != 0) 776 if (atomic_read(&n_rcu_torture_mberror) != 0)
659 cnt += sprintf(&page[cnt], " !!!"); 777 cnt += sprintf(&page[cnt], " !!!");
660 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); 778 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
661 if (i > 1) { 779 if (i > 1) {
662 cnt += sprintf(&page[cnt], "!!! "); 780 cnt += sprintf(&page[cnt], "!!! ");
663 atomic_inc(&n_rcu_torture_error); 781 atomic_inc(&n_rcu_torture_error);
782 WARN_ON_ONCE(1);
664 } 783 }
665 cnt += sprintf(&page[cnt], "Reader Pipe: "); 784 cnt += sprintf(&page[cnt], "Reader Pipe: ");
666 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 785 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -785,15 +904,34 @@ rcu_torture_shuffle(void *arg)
785 return 0; 904 return 0;
786} 905}
787 906
907/* Cause the rcutorture test to "stutter", starting and stopping all
908 * threads periodically.
909 */
910static int
911rcu_torture_stutter(void *arg)
912{
913 VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
914 do {
915 schedule_timeout_interruptible(stutter * HZ);
916 stutter_pause_test = 1;
917 if (!kthread_should_stop())
918 schedule_timeout_interruptible(stutter * HZ);
919 stutter_pause_test = 0;
920 } while (!kthread_should_stop());
921 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
922 return 0;
923}
924
788static inline void 925static inline void
789rcu_torture_print_module_parms(char *tag) 926rcu_torture_print_module_parms(char *tag)
790{ 927{
791 printk(KERN_ALERT "%s" TORTURE_FLAG 928 printk(KERN_ALERT "%s" TORTURE_FLAG
792 "--- %s: nreaders=%d nfakewriters=%d " 929 "--- %s: nreaders=%d nfakewriters=%d "
793 "stat_interval=%d verbose=%d test_no_idle_hz=%d " 930 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
794 "shuffle_interval = %d\n", 931 "shuffle_interval=%d stutter=%d irqreader=%d\n",
795 torture_type, tag, nrealreaders, nfakewriters, 932 torture_type, tag, nrealreaders, nfakewriters,
796 stat_interval, verbose, test_no_idle_hz, shuffle_interval); 933 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
934 stutter, irqreader);
797} 935}
798 936
799static void 937static void
@@ -802,6 +940,11 @@ rcu_torture_cleanup(void)
802 int i; 940 int i;
803 941
804 fullstop = 1; 942 fullstop = 1;
943 if (stutter_task) {
944 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
945 kthread_stop(stutter_task);
946 }
947 stutter_task = NULL;
805 if (shuffler_task) { 948 if (shuffler_task) {
806 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); 949 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
807 kthread_stop(shuffler_task); 950 kthread_stop(shuffler_task);
@@ -848,7 +991,9 @@ rcu_torture_cleanup(void)
848 stats_task = NULL; 991 stats_task = NULL;
849 992
850 /* Wait for all RCU callbacks to fire. */ 993 /* Wait for all RCU callbacks to fire. */
851 rcu_barrier(); 994
995 if (cur_ops->cb_barrier != NULL)
996 cur_ops->cb_barrier();
852 997
853 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 998 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
854 999
@@ -868,7 +1013,7 @@ rcu_torture_init(void)
868 int firsterr = 0; 1013 int firsterr = 0;
869 static struct rcu_torture_ops *torture_ops[] = 1014 static struct rcu_torture_ops *torture_ops[] =
870 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1015 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
871 &srcu_ops, &sched_ops, }; 1016 &srcu_ops, &sched_ops, &sched_ops_sync, };
872 1017
873 /* Process args and tell the world that the torturer is on the job. */ 1018 /* Process args and tell the world that the torturer is on the job. */
874 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1019 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -988,6 +1133,19 @@ rcu_torture_init(void)
988 goto unwind; 1133 goto unwind;
989 } 1134 }
990 } 1135 }
1136 if (stutter < 0)
1137 stutter = 0;
1138 if (stutter) {
1139 /* Create the stutter thread */
1140 stutter_task = kthread_run(rcu_torture_stutter, NULL,
1141 "rcu_torture_stutter");
1142 if (IS_ERR(stutter_task)) {
1143 firsterr = PTR_ERR(stutter_task);
1144 VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
1145 stutter_task = NULL;
1146 goto unwind;
1147 }
1148 }
991 return 0; 1149 return 0;
992 1150
993unwind: 1151unwind:
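The new irqreader path above drives readers from timer (irq) context using an on-stack timer. A rough, self-contained sketch of that idiom, assuming a simple kthread (my_timer_fn, my_thread_fn and the 1-jiffy re-arm period are illustrative, not from this patch):

#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/timer.h>

static void my_timer_fn(unsigned long unused)
{
	/* Runs in timer (softirq) context, like rcu_torture_timer() above. */
}

static int my_thread_fn(void *arg)
{
	struct timer_list t;

	setup_timer_on_stack(&t, my_timer_fn, 0);
	while (!kthread_should_stop()) {
		if (!timer_pending(&t))
			mod_timer(&t, jiffies + 1);	/* re-arm roughly every tick */
		schedule_timeout_interruptible(1);
	}
	del_timer_sync(&t);	/* the timer must be gone before the stack frame is */
	return 0;
}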
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 092e4c620af9..a56f629b057a 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -297,8 +297,8 @@ static int test_func(void *data)
297 * 297 *
298 * opcode:data 298 * opcode:data
299 */ 299 */
300static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, 300static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribute *attr,
301 size_t count) 301 const char *buf, size_t count)
302{ 302{
303 struct sched_param schedpar; 303 struct sched_param schedpar;
304 struct test_thread_data *td; 304 struct test_thread_data *td;
@@ -360,7 +360,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
360 * @dev: thread to query 360 * @dev: thread to query
361 * @buf: char buffer to be filled with thread status info 361 * @buf: char buffer to be filled with thread status info
362 */ 362 */
363static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 363static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute *attr,
364 char *buf)
364{ 365{
365 struct test_thread_data *td; 366 struct test_thread_data *td;
366 struct task_struct *tsk; 367 struct task_struct *tsk;
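The signature changes in this file (and in the sched.c and clocksource.c hunks below) follow the tree-wide sysdev update: show/store callbacks now also receive a struct sysdev_attribute pointer. A minimal attribute written against the new prototypes might look like this sketch (the my_* names are illustrative only):

#include <linux/kernel.h>
#include <linux/sysdev.h>

static int my_value;

static ssize_t my_value_show(struct sys_device *dev,
			     struct sysdev_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", my_value);
}

static ssize_t my_value_store(struct sys_device *dev,
			      struct sysdev_attribute *attr,
			      const char *buf, size_t count)
{
	my_value = simple_strtol(buf, NULL, 0);
	return count;
}

static SYSDEV_ATTR(my_value, 0644, my_value_show, my_value_store);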
diff --git a/kernel/sched.c b/kernel/sched.c
index 99e6d850ecab..b1104ea5d255 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7737,11 +7737,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7737} 7737}
7738 7738
7739#ifdef CONFIG_SCHED_MC 7739#ifdef CONFIG_SCHED_MC
7740static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) 7740static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
7741 struct sysdev_attribute *attr, char *page)
7741{ 7742{
7742 return sprintf(page, "%u\n", sched_mc_power_savings); 7743 return sprintf(page, "%u\n", sched_mc_power_savings);
7743} 7744}
7744static ssize_t sched_mc_power_savings_store(struct sys_device *dev, 7745static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
7746 struct sysdev_attribute *attr,
7745 const char *buf, size_t count) 7747 const char *buf, size_t count)
7746{ 7748{
7747 return sched_power_savings_store(buf, count, 0); 7749 return sched_power_savings_store(buf, count, 0);
@@ -7751,11 +7753,13 @@ static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
7751#endif 7753#endif
7752 7754
7753#ifdef CONFIG_SCHED_SMT 7755#ifdef CONFIG_SCHED_SMT
7754static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) 7756static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
7757 struct sysdev_attribute *attr, char *page)
7755{ 7758{
7756 return sprintf(page, "%u\n", sched_smt_power_savings); 7759 return sprintf(page, "%u\n", sched_smt_power_savings);
7757} 7760}
7758static ssize_t sched_smt_power_savings_store(struct sys_device *dev, 7761static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
7762 struct sysdev_attribute *attr,
7759 const char *buf, size_t count) 7763 const char *buf, size_t count)
7760{ 7764{
7761 return sched_power_savings_store(buf, count, 1); 7765 return sched_power_savings_store(buf, count, 1);
diff --git a/kernel/smp.c b/kernel/smp.c
new file mode 100644
index 000000000000..462c785ca1ee
--- /dev/null
+++ b/kernel/smp.c
@@ -0,0 +1,383 @@
1/*
2 * Generic helpers for smp ipi calls
3 *
4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5 *
6 */
7#include <linux/init.h>
8#include <linux/module.h>
9#include <linux/percpu.h>
10#include <linux/rcupdate.h>
11#include <linux/rculist.h>
12#include <linux/smp.h>
13
14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
15static LIST_HEAD(call_function_queue);
16__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
17
18enum {
19 CSD_FLAG_WAIT = 0x01,
20 CSD_FLAG_ALLOC = 0x02,
21};
22
23struct call_function_data {
24 struct call_single_data csd;
25 spinlock_t lock;
26 unsigned int refs;
27 cpumask_t cpumask;
28 struct rcu_head rcu_head;
29};
30
31struct call_single_queue {
32 struct list_head list;
33 spinlock_t lock;
34};
35
36void __cpuinit init_call_single_data(void)
37{
38 int i;
39
40 for_each_possible_cpu(i) {
41 struct call_single_queue *q = &per_cpu(call_single_queue, i);
42
43 spin_lock_init(&q->lock);
44 INIT_LIST_HEAD(&q->list);
45 }
46}
47
48static void csd_flag_wait(struct call_single_data *data)
49{
50 /* Wait for response */
51 do {
52 /*
53 * We need to see the flags store in the IPI handler
54 */
55 smp_mb();
56 if (!(data->flags & CSD_FLAG_WAIT))
57 break;
58 cpu_relax();
59 } while (1);
60}
61
62/*
63 * Insert a previously allocated call_single_data element for execution
64 * on the given CPU. data must already have ->func, ->info, and ->flags set.
65 */
66static void generic_exec_single(int cpu, struct call_single_data *data)
67{
68 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
69 int wait = data->flags & CSD_FLAG_WAIT, ipi;
70 unsigned long flags;
71
72 spin_lock_irqsave(&dst->lock, flags);
73 ipi = list_empty(&dst->list);
74 list_add_tail(&data->list, &dst->list);
75 spin_unlock_irqrestore(&dst->lock, flags);
76
77 if (ipi)
78 arch_send_call_function_single_ipi(cpu);
79
80 if (wait)
81 csd_flag_wait(data);
82}
83
84static void rcu_free_call_data(struct rcu_head *head)
85{
86 struct call_function_data *data;
87
88 data = container_of(head, struct call_function_data, rcu_head);
89
90 kfree(data);
91}
92
93/*
94 * Invoked by arch to handle an IPI for call function. Must be called with
95 * interrupts disabled.
96 */
97void generic_smp_call_function_interrupt(void)
98{
99 struct call_function_data *data;
100 int cpu = get_cpu();
101
102 /*
103 * It's ok to use list_for_each_rcu() here even though we may delete
104 * 'pos', since list_del_rcu() doesn't clear ->next
105 */
106 rcu_read_lock();
107 list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
108 int refs;
109
110 if (!cpu_isset(cpu, data->cpumask))
111 continue;
112
113 data->csd.func(data->csd.info);
114
115 spin_lock(&data->lock);
116 cpu_clear(cpu, data->cpumask);
117 WARN_ON(data->refs == 0);
118 data->refs--;
119 refs = data->refs;
120 spin_unlock(&data->lock);
121
122 if (refs)
123 continue;
124
125 spin_lock(&call_function_lock);
126 list_del_rcu(&data->csd.list);
127 spin_unlock(&call_function_lock);
128
129 if (data->csd.flags & CSD_FLAG_WAIT) {
130 /*
131 * serialize stores to data with the flag clear
132 * and wakeup
133 */
134 smp_wmb();
135 data->csd.flags &= ~CSD_FLAG_WAIT;
136 } else
137 call_rcu(&data->rcu_head, rcu_free_call_data);
138 }
139 rcu_read_unlock();
140
141 put_cpu();
142}
143
144/*
145 * Invoked by arch to handle an IPI for call function single. Must be called
146 * from the arch with interrupts disabled.
147 */
148void generic_smp_call_function_single_interrupt(void)
149{
150 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
151 LIST_HEAD(list);
152
153 /*
154 * Need to see other stores to list head for checking whether
155 * list is empty without holding q->lock
156 */
157 smp_mb();
158 while (!list_empty(&q->list)) {
159 unsigned int data_flags;
160
161 spin_lock(&q->lock);
162 list_replace_init(&q->list, &list);
163 spin_unlock(&q->lock);
164
165 while (!list_empty(&list)) {
166 struct call_single_data *data;
167
168 data = list_entry(list.next, struct call_single_data,
169 list);
170 list_del(&data->list);
171
172 /*
173 * 'data' can be invalid after this call if
174 * flags == 0 (when called through
175 * generic_exec_single(), so save them away before
176 * making the call.
177 */
178 data_flags = data->flags;
179
180 data->func(data->info);
181
182 if (data_flags & CSD_FLAG_WAIT) {
183 smp_wmb();
184 data->flags &= ~CSD_FLAG_WAIT;
185 } else if (data_flags & CSD_FLAG_ALLOC)
186 kfree(data);
187 }
188 /*
189 * See comment on outer loop
190 */
191 smp_mb();
192 }
193}
194
195/*
196 * smp_call_function_single - Run a function on a specific CPU
197 * @func: The function to run. This must be fast and non-blocking.
198 * @info: An arbitrary pointer to pass to the function.
199 * @wait: If true, wait until function has completed on other CPUs.
200 *
201 * Returns 0 on success, else a negative status code. Note that @wait
202 * will be implicitly turned on in case of allocation failures, since
203 * we fall back to on-stack allocation.
204 */
205int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
206 int wait)
207{
208 struct call_single_data d;
209 unsigned long flags;
210 /* prevent preemption and reschedule on another processor */
211 int me = get_cpu();
212
213 /* Can deadlock when called with interrupts disabled */
214 WARN_ON(irqs_disabled());
215
216 if (cpu == me) {
217 local_irq_save(flags);
218 func(info);
219 local_irq_restore(flags);
220 } else {
221 struct call_single_data *data = NULL;
222
223 if (!wait) {
224 data = kmalloc(sizeof(*data), GFP_ATOMIC);
225 if (data)
226 data->flags = CSD_FLAG_ALLOC;
227 }
228 if (!data) {
229 data = &d;
230 data->flags = CSD_FLAG_WAIT;
231 }
232
233 data->func = func;
234 data->info = info;
235 generic_exec_single(cpu, data);
236 }
237
238 put_cpu();
239 return 0;
240}
241EXPORT_SYMBOL(smp_call_function_single);
242
243/**
244 * __smp_call_function_single(): Run a function on another CPU
245 * @cpu: The CPU to run on.
246 * @data: Pre-allocated and setup data structure
247 *
248 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
249 * data structure. Useful for embedding @data inside other structures, for
250 * instance.
251 *
252 */
253void __smp_call_function_single(int cpu, struct call_single_data *data)
254{
255 /* Can deadlock when called with interrupts disabled */
256 WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
257
258 generic_exec_single(cpu, data);
259}
260
261/**
262 * smp_call_function_mask(): Run a function on a set of other CPUs.
263 * @mask: The set of cpus to run on.
264 * @func: The function to run. This must be fast and non-blocking.
265 * @info: An arbitrary pointer to pass to the function.
266 * @wait: If true, wait (atomically) until function has completed on other CPUs.
267 *
268 * Returns 0 on success, else a negative status code.
269 *
270 * If @wait is true, then returns once @func has returned. Note that @wait
271 * will be implicitly turned on in case of allocation failures, since
272 * we fall back to on-stack allocation.
273 *
274 * You must not call this function with disabled interrupts or from a
275 * hardware interrupt handler or from a bottom half handler. Preemption
276 * must be disabled when calling this function.
277 */
278int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
279 int wait)
280{
281 struct call_function_data d;
282 struct call_function_data *data = NULL;
283 cpumask_t allbutself;
284 unsigned long flags;
285 int cpu, num_cpus;
286
287 /* Can deadlock when called with interrupts disabled */
288 WARN_ON(irqs_disabled());
289
290 cpu = smp_processor_id();
291 allbutself = cpu_online_map;
292 cpu_clear(cpu, allbutself);
293 cpus_and(mask, mask, allbutself);
294 num_cpus = cpus_weight(mask);
295
296 /*
297 * If zero CPUs, return. If just a single CPU, turn this request
 298 * into a targeted single call instead since it's faster.
299 */
300 if (!num_cpus)
301 return 0;
302 else if (num_cpus == 1) {
303 cpu = first_cpu(mask);
304 return smp_call_function_single(cpu, func, info, wait);
305 }
306
307 if (!wait) {
308 data = kmalloc(sizeof(*data), GFP_ATOMIC);
309 if (data)
310 data->csd.flags = CSD_FLAG_ALLOC;
311 }
312 if (!data) {
313 data = &d;
314 data->csd.flags = CSD_FLAG_WAIT;
315 wait = 1;
316 }
317
318 spin_lock_init(&data->lock);
319 data->csd.func = func;
320 data->csd.info = info;
321 data->refs = num_cpus;
322 data->cpumask = mask;
323
324 spin_lock_irqsave(&call_function_lock, flags);
325 list_add_tail_rcu(&data->csd.list, &call_function_queue);
326 spin_unlock_irqrestore(&call_function_lock, flags);
327
328 /* Send a message to all CPUs in the map */
329 arch_send_call_function_ipi(mask);
330
331 /* optionally wait for the CPUs to complete */
332 if (wait)
333 csd_flag_wait(&data->csd);
334
335 return 0;
336}
337EXPORT_SYMBOL(smp_call_function_mask);
338
339/**
340 * smp_call_function(): Run a function on all other CPUs.
341 * @func: The function to run. This must be fast and non-blocking.
342 * @info: An arbitrary pointer to pass to the function.
343 * @wait: If true, wait (atomically) until function has completed on other CPUs.
344 *
345 * Returns 0 on success, else a negative status code.
346 *
347 * If @wait is true, then returns once @func has returned; otherwise
348 * it returns just before the target cpu calls @func. In case of allocation
349 * failure, @wait will be implicitly turned on.
350 *
351 * You must not call this function with disabled interrupts or from a
352 * hardware interrupt handler or from a bottom half handler.
353 */
354int smp_call_function(void (*func)(void *), void *info, int wait)
355{
356 int ret;
357
358 preempt_disable();
359 ret = smp_call_function_mask(cpu_online_map, func, info, wait);
360 preempt_enable();
361 return ret;
362}
363EXPORT_SYMBOL(smp_call_function);
364
365void ipi_call_lock(void)
366{
367 spin_lock(&call_function_lock);
368}
369
370void ipi_call_unlock(void)
371{
372 spin_unlock(&call_function_lock);
373}
374
375void ipi_call_lock_irq(void)
376{
377 spin_lock_irq(&call_function_lock);
378}
379
380void ipi_call_unlock_irq(void)
381{
382 spin_unlock_irq(&call_function_lock);
383}
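The new generic helpers above keep the familiar entry points, but as the softirq.c and tick-broadcast.c hunks below show, the old 'retry' argument is gone. A minimal remote call under the new three-argument form, assuming a caller-supplied do_flush() handler (illustrative only):

#include <linux/smp.h>

static void do_flush(void *info)
{
	/* Runs on the target CPU with interrupts disabled (via IPI when remote). */
}

static void flush_one_cpu(int cpu)
{
	/* wait == 1: do not return until do_flush() has finished on 'cpu'. */
	smp_call_function_single(cpu, do_flush, NULL, 1);
}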
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3e9e896fdc5b..81e2fe0f983a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -645,12 +645,12 @@ __init int spawn_ksoftirqd(void)
645/* 645/*
646 * Call a function on all processors 646 * Call a function on all processors
647 */ 647 */
648int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) 648int on_each_cpu(void (*func) (void *info), void *info, int wait)
649{ 649{
650 int ret = 0; 650 int ret = 0;
651 651
652 preempt_disable(); 652 preempt_disable();
653 ret = smp_call_function(func, info, retry, wait); 653 ret = smp_call_function(func, info, wait);
654 local_irq_disable(); 654 local_irq_disable();
655 func(info); 655 func(info);
656 local_irq_enable(); 656 local_irq_enable();
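on_each_cpu() likewise loses its 'retry' parameter and now forwards straight to the three-argument smp_call_function(), then runs the function locally. A small sketch of an updated caller, assuming an illustrative count_cpu() helper:

#include <linux/smp.h>
#include <asm/atomic.h>

static void count_cpu(void *info)
{
	atomic_inc((atomic_t *)info);	/* executed once on every online CPU */
}

static int count_online_cpus_by_ipi(void)
{
	atomic_t n = ATOMIC_INIT(0);

	/* New signature: func, info, wait -- no 'retry' argument. */
	on_each_cpu(count_cpu, &n, 1);
	return atomic_read(&n);
}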
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5b9b467de070..0fea0ee12da9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -59,6 +59,7 @@ cond_syscall(sys_epoll_create);
59cond_syscall(sys_epoll_ctl); 59cond_syscall(sys_epoll_ctl);
60cond_syscall(sys_epoll_wait); 60cond_syscall(sys_epoll_wait);
61cond_syscall(sys_epoll_pwait); 61cond_syscall(sys_epoll_pwait);
62cond_syscall(compat_sys_epoll_pwait);
62cond_syscall(sys_semget); 63cond_syscall(sys_semget);
63cond_syscall(sys_semop); 64cond_syscall(sys_semop);
64cond_syscall(sys_semtimedop); 65cond_syscall(sys_semtimedop);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ab59ac008caf..2a7b9d88706b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -83,6 +83,9 @@ extern int maps_protect;
83extern int sysctl_stat_interval; 83extern int sysctl_stat_interval;
84extern int latencytop_enabled; 84extern int latencytop_enabled;
85extern int sysctl_nr_open_min, sysctl_nr_open_max; 85extern int sysctl_nr_open_min, sysctl_nr_open_max;
86#ifdef CONFIG_RCU_TORTURE_TEST
87extern int rcutorture_runnable;
88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
86 89
87/* Constants used for minimum and maximum */ 90/* Constants used for minimum and maximum */
88#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) 91#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
@@ -108,7 +111,7 @@ static int min_percpu_pagelist_fract = 8;
108 111
109static int ngroups_max = NGROUPS_MAX; 112static int ngroups_max = NGROUPS_MAX;
110 113
111#ifdef CONFIG_KMOD 114#ifdef CONFIG_MODULES
112extern char modprobe_path[]; 115extern char modprobe_path[];
113#endif 116#endif
114#ifdef CONFIG_CHR_DEV_SG 117#ifdef CONFIG_CHR_DEV_SG
@@ -473,7 +476,7 @@ static struct ctl_table kern_table[] = {
473 .proc_handler = &ftrace_enable_sysctl, 476 .proc_handler = &ftrace_enable_sysctl,
474 }, 477 },
475#endif 478#endif
476#ifdef CONFIG_KMOD 479#ifdef CONFIG_MODULES
477 { 480 {
478 .ctl_name = KERN_MODPROBE, 481 .ctl_name = KERN_MODPROBE,
479 .procname = "modprobe", 482 .procname = "modprobe",
@@ -832,6 +835,16 @@ static struct ctl_table kern_table[] = {
832 .child = key_sysctls, 835 .child = key_sysctls,
833 }, 836 },
834#endif 837#endif
838#ifdef CONFIG_RCU_TORTURE_TEST
839 {
840 .ctl_name = CTL_UNNUMBERED,
841 .procname = "rcutorture_runnable",
842 .data = &rcutorture_runnable,
843 .maxlen = sizeof(int),
844 .mode = 0644,
845 .proc_handler = &proc_dointvec,
846 },
847#endif
835/* 848/*
836 * NOTE: do not add new entries to this table unless you have read 849 * NOTE: do not add new entries to this table unless you have read
837 * Documentation/sysctl/ctl_unnumbered.txt 850 * Documentation/sysctl/ctl_unnumbered.txt
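The new rcutorture_runnable entry sits in kern_table, so it appears to user space as /proc/sys/kernel/rcutorture_runnable; rcu_stutter_wait() in the rcutorture hunk above parks the test threads whenever it reads zero. A small user-space sketch for pausing and resuming a built-in rcutorture run (the helper name is illustrative):

#include <stdio.h>

/* Write 0 to pause the torture threads, 1 to let them run again. */
static int set_rcutorture_runnable(int runnable)
{
	FILE *f = fopen("/proc/sys/kernel/rcutorture_runnable", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", runnable);
	return fclose(f);
}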
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index dadde5361f32..b1c2da81b050 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -376,7 +376,8 @@ void clocksource_unregister(struct clocksource *cs)
376 * Provides sysfs interface for listing current clocksource. 376 * Provides sysfs interface for listing current clocksource.
377 */ 377 */
378static ssize_t 378static ssize_t
379sysfs_show_current_clocksources(struct sys_device *dev, char *buf) 379sysfs_show_current_clocksources(struct sys_device *dev,
380 struct sysdev_attribute *attr, char *buf)
380{ 381{
381 ssize_t count = 0; 382 ssize_t count = 0;
382 383
@@ -397,6 +398,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
 397 * clocksource selection. 398 * clocksource selection.
398 */ 399 */
399static ssize_t sysfs_override_clocksource(struct sys_device *dev, 400static ssize_t sysfs_override_clocksource(struct sys_device *dev,
401 struct sysdev_attribute *attr,
400 const char *buf, size_t count) 402 const char *buf, size_t count)
401{ 403{
402 struct clocksource *ovr = NULL; 404 struct clocksource *ovr = NULL;
@@ -449,7 +451,9 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
449 * Provides sysfs interface for listing registered clocksources 451 * Provides sysfs interface for listing registered clocksources
450 */ 452 */
451static ssize_t 453static ssize_t
452sysfs_show_available_clocksources(struct sys_device *dev, char *buf) 454sysfs_show_available_clocksources(struct sys_device *dev,
455 struct sysdev_attribute *attr,
456 char *buf)
453{ 457{
454 struct clocksource *src; 458 struct clocksource *src;
455 ssize_t count = 0; 459 ssize_t count = 0;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 67f80c261709..f48d0f09d32f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -268,7 +268,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
268 "offline CPU #%d\n", *oncpu); 268 "offline CPU #%d\n", *oncpu);
269 else 269 else
270 smp_call_function_single(*oncpu, tick_do_broadcast_on_off, 270 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
271 &reason, 1, 1); 271 &reason, 1);
272} 272}
273 273
274/* 274/*