path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.locks | 4
-rw-r--r--  kernel/Kconfig.preempt | 1
-rw-r--r--  kernel/Makefile | 1
-rw-r--r--  kernel/audit.c | 2
-rw-r--r--  kernel/cgroup.c | 29
-rw-r--r--  kernel/compat.c | 68
-rw-r--r--  kernel/cpuset.c | 74
-rw-r--r--  kernel/cred.c | 3
-rw-r--r--  kernel/debug/debug_core.c | 87
-rw-r--r--  kernel/debug/gdbstub.c | 10
-rw-r--r--  kernel/debug/kdb/kdb_bp.c | 7
-rw-r--r--  kernel/debug/kdb/kdb_bt.c | 1
-rw-r--r--  kernel/debug/kdb/kdb_io.c | 4
-rw-r--r--  kernel/debug/kdb/kdb_keyboard.c | 95
-rw-r--r--  kernel/debug/kdb/kdb_main.c | 3
-rw-r--r--  kernel/debug/kdb/kdb_private.h | 7
-rw-r--r--  kernel/debug/kdb/kdb_support.c | 4
-rw-r--r--  kernel/dma.c | 1
-rw-r--r--  kernel/events/core.c | 11
-rw-r--r--  kernel/exit.c | 47
-rw-r--r--  kernel/fork.c | 28
-rw-r--r--  kernel/freezer.c | 6
-rw-r--r--  kernel/futex.c | 38
-rw-r--r--  kernel/futex_compat.c | 38
-rw-r--r--  kernel/irq/Kconfig | 15
-rw-r--r--  kernel/irq/handle.c | 16
-rw-r--r--  kernel/irq/irqdomain.c | 815
-rw-r--r--  kernel/irq/manage.c | 19
-rw-r--r--  kernel/irq/migration.c | 10
-rw-r--r--  kernel/irq_work.c | 2
-rw-r--r--  kernel/itimer.c | 8
-rw-r--r--  kernel/kexec.c | 15
-rw-r--r--  kernel/kmod.c | 201
-rw-r--r--  kernel/module.c | 37
-rw-r--r--  kernel/padata.c | 57
-rw-r--r--  kernel/panic.c | 2
-rw-r--r--  kernel/params.c | 40
-rw-r--r--  kernel/pid_namespace.c | 41
-rw-r--r--  kernel/power/Makefile | 3
-rw-r--r--  kernel/power/hibernate.c | 59
-rw-r--r--  kernel/power/main.c | 20
-rw-r--r--  kernel/power/power.h | 7
-rw-r--r--  kernel/power/process.c | 32
-rw-r--r--  kernel/power/qos.c | 73
-rw-r--r--  kernel/power/snapshot.c | 35
-rw-r--r--  kernel/power/suspend.c | 91
-rw-r--r--  kernel/power/user.c | 22
-rw-r--r--  kernel/ptrace.c | 66
-rw-r--r--  kernel/resource.c | 3
-rw-r--r--  kernel/rwsem.c | 1
-rw-r--r--  kernel/sched/core.c | 73
-rw-r--r--  kernel/sched/fair.c | 16
-rw-r--r--  kernel/sched/rt.c | 2
-rw-r--r--  kernel/sched/sched.h | 3
-rw-r--r--  kernel/signal.c | 19
-rw-r--r--  kernel/smp.c | 90
-rw-r--r--  kernel/spinlock.c | 2
-rw-r--r--  kernel/sys.c | 17
-rw-r--r--  kernel/sysctl.c | 522
-rw-r--r--  kernel/sysctl_check.c | 160
-rw-r--r--  kernel/time.c | 6
-rw-r--r--  kernel/time/Kconfig | 4
-rw-r--r--  kernel/time/alarmtimer.c | 8
-rw-r--r--  kernel/time/clocksource.c | 2
-rw-r--r--  kernel/time/ntp.c | 134
-rw-r--r--  kernel/time/tick-broadcast.c | 4
-rw-r--r--  kernel/time/tick-sched.c | 4
-rw-r--r--  kernel/time/timekeeping.c | 53
-rw-r--r--  kernel/trace/Kconfig | 2
-rw-r--r--  kernel/trace/blktrace.c | 18
-rw-r--r--  kernel/trace/ftrace.c | 3
-rw-r--r--  kernel/trace/ring_buffer.c | 157
-rw-r--r--  kernel/trace/trace.c | 113
-rw-r--r--  kernel/trace/trace.h | 3
-rw-r--r--  kernel/trace/trace_entries.h | 16
-rw-r--r--  kernel/trace/trace_export.c | 2
-rw-r--r--  kernel/trace/trace_output.c | 2
-rw-r--r--  kernel/watchdog.c | 27
78 files changed, 2083 insertions, 1638 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 5068e2a4e75f..2251882daf53 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE
 	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
 		 ARCH_INLINE_SPIN_LOCK_IRQSAVE
 
-config INLINE_SPIN_UNLOCK
-	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
+config UNINLINE_SPIN_UNLOCK
+	bool
 
 config INLINE_SPIN_UNLOCK_BH
 	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 24e7cb0ba26a..3f9c97419f02 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY
 config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	select PREEMPT_COUNT
+	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
 	help
 	  This option reduces the latency of the kernel by making
 	  all kernel code (that is not executing in a critical section)
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d9de86b7e76..cb41b9547c9f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,7 +27,6 @@ obj-y += power/
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
-obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
diff --git a/kernel/audit.c b/kernel/audit.c
index bb0eb5bb9a0a..1c7f2c61416b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1418,7 +1418,7 @@ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
 
 /* This is a helper-function to print the escaped d_path */
 void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
-		      struct path *path)
+		      const struct path *path)
 {
 	char *p, *pathname;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c6877fe9a831..ed64ccac67c9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1472,7 +1472,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
 
 	struct inode *inode =
 		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
-	struct dentry *dentry;
 
 	if (!inode)
 		return -ENOMEM;
@@ -1481,12 +1480,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
 	inode->i_op = &cgroup_dir_inode_operations;
 	/* directories start off with i_nlink == 2 (for "." entry) */
 	inc_nlink(inode);
-	dentry = d_alloc_root(inode);
-	if (!dentry) {
-		iput(inode);
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
 		return -ENOMEM;
-	}
-	sb->s_root = dentry;
 	/* for everything else we want ->d_op set */
 	sb->s_d_op = &cgroup_dops;
 	return 0;
@@ -1887,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval;
+	int retval = 0;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
 	struct cgroupfs_root *root = cgrp->root;
@@ -4885,9 +4881,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 
 	rcu_assign_pointer(id->css, NULL);
 	rcu_assign_pointer(css->id, NULL);
-	write_lock(&ss->id_lock);
+	spin_lock(&ss->id_lock);
 	idr_remove(&ss->idr, id->id);
-	write_unlock(&ss->id_lock);
+	spin_unlock(&ss->id_lock);
 	kfree_rcu(id, rcu_head);
 }
 EXPORT_SYMBOL_GPL(free_css_id);
@@ -4913,10 +4909,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
 		error = -ENOMEM;
 		goto err_out;
 	}
-	write_lock(&ss->id_lock);
+	spin_lock(&ss->id_lock);
 	/* Don't use 0. allocates an ID of 1-65535 */
 	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
-	write_unlock(&ss->id_lock);
+	spin_unlock(&ss->id_lock);
 
 	/* Returns error when there are no free spaces for new ID.*/
 	if (error) {
@@ -4931,9 +4927,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
 		return newid;
 remove_idr:
 	error = -ENOSPC;
-	write_lock(&ss->id_lock);
+	spin_lock(&ss->id_lock);
 	idr_remove(&ss->idr, myid);
-	write_unlock(&ss->id_lock);
+	spin_unlock(&ss->id_lock);
 err_out:
 	kfree(newid);
 	return ERR_PTR(error);
@@ -4945,7 +4941,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
 {
 	struct css_id *newid;
 
-	rwlock_init(&ss->id_lock);
+	spin_lock_init(&ss->id_lock);
 	idr_init(&ss->idr);
 
 	newid = get_new_cssid(ss, 0);
@@ -5033,6 +5029,8 @@ css_get_next(struct cgroup_subsys *ss, int id,
 		return NULL;
 
 	BUG_ON(!ss->use_id);
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
 	/* fill start point for scan */
 	tmpid = id;
 	while (1) {
@@ -5040,10 +5038,7 @@ css_get_next(struct cgroup_subsys *ss, int id,
 		 * scan next entry from bitmap(tree), tmpid is updated after
 		 * idr_get_next().
 		 */
-		read_lock(&ss->id_lock);
 		tmp = idr_get_next(&ss->idr, &tmpid);
-		read_unlock(&ss->id_lock);
-
 		if (!tmp)
 			break;
 		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
diff --git a/kernel/compat.c b/kernel/compat.c
index f346cedfe24d..74ff8498809a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -31,11 +31,10 @@
 #include <asm/uaccess.h>
 
 /*
- * Note that the native side is already converted to a timespec, because
- * that's what we want anyway.
+ * Get/set struct timeval with struct timespec on the native side
  */
-static int compat_get_timeval(struct timespec *o,
-		struct compat_timeval __user *i)
+static int compat_get_timeval_convert(struct timespec *o,
+				      struct compat_timeval __user *i)
 {
 	long usec;
 
@@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o,
 	return 0;
 }
 
-static int compat_put_timeval(struct compat_timeval __user *o,
-		struct timeval *i)
+static int compat_put_timeval_convert(struct compat_timeval __user *o,
+				      struct timeval *i)
 {
 	return (put_user(i->tv_sec, &o->tv_sec) ||
 		put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
@@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
 	if (tv) {
 		struct timeval ktv;
 		do_gettimeofday(&ktv);
-		if (compat_put_timeval(tv, &ktv))
+		if (compat_put_timeval_convert(tv, &ktv))
 			return -EFAULT;
 	}
 	if (tz) {
@@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 	struct timezone ktz;
 
 	if (tv) {
-		if (compat_get_timeval(&kts, tv))
+		if (compat_get_timeval_convert(&kts, tv))
 			return -EFAULT;
 	}
 	if (tz) {
@@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
+int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv)
+{
+	return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
+			__get_user(tv->tv_sec, &ctv->tv_sec) ||
+			__get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_timeval);
+
+int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv)
+{
+	return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
+			__put_user(tv->tv_sec, &ctv->tv_sec) ||
+			__put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_timeval);
+
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
 	return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
 			__get_user(ts->tv_sec, &cts->tv_sec) ||
 			__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
+EXPORT_SYMBOL_GPL(get_compat_timespec);
 
 int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
 {
@@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user
 }
 EXPORT_SYMBOL_GPL(put_compat_timespec);
 
+int compat_get_timeval(struct timeval *tv, const void __user *utv)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0;
+	else
+		return get_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_get_timeval);
+
+int compat_put_timeval(const struct timeval *tv, void __user *utv)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0;
+	else
+		return put_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_put_timeval);
+
+int compat_get_timespec(struct timespec *ts, const void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0;
+	else
+		return get_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_get_timespec);
+
+int compat_put_timespec(const struct timespec *ts, void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0;
+	else
+		return put_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_put_timespec);
+
 static long compat_nanosleep_restart(struct restart_block *restart)
 {
 	struct compat_timespec __user *rmtp;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 5d575836dba6..14f7070b4ba2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = {
  * are online.  If none are online, walk up the cpuset hierarchy
  * until we find one that does have some online cpus.  If we get
  * all the way to the top and still haven't found any online cpus,
- * return cpu_online_map.  Or if passed a NULL cs from an exit'ing
- * task, return cpu_online_map.
+ * return cpu_online_mask.  Or if passed a NULL cs from an exit'ing
+ * task, return cpu_online_mask.
  *
  * One way or another, we guarantee to return some non-empty subset
- * of cpu_online_map.
+ * of cpu_online_mask.
  *
  * Call with callback_mutex held.
  */
@@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	int retval;
 	int is_load_balanced;
 
-	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
 	if (cs == &top_cpuset)
 		return -EACCES;
 
@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 {
 	bool need_loop;
 
-repeat:
 	/*
 	 * Allow tasks that have access to memory reserves because they have
 	 * been OOM killed to get memory anywhere.
@@ -983,45 +982,19 @@ repeat:
 	 */
 	need_loop = task_has_mempolicy(tsk) ||
 			!nodes_intersects(*newmems, tsk->mems_allowed);
-	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
-
-	/*
-	 * ensure checking ->mems_allowed_change_disable after setting all new
-	 * allowed nodes.
-	 *
-	 * the read-side task can see an nodemask with new allowed nodes and
-	 * old allowed nodes. and if it allocates page when cpuset clears newly
-	 * disallowed ones continuous, it can see the new allowed bits.
-	 *
-	 * And if setting all new allowed nodes is after the checking, setting
-	 * all new allowed nodes and clearing newly disallowed ones will be done
-	 * continuous, and the read-side task may find no node to alloc page.
-	 */
-	smp_mb();
 
-	/*
-	 * Allocation of memory is very fast, we needn't sleep when waiting
-	 * for the read-side.
-	 */
-	while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
-		task_unlock(tsk);
-		if (!task_curr(tsk))
-			yield();
-		goto repeat;
-	}
+	if (need_loop)
+		write_seqcount_begin(&tsk->mems_allowed_seq);
 
-	/*
-	 * ensure checking ->mems_allowed_change_disable before clearing all new
-	 * disallowed nodes.
-	 *
-	 * if clearing newly disallowed bits before the checking, the read-side
-	 * task may find no node to alloc page.
-	 */
-	smp_mb();
+	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
+	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
 	tsk->mems_allowed = *newmems;
+
+	if (need_loop)
+		write_seqcount_end(&tsk->mems_allowed_seq);
+
 	task_unlock(tsk);
 }
 
@@ -2176,7 +2149,7 @@ void __init cpuset_init_smp(void)
  *
  * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of cpu_online_map, even if this means going outside the
+ * subset of cpu_online_mask, even if this means going outside the
  * tasks cpuset.
  **/
 
@@ -2189,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	mutex_unlock(&callback_mutex);
 }
 
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
 	const struct cpuset *cs;
-	int cpu;
 
 	rcu_read_lock();
 	cs = task_cs(tsk);
@@ -2213,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
 	 * set any mask even if it is not right from task_cs() pov,
 	 * the pending set_cpus_allowed_ptr() will fix things.
+	 *
+	 * select_fallback_rq() will fix things ups and set cpu_possible_mask
+	 * if required.
 	 */
-
-	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
-	if (cpu >= nr_cpu_ids) {
-		/*
-		 * Either tsk->cpus_allowed is wrong (see above) or it
-		 * is actually empty. The latter case is only possible
-		 * if we are racing with remove_tasks_in_empty_cpuset().
-		 * Like above we can temporary set any mask and rely on
-		 * set_cpus_allowed_ptr() as synchronization point.
-		 */
-		do_set_cpus_allowed(tsk, cpu_possible_mask);
-		cpu = cpumask_any(cpu_active_mask);
-	}
-
-	return cpu;
 }
 
 void cpuset_init_current_mems_allowed(void)
diff --git a/kernel/cred.c b/kernel/cred.c
index 5791612a4045..e70683d9ec32 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -16,6 +16,7 @@
 #include <linux/keyctl.h>
 #include <linux/init_task.h>
 #include <linux/security.h>
+#include <linux/binfmts.h>
 #include <linux/cn_proc.h>
 
 #if 0
@@ -385,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
+	p->replacement_session_keyring = NULL;
+
 	if (
 #ifdef CONFIG_KEYS
 		!p->cred->thread_keyring &&
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0d7c08784efb..0557f24c6bca 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -41,6 +41,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/sysrq.h>
+#include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
@@ -52,7 +53,6 @@
 #include <asm/cacheflush.h>
 #include <asm/byteorder.h>
 #include <linux/atomic.h>
-#include <asm/system.h>
 
 #include "debug_core.h"
 
@@ -75,6 +75,8 @@ static int exception_level;
 struct kgdb_io		*dbg_io_ops;
 static DEFINE_SPINLOCK(kgdb_registration_lock);
 
+/* Action for the reboot notifiter, a global allow kdb to change it */
+static int kgdbreboot;
 /* kgdb console driver is loaded */
 static int kgdb_con_registered;
 /* determine if kgdb console output should be used */
@@ -96,6 +98,7 @@ static int __init opt_kgdb_con(char *str)
 early_param("kgdbcon", opt_kgdb_con);
 
 module_param(kgdb_use_con, int, 0644);
+module_param(kgdbreboot, int, 0644);
 
 /*
  * Holds information about breakpoints in a kernel. These breakpoints are
@@ -157,37 +160,39 @@ early_param("nokgdbroundup", opt_nokgdbroundup);
  * Weak aliases for breakpoint management,
  * can be overriden by architectures when needed:
  */
-int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
+int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
 
-	err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE);
+	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				BREAK_INSTR_SIZE);
 	if (err)
 		return err;
-
-	return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr,
-				  BREAK_INSTR_SIZE);
+	err = probe_kernel_write((char *)bpt->bpt_addr,
+				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+	return err;
 }
 
-int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
+int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-	return probe_kernel_write((char *)addr,
-				  (char *)bundle, BREAK_INSTR_SIZE);
+	return probe_kernel_write((char *)bpt->bpt_addr,
+				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }
 
 int __weak kgdb_validate_break_address(unsigned long addr)
 {
-	char tmp_variable[BREAK_INSTR_SIZE];
+	struct kgdb_bkpt tmp;
 	int err;
-	/* Validate setting the breakpoint and then removing it.  In the
+	/* Validate setting the breakpoint and then removing it.  If the
 	 * remove fails, the kernel needs to emit a bad message because we
 	 * are deep trouble not being able to put things back the way we
 	 * found them.
 	 */
-	err = kgdb_arch_set_breakpoint(addr, tmp_variable);
+	tmp.bpt_addr = addr;
+	err = kgdb_arch_set_breakpoint(&tmp);
 	if (err)
 		return err;
-	err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
+	err = kgdb_arch_remove_breakpoint(&tmp);
 	if (err)
 		printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
 		       "memory destroyed at: %lx", addr);
@@ -231,7 +236,6 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
  */
 int dbg_activate_sw_breakpoints(void)
 {
-	unsigned long addr;
 	int error;
 	int ret = 0;
 	int i;
@@ -240,16 +244,15 @@ int dbg_activate_sw_breakpoints(void)
 		if (kgdb_break[i].state != BP_SET)
 			continue;
 
-		addr = kgdb_break[i].bpt_addr;
-		error = kgdb_arch_set_breakpoint(addr,
-						 kgdb_break[i].saved_instr);
+		error = kgdb_arch_set_breakpoint(&kgdb_break[i]);
 		if (error) {
 			ret = error;
-			printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
+			printk(KERN_INFO "KGDB: BP install failed: %lx",
+			       kgdb_break[i].bpt_addr);
 			continue;
 		}
 
-		kgdb_flush_swbreak_addr(addr);
+		kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr);
 		kgdb_break[i].state = BP_ACTIVE;
 	}
 	return ret;
@@ -298,7 +301,6 @@ int dbg_set_sw_break(unsigned long addr)
 
 int dbg_deactivate_sw_breakpoints(void)
 {
-	unsigned long addr;
 	int error;
 	int ret = 0;
 	int i;
@@ -306,15 +308,14 @@ int dbg_deactivate_sw_breakpoints(void)
 	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
 		if (kgdb_break[i].state != BP_ACTIVE)
 			continue;
-		addr = kgdb_break[i].bpt_addr;
-		error = kgdb_arch_remove_breakpoint(addr,
-						    kgdb_break[i].saved_instr);
+		error = kgdb_arch_remove_breakpoint(&kgdb_break[i]);
 		if (error) {
-			printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
+			printk(KERN_INFO "KGDB: BP remove failed: %lx\n",
+			       kgdb_break[i].bpt_addr);
 			ret = error;
 		}
 
-		kgdb_flush_swbreak_addr(addr);
+		kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr);
 		kgdb_break[i].state = BP_SET;
 	}
 	return ret;
@@ -348,7 +349,6 @@ int kgdb_isremovedbreak(unsigned long addr)
 
 int dbg_remove_all_break(void)
 {
-	unsigned long addr;
 	int error;
 	int i;
 
@@ -356,12 +356,10 @@ int dbg_remove_all_break(void)
 	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
 		if (kgdb_break[i].state != BP_ACTIVE)
 			goto setundefined;
-		addr = kgdb_break[i].bpt_addr;
-		error = kgdb_arch_remove_breakpoint(addr,
-						    kgdb_break[i].saved_instr);
+		error = kgdb_arch_remove_breakpoint(&kgdb_break[i]);
 		if (error)
 			printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
-			       addr);
+			       kgdb_break[i].bpt_addr);
 setundefined:
 		kgdb_break[i].state = BP_UNDEFINED;
 	}
@@ -784,6 +782,33 @@ void __init dbg_late_init(void)
 	kdb_init(KDB_INIT_FULL);
 }
 
+static int
+dbg_notify_reboot(struct notifier_block *this, unsigned long code, void *x)
+{
+	/*
+	 * Take the following action on reboot notify depending on value:
+	 *    1 == Enter debugger
+	 *    0 == [the default] detatch debug client
+	 *   -1 == Do nothing... and use this until the board resets
+	 */
+	switch (kgdbreboot) {
+	case 1:
+		kgdb_breakpoint();
+	case -1:
+		goto done;
+	}
+	if (!dbg_kdb_mode)
+		gdbstub_exit(code);
+done:
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dbg_reboot_notifier = {
+	.notifier_call		= dbg_notify_reboot,
+	.next			= NULL,
+	.priority		= INT_MAX,
+};
+
 static void kgdb_register_callbacks(void)
 {
 	if (!kgdb_io_module_registered) {
@@ -791,6 +816,7 @@ static void kgdb_register_callbacks(void)
 		kgdb_arch_init();
 		if (!dbg_is_early)
 			kgdb_arch_late();
+		register_reboot_notifier(&dbg_reboot_notifier);
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &kgdb_panic_event_nb);
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -812,6 +838,7 @@ static void kgdb_unregister_callbacks(void)
 	 */
 	if (kgdb_io_module_registered) {
 		kgdb_io_module_registered = 0;
+		unregister_reboot_notifier(&dbg_reboot_notifier);
 		atomic_notifier_chain_unregister(&panic_notifier_list,
 						 &kgdb_panic_event_nb);
 		kgdb_arch_exit();
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index c22d8c28ad84..ce615e064482 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1111,6 +1111,13 @@ void gdbstub_exit(int status)
 	unsigned char checksum, ch, buffer[3];
 	int loop;
 
+	if (!kgdb_connected)
+		return;
+	kgdb_connected = 0;
+
+	if (!dbg_io_ops || dbg_kdb_mode)
+		return;
+
 	buffer[0] = 'W';
 	buffer[1] = hex_asc_hi(status);
 	buffer[2] = hex_asc_lo(status);
@@ -1129,5 +1136,6 @@ void gdbstub_exit(int status)
 	dbg_io_ops->write_char(hex_asc_lo(checksum));
 
 	/* make sure the output is flushed, lest the bootloader clobber it */
-	dbg_io_ops->flush();
+	if (dbg_io_ops->flush)
+		dbg_io_ops->flush();
 }
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 20059ef4459a..8418c2f8ec5d 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -153,6 +153,13 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
 	} else {
 		kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
 			   __func__, bp->bp_addr);
+#ifdef CONFIG_DEBUG_RODATA
+		if (!bp->bp_type) {
+			kdb_printf("Software breakpoints are unavailable.\n"
+				   "  Change the kernel CONFIG_DEBUG_RODATA=n\n"
+				   "  OR use hw breaks: help bph\n");
+		}
+#endif
 		return 1;
 	}
 	return 0;
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7179eac7b41c..07c9bbb94a0b 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -15,7 +15,6 @@
 #include <linux/sched.h>
 #include <linux/kdb.h>
 #include <linux/nmi.h>
-#include <asm/system.h>
 #include "kdb_private.h"
 
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 4802eb5840e1..bb9520f0f6ff 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -689,7 +689,7 @@ kdb_printit:
 	if (!dbg_kdb_mode && kgdb_connected) {
 		gdbstub_msg_write(kdb_buffer, retlen);
 	} else {
-		if (!dbg_io_ops->is_console) {
+		if (dbg_io_ops && !dbg_io_ops->is_console) {
 			len = strlen(kdb_buffer);
 			cp = kdb_buffer;
 			while (len--) {
@@ -743,7 +743,7 @@ kdb_printit:
 		kdb_input_flush();
 		c = console_drivers;
 
-		if (!dbg_io_ops->is_console) {
+		if (dbg_io_ops && !dbg_io_ops->is_console) {
 			len = strlen(moreprompt);
 			cp = moreprompt;
 			while (len--) {
diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c
index 4bca634975c0..118527aa60ea 100644
--- a/kernel/debug/kdb/kdb_keyboard.c
+++ b/kernel/debug/kdb/kdb_keyboard.c
@@ -25,6 +25,7 @@
 #define KBD_STAT_MOUSE_OBF	0x20	/* Mouse output buffer full */
 
 static int kbd_exists;
+static int kbd_last_ret;
 
 /*
  * Check if the keyboard controller has a keypress for us.
@@ -90,8 +91,11 @@ int kdb_get_kbd_char(void)
 		return -1;
 	}
 
-	if ((scancode & 0x80) != 0)
+	if ((scancode & 0x80) != 0) {
+		if (scancode == 0x9c)
+			kbd_last_ret = 0;
 		return -1;
+	}
 
 	scancode &= 0x7f;
 
@@ -178,35 +182,82 @@ int kdb_get_kbd_char(void)
 		return -1;	/* ignore unprintables */
 	}
 
-	if ((scancode & 0x7f) == 0x1c) {
-		/*
-		 * enter key.  All done.  Absorb the release scancode.
-		 */
+	if (scancode == 0x1c) {
+		kbd_last_ret = 1;
+		return 13;
+	}
+
+	return keychar & 0xff;
+}
+EXPORT_SYMBOL_GPL(kdb_get_kbd_char);
+
+/*
+ * Best effort cleanup of ENTER break codes on leaving KDB. Called on
+ * exiting KDB, when we know we processed an ENTER or KP ENTER scan
+ * code.
+ */
+void kdb_kbd_cleanup_state(void)
+{
+	int scancode, scanstatus;
+
+	/*
+	 * Nothing to clean up, since either
+	 * ENTER was never pressed, or has already
+	 * gotten cleaned up.
+	 */
+	if (!kbd_last_ret)
+		return;
+
+	kbd_last_ret = 0;
+	/*
+	 * Enter key. Need to absorb the break code here, lest it gets
+	 * leaked out if we exit KDB as the result of processing 'g'.
+	 *
+	 * This has several interesting implications:
+	 * + Need to handle KP ENTER, which has break code 0xe0 0x9c.
+	 * + Need to handle repeat ENTER and repeat KP ENTER.  Repeats
+	 *   only get a break code at the end of the repeated
+	 *   sequence.  This means we can't propagate the repeated key
+	 *   press, and must swallow it away.
+	 * + Need to handle possible PS/2 mouse input.
+	 * + Need to handle mashed keys.
+	 */
+
+	while (1) {
 		while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
-			;
+			cpu_relax();
 
 		/*
-		 * Fetch the scancode
+		 * Fetch the scancode.
 		 */
 		scancode = inb(KBD_DATA_REG);
 		scanstatus = inb(KBD_STATUS_REG);
 
-		while (scanstatus & KBD_STAT_MOUSE_OBF) {
-			scancode = inb(KBD_DATA_REG);
-			scanstatus = inb(KBD_STATUS_REG);
-		}
+		/*
+		 * Skip mouse input.
+		 */
+		if (scanstatus & KBD_STAT_MOUSE_OBF)
+			continue;
 
-		if (scancode != 0x9c) {
-			/*
-			 * Wasn't an enter-release,  why not?
-			 */
-			kdb_printf("kdb: expected enter got 0x%x status 0x%x\n",
-				   scancode, scanstatus);
-		}
+		/*
+		 * If we see 0xe0, this is either a break code for KP
+		 * ENTER, or a repeat make for KP ENTER.  Either way,
+		 * since the second byte is equivalent to an ENTER,
+		 * skip the 0xe0 and try again.
+		 *
+		 * If we see 0x1c, this must be a repeat ENTER or KP
+		 * ENTER (and we swallowed 0xe0 before).  Try again.
+		 *
+		 * We can also see make and break codes for other keys
+		 * mashed before or after pressing ENTER.  Thus, if we
+		 * see anything other than 0x9c, we have to try again.
+		 *
+		 * Note, if you held some key as ENTER was depressed,
+		 * that break code would get leaked out.
+		 */
+		if (scancode != 0x9c)
+			continue;
 
-		return 13;
+		return;
 	}
-
-	return keychar & 0xff;
 }
-EXPORT_SYMBOL_GPL(kdb_get_kbd_char);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index e2ae7349437f..67b847dfa2bb 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1400,6 +1400,9 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
 	if (KDB_STATE(DOING_SS))
 		KDB_STATE_CLEAR(SSBPT);
 
+	/* Clean up any keyboard devices before leaving */
+	kdb_kbd_cleanup_state();
+
 	return result;
 }
 
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index e381d105b40b..47c4e56e513b 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -246,6 +246,13 @@ extern void debug_kusage(void);
 
 extern void kdb_set_current_task(struct task_struct *);
 extern struct task_struct *kdb_current_task;
+
+#ifdef CONFIG_KDB_KEYBOARD
+extern void kdb_kbd_cleanup_state(void);
+#else /* ! CONFIG_KDB_KEYBOARD */
+#define kdb_kbd_cleanup_state()
+#endif /* ! CONFIG_KDB_KEYBOARD */
+
 #ifdef CONFIG_MODULES
 extern struct list_head *kdb_modules;
 #endif /* CONFIG_MODULES */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 7d6fb40d2188..d35cc2d3a4cc 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -384,9 +384,9 @@ static int kdb_getphys(void *res, unsigned long addr, size_t size)
 	if (!pfn_valid(pfn))
 		return 1;
 	page = pfn_to_page(pfn);
-	vaddr = kmap_atomic(page, KM_KDB);
+	vaddr = kmap_atomic(page);
 	memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size);
-	kunmap_atomic(vaddr, KM_KDB);
+	kunmap_atomic(vaddr);
 
 	return 0;
 }
diff --git a/kernel/dma.c b/kernel/dma.c
index 68a2306522c8..6c6262f86c17 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -18,7 +18,6 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <asm/dma.h>
-#include <asm/system.h>
 
 
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4b50357914fb..a6a9ec4cd8f5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event,
 	*running = ctx_time - event->tstamp_running;
 }
 
-void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 }
 
@@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event)
 	userpg->time_running = running +
 			atomic64_read(&event->child_total_time_running);
 
-	perf_update_user_clock(userpg, now);
+	arch_perf_update_userpage(userpg, now);
 
 	barrier();
 	++userpg->lock;
@@ -7116,6 +7116,13 @@ void __init perf_event_init(void)
 
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&perf_sched_events, HZ);
+
+	/*
+	 * Build time assertion that we keep the data_head at the intended
+	 * location.  IOW, validation we got the __reserved[] size right.
+	 */
+	BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
+		     != 1024);
 }
 
 static int __init perf_event_sysfs_init(void)
diff --git a/kernel/exit.c b/kernel/exit.c
index ce5f758f40bd..d8bd3b425fa7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -52,6 +52,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/oom.h>
 #include <linux/writeback.h>
+#include <linux/shm.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -424,7 +425,7 @@ void daemonize(const char *name, ...)
 	 */
 	exit_mm(current);
 	/*
-	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
+	 * We don't want to get frozen, in case system-wide hibernation
 	 * or suspend transition begins right now.
 	 */
 	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
@@ -473,7 +474,7 @@ static void close_files(struct files_struct * files)
 		i = j * __NFDBITS;
 		if (i >= fdt->max_fds)
 			break;
-		set = fdt->open_fds->fds_bits[j++];
+		set = fdt->open_fds[j++];
 		while (set) {
 			if (set & 1) {
 				struct file * file = xchg(&fdt->fd[i], NULL);
@@ -686,11 +687,11 @@ static void exit_mm(struct task_struct * tsk)
 }
 
 /*
- * When we die, we re-parent all our children.
- * Try to give them to another thread in our thread
- * group, and if no such member exists, give it to
- * the child reaper process (ie "init") in our pid
- * space.
+ * When we die, we re-parent all our children, and try to:
+ * 1. give them to another thread in our thread group, if such a member exists
+ * 2. give it to the first ancestor process which prctl'd itself as a
+ *    child_subreaper for its children (like a service manager)
+ * 3. give it to the init process (PID 1) in our pid namespace
  */
 static struct task_struct *find_new_reaper(struct task_struct *father)
 	__releases(&tasklist_lock)
@@ -710,8 +711,11 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 
 	if (unlikely(pid_ns->child_reaper == father)) {
 		write_unlock_irq(&tasklist_lock);
-		if (unlikely(pid_ns == &init_pid_ns))
-			panic("Attempted to kill init!");
+		if (unlikely(pid_ns == &init_pid_ns)) {
+			panic("Attempted to kill init! exitcode=0x%08x\n",
+				father->signal->group_exit_code ?:
+					father->exit_code);
+		}
 
 		zap_pid_ns_processes(pid_ns);
 		write_lock_irq(&tasklist_lock);
@@ -721,6 +725,29 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 		 * forget_original_parent() must move them somewhere.
 		 */
 		pid_ns->child_reaper = init_pid_ns.child_reaper;
+	} else if (father->signal->has_child_subreaper) {
+		struct task_struct *reaper;
+
+		/*
+		 * Find the first ancestor marked as child_subreaper.
+		 * Note that the code below checks same_thread_group(reaper,
+		 * pid_ns->child_reaper).  This is what we need to DTRT in a
+		 * PID namespace. However we still need the check above, see
+		 * http://marc.info/?l=linux-kernel&m=131385460420380
+		 */
+		for (reaper = father->real_parent;
+		     reaper != &init_task;
+		     reaper = reaper->real_parent) {
+			if (same_thread_group(reaper, pid_ns->child_reaper))
+				break;
+			if (!reaper->signal->is_child_subreaper)
+				continue;
+			thread = reaper;
+			do {
+				if (!(thread->flags & PF_EXITING))
+					return reaper;
+			} while_each_thread(reaper, thread);
+		}
 	}
 
 	return pid_ns->child_reaper;
@@ -934,7 +961,7 @@ void do_exit(long code)
 	acct_update_integrals(tsk);
 	/* sync mm's RSS info before statistics gathering */
 	if (tsk->mm)
-		sync_mm_rss(tsk, tsk->mm);
+		sync_mm_rss(tsk->mm);
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index c4f38a849436..b9372a0bff18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	security_task_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
@@ -355,7 +356,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		charge = 0;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
-			if (security_vm_enough_memory(len))
+			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
 				goto fail_nomem;
 			charge = len;
 		}
@@ -511,6 +512,23 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	return NULL;
 }
 
+static void check_mm(struct mm_struct *mm)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++) {
+		long x = atomic_long_read(&mm->rss_stat.count[i]);
+
+		if (unlikely(x))
+			printk(KERN_ALERT "BUG: Bad rss-counter state "
+			       "mm:%p idx:%d val:%ld\n", mm, i, x);
+	}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	VM_BUG_ON(mm->pmd_huge_pte);
+#endif
+}
+
 /*
  * Allocate and initialize an mm_struct.
  */
@@ -538,9 +556,7 @@ void __mmdrop(struct mm_struct *mm)
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	VM_BUG_ON(mm->pmd_huge_pte);
-#endif
+	check_mm(mm);
 	free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1035,6 +1051,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
+	sig->has_child_subreaper = current->signal->has_child_subreaper ||
+				   current->signal->is_child_subreaper;
+
 	mutex_init(&sig->cred_guard_mutex);
 
 	return 0;
@@ -1222,6 +1241,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_CPUSETS
 	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+	seqcount_init(&p->mems_allowed_seq);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 9815b8d1eed5..11f82a4d4eae 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -99,9 +99,9 @@ static void fake_signal_wake_up(struct task_struct *p)
  * freeze_task - send a freeze request to given task
  * @p: task to send the request to
  *
- * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE
- * flag and either sending a fake signal to it or waking it up, depending
- * on whether it has %PF_FREEZER_NOSIG set.
+ * If @p is freezing, the freeze request is sent either by sending a fake
+ * signal (if it's not a kernel thread) or waking it up (if it's a kernel
+ * thread).
  *
  * RETURNS:
  * %false, if @p is not freezing or already frozen; %true, otherwise
diff --git a/kernel/futex.c b/kernel/futex.c
index 72efa1e4359a..e2b0fb9a0b3b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -59,6 +59,7 @@
 #include <linux/magic.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
+#include <linux/ptrace.h>
 
 #include <asm/futex.h>
 
@@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 {
 	struct robust_list_head __user *head;
 	unsigned long ret;
-	const struct cred *cred = current_cred(), *pcred;
+	struct task_struct *p;
 
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
 
+	WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+	rcu_read_lock();
+
+	ret = -ESRCH;
 	if (!pid)
-		head = current->robust_list;
+		p = current;
 	else {
-		struct task_struct *p;
-
-		ret = -ESRCH;
-		rcu_read_lock();
 		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
-		ret = -EPERM;
-		pcred = __task_cred(p);
-		/* If victim is in different user_ns, then uids are not
-		   comparable, so we must have CAP_SYS_PTRACE */
-		if (cred->user->user_ns != pcred->user->user_ns) {
-			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-				goto err_unlock;
-			goto ok;
-		}
-		/* If victim is in same user_ns, then uids are comparable */
-		if (cred->euid != pcred->euid &&
-		    cred->euid != pcred->uid &&
-		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-			goto err_unlock;
-ok:
-		head = p->robust_list;
-		rcu_read_unlock();
 	}
 
+	ret = -EPERM;
+	if (!ptrace_may_access(p, PTRACE_MODE_READ))
+		goto err_unlock;
+
+	head = p->robust_list;
+	rcu_read_unlock();
+
 	if (put_user(sizeof(*head), len_ptr))
 		return -EFAULT;
 	return put_user(head, head_ptr);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 5f9e689dc8f0..83e368b005fc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -10,6 +10,7 @@
 #include <linux/compat.h>
 #include <linux/nsproxy.h>
 #include <linux/futex.h>
+#include <linux/ptrace.h>
 
 #include <asm/uaccess.h>
 
@@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 {
 	struct compat_robust_list_head __user *head;
 	unsigned long ret;
-	const struct cred *cred = current_cred(), *pcred;
+	struct task_struct *p;
 
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
 
+	WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+	rcu_read_lock();
+
+	ret = -ESRCH;
 	if (!pid)
-		head = current->compat_robust_list;
+		p = current;
 	else {
-		struct task_struct *p;
-
-		ret = -ESRCH;
-		rcu_read_lock();
 		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
-		ret = -EPERM;
-		pcred = __task_cred(p);
-		/* If victim is in different user_ns, then uids are not
-		   comparable, so we must have CAP_SYS_PTRACE */
-		if (cred->user->user_ns != pcred->user->user_ns) {
-			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-				goto err_unlock;
-			goto ok;
-		}
-		/* If victim is in same user_ns, then uids are comparable */
-		if (cred->euid != pcred->euid &&
-		    cred->euid != pcred->uid &&
-		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-			goto err_unlock;
-ok:
-		head = p->compat_robust_list;
-		rcu_read_unlock();
 	}
 
+	ret = -EPERM;
+	if (!ptrace_may_access(p, PTRACE_MODE_READ))
+		goto err_unlock;
+
+	head = p->compat_robust_list;
+	rcu_read_unlock();
+
 	if (put_user(sizeof(*head), len_ptr))
 		return -EFAULT;
 	return put_user(ptr_to_compat(head), head_ptr);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 5a38bf4de641..d1a758bc972a 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -13,7 +13,7 @@ config GENERIC_HARDIRQS
 # Options selectable by the architecture code
 
 # Make sparse irq Kconfig switch below available
-config HAVE_SPARSE_IRQ
+config MAY_HAVE_SPARSE_IRQ
 	bool
 
 # Enable the generic irq autoprobe mechanism
@@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP
56config IRQ_DOMAIN 56config IRQ_DOMAIN
57 bool 57 bool
58 58
59config IRQ_DOMAIN_DEBUG
60 bool "Expose hardware/virtual IRQ mapping via debugfs"
61 depends on IRQ_DOMAIN && DEBUG_FS
62 help
63 This option will show the mapping relationship between hardware irq
64 numbers and Linux irq numbers. The mapping is exposed via debugfs
65 in the file "irq_domain_mapping".
66
67 If you don't know what this means you don't need it.
68
59# Support forced irq threading 69# Support forced irq threading
60config IRQ_FORCED_THREADING 70config IRQ_FORCED_THREADING
61 bool 71 bool
62 72
63config SPARSE_IRQ 73config SPARSE_IRQ
64 bool "Support sparse irq numbering" 74 bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ
65 depends on HAVE_SPARSE_IRQ
66 ---help--- 75 ---help---
67 76
68 Sparse irq numbering is useful for distro kernels that want 77 Sparse irq numbering is useful for distro kernels that want
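When IRQ_DOMAIN_DEBUG is enabled, the mapping table described in the help text can be read from userspace. A small reader, assuming debugfs is mounted at the usual /sys/kernel/debug location:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/debug/irq_domain_mapping", "r");

	if (!f) {
		perror("irq_domain_mapping");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}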
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6ff84e6a954c..bdb180325551 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
54static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action) 54static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
55{ 55{
56 /* 56 /*
57 * Wake up the handler thread for this action. In case the 57 * In case the thread crashed and was killed we just pretend that
58 * thread crashed and was killed we just pretend that we 58 * we handled the interrupt. The hardirq handler has disabled the
59 * handled the interrupt. The hardirq handler has disabled the 59 * device interrupt, so no irq storm is lurking.
60 * device interrupt, so no irq storm is lurking. If the 60 */
61 if (action->thread->flags & PF_EXITING)
62 return;
63
64 /*
65 * Wake up the handler thread for this action. If the
61 * RUNTHREAD bit is already set, nothing to do. 66 * RUNTHREAD bit is already set, nothing to do.
62 */ 67 */
63 if ((action->thread->flags & PF_EXITING) || 68 if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
64 test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
65 return; 69 return;
66 70
67 /* 71 /*
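The refactored irq_wake_thread() checks PF_EXITING first and then relies on test_and_set_bit() so the handler thread is only woken on a 0-to-1 transition of IRQTF_RUNTHREAD. The same wake-once idiom in isolation, with made-up my_* names:

#include <linux/bitops.h>
#include <linux/sched.h>

#define MY_RUNFLAG	0

static unsigned long my_flags;
static struct task_struct *my_worker;

static void my_kick_worker(void)
{
	/* If the bit was already set, work is pending and the thread is
	 * (or will be) running; a second wake-up is redundant. */
	if (test_and_set_bit(MY_RUNFLAG, &my_flags))
		return;
	wake_up_process(my_worker);
}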
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 1f9e26526b69..0e0ba5f840b2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1,189 +1,780 @@
1#include <linux/debugfs.h>
2#include <linux/hardirq.h>
3#include <linux/interrupt.h>
1#include <linux/irq.h> 4#include <linux/irq.h>
5#include <linux/irqdesc.h>
2#include <linux/irqdomain.h> 6#include <linux/irqdomain.h>
3#include <linux/module.h> 7#include <linux/module.h>
4#include <linux/mutex.h> 8#include <linux/mutex.h>
5#include <linux/of.h> 9#include <linux/of.h>
6#include <linux/of_address.h> 10#include <linux/of_address.h>
11#include <linux/seq_file.h>
7#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp.h>
14#include <linux/fs.h>
15
16#define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs.
17 * ie. legacy 8259, gets irqs 1..15 */
18#define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */
19#define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */
20#define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */
8 21
9static LIST_HEAD(irq_domain_list); 22static LIST_HEAD(irq_domain_list);
10static DEFINE_MUTEX(irq_domain_mutex); 23static DEFINE_MUTEX(irq_domain_mutex);
11 24
25static DEFINE_MUTEX(revmap_trees_mutex);
26static struct irq_domain *irq_default_domain;
27
12/** 28/**
13 * irq_domain_add() - Register an irq_domain 29 * irq_domain_alloc() - Allocate a new irq_domain data structure
14 * @domain: ptr to initialized irq_domain structure 30 * @of_node: optional device-tree node of the interrupt controller
31 * @revmap_type: type of reverse mapping to use
32 * @ops: map/unmap domain callbacks
33 * @host_data: Controller private data pointer
15 * 34 *
16 * Registers an irq_domain structure. The irq_domain must at a minimum be 35 * Allocates and initialize and irq_domain structure. Caller is expected to
17 * initialized with an ops structure pointer, and either a ->to_irq hook or 36 * register allocated irq_domain with irq_domain_register(). Returns pointer
18 * a valid irq_base value. Everything else is optional. 37 * to IRQ domain, or NULL on failure.
19 */ 38 */
20void irq_domain_add(struct irq_domain *domain) 39static struct irq_domain *irq_domain_alloc(struct device_node *of_node,
40 unsigned int revmap_type,
41 const struct irq_domain_ops *ops,
42 void *host_data)
21{ 43{
22 struct irq_data *d; 44 struct irq_domain *domain;
23 int hwirq, irq;
24 45
25 /* 46 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
26 * This assumes that the irq_domain owner has already allocated 47 if (WARN_ON(!domain))
27 * the irq_descs. This block will be removed when support for dynamic 48 return NULL;
28 * allocation of irq_descs is added to irq_domain. 49
29 */ 50 /* Fill structure */
30 irq_domain_for_each_irq(domain, hwirq, irq) { 51 domain->revmap_type = revmap_type;
31 d = irq_get_irq_data(irq); 52 domain->ops = ops;
32 if (!d) { 53 domain->host_data = host_data;
33 WARN(1, "error: assigning domain to non existant irq_desc"); 54 domain->of_node = of_node_get(of_node);
34 return; 55
35 } 56 return domain;
36 if (d->domain) { 57}
37 /* things are broken; just report, don't clean up */ 58
38 WARN(1, "error: irq_desc already assigned to a domain"); 59static void irq_domain_add(struct irq_domain *domain)
39 return; 60{
61 mutex_lock(&irq_domain_mutex);
62 list_add(&domain->link, &irq_domain_list);
63 mutex_unlock(&irq_domain_mutex);
64 pr_debug("irq: Allocated domain of type %d @0x%p\n",
65 domain->revmap_type, domain);
66}
67
68static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
69 irq_hw_number_t hwirq)
70{
71 irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq;
72 int size = domain->revmap_data.legacy.size;
73
74 if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size))
75 return 0;
76 return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq;
77}
78
79/**
80 * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
81 * @of_node: pointer to interrupt controller's device tree node.
82 * @size: total number of irqs in legacy mapping
83 * @first_irq: first number of irq block assigned to the domain
84 * @first_hwirq: first hwirq number to use for the translation. Should normally
85 * be '0', but a positive integer can be used if the effective
86 * hwirqs numbering does not begin at zero.
87 * @ops: map/unmap domain callbacks
88 * @host_data: Controller private data pointer
89 *
90 * Note: the map() callback will be called before this function returns
91 * for all legacy interrupts except 0 (which is always the invalid irq for
92 * a legacy controller).
93 */
94struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
95 unsigned int size,
96 unsigned int first_irq,
97 irq_hw_number_t first_hwirq,
98 const struct irq_domain_ops *ops,
99 void *host_data)
100{
101 struct irq_domain *domain;
102 unsigned int i;
103
104 domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data);
105 if (!domain)
106 return NULL;
107
108 domain->revmap_data.legacy.first_irq = first_irq;
109 domain->revmap_data.legacy.first_hwirq = first_hwirq;
110 domain->revmap_data.legacy.size = size;
111
112 mutex_lock(&irq_domain_mutex);
113 /* Verify that all the irqs are available */
114 for (i = 0; i < size; i++) {
115 int irq = first_irq + i;
116 struct irq_data *irq_data = irq_get_irq_data(irq);
117
118 if (WARN_ON(!irq_data || irq_data->domain)) {
119 mutex_unlock(&irq_domain_mutex);
120 of_node_put(domain->of_node);
121 kfree(domain);
122 return NULL;
40 } 123 }
41 d->domain = domain;
42 d->hwirq = hwirq;
43 } 124 }
44 125
45 mutex_lock(&irq_domain_mutex); 126 /* Claim all of the irqs before registering a legacy domain */
46 list_add(&domain->list, &irq_domain_list); 127 for (i = 0; i < size; i++) {
128 struct irq_data *irq_data = irq_get_irq_data(first_irq + i);
129 irq_data->hwirq = first_hwirq + i;
130 irq_data->domain = domain;
131 }
47 mutex_unlock(&irq_domain_mutex); 132 mutex_unlock(&irq_domain_mutex);
133
134 for (i = 0; i < size; i++) {
135 int irq = first_irq + i;
136 int hwirq = first_hwirq + i;
137
138 /* IRQ0 gets ignored */
139 if (!irq)
140 continue;
141
142 /* Legacy flags are left to default at this point,
143 * one can then use irq_create_mapping() to
144 * explicitly change them
145 */
146 ops->map(domain, irq, hwirq);
147
148 /* Clear norequest flags */
149 irq_clear_status_flags(irq, IRQ_NOREQUEST);
150 }
151
152 irq_domain_add(domain);
153 return domain;
154}
155
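A hypothetical controller probe using the legacy API just added; the foo_* names, the block of 16 interrupts starting at Linux irq 32, and the use of dummy_irq_chip are illustrative only. The legacy variant assumes the irq descriptors for that range already exist, which is what the verification loop above enforces.

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

static int foo_map(struct irq_domain *d, unsigned int virq,
		   irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
	return 0;
}

static const struct irq_domain_ops foo_ops = {
	.map	= foo_map,
	.xlate	= irq_domain_xlate_onecell,
};

static struct irq_domain *foo_probe(struct device_node *np)
{
	/* 16 hwirqs (0..15) mapped onto the pre-allocated irqs 32..47 */
	return irq_domain_add_legacy(np, 16, 32, 0, &foo_ops, NULL);
}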
156/**
 157 * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
158 * @of_node: pointer to interrupt controller's device tree node.
159 * @ops: map/unmap domain callbacks
160 * @host_data: Controller private data pointer
161 */
162struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
163 unsigned int size,
164 const struct irq_domain_ops *ops,
165 void *host_data)
166{
167 struct irq_domain *domain;
168 unsigned int *revmap;
169
170 revmap = kzalloc(sizeof(*revmap) * size, GFP_KERNEL);
171 if (WARN_ON(!revmap))
172 return NULL;
173
174 domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data);
175 if (!domain) {
176 kfree(revmap);
177 return NULL;
178 }
179 domain->revmap_data.linear.size = size;
180 domain->revmap_data.linear.revmap = revmap;
181 irq_domain_add(domain);
182 return domain;
183}
184
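The linear variant is the common case for new drivers: the reverse map is a simple array and Linux irq numbers are created lazily with irq_create_mapping(). A sketch with invented bar_* names and an arbitrary 64-interrupt size:

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

static int bar_map(struct irq_domain *d, unsigned int virq,
		   irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_level_irq);
	return 0;
}

static const struct irq_domain_ops bar_ops = {
	.map	= bar_map,
	.xlate	= irq_domain_xlate_twocell,
};

static struct irq_domain *bar_probe(struct device_node *np)
{
	/* virqs are allocated later, e.g. via irq_create_mapping(d, hwirq) */
	return irq_domain_add_linear(np, 64, &bar_ops, NULL);
}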
185struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
186 unsigned int max_irq,
187 const struct irq_domain_ops *ops,
188 void *host_data)
189{
190 struct irq_domain *domain = irq_domain_alloc(of_node,
191 IRQ_DOMAIN_MAP_NOMAP, ops, host_data);
192 if (domain) {
193 domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0;
194 irq_domain_add(domain);
195 }
196 return domain;
197}
198
199/**
200 * irq_domain_add_tree()
201 * @of_node: pointer to interrupt controller's device tree node.
202 * @ops: map/unmap domain callbacks
203 *
204 * Note: The radix tree will be allocated later during boot automatically
205 * (the reverse mapping will use the slow path until that happens).
206 */
207struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
208 const struct irq_domain_ops *ops,
209 void *host_data)
210{
211 struct irq_domain *domain = irq_domain_alloc(of_node,
212 IRQ_DOMAIN_MAP_TREE, ops, host_data);
213 if (domain) {
214 INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL);
215 irq_domain_add(domain);
216 }
217 return domain;
48} 218}
49 219
50/** 220/**
51 * irq_domain_del() - Unregister an irq_domain 221 * irq_find_host() - Locates a domain for a given device node
52 * @domain: ptr to registered irq_domain. 222 * @node: device-tree node of the interrupt controller
53 */ 223 */
54void irq_domain_del(struct irq_domain *domain) 224struct irq_domain *irq_find_host(struct device_node *node)
55{ 225{
56 struct irq_data *d; 226 struct irq_domain *h, *found = NULL;
57 int hwirq, irq; 227 int rc;
58 228
229 /* We might want to match the legacy controller last since
230 * it might potentially be set to match all interrupts in
231 * the absence of a device node. This isn't a problem so far
232 * yet though...
233 */
59 mutex_lock(&irq_domain_mutex); 234 mutex_lock(&irq_domain_mutex);
60 list_del(&domain->list); 235 list_for_each_entry(h, &irq_domain_list, link) {
236 if (h->ops->match)
237 rc = h->ops->match(h, node);
238 else
239 rc = (h->of_node != NULL) && (h->of_node == node);
240
241 if (rc) {
242 found = h;
243 break;
244 }
245 }
61 mutex_unlock(&irq_domain_mutex); 246 mutex_unlock(&irq_domain_mutex);
247 return found;
248}
249EXPORT_SYMBOL_GPL(irq_find_host);
250
251/**
252 * irq_set_default_host() - Set a "default" irq domain
253 * @domain: default domain pointer
254 *
255 * For convenience, it's possible to set a "default" domain that will be used
256 * whenever NULL is passed to irq_create_mapping(). It makes life easier for
257 * platforms that want to manipulate a few hard coded interrupt numbers that
258 * aren't properly represented in the device-tree.
259 */
260void irq_set_default_host(struct irq_domain *domain)
261{
262 pr_debug("irq: Default domain set to @0x%p\n", domain);
263
264 irq_default_domain = domain;
265}
266
267static int irq_setup_virq(struct irq_domain *domain, unsigned int virq,
268 irq_hw_number_t hwirq)
269{
270 struct irq_data *irq_data = irq_get_irq_data(virq);
62 271
63 /* Clear the irq_domain assignments */ 272 irq_data->hwirq = hwirq;
64 irq_domain_for_each_irq(domain, hwirq, irq) { 273 irq_data->domain = domain;
65 d = irq_get_irq_data(irq); 274 if (domain->ops->map(domain, virq, hwirq)) {
66 d->domain = NULL; 275 pr_debug("irq: -> mapping failed, freeing\n");
276 irq_data->domain = NULL;
277 irq_data->hwirq = 0;
278 return -1;
67 } 279 }
280
281 irq_clear_status_flags(virq, IRQ_NOREQUEST);
282
283 return 0;
68} 284}
69 285
70#if defined(CONFIG_OF_IRQ)
71/** 286/**
72 * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec 287 * irq_create_direct_mapping() - Allocate an irq for direct mapping
288 * @domain: domain to allocate the irq for or NULL for default domain
73 * 289 *
74 * Used by the device tree interrupt mapping code to translate a device tree 290 * This routine is used for irq controllers which can choose the hardware
75 * interrupt specifier to a valid linux irq number. Returns either a valid 291 * interrupt numbers they generate. In such a case it's simplest to use
76 * linux IRQ number or 0. 292 * the linux irq as the hardware interrupt number.
293 */
294unsigned int irq_create_direct_mapping(struct irq_domain *domain)
295{
296 unsigned int virq;
297
298 if (domain == NULL)
299 domain = irq_default_domain;
300
301 BUG_ON(domain == NULL);
302 WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP);
303
304 virq = irq_alloc_desc_from(1, 0);
305 if (!virq) {
306 pr_debug("irq: create_direct virq allocation failed\n");
307 return 0;
308 }
309 if (virq >= domain->revmap_data.nomap.max_irq) {
310 pr_err("ERROR: no free irqs available below %i maximum\n",
311 domain->revmap_data.nomap.max_irq);
312 irq_free_desc(virq);
313 return 0;
314 }
315 pr_debug("irq: create_direct obtained virq %d\n", virq);
316
317 if (irq_setup_virq(domain, virq, virq)) {
318 irq_free_desc(virq);
319 return 0;
320 }
321
322 return virq;
323}
324
325/**
326 * irq_create_mapping() - Map a hardware interrupt into linux irq space
327 * @domain: domain owning this hardware interrupt or NULL for default domain
328 * @hwirq: hardware irq number in that domain space
77 * 329 *
78 * When the caller no longer need the irq number returned by this function it 330 * Only one mapping per hardware interrupt is permitted. Returns a linux
79 * should arrange to call irq_dispose_mapping(). 331 * irq number.
332 * If the sense/trigger is to be specified, set_irq_type() should be called
333 * on the number returned from that call.
80 */ 334 */
335unsigned int irq_create_mapping(struct irq_domain *domain,
336 irq_hw_number_t hwirq)
337{
338 unsigned int hint;
339 int virq;
340
341 pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
342
 343 /* Look for default domain if necessary */
344 if (domain == NULL)
345 domain = irq_default_domain;
346 if (domain == NULL) {
347 printk(KERN_WARNING "irq_create_mapping called for"
348 " NULL domain, hwirq=%lx\n", hwirq);
349 WARN_ON(1);
350 return 0;
351 }
352 pr_debug("irq: -> using domain @%p\n", domain);
353
354 /* Check if mapping already exists */
355 virq = irq_find_mapping(domain, hwirq);
356 if (virq) {
357 pr_debug("irq: -> existing mapping on virq %d\n", virq);
358 return virq;
359 }
360
361 /* Get a virtual interrupt number */
362 if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
363 return irq_domain_legacy_revmap(domain, hwirq);
364
365 /* Allocate a virtual interrupt number */
366 hint = hwirq % nr_irqs;
367 if (hint == 0)
368 hint++;
369 virq = irq_alloc_desc_from(hint, 0);
370 if (virq <= 0)
371 virq = irq_alloc_desc_from(1, 0);
372 if (virq <= 0) {
373 pr_debug("irq: -> virq allocation failed\n");
374 return 0;
375 }
376
377 if (irq_setup_virq(domain, virq, hwirq)) {
378 if (domain->revmap_type != IRQ_DOMAIN_MAP_LEGACY)
379 irq_free_desc(virq);
380 return 0;
381 }
382
383 pr_debug("irq: irq %lu on domain %s mapped to virtual irq %u\n",
384 hwirq, domain->of_node ? domain->of_node->full_name : "null", virq);
385
386 return virq;
387}
388EXPORT_SYMBOL_GPL(irq_create_mapping);
389
81unsigned int irq_create_of_mapping(struct device_node *controller, 390unsigned int irq_create_of_mapping(struct device_node *controller,
82 const u32 *intspec, unsigned int intsize) 391 const u32 *intspec, unsigned int intsize)
83{ 392{
84 struct irq_domain *domain; 393 struct irq_domain *domain;
85 unsigned long hwirq; 394 irq_hw_number_t hwirq;
86 unsigned int irq, type; 395 unsigned int type = IRQ_TYPE_NONE;
87 int rc = -EINVAL; 396 unsigned int virq;
88 397
89 /* Find a domain which can translate the irq spec */ 398 domain = controller ? irq_find_host(controller) : irq_default_domain;
90 mutex_lock(&irq_domain_mutex); 399 if (!domain) {
91 list_for_each_entry(domain, &irq_domain_list, list) { 400#ifdef CONFIG_MIPS
92 if (!domain->ops->dt_translate) 401 /*
93 continue; 402 * Workaround to avoid breaking interrupt controller drivers
94 rc = domain->ops->dt_translate(domain, controller, 403 * that don't yet register an irq_domain. This is temporary
95 intspec, intsize, &hwirq, &type); 404 * code. ~~~gcl, Feb 24, 2012
96 if (rc == 0) 405 *
97 break; 406 * Scheduled for removal in Linux v3.6. That should be enough
407 * time.
408 */
409 if (intsize > 0)
410 return intspec[0];
411#endif
412 printk(KERN_WARNING "irq: no irq domain found for %s !\n",
413 controller->full_name);
414 return 0;
98 } 415 }
99 mutex_unlock(&irq_domain_mutex);
100 416
101 if (rc != 0) 417 /* If domain has no translation, then we assume interrupt line */
102 return 0; 418 if (domain->ops->xlate == NULL)
419 hwirq = intspec[0];
420 else {
421 if (domain->ops->xlate(domain, controller, intspec, intsize,
422 &hwirq, &type))
423 return 0;
424 }
425
426 /* Create mapping */
427 virq = irq_create_mapping(domain, hwirq);
428 if (!virq)
429 return virq;
103 430
104 irq = irq_domain_to_irq(domain, hwirq); 431 /* Set type if specified and different than the current one */
105 if (type != IRQ_TYPE_NONE) 432 if (type != IRQ_TYPE_NONE &&
106 irq_set_irq_type(irq, type); 433 type != (irqd_get_trigger_type(irq_get_irq_data(virq))))
107 pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n", 434 irq_set_irq_type(virq, type);
108 controller->full_name, (int)hwirq, irq, type); 435 return virq;
109 return irq;
110} 436}
111EXPORT_SYMBOL_GPL(irq_create_of_mapping); 437EXPORT_SYMBOL_GPL(irq_create_of_mapping);
112 438
113/** 439/**
114 * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping() 440 * irq_dispose_mapping() - Unmap an interrupt
115 * @irq: linux irq number to be discarded 441 * @virq: linux irq number of the interrupt to unmap
442 */
443void irq_dispose_mapping(unsigned int virq)
444{
445 struct irq_data *irq_data = irq_get_irq_data(virq);
446 struct irq_domain *domain;
447 irq_hw_number_t hwirq;
448
449 if (!virq || !irq_data)
450 return;
451
452 domain = irq_data->domain;
453 if (WARN_ON(domain == NULL))
454 return;
455
456 /* Never unmap legacy interrupts */
457 if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
458 return;
459
460 irq_set_status_flags(virq, IRQ_NOREQUEST);
461
462 /* remove chip and handler */
463 irq_set_chip_and_handler(virq, NULL, NULL);
464
465 /* Make sure it's completed */
466 synchronize_irq(virq);
467
468 /* Tell the PIC about it */
469 if (domain->ops->unmap)
470 domain->ops->unmap(domain, virq);
471 smp_mb();
472
473 /* Clear reverse map */
474 hwirq = irq_data->hwirq;
475 switch(domain->revmap_type) {
476 case IRQ_DOMAIN_MAP_LINEAR:
477 if (hwirq < domain->revmap_data.linear.size)
478 domain->revmap_data.linear.revmap[hwirq] = 0;
479 break;
480 case IRQ_DOMAIN_MAP_TREE:
481 mutex_lock(&revmap_trees_mutex);
482 radix_tree_delete(&domain->revmap_data.tree, hwirq);
483 mutex_unlock(&revmap_trees_mutex);
484 break;
485 }
486
487 irq_free_desc(virq);
488}
489EXPORT_SYMBOL_GPL(irq_dispose_mapping);
490
491/**
492 * irq_find_mapping() - Find a linux irq from an hw irq number.
493 * @domain: domain owning this hardware interrupt
494 * @hwirq: hardware irq number in that domain space
495 *
496 * This is a slow path, for use by generic code. It's expected that an
497 * irq controller implementation directly calls the appropriate low level
498 * mapping function.
499 */
500unsigned int irq_find_mapping(struct irq_domain *domain,
501 irq_hw_number_t hwirq)
502{
503 unsigned int i;
504 unsigned int hint = hwirq % nr_irqs;
505
 506 /* Look for default domain if necessary */
507 if (domain == NULL)
508 domain = irq_default_domain;
509 if (domain == NULL)
510 return 0;
511
512 /* legacy -> bail early */
513 if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
514 return irq_domain_legacy_revmap(domain, hwirq);
515
516 /* Slow path does a linear search of the map */
517 if (hint == 0)
518 hint = 1;
519 i = hint;
520 do {
521 struct irq_data *data = irq_get_irq_data(i);
522 if (data && (data->domain == domain) && (data->hwirq == hwirq))
523 return i;
524 i++;
525 if (i >= nr_irqs)
526 i = 1;
527 } while(i != hint);
528 return 0;
529}
530EXPORT_SYMBOL_GPL(irq_find_mapping);
531
532/**
533 * irq_radix_revmap_lookup() - Find a linux irq from a hw irq number.
534 * @domain: domain owning this hardware interrupt
535 * @hwirq: hardware irq number in that domain space
116 * 536 *
117 * Calling this function indicates the caller no longer needs a reference to 537 * This is a fast path, for use by irq controller code that uses radix tree
118 * the linux irq number returned by a prior call to irq_create_of_mapping(). 538 * revmaps
119 */ 539 */
120void irq_dispose_mapping(unsigned int irq) 540unsigned int irq_radix_revmap_lookup(struct irq_domain *domain,
541 irq_hw_number_t hwirq)
121{ 542{
543 struct irq_data *irq_data;
544
545 if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_TREE))
546 return irq_find_mapping(domain, hwirq);
547
548 /*
549 * Freeing an irq can delete nodes along the path to
550 * do the lookup via call_rcu.
551 */
552 rcu_read_lock();
553 irq_data = radix_tree_lookup(&domain->revmap_data.tree, hwirq);
554 rcu_read_unlock();
555
122 /* 556 /*
123 * nothing yet; will be filled when support for dynamic allocation of 557 * If found in radix tree, then fine.
124 * irq_descs is added to irq_domain 558 * Else fallback to linear lookup - this should not happen in practice
559 * as it means that we failed to insert the node in the radix tree.
125 */ 560 */
561 return irq_data ? irq_data->irq : irq_find_mapping(domain, hwirq);
126} 562}
127EXPORT_SYMBOL_GPL(irq_dispose_mapping);
128 563
129int irq_domain_simple_dt_translate(struct irq_domain *d, 564/**
130 struct device_node *controller, 565 * irq_radix_revmap_insert() - Insert a hw irq to linux irq number mapping.
131 const u32 *intspec, unsigned int intsize, 566 * @domain: domain owning this hardware interrupt
132 unsigned long *out_hwirq, unsigned int *out_type) 567 * @virq: linux irq number
568 * @hwirq: hardware irq number in that domain space
569 *
570 * This is for use by irq controllers that use a radix tree reverse
571 * mapping for fast lookup.
572 */
573void irq_radix_revmap_insert(struct irq_domain *domain, unsigned int virq,
574 irq_hw_number_t hwirq)
133{ 575{
134 if (d->of_node != controller) 576 struct irq_data *irq_data = irq_get_irq_data(virq);
135 return -EINVAL; 577
136 if (intsize < 1) 578 if (WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_TREE))
137 return -EINVAL; 579 return;
138 if (d->nr_irq && ((intspec[0] < d->hwirq_base) || 580
139 (intspec[0] >= d->hwirq_base + d->nr_irq))) 581 if (virq) {
140 return -EINVAL; 582 mutex_lock(&revmap_trees_mutex);
583 radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data);
584 mutex_unlock(&revmap_trees_mutex);
585 }
586}
587
588/**
589 * irq_linear_revmap() - Find a linux irq from a hw irq number.
590 * @domain: domain owning this hardware interrupt
591 * @hwirq: hardware irq number in that domain space
592 *
593 * This is a fast path, for use by irq controller code that uses linear
594 * revmaps. It does fallback to the slow path if the revmap doesn't exist
595 * yet and will create the revmap entry with appropriate locking
596 */
597unsigned int irq_linear_revmap(struct irq_domain *domain,
598 irq_hw_number_t hwirq)
599{
600 unsigned int *revmap;
601
602 if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR))
603 return irq_find_mapping(domain, hwirq);
604
605 /* Check revmap bounds */
606 if (unlikely(hwirq >= domain->revmap_data.linear.size))
607 return irq_find_mapping(domain, hwirq);
608
609 /* Check if revmap was allocated */
610 revmap = domain->revmap_data.linear.revmap;
611 if (unlikely(revmap == NULL))
612 return irq_find_mapping(domain, hwirq);
613
614 /* Fill up revmap with slow path if no mapping found */
615 if (unlikely(!revmap[hwirq]))
616 revmap[hwirq] = irq_find_mapping(domain, hwirq);
617
618 return revmap[hwirq];
619}
620
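irq_linear_revmap() is intended for hot paths such as a chained handler demultiplexing a bank of interrupts. A sketch of that loop; baz_domain and the pending-status read are hypothetical stand-ins:

#include <linux/bitops.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>

static struct irq_domain *baz_domain;

static unsigned long baz_pending_read(void)
{
	/* Hypothetical: read the controller's pending-status register. */
	return 0;
}

static void baz_demux(unsigned int irq, struct irq_desc *desc)
{
	unsigned long pending = baz_pending_read();
	unsigned int hwirq;

	for_each_set_bit(hwirq, &pending, 32) {
		unsigned int virq = irq_linear_revmap(baz_domain, hwirq);

		if (virq)
			generic_handle_irq(virq);
	}
}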
621#ifdef CONFIG_IRQ_DOMAIN_DEBUG
622static int virq_debug_show(struct seq_file *m, void *private)
623{
624 unsigned long flags;
625 struct irq_desc *desc;
626 const char *p;
627 static const char none[] = "none";
628 void *data;
629 int i;
630
631 seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq",
632 "chip name", (int)(2 * sizeof(void *) + 2), "chip data",
633 "domain name");
141 634
635 for (i = 1; i < nr_irqs; i++) {
636 desc = irq_to_desc(i);
637 if (!desc)
638 continue;
639
640 raw_spin_lock_irqsave(&desc->lock, flags);
641
642 if (desc->action && desc->action->handler) {
643 struct irq_chip *chip;
644
645 seq_printf(m, "%5d ", i);
646 seq_printf(m, "0x%05lx ", desc->irq_data.hwirq);
647
648 chip = irq_desc_get_chip(desc);
649 if (chip && chip->name)
650 p = chip->name;
651 else
652 p = none;
653 seq_printf(m, "%-15s ", p);
654
655 data = irq_desc_get_chip_data(desc);
656 seq_printf(m, data ? "0x%p " : " %p ", data);
657
658 if (desc->irq_data.domain && desc->irq_data.domain->of_node)
659 p = desc->irq_data.domain->of_node->full_name;
660 else
661 p = none;
662 seq_printf(m, "%s\n", p);
663 }
664
665 raw_spin_unlock_irqrestore(&desc->lock, flags);
666 }
667
668 return 0;
669}
670
671static int virq_debug_open(struct inode *inode, struct file *file)
672{
673 return single_open(file, virq_debug_show, inode->i_private);
674}
675
676static const struct file_operations virq_debug_fops = {
677 .open = virq_debug_open,
678 .read = seq_read,
679 .llseek = seq_lseek,
680 .release = single_release,
681};
682
683static int __init irq_debugfs_init(void)
684{
685 if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL,
686 NULL, &virq_debug_fops) == NULL)
687 return -ENOMEM;
688
689 return 0;
690}
691__initcall(irq_debugfs_init);
692#endif /* CONFIG_IRQ_DOMAIN_DEBUG */
693
694int irq_domain_simple_map(struct irq_domain *d, unsigned int irq,
695 irq_hw_number_t hwirq)
696{
697 return 0;
698}
699
700/**
701 * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings
702 *
703 * Device Tree IRQ specifier translation function which works with one cell
704 * bindings where the cell value maps directly to the hwirq number.
705 */
706int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr,
707 const u32 *intspec, unsigned int intsize,
708 unsigned long *out_hwirq, unsigned int *out_type)
709{
710 if (WARN_ON(intsize < 1))
711 return -EINVAL;
142 *out_hwirq = intspec[0]; 712 *out_hwirq = intspec[0];
143 *out_type = IRQ_TYPE_NONE; 713 *out_type = IRQ_TYPE_NONE;
144 if (intsize > 1)
145 *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
146 return 0; 714 return 0;
147} 715}
716EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell);
148 717
149/** 718/**
150 * irq_domain_create_simple() - Set up a 'simple' translation range 719 * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings
720 *
721 * Device Tree IRQ specifier translation function which works with two cell
722 * bindings where the cell values map directly to the hwirq number
723 * and linux irq flags.
151 */ 724 */
152void irq_domain_add_simple(struct device_node *controller, int irq_base) 725int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
726 const u32 *intspec, unsigned int intsize,
727 irq_hw_number_t *out_hwirq, unsigned int *out_type)
153{ 728{
154 struct irq_domain *domain; 729 if (WARN_ON(intsize < 2))
155 730 return -EINVAL;
156 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 731 *out_hwirq = intspec[0];
157 if (!domain) { 732 *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
158 WARN_ON(1); 733 return 0;
159 return; 734}
160 } 735EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
161 736
162 domain->irq_base = irq_base; 737/**
163 domain->of_node = of_node_get(controller); 738 * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings
164 domain->ops = &irq_domain_simple_ops; 739 *
165 irq_domain_add(domain); 740 * Device Tree IRQ specifier translation function which works with either one
741 * or two cell bindings where the cell values map directly to the hwirq number
742 * and linux irq flags.
743 *
744 * Note: don't use this function unless your interrupt controller explicitly
745 * supports both one and two cell bindings. For the majority of controllers
746 * the _onecell() or _twocell() variants above should be used.
747 */
748int irq_domain_xlate_onetwocell(struct irq_domain *d,
749 struct device_node *ctrlr,
750 const u32 *intspec, unsigned int intsize,
751 unsigned long *out_hwirq, unsigned int *out_type)
752{
753 if (WARN_ON(intsize < 1))
754 return -EINVAL;
755 *out_hwirq = intspec[0];
756 *out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE;
757 return 0;
166} 758}
167EXPORT_SYMBOL_GPL(irq_domain_add_simple); 759EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell);
168 760
761const struct irq_domain_ops irq_domain_simple_ops = {
762 .map = irq_domain_simple_map,
763 .xlate = irq_domain_xlate_onetwocell,
764};
765EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
766
767#ifdef CONFIG_OF_IRQ
169void irq_domain_generate_simple(const struct of_device_id *match, 768void irq_domain_generate_simple(const struct of_device_id *match,
170 u64 phys_base, unsigned int irq_start) 769 u64 phys_base, unsigned int irq_start)
171{ 770{
172 struct device_node *node; 771 struct device_node *node;
173 pr_info("looking for phys_base=%llx, irq_start=%i\n", 772 pr_debug("looking for phys_base=%llx, irq_start=%i\n",
174 (unsigned long long) phys_base, (int) irq_start); 773 (unsigned long long) phys_base, (int) irq_start);
175 node = of_find_matching_node_by_address(NULL, match, phys_base); 774 node = of_find_matching_node_by_address(NULL, match, phys_base);
176 if (node) 775 if (node)
177 irq_domain_add_simple(node, irq_start); 776 irq_domain_add_legacy(node, 32, irq_start, 0,
178 else 777 &irq_domain_simple_ops, NULL);
179 pr_info("no node found\n");
180} 778}
181EXPORT_SYMBOL_GPL(irq_domain_generate_simple); 779EXPORT_SYMBOL_GPL(irq_domain_generate_simple);
182#endif /* CONFIG_OF_IRQ */ 780#endif
183
184struct irq_domain_ops irq_domain_simple_ops = {
185#ifdef CONFIG_OF_IRQ
186 .dt_translate = irq_domain_simple_dt_translate,
187#endif /* CONFIG_OF_IRQ */
188};
189EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
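On the consumer side none of this is visible: a driver typically resolves its device-tree interrupt with irq_of_parse_and_map(), which ends up in irq_create_of_mapping() above. A sketch with an invented qux_ prefix:

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/of.h>
#include <linux/of_irq.h>

static int qux_setup_irq(struct device_node *np, irq_handler_t handler,
			 void *dev)
{
	unsigned int virq = irq_of_parse_and_map(np, 0);

	if (!virq)
		return -EINVAL;
	return request_irq(virq, handler, 0, "qux", dev);
}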
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b0ccd1ac2d6a..89a3ea82569b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
282{ 282{
283 struct irq_chip *chip = irq_desc_get_chip(desc); 283 struct irq_chip *chip = irq_desc_get_chip(desc);
284 struct cpumask *set = irq_default_affinity; 284 struct cpumask *set = irq_default_affinity;
285 int ret; 285 int ret, node = desc->irq_data.node;
286 286
287 /* Excludes PER_CPU and NO_BALANCE interrupts */ 287 /* Excludes PER_CPU and NO_BALANCE interrupts */
288 if (!irq_can_set_affinity(irq)) 288 if (!irq_can_set_affinity(irq))
@@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
301 } 301 }
302 302
303 cpumask_and(mask, cpu_online_mask, set); 303 cpumask_and(mask, cpu_online_mask, set);
304 if (node != NUMA_NO_NODE) {
305 const struct cpumask *nodemask = cpumask_of_node(node);
306
307 /* make sure at least one of the cpus in nodemask is online */
308 if (cpumask_intersects(mask, nodemask))
309 cpumask_and(mask, mask, nodemask);
310 }
304 ret = chip->irq_set_affinity(&desc->irq_data, mask, false); 311 ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
305 switch (ret) { 312 switch (ret) {
306 case IRQ_SET_MASK_OK: 313 case IRQ_SET_MASK_OK:
@@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action)
645 * is marked MASKED. 652 * is marked MASKED.
646 */ 653 */
647static void irq_finalize_oneshot(struct irq_desc *desc, 654static void irq_finalize_oneshot(struct irq_desc *desc,
648 struct irqaction *action, bool force) 655 struct irqaction *action)
649{ 656{
650 if (!(desc->istate & IRQS_ONESHOT)) 657 if (!(desc->istate & IRQS_ONESHOT))
651 return; 658 return;
@@ -679,7 +686,7 @@ again:
679 * we would clear the threads_oneshot bit of this thread which 686 * we would clear the threads_oneshot bit of this thread which
680 * was just set. 687 * was just set.
681 */ 688 */
682 if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) 689 if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
683 goto out_unlock; 690 goto out_unlock;
684 691
685 desc->threads_oneshot &= ~action->thread_mask; 692 desc->threads_oneshot &= ~action->thread_mask;
@@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
739 746
740 local_bh_disable(); 747 local_bh_disable();
741 ret = action->thread_fn(action->irq, action->dev_id); 748 ret = action->thread_fn(action->irq, action->dev_id);
742 irq_finalize_oneshot(desc, action, false); 749 irq_finalize_oneshot(desc, action);
743 local_bh_enable(); 750 local_bh_enable();
744 return ret; 751 return ret;
745} 752}
@@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
755 irqreturn_t ret; 762 irqreturn_t ret;
756 763
757 ret = action->thread_fn(action->irq, action->dev_id); 764 ret = action->thread_fn(action->irq, action->dev_id);
758 irq_finalize_oneshot(desc, action, false); 765 irq_finalize_oneshot(desc, action);
759 return ret; 766 return ret;
760} 767}
761 768
@@ -844,7 +851,7 @@ void exit_irq_thread(void)
844 wake_threads_waitq(desc); 851 wake_threads_waitq(desc);
845 852
846 /* Prevent a stale desc->threads_oneshot */ 853 /* Prevent a stale desc->threads_oneshot */
847 irq_finalize_oneshot(desc, action, true); 854 irq_finalize_oneshot(desc, action);
848} 855}
849 856
850static void irq_setup_forced_threading(struct irqaction *new) 857static void irq_setup_forced_threading(struct irqaction *new)
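The oneshot changes above concern threaded handlers registered with IRQF_ONESHOT, where the line stays masked until the thread function finishes and irq_finalize_oneshot() unmasks it. A typical registration, with invented quux_* names:

#include <linux/interrupt.h>

static irqreturn_t quux_hardirq(int irq, void *dev)
{
	/* Acknowledge and defer; the line remains masked (ONESHOT). */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t quux_thread(int irq, void *dev)
{
	/* Sleepable work (e.g. bus transfers) happens here. */
	return IRQ_HANDLED;
}

static int quux_request(unsigned int irq, void *dev)
{
	return request_threaded_irq(irq, quux_hardirq, quux_thread,
				    IRQF_ONESHOT, "quux", dev);
}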
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 47420908fba0..c3c89751b327 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata)
43 * masking the irqs. 43 * masking the irqs.
44 */ 44 */
45 if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) 45 if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
46 < nr_cpu_ids)) 46 < nr_cpu_ids)) {
47 if (!chip->irq_set_affinity(&desc->irq_data, 47 int ret = chip->irq_set_affinity(&desc->irq_data,
48 desc->pending_mask, false)) { 48 desc->pending_mask, false);
49 switch (ret) {
50 case IRQ_SET_MASK_OK:
49 cpumask_copy(desc->irq_data.affinity, desc->pending_mask); 51 cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
52 case IRQ_SET_MASK_OK_NOCOPY:
50 irq_set_thread_affinity(desc); 53 irq_set_thread_affinity(desc);
51 } 54 }
55 }
52 56
53 cpumask_clear(desc->pending_mask); 57 cpumask_clear(desc->pending_mask);
54} 58}
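The hunk makes the migration path honour the same return-code convention as setup_affinity(): IRQ_SET_MASK_OK means the core should copy the mask into irq_data->affinity, IRQ_SET_MASK_OK_NOCOPY means the chip already did. A sketch of a chip callback using the convention; corge_write_route() is a placeholder for the hardware programming:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/irq.h>

static void corge_write_route(irq_hw_number_t hwirq, unsigned int cpu)
{
	/* Hypothetical: program the controller's routing register. */
}

static int corge_set_affinity(struct irq_data *d, const struct cpumask *mask,
			      bool force)
{
	unsigned int cpu = cpumask_first(mask);

	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	corge_write_route(d->hwirq, cpu);

	/* Return IRQ_SET_MASK_OK_NOCOPY instead if this callback already
	 * updated d->affinity itself. */
	return IRQ_SET_MASK_OK;
}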
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index c3c46c72046e..1588e3b2871b 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -5,11 +5,13 @@
5 * context. The enqueueing is NMI-safe. 5 * context. The enqueueing is NMI-safe.
6 */ 6 */
7 7
8#include <linux/bug.h>
8#include <linux/kernel.h> 9#include <linux/kernel.h>
9#include <linux/export.h> 10#include <linux/export.h>
10#include <linux/irq_work.h> 11#include <linux/irq_work.h>
11#include <linux/percpu.h> 12#include <linux/percpu.h>
12#include <linux/hardirq.h> 13#include <linux/hardirq.h>
14#include <linux/irqflags.h>
13#include <asm/processor.h> 15#include <asm/processor.h>
14 16
15/* 17/*
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 22000c3db0dd..8d262b467573 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -284,8 +284,12 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
284 if (value) { 284 if (value) {
285 if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) 285 if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
286 return -EFAULT; 286 return -EFAULT;
287 } else 287 } else {
288 memset((char *) &set_buffer, 0, sizeof(set_buffer)); 288 memset(&set_buffer, 0, sizeof(set_buffer));
289 printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
290 " Misfeature support will be removed\n",
291 current->comm);
292 }
289 293
290 error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); 294 error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
291 if (error || !ovalue) 295 if (error || !ovalue)
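The new printk_once flags callers that pass a NULL new_value to setitimer(); the supported convention is to always pass a real struct itimerval (and, if the old value is wanted, a separate ovalue pointer). Userspace illustration:

#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct itimerval val = {
		.it_interval	= { .tv_sec = 1, .tv_usec = 0 },
		.it_value	= { .tv_sec = 1, .tv_usec = 0 },
	};

	if (setitimer(ITIMER_REAL, &val, NULL) != 0) {
		perror("setitimer");
		return 1;
	}
	/* ... install a SIGALRM handler and do the real work ... */
	return 0;
}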
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 7b0886786701..4e2e472f6aeb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -37,7 +37,6 @@
37#include <asm/page.h> 37#include <asm/page.h>
38#include <asm/uaccess.h> 38#include <asm/uaccess.h>
39#include <asm/io.h> 39#include <asm/io.h>
40#include <asm/system.h>
41#include <asm/sections.h> 40#include <asm/sections.h>
42 41
43/* Per cpu memory for storing cpu states in case of system crash. */ 42/* Per cpu memory for storing cpu states in case of system crash. */
@@ -1359,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline,
1359 1358
1360 if (*cur == '@') 1359 if (*cur == '@')
1361 *crash_base = memparse(cur+1, &cur); 1360 *crash_base = memparse(cur+1, &cur);
1361 else if (*cur != ' ' && *cur != '\0') {
1362 pr_warning("crashkernel: unrecognized char\n");
1363 return -EINVAL;
1364 }
1362 1365
1363 return 0; 1366 return 0;
1364} 1367}
@@ -1462,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void)
1462 1465
1463 VMCOREINFO_SYMBOL(init_uts_ns); 1466 VMCOREINFO_SYMBOL(init_uts_ns);
1464 VMCOREINFO_SYMBOL(node_online_map); 1467 VMCOREINFO_SYMBOL(node_online_map);
1468#ifdef CONFIG_MMU
1465 VMCOREINFO_SYMBOL(swapper_pg_dir); 1469 VMCOREINFO_SYMBOL(swapper_pg_dir);
1470#endif
1466 VMCOREINFO_SYMBOL(_stext); 1471 VMCOREINFO_SYMBOL(_stext);
1467 VMCOREINFO_SYMBOL(vmlist); 1472 VMCOREINFO_SYMBOL(vmlist);
1468 1473
@@ -1546,13 +1551,13 @@ int kernel_kexec(void)
1546 if (error) 1551 if (error)
1547 goto Resume_console; 1552 goto Resume_console;
1548 /* At this point, dpm_suspend_start() has been called, 1553 /* At this point, dpm_suspend_start() has been called,
1549 * but *not* dpm_suspend_noirq(). We *must* call 1554 * but *not* dpm_suspend_end(). We *must* call
1550 * dpm_suspend_noirq() now. Otherwise, drivers for 1555 * dpm_suspend_end() now. Otherwise, drivers for
1551 * some devices (e.g. interrupt controllers) become 1556 * some devices (e.g. interrupt controllers) become
1552 * desynchronized with the actual state of the 1557 * desynchronized with the actual state of the
1553 * hardware at resume time, and evil weirdness ensues. 1558 * hardware at resume time, and evil weirdness ensues.
1554 */ 1559 */
1555 error = dpm_suspend_noirq(PMSG_FREEZE); 1560 error = dpm_suspend_end(PMSG_FREEZE);
1556 if (error) 1561 if (error)
1557 goto Resume_devices; 1562 goto Resume_devices;
1558 error = disable_nonboot_cpus(); 1563 error = disable_nonboot_cpus();
@@ -1579,7 +1584,7 @@ int kernel_kexec(void)
1579 local_irq_enable(); 1584 local_irq_enable();
1580 Enable_cpus: 1585 Enable_cpus:
1581 enable_nonboot_cpus(); 1586 enable_nonboot_cpus();
1582 dpm_resume_noirq(PMSG_RESTORE); 1587 dpm_resume_start(PMSG_RESTORE);
1583 Resume_devices: 1588 Resume_devices:
1584 dpm_resume_end(PMSG_RESTORE); 1589 dpm_resume_end(PMSG_RESTORE);
1585 Resume_console: 1590 Resume_console:
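parse_crashkernel_simple() now rejects trailing garbage after the size (and optional @offset). A rough userspace approximation of the accepted "size[KMG][@offset[KMG]]" form, useful for experimenting with command lines; it only mimics the kernel's memparse()-based logic:

#include <stdio.h>
#include <stdlib.h>

static unsigned long long parse_size(const char *s, char **end)
{
	unsigned long long v = strtoull(s, end, 0);

	switch (**end) {
	case 'K': case 'k': v <<= 10; (*end)++; break;
	case 'M': case 'm': v <<= 20; (*end)++; break;
	case 'G': case 'g': v <<= 30; (*end)++; break;
	}
	return v;
}

int main(void)
{
	char arg[] = "128M@16M";		/* e.g. crashkernel=128M@16M */
	char *cur = arg;
	unsigned long long size, base = 0;

	size = parse_size(cur, &cur);
	if (*cur == '@')
		base = parse_size(cur + 1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		fprintf(stderr, "crashkernel: unrecognized char\n");
		return 1;
	}
	printf("size=%llu base=%llu\n", size, base);
	return 0;
}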
diff --git a/kernel/kmod.c b/kernel/kmod.c
index a0a88543934e..05698a7415fe 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -60,6 +60,43 @@ static DECLARE_RWSEM(umhelper_sem);
60*/ 60*/
61char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; 61char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
62 62
63static void free_modprobe_argv(struct subprocess_info *info)
64{
65 kfree(info->argv[3]); /* check call_modprobe() */
66 kfree(info->argv);
67}
68
69static int call_modprobe(char *module_name, int wait)
70{
71 static char *envp[] = {
72 "HOME=/",
73 "TERM=linux",
74 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
75 NULL
76 };
77
78 char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
79 if (!argv)
80 goto out;
81
82 module_name = kstrdup(module_name, GFP_KERNEL);
83 if (!module_name)
84 goto free_argv;
85
86 argv[0] = modprobe_path;
87 argv[1] = "-q";
88 argv[2] = "--";
89 argv[3] = module_name; /* check free_modprobe_argv() */
90 argv[4] = NULL;
91
92 return call_usermodehelper_fns(modprobe_path, argv, envp,
93 wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL);
94free_argv:
95 kfree(argv);
96out:
97 return -ENOMEM;
98}
99
63/** 100/**
64 * __request_module - try to load a kernel module 101 * __request_module - try to load a kernel module
65 * @wait: wait (or not) for the operation to complete 102 * @wait: wait (or not) for the operation to complete
@@ -81,11 +118,6 @@ int __request_module(bool wait, const char *fmt, ...)
81 char module_name[MODULE_NAME_LEN]; 118 char module_name[MODULE_NAME_LEN];
82 unsigned int max_modprobes; 119 unsigned int max_modprobes;
83 int ret; 120 int ret;
84 char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
85 static char *envp[] = { "HOME=/",
86 "TERM=linux",
87 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
88 NULL };
89 static atomic_t kmod_concurrent = ATOMIC_INIT(0); 121 static atomic_t kmod_concurrent = ATOMIC_INIT(0);
90#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ 122#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
91 static int kmod_loop_msg; 123 static int kmod_loop_msg;
@@ -128,9 +160,7 @@ int __request_module(bool wait, const char *fmt, ...)
128 160
129 trace_module_request(module_name, wait, _RET_IP_); 161 trace_module_request(module_name, wait, _RET_IP_);
130 162
131 ret = call_usermodehelper_fns(modprobe_path, argv, envp, 163 ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
132 wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC,
133 NULL, NULL, NULL);
134 164
135 atomic_dec(&kmod_concurrent); 165 atomic_dec(&kmod_concurrent);
136 return ret; 166 return ret;
@@ -188,7 +218,7 @@ static int ____call_usermodehelper(void *data)
188 /* Exec failed? */ 218 /* Exec failed? */
189fail: 219fail:
190 sub_info->retval = retval; 220 sub_info->retval = retval;
191 do_exit(0); 221 return 0;
192} 222}
193 223
194void call_usermodehelper_freeinfo(struct subprocess_info *info) 224void call_usermodehelper_freeinfo(struct subprocess_info *info)
@@ -199,6 +229,19 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info)
199} 229}
200EXPORT_SYMBOL(call_usermodehelper_freeinfo); 230EXPORT_SYMBOL(call_usermodehelper_freeinfo);
201 231
232static void umh_complete(struct subprocess_info *sub_info)
233{
234 struct completion *comp = xchg(&sub_info->complete, NULL);
235 /*
236 * See call_usermodehelper_exec(). If xchg() returns NULL
237 * we own sub_info, the UMH_KILLABLE caller has gone away.
238 */
239 if (comp)
240 complete(comp);
241 else
242 call_usermodehelper_freeinfo(sub_info);
243}
244
202/* Keventd can't block, but this (a child) can. */ 245/* Keventd can't block, but this (a child) can. */
203static int wait_for_helper(void *data) 246static int wait_for_helper(void *data)
204{ 247{
@@ -235,7 +278,7 @@ static int wait_for_helper(void *data)
235 sub_info->retval = ret; 278 sub_info->retval = ret;
236 } 279 }
237 280
238 complete(sub_info->complete); 281 umh_complete(sub_info);
239 return 0; 282 return 0;
240} 283}
241 284
@@ -244,7 +287,7 @@ static void __call_usermodehelper(struct work_struct *work)
244{ 287{
245 struct subprocess_info *sub_info = 288 struct subprocess_info *sub_info =
246 container_of(work, struct subprocess_info, work); 289 container_of(work, struct subprocess_info, work);
247 enum umh_wait wait = sub_info->wait; 290 int wait = sub_info->wait & ~UMH_KILLABLE;
248 pid_t pid; 291 pid_t pid;
249 292
250 /* CLONE_VFORK: wait until the usermode helper has execve'd 293 /* CLONE_VFORK: wait until the usermode helper has execve'd
@@ -269,7 +312,7 @@ static void __call_usermodehelper(struct work_struct *work)
269 case UMH_WAIT_EXEC: 312 case UMH_WAIT_EXEC:
270 if (pid < 0) 313 if (pid < 0)
271 sub_info->retval = pid; 314 sub_info->retval = pid;
272 complete(sub_info->complete); 315 umh_complete(sub_info);
273 } 316 }
274} 317}
275 318
@@ -279,7 +322,7 @@ static void __call_usermodehelper(struct work_struct *work)
279 * land has been frozen during a system-wide hibernation or suspend operation). 322 * land has been frozen during a system-wide hibernation or suspend operation).
280 * Should always be manipulated under umhelper_sem acquired for write. 323 * Should always be manipulated under umhelper_sem acquired for write.
281 */ 324 */
282static int usermodehelper_disabled = 1; 325static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED;
283 326
284/* Number of helpers running */ 327/* Number of helpers running */
285static atomic_t running_helpers = ATOMIC_INIT(0); 328static atomic_t running_helpers = ATOMIC_INIT(0);
@@ -291,32 +334,110 @@ static atomic_t running_helpers = ATOMIC_INIT(0);
291static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); 334static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
292 335
293/* 336/*
337 * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled
338 * to become 'false'.
339 */
340static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq);
341
342/*
294 * Time to wait for running_helpers to become zero before the setting of 343 * Time to wait for running_helpers to become zero before the setting of
295 * usermodehelper_disabled in usermodehelper_disable() fails 344 * usermodehelper_disabled in usermodehelper_disable() fails
296 */ 345 */
297#define RUNNING_HELPERS_TIMEOUT (5 * HZ) 346#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
298 347
299void read_lock_usermodehelper(void) 348int usermodehelper_read_trylock(void)
349{
350 DEFINE_WAIT(wait);
351 int ret = 0;
352
353 down_read(&umhelper_sem);
354 for (;;) {
355 prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
356 TASK_INTERRUPTIBLE);
357 if (!usermodehelper_disabled)
358 break;
359
360 if (usermodehelper_disabled == UMH_DISABLED)
361 ret = -EAGAIN;
362
363 up_read(&umhelper_sem);
364
365 if (ret)
366 break;
367
368 schedule();
369 try_to_freeze();
370
371 down_read(&umhelper_sem);
372 }
373 finish_wait(&usermodehelper_disabled_waitq, &wait);
374 return ret;
375}
376EXPORT_SYMBOL_GPL(usermodehelper_read_trylock);
377
378long usermodehelper_read_lock_wait(long timeout)
300{ 379{
380 DEFINE_WAIT(wait);
381
382 if (timeout < 0)
383 return -EINVAL;
384
301 down_read(&umhelper_sem); 385 down_read(&umhelper_sem);
386 for (;;) {
387 prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
388 TASK_UNINTERRUPTIBLE);
389 if (!usermodehelper_disabled)
390 break;
391
392 up_read(&umhelper_sem);
393
394 timeout = schedule_timeout(timeout);
395 if (!timeout)
396 break;
397
398 down_read(&umhelper_sem);
399 }
400 finish_wait(&usermodehelper_disabled_waitq, &wait);
401 return timeout;
302} 402}
303EXPORT_SYMBOL_GPL(read_lock_usermodehelper); 403EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait);
304 404
305void read_unlock_usermodehelper(void) 405void usermodehelper_read_unlock(void)
306{ 406{
307 up_read(&umhelper_sem); 407 up_read(&umhelper_sem);
308} 408}
309EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); 409EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
310 410
311/** 411/**
312 * usermodehelper_disable - prevent new helpers from being started 412 * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
413 * depth: New value to assign to usermodehelper_disabled.
414 *
415 * Change the value of usermodehelper_disabled (under umhelper_sem locked for
416 * writing) and wakeup tasks waiting for it to change.
313 */ 417 */
314int usermodehelper_disable(void) 418void __usermodehelper_set_disable_depth(enum umh_disable_depth depth)
419{
420 down_write(&umhelper_sem);
421 usermodehelper_disabled = depth;
422 wake_up(&usermodehelper_disabled_waitq);
423 up_write(&umhelper_sem);
424}
425
426/**
427 * __usermodehelper_disable - Prevent new helpers from being started.
428 * @depth: New value to assign to usermodehelper_disabled.
429 *
430 * Set usermodehelper_disabled to @depth and wait for running helpers to exit.
431 */
432int __usermodehelper_disable(enum umh_disable_depth depth)
315{ 433{
316 long retval; 434 long retval;
317 435
436 if (!depth)
437 return -EINVAL;
438
318 down_write(&umhelper_sem); 439 down_write(&umhelper_sem);
319 usermodehelper_disabled = 1; 440 usermodehelper_disabled = depth;
320 up_write(&umhelper_sem); 441 up_write(&umhelper_sem);
321 442
322 /* 443 /*
@@ -331,31 +452,10 @@ int usermodehelper_disable(void)
331 if (retval) 452 if (retval)
332 return 0; 453 return 0;
333 454
334 down_write(&umhelper_sem); 455 __usermodehelper_set_disable_depth(UMH_ENABLED);
335 usermodehelper_disabled = 0;
336 up_write(&umhelper_sem);
337 return -EAGAIN; 456 return -EAGAIN;
338} 457}
339 458
340/**
341 * usermodehelper_enable - allow new helpers to be started again
342 */
343void usermodehelper_enable(void)
344{
345 down_write(&umhelper_sem);
346 usermodehelper_disabled = 0;
347 up_write(&umhelper_sem);
348}
349
350/**
351 * usermodehelper_is_disabled - check if new helpers are allowed to be started
352 */
353bool usermodehelper_is_disabled(void)
354{
355 return usermodehelper_disabled;
356}
357EXPORT_SYMBOL_GPL(usermodehelper_is_disabled);
358
359static void helper_lock(void) 459static void helper_lock(void)
360{ 460{
361 atomic_inc(&running_helpers); 461 atomic_inc(&running_helpers);
@@ -435,8 +535,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
435 * asynchronously if wait is not set, and runs as a child of keventd. 535 * asynchronously if wait is not set, and runs as a child of keventd.
436 * (ie. it runs with full root capabilities). 536 * (ie. it runs with full root capabilities).
437 */ 537 */
438int call_usermodehelper_exec(struct subprocess_info *sub_info, 538int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
439 enum umh_wait wait)
440{ 539{
441 DECLARE_COMPLETION_ONSTACK(done); 540 DECLARE_COMPLETION_ONSTACK(done);
442 int retval = 0; 541 int retval = 0;
@@ -456,9 +555,21 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
456 queue_work(khelper_wq, &sub_info->work); 555 queue_work(khelper_wq, &sub_info->work);
457 if (wait == UMH_NO_WAIT) /* task has freed sub_info */ 556 if (wait == UMH_NO_WAIT) /* task has freed sub_info */
458 goto unlock; 557 goto unlock;
558
559 if (wait & UMH_KILLABLE) {
560 retval = wait_for_completion_killable(&done);
561 if (!retval)
562 goto wait_done;
563
564 /* umh_complete() will see NULL and free sub_info */
565 if (xchg(&sub_info->complete, NULL))
566 goto unlock;
567 /* fallthrough, umh_complete() was already called */
568 }
569
459 wait_for_completion(&done); 570 wait_for_completion(&done);
571wait_done:
460 retval = sub_info->retval; 572 retval = sub_info->retval;
461
462out: 573out:
463 call_usermodehelper_freeinfo(sub_info); 574 call_usermodehelper_freeinfo(sub_info);
464unlock: 575unlock:
diff --git a/kernel/module.c b/kernel/module.c
index 2c932760fd33..78ac6ec1e425 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -105,6 +105,7 @@ struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
105 105
106/* Block module loading/unloading? */ 106/* Block module loading/unloading? */
107int modules_disabled = 0; 107int modules_disabled = 0;
108core_param(nomodule, modules_disabled, bint, 0);
108 109
109/* Waiting for a module to finish initializing? */ 110/* Waiting for a module to finish initializing? */
110static DECLARE_WAIT_QUEUE_HEAD(module_wq); 111static DECLARE_WAIT_QUEUE_HEAD(module_wq);
@@ -903,6 +904,36 @@ static ssize_t show_refcnt(struct module_attribute *mattr,
903static struct module_attribute modinfo_refcnt = 904static struct module_attribute modinfo_refcnt =
904 __ATTR(refcnt, 0444, show_refcnt, NULL); 905 __ATTR(refcnt, 0444, show_refcnt, NULL);
905 906
907void __module_get(struct module *module)
908{
909 if (module) {
910 preempt_disable();
911 __this_cpu_inc(module->refptr->incs);
912 trace_module_get(module, _RET_IP_);
913 preempt_enable();
914 }
915}
916EXPORT_SYMBOL(__module_get);
917
918bool try_module_get(struct module *module)
919{
920 bool ret = true;
921
922 if (module) {
923 preempt_disable();
924
925 if (likely(module_is_live(module))) {
926 __this_cpu_inc(module->refptr->incs);
927 trace_module_get(module, _RET_IP_);
928 } else
929 ret = false;
930
931 preempt_enable();
932 }
933 return ret;
934}
935EXPORT_SYMBOL(try_module_get);
936
906void module_put(struct module *module) 937void module_put(struct module *module)
907{ 938{
908 if (module) { 939 if (module) {
@@ -2380,8 +2411,7 @@ static int copy_and_check(struct load_info *info,
2380 return -ENOEXEC; 2411 return -ENOEXEC;
2381 2412
2382 /* Suck in entire file: we'll want most of it. */ 2413 /* Suck in entire file: we'll want most of it. */
2383 /* vmalloc barfs on "unusual" numbers. Check here */ 2414 if ((hdr = vmalloc(len)) == NULL)
2384 if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
2385 return -ENOMEM; 2415 return -ENOMEM;
2386 2416
2387 if (copy_from_user(hdr, umod, len) != 0) { 2417 if (copy_from_user(hdr, umod, len) != 0) {
@@ -2922,7 +2952,8 @@ static struct module *load_module(void __user *umod,
2922 mutex_unlock(&module_mutex); 2952 mutex_unlock(&module_mutex);
2923 2953
2924 /* Module is ready to execute: parsing args may do that. */ 2954 /* Module is ready to execute: parsing args may do that. */
2925 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); 2955 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
2956 -32768, 32767, NULL);
2926 if (err < 0) 2957 if (err < 0)
2927 goto unlink; 2958 goto unlink;
2928 2959
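With __module_get() and try_module_get() now out of line, the usage pattern is unchanged: pin the owner before calling into it, drop the reference afterwards. The garply_use() wrapper is illustrative; 'owner' would usually come from some ops structure's .owner field.

#include <linux/errno.h>
#include <linux/module.h>

static int garply_use(struct module *owner)
{
	if (!try_module_get(owner))
		return -ENODEV;		/* module is on its way out */

	/* ... safe to call into the module here ... */

	module_put(owner);
	return 0;
}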
diff --git a/kernel/padata.c b/kernel/padata.c
index b45259931512..89fe3d1b9efb 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -1,6 +1,8 @@
1/* 1/*
2 * padata.c - generic interface to process data streams in parallel 2 * padata.c - generic interface to process data streams in parallel
3 * 3 *
 4 * See Documentation/padata.txt for API documentation.
5 *
4 * Copyright (C) 2008, 2009 secunet Security Networks AG 6 * Copyright (C) 2008, 2009 secunet Security Networks AG
5 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> 7 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
6 * 8 *
@@ -29,7 +31,6 @@
29#include <linux/sysfs.h> 31#include <linux/sysfs.h>
30#include <linux/rcupdate.h> 32#include <linux/rcupdate.h>
31 33
32#define MAX_SEQ_NR (INT_MAX - NR_CPUS)
33#define MAX_OBJ_NUM 1000 34#define MAX_OBJ_NUM 1000
34 35
35static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) 36static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
@@ -43,18 +44,19 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
43 return target_cpu; 44 return target_cpu;
44} 45}
45 46
46static int padata_cpu_hash(struct padata_priv *padata) 47static int padata_cpu_hash(struct parallel_data *pd)
47{ 48{
48 int cpu_index; 49 int cpu_index;
49 struct parallel_data *pd;
50
51 pd = padata->pd;
52 50
53 /* 51 /*
54 * Hash the sequence numbers to the cpus by taking 52 * Hash the sequence numbers to the cpus by taking
55 * seq_nr mod. number of cpus in use. 53 * seq_nr mod. number of cpus in use.
56 */ 54 */
57 cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu); 55
56 spin_lock(&pd->seq_lock);
57 cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu);
58 pd->seq_nr++;
59 spin_unlock(&pd->seq_lock);
58 60
59 return padata_index_to_cpu(pd, cpu_index); 61 return padata_index_to_cpu(pd, cpu_index);
60} 62}
@@ -132,12 +134,7 @@ int padata_do_parallel(struct padata_instance *pinst,
132 padata->pd = pd; 134 padata->pd = pd;
133 padata->cb_cpu = cb_cpu; 135 padata->cb_cpu = cb_cpu;
134 136
135 if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr)) 137 target_cpu = padata_cpu_hash(pd);
136 atomic_set(&pd->seq_nr, -1);
137
138 padata->seq_nr = atomic_inc_return(&pd->seq_nr);
139
140 target_cpu = padata_cpu_hash(padata);
141 queue = per_cpu_ptr(pd->pqueue, target_cpu); 138 queue = per_cpu_ptr(pd->pqueue, target_cpu);
142 139
143 spin_lock(&queue->parallel.lock); 140 spin_lock(&queue->parallel.lock);
@@ -173,7 +170,7 @@ EXPORT_SYMBOL(padata_do_parallel);
173static struct padata_priv *padata_get_next(struct parallel_data *pd) 170static struct padata_priv *padata_get_next(struct parallel_data *pd)
174{ 171{
175 int cpu, num_cpus; 172 int cpu, num_cpus;
176 int next_nr, next_index; 173 unsigned int next_nr, next_index;
177 struct padata_parallel_queue *queue, *next_queue; 174 struct padata_parallel_queue *queue, *next_queue;
178 struct padata_priv *padata; 175 struct padata_priv *padata;
179 struct padata_list *reorder; 176 struct padata_list *reorder;
@@ -189,14 +186,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
189 cpu = padata_index_to_cpu(pd, next_index); 186 cpu = padata_index_to_cpu(pd, next_index);
190 next_queue = per_cpu_ptr(pd->pqueue, cpu); 187 next_queue = per_cpu_ptr(pd->pqueue, cpu);
191 188
192 if (unlikely(next_nr > pd->max_seq_nr)) {
193 next_nr = next_nr - pd->max_seq_nr - 1;
194 next_index = next_nr % num_cpus;
195 cpu = padata_index_to_cpu(pd, next_index);
196 next_queue = per_cpu_ptr(pd->pqueue, cpu);
197 pd->processed = 0;
198 }
199
200 padata = NULL; 189 padata = NULL;
201 190
202 reorder = &next_queue->reorder; 191 reorder = &next_queue->reorder;
@@ -205,8 +194,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
205 padata = list_entry(reorder->list.next, 194 padata = list_entry(reorder->list.next,
206 struct padata_priv, list); 195 struct padata_priv, list);
207 196
208 BUG_ON(next_nr != padata->seq_nr);
209
210 spin_lock(&reorder->lock); 197 spin_lock(&reorder->lock);
211 list_del_init(&padata->list); 198 list_del_init(&padata->list);
212 atomic_dec(&pd->reorder_objects); 199 atomic_dec(&pd->reorder_objects);
@@ -230,6 +217,7 @@ out:
230 217
231static void padata_reorder(struct parallel_data *pd) 218static void padata_reorder(struct parallel_data *pd)
232{ 219{
220 int cb_cpu;
233 struct padata_priv *padata; 221 struct padata_priv *padata;
234 struct padata_serial_queue *squeue; 222 struct padata_serial_queue *squeue;
235 struct padata_instance *pinst = pd->pinst; 223 struct padata_instance *pinst = pd->pinst;
@@ -270,13 +258,14 @@ static void padata_reorder(struct parallel_data *pd)
270 return; 258 return;
271 } 259 }
272 260
273 squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu); 261 cb_cpu = padata->cb_cpu;
262 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
274 263
275 spin_lock(&squeue->serial.lock); 264 spin_lock(&squeue->serial.lock);
276 list_add_tail(&padata->list, &squeue->serial.list); 265 list_add_tail(&padata->list, &squeue->serial.list);
277 spin_unlock(&squeue->serial.lock); 266 spin_unlock(&squeue->serial.lock);
278 267
279 queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work); 268 queue_work_on(cb_cpu, pinst->wq, &squeue->work);
280 } 269 }
281 270
282 spin_unlock_bh(&pd->lock); 271 spin_unlock_bh(&pd->lock);
@@ -367,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
367 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) 356 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
368 return -ENOMEM; 357 return -ENOMEM;
369 358
370 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); 359 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
371 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { 360 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
372 free_cpumask_var(pd->cpumask.cbcpu); 361 free_cpumask_var(pd->cpumask.cbcpu);
373 return -ENOMEM; 362 return -ENOMEM;
374 } 363 }
375 364
376 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); 365 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
377 return 0; 366 return 0;
378} 367}
379 368
@@ -400,7 +389,7 @@ static void padata_init_squeues(struct parallel_data *pd)
400/* Initialize all percpu queues used by parallel workers */ 389/* Initialize all percpu queues used by parallel workers */
401static void padata_init_pqueues(struct parallel_data *pd) 390static void padata_init_pqueues(struct parallel_data *pd)
402{ 391{
403 int cpu_index, num_cpus, cpu; 392 int cpu_index, cpu;
404 struct padata_parallel_queue *pqueue; 393 struct padata_parallel_queue *pqueue;
405 394
406 cpu_index = 0; 395 cpu_index = 0;
@@ -415,9 +404,6 @@ static void padata_init_pqueues(struct parallel_data *pd)
415 INIT_WORK(&pqueue->work, padata_parallel_worker); 404 INIT_WORK(&pqueue->work, padata_parallel_worker);
416 atomic_set(&pqueue->num_obj, 0); 405 atomic_set(&pqueue->num_obj, 0);
417 } 406 }
418
419 num_cpus = cpumask_weight(pd->cpumask.pcpu);
420 pd->max_seq_nr = num_cpus ? (MAX_SEQ_NR / num_cpus) * num_cpus - 1 : 0;
421} 407}
422 408
423/* Allocate and initialize the internal cpumask dependent resources. */ 409
@@ -444,7 +430,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
444 padata_init_pqueues(pd); 430 padata_init_pqueues(pd);
445 padata_init_squeues(pd); 431 padata_init_squeues(pd);
446 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); 432 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
447 atomic_set(&pd->seq_nr, -1); 433 pd->seq_nr = 0;
448 atomic_set(&pd->reorder_objects, 0); 434 atomic_set(&pd->reorder_objects, 0);
449 atomic_set(&pd->refcnt, 0); 435 atomic_set(&pd->refcnt, 0);
450 pd->pinst = pinst; 436 pd->pinst = pinst;
@@ -580,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
580static bool padata_validate_cpumask(struct padata_instance *pinst, 566static bool padata_validate_cpumask(struct padata_instance *pinst,
581 const struct cpumask *cpumask) 567 const struct cpumask *cpumask)
582{ 568{
583 if (!cpumask_intersects(cpumask, cpu_active_mask)) { 569 if (!cpumask_intersects(cpumask, cpu_online_mask)) {
584 pinst->flags |= PADATA_INVALID; 570 pinst->flags |= PADATA_INVALID;
585 return false; 571 return false;
586 } 572 }
@@ -694,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
694{ 680{
695 struct parallel_data *pd; 681 struct parallel_data *pd;
696 682
697 if (cpumask_test_cpu(cpu, cpu_active_mask)) { 683 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
698 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, 684 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
699 pinst->cpumask.cbcpu); 685 pinst->cpumask.cbcpu);
700 if (!pd) 686 if (!pd)
@@ -762,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
762 return -ENOMEM; 748 return -ENOMEM;
763 749
764 padata_replace(pinst, pd); 750 padata_replace(pinst, pd);
751
752 cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
753 cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
765 } 754 }
766 755
767 return 0; 756 return 0;
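
Editor's note: the padata.c changes replace the per-object atomic seq_nr (and its MAX_SEQ_NR wrap handling) with a single counter in struct parallel_data, protected by seq_lock and hashed onto the parallel cpumask by plain modulo. The following userspace model is only meant to illustrate that round-robin distribution; the names mirror the patch, but none of this is kernel code (build with -lpthread).

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t seq_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int seq_nr;

/* Hash the next sequence number onto one of nr_online_cpus worker slots. */
static int cpu_hash(int nr_online_cpus)
{
        unsigned int nr;

        pthread_mutex_lock(&seq_lock);
        nr = seq_nr++;                  /* wraps naturally, no MAX_SEQ_NR cap */
        pthread_mutex_unlock(&seq_lock);

        return nr % nr_online_cpus;     /* index into the pcpu cpumask */
}

int main(void)
{
        for (int i = 0; i < 8; i++)
                printf("object %d -> cpu index %d\n", i, cpu_hash(3));
        return 0;
}
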
diff --git a/kernel/panic.c b/kernel/panic.c
index 80aed44e345a..8ed89a175d79 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -97,7 +97,7 @@ void panic(const char *fmt, ...)
97 /* 97 /*
98 * Avoid nested stack-dumping if a panic occurs during oops processing 98 * Avoid nested stack-dumping if a panic occurs during oops processing
99 */ 99 */
100 if (!oops_in_progress) 100 if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
101 dump_stack(); 101 dump_stack();
102#endif 102#endif
103 103
diff --git a/kernel/params.c b/kernel/params.c
index 4bc965d8a1fe..f37d82631347 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -15,7 +15,6 @@
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/ 17*/
18#include <linux/module.h>
19#include <linux/kernel.h> 18#include <linux/kernel.h>
20#include <linux/string.h> 19#include <linux/string.h>
21#include <linux/errno.h> 20#include <linux/errno.h>
@@ -88,6 +87,8 @@ static int parse_one(char *param,
88 char *val, 87 char *val,
89 const struct kernel_param *params, 88 const struct kernel_param *params,
90 unsigned num_params, 89 unsigned num_params,
90 s16 min_level,
91 s16 max_level,
91 int (*handle_unknown)(char *param, char *val)) 92 int (*handle_unknown)(char *param, char *val))
92{ 93{
93 unsigned int i; 94 unsigned int i;
@@ -96,6 +97,9 @@ static int parse_one(char *param,
96 /* Find parameter */ 97 /* Find parameter */
97 for (i = 0; i < num_params; i++) { 98 for (i = 0; i < num_params; i++) {
98 if (parameq(param, params[i].name)) { 99 if (parameq(param, params[i].name)) {
100 if (params[i].level < min_level
101 || params[i].level > max_level)
102 return 0;
99 /* No one handled NULL, so do it here. */ 103 /* No one handled NULL, so do it here. */
100 if (!val && params[i].ops->set != param_set_bool 104 if (!val && params[i].ops->set != param_set_bool
101 && params[i].ops->set != param_set_bint) 105 && params[i].ops->set != param_set_bint)
@@ -175,6 +179,8 @@ int parse_args(const char *name,
175 char *args, 179 char *args,
176 const struct kernel_param *params, 180 const struct kernel_param *params,
177 unsigned num, 181 unsigned num,
182 s16 min_level,
183 s16 max_level,
178 int (*unknown)(char *param, char *val)) 184 int (*unknown)(char *param, char *val))
179{ 185{
180 char *param, *val; 186 char *param, *val;
@@ -190,7 +196,8 @@ int parse_args(const char *name,
190 196
191 args = next_arg(args, &param, &val); 197 args = next_arg(args, &param, &val);
192 irq_was_disabled = irqs_disabled(); 198 irq_was_disabled = irqs_disabled();
193 ret = parse_one(param, val, params, num, unknown); 199 ret = parse_one(param, val, params, num,
200 min_level, max_level, unknown);
194 if (irq_was_disabled && !irqs_disabled()) { 201 if (irq_was_disabled && !irqs_disabled()) {
195 printk(KERN_WARNING "parse_args(): option '%s' enabled " 202 printk(KERN_WARNING "parse_args(): option '%s' enabled "
196 "irq's!\n", param); 203 "irq's!\n", param);
@@ -298,35 +305,18 @@ EXPORT_SYMBOL(param_ops_charp);
298/* Actually could be a bool or an int, for historical reasons. */ 305/* Actually could be a bool or an int, for historical reasons. */
299int param_set_bool(const char *val, const struct kernel_param *kp) 306int param_set_bool(const char *val, const struct kernel_param *kp)
300{ 307{
301 bool v;
302 int ret;
303
304 /* No equals means "set"... */ 308 /* No equals means "set"... */
305 if (!val) val = "1"; 309 if (!val) val = "1";
306 310
307 /* One of =[yYnN01] */ 311 /* One of =[yYnN01] */
308 ret = strtobool(val, &v); 312 return strtobool(val, kp->arg);
309 if (ret)
310 return ret;
311
312 if (kp->flags & KPARAM_ISBOOL)
313 *(bool *)kp->arg = v;
314 else
315 *(int *)kp->arg = v;
316 return 0;
317} 313}
318EXPORT_SYMBOL(param_set_bool); 314EXPORT_SYMBOL(param_set_bool);
319 315
320int param_get_bool(char *buffer, const struct kernel_param *kp) 316int param_get_bool(char *buffer, const struct kernel_param *kp)
321{ 317{
322 bool val;
323 if (kp->flags & KPARAM_ISBOOL)
324 val = *(bool *)kp->arg;
325 else
326 val = *(int *)kp->arg;
327
328 /* Y and N chosen as being relatively non-coder friendly */ 318 /* Y and N chosen as being relatively non-coder friendly */
329 return sprintf(buffer, "%c", val ? 'Y' : 'N'); 319 return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N');
330} 320}
331EXPORT_SYMBOL(param_get_bool); 321EXPORT_SYMBOL(param_get_bool);
332 322
@@ -344,7 +334,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp)
344 struct kernel_param dummy; 334 struct kernel_param dummy;
345 335
346 dummy.arg = &boolval; 336 dummy.arg = &boolval;
347 dummy.flags = KPARAM_ISBOOL;
348 ret = param_set_bool(val, &dummy); 337 ret = param_set_bool(val, &dummy);
349 if (ret == 0) 338 if (ret == 0)
350 *(bool *)kp->arg = !boolval; 339 *(bool *)kp->arg = !boolval;
@@ -373,7 +362,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
373 /* Match bool exactly, by re-using it. */ 362 /* Match bool exactly, by re-using it. */
374 boolkp = *kp; 363 boolkp = *kp;
375 boolkp.arg = &v; 364 boolkp.arg = &v;
376 boolkp.flags |= KPARAM_ISBOOL;
377 365
378 ret = param_set_bool(val, &boolkp); 366 ret = param_set_bool(val, &boolkp);
379 if (ret == 0) 367 if (ret == 0)
@@ -394,7 +382,7 @@ static int param_array(const char *name,
394 unsigned int min, unsigned int max, 382 unsigned int min, unsigned int max,
395 void *elem, int elemsize, 383 void *elem, int elemsize,
396 int (*set)(const char *, const struct kernel_param *kp), 384 int (*set)(const char *, const struct kernel_param *kp),
397 u16 flags, 385 s16 level,
398 unsigned int *num) 386 unsigned int *num)
399{ 387{
400 int ret; 388 int ret;
@@ -404,7 +392,7 @@ static int param_array(const char *name,
404 /* Get the name right for errors. */ 392 /* Get the name right for errors. */
405 kp.name = name; 393 kp.name = name;
406 kp.arg = elem; 394 kp.arg = elem;
407 kp.flags = flags; 395 kp.level = level;
408 396
409 *num = 0; 397 *num = 0;
410 /* We expect a comma-separated list of values. */ 398 /* We expect a comma-separated list of values. */
@@ -445,7 +433,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
445 unsigned int temp_num; 433 unsigned int temp_num;
446 434
447 return param_array(kp->name, val, 1, arr->max, arr->elem, 435 return param_array(kp->name, val, 1, arr->max, arr->elem,
448 arr->elemsize, arr->ops->set, kp->flags, 436 arr->elemsize, arr->ops->set, kp->level,
449 arr->num ?: &temp_num); 437 arr->num ?: &temp_num);
450} 438}
451 439
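
Editor's note: parse_one() and parse_args() now carry a [min_level, max_level] window, and parameters whose level falls outside it are silently skipped rather than applied; module arguments are parsed with the full s16 range (-32768..32767), as the module.c hunk above shows. A simplified userspace model of that filter, for illustration only:

#include <stdio.h>

struct kparam {
        const char *name;
        short level;
};

/* Apply a parameter only if its level lies inside the requested window. */
static int parse_one(const struct kparam *p, short min_level, short max_level)
{
        if (p->level < min_level || p->level > max_level)
                return 0;               /* out of range: ignore, no error */
        printf("applying %s (level %d)\n", p->name, p->level);
        return 1;
}

int main(void)
{
        struct kparam early  = { "early_param",  -1 };
        struct kparam normal = { "normal_param",  0 };

        parse_one(&early, -32768, 32767);   /* full range, like module args */
        parse_one(&normal, 1, 32767);       /* skipped: level 0 < 1 */
        return 0;
}
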
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a8968396046d..57bc1fd35b3c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/reboot.h>
18 19
19#define BITS_PER_PAGE (PAGE_SIZE*8) 20#define BITS_PER_PAGE (PAGE_SIZE*8)
20 21
@@ -168,13 +169,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
168 while (nr > 0) { 169 while (nr > 0) {
169 rcu_read_lock(); 170 rcu_read_lock();
170 171
171 /*
172 * Any nested-container's init processes won't ignore the
173 * SEND_SIG_NOINFO signal, see send_signal()->si_fromuser().
174 */
175 task = pid_task(find_vpid(nr), PIDTYPE_PID); 172 task = pid_task(find_vpid(nr), PIDTYPE_PID);
176 if (task) 173 if (task && !__fatal_signal_pending(task))
177 send_sig_info(SIGKILL, SEND_SIG_NOINFO, task); 174 send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
178 175
179 rcu_read_unlock(); 176 rcu_read_unlock();
180 177
@@ -187,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
187 rc = sys_wait4(-1, NULL, __WALL, NULL); 184 rc = sys_wait4(-1, NULL, __WALL, NULL);
188 } while (rc != -ECHILD); 185 } while (rc != -ECHILD);
189 186
187 if (pid_ns->reboot)
188 current->signal->group_exit_code = pid_ns->reboot;
189
190 acct_exit_ns(pid_ns); 190 acct_exit_ns(pid_ns);
191 return; 191 return;
192} 192}
@@ -221,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
221 221
222static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; 222static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
223 223
224int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
225{
226 if (pid_ns == &init_pid_ns)
227 return 0;
228
229 switch (cmd) {
230 case LINUX_REBOOT_CMD_RESTART2:
231 case LINUX_REBOOT_CMD_RESTART:
232 pid_ns->reboot = SIGHUP;
233 break;
234
235 case LINUX_REBOOT_CMD_POWER_OFF:
236 case LINUX_REBOOT_CMD_HALT:
237 pid_ns->reboot = SIGINT;
238 break;
239 default:
240 return -EINVAL;
241 }
242
243 read_lock(&tasklist_lock);
244 force_sig(SIGKILL, pid_ns->child_reaper);
245 read_unlock(&tasklist_lock);
246
247 do_exit(0);
248
249 /* Not reached */
250 return 0;
251}
252
224static __init int pid_namespaces_init(void) 253static __init int pid_namespaces_init(void)
225{ 254{
226 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); 255 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
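
Editor's note: with reboot_pid_ns() in place, reboot(2) issued by the init task of a child PID namespace no longer reboots the host; the namespace init is killed and the parent can tell restart from halt/power-off by the signal reported in the child's exit status (SIGHUP vs SIGINT). A hedged userspace demonstration, assuming CAP_SYS_ADMIN and a downward-growing stack, with error handling trimmed:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/reboot.h>
#include <sys/wait.h>
#include <unistd.h>

static char stack[64 * 1024];

static int ns_init(void *arg)
{
        /* We are PID 1 of the new namespace; this call does not return here. */
        reboot(RB_AUTOBOOT);
        return 0;
}

int main(void)
{
        int status;
        pid_t pid = clone(ns_init, stack + sizeof(stack),
                          CLONE_NEWPID | SIGCHLD, NULL);

        if (pid < 0) {
                perror("clone");
                return 1;
        }
        waitpid(pid, &status, 0);
        if (WIFSIGNALED(status))
                printf("namespace init terminated by signal %d (%s)\n",
                       WTERMSIG(status),
                       WTERMSIG(status) == SIGHUP ? "restart" : "halt/power-off");
        return 0;
}
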
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 07e0e28ffba7..66d808ec5252 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,7 +1,8 @@
1 1
2ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG 2ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
3 3
4obj-$(CONFIG_PM) += main.o qos.o 4obj-y += qos.o
5obj-$(CONFIG_PM) += main.o
5obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o 6obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
6obj-$(CONFIG_FREEZER) += process.o 7obj-$(CONFIG_FREEZER) += process.o
7obj-$(CONFIG_SUSPEND) += suspend.o 8obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 6d6d28870335..e09dfbfeecee 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -16,7 +16,6 @@
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/device.h> 17#include <linux/device.h>
18#include <linux/async.h> 18#include <linux/async.h>
19#include <linux/kmod.h>
20#include <linux/delay.h> 19#include <linux/delay.h>
21#include <linux/fs.h> 20#include <linux/fs.h>
22#include <linux/mount.h> 21#include <linux/mount.h>
@@ -245,8 +244,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
245 * create_image - Create a hibernation image. 244 * create_image - Create a hibernation image.
246 * @platform_mode: Whether or not to use the platform driver. 245 * @platform_mode: Whether or not to use the platform driver.
247 * 246 *
248 * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image 247 * Execute device drivers' "late" and "noirq" freeze callbacks, create a
249 * and execute the drivers' .thaw_noirq() callbacks. 248 * hibernation image and run the drivers' "noirq" and "early" thaw callbacks.
250 * 249 *
251 * Control reappears in this routine after the subsequent restore. 250 * Control reappears in this routine after the subsequent restore.
252 */ 251 */
@@ -254,7 +253,7 @@ static int create_image(int platform_mode)
254{ 253{
255 int error; 254 int error;
256 255
257 error = dpm_suspend_noirq(PMSG_FREEZE); 256 error = dpm_suspend_end(PMSG_FREEZE);
258 if (error) { 257 if (error) {
259 printk(KERN_ERR "PM: Some devices failed to power down, " 258 printk(KERN_ERR "PM: Some devices failed to power down, "
260 "aborting hibernation\n"); 259 "aborting hibernation\n");
@@ -306,7 +305,7 @@ static int create_image(int platform_mode)
306 Platform_finish: 305 Platform_finish:
307 platform_finish(platform_mode); 306 platform_finish(platform_mode);
308 307
309 dpm_resume_noirq(in_suspend ? 308 dpm_resume_start(in_suspend ?
310 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 309 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
311 310
312 return error; 311 return error;
@@ -343,13 +342,13 @@ int hibernation_snapshot(int platform_mode)
343 * successful freezer test. 342 * successful freezer test.
344 */ 343 */
345 freezer_test_done = true; 344 freezer_test_done = true;
346 goto Cleanup; 345 goto Thaw;
347 } 346 }
348 347
349 error = dpm_prepare(PMSG_FREEZE); 348 error = dpm_prepare(PMSG_FREEZE);
350 if (error) { 349 if (error) {
351 dpm_complete(PMSG_RECOVER); 350 dpm_complete(PMSG_RECOVER);
352 goto Cleanup; 351 goto Thaw;
353 } 352 }
354 353
355 suspend_console(); 354 suspend_console();
@@ -385,6 +384,8 @@ int hibernation_snapshot(int platform_mode)
385 platform_end(platform_mode); 384 platform_end(platform_mode);
386 return error; 385 return error;
387 386
387 Thaw:
388 thaw_kernel_threads();
388 Cleanup: 389 Cleanup:
389 swsusp_free(); 390 swsusp_free();
390 goto Close; 391 goto Close;
@@ -394,16 +395,16 @@ int hibernation_snapshot(int platform_mode)
394 * resume_target_kernel - Restore system state from a hibernation image. 395 * resume_target_kernel - Restore system state from a hibernation image.
395 * @platform_mode: Whether or not to use the platform driver. 396 * @platform_mode: Whether or not to use the platform driver.
396 * 397 *
397 * Execute device drivers' .freeze_noirq() callbacks, restore the contents of 398 * Execute device drivers' "noirq" and "late" freeze callbacks, restore the
398 * highmem that have not been restored yet from the image and run the low-level 399 * contents of highmem that have not been restored yet from the image and run
399 * code that will restore the remaining contents of memory and switch to the 400 * the low-level code that will restore the remaining contents of memory and
400 * just restored target kernel. 401 * switch to the just restored target kernel.
401 */ 402 */
402static int resume_target_kernel(bool platform_mode) 403static int resume_target_kernel(bool platform_mode)
403{ 404{
404 int error; 405 int error;
405 406
406 error = dpm_suspend_noirq(PMSG_QUIESCE); 407 error = dpm_suspend_end(PMSG_QUIESCE);
407 if (error) { 408 if (error) {
408 printk(KERN_ERR "PM: Some devices failed to power down, " 409 printk(KERN_ERR "PM: Some devices failed to power down, "
409 "aborting resume\n"); 410 "aborting resume\n");
@@ -460,7 +461,7 @@ static int resume_target_kernel(bool platform_mode)
460 Cleanup: 461 Cleanup:
461 platform_restore_cleanup(platform_mode); 462 platform_restore_cleanup(platform_mode);
462 463
463 dpm_resume_noirq(PMSG_RECOVER); 464 dpm_resume_start(PMSG_RECOVER);
464 465
465 return error; 466 return error;
466} 467}
@@ -518,7 +519,7 @@ int hibernation_platform_enter(void)
518 goto Resume_devices; 519 goto Resume_devices;
519 } 520 }
520 521
521 error = dpm_suspend_noirq(PMSG_HIBERNATE); 522 error = dpm_suspend_end(PMSG_HIBERNATE);
522 if (error) 523 if (error)
523 goto Resume_devices; 524 goto Resume_devices;
524 525
@@ -549,7 +550,7 @@ int hibernation_platform_enter(void)
549 Platform_finish: 550 Platform_finish:
550 hibernation_ops->finish(); 551 hibernation_ops->finish();
551 552
552 dpm_resume_noirq(PMSG_RESTORE); 553 dpm_resume_start(PMSG_RESTORE);
553 554
554 Resume_devices: 555 Resume_devices:
555 entering_platform_hibernation = false; 556 entering_platform_hibernation = false;
@@ -609,10 +610,6 @@ int hibernate(void)
609 if (error) 610 if (error)
610 goto Exit; 611 goto Exit;
611 612
612 error = usermodehelper_disable();
613 if (error)
614 goto Exit;
615
616 /* Allocate memory management structures */ 613 /* Allocate memory management structures */
617 error = create_basic_memory_bitmaps(); 614 error = create_basic_memory_bitmaps();
618 if (error) 615 if (error)
@@ -624,15 +621,11 @@ int hibernate(void)
624 621
625 error = freeze_processes(); 622 error = freeze_processes();
626 if (error) 623 if (error)
627 goto Finish; 624 goto Free_bitmaps;
628 625
629 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); 626 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
630 if (error) 627 if (error || freezer_test_done)
631 goto Thaw;
632 if (freezer_test_done) {
633 freezer_test_done = false;
634 goto Thaw; 628 goto Thaw;
635 }
636 629
637 if (in_suspend) { 630 if (in_suspend) {
638 unsigned int flags = 0; 631 unsigned int flags = 0;
@@ -657,9 +650,12 @@ int hibernate(void)
657 650
658 Thaw: 651 Thaw:
659 thaw_processes(); 652 thaw_processes();
660 Finish: 653
654 /* Don't bother checking whether freezer_test_done is true */
655 freezer_test_done = false;
656
657 Free_bitmaps:
661 free_basic_memory_bitmaps(); 658 free_basic_memory_bitmaps();
662 usermodehelper_enable();
663 Exit: 659 Exit:
664 pm_notifier_call_chain(PM_POST_HIBERNATION); 660 pm_notifier_call_chain(PM_POST_HIBERNATION);
665 pm_restore_console(); 661 pm_restore_console();
@@ -774,15 +770,9 @@ static int software_resume(void)
774 if (error) 770 if (error)
775 goto close_finish; 771 goto close_finish;
776 772
777 error = usermodehelper_disable();
778 if (error)
779 goto close_finish;
780
781 error = create_basic_memory_bitmaps(); 773 error = create_basic_memory_bitmaps();
782 if (error) { 774 if (error)
783 usermodehelper_enable();
784 goto close_finish; 775 goto close_finish;
785 }
786 776
787 pr_debug("PM: Preparing processes for restore.\n"); 777 pr_debug("PM: Preparing processes for restore.\n");
788 error = freeze_processes(); 778 error = freeze_processes();
@@ -803,7 +793,6 @@ static int software_resume(void)
803 thaw_processes(); 793 thaw_processes();
804 Done: 794 Done:
805 free_basic_memory_bitmaps(); 795 free_basic_memory_bitmaps();
806 usermodehelper_enable();
807 Finish: 796 Finish:
808 pm_notifier_call_chain(PM_POST_RESTORE); 797 pm_notifier_call_chain(PM_POST_RESTORE);
809 pm_restore_console(); 798 pm_restore_console();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 9824b41e5a18..1c12581f1c62 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -165,16 +165,20 @@ static int suspend_stats_show(struct seq_file *s, void *unused)
165 last_errno %= REC_FAILED_NUM; 165 last_errno %= REC_FAILED_NUM;
166 last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; 166 last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
167 last_step %= REC_FAILED_NUM; 167 last_step %= REC_FAILED_NUM;
168 seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n" 168 seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
169 "%s: %d\n%s: %d\n%s: %d\n%s: %d\n", 169 "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
170 "success", suspend_stats.success, 170 "success", suspend_stats.success,
171 "fail", suspend_stats.fail, 171 "fail", suspend_stats.fail,
172 "failed_freeze", suspend_stats.failed_freeze, 172 "failed_freeze", suspend_stats.failed_freeze,
173 "failed_prepare", suspend_stats.failed_prepare, 173 "failed_prepare", suspend_stats.failed_prepare,
174 "failed_suspend", suspend_stats.failed_suspend, 174 "failed_suspend", suspend_stats.failed_suspend,
175 "failed_suspend_late",
176 suspend_stats.failed_suspend_late,
175 "failed_suspend_noirq", 177 "failed_suspend_noirq",
176 suspend_stats.failed_suspend_noirq, 178 suspend_stats.failed_suspend_noirq,
177 "failed_resume", suspend_stats.failed_resume, 179 "failed_resume", suspend_stats.failed_resume,
180 "failed_resume_early",
181 suspend_stats.failed_resume_early,
178 "failed_resume_noirq", 182 "failed_resume_noirq",
179 suspend_stats.failed_resume_noirq); 183 suspend_stats.failed_resume_noirq);
180 seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", 184 seq_printf(s, "failures:\n last_failed_dev:\t%-s\n",
@@ -287,16 +291,10 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
287 291
288#ifdef CONFIG_SUSPEND 292#ifdef CONFIG_SUSPEND
289 for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { 293 for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
290 if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) 294 if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
295 error = pm_suspend(state);
291 break; 296 break;
292 } 297 }
293 if (state < PM_SUSPEND_MAX && *s) {
294 error = enter_state(state);
295 if (error) {
296 suspend_stats.fail++;
297 dpm_save_failed_errno(error);
298 } else
299 suspend_stats.success++;
300 } 298 }
301#endif 299#endif
302 300
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 21724eee5206..98f3622d7407 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -177,13 +177,11 @@ extern const char *const pm_states[];
177 177
178extern bool valid_state(suspend_state_t state); 178extern bool valid_state(suspend_state_t state);
179extern int suspend_devices_and_enter(suspend_state_t state); 179extern int suspend_devices_and_enter(suspend_state_t state);
180extern int enter_state(suspend_state_t state);
181#else /* !CONFIG_SUSPEND */ 180#else /* !CONFIG_SUSPEND */
182static inline int suspend_devices_and_enter(suspend_state_t state) 181static inline int suspend_devices_and_enter(suspend_state_t state)
183{ 182{
184 return -ENOSYS; 183 return -ENOSYS;
185} 184}
186static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
187static inline bool valid_state(suspend_state_t state) { return false; } 185static inline bool valid_state(suspend_state_t state) { return false; }
188#endif /* !CONFIG_SUSPEND */ 186#endif /* !CONFIG_SUSPEND */
189 187
@@ -234,16 +232,14 @@ static inline int suspend_freeze_processes(void)
234 int error; 232 int error;
235 233
236 error = freeze_processes(); 234 error = freeze_processes();
237
238 /* 235 /*
239 * freeze_processes() automatically thaws every task if freezing 236 * freeze_processes() automatically thaws every task if freezing
240 * fails. So we need not do anything extra upon error. 237 * fails. So we need not do anything extra upon error.
241 */ 238 */
242 if (error) 239 if (error)
243 goto Finish; 240 return error;
244 241
245 error = freeze_kernel_threads(); 242 error = freeze_kernel_threads();
246
247 /* 243 /*
248 * freeze_kernel_threads() thaws only kernel threads upon freezing 244 * freeze_kernel_threads() thaws only kernel threads upon freezing
249 * failure. So we have to thaw the userspace tasks ourselves. 245 * failure. So we have to thaw the userspace tasks ourselves.
@@ -251,7 +247,6 @@ static inline int suspend_freeze_processes(void)
251 if (error) 247 if (error)
252 thaw_processes(); 248 thaw_processes();
253 249
254 Finish:
255 return error; 250 return error;
256} 251}
257 252
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7e426459e60a..19db29f67558 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -16,6 +16,7 @@
16#include <linux/freezer.h> 16#include <linux/freezer.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/workqueue.h> 18#include <linux/workqueue.h>
19#include <linux/kmod.h>
19 20
20/* 21/*
21 * Timeout for stopping processes 22 * Timeout for stopping processes
@@ -53,11 +54,9 @@ static int try_to_freeze_tasks(bool user_only)
53 * It is "frozen enough". If the task does wake 54 * It is "frozen enough". If the task does wake
54 * up, it will immediately call try_to_freeze. 55 * up, it will immediately call try_to_freeze.
55 * 56 *
56 * Because freeze_task() goes through p's 57 * Because freeze_task() goes through p's scheduler lock, it's
57 * scheduler lock after setting TIF_FREEZE, it's 58 * guaranteed that TASK_STOPPED/TRACED -> TASK_RUNNING
58 * guaranteed that either we see TASK_RUNNING or 59 * transition can't race with task state testing here.
59 * try_to_stop() after schedule() in ptrace/signal
60 * stop sees TIF_FREEZE.
61 */ 60 */
62 if (!task_is_stopped_or_traced(p) && 61 if (!task_is_stopped_or_traced(p) &&
63 !freezer_should_skip(p)) 62 !freezer_should_skip(p))
@@ -98,13 +97,15 @@ static int try_to_freeze_tasks(bool user_only)
98 elapsed_csecs / 100, elapsed_csecs % 100, 97 elapsed_csecs / 100, elapsed_csecs % 100,
99 todo - wq_busy, wq_busy); 98 todo - wq_busy, wq_busy);
100 99
101 read_lock(&tasklist_lock); 100 if (!wakeup) {
102 do_each_thread(g, p) { 101 read_lock(&tasklist_lock);
103 if (!wakeup && !freezer_should_skip(p) && 102 do_each_thread(g, p) {
104 p != current && freezing(p) && !frozen(p)) 103 if (p != current && !freezer_should_skip(p)
105 sched_show_task(p); 104 && freezing(p) && !frozen(p))
106 } while_each_thread(g, p); 105 sched_show_task(p);
107 read_unlock(&tasklist_lock); 106 } while_each_thread(g, p);
107 read_unlock(&tasklist_lock);
108 }
108 } else { 109 } else {
109 printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, 110 printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
110 elapsed_csecs % 100); 111 elapsed_csecs % 100);
@@ -122,6 +123,10 @@ int freeze_processes(void)
122{ 123{
123 int error; 124 int error;
124 125
126 error = __usermodehelper_disable(UMH_FREEZING);
127 if (error)
128 return error;
129
125 if (!pm_freezing) 130 if (!pm_freezing)
126 atomic_inc(&system_freezing_cnt); 131 atomic_inc(&system_freezing_cnt);
127 132
@@ -130,6 +135,7 @@ int freeze_processes(void)
130 error = try_to_freeze_tasks(true); 135 error = try_to_freeze_tasks(true);
131 if (!error) { 136 if (!error) {
132 printk("done."); 137 printk("done.");
138 __usermodehelper_set_disable_depth(UMH_DISABLED);
133 oom_killer_disable(); 139 oom_killer_disable();
134 } 140 }
135 printk("\n"); 141 printk("\n");
@@ -187,6 +193,8 @@ void thaw_processes(void)
187 } while_each_thread(g, p); 193 } while_each_thread(g, p);
188 read_unlock(&tasklist_lock); 194 read_unlock(&tasklist_lock);
189 195
196 usermodehelper_enable();
197
190 schedule(); 198 schedule();
191 printk("done.\n"); 199 printk("done.\n");
192} 200}
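
Editor's note: freeze_processes() now owns the usermode-helper gate: helpers are put into UMH_FREEZING before user space is frozen, switched to UMH_DISABLED once freezing succeeds, and re-enabled from thaw_processes(). That is why the explicit usermodehelper_disable()/usermodehelper_enable() calls disappear from the hibernate, suspend and snapshot-ioctl paths in the surrounding hunks. A toy userspace state machine, purely to make that ordering explicit:

#include <stdio.h>

enum umh_state { UMH_ENABLED, UMH_FREEZING, UMH_DISABLED };
static enum umh_state umh = UMH_ENABLED;

static int freeze_processes_model(void)
{
        umh = UMH_FREEZING;     /* __usermodehelper_disable(UMH_FREEZING) */
        /* ... freeze user space tasks here ... */
        umh = UMH_DISABLED;     /* __usermodehelper_set_disable_depth(UMH_DISABLED) */
        return 0;
}

static void thaw_processes_model(void)
{
        /* ... thaw tasks here ... */
        umh = UMH_ENABLED;      /* usermodehelper_enable() */
}

int main(void)
{
        freeze_processes_model();
        printf("helpers while frozen: state %d\n", umh);
        thaw_processes_model();
        printf("helpers after thaw:  state %d\n", umh);
        return 0;
}
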
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 995e3bd3417b..6a031e684026 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -230,6 +230,21 @@ int pm_qos_request_active(struct pm_qos_request *req)
230EXPORT_SYMBOL_GPL(pm_qos_request_active); 230EXPORT_SYMBOL_GPL(pm_qos_request_active);
231 231
232/** 232/**
233 * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
234 * @work: work struct for the delayed work (timeout)
235 *
236 * This cancels the timeout request by falling back to the default at timeout.
237 */
238static void pm_qos_work_fn(struct work_struct *work)
239{
240 struct pm_qos_request *req = container_of(to_delayed_work(work),
241 struct pm_qos_request,
242 work);
243
244 pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
245}
246
247/**
233 * pm_qos_add_request - inserts new qos request into the list 248 * pm_qos_add_request - inserts new qos request into the list
234 * @req: pointer to a preallocated handle 249 * @req: pointer to a preallocated handle
235 * @pm_qos_class: identifies which list of qos request to use 250 * @pm_qos_class: identifies which list of qos request to use
@@ -253,6 +268,7 @@ void pm_qos_add_request(struct pm_qos_request *req,
253 return; 268 return;
254 } 269 }
255 req->pm_qos_class = pm_qos_class; 270 req->pm_qos_class = pm_qos_class;
271 INIT_DELAYED_WORK(&req->work, pm_qos_work_fn);
256 pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, 272 pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints,
257 &req->node, PM_QOS_ADD_REQ, value); 273 &req->node, PM_QOS_ADD_REQ, value);
258} 274}
@@ -279,6 +295,9 @@ void pm_qos_update_request(struct pm_qos_request *req,
279 return; 295 return;
280 } 296 }
281 297
298 if (delayed_work_pending(&req->work))
299 cancel_delayed_work_sync(&req->work);
300
282 if (new_value != req->node.prio) 301 if (new_value != req->node.prio)
283 pm_qos_update_target( 302 pm_qos_update_target(
284 pm_qos_array[req->pm_qos_class]->constraints, 303 pm_qos_array[req->pm_qos_class]->constraints,
@@ -287,6 +306,34 @@ void pm_qos_update_request(struct pm_qos_request *req,
287EXPORT_SYMBOL_GPL(pm_qos_update_request); 306EXPORT_SYMBOL_GPL(pm_qos_update_request);
288 307
289/** 308/**
309 * pm_qos_update_request_timeout - modifies an existing qos request temporarily.
310 * @req : handle to list element holding a pm_qos request to use
311 * @new_value: defines the temporal qos request
312 * @timeout_us: the effective duration of this qos request in usecs.
313 *
314 * After timeout_us, this qos request is cancelled automatically.
315 */
316void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,
317 unsigned long timeout_us)
318{
319 if (!req)
320 return;
321 if (WARN(!pm_qos_request_active(req),
322 "%s called for unknown object.", __func__))
323 return;
324
325 if (delayed_work_pending(&req->work))
326 cancel_delayed_work_sync(&req->work);
327
328 if (new_value != req->node.prio)
329 pm_qos_update_target(
330 pm_qos_array[req->pm_qos_class]->constraints,
331 &req->node, PM_QOS_UPDATE_REQ, new_value);
332
333 schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us));
334}
335
336/**
290 * pm_qos_remove_request - modifies an existing qos request 337 * pm_qos_remove_request - modifies an existing qos request
291 * @req: handle to request list element 338 * @req: handle to request list element
292 * 339 *
@@ -305,6 +352,9 @@ void pm_qos_remove_request(struct pm_qos_request *req)
305 return; 352 return;
306 } 353 }
307 354
355 if (delayed_work_pending(&req->work))
356 cancel_delayed_work_sync(&req->work);
357
308 pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, 358 pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
309 &req->node, PM_QOS_REMOVE_REQ, 359 &req->node, PM_QOS_REMOVE_REQ,
310 PM_QOS_DEFAULT_VALUE); 360 PM_QOS_DEFAULT_VALUE);
@@ -469,21 +519,18 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
469static int __init pm_qos_power_init(void) 519static int __init pm_qos_power_init(void)
470{ 520{
471 int ret = 0; 521 int ret = 0;
522 int i;
472 523
473 ret = register_pm_qos_misc(&cpu_dma_pm_qos); 524 BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
474 if (ret < 0) { 525
475 printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); 526 for (i = 1; i < PM_QOS_NUM_CLASSES; i++) {
476 return ret; 527 ret = register_pm_qos_misc(pm_qos_array[i]);
477 } 528 if (ret < 0) {
478 ret = register_pm_qos_misc(&network_lat_pm_qos); 529 printk(KERN_ERR "pm_qos_param: %s setup failed\n",
479 if (ret < 0) { 530 pm_qos_array[i]->name);
480 printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); 531 return ret;
481 return ret; 532 }
482 } 533 }
483 ret = register_pm_qos_misc(&network_throughput_pm_qos);
484 if (ret < 0)
485 printk(KERN_ERR
486 "pm_qos_param: network_throughput setup failed\n");
487 534
488 return ret; 535 return ret;
489} 536}
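
Editor's note: pm_qos_update_request_timeout() gives callers a constraint that cancels itself; the delayed work added above drops the request back to PM_QOS_DEFAULT_VALUE when the timeout fires. A hedged driver-style sketch of how it could be used (the request name and the numeric values below are made up for illustration):

#include <linux/pm_qos.h>

static struct pm_qos_request burst_req;  /* hypothetical per-device request */

static void burst_init(void)
{
        pm_qos_add_request(&burst_req, PM_QOS_CPU_DMA_LATENCY,
                           PM_QOS_DEFAULT_VALUE);
}

static void burst_begin(void)
{
        /* ask for <= 20 us CPU DMA latency for the next 2 ms, then auto-cancel */
        pm_qos_update_request_timeout(&burst_req, 20, 2000);
}

static void burst_exit(void)
{
        pm_qos_remove_request(&burst_req);
}
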
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 6a768e537001..0de28576807d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -711,9 +711,10 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
711 list_for_each_entry(region, &nosave_regions, list) { 711 list_for_each_entry(region, &nosave_regions, list) {
712 unsigned long pfn; 712 unsigned long pfn;
713 713
714 pr_debug("PM: Marking nosave pages: %016lx - %016lx\n", 714 pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
715 region->start_pfn << PAGE_SHIFT, 715 (unsigned long long) region->start_pfn << PAGE_SHIFT,
716 region->end_pfn << PAGE_SHIFT); 716 ((unsigned long long) region->end_pfn << PAGE_SHIFT)
717 - 1);
717 718
718 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 719 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
719 if (pfn_valid(pfn)) { 720 if (pfn_valid(pfn)) {
@@ -1000,20 +1001,20 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1000 s_page = pfn_to_page(src_pfn); 1001 s_page = pfn_to_page(src_pfn);
1001 d_page = pfn_to_page(dst_pfn); 1002 d_page = pfn_to_page(dst_pfn);
1002 if (PageHighMem(s_page)) { 1003 if (PageHighMem(s_page)) {
1003 src = kmap_atomic(s_page, KM_USER0); 1004 src = kmap_atomic(s_page);
1004 dst = kmap_atomic(d_page, KM_USER1); 1005 dst = kmap_atomic(d_page);
1005 do_copy_page(dst, src); 1006 do_copy_page(dst, src);
1006 kunmap_atomic(dst, KM_USER1); 1007 kunmap_atomic(dst);
1007 kunmap_atomic(src, KM_USER0); 1008 kunmap_atomic(src);
1008 } else { 1009 } else {
1009 if (PageHighMem(d_page)) { 1010 if (PageHighMem(d_page)) {
1010 /* Page pointed to by src may contain some kernel 1011 /* Page pointed to by src may contain some kernel
1011 * data modified by kmap_atomic() 1012 * data modified by kmap_atomic()
1012 */ 1013 */
1013 safe_copy_page(buffer, s_page); 1014 safe_copy_page(buffer, s_page);
1014 dst = kmap_atomic(d_page, KM_USER0); 1015 dst = kmap_atomic(d_page);
1015 copy_page(dst, buffer); 1016 copy_page(dst, buffer);
1016 kunmap_atomic(dst, KM_USER0); 1017 kunmap_atomic(dst);
1017 } else { 1018 } else {
1018 safe_copy_page(page_address(d_page), s_page); 1019 safe_copy_page(page_address(d_page), s_page);
1019 } 1020 }
@@ -1728,9 +1729,9 @@ int snapshot_read_next(struct snapshot_handle *handle)
1728 */ 1729 */
1729 void *kaddr; 1730 void *kaddr;
1730 1731
1731 kaddr = kmap_atomic(page, KM_USER0); 1732 kaddr = kmap_atomic(page);
1732 copy_page(buffer, kaddr); 1733 copy_page(buffer, kaddr);
1733 kunmap_atomic(kaddr, KM_USER0); 1734 kunmap_atomic(kaddr);
1734 handle->buffer = buffer; 1735 handle->buffer = buffer;
1735 } else { 1736 } else {
1736 handle->buffer = page_address(page); 1737 handle->buffer = page_address(page);
@@ -2014,9 +2015,9 @@ static void copy_last_highmem_page(void)
2014 if (last_highmem_page) { 2015 if (last_highmem_page) {
2015 void *dst; 2016 void *dst;
2016 2017
2017 dst = kmap_atomic(last_highmem_page, KM_USER0); 2018 dst = kmap_atomic(last_highmem_page);
2018 copy_page(dst, buffer); 2019 copy_page(dst, buffer);
2019 kunmap_atomic(dst, KM_USER0); 2020 kunmap_atomic(dst);
2020 last_highmem_page = NULL; 2021 last_highmem_page = NULL;
2021 } 2022 }
2022} 2023}
@@ -2309,13 +2310,13 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2309{ 2310{
2310 void *kaddr1, *kaddr2; 2311 void *kaddr1, *kaddr2;
2311 2312
2312 kaddr1 = kmap_atomic(p1, KM_USER0); 2313 kaddr1 = kmap_atomic(p1);
2313 kaddr2 = kmap_atomic(p2, KM_USER1); 2314 kaddr2 = kmap_atomic(p2);
2314 copy_page(buf, kaddr1); 2315 copy_page(buf, kaddr1);
2315 copy_page(kaddr1, kaddr2); 2316 copy_page(kaddr1, kaddr2);
2316 copy_page(kaddr2, buf); 2317 copy_page(kaddr2, buf);
2317 kunmap_atomic(kaddr2, KM_USER1); 2318 kunmap_atomic(kaddr2);
2318 kunmap_atomic(kaddr1, KM_USER0); 2319 kunmap_atomic(kaddr1);
2319} 2320}
2320 2321
2321/** 2322/**
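
Editor's note: the snapshot.c changes are a mechanical conversion to the slot-less kmap_atomic()/kunmap_atomic() API: the KM_USER0/KM_USER1 arguments are dropped, and unmaps are simply issued in reverse order of the maps. A minimal kernel-style sketch of the new convention (not taken from the patch):

#include <linux/highmem.h>
#include <linux/mm.h>

/* Copy one (possibly highmem) page into another using stacked atomic maps. */
static void copy_one_page(struct page *dst_page, struct page *src_page)
{
        void *src = kmap_atomic(src_page);
        void *dst = kmap_atomic(dst_page);

        copy_page(dst, src);

        kunmap_atomic(dst);     /* unmap in reverse order of mapping */
        kunmap_atomic(src);
}
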
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4fd51beed879..396d262b8fd0 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -12,7 +12,6 @@
12#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/kmod.h>
16#include <linux/console.h> 15#include <linux/console.h>
17#include <linux/cpu.h> 16#include <linux/cpu.h>
18#include <linux/syscalls.h> 17#include <linux/syscalls.h>
@@ -37,8 +36,8 @@ const char *const pm_states[PM_SUSPEND_MAX] = {
37static const struct platform_suspend_ops *suspend_ops; 36static const struct platform_suspend_ops *suspend_ops;
38 37
39/** 38/**
40 * suspend_set_ops - Set the global suspend method table. 39 * suspend_set_ops - Set the global suspend method table.
41 * @ops: Pointer to ops structure. 40 * @ops: Suspend operations to use.
42 */ 41 */
43void suspend_set_ops(const struct platform_suspend_ops *ops) 42void suspend_set_ops(const struct platform_suspend_ops *ops)
44{ 43{
@@ -58,11 +57,11 @@ bool valid_state(suspend_state_t state)
58} 57}
59 58
60/** 59/**
61 * suspend_valid_only_mem - generic memory-only valid callback 60 * suspend_valid_only_mem - Generic memory-only valid callback.
62 * 61 *
63 * Platform drivers that implement mem suspend only and only need 62 * Platform drivers that implement mem suspend only and only need to check for
64 * to check for that in their .valid callback can use this instead 63 * that in their .valid() callback can use this instead of rolling their own
65 * of rolling their own .valid callback. 64 * .valid() callback.
66 */ 65 */
67int suspend_valid_only_mem(suspend_state_t state) 66int suspend_valid_only_mem(suspend_state_t state)
68{ 67{
@@ -83,10 +82,11 @@ static int suspend_test(int level)
83} 82}
84 83
85/** 84/**
86 * suspend_prepare - Do prep work before entering low-power state. 85 * suspend_prepare - Prepare for entering system sleep state.
87 * 86 *
88 * This is common code that is called for each state that we're entering. 87 * Common code run for every system sleep state that can be entered (except for
89 * Run suspend notifiers, allocate a console and stop all processes. 88 * hibernation). Run suspend notifiers, allocate the "suspend" console and
89 * freeze processes.
90 */ 90 */
91static int suspend_prepare(void) 91static int suspend_prepare(void)
92{ 92{
@@ -101,17 +101,12 @@ static int suspend_prepare(void)
101 if (error) 101 if (error)
102 goto Finish; 102 goto Finish;
103 103
104 error = usermodehelper_disable();
105 if (error)
106 goto Finish;
107
108 error = suspend_freeze_processes(); 104 error = suspend_freeze_processes();
109 if (!error) 105 if (!error)
110 return 0; 106 return 0;
111 107
112 suspend_stats.failed_freeze++; 108 suspend_stats.failed_freeze++;
113 dpm_save_failed_step(SUSPEND_FREEZE); 109 dpm_save_failed_step(SUSPEND_FREEZE);
114 usermodehelper_enable();
115 Finish: 110 Finish:
116 pm_notifier_call_chain(PM_POST_SUSPEND); 111 pm_notifier_call_chain(PM_POST_SUSPEND);
117 pm_restore_console(); 112 pm_restore_console();
@@ -131,9 +126,9 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
131} 126}
132 127
133/** 128/**
134 * suspend_enter - enter the desired system sleep state. 129 * suspend_enter - Make the system enter the given sleep state.
135 * @state: State to enter 130 * @state: System sleep state to enter.
136 * @wakeup: Returns information that suspend should not be entered again. 131 * @wakeup: Returns information that the sleep state should not be re-entered.
137 * 132 *
138 * This function should be called after devices have been suspended. 133 * This function should be called after devices have been suspended.
139 */ 134 */
@@ -147,7 +142,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
147 goto Platform_finish; 142 goto Platform_finish;
148 } 143 }
149 144
150 error = dpm_suspend_noirq(PMSG_SUSPEND); 145 error = dpm_suspend_end(PMSG_SUSPEND);
151 if (error) { 146 if (error) {
152 printk(KERN_ERR "PM: Some devices failed to power down\n"); 147 printk(KERN_ERR "PM: Some devices failed to power down\n");
153 goto Platform_finish; 148 goto Platform_finish;
@@ -189,7 +184,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
189 if (suspend_ops->wake) 184 if (suspend_ops->wake)
190 suspend_ops->wake(); 185 suspend_ops->wake();
191 186
192 dpm_resume_noirq(PMSG_RESUME); 187 dpm_resume_start(PMSG_RESUME);
193 188
194 Platform_finish: 189 Platform_finish:
195 if (suspend_ops->finish) 190 if (suspend_ops->finish)
@@ -199,9 +194,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
199} 194}
200 195
201/** 196/**
202 * suspend_devices_and_enter - suspend devices and enter the desired system 197 * suspend_devices_and_enter - Suspend devices and enter system sleep state.
203 * sleep state. 198 * @state: System sleep state to enter.
204 * @state: state to enter
205 */ 199 */
206int suspend_devices_and_enter(suspend_state_t state) 200int suspend_devices_and_enter(suspend_state_t state)
207{ 201{
@@ -251,30 +245,27 @@ int suspend_devices_and_enter(suspend_state_t state)
251} 245}
252 246
253/** 247/**
254 * suspend_finish - Do final work before exiting suspend sequence. 248 * suspend_finish - Clean up before finishing the suspend sequence.
255 * 249 *
256 * Call platform code to clean up, restart processes, and free the 250 * Call platform code to clean up, restart processes, and free the console that
257 * console that we've allocated. This is not called for suspend-to-disk. 251 * we've allocated. This routine is not called for hibernation.
258 */ 252 */
259static void suspend_finish(void) 253static void suspend_finish(void)
260{ 254{
261 suspend_thaw_processes(); 255 suspend_thaw_processes();
262 usermodehelper_enable();
263 pm_notifier_call_chain(PM_POST_SUSPEND); 256 pm_notifier_call_chain(PM_POST_SUSPEND);
264 pm_restore_console(); 257 pm_restore_console();
265} 258}
266 259
267/** 260/**
268 * enter_state - Do common work of entering low-power state. 261 * enter_state - Do common work needed to enter system sleep state.
269 * @state: pm_state structure for state we're entering. 262 * @state: System sleep state to enter.
270 * 263 *
271 * Make sure we're the only ones trying to enter a sleep state. Fail 264 * Make sure that no one else is trying to put the system into a sleep state.
272 * if someone has beat us to it, since we don't want anything weird to 265 * Fail if that's not the case. Otherwise, prepare for system suspend, make the
273 * happen when we wake up. 266 * system enter the given sleep state and clean up after wakeup.
274 * Then, do the setup for suspend, enter the state, and cleaup (after
275 * we've woken up).
276 */ 267 */
277int enter_state(suspend_state_t state) 268static int enter_state(suspend_state_t state)
278{ 269{
279 int error; 270 int error;
280 271
@@ -310,24 +301,26 @@ int enter_state(suspend_state_t state)
310} 301}
311 302
312/** 303/**
313 * pm_suspend - Externally visible function for suspending system. 304 * pm_suspend - Externally visible function for suspending the system.
314 * @state: Enumerated value of state to enter. 305 * @state: System sleep state to enter.
315 * 306 *
316 * Determine whether or not value is within range, get state 307 * Check if the value of @state represents one of the supported states,
317 * structure, and enter (above). 308 * execute enter_state() and update system suspend statistics.
318 */ 309 */
319int pm_suspend(suspend_state_t state) 310int pm_suspend(suspend_state_t state)
320{ 311{
321 int ret; 312 int error;
322 if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { 313
323 ret = enter_state(state); 314 if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
324 if (ret) { 315 return -EINVAL;
325 suspend_stats.fail++; 316
326 dpm_save_failed_errno(ret); 317 error = enter_state(state);
327 } else 318 if (error) {
328 suspend_stats.success++; 319 suspend_stats.fail++;
329 return ret; 320 dpm_save_failed_errno(error);
321 } else {
322 suspend_stats.success++;
330 } 323 }
331 return -EINVAL; 324 return error;
332} 325}
333EXPORT_SYMBOL(pm_suspend); 326EXPORT_SYMBOL(pm_suspend);
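
Editor's note: after this rework, state_store() in main.c simply calls pm_suspend(), which rejects invalid states up front and updates suspend_stats in one place. A compact userspace model of the new control flow, included only to make the error and statistics handling explicit:

#include <stdio.h>

enum { PM_SUSPEND_ON, PM_SUSPEND_STANDBY, PM_SUSPEND_MEM, PM_SUSPEND_MAX };

static struct { int success, fail; } suspend_stats;

/* stand-in for the real enter_state(); pretend only "mem" works */
static int enter_state_model(int state)
{
        return state == PM_SUSPEND_MEM ? 0 : -5 /* -EIO */;
}

static int pm_suspend_model(int state)
{
        int error;

        if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
                return -22;     /* -EINVAL: rejected before touching the stats */

        error = enter_state_model(state);
        if (error)
                suspend_stats.fail++;
        else
                suspend_stats.success++;
        return error;
}

int main(void)
{
        pm_suspend_model(PM_SUSPEND_MEM);
        pm_suspend_model(PM_SUSPEND_MAX);   /* invalid, not counted */
        printf("success=%d fail=%d\n", suspend_stats.success, suspend_stats.fail);
        return 0;
}
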
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3e100075b13c..91b0fd021a95 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -12,7 +12,6 @@
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/syscalls.h> 13#include <linux/syscalls.h>
14#include <linux/reboot.h> 14#include <linux/reboot.h>
15#include <linux/kmod.h>
16#include <linux/string.h> 15#include <linux/string.h>
17#include <linux/device.h> 16#include <linux/device.h>
18#include <linux/miscdevice.h> 17#include <linux/miscdevice.h>
@@ -222,14 +221,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
222 sys_sync(); 221 sys_sync();
223 printk("done.\n"); 222 printk("done.\n");
224 223
225 error = usermodehelper_disable();
226 if (error)
227 break;
228
229 error = freeze_processes(); 224 error = freeze_processes();
230 if (error) 225 if (!error)
231 usermodehelper_enable();
232 else
233 data->frozen = 1; 226 data->frozen = 1;
234 break; 227 break;
235 228
@@ -238,7 +231,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
238 break; 231 break;
239 pm_restore_gfp_mask(); 232 pm_restore_gfp_mask();
240 thaw_processes(); 233 thaw_processes();
241 usermodehelper_enable();
242 data->frozen = 0; 234 data->frozen = 0;
243 break; 235 break;
244 236
@@ -249,16 +241,10 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
249 } 241 }
250 pm_restore_gfp_mask(); 242 pm_restore_gfp_mask();
251 error = hibernation_snapshot(data->platform_support); 243 error = hibernation_snapshot(data->platform_support);
252 if (error) { 244 if (!error) {
253 thaw_kernel_threads();
254 } else {
255 error = put_user(in_suspend, (int __user *)arg); 245 error = put_user(in_suspend, (int __user *)arg);
256 if (!error && !freezer_test_done) 246 data->ready = !freezer_test_done && !error;
257 data->ready = 1; 247 freezer_test_done = false;
258 if (freezer_test_done) {
259 freezer_test_done = false;
260 thaw_kernel_threads();
261 }
262 } 248 }
263 break; 249 break;
264 250
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 00ab2ca5ed11..ee8d49b9c309 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -231,26 +231,22 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
231} 231}
232 232
233static int ptrace_attach(struct task_struct *task, long request, 233static int ptrace_attach(struct task_struct *task, long request,
234 unsigned long addr,
234 unsigned long flags) 235 unsigned long flags)
235{ 236{
236 bool seize = (request == PTRACE_SEIZE); 237 bool seize = (request == PTRACE_SEIZE);
237 int retval; 238 int retval;
238 239
239 /*
240 * SEIZE will enable new ptrace behaviors which will be implemented
241 * gradually. SEIZE_DEVEL is used to prevent applications
242 * expecting full SEIZE behaviors trapping on kernel commits which
243 * are still in the process of implementing them.
244 *
245 * Only test programs for new ptrace behaviors being implemented
246 * should set SEIZE_DEVEL. If unset, SEIZE will fail with -EIO.
247 *
248 * Once SEIZE behaviors are completely implemented, this flag and
249 * the following test will be removed.
250 */
251 retval = -EIO; 240 retval = -EIO;
252 if (seize && !(flags & PTRACE_SEIZE_DEVEL)) 241 if (seize) {
253 goto out; 242 if (addr != 0)
243 goto out;
244 if (flags & ~(unsigned long)PTRACE_O_MASK)
245 goto out;
246 flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
247 } else {
248 flags = PT_PTRACED;
249 }
254 250
255 audit_ptrace(task); 251 audit_ptrace(task);
256 252
@@ -262,7 +258,7 @@ static int ptrace_attach(struct task_struct *task, long request,
262 258
263 /* 259 /*
264 * Protect exec's credential calculations against our interference; 260 * Protect exec's credential calculations against our interference;
265 * interference; SUID, SGID and LSM creds get determined differently 261 * SUID, SGID and LSM creds get determined differently
266 * under ptrace. 262 * under ptrace.
267 */ 263 */
268 retval = -ERESTARTNOINTR; 264 retval = -ERESTARTNOINTR;
@@ -282,11 +278,11 @@ static int ptrace_attach(struct task_struct *task, long request,
282 if (task->ptrace) 278 if (task->ptrace)
283 goto unlock_tasklist; 279 goto unlock_tasklist;
284 280
285 task->ptrace = PT_PTRACED;
286 if (seize) 281 if (seize)
287 task->ptrace |= PT_SEIZED; 282 flags |= PT_SEIZED;
288 if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) 283 if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
289 task->ptrace |= PT_PTRACE_CAP; 284 flags |= PT_PTRACE_CAP;
285 task->ptrace = flags;
290 286
291 __ptrace_link(task, current); 287 __ptrace_link(task, current);
292 288
@@ -528,30 +524,18 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
528 524
529static int ptrace_setoptions(struct task_struct *child, unsigned long data) 525static int ptrace_setoptions(struct task_struct *child, unsigned long data)
530{ 526{
531 child->ptrace &= ~PT_TRACE_MASK; 527 unsigned flags;
532 528
533 if (data & PTRACE_O_TRACESYSGOOD) 529 if (data & ~(unsigned long)PTRACE_O_MASK)
534 child->ptrace |= PT_TRACESYSGOOD; 530 return -EINVAL;
535
536 if (data & PTRACE_O_TRACEFORK)
537 child->ptrace |= PT_TRACE_FORK;
538
539 if (data & PTRACE_O_TRACEVFORK)
540 child->ptrace |= PT_TRACE_VFORK;
541
542 if (data & PTRACE_O_TRACECLONE)
543 child->ptrace |= PT_TRACE_CLONE;
544
545 if (data & PTRACE_O_TRACEEXEC)
546 child->ptrace |= PT_TRACE_EXEC;
547
548 if (data & PTRACE_O_TRACEVFORKDONE)
549 child->ptrace |= PT_TRACE_VFORK_DONE;
550 531
551 if (data & PTRACE_O_TRACEEXIT) 532 /* Avoid intermediate state when all opts are cleared */
552 child->ptrace |= PT_TRACE_EXIT; 533 flags = child->ptrace;
534 flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT);
535 flags |= (data << PT_OPT_FLAG_SHIFT);
536 child->ptrace = flags;
553 537
554 return (data & ~PTRACE_O_MASK) ? -EINVAL : 0; 538 return 0;
555} 539}
556 540
557static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) 541static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
@@ -891,7 +875,7 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
891 } 875 }
892 876
893 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { 877 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
894 ret = ptrace_attach(child, request, data); 878 ret = ptrace_attach(child, request, addr, data);
895 /* 879 /*
896 * Some architectures need to do book-keeping after 880 * Some architectures need to do book-keeping after
897 * a ptrace attach. 881 * a ptrace attach.
@@ -1034,7 +1018,7 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
1034 } 1018 }
1035 1019
1036 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { 1020 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
1037 ret = ptrace_attach(child, request, data); 1021 ret = ptrace_attach(child, request, addr, data);
1038 /* 1022 /*
1039 * Some architectures need to do book-keeping after 1023 * Some architectures need to do book-keeping after
1040 * a ptrace attach. 1024 * a ptrace attach.
diff --git a/kernel/resource.c b/kernel/resource.c
index 7640b3a947d0..7e8ea66a8c01 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -749,6 +749,7 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t
749 write_unlock(&resource_lock); 749 write_unlock(&resource_lock);
750 return result; 750 return result;
751} 751}
752EXPORT_SYMBOL(adjust_resource);
752 753
753static void __init __reserve_region_with_split(struct resource *root, 754static void __init __reserve_region_with_split(struct resource *root,
754 resource_size_t start, resource_size_t end, 755 resource_size_t start, resource_size_t end,
@@ -792,8 +793,6 @@ void __init reserve_region_with_split(struct resource *root,
792 write_unlock(&resource_lock); 793 write_unlock(&resource_lock);
793} 794}
794 795
795EXPORT_SYMBOL(adjust_resource);
796
797/** 796/**
798 * resource_alignment - calculate resource's alignment 797 * resource_alignment - calculate resource's alignment
799 * @res: resource pointer 798 * @res: resource pointer
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index b152f74f02de..6850f53e02d8 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -10,7 +10,6 @@
10#include <linux/export.h> 10#include <linux/export.h>
11#include <linux/rwsem.h> 11#include <linux/rwsem.h>
12 12
13#include <asm/system.h>
14#include <linux/atomic.h> 13#include <linux/atomic.h>
15 14
16/* 15/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a35cb8dbd8c4..4603b9d8f30a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -71,7 +71,9 @@
71#include <linux/ftrace.h> 71#include <linux/ftrace.h>
72#include <linux/slab.h> 72#include <linux/slab.h>
73#include <linux/init_task.h> 73#include <linux/init_task.h>
74#include <linux/binfmts.h>
74 75
76#include <asm/switch_to.h>
75#include <asm/tlb.h> 77#include <asm/tlb.h>
76#include <asm/irq_regs.h> 78#include <asm/irq_regs.h>
77#include <asm/mutex.h> 79#include <asm/mutex.h>
@@ -1263,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process);
1263 */ 1265 */
1264static int select_fallback_rq(int cpu, struct task_struct *p) 1266static int select_fallback_rq(int cpu, struct task_struct *p)
1265{ 1267{
1266 int dest_cpu;
1267 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); 1268 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
1269 enum { cpuset, possible, fail } state = cpuset;
1270 int dest_cpu;
1268 1271
1269 /* Look for allowed, online CPU in same node. */ 1272 /* Look for allowed, online CPU in same node. */
1270 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) 1273 for_each_cpu(dest_cpu, nodemask) {
1274 if (!cpu_online(dest_cpu))
1275 continue;
1276 if (!cpu_active(dest_cpu))
1277 continue;
1271 if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) 1278 if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
1272 return dest_cpu; 1279 return dest_cpu;
1280 }
1281
1282 for (;;) {
1283 /* Any allowed, online CPU? */
1284 for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
1285 if (!cpu_online(dest_cpu))
1286 continue;
1287 if (!cpu_active(dest_cpu))
1288 continue;
1289 goto out;
1290 }
1273 1291
1274 /* Any allowed, online CPU? */ 1292 switch (state) {
1275 dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); 1293 case cpuset:
1276 if (dest_cpu < nr_cpu_ids) 1294 /* No more Mr. Nice Guy. */
1277 return dest_cpu; 1295 cpuset_cpus_allowed_fallback(p);
1296 state = possible;
1297 break;
1278 1298
1279 /* No more Mr. Nice Guy. */ 1299 case possible:
1280 dest_cpu = cpuset_cpus_allowed_fallback(p); 1300 do_set_cpus_allowed(p, cpu_possible_mask);
1281 /* 1301 state = fail;
1282 * Don't tell them about moving exiting tasks or 1302 break;
1283 * kernel threads (both mm NULL), since they never 1303
1284 * leave kernel. 1304 case fail:
1285 */ 1305 BUG();
1286 if (p->mm && printk_ratelimit()) { 1306 break;
1287 printk_sched("process %d (%s) no longer affine to cpu%d\n", 1307 }
1288 task_pid_nr(p), p->comm, cpu); 1308 }
1309
1310out:
1311 if (state != cpuset) {
1312 /*
1313 * Don't tell them about moving exiting tasks or
1314 * kernel threads (both mm NULL), since they never
1315 * leave kernel.
1316 */
1317 if (p->mm && printk_ratelimit()) {
1318 printk_sched("process %d (%s) no longer affine to cpu%d\n",
1319 task_pid_nr(p), p->comm, cpu);
1320 }
1289 } 1321 }
1290 1322
1291 return dest_cpu; 1323 return dest_cpu;
@@ -1932,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
1932 local_irq_enable(); 1964 local_irq_enable();
1933#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 1965#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
1934 finish_lock_switch(rq, prev); 1966 finish_lock_switch(rq, prev);
1967 finish_arch_post_lock_switch();
1935 1968
1936 fire_sched_in_preempt_notifiers(current); 1969 fire_sched_in_preempt_notifiers(current);
1937 if (mm) 1970 if (mm)
@@ -3069,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count);
3069 */ 3102 */
3070static noinline void __schedule_bug(struct task_struct *prev) 3103static noinline void __schedule_bug(struct task_struct *prev)
3071{ 3104{
3072 struct pt_regs *regs = get_irq_regs();
3073
3074 if (oops_in_progress) 3105 if (oops_in_progress)
3075 return; 3106 return;
3076 3107
@@ -3081,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
3081 print_modules(); 3112 print_modules();
3082 if (irqs_disabled()) 3113 if (irqs_disabled())
3083 print_irqtrace_events(prev); 3114 print_irqtrace_events(prev);
3084 3115 dump_stack();
3085 if (regs)
3086 show_regs(regs);
3087 else
3088 dump_stack();
3089} 3116}
3090 3117
3091/* 3118/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94340c7544a9..0d97ebdc58f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
416 416
417#endif /* CONFIG_FAIR_GROUP_SCHED */ 417#endif /* CONFIG_FAIR_GROUP_SCHED */
418 418
419static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, 419static __always_inline
420 unsigned long delta_exec); 420void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
421 421
422/************************************************************** 422/**************************************************************
423 * Scheduling class tree data structure manipulation methods: 423 * Scheduling class tree data structure manipulation methods:
@@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
1162 __clear_buddies_skip(se); 1162 __clear_buddies_skip(se);
1163} 1163}
1164 1164
1165static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); 1165static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
1166 1166
1167static void 1167static void
1168dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) 1168dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
1546 resched_task(rq_of(cfs_rq)->curr); 1546 resched_task(rq_of(cfs_rq)->curr);
1547} 1547}
1548 1548
1549static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, 1549static __always_inline
1550 unsigned long delta_exec) 1550void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
1551{ 1551{
1552 if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) 1552 if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
1553 return; 1553 return;
@@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq)
2073} 2073}
2074 2074
2075#else /* CONFIG_CFS_BANDWIDTH */ 2075#else /* CONFIG_CFS_BANDWIDTH */
2076static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, 2076static __always_inline
2077 unsigned long delta_exec) {} 2077void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
2078static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} 2078static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
2079static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} 2079static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
2080static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} 2080static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
2081 2081
2082static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) 2082static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
2083{ 2083{
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b60dad720173..44af55e6d5d0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1428next_idx: 1428next_idx:
1429 if (idx >= MAX_RT_PRIO) 1429 if (idx >= MAX_RT_PRIO)
1430 continue; 1430 continue;
1431 if (next && next->prio < idx) 1431 if (next && next->prio <= idx)
1432 continue; 1432 continue;
1433 list_for_each_entry(rt_se, array->queue + idx, run_list) { 1433 list_for_each_entry(rt_se, array->queue + idx, run_list) {
1434 struct task_struct *p; 1434 struct task_struct *p;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 42b1f304b044..fb3acba4d52e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -681,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p)
681#ifndef finish_arch_switch 681#ifndef finish_arch_switch
682# define finish_arch_switch(prev) do { } while (0) 682# define finish_arch_switch(prev) do { } while (0)
683#endif 683#endif
684#ifndef finish_arch_post_lock_switch
685# define finish_arch_post_lock_switch() do { } while (0)
686#endif
684 687
685#ifndef __ARCH_WANT_UNLOCKED_CTXSW 688#ifndef __ARCH_WANT_UNLOCKED_CTXSW
686static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) 689static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
diff --git a/kernel/signal.c b/kernel/signal.c
index e76001ccf5cd..17afcaf582d0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -36,6 +36,7 @@
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/siginfo.h> 38#include <asm/siginfo.h>
39#include <asm/cacheflush.h>
39#include "audit.h" /* audit_signal_info() */ 40#include "audit.h" /* audit_signal_info() */
40 41
41/* 42/*
@@ -58,21 +59,20 @@ static int sig_handler_ignored(void __user *handler, int sig)
58 (handler == SIG_DFL && sig_kernel_ignore(sig)); 59 (handler == SIG_DFL && sig_kernel_ignore(sig));
59} 60}
60 61
61static int sig_task_ignored(struct task_struct *t, int sig, 62static int sig_task_ignored(struct task_struct *t, int sig, bool force)
62 int from_ancestor_ns)
63{ 63{
64 void __user *handler; 64 void __user *handler;
65 65
66 handler = sig_handler(t, sig); 66 handler = sig_handler(t, sig);
67 67
68 if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && 68 if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
69 handler == SIG_DFL && !from_ancestor_ns) 69 handler == SIG_DFL && !force)
70 return 1; 70 return 1;
71 71
72 return sig_handler_ignored(handler, sig); 72 return sig_handler_ignored(handler, sig);
73} 73}
74 74
75static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) 75static int sig_ignored(struct task_struct *t, int sig, bool force)
76{ 76{
77 /* 77 /*
78 * Blocked signals are never ignored, since the 78 * Blocked signals are never ignored, since the
@@ -82,7 +82,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
82 if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) 82 if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
83 return 0; 83 return 0;
84 84
85 if (!sig_task_ignored(t, sig, from_ancestor_ns)) 85 if (!sig_task_ignored(t, sig, force))
86 return 0; 86 return 0;
87 87
88 /* 88 /*
@@ -855,7 +855,7 @@ static void ptrace_trap_notify(struct task_struct *t)
855 * Returns true if the signal should be actually delivered, otherwise 855 * Returns true if the signal should be actually delivered, otherwise
856 * it should be dropped. 856 * it should be dropped.
857 */ 857 */
858static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) 858static int prepare_signal(int sig, struct task_struct *p, bool force)
859{ 859{
860 struct signal_struct *signal = p->signal; 860 struct signal_struct *signal = p->signal;
861 struct task_struct *t; 861 struct task_struct *t;
@@ -915,7 +915,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
915 } 915 }
916 } 916 }
917 917
918 return !sig_ignored(p, sig, from_ancestor_ns); 918 return !sig_ignored(p, sig, force);
919} 919}
920 920
921/* 921/*
@@ -1059,7 +1059,8 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1059 assert_spin_locked(&t->sighand->siglock); 1059 assert_spin_locked(&t->sighand->siglock);
1060 1060
1061 result = TRACE_SIGNAL_IGNORED; 1061 result = TRACE_SIGNAL_IGNORED;
1062 if (!prepare_signal(sig, t, from_ancestor_ns)) 1062 if (!prepare_signal(sig, t,
1063 from_ancestor_ns || (info == SEND_SIG_FORCED)))
1063 goto ret; 1064 goto ret;
1064 1065
1065 pending = group ? &t->signal->shared_pending : &t->pending; 1066 pending = group ? &t->signal->shared_pending : &t->pending;
@@ -1601,7 +1602,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1601 1602
1602 ret = 1; /* the signal is ignored */ 1603 ret = 1; /* the signal is ignored */
1603 result = TRACE_SIGNAL_IGNORED; 1604 result = TRACE_SIGNAL_IGNORED;
1604 if (!prepare_signal(sig, t, 0)) 1605 if (!prepare_signal(sig, t, false))
1605 goto out; 1606 goto out;
1606 1607
1607 ret = 0; 1608 ret = 0;
diff --git a/kernel/smp.c b/kernel/smp.c
index db197d60489b..2f8b10ecf759 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
701 return ret; 701 return ret;
702} 702}
703EXPORT_SYMBOL(on_each_cpu); 703EXPORT_SYMBOL(on_each_cpu);
704
705/**
706 * on_each_cpu_mask(): Run a function on processors specified by
707 * cpumask, which may include the local processor.
708 * @mask: The set of cpus to run on (only runs on online subset).
709 * @func: The function to run. This must be fast and non-blocking.
710 * @info: An arbitrary pointer to pass to the function.
711 * @wait: If true, wait (atomically) until function has completed
712 * on other CPUs.
713 *
714 * If @wait is true, then returns once @func has returned.
715 *
716 * You must not call this function with disabled interrupts or
717 * from a hardware interrupt handler or from a bottom half handler.
718 */
719void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
720 void *info, bool wait)
721{
722 int cpu = get_cpu();
723
724 smp_call_function_many(mask, func, info, wait);
725 if (cpumask_test_cpu(cpu, mask)) {
726 local_irq_disable();
727 func(info);
728 local_irq_enable();
729 }
730 put_cpu();
731}
732EXPORT_SYMBOL(on_each_cpu_mask);
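The kernel-doc above spells out the contract for the new on_each_cpu_mask() helper: process context, interrupts enabled, and a fast, non-blocking callback. A minimal sketch of a caller, with hypothetical function and counter names, could be:

    #include <linux/smp.h>
    #include <linux/cpumask.h>
    #include <linux/topology.h>
    #include <linux/atomic.h>

    static void bump_counter(void *info)
    {
            atomic_inc(info);               /* runs on each CPU with IRQs off */
    }

    static void count_node0_cpus(atomic_t *counter)
    {
            /* Only the online subset of the mask is actually sent an IPI. */
            on_each_cpu_mask(cpumask_of_node(0), bump_counter, counter, true);
    }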
733
734/*
735 * on_each_cpu_cond(): Call a function on each processor for which
736 * the supplied function cond_func returns true, optionally waiting
737 * for all the required CPUs to finish. This may include the local
738 * processor.
739 * @cond_func: A callback function that is passed a cpu id and
740 * the the info parameter. The function is called
741 * with preemption disabled. The function should
742 * return a blooean value indicating whether to IPI
743 * the specified CPU.
744 * @func: The function to run on all applicable CPUs.
745 * This must be fast and non-blocking.
746 * @info: An arbitrary pointer to pass to both functions.
747 * @wait: If true, wait (atomically) until function has
748 * completed on other CPUs.
749 * @gfp_flags: GFP flags to use when allocating the cpumask
750 * used internally by the function.
751 *
752 * The function might sleep if the GFP flags indicate a non-atomic
753 * allocation is allowed.
754 *
755 * Preemption is disabled to protect against CPUs going offline, but not against
756 * CPUs coming online; those will not be seen or sent an IPI during the call.
757 *
758 * You must not call this function with disabled interrupts or
759 * from a hardware interrupt handler or from a bottom half handler.
760 */
761void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
762 smp_call_func_t func, void *info, bool wait,
763 gfp_t gfp_flags)
764{
765 cpumask_var_t cpus;
766 int cpu, ret;
767
768 might_sleep_if(gfp_flags & __GFP_WAIT);
769
770 if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
771 preempt_disable();
772 for_each_online_cpu(cpu)
773 if (cond_func(cpu, info))
774 cpumask_set_cpu(cpu, cpus);
775 on_each_cpu_mask(cpus, func, info, wait);
776 preempt_enable();
777 free_cpumask_var(cpus);
778 } else {
779 /*
780 * No free cpumask, bother. No matter, we'll
781 * just have to IPI them one by one.
782 */
783 preempt_disable();
784 for_each_online_cpu(cpu)
785 if (cond_func(cpu, info)) {
786 ret = smp_call_function_single(cpu, func,
787 info, wait);
788					WARN_ON_ONCE(ret); /* warn if the cross-call failed */
789 }
790 preempt_enable();
791 }
792}
793EXPORT_SYMBOL(on_each_cpu_cond);
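on_each_cpu_cond() splits the work into a predicate, run with preemption disabled for every online CPU, and the actual cross-call. A hedged sketch with a hypothetical per-CPU flag:

    #include <linux/smp.h>
    #include <linux/percpu.h>
    #include <linux/gfp.h>

    static DEFINE_PER_CPU(bool, needs_flush);

    static bool cpu_needs_flush(int cpu, void *info)
    {
            return per_cpu(needs_flush, cpu);       /* predicate, preemption disabled */
    }

    static void do_flush(void *info)
    {
            this_cpu_write(needs_flush, false);     /* runs on each selected CPU */
    }

    static void flush_marked_cpus(void)
    {
            /* GFP_KERNEL: may sleep while allocating the internal cpumask. */
            on_each_cpu_cond(cpu_needs_flush, do_flush, NULL, true, GFP_KERNEL);
    }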
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 84c7d96918bf..5cdd8065a3ce 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
163EXPORT_SYMBOL(_raw_spin_lock_bh); 163EXPORT_SYMBOL(_raw_spin_lock_bh);
164#endif 164#endif
165 165
166#ifndef CONFIG_INLINE_SPIN_UNLOCK 166#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) 167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
168{ 168{
169 __raw_spin_unlock(lock); 169 __raw_spin_unlock(lock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 888d227fd195..e7006eb6c1e4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
444 magic2 != LINUX_REBOOT_MAGIC2C)) 444 magic2 != LINUX_REBOOT_MAGIC2C))
445 return -EINVAL; 445 return -EINVAL;
446 446
447 /*
448 * If pid namespaces are enabled and the current task is in a child
449 * pid_namespace, the command is handled by reboot_pid_ns() which will
450 * call do_exit().
451 */
452 ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
453 if (ret)
454 return ret;
455
447 /* Instead of trying to make the power_off code look like 456 /* Instead of trying to make the power_off code look like
448 * halt when pm_power_off is not set do it the easy way. 457 * halt when pm_power_off is not set do it the easy way.
449 */ 458 */
@@ -1962,6 +1971,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1962 case PR_SET_MM: 1971 case PR_SET_MM:
1963 error = prctl_set_mm(arg2, arg3, arg4, arg5); 1972 error = prctl_set_mm(arg2, arg3, arg4, arg5);
1964 break; 1973 break;
1974 case PR_SET_CHILD_SUBREAPER:
1975 me->signal->is_child_subreaper = !!arg2;
1976 error = 0;
1977 break;
1978 case PR_GET_CHILD_SUBREAPER:
1979 error = put_user(me->signal->is_child_subreaper,
1980 (int __user *) arg2);
1981 break;
1965 default: 1982 default:
1966 error = -EINVAL; 1983 error = -EINVAL;
1967 break; 1984 break;
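The new PR_SET_CHILD_SUBREAPER / PR_GET_CHILD_SUBREAPER pair is driven from userspace through prctl(2). A minimal sketch (the PR_* constants come from a <linux/prctl.h> that already carries this patch; error handling trimmed):

    #include <sys/prctl.h>
    #include <stdio.h>

    int main(void)
    {
            int is_subreaper = 0;

            /* Orphaned descendants are reparented to this process instead of init. */
            prctl(PR_SET_CHILD_SUBREAPER, 1L, 0L, 0L, 0L);

            /* The kernel writes the current setting through the int pointer in arg2. */
            prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&is_subreaper, 0L, 0L, 0L);
            printf("child subreaper: %d\n", is_subreaper);
            return 0;
    }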
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f487f257e05e..4ab11879aeb4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/sysctl.h> 25#include <linux/sysctl.h>
26#include <linux/bitmap.h>
26#include <linux/signal.h> 27#include <linux/signal.h>
27#include <linux/printk.h> 28#include <linux/printk.h>
28#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
@@ -58,6 +59,7 @@
58#include <linux/oom.h> 59#include <linux/oom.h>
59#include <linux/kmod.h> 60#include <linux/kmod.h>
60#include <linux/capability.h> 61#include <linux/capability.h>
62#include <linux/binfmts.h>
61 63
62#include <asm/uaccess.h> 64#include <asm/uaccess.h>
63#include <asm/processor.h> 65#include <asm/processor.h>
@@ -67,6 +69,9 @@
67#include <asm/stacktrace.h> 69#include <asm/stacktrace.h>
68#include <asm/io.h> 70#include <asm/io.h>
69#endif 71#endif
72#ifdef CONFIG_SPARC
73#include <asm/setup.h>
74#endif
70#ifdef CONFIG_BSD_PROCESS_ACCT 75#ifdef CONFIG_BSD_PROCESS_ACCT
71#include <linux/acct.h> 76#include <linux/acct.h>
72#endif 77#endif
@@ -141,7 +146,6 @@ static const int cap_last_cap = CAP_LAST_CAP;
141#include <linux/inotify.h> 146#include <linux/inotify.h>
142#endif 147#endif
143#ifdef CONFIG_SPARC 148#ifdef CONFIG_SPARC
144#include <asm/system.h>
145#endif 149#endif
146 150
147#ifdef CONFIG_SPARC64 151#ifdef CONFIG_SPARC64
@@ -166,7 +170,7 @@ static int proc_taint(struct ctl_table *table, int write,
166#endif 170#endif
167 171
168#ifdef CONFIG_PRINTK 172#ifdef CONFIG_PRINTK
169static int proc_dmesg_restrict(struct ctl_table *table, int write, 173static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
170 void __user *buffer, size_t *lenp, loff_t *ppos); 174 void __user *buffer, size_t *lenp, loff_t *ppos);
171#endif 175#endif
172 176
@@ -192,20 +196,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
192 196
193#endif 197#endif
194 198
195static struct ctl_table root_table[];
196static struct ctl_table_root sysctl_table_root;
197static struct ctl_table_header root_table_header = {
198 {{.count = 1,
199 .ctl_table = root_table,
200 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
201 .root = &sysctl_table_root,
202 .set = &sysctl_table_root.default_set,
203};
204static struct ctl_table_root sysctl_table_root = {
205 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
206 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
207};
208
209static struct ctl_table kern_table[]; 199static struct ctl_table kern_table[];
210static struct ctl_table vm_table[]; 200static struct ctl_table vm_table[];
211static struct ctl_table fs_table[]; 201static struct ctl_table fs_table[];
@@ -222,7 +212,7 @@ int sysctl_legacy_va_layout;
222 212
223/* The default sysctl tables: */ 213/* The default sysctl tables: */
224 214
225static struct ctl_table root_table[] = { 215static struct ctl_table sysctl_base_table[] = {
226 { 216 {
227 .procname = "kernel", 217 .procname = "kernel",
228 .mode = 0555, 218 .mode = 0555,
@@ -713,7 +703,7 @@ static struct ctl_table kern_table[] = {
713 .data = &dmesg_restrict, 703 .data = &dmesg_restrict,
714 .maxlen = sizeof(int), 704 .maxlen = sizeof(int),
715 .mode = 0644, 705 .mode = 0644,
716 .proc_handler = proc_dointvec_minmax, 706 .proc_handler = proc_dointvec_minmax_sysadmin,
717 .extra1 = &zero, 707 .extra1 = &zero,
718 .extra2 = &one, 708 .extra2 = &one,
719 }, 709 },
@@ -722,7 +712,7 @@ static struct ctl_table kern_table[] = {
722 .data = &kptr_restrict, 712 .data = &kptr_restrict,
723 .maxlen = sizeof(int), 713 .maxlen = sizeof(int),
724 .mode = 0644, 714 .mode = 0644,
725 .proc_handler = proc_dmesg_restrict, 715 .proc_handler = proc_dointvec_minmax_sysadmin,
726 .extra1 = &zero, 716 .extra1 = &zero,
727 .extra2 = &two, 717 .extra2 = &two,
728 }, 718 },
@@ -1559,490 +1549,12 @@ static struct ctl_table dev_table[] = {
1559 { } 1549 { }
1560}; 1550};
1561 1551
1562static DEFINE_SPINLOCK(sysctl_lock); 1552int __init sysctl_init(void)
1563
1564/* called under sysctl_lock */
1565static int use_table(struct ctl_table_header *p)
1566{
1567 if (unlikely(p->unregistering))
1568 return 0;
1569 p->used++;
1570 return 1;
1571}
1572
1573/* called under sysctl_lock */
1574static void unuse_table(struct ctl_table_header *p)
1575{
1576 if (!--p->used)
1577 if (unlikely(p->unregistering))
1578 complete(p->unregistering);
1579}
1580
1581/* called under sysctl_lock, will reacquire if has to wait */
1582static void start_unregistering(struct ctl_table_header *p)
1583{
1584 /*
1585 * if p->used is 0, nobody will ever touch that entry again;
1586 * we'll eliminate all paths to it before dropping sysctl_lock
1587 */
1588 if (unlikely(p->used)) {
1589 struct completion wait;
1590 init_completion(&wait);
1591 p->unregistering = &wait;
1592 spin_unlock(&sysctl_lock);
1593 wait_for_completion(&wait);
1594 spin_lock(&sysctl_lock);
1595 } else {
1596 /* anything non-NULL; we'll never dereference it */
1597 p->unregistering = ERR_PTR(-EINVAL);
1598 }
1599 /*
1600 * do not remove from the list until nobody holds it; walking the
1601 * list in do_sysctl() relies on that.
1602 */
1603 list_del_init(&p->ctl_entry);
1604}
1605
1606void sysctl_head_get(struct ctl_table_header *head)
1607{
1608 spin_lock(&sysctl_lock);
1609 head->count++;
1610 spin_unlock(&sysctl_lock);
1611}
1612
1613void sysctl_head_put(struct ctl_table_header *head)
1614{
1615 spin_lock(&sysctl_lock);
1616 if (!--head->count)
1617 kfree_rcu(head, rcu);
1618 spin_unlock(&sysctl_lock);
1619}
1620
1621struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1622{
1623 if (!head)
1624 BUG();
1625 spin_lock(&sysctl_lock);
1626 if (!use_table(head))
1627 head = ERR_PTR(-ENOENT);
1628 spin_unlock(&sysctl_lock);
1629 return head;
1630}
1631
1632void sysctl_head_finish(struct ctl_table_header *head)
1633{
1634 if (!head)
1635 return;
1636 spin_lock(&sysctl_lock);
1637 unuse_table(head);
1638 spin_unlock(&sysctl_lock);
1639}
1640
1641static struct ctl_table_set *
1642lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1643{
1644 struct ctl_table_set *set = &root->default_set;
1645 if (root->lookup)
1646 set = root->lookup(root, namespaces);
1647 return set;
1648}
1649
1650static struct list_head *
1651lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1652{
1653 struct ctl_table_set *set = lookup_header_set(root, namespaces);
1654 return &set->list;
1655}
1656
1657struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1658 struct ctl_table_header *prev)
1659{ 1553{
1660 struct ctl_table_root *root; 1554 register_sysctl_table(sysctl_base_table);
1661 struct list_head *header_list;
1662 struct ctl_table_header *head;
1663 struct list_head *tmp;
1664
1665 spin_lock(&sysctl_lock);
1666 if (prev) {
1667 head = prev;
1668 tmp = &prev->ctl_entry;
1669 unuse_table(prev);
1670 goto next;
1671 }
1672 tmp = &root_table_header.ctl_entry;
1673 for (;;) {
1674 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1675
1676 if (!use_table(head))
1677 goto next;
1678 spin_unlock(&sysctl_lock);
1679 return head;
1680 next:
1681 root = head->root;
1682 tmp = tmp->next;
1683 header_list = lookup_header_list(root, namespaces);
1684 if (tmp != header_list)
1685 continue;
1686
1687 do {
1688 root = list_entry(root->root_list.next,
1689 struct ctl_table_root, root_list);
1690 if (root == &sysctl_table_root)
1691 goto out;
1692 header_list = lookup_header_list(root, namespaces);
1693 } while (list_empty(header_list));
1694 tmp = header_list->next;
1695 }
1696out:
1697 spin_unlock(&sysctl_lock);
1698 return NULL;
1699}
1700
1701struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1702{
1703 return __sysctl_head_next(current->nsproxy, prev);
1704}
1705
1706void register_sysctl_root(struct ctl_table_root *root)
1707{
1708 spin_lock(&sysctl_lock);
1709 list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1710 spin_unlock(&sysctl_lock);
1711}
1712
1713/*
1714 * sysctl_perm does NOT grant the superuser all rights automatically, because
1715 * some sysctl variables are readonly even to root.
1716 */
1717
1718static int test_perm(int mode, int op)
1719{
1720 if (!current_euid())
1721 mode >>= 6;
1722 else if (in_egroup_p(0))
1723 mode >>= 3;
1724 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1725 return 0;
1726 return -EACCES;
1727}
1728
1729int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1730{
1731 int mode;
1732
1733 if (root->permissions)
1734 mode = root->permissions(root, current->nsproxy, table);
1735 else
1736 mode = table->mode;
1737
1738 return test_perm(mode, op);
1739}
1740
1741static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1742{
1743 for (; table->procname; table++) {
1744 table->parent = parent;
1745 if (table->child)
1746 sysctl_set_parent(table, table->child);
1747 }
1748}
1749
1750static __init int sysctl_init(void)
1751{
1752 sysctl_set_parent(NULL, root_table);
1753#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1754 sysctl_check_table(current->nsproxy, root_table);
1755#endif
1756 return 0; 1555 return 0;
1757} 1556}
1758 1557
1759core_initcall(sysctl_init);
1760
1761static struct ctl_table *is_branch_in(struct ctl_table *branch,
1762 struct ctl_table *table)
1763{
1764 struct ctl_table *p;
1765 const char *s = branch->procname;
1766
1767 /* branch should have named subdirectory as its first element */
1768 if (!s || !branch->child)
1769 return NULL;
1770
1771 /* ... and nothing else */
1772 if (branch[1].procname)
1773 return NULL;
1774
1775 /* table should contain subdirectory with the same name */
1776 for (p = table; p->procname; p++) {
1777 if (!p->child)
1778 continue;
1779 if (p->procname && strcmp(p->procname, s) == 0)
1780 return p;
1781 }
1782 return NULL;
1783}
1784
1785/* see if attaching q to p would be an improvement */
1786static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1787{
1788 struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1789 struct ctl_table *next;
1790 int is_better = 0;
1791 int not_in_parent = !p->attached_by;
1792
1793 while ((next = is_branch_in(by, to)) != NULL) {
1794 if (by == q->attached_by)
1795 is_better = 1;
1796 if (to == p->attached_by)
1797 not_in_parent = 1;
1798 by = by->child;
1799 to = next->child;
1800 }
1801
1802 if (is_better && not_in_parent) {
1803 q->attached_by = by;
1804 q->attached_to = to;
1805 q->parent = p;
1806 }
1807}
1808
1809/**
1810 * __register_sysctl_paths - register a sysctl hierarchy
1811 * @root: List of sysctl headers to register on
1812 * @namespaces: Data to compute which lists of sysctl entries are visible
1813 * @path: The path to the directory the sysctl table is in.
1814 * @table: the top-level table structure
1815 *
1816 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1817 * array. A completely 0 filled entry terminates the table.
1818 *
1819 * The members of the &struct ctl_table structure are used as follows:
1820 *
1821 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1822 * enter a sysctl file
1823 *
1824 * data - a pointer to data for use by proc_handler
1825 *
1826 * maxlen - the maximum size in bytes of the data
1827 *
1828 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1829 *
1830 * child - a pointer to the child sysctl table if this entry is a directory, or
1831 * %NULL.
1832 *
1833 * proc_handler - the text handler routine (described below)
1834 *
1835 * de - for internal use by the sysctl routines
1836 *
1837 * extra1, extra2 - extra pointers usable by the proc handler routines
1838 *
1839 * Leaf nodes in the sysctl tree will be represented by a single file
1840 * under /proc; non-leaf nodes will be represented by directories.
1841 *
1842 * sysctl(2) can automatically manage read and write requests through
1843 * the sysctl table. The data and maxlen fields of the ctl_table
1844 * struct enable minimal validation of the values being written to be
1845 * performed, and the mode field allows minimal authentication.
1846 *
1847 * There must be a proc_handler routine for any terminal nodes
1848 * mirrored under /proc/sys (non-terminals are handled by a built-in
1849 * directory handler). Several default handlers are available to
1850 * cover common cases -
1851 *
1852 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1853 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1854 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1855 *
1856 * It is the handler's job to read the input buffer from user memory
1857 * and process it. The handler should return 0 on success.
1858 *
1859 * This routine returns %NULL on a failure to register, and a pointer
1860 * to the table header on success.
1861 */
1862struct ctl_table_header *__register_sysctl_paths(
1863 struct ctl_table_root *root,
1864 struct nsproxy *namespaces,
1865 const struct ctl_path *path, struct ctl_table *table)
1866{
1867 struct ctl_table_header *header;
1868 struct ctl_table *new, **prevp;
1869 unsigned int n, npath;
1870 struct ctl_table_set *set;
1871
1872 /* Count the path components */
1873 for (npath = 0; path[npath].procname; ++npath)
1874 ;
1875
1876 /*
1877 * For each path component, allocate a 2-element ctl_table array.
1878 * The first array element will be filled with the sysctl entry
1879 * for this, the second will be the sentinel (procname == 0).
1880 *
1881 * We allocate everything in one go so that we don't have to
1882 * worry about freeing additional memory in unregister_sysctl_table.
1883 */
1884 header = kzalloc(sizeof(struct ctl_table_header) +
1885 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1886 if (!header)
1887 return NULL;
1888
1889 new = (struct ctl_table *) (header + 1);
1890
1891 /* Now connect the dots */
1892 prevp = &header->ctl_table;
1893 for (n = 0; n < npath; ++n, ++path) {
1894 /* Copy the procname */
1895 new->procname = path->procname;
1896 new->mode = 0555;
1897
1898 *prevp = new;
1899 prevp = &new->child;
1900
1901 new += 2;
1902 }
1903 *prevp = table;
1904 header->ctl_table_arg = table;
1905
1906 INIT_LIST_HEAD(&header->ctl_entry);
1907 header->used = 0;
1908 header->unregistering = NULL;
1909 header->root = root;
1910 sysctl_set_parent(NULL, header->ctl_table);
1911 header->count = 1;
1912#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1913 if (sysctl_check_table(namespaces, header->ctl_table)) {
1914 kfree(header);
1915 return NULL;
1916 }
1917#endif
1918 spin_lock(&sysctl_lock);
1919 header->set = lookup_header_set(root, namespaces);
1920 header->attached_by = header->ctl_table;
1921 header->attached_to = root_table;
1922 header->parent = &root_table_header;
1923 for (set = header->set; set; set = set->parent) {
1924 struct ctl_table_header *p;
1925 list_for_each_entry(p, &set->list, ctl_entry) {
1926 if (p->unregistering)
1927 continue;
1928 try_attach(p, header);
1929 }
1930 }
1931 header->parent->count++;
1932 list_add_tail(&header->ctl_entry, &header->set->list);
1933 spin_unlock(&sysctl_lock);
1934
1935 return header;
1936}
1937
1938/**
1939 * register_sysctl_table_path - register a sysctl table hierarchy
1940 * @path: The path to the directory the sysctl table is in.
1941 * @table: the top-level table structure
1942 *
1943 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1944 * array. A completely 0 filled entry terminates the table.
1945 *
1946 * See __register_sysctl_paths for more details.
1947 */
1948struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1949 struct ctl_table *table)
1950{
1951 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1952 path, table);
1953}
1954
1955/**
1956 * register_sysctl_table - register a sysctl table hierarchy
1957 * @table: the top-level table structure
1958 *
1959 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1960 * array. A completely 0 filled entry terminates the table.
1961 *
1962 * See register_sysctl_paths for more details.
1963 */
1964struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1965{
1966 static const struct ctl_path null_path[] = { {} };
1967
1968 return register_sysctl_paths(null_path, table);
1969}
1970
1971/**
1972 * unregister_sysctl_table - unregister a sysctl table hierarchy
1973 * @header: the header returned from register_sysctl_table
1974 *
1975 * Unregisters the sysctl table and all children. proc entries may not
1976 * actually be removed until they are no longer used by anyone.
1977 */
1978void unregister_sysctl_table(struct ctl_table_header * header)
1979{
1980 might_sleep();
1981
1982 if (header == NULL)
1983 return;
1984
1985 spin_lock(&sysctl_lock);
1986 start_unregistering(header);
1987 if (!--header->parent->count) {
1988 WARN_ON(1);
1989 kfree_rcu(header->parent, rcu);
1990 }
1991 if (!--header->count)
1992 kfree_rcu(header, rcu);
1993 spin_unlock(&sysctl_lock);
1994}
1995
1996int sysctl_is_seen(struct ctl_table_header *p)
1997{
1998 struct ctl_table_set *set = p->set;
1999 int res;
2000 spin_lock(&sysctl_lock);
2001 if (p->unregistering)
2002 res = 0;
2003 else if (!set->is_seen)
2004 res = 1;
2005 else
2006 res = set->is_seen(set);
2007 spin_unlock(&sysctl_lock);
2008 return res;
2009}
2010
2011void setup_sysctl_set(struct ctl_table_set *p,
2012 struct ctl_table_set *parent,
2013 int (*is_seen)(struct ctl_table_set *))
2014{
2015 INIT_LIST_HEAD(&p->list);
2016 p->parent = parent ? parent : &sysctl_table_root.default_set;
2017 p->is_seen = is_seen;
2018}
2019
2020#else /* !CONFIG_SYSCTL */
2021struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
2022{
2023 return NULL;
2024}
2025
2026struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2027 struct ctl_table *table)
2028{
2029 return NULL;
2030}
2031
2032void unregister_sysctl_table(struct ctl_table_header * table)
2033{
2034}
2035
2036void setup_sysctl_set(struct ctl_table_set *p,
2037 struct ctl_table_set *parent,
2038 int (*is_seen)(struct ctl_table_set *))
2039{
2040}
2041
2042void sysctl_head_put(struct ctl_table_header *head)
2043{
2044}
2045
2046#endif /* CONFIG_SYSCTL */ 1558#endif /* CONFIG_SYSCTL */
2047 1559
2048/* 1560/*
@@ -2431,7 +1943,7 @@ static int proc_taint(struct ctl_table *table, int write,
2431} 1943}
2432 1944
2433#ifdef CONFIG_PRINTK 1945#ifdef CONFIG_PRINTK
2434static int proc_dmesg_restrict(struct ctl_table *table, int write, 1946static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2435 void __user *buffer, size_t *lenp, loff_t *ppos) 1947 void __user *buffer, size_t *lenp, loff_t *ppos)
2436{ 1948{
2437 if (write && !capable(CAP_SYS_ADMIN)) 1949 if (write && !capable(CAP_SYS_ADMIN))
@@ -2884,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2884 } 2396 }
2885 } 2397 }
2886 2398
2887 while (val_a <= val_b) 2399 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2888 set_bit(val_a++, tmp_bitmap);
2889
2890 first = 0; 2400 first = 0;
2891 proc_skip_char(&kbuf, &left, '\n'); 2401 proc_skip_char(&kbuf, &left, '\n');
2892 } 2402 }
@@ -2929,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2929 if (*ppos) 2439 if (*ppos)
2930 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); 2440 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2931 else 2441 else
2932 memcpy(bitmap, tmp_bitmap, 2442 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2933 BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2934 } 2443 }
2935 kfree(tmp_bitmap); 2444 kfree(tmp_bitmap);
2936 *lenp -= left; 2445 *lenp -= left;
@@ -3008,6 +2517,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3008EXPORT_SYMBOL(proc_dostring); 2517EXPORT_SYMBOL(proc_dostring);
3009EXPORT_SYMBOL(proc_doulongvec_minmax); 2518EXPORT_SYMBOL(proc_doulongvec_minmax);
3010EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 2519EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3011EXPORT_SYMBOL(register_sysctl_table);
3012EXPORT_SYMBOL(register_sysctl_paths);
3013EXPORT_SYMBOL(unregister_sysctl_table);
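register_sysctl_table(), register_sysctl_paths() and unregister_sysctl_table() survive this patch; only their implementation (and the EXPORT_SYMBOLs above) moves out of kernel/sysctl.c as part of this series, and the ctl_table conventions described in the removed kernel-doc still hold. A minimal sketch of an in-kernel user, with hypothetical knob and header names:

    #include <linux/sysctl.h>
    #include <linux/init.h>
    #include <linux/errno.h>

    static int example_knob;

    static struct ctl_table example_table[] = {
            {
                    .procname       = "example_knob",
                    .data           = &example_knob,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = proc_dointvec,
            },
            { }     /* zero-filled sentinel terminates the table */
    };

    static const struct ctl_path example_path[] = {
            { .procname = "kernel" },
            { }
    };

    static struct ctl_table_header *example_header;

    static int __init example_sysctl_init(void)
    {
            /* Creates /proc/sys/kernel/example_knob */
            example_header = register_sysctl_paths(example_path, example_table);
            return example_header ? 0 : -ENOMEM;
    }

In a real user this would be wired up with an initcall or a module init hook and torn down with unregister_sysctl_table(example_header).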
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
deleted file mode 100644
index 362da653813d..000000000000
--- a/kernel/sysctl_check.c
+++ /dev/null
@@ -1,160 +0,0 @@
1#include <linux/stat.h>
2#include <linux/sysctl.h>
3#include "../fs/xfs/xfs_sysctl.h"
4#include <linux/sunrpc/debug.h>
5#include <linux/string.h>
6#include <net/ip_vs.h>
7
8
9static int sysctl_depth(struct ctl_table *table)
10{
11 struct ctl_table *tmp;
12 int depth;
13
14 depth = 0;
15 for (tmp = table; tmp->parent; tmp = tmp->parent)
16 depth++;
17
18 return depth;
19}
20
21static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
22{
23 int i;
24
25 for (i = 0; table && i < n; i++)
26 table = table->parent;
27
28 return table;
29}
30
31
32static void sysctl_print_path(struct ctl_table *table)
33{
34 struct ctl_table *tmp;
35 int depth, i;
36 depth = sysctl_depth(table);
37 if (table->procname) {
38 for (i = depth; i >= 0; i--) {
39 tmp = sysctl_parent(table, i);
40 printk("/%s", tmp->procname?tmp->procname:"");
41 }
42 }
43 printk(" ");
44}
45
46static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
47 struct ctl_table *table)
48{
49 struct ctl_table_header *head;
50 struct ctl_table *ref, *test;
51 int depth, cur_depth;
52
53 depth = sysctl_depth(table);
54
55 for (head = __sysctl_head_next(namespaces, NULL); head;
56 head = __sysctl_head_next(namespaces, head)) {
57 cur_depth = depth;
58 ref = head->ctl_table;
59repeat:
60 test = sysctl_parent(table, cur_depth);
61 for (; ref->procname; ref++) {
62 int match = 0;
63 if (cur_depth && !ref->child)
64 continue;
65
66 if (test->procname && ref->procname &&
67 (strcmp(test->procname, ref->procname) == 0))
68 match++;
69
70 if (match) {
71 if (cur_depth != 0) {
72 cur_depth--;
73 ref = ref->child;
74 goto repeat;
75 }
76 goto out;
77 }
78 }
79 }
80 ref = NULL;
81out:
82 sysctl_head_finish(head);
83 return ref;
84}
85
86static void set_fail(const char **fail, struct ctl_table *table, const char *str)
87{
88 if (*fail) {
89 printk(KERN_ERR "sysctl table check failed: ");
90 sysctl_print_path(table);
91 printk(" %s\n", *fail);
92 dump_stack();
93 }
94 *fail = str;
95}
96
97static void sysctl_check_leaf(struct nsproxy *namespaces,
98 struct ctl_table *table, const char **fail)
99{
100 struct ctl_table *ref;
101
102 ref = sysctl_check_lookup(namespaces, table);
103 if (ref && (ref != table))
104 set_fail(fail, table, "Sysctl already exists");
105}
106
107int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
108{
109 int error = 0;
110 for (; table->procname; table++) {
111 const char *fail = NULL;
112
113 if (table->parent) {
114 if (!table->parent->procname)
115 set_fail(&fail, table, "Parent without procname");
116 }
117 if (table->child) {
118 if (table->data)
119 set_fail(&fail, table, "Directory with data?");
120 if (table->maxlen)
121 set_fail(&fail, table, "Directory with maxlen?");
122 if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
123 set_fail(&fail, table, "Writable sysctl directory");
124 if (table->proc_handler)
125 set_fail(&fail, table, "Directory with proc_handler");
126 if (table->extra1)
127 set_fail(&fail, table, "Directory with extra1");
128 if (table->extra2)
129 set_fail(&fail, table, "Directory with extra2");
130 } else {
131 if ((table->proc_handler == proc_dostring) ||
132 (table->proc_handler == proc_dointvec) ||
133 (table->proc_handler == proc_dointvec_minmax) ||
134 (table->proc_handler == proc_dointvec_jiffies) ||
135 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
136 (table->proc_handler == proc_dointvec_ms_jiffies) ||
137 (table->proc_handler == proc_doulongvec_minmax) ||
138 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
139 if (!table->data)
140 set_fail(&fail, table, "No data");
141 if (!table->maxlen)
142 set_fail(&fail, table, "No maxlen");
143 }
144#ifdef CONFIG_PROC_SYSCTL
145 if (!table->proc_handler)
146 set_fail(&fail, table, "No proc_handler");
147#endif
148 sysctl_check_leaf(namespaces, table, &fail);
149 }
150 if (table->mode > 0777)
151 set_fail(&fail, table, "bogus .mode");
152 if (fail) {
153 set_fail(&fail, table, NULL);
154 error = -EINVAL;
155 }
156 if (table->child)
157 error |= sysctl_check_table(namespaces, table->child);
158 }
159 return error;
160}
diff --git a/kernel/time.c b/kernel/time.c
index 73e416db0a1e..ba744cf80696 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
163 return error; 163 return error;
164 164
165 if (tz) { 165 if (tz) {
166 /* SMP safe, global irq locking makes it work. */
167 sys_tz = *tz; 166 sys_tz = *tz;
168 update_vsyscall_tz(); 167 update_vsyscall_tz();
169 if (firsttime) { 168 if (firsttime) {
@@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
173 } 172 }
174 } 173 }
175 if (tv) 174 if (tv)
176 {
177 /* SMP safe, again the code in arch/foo/time.c should
178 * globally block out interrupts when it runs.
179 */
180 return do_settimeofday(tv); 175 return do_settimeofday(tv);
181 }
182 return 0; 176 return 0;
183} 177}
184 178
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 2cf9cc7aa103..a20dc8a3c949 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,6 +1,10 @@
1# 1#
2# Timer subsystem related configuration options 2# Timer subsystem related configuration options
3# 3#
4
5# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is
6# only related to the tick functionality. Oneshot clockevent devices
7# are supported independent of this.
4config TICK_ONESHOT 8config TICK_ONESHOT
5 bool 9 bool
6 10
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 8a46f5d64504..8a538c55fc7b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev,
96 return 0; 96 return 0;
97} 97}
98 98
99static inline void alarmtimer_rtc_timer_init(void)
100{
101 rtc_timer_init(&rtctimer, NULL, NULL);
102}
103
99static struct class_interface alarmtimer_rtc_interface = { 104static struct class_interface alarmtimer_rtc_interface = {
100 .add_dev = &alarmtimer_rtc_add_device, 105 .add_dev = &alarmtimer_rtc_add_device,
101}; 106};
@@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void)
117#define rtcdev (NULL) 122#define rtcdev (NULL)
118static inline int alarmtimer_rtc_interface_setup(void) { return 0; } 123static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
119static inline void alarmtimer_rtc_interface_remove(void) { } 124static inline void alarmtimer_rtc_interface_remove(void) { }
125static inline void alarmtimer_rtc_timer_init(void) { }
120#endif 126#endif
121 127
122/** 128/**
@@ -783,6 +789,8 @@ static int __init alarmtimer_init(void)
783 .nsleep = alarm_timer_nsleep, 789 .nsleep = alarm_timer_nsleep,
784 }; 790 };
785 791
792 alarmtimer_rtc_timer_init();
793
786 posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); 794 posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
787 posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); 795 posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
788 796
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a45ca167ab24..c9583382141a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
500{ 500{
501 u64 ret; 501 u64 ret;
502 /* 502 /*
503 * We won't try to correct for more then 11% adjustments (110,000 ppm), 503 * We won't try to correct for more than 11% adjustments (110,000 ppm),
504 */ 504 */
505 ret = (u64)cs->mult * 11; 505 ret = (u64)cs->mult * 11;
506 do_div(ret,100); 506 do_div(ret,100);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 6e039b144daf..f03fd83b170b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
34static u64 tick_length; 34static u64 tick_length;
35static u64 tick_length_base; 35static u64 tick_length_base;
36 36
37static struct hrtimer leap_timer;
38
39#define MAX_TICKADJ 500LL /* usecs */ 37#define MAX_TICKADJ 500LL /* usecs */
40#define MAX_TICKADJ_SCALED \ 38#define MAX_TICKADJ_SCALED \
41 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) 39 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)
381 379
382 380
383/* 381/*
384 * Leap second processing. If in leap-insert state at the end of the 382 * this routine handles the overflow of the microsecond field
385 * day, the system clock is set back one second; if in leap-delete 383 *
386 * state, the system clock is set ahead one second. 384 * The tricky bits of code to handle the accurate clock support
385 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
386 * They were originally developed for SUN and DEC kernels.
387 * All the kudos should go to Dave for this stuff.
388 *
389 * Also handles leap second processing, and returns leap offset
387 */ 390 */
388static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) 391int second_overflow(unsigned long secs)
389{ 392{
390 enum hrtimer_restart res = HRTIMER_NORESTART; 393 s64 delta;
391 unsigned long flags;
392 int leap = 0; 394 int leap = 0;
395 unsigned long flags;
393 396
394 spin_lock_irqsave(&ntp_lock, flags); 397 spin_lock_irqsave(&ntp_lock, flags);
398
399 /*
400 * Leap second processing. If in leap-insert state at the end of the
401 * day, the system clock is set back one second; if in leap-delete
402 * state, the system clock is set ahead one second.
403 */
395 switch (time_state) { 404 switch (time_state) {
396 case TIME_OK: 405 case TIME_OK:
406 if (time_status & STA_INS)
407 time_state = TIME_INS;
408 else if (time_status & STA_DEL)
409 time_state = TIME_DEL;
397 break; 410 break;
398 case TIME_INS: 411 case TIME_INS:
399 leap = -1; 412 if (secs % 86400 == 0) {
400 time_state = TIME_OOP; 413 leap = -1;
401 printk(KERN_NOTICE 414 time_state = TIME_OOP;
402 "Clock: inserting leap second 23:59:60 UTC\n"); 415 printk(KERN_NOTICE
403 hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); 416 "Clock: inserting leap second 23:59:60 UTC\n");
404 res = HRTIMER_RESTART; 417 }
405 break; 418 break;
406 case TIME_DEL: 419 case TIME_DEL:
407 leap = 1; 420 if ((secs + 1) % 86400 == 0) {
408 time_tai--; 421 leap = 1;
409 time_state = TIME_WAIT; 422 time_tai--;
410 printk(KERN_NOTICE 423 time_state = TIME_WAIT;
411 "Clock: deleting leap second 23:59:59 UTC\n"); 424 printk(KERN_NOTICE
425 "Clock: deleting leap second 23:59:59 UTC\n");
426 }
412 break; 427 break;
413 case TIME_OOP: 428 case TIME_OOP:
414 time_tai++; 429 time_tai++;
415 time_state = TIME_WAIT; 430 time_state = TIME_WAIT;
416 /* fall through */ 431 break;
432
417 case TIME_WAIT: 433 case TIME_WAIT:
418 if (!(time_status & (STA_INS | STA_DEL))) 434 if (!(time_status & (STA_INS | STA_DEL)))
419 time_state = TIME_OK; 435 time_state = TIME_OK;
420 break; 436 break;
421 } 437 }
422 spin_unlock_irqrestore(&ntp_lock, flags);
423 438
424 /*
425 * We have to call this outside of the ntp_lock to keep
426 * the proper locking hierarchy
427 */
428 if (leap)
429 timekeeping_leap_insert(leap);
430
431 return res;
432}
433
434/*
435 * this routine handles the overflow of the microsecond field
436 *
437 * The tricky bits of code to handle the accurate clock support
438 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
439 * They were originally developed for SUN and DEC kernels.
440 * All the kudos should go to Dave for this stuff.
441 */
442void second_overflow(void)
443{
444 s64 delta;
445 unsigned long flags;
446
447 spin_lock_irqsave(&ntp_lock, flags);
448 439
449 /* Bump the maxerror field */ 440 /* Bump the maxerror field */
450 time_maxerror += MAXFREQ / NSEC_PER_USEC; 441 time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,15 +472,17 @@ void second_overflow(void)
481 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) 472 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
482 << NTP_SCALE_SHIFT; 473 << NTP_SCALE_SHIFT;
483 time_adjust = 0; 474 time_adjust = 0;
475
476
477
484out: 478out:
485 spin_unlock_irqrestore(&ntp_lock, flags); 479 spin_unlock_irqrestore(&ntp_lock, flags);
480
481 return leap;
486} 482}
487 483
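second_overflow() now takes the current second and hands back the leap offset instead of arming an hrtimer, so applying the leap second becomes the caller's job. A hedged sketch of the expected calling pattern, mirroring what the removed timekeeping_leap_insert() used to do (the real call site lives in kernel/time/timekeeping.c and is not part of this hunk):

    /* Sketch only: roughly what the per-second accumulation path is expected to do. */
    static void accumulate_one_second(void)
    {
            int leap;

            timekeeper.xtime.tv_sec++;

            leap = second_overflow(timekeeper.xtime.tv_sec);
            timekeeper.xtime.tv_sec += leap;
            timekeeper.wall_to_monotonic.tv_sec -= leap;
    }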
488#ifdef CONFIG_GENERIC_CMOS_UPDATE 484#ifdef CONFIG_GENERIC_CMOS_UPDATE
489 485
490/* Disable the cmos update - used by virtualization and embedded */
491int no_sync_cmos_clock __read_mostly;
492
493static void sync_cmos_clock(struct work_struct *work); 486static void sync_cmos_clock(struct work_struct *work);
494 487
495static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); 488static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work)
536 529
537static void notify_cmos_timer(void) 530static void notify_cmos_timer(void)
538{ 531{
539 if (!no_sync_cmos_clock) 532 schedule_delayed_work(&sync_cmos_work, 0);
540 schedule_delayed_work(&sync_cmos_work, 0);
541} 533}
542 534
543#else 535#else
544static inline void notify_cmos_timer(void) { } 536static inline void notify_cmos_timer(void) { }
545#endif 537#endif
546 538
547/*
548 * Start the leap seconds timer:
549 */
550static inline void ntp_start_leap_timer(struct timespec *ts)
551{
552 long now = ts->tv_sec;
553
554 if (time_status & STA_INS) {
555 time_state = TIME_INS;
556 now += 86400 - now % 86400;
557 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
558
559 return;
560 }
561
562 if (time_status & STA_DEL) {
563 time_state = TIME_DEL;
564 now += 86400 - (now + 1) % 86400;
565 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
566 }
567}
568 539
569/* 540/*
570 * Propagate a new txc->status value into the NTP state: 541 * Propagate a new txc->status value into the NTP state:
@@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
589 time_status &= STA_RONLY; 560 time_status &= STA_RONLY;
590 time_status |= txc->status & ~STA_RONLY; 561 time_status |= txc->status & ~STA_RONLY;
591 562
592 switch (time_state) {
593 case TIME_OK:
594 ntp_start_leap_timer(ts);
595 break;
596 case TIME_INS:
597 case TIME_DEL:
598 time_state = TIME_OK;
599 ntp_start_leap_timer(ts);
600 case TIME_WAIT:
601 if (!(time_status & (STA_INS | STA_DEL)))
602 time_state = TIME_OK;
603 break;
604 case TIME_OOP:
605 hrtimer_restart(&leap_timer);
606 break;
607 }
608} 563}
609/* 564/*
610 * Called with the xtime lock held, so we can access and modify 565 * Called with the xtime lock held, so we can access and modify
@@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc)
686 (txc->tick < 900000/USER_HZ || 641 (txc->tick < 900000/USER_HZ ||
687 txc->tick > 1100000/USER_HZ)) 642 txc->tick > 1100000/USER_HZ))
688 return -EINVAL; 643 return -EINVAL;
689
690 if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
691 hrtimer_cancel(&leap_timer);
692 } 644 }
693 645
694 if (txc->modes & ADJ_SETOFFSET) { 646 if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
1010void __init ntp_init(void) 962void __init ntp_init(void)
1011{ 963{
1012 ntp_clear(); 964 ntp_clear();
1013 hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
1014 leap_timer.function = ntp_leap_second;
1015} 965}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index e883f57a3cd3..bf57abdc7bd0 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void)
575 unsigned long flags; 575 unsigned long flags;
576 576
577 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 577 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
578
579 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
580
578 if (cpumask_empty(tick_get_broadcast_mask())) 581 if (cpumask_empty(tick_get_broadcast_mask()))
579 goto end; 582 goto end;
580 583
581 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
582 bc = tick_broadcast_device.evtdev; 584 bc = tick_broadcast_device.evtdev;
583 if (bc) 585 if (bc)
584 tick_broadcast_setup_oneshot(bc); 586 tick_broadcast_setup_oneshot(bc);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3526038f2836..6a3a5b9ff561 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
534 hrtimer_get_expires(&ts->sched_timer), 0)) 534 hrtimer_get_expires(&ts->sched_timer), 0))
535 break; 535 break;
536 } 536 }
537 /* Update jiffies and reread time */ 537 /* Reread time and update jiffies */
538 tick_do_update_jiffies64(now);
539 now = ktime_get(); 538 now = ktime_get();
539 tick_do_update_jiffies64(now);
540 } 540 }
541} 541}
542 542
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 403c2a092830..d66b21308f7c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
184} 184}
185 185
186 186
187void timekeeping_leap_insert(int leapsecond)
188{
189 unsigned long flags;
190
191 write_seqlock_irqsave(&timekeeper.lock, flags);
192 timekeeper.xtime.tv_sec += leapsecond;
193 timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
194 timekeeping_update(false);
195 write_sequnlock_irqrestore(&timekeeper.lock, flags);
196
197}
198
199/** 187/**
200 * timekeeping_forward_now - update clock to the current time 188 * timekeeping_forward_now - update clock to the current time
201 * 189 *
@@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
448static int change_clocksource(void *data) 436static int change_clocksource(void *data)
449{ 437{
450 struct clocksource *new, *old; 438 struct clocksource *new, *old;
439 unsigned long flags;
451 440
452 new = (struct clocksource *) data; 441 new = (struct clocksource *) data;
453 442
443 write_seqlock_irqsave(&timekeeper.lock, flags);
444
454 timekeeping_forward_now(); 445 timekeeping_forward_now();
455 if (!new->enable || new->enable(new) == 0) { 446 if (!new->enable || new->enable(new) == 0) {
456 old = timekeeper.clock; 447 old = timekeeper.clock;
@@ -458,6 +449,10 @@ static int change_clocksource(void *data)
458 if (old->disable) 449 if (old->disable)
459 old->disable(old); 450 old->disable(old);
460 } 451 }
452 timekeeping_update(true);
453
454 write_sequnlock_irqrestore(&timekeeper.lock, flags);
455
461 return 0; 456 return 0;
462} 457}
463 458
@@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset)
827 int adj; 822 int adj;
828 823
829 /* 824 /*
830 * The point of this is to check if the error is greater then half 825 * The point of this is to check if the error is greater than half
831 * an interval. 826 * an interval.
832 * 827 *
833 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. 828 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
@@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset)
835 * Note we subtract one in the shift, so that error is really error*2. 830 * Note we subtract one in the shift, so that error is really error*2.
836 * This "saves" dividing(shifting) interval twice, but keeps the 831 * This "saves" dividing(shifting) interval twice, but keeps the
837 * (error > interval) comparison as still measuring if error is 832 * (error > interval) comparison as still measuring if error is
838 * larger then half an interval. 833 * larger than half an interval.
839 * 834 *
840 * Note: It does not "save" on aggravation when reading the code. 835 * Note: It does not "save" on aggravation when reading the code.
841 */ 836 */
@@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset)
843 if (error > interval) { 838 if (error > interval) {
844 /* 839 /*
845 * We now divide error by 4(via shift), which checks if 840 * We now divide error by 4(via shift), which checks if
846 * the error is greater then twice the interval. 841 * the error is greater than twice the interval.
847 * If it is greater, we need a bigadjust, if its smaller, 842 * If it is greater, we need a bigadjust, if its smaller,
848 * we can adjust by 1. 843 * we can adjust by 1.
849 */ 844 */
@@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset)
874 } else /* No adjustment needed */ 869 } else /* No adjustment needed */
875 return; 870 return;
876 871
877 WARN_ONCE(timekeeper.clock->maxadj && 872 if (unlikely(timekeeper.clock->maxadj &&
878 (timekeeper.mult + adj > timekeeper.clock->mult + 873 (timekeeper.mult + adj >
879 timekeeper.clock->maxadj), 874 timekeeper.clock->mult + timekeeper.clock->maxadj))) {
880 "Adjusting %s more then 11%% (%ld vs %ld)\n", 875 printk_once(KERN_WARNING
876 "Adjusting %s more than 11%% (%ld vs %ld)\n",
881 timekeeper.clock->name, (long)timekeeper.mult + adj, 877 timekeeper.clock->name, (long)timekeeper.mult + adj,
882 (long)timekeeper.clock->mult + 878 (long)timekeeper.clock->mult +
883 timekeeper.clock->maxadj); 879 timekeeper.clock->maxadj);
880 }
884 /* 881 /*
885 * So the following can be confusing. 882 * So the following can be confusing.
886 * 883 *
@@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
952 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; 949 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
953 u64 raw_nsecs; 950 u64 raw_nsecs;
954 951
955 /* If the offset is smaller then a shifted interval, do nothing */ 952 /* If the offset is smaller than a shifted interval, do nothing */
956 if (offset < timekeeper.cycle_interval<<shift) 953 if (offset < timekeeper.cycle_interval<<shift)
957 return offset; 954 return offset;
958 955
@@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
962 959
963 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; 960 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
964 while (timekeeper.xtime_nsec >= nsecps) { 961 while (timekeeper.xtime_nsec >= nsecps) {
962 int leap;
965 timekeeper.xtime_nsec -= nsecps; 963 timekeeper.xtime_nsec -= nsecps;
966 timekeeper.xtime.tv_sec++; 964 timekeeper.xtime.tv_sec++;
967 second_overflow(); 965 leap = second_overflow(timekeeper.xtime.tv_sec);
966 timekeeper.xtime.tv_sec += leap;
968 } 967 }
969 968
970 /* Accumulate raw time */ 969 /* Accumulate raw time */
@@ -1018,13 +1017,13 @@ static void update_wall_time(void)
1018 * With NO_HZ we may have to accumulate many cycle_intervals 1017 * With NO_HZ we may have to accumulate many cycle_intervals
1019 * (think "ticks") worth of time at once. To do this efficiently, 1018 * (think "ticks") worth of time at once. To do this efficiently,
1020 * we calculate the largest doubling multiple of cycle_intervals 1019 * we calculate the largest doubling multiple of cycle_intervals
1021 * that is smaller then the offset. We then accumulate that 1020 * that is smaller than the offset. We then accumulate that
1022 * chunk in one go, and then try to consume the next smaller 1021 * chunk in one go, and then try to consume the next smaller
1023 * doubled multiple. 1022 * doubled multiple.
1024 */ 1023 */
1025 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); 1024 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
1026 shift = max(0, shift); 1025 shift = max(0, shift);
1027 /* Bound shift to one less then what overflows tick_length */ 1026 /* Bound shift to one less than what overflows tick_length */
1028 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; 1027 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
1029 shift = min(shift, maxshift); 1028 shift = min(shift, maxshift);
1030 while (offset >= timekeeper.cycle_interval) { 1029 while (offset >= timekeeper.cycle_interval) {
@@ -1072,12 +1071,14 @@ static void update_wall_time(void)
1072 1071
1073 /* 1072 /*
1074 * Finally, make sure that after the rounding 1073 * Finally, make sure that after the rounding
1075 * xtime.tv_nsec isn't larger then NSEC_PER_SEC 1074 * xtime.tv_nsec isn't larger than NSEC_PER_SEC
1076 */ 1075 */
1077 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { 1076 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
1077 int leap;
1078 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; 1078 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
1079 timekeeper.xtime.tv_sec++; 1079 timekeeper.xtime.tv_sec++;
1080 second_overflow(); 1080 leap = second_overflow(timekeeper.xtime.tv_sec);
1081 timekeeper.xtime.tv_sec += leap;
1081 } 1082 }
1082 1083
1083 timekeeping_update(false); 1084 timekeeping_update(false);
@@ -1260,6 +1261,8 @@ ktime_t ktime_get_monotonic_offset(void)
1260 1261
1261 return timespec_to_ktime(wtom); 1262 return timespec_to_ktime(wtom);
1262} 1263}
1264EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1265
1263 1266
1264/** 1267/**
1265 * xtime_update() - advances the timekeeping infrastructure 1268 * xtime_update() - advances the timekeeping infrastructure
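[Editorial sketch, not part of the patch] The ntp.c and timekeeping.c hunks above drop the dedicated leap_timer: each whole second accumulated by the timekeeping core now asks NTP directly whether a leap adjustment is due. A minimal sketch of that flow, assuming (as the call sites suggest) that second_overflow() returns a small signed correction, presumably -1 to insert a leap second, +1 to delete one, and 0 otherwise:

    /* Sketch only: per-second accumulation with in-line leap handling. */
    while (timekeeper.xtime_nsec >= nsecps) {
            int leap;

            timekeeper.xtime_nsec -= nsecps;
            timekeeper.xtime.tv_sec++;

            /* second_overflow() now reports any pending leap second,
             * so no hrtimer has to apply it asynchronously. */
            leap = second_overflow(timekeeper.xtime.tv_sec);
            timekeeper.xtime.tv_sec += leap;
    }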
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cd3134510f3d..a1d2849f2473 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,7 +141,7 @@ if FTRACE
141config FUNCTION_TRACER 141config FUNCTION_TRACER
142 bool "Kernel Function Tracer" 142 bool "Kernel Function Tracer"
143 depends on HAVE_FUNCTION_TRACER 143 depends on HAVE_FUNCTION_TRACER
144 select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE 144 select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE
145 select KALLSYMS 145 select KALLSYMS
146 select GENERIC_TRACER 146 select GENERIC_TRACER
147 select CONTEXT_SWITCH_TRACER 147 select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index cdea7b56b0c9..c0bd0308741c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -311,13 +311,6 @@ int blk_trace_remove(struct request_queue *q)
311} 311}
312EXPORT_SYMBOL_GPL(blk_trace_remove); 312EXPORT_SYMBOL_GPL(blk_trace_remove);
313 313
314static int blk_dropped_open(struct inode *inode, struct file *filp)
315{
316 filp->private_data = inode->i_private;
317
318 return 0;
319}
320
321static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, 314static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
322 size_t count, loff_t *ppos) 315 size_t count, loff_t *ppos)
323{ 316{
@@ -331,18 +324,11 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
331 324
332static const struct file_operations blk_dropped_fops = { 325static const struct file_operations blk_dropped_fops = {
333 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
334 .open = blk_dropped_open, 327 .open = simple_open,
335 .read = blk_dropped_read, 328 .read = blk_dropped_read,
336 .llseek = default_llseek, 329 .llseek = default_llseek,
337}; 330};
338 331
339static int blk_msg_open(struct inode *inode, struct file *filp)
340{
341 filp->private_data = inode->i_private;
342
343 return 0;
344}
345
346static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, 332static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
347 size_t count, loff_t *ppos) 333 size_t count, loff_t *ppos)
348{ 334{
@@ -371,7 +357,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
371 357
372static const struct file_operations blk_msg_fops = { 358static const struct file_operations blk_msg_fops = {
373 .owner = THIS_MODULE, 359 .owner = THIS_MODULE,
374 .open = blk_msg_open, 360 .open = simple_open,
375 .write = blk_msg_write, 361 .write = blk_msg_write,
376 .llseek = noop_llseek, 362 .llseek = noop_llseek,
377}; 363};
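[Editorial note] Both deleted open helpers in blktrace.c were local copies of the same boilerplate; simple_open() is the libfs helper that replaces them, which (roughly, per fs/libfs.c) amounts to:

    int simple_open(struct inode *inode, struct file *file)
    {
            if (inode->i_private)
                    file->private_data = inode->i_private;
            return 0;
    }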
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 867bd1dd2dd0..0fa92f677c92 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -249,7 +249,8 @@ static void update_ftrace_function(void)
249#else 249#else
250 __ftrace_trace_function = func; 250 __ftrace_trace_function = func;
251#endif 251#endif
252 ftrace_trace_function = ftrace_test_stop_func; 252 ftrace_trace_function =
253 (func == ftrace_stub) ? func : ftrace_test_stop_func;
253#endif 254#endif
254} 255}
255 256
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f5b7b5c1195b..cf8d11e91efd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -154,33 +154,10 @@ enum {
154 154
155static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 155static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
156 156
157#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) 157/* Used for individual buffers (after the counter) */
158 158#define RB_BUFFER_OFF (1 << 20)
159/**
160 * tracing_on - enable all tracing buffers
161 *
162 * This function enables all tracing buffers that may have been
163 * disabled with tracing_off.
164 */
165void tracing_on(void)
166{
167 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
168}
169EXPORT_SYMBOL_GPL(tracing_on);
170 159
171/** 160#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
172 * tracing_off - turn off all tracing buffers
173 *
174 * This function stops all tracing buffers from recording data.
175 * It does not disable any overhead the tracers themselves may
176 * be causing. This function simply causes all recording to
177 * the ring buffers to fail.
178 */
179void tracing_off(void)
180{
181 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
182}
183EXPORT_SYMBOL_GPL(tracing_off);
184 161
185/** 162/**
186 * tracing_off_permanent - permanently disable ring buffers 163 * tracing_off_permanent - permanently disable ring buffers
@@ -193,15 +170,6 @@ void tracing_off_permanent(void)
193 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 170 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
194} 171}
195 172
196/**
197 * tracing_is_on - show state of ring buffers enabled
198 */
199int tracing_is_on(void)
200{
201 return ring_buffer_flags == RB_BUFFERS_ON;
202}
203EXPORT_SYMBOL_GPL(tracing_is_on);
204
205#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 173#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
206#define RB_ALIGNMENT 4U 174#define RB_ALIGNMENT 4U
207#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 175#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
@@ -2619,6 +2587,63 @@ void ring_buffer_record_enable(struct ring_buffer *buffer)
2619EXPORT_SYMBOL_GPL(ring_buffer_record_enable); 2587EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
2620 2588
2621/** 2589/**
2590 * ring_buffer_record_off - stop all writes into the buffer
2591 * @buffer: The ring buffer to stop writes to.
2592 *
2593 * This prevents all writes to the buffer. Any attempt to write
2594 * to the buffer after this will fail and return NULL.
2595 *
 2596 * This is different from ring_buffer_record_disable() as
 2597 * it works like an on/off switch, whereas the disable() version
 2598 * must be paired with an enable().
2599 */
2600void ring_buffer_record_off(struct ring_buffer *buffer)
2601{
2602 unsigned int rd;
2603 unsigned int new_rd;
2604
2605 do {
2606 rd = atomic_read(&buffer->record_disabled);
2607 new_rd = rd | RB_BUFFER_OFF;
2608 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2609}
2610EXPORT_SYMBOL_GPL(ring_buffer_record_off);
2611
2612/**
2613 * ring_buffer_record_on - restart writes into the buffer
2614 * @buffer: The ring buffer to start writes to.
2615 *
2616 * This enables all writes to the buffer that was disabled by
2617 * ring_buffer_record_off().
2618 *
 2619 * This is different from ring_buffer_record_enable() as
 2620 * it works like an on/off switch, whereas the enable() version
 2621 * must be paired with a disable().
2622 */
2623void ring_buffer_record_on(struct ring_buffer *buffer)
2624{
2625 unsigned int rd;
2626 unsigned int new_rd;
2627
2628 do {
2629 rd = atomic_read(&buffer->record_disabled);
2630 new_rd = rd & ~RB_BUFFER_OFF;
2631 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2632}
2633EXPORT_SYMBOL_GPL(ring_buffer_record_on);
2634
2635/**
2636 * ring_buffer_record_is_on - return true if the ring buffer can write
2637 * @buffer: The ring buffer to see if write is enabled
2638 *
2639 * Returns true if the ring buffer is in a state that it accepts writes.
2640 */
2641int ring_buffer_record_is_on(struct ring_buffer *buffer)
2642{
2643 return !atomic_read(&buffer->record_disabled);
2644}
2645
2646/**
2622 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer 2647 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
2623 * @buffer: The ring buffer to stop writes to. 2648 * @buffer: The ring buffer to stop writes to.
2624 * @cpu: The CPU buffer to stop 2649 * @cpu: The CPU buffer to stop
@@ -4039,68 +4064,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
4039} 4064}
4040EXPORT_SYMBOL_GPL(ring_buffer_read_page); 4065EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4041 4066
4042#ifdef CONFIG_TRACING
4043static ssize_t
4044rb_simple_read(struct file *filp, char __user *ubuf,
4045 size_t cnt, loff_t *ppos)
4046{
4047 unsigned long *p = filp->private_data;
4048 char buf[64];
4049 int r;
4050
4051 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
4052 r = sprintf(buf, "permanently disabled\n");
4053 else
4054 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
4055
4056 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4057}
4058
4059static ssize_t
4060rb_simple_write(struct file *filp, const char __user *ubuf,
4061 size_t cnt, loff_t *ppos)
4062{
4063 unsigned long *p = filp->private_data;
4064 unsigned long val;
4065 int ret;
4066
4067 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4068 if (ret)
4069 return ret;
4070
4071 if (val)
4072 set_bit(RB_BUFFERS_ON_BIT, p);
4073 else
4074 clear_bit(RB_BUFFERS_ON_BIT, p);
4075
4076 (*ppos)++;
4077
4078 return cnt;
4079}
4080
4081static const struct file_operations rb_simple_fops = {
4082 .open = tracing_open_generic,
4083 .read = rb_simple_read,
4084 .write = rb_simple_write,
4085 .llseek = default_llseek,
4086};
4087
4088
4089static __init int rb_init_debugfs(void)
4090{
4091 struct dentry *d_tracer;
4092
4093 d_tracer = tracing_init_dentry();
4094
4095 trace_create_file("tracing_on", 0644, d_tracer,
4096 &ring_buffer_flags, &rb_simple_fops);
4097
4098 return 0;
4099}
4100
4101fs_initcall(rb_init_debugfs);
4102#endif
4103
4104#ifdef CONFIG_HOTPLUG_CPU 4067#ifdef CONFIG_HOTPLUG_CPU
4105static int rb_cpu_notify(struct notifier_block *self, 4068static int rb_cpu_notify(struct notifier_block *self,
4106 unsigned long action, void *hcpu) 4069 unsigned long action, void *hcpu)
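[Editorial sketch, not part of the patch] An illustration of the split introduced above: ring_buffer_record_disable()/enable() keep their nesting-counter semantics, while the new ring_buffer_record_off()/on() pair acts as a sticky switch on the RB_BUFFER_OFF bit. Sketch only, using the functions added in this hunk (rb_switch_demo is a hypothetical caller):

    static void rb_switch_demo(struct ring_buffer *buffer)
    {
            ring_buffer_record_disable(buffer);     /* count 1: writes blocked */
            ring_buffer_record_disable(buffer);     /* count 2 */
            ring_buffer_record_enable(buffer);      /* count 1: still blocked */
            ring_buffer_record_enable(buffer);      /* count 0: writes allowed again */

            ring_buffer_record_off(buffer);         /* sets RB_BUFFER_OFF */
            ring_buffer_record_off(buffer);         /* idempotent, still off */
            ring_buffer_record_on(buffer);          /* clears RB_BUFFER_OFF */

            WARN_ON(!ring_buffer_record_is_on(buffer));
    }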
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 10d5503f0d04..ed7b5d1e12f4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
36#include <linux/ctype.h> 36#include <linux/ctype.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/poll.h> 38#include <linux/poll.h>
39#include <linux/nmi.h>
39#include <linux/fs.h> 40#include <linux/fs.h>
40 41
41#include "trace.h" 42#include "trace.h"
@@ -352,6 +353,59 @@ static void wakeup_work_handler(struct work_struct *work)
352static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); 353static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
353 354
354/** 355/**
356 * tracing_on - enable tracing buffers
357 *
358 * This function enables tracing buffers that may have been
359 * disabled with tracing_off.
360 */
361void tracing_on(void)
362{
363 if (global_trace.buffer)
364 ring_buffer_record_on(global_trace.buffer);
365 /*
366 * This flag is only looked at when buffers haven't been
367 * allocated yet. We don't really care about the race
368 * between setting this flag and actually turning
369 * on the buffer.
370 */
371 global_trace.buffer_disabled = 0;
372}
373EXPORT_SYMBOL_GPL(tracing_on);
374
375/**
376 * tracing_off - turn off tracing buffers
377 *
378 * This function stops the tracing buffers from recording data.
379 * It does not disable any overhead the tracers themselves may
380 * be causing. This function simply causes all recording to
381 * the ring buffers to fail.
382 */
383void tracing_off(void)
384{
385 if (global_trace.buffer)
 386 ring_buffer_record_off(global_trace.buffer);
387 /*
388 * This flag is only looked at when buffers haven't been
389 * allocated yet. We don't really care about the race
390 * between setting this flag and actually turning
391 * on the buffer.
392 */
393 global_trace.buffer_disabled = 1;
394}
395EXPORT_SYMBOL_GPL(tracing_off);
396
397/**
398 * tracing_is_on - show state of ring buffers enabled
399 */
400int tracing_is_on(void)
401{
402 if (global_trace.buffer)
403 return ring_buffer_record_is_on(global_trace.buffer);
404 return !global_trace.buffer_disabled;
405}
406EXPORT_SYMBOL_GPL(tracing_is_on);
407
408/**
355 * trace_wake_up - wake up tasks waiting for trace input 409 * trace_wake_up - wake up tasks waiting for trace input
356 * 410 *
357 * Schedules a delayed work to wake up any task that is blocked on the 411 * Schedules a delayed work to wake up any task that is blocked on the
@@ -1644,6 +1698,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1644 int cpu_file = iter->cpu_file; 1698 int cpu_file = iter->cpu_file;
1645 u64 next_ts = 0, ts; 1699 u64 next_ts = 0, ts;
1646 int next_cpu = -1; 1700 int next_cpu = -1;
1701 int next_size = 0;
1647 int cpu; 1702 int cpu;
1648 1703
1649 /* 1704 /*
@@ -1675,9 +1730,12 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1675 next_cpu = cpu; 1730 next_cpu = cpu;
1676 next_ts = ts; 1731 next_ts = ts;
1677 next_lost = lost_events; 1732 next_lost = lost_events;
1733 next_size = iter->ent_size;
1678 } 1734 }
1679 } 1735 }
1680 1736
1737 iter->ent_size = next_size;
1738
1681 if (ent_cpu) 1739 if (ent_cpu)
1682 *ent_cpu = next_cpu; 1740 *ent_cpu = next_cpu;
1683 1741
@@ -4567,6 +4625,55 @@ static __init void create_trace_options_dir(void)
4567 create_trace_option_core_file(trace_options[i], i); 4625 create_trace_option_core_file(trace_options[i], i);
4568} 4626}
4569 4627
4628static ssize_t
4629rb_simple_read(struct file *filp, char __user *ubuf,
4630 size_t cnt, loff_t *ppos)
4631{
4632 struct ring_buffer *buffer = filp->private_data;
4633 char buf[64];
4634 int r;
4635
4636 if (buffer)
4637 r = ring_buffer_record_is_on(buffer);
4638 else
4639 r = 0;
4640
4641 r = sprintf(buf, "%d\n", r);
4642
4643 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4644}
4645
4646static ssize_t
4647rb_simple_write(struct file *filp, const char __user *ubuf,
4648 size_t cnt, loff_t *ppos)
4649{
4650 struct ring_buffer *buffer = filp->private_data;
4651 unsigned long val;
4652 int ret;
4653
4654 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4655 if (ret)
4656 return ret;
4657
4658 if (buffer) {
4659 if (val)
4660 ring_buffer_record_on(buffer);
4661 else
4662 ring_buffer_record_off(buffer);
4663 }
4664
4665 (*ppos)++;
4666
4667 return cnt;
4668}
4669
4670static const struct file_operations rb_simple_fops = {
4671 .open = tracing_open_generic,
4672 .read = rb_simple_read,
4673 .write = rb_simple_write,
4674 .llseek = default_llseek,
4675};
4676
4570static __init int tracer_init_debugfs(void) 4677static __init int tracer_init_debugfs(void)
4571{ 4678{
4572 struct dentry *d_tracer; 4679 struct dentry *d_tracer;
@@ -4626,6 +4733,9 @@ static __init int tracer_init_debugfs(void)
4626 trace_create_file("trace_clock", 0644, d_tracer, NULL, 4733 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4627 &trace_clock_fops); 4734 &trace_clock_fops);
4628 4735
4736 trace_create_file("tracing_on", 0644, d_tracer,
4737 global_trace.buffer, &rb_simple_fops);
4738
4629#ifdef CONFIG_DYNAMIC_FTRACE 4739#ifdef CONFIG_DYNAMIC_FTRACE
4630 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4740 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4631 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4741 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4798,6 +4908,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4798 if (ret != TRACE_TYPE_NO_CONSUME) 4908 if (ret != TRACE_TYPE_NO_CONSUME)
4799 trace_consume(&iter); 4909 trace_consume(&iter);
4800 } 4910 }
4911 touch_nmi_watchdog();
4801 4912
4802 trace_printk_seq(&iter.seq); 4913 trace_printk_seq(&iter.seq);
4803 } 4914 }
@@ -4863,6 +4974,8 @@ __init static int tracer_alloc_buffers(void)
4863 goto out_free_cpumask; 4974 goto out_free_cpumask;
4864 } 4975 }
4865 global_trace.entries = ring_buffer_size(global_trace.buffer); 4976 global_trace.entries = ring_buffer_size(global_trace.buffer);
4977 if (global_trace.buffer_disabled)
4978 tracing_off();
4866 4979
4867 4980
4868#ifdef CONFIG_TRACER_MAX_TRACE 4981#ifdef CONFIG_TRACER_MAX_TRACE
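[Editorial sketch, not part of the patch] In-kernel callers are unchanged by the move above: tracing_on()/tracing_off()/tracing_is_on() now route to the ring-buffer switch, falling back to the new buffer_disabled flag before buffers are allocated. A hedged usage sketch (my_debug_checkpoint is a hypothetical helper; the tracing declarations live in linux/kernel.h):

    #include <linux/kernel.h>   /* tracing_off(), tracing_is_on(), trace_printk() */

    static void my_debug_checkpoint(void)
    {
            if (tracing_is_on()) {
                    trace_printk("freezing trace at checkpoint\n");
                    tracing_off();  /* ends up in ring_buffer_record_off(global_trace.buffer) */
            }
    }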
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 54faec790bc1..95059f091a24 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -154,6 +154,7 @@ struct trace_array {
154 struct ring_buffer *buffer; 154 struct ring_buffer *buffer;
155 unsigned long entries; 155 unsigned long entries;
156 int cpu; 156 int cpu;
157 int buffer_disabled;
157 cycle_t time_start; 158 cycle_t time_start;
158 struct task_struct *waiter; 159 struct task_struct *waiter;
159 struct trace_array_cpu *data[NR_CPUS]; 160 struct trace_array_cpu *data[NR_CPUS];
@@ -835,13 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[];
835 filter) 836 filter)
836#include "trace_entries.h" 837#include "trace_entries.h"
837 838
838#ifdef CONFIG_PERF_EVENTS
839#ifdef CONFIG_FUNCTION_TRACER 839#ifdef CONFIG_FUNCTION_TRACER
840int perf_ftrace_event_register(struct ftrace_event_call *call, 840int perf_ftrace_event_register(struct ftrace_event_call *call,
841 enum trace_reg type, void *data); 841 enum trace_reg type, void *data);
842#else 842#else
843#define perf_ftrace_event_register NULL 843#define perf_ftrace_event_register NULL
844#endif /* CONFIG_FUNCTION_TRACER */ 844#endif /* CONFIG_FUNCTION_TRACER */
845#endif /* CONFIG_PERF_EVENTS */
846 845
847#endif /* _LINUX_KERNEL_TRACE_H */ 846#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index d91eb0541b3a..4108e1250ca2 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -166,6 +166,12 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
166 166
167#define FTRACE_STACK_ENTRIES 8 167#define FTRACE_STACK_ENTRIES 8
168 168
169#ifndef CONFIG_64BIT
170# define IP_FMT "%08lx"
171#else
172# define IP_FMT "%016lx"
173#endif
174
169FTRACE_ENTRY(kernel_stack, stack_entry, 175FTRACE_ENTRY(kernel_stack, stack_entry,
170 176
171 TRACE_STACK, 177 TRACE_STACK,
@@ -175,8 +181,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
175 __dynamic_array(unsigned long, caller ) 181 __dynamic_array(unsigned long, caller )
176 ), 182 ),
177 183
178 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" 184 F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
179 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", 185 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
186 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
180 __entry->caller[0], __entry->caller[1], __entry->caller[2], 187 __entry->caller[0], __entry->caller[1], __entry->caller[2],
181 __entry->caller[3], __entry->caller[4], __entry->caller[5], 188 __entry->caller[3], __entry->caller[4], __entry->caller[5],
182 __entry->caller[6], __entry->caller[7]), 189 __entry->caller[6], __entry->caller[7]),
@@ -193,8 +200,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
193 __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) 200 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
194 ), 201 ),
195 202
196 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" 203 F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
197 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", 204 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
205 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
198 __entry->caller[0], __entry->caller[1], __entry->caller[2], 206 __entry->caller[0], __entry->caller[1], __entry->caller[2],
199 __entry->caller[3], __entry->caller[4], __entry->caller[5], 207 __entry->caller[3], __entry->caller[4], __entry->caller[5],
200 __entry->caller[6], __entry->caller[7]), 208 __entry->caller[6], __entry->caller[7]),
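[Editorial note] The IP_FMT macro above just selects the word-size format at compile time; adjacent string literals are pasted, so (illustrative expansion, not from the patch):

    /* "\t=> (" IP_FMT ")\n" becomes "\t=> (%08lx)\n" on 32-bit kernels
     * and "\t=> (%016lx)\n" on 64-bit kernels. */
    printk("\t=> (" IP_FMT ")\n", __entry->caller[0]);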
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 7b46c9bd22ae..3dd15e8bc856 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -162,7 +162,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
162#define __dynamic_array(type, item) 162#define __dynamic_array(type, item)
163 163
164#undef F_printk 164#undef F_printk
165#define F_printk(fmt, args...) #fmt ", " __stringify(args) 165#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args)
166 166
167#undef FTRACE_ENTRY_REG 167#undef FTRACE_ENTRY_REG
168#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ 168#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c5a01873567d..859fae6b1825 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -264,7 +264,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
264 return ret; 264 return ret;
265} 265}
266 266
267int trace_seq_path(struct trace_seq *s, struct path *path) 267int trace_seq_path(struct trace_seq *s, const struct path *path)
268{ 268{
269 unsigned char *p; 269 unsigned char *p;
270 270
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 14bc092fb12c..df30ee08bdd4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -9,6 +9,8 @@
9 * to those contributors as well. 9 * to those contributors as well.
10 */ 10 */
11 11
12#define pr_fmt(fmt) "NMI watchdog: " fmt
13
12#include <linux/mm.h> 14#include <linux/mm.h>
13#include <linux/cpu.h> 15#include <linux/cpu.h>
14#include <linux/nmi.h> 16#include <linux/nmi.h>
@@ -319,11 +321,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
319 */ 321 */
320static int watchdog(void *unused) 322static int watchdog(void *unused)
321{ 323{
322 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 324 struct sched_param param = { .sched_priority = 0 };
323 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 325 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
324 326
325 sched_setscheduler(current, SCHED_FIFO, &param);
326
327 /* initialize timestamp */ 327 /* initialize timestamp */
328 __touch_watchdog(); 328 __touch_watchdog();
329 329
@@ -349,8 +349,11 @@ static int watchdog(void *unused)
349 349
350 set_current_state(TASK_INTERRUPTIBLE); 350 set_current_state(TASK_INTERRUPTIBLE);
351 } 351 }
352 /*
353 * Drop the policy/priority elevation during thread exit to avoid a
354 * scheduling latency spike.
355 */
352 __set_current_state(TASK_RUNNING); 356 __set_current_state(TASK_RUNNING);
353 param.sched_priority = 0;
354 sched_setscheduler(current, SCHED_NORMAL, &param); 357 sched_setscheduler(current, SCHED_NORMAL, &param);
355 return 0; 358 return 0;
356} 359}
@@ -376,18 +379,20 @@ static int watchdog_nmi_enable(int cpu)
376 /* Try to register using hardware perf events */ 379 /* Try to register using hardware perf events */
377 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); 380 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
378 if (!IS_ERR(event)) { 381 if (!IS_ERR(event)) {
379 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); 382 pr_info("enabled, takes one hw-pmu counter.\n");
380 goto out_save; 383 goto out_save;
381 } 384 }
382 385
383 386
384 /* vary the KERN level based on the returned errno */ 387 /* vary the KERN level based on the returned errno */
385 if (PTR_ERR(event) == -EOPNOTSUPP) 388 if (PTR_ERR(event) == -EOPNOTSUPP)
386 printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); 389 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
387 else if (PTR_ERR(event) == -ENOENT) 390 else if (PTR_ERR(event) == -ENOENT)
388 printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); 391 pr_warning("disabled (cpu%i): hardware events not enabled\n",
392 cpu);
389 else 393 else
390 printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); 394 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
395 cpu, PTR_ERR(event));
391 return PTR_ERR(event); 396 return PTR_ERR(event);
392 397
393 /* success path */ 398 /* success path */
@@ -439,9 +444,10 @@ static int watchdog_enable(int cpu)
439 444
440 /* create the watchdog thread */ 445 /* create the watchdog thread */
441 if (!p) { 446 if (!p) {
447 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
442 p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); 448 p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
443 if (IS_ERR(p)) { 449 if (IS_ERR(p)) {
444 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 450 pr_err("softlockup watchdog for %i failed\n", cpu);
445 if (!err) { 451 if (!err) {
446 /* if hardlockup hasn't already set this */ 452 /* if hardlockup hasn't already set this */
447 err = PTR_ERR(p); 453 err = PTR_ERR(p);
@@ -450,6 +456,7 @@ static int watchdog_enable(int cpu)
450 } 456 }
451 goto out; 457 goto out;
452 } 458 }
459 sched_setscheduler(p, SCHED_FIFO, &param);
453 kthread_bind(p, cpu); 460 kthread_bind(p, cpu);
454 per_cpu(watchdog_touch_ts, cpu) = 0; 461 per_cpu(watchdog_touch_ts, cpu) = 0;
455 per_cpu(softlockup_watchdog, cpu) = p; 462 per_cpu(softlockup_watchdog, cpu) = p;
@@ -496,7 +503,7 @@ static void watchdog_enable_all_cpus(void)
496 watchdog_enabled = 1; 503 watchdog_enabled = 1;
497 504
498 if (!watchdog_enabled) 505 if (!watchdog_enabled)
499 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); 506 pr_err("failed to be enabled on some cpus\n");
500 507
501} 508}
502 509