author     Mauro Carvalho Chehab <mchehab@redhat.com>   2012-04-19 08:23:28 -0400
committer  Mauro Carvalho Chehab <mchehab@redhat.com>   2012-04-19 08:23:28 -0400
commit     d5aeee8cb28317ef608ecac421abc4d986d585d2 (patch)
tree       70ec8ed8891f26e5c58152ffca9924ea1c58fe3a /kernel
parent     32898a145404acbebe3256709e012c2830a2043b (diff)
parent     e816b57a337ea3b755de72bec38c10c864f23015 (diff)
Merge tag 'v3.4-rc3' into staging/for_v3.5
* tag 'v3.4-rc3': (3755 commits)
  Linux 3.4-rc3
  x86-32: fix up strncpy_from_user() sign error
  ARM: 7386/1: jump_label: fixup for rename to static_key
  ARM: 7384/1: ThumbEE: Disable userspace TEEHBR access for !CONFIG_ARM_THUMBEE
  ARM: 7382/1: mm: truncate memory banks to fit in 4GB space for classic MMU
  ARM: 7359/2: smp_twd: Only wait for reprogramming on active cpus
  PCI: Fix regression in pci_restore_state(), v3
  SCSI: Fix error handling when no ULD is attached
  ARM: OMAP: clock: cleanup CPUfreq leftovers, fix build errors
  ARM: dts: remove blank interrupt-parent properties
  ARM: EXYNOS: Fix Kconfig dependencies for device tree enabled machine files
  do not export kernel's NULL #define to userspace
  ARM: EXYNOS: Remove broken config values for touchscren for NURI board
  ARM: EXYNOS: set fix xusbxti clock for NURI and Universal210 boards
  ARM: EXYNOS: fix regulator name for NURI board
  ARM: SAMSUNG: make SAMSUNG_PM_DEBUG select DEBUG_LL
  cpufreq: OMAP: fix build errors: depends on ARCH_OMAP2PLUS
  sparc64: Eliminate obsolete __handle_softirq() function
  sparc64: Fix bootup crash on sun4v.
  ARM: msm: Fix section mismatches in proc_comm.c
  ...
Diffstat (limited to 'kernel')
 kernel/Kconfig.locks          |    4
 kernel/Kconfig.preempt        |    1
 kernel/Makefile               |    1
 kernel/cgroup.c               |    2
 kernel/compat.c               |   68
 kernel/cpuset.c               |   31
 kernel/cred.c                 |    2
 kernel/debug/debug_core.c     |   54
 kernel/debug/kdb/kdb_bt.c     |    1
 kernel/debug/kdb/kdb_io.c     |    2
 kernel/dma.c                  |    1
 kernel/events/core.c          |   11
 kernel/exit.c                 |    2
 kernel/futex.c                |   38
 kernel/futex_compat.c         |   38
 kernel/irq/Kconfig            |   15
 kernel/irq/handle.c           |   16
 kernel/irq/irqdomain.c        |   55
 kernel/irq/manage.c           |   19
 kernel/irq/migration.c        |   10
 kernel/irq_work.c             |    2
 kernel/itimer.c               |    8
 kernel/kexec.c                |    7
 kernel/kmod.c                 |  201
 kernel/module.c               |   37
 kernel/padata.c               |   13
 kernel/panic.c                |    2
 kernel/params.c               |   40
 kernel/pid_namespace.c        |   41
 kernel/power/hibernate.c      |   18
 kernel/power/process.c        |    8
 kernel/power/qos.c            |   50
 kernel/power/suspend.c        |    7
 kernel/power/user.c           |   10
 kernel/ptrace.c               |   66
 kernel/rwsem.c                |    1
 kernel/sched/core.c           |   72
 kernel/sched/fair.c           |   16
 kernel/sched/rt.c             |    2
 kernel/sched/sched.h          |    3
 kernel/signal.c               |   19
 kernel/smp.c                  |   90
 kernel/spinlock.c             |    2
 kernel/sys.c                  |    9
 kernel/sysctl.c               |  521
 kernel/sysctl_check.c         |  160
 kernel/time.c                 |    6
 kernel/time/Kconfig           |    4
 kernel/time/alarmtimer.c      |    8
 kernel/time/clocksource.c     |    2
 kernel/time/ntp.c             |  134
 kernel/time/tick-broadcast.c  |    4
 kernel/time/tick-sched.c      |    4
 kernel/time/timekeeping.c     |   51
 kernel/trace/Kconfig          |    2
 kernel/trace/blktrace.c       |   18
 kernel/trace/ftrace.c         |    3
 kernel/trace/ring_buffer.c    |  157
 kernel/trace/trace.c          |  113
 kernel/trace/trace.h          |    3
 kernel/trace/trace_entries.h  |   16
 kernel/trace/trace_export.c   |    2
62 files changed, 1014 insertions, 1289 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 5068e2a4e75..2251882daf5 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE
 	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
 		 ARCH_INLINE_SPIN_LOCK_IRQSAVE
 
-config INLINE_SPIN_UNLOCK
-	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
+config UNINLINE_SPIN_UNLOCK
+	bool
 
 config INLINE_SPIN_UNLOCK_BH
 	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 24e7cb0ba26..3f9c97419f0 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY
 config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	select PREEMPT_COUNT
+	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
 	help
 	  This option reduces the latency of the kernel by making
 	  all kernel code (that is not executing in a critical section)
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d9de86b7e7..cb41b9547c9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,7 +27,6 @@ obj-y += power/
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
-obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f4ea4b6f3cf..ed64ccac67c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1883,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval;
+	int retval = 0;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
 	struct cgroupfs_root *root = cgrp->root;
diff --git a/kernel/compat.c b/kernel/compat.c
index f346cedfe24..74ff8498809 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -31,11 +31,10 @@
 #include <asm/uaccess.h>
 
 /*
- * Note that the native side is already converted to a timespec, because
- * that's what we want anyway.
+ * Get/set struct timeval with struct timespec on the native side
  */
-static int compat_get_timeval(struct timespec *o,
+static int compat_get_timeval_convert(struct timespec *o,
 		struct compat_timeval __user *i)
 {
 	long usec;
 
@@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o,
 	return 0;
 }
 
-static int compat_put_timeval(struct compat_timeval __user *o,
+static int compat_put_timeval_convert(struct compat_timeval __user *o,
 		struct timeval *i)
 {
 	return (put_user(i->tv_sec, &o->tv_sec) ||
 		put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
@@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
 	if (tv) {
 		struct timeval ktv;
 		do_gettimeofday(&ktv);
-		if (compat_put_timeval(tv, &ktv))
+		if (compat_put_timeval_convert(tv, &ktv))
 			return -EFAULT;
 	}
 	if (tz) {
@@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 	struct timezone ktz;
 
 	if (tv) {
-		if (compat_get_timeval(&kts, tv))
+		if (compat_get_timeval_convert(&kts, tv))
 			return -EFAULT;
 	}
 	if (tz) {
@@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
+int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv)
+{
+	return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
+		__get_user(tv->tv_sec, &ctv->tv_sec) ||
+		__get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_timeval);
+
+int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv)
+{
+	return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
+		__put_user(tv->tv_sec, &ctv->tv_sec) ||
+		__put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_timeval);
+
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
 	return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
 		__get_user(ts->tv_sec, &cts->tv_sec) ||
 		__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
+EXPORT_SYMBOL_GPL(get_compat_timespec);
 
 int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
 {
@@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user
 }
 EXPORT_SYMBOL_GPL(put_compat_timespec);
 
+int compat_get_timeval(struct timeval *tv, const void __user *utv)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0;
+	else
+		return get_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_get_timeval);
+
+int compat_put_timeval(const struct timeval *tv, void __user *utv)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0;
+	else
+		return put_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_put_timeval);
+
+int compat_get_timespec(struct timespec *ts, const void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0;
+	else
+		return get_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_get_timespec);
+
+int compat_put_timespec(const struct timespec *ts, void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0;
+	else
+		return put_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_put_timespec);
+
 static long compat_nanosleep_restart(struct restart_block *restart)
 {
 	struct compat_timespec __user *rmtp;
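
[Editor's note: the compat_get/put_* helpers added above branch on COMPAT_USE_64BIT_TIME, so ABIs that already use the native 64-bit layout take a straight copy while true 32-bit callers get field-by-field widening. Below is a minimal user-space sketch of that dual-path idea; every type and name in it is a hypothetical stand-in, not kernel API.]

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct compat_tv { int32_t tv_sec; int32_t tv_usec; };  /* 32-bit wire format */
    struct native_tv { int64_t tv_sec; int64_t tv_usec; };  /* native format */

    /* Stands in for COMPAT_USE_64BIT_TIME: nonzero means the caller
     * already hands us the native 64-bit layout. */
    static int use_64bit_time;

    static int get_tv(struct native_tv *tv, const void *user_buf)
    {
        if (use_64bit_time) {
            memcpy(tv, user_buf, sizeof(*tv));   /* layouts match: plain copy */
            return 0;
        }
        /* Otherwise widen field by field from the 32-bit layout. */
        const struct compat_tv *ctv = user_buf;
        tv->tv_sec = ctv->tv_sec;
        tv->tv_usec = ctv->tv_usec;
        return 0;
    }

    int main(void)
    {
        struct compat_tv in = { 1334838208, 500000 };
        struct native_tv out;
        get_tv(&out, &in);
        printf("%lld.%06lld\n", (long long)out.tv_sec, (long long)out.tv_usec);
        return 0;
    }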
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1010cc61931..14f7070b4ba 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = {
  * are online.  If none are online, walk up the cpuset hierarchy
  * until we find one that does have some online cpus.  If we get
  * all the way to the top and still haven't found any online cpus,
- * return cpu_online_map.  Or if passed a NULL cs from an exit'ing
- * task, return cpu_online_map.
+ * return cpu_online_mask.  Or if passed a NULL cs from an exit'ing
+ * task, return cpu_online_mask.
  *
  * One way or another, we guarantee to return some non-empty subset
- * of cpu_online_map.
+ * of cpu_online_mask.
  *
  * Call with callback_mutex held.
  */
@@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	int retval;
 	int is_load_balanced;
 
-	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
 	if (cs == &top_cpuset)
 		return -EACCES;
 
@@ -2149,7 +2149,7 @@ void __init cpuset_init_smp(void)
  *
  * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of cpu_online_map, even if this means going outside the
+ * subset of cpu_online_mask, even if this means going outside the
  * tasks cpuset.
  **/
 
@@ -2162,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	mutex_unlock(&callback_mutex);
 }
 
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
 	const struct cpuset *cs;
-	int cpu;
 
 	rcu_read_lock();
 	cs = task_cs(tsk);
@@ -2186,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
 	 * set any mask even if it is not right from task_cs() pov,
 	 * the pending set_cpus_allowed_ptr() will fix things.
+	 *
+	 * select_fallback_rq() will fix things ups and set cpu_possible_mask
+	 * if required.
 	 */
-
-	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
-	if (cpu >= nr_cpu_ids) {
-		/*
-		 * Either tsk->cpus_allowed is wrong (see above) or it
-		 * is actually empty. The latter case is only possible
-		 * if we are racing with remove_tasks_in_empty_cpuset().
-		 * Like above we can temporary set any mask and rely on
-		 * set_cpus_allowed_ptr() as synchronization point.
-		 */
-		do_set_cpus_allowed(tsk, cpu_possible_mask);
-		cpu = cpumask_any(cpu_active_mask);
-	}
-
-	return cpu;
 }
 
 void cpuset_init_current_mems_allowed(void)
diff --git a/kernel/cred.c b/kernel/cred.c
index 97b36eeca4c..e70683d9ec3 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
+	p->replacement_session_keyring = NULL;
+
 	if (
 #ifdef CONFIG_KEYS
 		!p->cred->thread_keyring &&
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 3f88a45e6f0..0557f24c6bc 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -53,7 +53,6 @@
 #include <asm/cacheflush.h>
 #include <asm/byteorder.h>
 #include <linux/atomic.h>
-#include <asm/system.h>
 
 #include "debug_core.h"
 
@@ -161,37 +160,39 @@ early_param("nokgdbroundup", opt_nokgdbroundup);
  * Weak aliases for breakpoint management,
  * can be overriden by architectures when needed:
  */
-int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
+int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
 
-	err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE);
+	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				BREAK_INSTR_SIZE);
 	if (err)
 		return err;
-
-	return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr,
-				  BREAK_INSTR_SIZE);
+	err = probe_kernel_write((char *)bpt->bpt_addr,
+				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+	return err;
 }
 
-int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
+int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-	return probe_kernel_write((char *)addr,
-				  (char *)bundle, BREAK_INSTR_SIZE);
+	return probe_kernel_write((char *)bpt->bpt_addr,
+				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }
 
 int __weak kgdb_validate_break_address(unsigned long addr)
 {
-	char tmp_variable[BREAK_INSTR_SIZE];
+	struct kgdb_bkpt tmp;
 	int err;
-	/* Validate setting the breakpoint and then removing it.  In the
+	/* Validate setting the breakpoint and then removing it.  If the
 	 * remove fails, the kernel needs to emit a bad message because we
 	 * are deep trouble not being able to put things back the way we
 	 * found them.
 	 */
-	err = kgdb_arch_set_breakpoint(addr, tmp_variable);
+	tmp.bpt_addr = addr;
+	err = kgdb_arch_set_breakpoint(&tmp);
 	if (err)
 		return err;
-	err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
+	err = kgdb_arch_remove_breakpoint(&tmp);
 	if (err)
 		printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
 		       "memory destroyed at: %lx", addr);
@@ -235,7 +236,6 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
  */
 int dbg_activate_sw_breakpoints(void)
 {
-	unsigned long addr;
 	int error;
 	int ret = 0;
 	int i;
@@ -244,16 +244,15 @@ int dbg_activate_sw_breakpoints(void)
 		if (kgdb_break[i].state != BP_SET)
 			continue;
 
-		addr = kgdb_break[i].bpt_addr;
-		error = kgdb_arch_set_breakpoint(addr,
-				kgdb_break[i].saved_instr);
+		error = kgdb_arch_set_breakpoint(&kgdb_break[i]);
 		if (error) {
 			ret = error;
-			printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
+			printk(KERN_INFO "KGDB: BP install failed: %lx",
+			       kgdb_break[i].bpt_addr);
 			continue;
 		}
 
-		kgdb_flush_swbreak_addr(addr);
+		kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr);
 		kgdb_break[i].state = BP_ACTIVE;
 	}
 	return ret;
@@ -302,7 +301,6 @@ int dbg_set_sw_break(unsigned long addr)
 
 int dbg_deactivate_sw_breakpoints(void)
 {
-	unsigned long addr;
 	int error;
 	int ret = 0;
 	int i;
@@ -310,15 +308,14 @@ int dbg_deactivate_sw_breakpoints(void)
 	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
 		if (kgdb_break[i].state != BP_ACTIVE)
 			continue;
-		addr = kgdb_break[i].bpt_addr;
-		error = kgdb_arch_remove_breakpoint(addr,
-					kgdb_break[i].saved_instr);
+		error = kgdb_arch_remove_breakpoint(&kgdb_break[i]);
 		if (error) {
-			printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
+			printk(KERN_INFO "KGDB: BP remove failed: %lx\n",
+			       kgdb_break[i].bpt_addr);
 			ret = error;
 		}
 
-		kgdb_flush_swbreak_addr(addr);
+		kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr);
 		kgdb_break[i].state = BP_SET;
 	}
 	return ret;
@@ -352,7 +349,6 @@ int kgdb_isremovedbreak(unsigned long addr)
 
 int dbg_remove_all_break(void)
 {
-	unsigned long addr;
 	int error;
 	int i;
 
@@ -360,12 +356,10 @@ int dbg_remove_all_break(void)
 	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
 		if (kgdb_break[i].state != BP_ACTIVE)
 			goto setundefined;
-		addr = kgdb_break[i].bpt_addr;
-		error = kgdb_arch_remove_breakpoint(addr,
-					kgdb_break[i].saved_instr);
+		error = kgdb_arch_remove_breakpoint(&kgdb_break[i]);
 		if (error)
 			printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
-			       addr);
+			       kgdb_break[i].bpt_addr);
 setundefined:
 		kgdb_break[i].state = BP_UNDEFINED;
 	}
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7179eac7b41..07c9bbb94a0 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -15,7 +15,6 @@
 #include <linux/sched.h>
 #include <linux/kdb.h>
 #include <linux/nmi.h>
-#include <asm/system.h>
 #include "kdb_private.h"
 
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 9b5f17da1c5..bb9520f0f6f 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -743,7 +743,7 @@ kdb_printit:
 	kdb_input_flush();
 	c = console_drivers;
 
-	if (!dbg_io_ops->is_console) {
+	if (dbg_io_ops && !dbg_io_ops->is_console) {
 		len = strlen(moreprompt);
 		cp = moreprompt;
 		while (len--) {
diff --git a/kernel/dma.c b/kernel/dma.c
index 68a2306522c..6c6262f86c1 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -18,7 +18,6 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <asm/dma.h>
-#include <asm/system.h>
 
 
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4b50357914f..a6a9ec4cd8f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event,
 	*running = ctx_time - event->tstamp_running;
 }
 
-void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 }
 
@@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event)
 	userpg->time_running = running +
 			atomic64_read(&event->child_total_time_running);
 
-	perf_update_user_clock(userpg, now);
+	arch_perf_update_userpage(userpg, now);
 
 	barrier();
 	++userpg->lock;
@@ -7116,6 +7116,13 @@ void __init perf_event_init(void)
 
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&perf_sched_events, HZ);
+
+	/*
+	 * Build time assertion that we keep the data_head at the intended
+	 * location. IOW, validation we got the __reserved[] size right.
+	 */
+	BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
+		     != 1024);
 }
 
 static int __init perf_event_sysfs_init(void)
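
[Editor's note: the BUILD_BUG_ON added above pins data_head at byte offset 1024 of the mmap'd control page, so resizing __reserved[] breaks the build instead of the ABI. The same compile-time layout check can be written in plain C11 with _Static_assert; the struct below is an invented stand-in, not the real perf_event_mmap_page.]

    #include <stddef.h>
    #include <stdint.h>

    struct mmap_page {                  /* stand-in for perf_event_mmap_page */
        uint32_t version;
        uint32_t compat_version;
        uint8_t  __reserved[1016];      /* sized so data_head lands at 1024 */
        uint64_t data_head;
    };

    /* Fails at compile time, not at boot, if someone resizes __reserved[]. */
    _Static_assert(offsetof(struct mmap_page, data_head) == 1024,
                   "data_head must stay at offset 1024");

    int main(void) { return 0; }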
diff --git a/kernel/exit.c b/kernel/exit.c
index 3db1909faed..d8bd3b425fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -474,7 +474,7 @@ static void close_files(struct files_struct * files)
 		i = j * __NFDBITS;
 		if (i >= fdt->max_fds)
 			break;
-		set = fdt->open_fds->fds_bits[j++];
+		set = fdt->open_fds[j++];
 		while (set) {
 			if (set & 1) {
 				struct file * file = xchg(&fdt->fd[i], NULL);
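
[Editor's note: close_files() walks the open-fd bitmap one word at a time, testing the low bit and shifting; the hunk above only drops the fd_set wrapper around those words. A self-contained sketch of that bit-walk, with made-up data.]

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t words[2] = { 0x13, 0x80 };   /* fds 0, 1, 4 and 71 "open" */
        for (unsigned j = 0; j < 2; j++) {
            unsigned fd = j * 64;
            uint64_t set = words[j];
            while (set) {                     /* same loop shape as close_files() */
                if (set & 1)
                    printf("closing fd %u\n", fd);
                fd++;
                set >>= 1;
            }
        }
        return 0;
    }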
diff --git a/kernel/futex.c b/kernel/futex.c
index 72efa1e4359..e2b0fb9a0b3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -59,6 +59,7 @@
 #include <linux/magic.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
+#include <linux/ptrace.h>
 
 #include <asm/futex.h>
 
@@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 {
 	struct robust_list_head __user *head;
 	unsigned long ret;
-	const struct cred *cred = current_cred(), *pcred;
+	struct task_struct *p;
 
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
 
+	WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+	rcu_read_lock();
+
+	ret = -ESRCH;
 	if (!pid)
-		head = current->robust_list;
+		p = current;
 	else {
-		struct task_struct *p;
-
-		ret = -ESRCH;
-		rcu_read_lock();
 		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
-		ret = -EPERM;
-		pcred = __task_cred(p);
-		/* If victim is in different user_ns, then uids are not
-		   comparable, so we must have CAP_SYS_PTRACE */
-		if (cred->user->user_ns != pcred->user->user_ns) {
-			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-				goto err_unlock;
-			goto ok;
-		}
-		/* If victim is in same user_ns, then uids are comparable */
-		if (cred->euid != pcred->euid &&
-		    cred->euid != pcred->uid &&
-		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-			goto err_unlock;
-ok:
-		head = p->robust_list;
-		rcu_read_unlock();
 	}
 
+	ret = -EPERM;
+	if (!ptrace_may_access(p, PTRACE_MODE_READ))
+		goto err_unlock;
+
+	head = p->robust_list;
+	rcu_read_unlock();
+
 	if (put_user(sizeof(*head), len_ptr))
 		return -EFAULT;
 	return put_user(head, head_ptr);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 5f9e689dc8f..83e368b005f 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -10,6 +10,7 @@
 #include <linux/compat.h>
 #include <linux/nsproxy.h>
 #include <linux/futex.h>
+#include <linux/ptrace.h>
 
 #include <asm/uaccess.h>
 
@@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 {
 	struct compat_robust_list_head __user *head;
 	unsigned long ret;
-	const struct cred *cred = current_cred(), *pcred;
+	struct task_struct *p;
 
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
 
+	WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+	rcu_read_lock();
+
+	ret = -ESRCH;
 	if (!pid)
-		head = current->compat_robust_list;
+		p = current;
 	else {
-		struct task_struct *p;
-
-		ret = -ESRCH;
-		rcu_read_lock();
 		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
-		ret = -EPERM;
-		pcred = __task_cred(p);
-		/* If victim is in different user_ns, then uids are not
-		   comparable, so we must have CAP_SYS_PTRACE */
-		if (cred->user->user_ns != pcred->user->user_ns) {
-			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-				goto err_unlock;
-			goto ok;
-		}
-		/* If victim is in same user_ns, then uids are comparable */
-		if (cred->euid != pcred->euid &&
-		    cred->euid != pcred->uid &&
-		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-			goto err_unlock;
-ok:
-		head = p->compat_robust_list;
-		rcu_read_unlock();
 	}
 
+	ret = -EPERM;
+	if (!ptrace_may_access(p, PTRACE_MODE_READ))
+		goto err_unlock;
+
+	head = p->compat_robust_list;
+	rcu_read_unlock();
+
 	if (put_user(sizeof(*head), len_ptr))
 		return -EFAULT;
 	return put_user(ptr_to_compat(head), head_ptr);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 5a38bf4de64..d1a758bc972 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -13,7 +13,7 @@ config GENERIC_HARDIRQS
 # Options selectable by the architecture code
 
 # Make sparse irq Kconfig switch below available
-config HAVE_SPARSE_IRQ
+config MAY_HAVE_SPARSE_IRQ
 	bool
 
 # Enable the generic irq autoprobe mechanism
@@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP
 config IRQ_DOMAIN
 	bool
 
+config IRQ_DOMAIN_DEBUG
+	bool "Expose hardware/virtual IRQ mapping via debugfs"
+	depends on IRQ_DOMAIN && DEBUG_FS
+	help
+	  This option will show the mapping relationship between hardware irq
+	  numbers and Linux irq numbers. The mapping is exposed via debugfs
+	  in the file "irq_domain_mapping".
+
+	  If you don't know what this means you don't need it.
+
 # Support forced irq threading
 config IRQ_FORCED_THREADING
 	bool
 
 config SPARSE_IRQ
-	bool "Support sparse irq numbering"
-	depends on HAVE_SPARSE_IRQ
+	bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ
 	---help---
 
 	  Sparse irq numbering is useful for distro kernels that want
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6ff84e6a954..bdb18032555 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
 static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
 {
 	/*
-	 * Wake up the handler thread for this action. In case the
-	 * thread crashed and was killed we just pretend that we
-	 * handled the interrupt. The hardirq handler has disabled the
-	 * device interrupt, so no irq storm is lurking. If the
+	 * In case the thread crashed and was killed we just pretend that
+	 * we handled the interrupt. The hardirq handler has disabled the
+	 * device interrupt, so no irq storm is lurking.
+	 */
+	if (action->thread->flags & PF_EXITING)
+		return;
+
+	/*
+	 * Wake up the handler thread for this action. If the
 	 * RUNTHREAD bit is already set, nothing to do.
 	 */
-	if ((action->thread->flags & PF_EXITING) ||
-	    test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+	if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
 		return;
 
 	/*
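
[Editor's note: the rewrite above leans on test_and_set_bit() returning the previous bit value, so only the caller that actually flips RUNTHREAD from clear to set does the wakeup; everyone else backs off. A user-space sketch of the same claim-once pattern with C11 atomics; names are illustrative.]

    #include <stdatomic.h>
    #include <stdio.h>

    #define RUNTHREAD 0x1u
    static atomic_uint thread_flags;        /* stands in for action->thread_flags */

    /* Returns 1 only for the caller that transitions the bit from clear
     * to set, which is what makes the duplicate-wakeup check race-free. */
    static int claim_wakeup(void)
    {
        return !(atomic_fetch_or(&thread_flags, RUNTHREAD) & RUNTHREAD);
    }

    int main(void)
    {
        printf("%d %d\n", claim_wakeup(), claim_wakeup());  /* prints: 1 0 */
        return 0;
    }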
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index af48e59bc2f..0e0ba5f840b 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list);
 static DEFINE_MUTEX(irq_domain_mutex);
 
 static DEFINE_MUTEX(revmap_trees_mutex);
-static unsigned int irq_virq_count = NR_IRQS;
 static struct irq_domain *irq_default_domain;
 
 /**
@@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
 }
 
 struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
+					 unsigned int max_irq,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
 	struct irq_domain *domain = irq_domain_alloc(of_node,
 					IRQ_DOMAIN_MAP_NOMAP, ops, host_data);
-	if (domain)
+	if (domain) {
+		domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0;
 		irq_domain_add(domain);
+	}
 	return domain;
 }
 
@@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain)
 	irq_default_domain = domain;
 }
 
-/**
- * irq_set_virq_count() - Set the maximum number of linux irqs
- * @count: number of linux irqs, capped with NR_IRQS
- *
- * This is mainly for use by platforms like iSeries who want to program
- * the virtual irq number in the controller to avoid the reverse mapping
- */
-void irq_set_virq_count(unsigned int count)
-{
-	pr_debug("irq: Trying to set virq count to %d\n", count);
-
-	BUG_ON(count < NUM_ISA_INTERRUPTS);
-	if (count < NR_IRQS)
-		irq_virq_count = count;
-}
-
 static int irq_setup_virq(struct irq_domain *domain, unsigned int virq,
 			  irq_hw_number_t hwirq)
 {
@@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 		pr_debug("irq: create_direct virq allocation failed\n");
 		return 0;
 	}
-	if (virq >= irq_virq_count) {
+	if (virq >= domain->revmap_data.nomap.max_irq) {
 		pr_err("ERROR: no free irqs available below %i maximum\n",
-			irq_virq_count);
+			domain->revmap_data.nomap.max_irq);
 		irq_free_desc(virq);
 		return 0;
 	}
-
 	pr_debug("irq: create_direct obtained virq %d\n", virq);
 
 	if (irq_setup_virq(domain, virq, virq)) {
@@ -350,7 +335,8 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 unsigned int irq_create_mapping(struct irq_domain *domain,
 				irq_hw_number_t hwirq)
 {
-	unsigned int virq, hint;
+	unsigned int hint;
+	int virq;
 
 	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
 
@@ -377,13 +363,13 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
 		return irq_domain_legacy_revmap(domain, hwirq);
 
 	/* Allocate a virtual interrupt number */
-	hint = hwirq % irq_virq_count;
+	hint = hwirq % nr_irqs;
 	if (hint == 0)
 		hint++;
 	virq = irq_alloc_desc_from(hint, 0);
-	if (!virq)
+	if (virq <= 0)
 		virq = irq_alloc_desc_from(1, 0);
-	if (!virq) {
+	if (virq <= 0) {
 		pr_debug("irq: -> virq allocation failed\n");
 		return 0;
 	}
@@ -515,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 			    irq_hw_number_t hwirq)
 {
 	unsigned int i;
-	unsigned int hint = hwirq % irq_virq_count;
+	unsigned int hint = hwirq % nr_irqs;
 
 	/* Look for default domain if nececssary */
 	if (domain == NULL)
@@ -536,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 		if (data && (data->domain == domain) && (data->hwirq == hwirq))
 			return i;
 		i++;
-		if (i >= irq_virq_count)
+		if (i >= nr_irqs)
 			i = 1;
 	} while(i != hint);
 	return 0;
@@ -632,7 +618,7 @@ unsigned int irq_linear_revmap(struct irq_domain *domain,
 	return revmap[hwirq];
 }
 
-#ifdef CONFIG_VIRQ_DEBUG
+#ifdef CONFIG_IRQ_DOMAIN_DEBUG
 static int virq_debug_show(struct seq_file *m, void *private)
 {
 	unsigned long flags;
@@ -642,8 +628,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
 	void *data;
 	int i;
 
-	seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq",
-		      "chip name", "chip data", "domain name");
+	seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq",
+		      "chip name", (int)(2 * sizeof(void *) + 2), "chip data",
+		      "domain name");
 
 	for (i = 1; i < nr_irqs; i++) {
 		desc = irq_to_desc(i);
@@ -666,9 +653,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
 		seq_printf(m, "%-15s ", p);
 
 		data = irq_desc_get_chip_data(desc);
-		seq_printf(m, "0x%16p ", data);
+		seq_printf(m, data ? "0x%p " : " %p ", data);
 
-		if (desc->irq_data.domain->of_node)
+		if (desc->irq_data.domain && desc->irq_data.domain->of_node)
 			p = desc->irq_data.domain->of_node->full_name;
 		else
 			p = none;
@@ -695,14 +682,14 @@ static const struct file_operations virq_debug_fops = {
 
 static int __init irq_debugfs_init(void)
 {
-	if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
+	if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL,
 				 NULL, &virq_debug_fops) == NULL)
 		return -ENOMEM;
 
 	return 0;
 }
 __initcall(irq_debugfs_init);
-#endif /* CONFIG_VIRQ_DEBUG */
+#endif /* CONFIG_IRQ_DOMAIN_DEBUG */
 
 int irq_domain_simple_map(struct irq_domain *d, unsigned int irq,
 			  irq_hw_number_t hwirq)
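
[Editor's note: irq_find_mapping() keeps its hint-plus-wraparound linear scan, now bounded by nr_irqs instead of the removed irq_virq_count, with slot 0 reserved as "no irq". A toy version of that search shape over a hypothetical table, not the kernel structures.]

    #include <stdio.h>

    #define NR 16

    static int find_from_hint(const int table[NR], int want, unsigned hint)
    {
        unsigned i = hint;
        do {
            if (table[i] == want)
                return (int)i;
            if (++i >= NR)
                i = 1;              /* wrap around, slot 0 is reserved */
        } while (i != hint);
        return 0;                   /* 0 doubles as "not found" */
    }

    int main(void)
    {
        int table[NR] = { 0 };
        table[5] = 42;
        printf("%d\n", find_from_hint(table, 42, 9));   /* prints: 5 */
        return 0;
    }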
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b0ccd1ac2d6..89a3ea82569 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct cpumask *set = irq_default_affinity;
-	int ret;
+	int ret, node = desc->irq_data.node;
 
 	/* Excludes PER_CPU and NO_BALANCE interrupts */
 	if (!irq_can_set_affinity(irq))
@@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
 	}
 
 	cpumask_and(mask, cpu_online_mask, set);
+	if (node != NUMA_NO_NODE) {
+		const struct cpumask *nodemask = cpumask_of_node(node);
+
+		/* make sure at least one of the cpus in nodemask is online */
+		if (cpumask_intersects(mask, nodemask))
+			cpumask_and(mask, mask, nodemask);
+	}
 	ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
@@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action)
 	 * is marked MASKED.
 	 */
 static void irq_finalize_oneshot(struct irq_desc *desc,
-				 struct irqaction *action, bool force)
+				 struct irqaction *action)
 {
 	if (!(desc->istate & IRQS_ONESHOT))
 		return;
@@ -679,7 +686,7 @@ again:
 	 * we would clear the threads_oneshot bit of this thread which
 	 * was just set.
 	 */
-	if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+	if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
 		goto out_unlock;
 
 	desc->threads_oneshot &= ~action->thread_mask;
@@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
 
 	local_bh_disable();
 	ret = action->thread_fn(action->irq, action->dev_id);
-	irq_finalize_oneshot(desc, action, false);
+	irq_finalize_oneshot(desc, action);
 	local_bh_enable();
 	return ret;
 }
@@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
 	irqreturn_t ret;
 
 	ret = action->thread_fn(action->irq, action->dev_id);
-	irq_finalize_oneshot(desc, action, false);
+	irq_finalize_oneshot(desc, action);
 	return ret;
 }
 
@@ -844,7 +851,7 @@ void exit_irq_thread(void)
 	wake_threads_waitq(desc);
 
 	/* Prevent a stale desc->threads_oneshot */
-	irq_finalize_oneshot(desc, action, true);
+	irq_finalize_oneshot(desc, action);
 }
 
 static void irq_setup_forced_threading(struct irqaction *new)
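
[Editor's note: the setup_affinity() hunk narrows the default affinity to the interrupt's home NUMA node only when that intersection still contains an online CPU, so the resulting mask can never go empty. The same guard on plain bitmasks, one bit per CPU; all names invented.]

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pick_affinity(uint64_t online, uint64_t preferred,
                                  uint64_t nodemask)
    {
        uint64_t mask = online & preferred;
        if (mask & nodemask)        /* at least one node-local CPU is usable */
            mask &= nodemask;
        return mask;                /* otherwise leave the wider mask alone */
    }

    int main(void)
    {
        /* CPUs 0-3 online and allowed, node owns CPUs 2-3: keep only 2-3. */
        printf("%#llx\n",
               (unsigned long long)pick_affinity(0xF, 0xF, 0xC)); /* 0xc */
        return 0;
    }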
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 47420908fba..c3c89751b32 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata)
 	 * masking the irqs.
 	 */
 	if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
-		   < nr_cpu_ids))
-		if (!chip->irq_set_affinity(&desc->irq_data,
-					    desc->pending_mask, false)) {
+		   < nr_cpu_ids)) {
+		int ret = chip->irq_set_affinity(&desc->irq_data,
+						 desc->pending_mask, false);
+		switch (ret) {
+		case IRQ_SET_MASK_OK:
 			cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
+		case IRQ_SET_MASK_OK_NOCOPY:
 			irq_set_thread_affinity(desc);
 		}
+	}
 
 	cpumask_clear(desc->pending_mask);
 }
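
[Editor's note: the fallthrough in the new switch is deliberate: IRQ_SET_MASK_OK copies the pending mask and then falls into IRQ_SET_MASK_OK_NOCOPY, which only refreshes thread affinity. A compact illustration of that shared-tail pattern with a toy enum, not the kernel's.]

    #include <stdio.h>

    enum set_mask_ret { MASK_OK, MASK_OK_NOCOPY, MASK_FAIL };

    static void apply(enum set_mask_ret ret)
    {
        switch (ret) {
        case MASK_OK:
            printf("copy pending mask into irq_data\n");
            /* fall through: OK does everything NOCOPY does, plus the copy */
        case MASK_OK_NOCOPY:
            printf("update thread affinity\n");
            break;
        case MASK_FAIL:
            break;              /* leave everything untouched */
        }
    }

    int main(void)
    {
        apply(MASK_OK);         /* prints both lines, in order */
        return 0;
    }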
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index c3c46c72046..1588e3b2871 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -5,11 +5,13 @@
  * context. The enqueueing is NMI-safe.
  */
 
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/irq_work.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/irqflags.h>
 #include <asm/processor.h>
 
 /*
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 22000c3db0d..8d262b46757 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -284,8 +284,12 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
 	if (value) {
 		if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
 			return -EFAULT;
-	} else
-		memset((char *) &set_buffer, 0, sizeof(set_buffer));
+	} else {
+		memset(&set_buffer, 0, sizeof(set_buffer));
+		printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
+			    " Misfeature support will be removed\n",
+			    current->comm);
+	}
 
 	error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
 	if (error || !ovalue)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a6a675cb981..4e2e472f6ae 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -37,7 +37,6 @@
 #include <asm/page.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#include <asm/system.h>
 #include <asm/sections.h>
 
 /* Per cpu memory for storing cpu states in case of system crash. */
@@ -1359,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline,
 
 	if (*cur == '@')
 		*crash_base = memparse(cur+1, &cur);
+	else if (*cur != ' ' && *cur != '\0') {
+		pr_warning("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1462,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 	VMCOREINFO_SYMBOL(init_uts_ns);
 	VMCOREINFO_SYMBOL(node_online_map);
+#ifdef CONFIG_MMU
 	VMCOREINFO_SYMBOL(swapper_pg_dir);
+#endif
 	VMCOREINFO_SYMBOL(_stext);
 	VMCOREINFO_SYMBOL(vmlist);
 
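
[Editor's note: parse_crashkernel_simple() now rejects unrecognized trailing characters instead of silently ignoring them. A stand-alone sketch of a memparse-style size parser with the same trailing-junk check; user-space C with invented names.]

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse "<size>[KMG]" and reject anything else left over. */
    static int parse_size(const char *s, unsigned long long *out)
    {
        char *end;
        unsigned long long v = strtoull(s, &end, 0);

        switch (*end) {
        case 'G': v <<= 10;   /* fall through */
        case 'M': v <<= 10;   /* fall through */
        case 'K': v <<= 10; end++; break;
        default: break;
        }
        if (*end != '\0' && *end != ' ')
            return -1;                    /* unrecognized char */
        *out = v;
        return 0;
    }

    int main(void)
    {
        unsigned long long v;
        printf("%d\n", parse_size("64M", &v));  /* 0, v == 67108864 */
        printf("%d\n", parse_size("64Q", &v));  /* -1 */
        return 0;
    }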
diff --git a/kernel/kmod.c b/kernel/kmod.c
index a0a88543934..05698a7415f 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -60,6 +60,43 @@ static DECLARE_RWSEM(umhelper_sem);
60*/ 60*/
61char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; 61char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
62 62
63static void free_modprobe_argv(struct subprocess_info *info)
64{
65 kfree(info->argv[3]); /* check call_modprobe() */
66 kfree(info->argv);
67}
68
69static int call_modprobe(char *module_name, int wait)
70{
71 static char *envp[] = {
72 "HOME=/",
73 "TERM=linux",
74 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
75 NULL
76 };
77
78 char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
79 if (!argv)
80 goto out;
81
82 module_name = kstrdup(module_name, GFP_KERNEL);
83 if (!module_name)
84 goto free_argv;
85
86 argv[0] = modprobe_path;
87 argv[1] = "-q";
88 argv[2] = "--";
89 argv[3] = module_name; /* check free_modprobe_argv() */
90 argv[4] = NULL;
91
92 return call_usermodehelper_fns(modprobe_path, argv, envp,
93 wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL);
94free_argv:
95 kfree(argv);
96out:
97 return -ENOMEM;
98}
99
63/** 100/**
64 * __request_module - try to load a kernel module 101 * __request_module - try to load a kernel module
65 * @wait: wait (or not) for the operation to complete 102 * @wait: wait (or not) for the operation to complete
@@ -81,11 +118,6 @@ int __request_module(bool wait, const char *fmt, ...)
81 char module_name[MODULE_NAME_LEN]; 118 char module_name[MODULE_NAME_LEN];
82 unsigned int max_modprobes; 119 unsigned int max_modprobes;
83 int ret; 120 int ret;
84 char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
85 static char *envp[] = { "HOME=/",
86 "TERM=linux",
87 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
88 NULL };
89 static atomic_t kmod_concurrent = ATOMIC_INIT(0); 121 static atomic_t kmod_concurrent = ATOMIC_INIT(0);
90#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ 122#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
91 static int kmod_loop_msg; 123 static int kmod_loop_msg;
@@ -128,9 +160,7 @@ int __request_module(bool wait, const char *fmt, ...)
128 160
129 trace_module_request(module_name, wait, _RET_IP_); 161 trace_module_request(module_name, wait, _RET_IP_);
130 162
131 ret = call_usermodehelper_fns(modprobe_path, argv, envp, 163 ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
132 wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC,
133 NULL, NULL, NULL);
134 164
135 atomic_dec(&kmod_concurrent); 165 atomic_dec(&kmod_concurrent);
136 return ret; 166 return ret;
@@ -188,7 +218,7 @@ static int ____call_usermodehelper(void *data)
188 /* Exec failed? */ 218 /* Exec failed? */
189fail: 219fail:
190 sub_info->retval = retval; 220 sub_info->retval = retval;
191 do_exit(0); 221 return 0;
192} 222}
193 223
194void call_usermodehelper_freeinfo(struct subprocess_info *info) 224void call_usermodehelper_freeinfo(struct subprocess_info *info)
@@ -199,6 +229,19 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info)
199} 229}
200EXPORT_SYMBOL(call_usermodehelper_freeinfo); 230EXPORT_SYMBOL(call_usermodehelper_freeinfo);
201 231
232static void umh_complete(struct subprocess_info *sub_info)
233{
234 struct completion *comp = xchg(&sub_info->complete, NULL);
235 /*
236 * See call_usermodehelper_exec(). If xchg() returns NULL
237 * we own sub_info, the UMH_KILLABLE caller has gone away.
238 */
239 if (comp)
240 complete(comp);
241 else
242 call_usermodehelper_freeinfo(sub_info);
243}
244
202/* Keventd can't block, but this (a child) can. */ 245/* Keventd can't block, but this (a child) can. */
203static int wait_for_helper(void *data) 246static int wait_for_helper(void *data)
204{ 247{
@@ -235,7 +278,7 @@ static int wait_for_helper(void *data)
235 sub_info->retval = ret; 278 sub_info->retval = ret;
236 } 279 }
237 280
238 complete(sub_info->complete); 281 umh_complete(sub_info);
239 return 0; 282 return 0;
240} 283}
241 284
@@ -244,7 +287,7 @@ static void __call_usermodehelper(struct work_struct *work)
244{ 287{
245 struct subprocess_info *sub_info = 288 struct subprocess_info *sub_info =
246 container_of(work, struct subprocess_info, work); 289 container_of(work, struct subprocess_info, work);
247 enum umh_wait wait = sub_info->wait; 290 int wait = sub_info->wait & ~UMH_KILLABLE;
248 pid_t pid; 291 pid_t pid;
249 292
250 /* CLONE_VFORK: wait until the usermode helper has execve'd 293 /* CLONE_VFORK: wait until the usermode helper has execve'd
@@ -269,7 +312,7 @@ static void __call_usermodehelper(struct work_struct *work)
269 case UMH_WAIT_EXEC: 312 case UMH_WAIT_EXEC:
270 if (pid < 0) 313 if (pid < 0)
271 sub_info->retval = pid; 314 sub_info->retval = pid;
272 complete(sub_info->complete); 315 umh_complete(sub_info);
273 } 316 }
274} 317}
275 318
@@ -279,7 +322,7 @@ static void __call_usermodehelper(struct work_struct *work)
279 * land has been frozen during a system-wide hibernation or suspend operation). 322 * land has been frozen during a system-wide hibernation or suspend operation).
280 * Should always be manipulated under umhelper_sem acquired for write. 323 * Should always be manipulated under umhelper_sem acquired for write.
281 */ 324 */
282static int usermodehelper_disabled = 1; 325static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED;
283 326
284/* Number of helpers running */ 327/* Number of helpers running */
285static atomic_t running_helpers = ATOMIC_INIT(0); 328static atomic_t running_helpers = ATOMIC_INIT(0);
@@ -291,32 +334,110 @@ static atomic_t running_helpers = ATOMIC_INIT(0);
291static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); 334static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
292 335
293/* 336/*
337 * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled
338 * to become 'false'.
339 */
340static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq);
341
342/*
294 * Time to wait for running_helpers to become zero before the setting of 343 * Time to wait for running_helpers to become zero before the setting of
295 * usermodehelper_disabled in usermodehelper_disable() fails 344 * usermodehelper_disabled in usermodehelper_disable() fails
296 */ 345 */
297#define RUNNING_HELPERS_TIMEOUT (5 * HZ) 346#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
298 347
299void read_lock_usermodehelper(void) 348int usermodehelper_read_trylock(void)
349{
350 DEFINE_WAIT(wait);
351 int ret = 0;
352
353 down_read(&umhelper_sem);
354 for (;;) {
355 prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
356 TASK_INTERRUPTIBLE);
357 if (!usermodehelper_disabled)
358 break;
359
360 if (usermodehelper_disabled == UMH_DISABLED)
361 ret = -EAGAIN;
362
363 up_read(&umhelper_sem);
364
365 if (ret)
366 break;
367
368 schedule();
369 try_to_freeze();
370
371 down_read(&umhelper_sem);
372 }
373 finish_wait(&usermodehelper_disabled_waitq, &wait);
374 return ret;
375}
376EXPORT_SYMBOL_GPL(usermodehelper_read_trylock);
377
378long usermodehelper_read_lock_wait(long timeout)
300{ 379{
380 DEFINE_WAIT(wait);
381
382 if (timeout < 0)
383 return -EINVAL;
384
301 down_read(&umhelper_sem); 385 down_read(&umhelper_sem);
386 for (;;) {
387 prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
388 TASK_UNINTERRUPTIBLE);
389 if (!usermodehelper_disabled)
390 break;
391
392 up_read(&umhelper_sem);
393
394 timeout = schedule_timeout(timeout);
395 if (!timeout)
396 break;
397
398 down_read(&umhelper_sem);
399 }
400 finish_wait(&usermodehelper_disabled_waitq, &wait);
401 return timeout;
302} 402}
303EXPORT_SYMBOL_GPL(read_lock_usermodehelper); 403EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait);
304 404
305void read_unlock_usermodehelper(void) 405void usermodehelper_read_unlock(void)
306{ 406{
307 up_read(&umhelper_sem); 407 up_read(&umhelper_sem);
308} 408}
309EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); 409EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
310 410
311/** 411/**
312 * usermodehelper_disable - prevent new helpers from being started 412 * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
413 * depth: New value to assign to usermodehelper_disabled.
414 *
415 * Change the value of usermodehelper_disabled (under umhelper_sem locked for
416 * writing) and wakeup tasks waiting for it to change.
313 */ 417 */
314int usermodehelper_disable(void) 418void __usermodehelper_set_disable_depth(enum umh_disable_depth depth)
419{
420 down_write(&umhelper_sem);
421 usermodehelper_disabled = depth;
422 wake_up(&usermodehelper_disabled_waitq);
423 up_write(&umhelper_sem);
424}
425
426/**
427 * __usermodehelper_disable - Prevent new helpers from being started.
428 * @depth: New value to assign to usermodehelper_disabled.
429 *
430 * Set usermodehelper_disabled to @depth and wait for running helpers to exit.
431 */
432int __usermodehelper_disable(enum umh_disable_depth depth)
315{ 433{
316 long retval; 434 long retval;
317 435
436 if (!depth)
437 return -EINVAL;
438
318 down_write(&umhelper_sem); 439 down_write(&umhelper_sem);
319 usermodehelper_disabled = 1; 440 usermodehelper_disabled = depth;
320 up_write(&umhelper_sem); 441 up_write(&umhelper_sem);
321 442
322 /* 443 /*
@@ -331,31 +452,10 @@ int usermodehelper_disable(void)
331 if (retval) 452 if (retval)
332 return 0; 453 return 0;
333 454
334 down_write(&umhelper_sem); 455 __usermodehelper_set_disable_depth(UMH_ENABLED);
335 usermodehelper_disabled = 0;
336 up_write(&umhelper_sem);
337 return -EAGAIN; 456 return -EAGAIN;
338} 457}
339 458
340/**
341 * usermodehelper_enable - allow new helpers to be started again
342 */
343void usermodehelper_enable(void)
344{
345 down_write(&umhelper_sem);
346 usermodehelper_disabled = 0;
347 up_write(&umhelper_sem);
348}
349
350/**
351 * usermodehelper_is_disabled - check if new helpers are allowed to be started
352 */
353bool usermodehelper_is_disabled(void)
354{
355 return usermodehelper_disabled;
356}
357EXPORT_SYMBOL_GPL(usermodehelper_is_disabled);
358
359static void helper_lock(void) 459static void helper_lock(void)
360{ 460{
361 atomic_inc(&running_helpers); 461 atomic_inc(&running_helpers);
@@ -435,8 +535,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
435 * asynchronously if wait is not set, and runs as a child of keventd. 535 * asynchronously if wait is not set, and runs as a child of keventd.
436 * (i.e. it runs with full root capabilities). 536 * (i.e. it runs with full root capabilities).
437 */ 537 */
438int call_usermodehelper_exec(struct subprocess_info *sub_info, 538int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
439 enum umh_wait wait)
440{ 539{
441 DECLARE_COMPLETION_ONSTACK(done); 540 DECLARE_COMPLETION_ONSTACK(done);
442 int retval = 0; 541 int retval = 0;
@@ -456,9 +555,21 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
456 queue_work(khelper_wq, &sub_info->work); 555 queue_work(khelper_wq, &sub_info->work);
457 if (wait == UMH_NO_WAIT) /* task has freed sub_info */ 556 if (wait == UMH_NO_WAIT) /* task has freed sub_info */
458 goto unlock; 557 goto unlock;
558
559 if (wait & UMH_KILLABLE) {
560 retval = wait_for_completion_killable(&done);
561 if (!retval)
562 goto wait_done;
563
564 /* umh_complete() will see NULL and free sub_info */
565 if (xchg(&sub_info->complete, NULL))
566 goto unlock;
567 /* fallthrough, umh_complete() was already called */
568 }
569
459 wait_for_completion(&done); 570 wait_for_completion(&done);
571wait_done:
460 retval = sub_info->retval; 572 retval = sub_info->retval;
461
462out: 573out:
463 call_usermodehelper_freeinfo(sub_info); 574 call_usermodehelper_freeinfo(sub_info);
464unlock: 575unlock:
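
The kmod.c changes above replace the old read_lock/read_unlock pair with a trylock that can fail while helpers are disabled, and let UMH_KILLABLE waiters abandon a helper the freezer is about to stop. A caller that must tolerate suspend/hibernation would now bracket its invocation roughly as follows (a minimal sketch; the helper path, flags and error handling are illustrative, not part of this patch):

    #include <linux/kmod.h>

    static int run_my_helper(void)                  /* hypothetical caller */
    {
            static char path[] = "/sbin/my-helper"; /* illustrative path */
            char *argv[] = { path, NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };
            int ret;

            ret = usermodehelper_read_trylock();
            if (ret)                        /* -EAGAIN: helpers disabled */
                    return ret;

            ret = call_usermodehelper(path, argv, envp,
                                      UMH_WAIT_EXEC | UMH_KILLABLE);
            usermodehelper_read_unlock();
            return ret;
    }
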
diff --git a/kernel/module.c b/kernel/module.c
index 2c932760fd3..78ac6ec1e42 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -105,6 +105,7 @@ struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
105 105
106/* Block module loading/unloading? */ 106/* Block module loading/unloading? */
107int modules_disabled = 0; 107int modules_disabled = 0;
108core_param(nomodule, modules_disabled, bint, 0);
108 109
109/* Waiting for a module to finish initializing? */ 110/* Waiting for a module to finish initializing? */
110static DECLARE_WAIT_QUEUE_HEAD(module_wq); 111static DECLARE_WAIT_QUEUE_HEAD(module_wq);
@@ -903,6 +904,36 @@ static ssize_t show_refcnt(struct module_attribute *mattr,
903static struct module_attribute modinfo_refcnt = 904static struct module_attribute modinfo_refcnt =
904 __ATTR(refcnt, 0444, show_refcnt, NULL); 905 __ATTR(refcnt, 0444, show_refcnt, NULL);
905 906
907void __module_get(struct module *module)
908{
909 if (module) {
910 preempt_disable();
911 __this_cpu_inc(module->refptr->incs);
912 trace_module_get(module, _RET_IP_);
913 preempt_enable();
914 }
915}
916EXPORT_SYMBOL(__module_get);
917
918bool try_module_get(struct module *module)
919{
920 bool ret = true;
921
922 if (module) {
923 preempt_disable();
924
925 if (likely(module_is_live(module))) {
926 __this_cpu_inc(module->refptr->incs);
927 trace_module_get(module, _RET_IP_);
928 } else
929 ret = false;
930
931 preempt_enable();
932 }
933 return ret;
934}
935EXPORT_SYMBOL(try_module_get);
936
906void module_put(struct module *module) 937void module_put(struct module *module)
907{ 938{
908 if (module) { 939 if (module) {
@@ -2380,8 +2411,7 @@ static int copy_and_check(struct load_info *info,
2380 return -ENOEXEC; 2411 return -ENOEXEC;
2381 2412
2382 /* Suck in entire file: we'll want most of it. */ 2413 /* Suck in entire file: we'll want most of it. */
2383 /* vmalloc barfs on "unusual" numbers. Check here */ 2414 if ((hdr = vmalloc(len)) == NULL)
2384 if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
2385 return -ENOMEM; 2415 return -ENOMEM;
2386 2416
2387 if (copy_from_user(hdr, umod, len) != 0) { 2417 if (copy_from_user(hdr, umod, len) != 0) {
@@ -2922,7 +2952,8 @@ static struct module *load_module(void __user *umod,
2922 mutex_unlock(&module_mutex); 2952 mutex_unlock(&module_mutex);
2923 2953
2924 /* Module is ready to execute: parsing args may do that. */ 2954 /* Module is ready to execute: parsing args may do that. */
2925 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); 2955 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
2956 -32768, 32767, NULL);
2926 if (err < 0) 2957 if (err < 0)
2927 goto unlink; 2958 goto unlink;
2928 2959
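
Two things fall out of the module.c hunks: module loading can now be vetoed from the kernel command line ("nomodule" is a bint parameter, so both "nomodule" and "nomodule=1" work), and __module_get()/try_module_get() become out-of-line definitions. The usual reference-count pattern around an ops table is unchanged; roughly (a sketch with a hypothetical ops structure that records its owning module):

    #include <linux/module.h>

    struct my_ops {                          /* hypothetical */
            struct module *owner;
            int (*do_work)(void);
    };

    static int call_ops(struct my_ops *ops)
    {
            int err;

            if (!try_module_get(ops->owner)) /* owner may be unloading */
                    return -ENODEV;
            err = ops->do_work();            /* safe while the ref is held */
            module_put(ops->owner);
            return err;
    }
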
diff --git a/kernel/padata.c b/kernel/padata.c
index 6f10eb285ec..89fe3d1b9ef 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -1,6 +1,8 @@
1/* 1/*
2 * padata.c - generic interface to process data streams in parallel 2 * padata.c - generic interface to process data streams in parallel
3 * 3 *
4 * See Documentation/padata.txt for the API documentation.
5 *
4 * Copyright (C) 2008, 2009 secunet Security Networks AG 6 * Copyright (C) 2008, 2009 secunet Security Networks AG
5 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> 7 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
6 * 8 *
@@ -354,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
354 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) 356 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
355 return -ENOMEM; 357 return -ENOMEM;
356 358
357 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); 359 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
358 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { 360 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
359 free_cpumask_var(pd->cpumask.cbcpu); 361 free_cpumask_var(pd->cpumask.cbcpu);
360 return -ENOMEM; 362 return -ENOMEM;
361 } 363 }
362 364
363 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); 365 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
364 return 0; 366 return 0;
365} 367}
366 368
@@ -564,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
564static bool padata_validate_cpumask(struct padata_instance *pinst, 566static bool padata_validate_cpumask(struct padata_instance *pinst,
565 const struct cpumask *cpumask) 567 const struct cpumask *cpumask)
566{ 568{
567 if (!cpumask_intersects(cpumask, cpu_active_mask)) { 569 if (!cpumask_intersects(cpumask, cpu_online_mask)) {
568 pinst->flags |= PADATA_INVALID; 570 pinst->flags |= PADATA_INVALID;
569 return false; 571 return false;
570 } 572 }
@@ -678,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
678{ 680{
679 struct parallel_data *pd; 681 struct parallel_data *pd;
680 682
681 if (cpumask_test_cpu(cpu, cpu_active_mask)) { 683 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
682 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, 684 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
683 pinst->cpumask.cbcpu); 685 pinst->cpumask.cbcpu);
684 if (!pd) 686 if (!pd)
@@ -746,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
746 return -ENOMEM; 748 return -ENOMEM;
747 749
748 padata_replace(pinst, pd); 750 padata_replace(pinst, pd);
751
752 cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
753 cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
749 } 754 }
750 755
751 return 0; 756 return 0;
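
Throughout padata.c the active mask gives way to the online mask, and a CPU that went down is now scrubbed out of the freshly built parallel_data masks as well. Caller-side validation of a requested mask follows the same test the core now applies (a minimal sketch; the function name is illustrative):

    #include <linux/cpumask.h>

    static bool padata_mask_usable(const struct cpumask *requested)
    {
            /* at least one requested CPU must currently be online */
            return cpumask_intersects(requested, cpu_online_mask);
    }
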
diff --git a/kernel/panic.c b/kernel/panic.c
index 80aed44e345..8ed89a175d7 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -97,7 +97,7 @@ void panic(const char *fmt, ...)
97 /* 97 /*
98 * Avoid nested stack-dumping if a panic occurs during oops processing 98 * Avoid nested stack-dumping if a panic occurs during oops processing
99 */ 99 */
100 if (!oops_in_progress) 100 if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
101 dump_stack(); 101 dump_stack();
102#endif 102#endif
103 103
diff --git a/kernel/params.c b/kernel/params.c
index 4bc965d8a1f..f37d8263134 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -15,7 +15,6 @@
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/ 17*/
18#include <linux/module.h>
19#include <linux/kernel.h> 18#include <linux/kernel.h>
20#include <linux/string.h> 19#include <linux/string.h>
21#include <linux/errno.h> 20#include <linux/errno.h>
@@ -88,6 +87,8 @@ static int parse_one(char *param,
88 char *val, 87 char *val,
89 const struct kernel_param *params, 88 const struct kernel_param *params,
90 unsigned num_params, 89 unsigned num_params,
90 s16 min_level,
91 s16 max_level,
91 int (*handle_unknown)(char *param, char *val)) 92 int (*handle_unknown)(char *param, char *val))
92{ 93{
93 unsigned int i; 94 unsigned int i;
@@ -96,6 +97,9 @@ static int parse_one(char *param,
96 /* Find parameter */ 97 /* Find parameter */
97 for (i = 0; i < num_params; i++) { 98 for (i = 0; i < num_params; i++) {
98 if (parameq(param, params[i].name)) { 99 if (parameq(param, params[i].name)) {
100 if (params[i].level < min_level
101 || params[i].level > max_level)
102 return 0;
99 /* No one handled NULL, so do it here. */ 103 /* No one handled NULL, so do it here. */
100 if (!val && params[i].ops->set != param_set_bool 104 if (!val && params[i].ops->set != param_set_bool
101 && params[i].ops->set != param_set_bint) 105 && params[i].ops->set != param_set_bint)
@@ -175,6 +179,8 @@ int parse_args(const char *name,
175 char *args, 179 char *args,
176 const struct kernel_param *params, 180 const struct kernel_param *params,
177 unsigned num, 181 unsigned num,
182 s16 min_level,
183 s16 max_level,
178 int (*unknown)(char *param, char *val)) 184 int (*unknown)(char *param, char *val))
179{ 185{
180 char *param, *val; 186 char *param, *val;
@@ -190,7 +196,8 @@ int parse_args(const char *name,
190 196
191 args = next_arg(args, &param, &val); 197 args = next_arg(args, &param, &val);
192 irq_was_disabled = irqs_disabled(); 198 irq_was_disabled = irqs_disabled();
193 ret = parse_one(param, val, params, num, unknown); 199 ret = parse_one(param, val, params, num,
200 min_level, max_level, unknown);
194 if (irq_was_disabled && !irqs_disabled()) { 201 if (irq_was_disabled && !irqs_disabled()) {
195 printk(KERN_WARNING "parse_args(): option '%s' enabled " 202 printk(KERN_WARNING "parse_args(): option '%s' enabled "
196 "irq's!\n", param); 203 "irq's!\n", param);
@@ -298,35 +305,18 @@ EXPORT_SYMBOL(param_ops_charp);
298/* Actually could be a bool or an int, for historical reasons. */ 305/* Actually could be a bool or an int, for historical reasons. */
299int param_set_bool(const char *val, const struct kernel_param *kp) 306int param_set_bool(const char *val, const struct kernel_param *kp)
300{ 307{
301 bool v;
302 int ret;
303
304 /* No equals means "set"... */ 308 /* No equals means "set"... */
305 if (!val) val = "1"; 309 if (!val) val = "1";
306 310
307 /* One of =[yYnN01] */ 311 /* One of =[yYnN01] */
308 ret = strtobool(val, &v); 312 return strtobool(val, kp->arg);
309 if (ret)
310 return ret;
311
312 if (kp->flags & KPARAM_ISBOOL)
313 *(bool *)kp->arg = v;
314 else
315 *(int *)kp->arg = v;
316 return 0;
317} 313}
318EXPORT_SYMBOL(param_set_bool); 314EXPORT_SYMBOL(param_set_bool);
319 315
320int param_get_bool(char *buffer, const struct kernel_param *kp) 316int param_get_bool(char *buffer, const struct kernel_param *kp)
321{ 317{
322 bool val;
323 if (kp->flags & KPARAM_ISBOOL)
324 val = *(bool *)kp->arg;
325 else
326 val = *(int *)kp->arg;
327
328 /* Y and N chosen as being relatively non-coder friendly */ 318 /* Y and N chosen as being relatively non-coder friendly */
329 return sprintf(buffer, "%c", val ? 'Y' : 'N'); 319 return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N');
330} 320}
331EXPORT_SYMBOL(param_get_bool); 321EXPORT_SYMBOL(param_get_bool);
332 322
@@ -344,7 +334,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp)
344 struct kernel_param dummy; 334 struct kernel_param dummy;
345 335
346 dummy.arg = &boolval; 336 dummy.arg = &boolval;
347 dummy.flags = KPARAM_ISBOOL;
348 ret = param_set_bool(val, &dummy); 337 ret = param_set_bool(val, &dummy);
349 if (ret == 0) 338 if (ret == 0)
350 *(bool *)kp->arg = !boolval; 339 *(bool *)kp->arg = !boolval;
@@ -373,7 +362,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
373 /* Match bool exactly, by re-using it. */ 362 /* Match bool exactly, by re-using it. */
374 boolkp = *kp; 363 boolkp = *kp;
375 boolkp.arg = &v; 364 boolkp.arg = &v;
376 boolkp.flags |= KPARAM_ISBOOL;
377 365
378 ret = param_set_bool(val, &boolkp); 366 ret = param_set_bool(val, &boolkp);
379 if (ret == 0) 367 if (ret == 0)
@@ -394,7 +382,7 @@ static int param_array(const char *name,
394 unsigned int min, unsigned int max, 382 unsigned int min, unsigned int max,
395 void *elem, int elemsize, 383 void *elem, int elemsize,
396 int (*set)(const char *, const struct kernel_param *kp), 384 int (*set)(const char *, const struct kernel_param *kp),
397 u16 flags, 385 s16 level,
398 unsigned int *num) 386 unsigned int *num)
399{ 387{
400 int ret; 388 int ret;
@@ -404,7 +392,7 @@ static int param_array(const char *name,
404 /* Get the name right for errors. */ 392 /* Get the name right for errors. */
405 kp.name = name; 393 kp.name = name;
406 kp.arg = elem; 394 kp.arg = elem;
407 kp.flags = flags; 395 kp.level = level;
408 396
409 *num = 0; 397 *num = 0;
410 /* We expect a comma-separated list of values. */ 398 /* We expect a comma-separated list of values. */
@@ -445,7 +433,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
445 unsigned int temp_num; 433 unsigned int temp_num;
446 434
447 return param_array(kp->name, val, 1, arr->max, arr->elem, 435 return param_array(kp->name, val, 1, arr->max, arr->elem,
448 arr->elemsize, arr->ops->set, kp->flags, 436 arr->elemsize, arr->ops->set, kp->level,
449 arr->num ?: &temp_num); 437 arr->num ?: &temp_num);
450} 438}
451 439
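
With the flags word and KPARAM_ISBOOL gone, param_set_bool() parses through strtobool() straight into a bool, so a boolean parameter must be backed by a real bool; an int that wants boolean parsing has to use bint instead. Illustrative declarations (all names made up):

    #include <linux/moduleparam.h>

    static bool enable_foo = true;           /* must be a real bool now */
    module_param(enable_foo, bool, 0644);

    static int foo_debug;                    /* int-backed boolean: use bint */
    module_param(foo_debug, bint, 0644);
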
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a8968396046..57bc1fd35b3 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/reboot.h>
18 19
19#define BITS_PER_PAGE (PAGE_SIZE*8) 20#define BITS_PER_PAGE (PAGE_SIZE*8)
20 21
@@ -168,13 +169,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
168 while (nr > 0) { 169 while (nr > 0) {
169 rcu_read_lock(); 170 rcu_read_lock();
170 171
171 /*
172 * Any nested-container's init processes won't ignore the
173 * SEND_SIG_NOINFO signal, see send_signal()->si_fromuser().
174 */
175 task = pid_task(find_vpid(nr), PIDTYPE_PID); 172 task = pid_task(find_vpid(nr), PIDTYPE_PID);
176 if (task) 173 if (task && !__fatal_signal_pending(task))
177 send_sig_info(SIGKILL, SEND_SIG_NOINFO, task); 174 send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
178 175
179 rcu_read_unlock(); 176 rcu_read_unlock();
180 177
@@ -187,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
187 rc = sys_wait4(-1, NULL, __WALL, NULL); 184 rc = sys_wait4(-1, NULL, __WALL, NULL);
188 } while (rc != -ECHILD); 185 } while (rc != -ECHILD);
189 186
187 if (pid_ns->reboot)
188 current->signal->group_exit_code = pid_ns->reboot;
189
190 acct_exit_ns(pid_ns); 190 acct_exit_ns(pid_ns);
191 return; 191 return;
192} 192}
@@ -221,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
221 221
222static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; 222static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
223 223
224int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
225{
226 if (pid_ns == &init_pid_ns)
227 return 0;
228
229 switch (cmd) {
230 case LINUX_REBOOT_CMD_RESTART2:
231 case LINUX_REBOOT_CMD_RESTART:
232 pid_ns->reboot = SIGHUP;
233 break;
234
235 case LINUX_REBOOT_CMD_POWER_OFF:
236 case LINUX_REBOOT_CMD_HALT:
237 pid_ns->reboot = SIGINT;
238 break;
239 default:
240 return -EINVAL;
241 }
242
243 read_lock(&tasklist_lock);
244 force_sig(SIGKILL, pid_ns->child_reaper);
245 read_unlock(&tasklist_lock);
246
247 do_exit(0);
248
249 /* Not reached */
250 return 0;
251}
252
224static __init int pid_namespaces_init(void) 253static __init int pid_namespaces_init(void)
225{ 254{
226 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); 255 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
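
The user-visible effect of reboot_pid_ns() is that reboot(2) issued by a container's init no longer reaches the hardware: the namespace is torn down and, because zap_pid_ns_processes() copies pid_ns->reboot into group_exit_code, the parent sees init terminate as if killed by SIGHUP (restart) or SIGINT (halt/power off). A host-side demonstration could look like this (userspace sketch, run as root; illustrative only):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <sys/reboot.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <linux/reboot.h>

    static int ns_init(void *arg)
    {
            /* we are pid 1 of the new pid namespace */
            reboot(LINUX_REBOOT_CMD_RESTART); /* handled by reboot_pid_ns() */
            return 0;                         /* not reached: do_exit() above */
    }

    int main(void)
    {
            static char stack[64 * 1024];
            int status;
            pid_t pid = clone(ns_init, stack + sizeof(stack),
                              CLONE_NEWPID | SIGCHLD, NULL);

            waitpid(pid, &status, 0);
            if (WIFSIGNALED(status))          /* SIGHUP for a restart request */
                    printf("container init died by signal %d\n",
                           WTERMSIG(status));
            return 0;
    }
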
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 0a186cfde78..e09dfbfeece 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -16,7 +16,6 @@
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/device.h> 17#include <linux/device.h>
18#include <linux/async.h> 18#include <linux/async.h>
19#include <linux/kmod.h>
20#include <linux/delay.h> 19#include <linux/delay.h>
21#include <linux/fs.h> 20#include <linux/fs.h>
22#include <linux/mount.h> 21#include <linux/mount.h>
@@ -611,14 +610,10 @@ int hibernate(void)
611 if (error) 610 if (error)
612 goto Exit; 611 goto Exit;
613 612
614 error = usermodehelper_disable();
615 if (error)
616 goto Exit;
617
618 /* Allocate memory management structures */ 613 /* Allocate memory management structures */
619 error = create_basic_memory_bitmaps(); 614 error = create_basic_memory_bitmaps();
620 if (error) 615 if (error)
621 goto Enable_umh; 616 goto Exit;
622 617
623 printk(KERN_INFO "PM: Syncing filesystems ... "); 618 printk(KERN_INFO "PM: Syncing filesystems ... ");
624 sys_sync(); 619 sys_sync();
@@ -661,8 +656,6 @@ int hibernate(void)
661 656
662 Free_bitmaps: 657 Free_bitmaps:
663 free_basic_memory_bitmaps(); 658 free_basic_memory_bitmaps();
664 Enable_umh:
665 usermodehelper_enable();
666 Exit: 659 Exit:
667 pm_notifier_call_chain(PM_POST_HIBERNATION); 660 pm_notifier_call_chain(PM_POST_HIBERNATION);
668 pm_restore_console(); 661 pm_restore_console();
@@ -777,15 +770,9 @@ static int software_resume(void)
777 if (error) 770 if (error)
778 goto close_finish; 771 goto close_finish;
779 772
780 error = usermodehelper_disable();
781 if (error)
782 goto close_finish;
783
784 error = create_basic_memory_bitmaps(); 773 error = create_basic_memory_bitmaps();
785 if (error) { 774 if (error)
786 usermodehelper_enable();
787 goto close_finish; 775 goto close_finish;
788 }
789 776
790 pr_debug("PM: Preparing processes for restore.\n"); 777 pr_debug("PM: Preparing processes for restore.\n");
791 error = freeze_processes(); 778 error = freeze_processes();
@@ -806,7 +793,6 @@ static int software_resume(void)
806 thaw_processes(); 793 thaw_processes();
807 Done: 794 Done:
808 free_basic_memory_bitmaps(); 795 free_basic_memory_bitmaps();
809 usermodehelper_enable();
810 Finish: 796 Finish:
811 pm_notifier_call_chain(PM_POST_RESTORE); 797 pm_notifier_call_chain(PM_POST_RESTORE);
812 pm_restore_console(); 798 pm_restore_console();
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0d2aeb22610..19db29f6755 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -16,6 +16,7 @@
16#include <linux/freezer.h> 16#include <linux/freezer.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/workqueue.h> 18#include <linux/workqueue.h>
19#include <linux/kmod.h>
19 20
20/* 21/*
21 * Timeout for stopping processes 22 * Timeout for stopping processes
@@ -122,6 +123,10 @@ int freeze_processes(void)
122{ 123{
123 int error; 124 int error;
124 125
126 error = __usermodehelper_disable(UMH_FREEZING);
127 if (error)
128 return error;
129
125 if (!pm_freezing) 130 if (!pm_freezing)
126 atomic_inc(&system_freezing_cnt); 131 atomic_inc(&system_freezing_cnt);
127 132
@@ -130,6 +135,7 @@ int freeze_processes(void)
130 error = try_to_freeze_tasks(true); 135 error = try_to_freeze_tasks(true);
131 if (!error) { 136 if (!error) {
132 printk("done."); 137 printk("done.");
138 __usermodehelper_set_disable_depth(UMH_DISABLED);
133 oom_killer_disable(); 139 oom_killer_disable();
134 } 140 }
135 printk("\n"); 141 printk("\n");
@@ -187,6 +193,8 @@ void thaw_processes(void)
187 } while_each_thread(g, p); 193 } while_each_thread(g, p);
188 read_unlock(&tasklist_lock); 194 read_unlock(&tasklist_lock);
189 195
196 usermodehelper_enable();
197
190 schedule(); 198 schedule();
191 printk("done.\n"); 199 printk("done.\n");
192} 200}
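
freeze_processes() now drives the helper gate in two stages: helpers are put into UMH_FREEZING before tasks are frozen, so that new usermodehelper_read_trylock() callers sleep in the freezer, and only once everything is frozen does the depth move to UMH_DISABLED, where trylock fails fast with -EAGAIN; thaw_processes() re-enables helpers on the way out. The depth enum these call sites rely on looks roughly like this (a sketch inferred from the constants used above; the actual definition belongs to include/linux/kmod.h in this series):

    enum umh_disable_depth {
            UMH_ENABLED = 0,
            UMH_FREEZING,   /* trylock callers sleep and freeze */
            UMH_DISABLED,   /* trylock fails with -EAGAIN */
    };
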
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index d6d6dbd1ecc..6a031e68402 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -230,6 +230,21 @@ int pm_qos_request_active(struct pm_qos_request *req)
230EXPORT_SYMBOL_GPL(pm_qos_request_active); 230EXPORT_SYMBOL_GPL(pm_qos_request_active);
231 231
232/** 232/**
233 * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
234 * @work: work struct for the delayed work (timeout)
235 *
236 * This reverts the request to the default value once the timeout expires.
237 */
238static void pm_qos_work_fn(struct work_struct *work)
239{
240 struct pm_qos_request *req = container_of(to_delayed_work(work),
241 struct pm_qos_request,
242 work);
243
244 pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
245}
246
247/**
233 * pm_qos_add_request - inserts new qos request into the list 248 * pm_qos_add_request - inserts new qos request into the list
234 * @req: pointer to a preallocated handle 249 * @req: pointer to a preallocated handle
235 * @pm_qos_class: identifies which list of qos request to use 250 * @pm_qos_class: identifies which list of qos request to use
@@ -253,6 +268,7 @@ void pm_qos_add_request(struct pm_qos_request *req,
253 return; 268 return;
254 } 269 }
255 req->pm_qos_class = pm_qos_class; 270 req->pm_qos_class = pm_qos_class;
271 INIT_DELAYED_WORK(&req->work, pm_qos_work_fn);
256 pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, 272 pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints,
257 &req->node, PM_QOS_ADD_REQ, value); 273 &req->node, PM_QOS_ADD_REQ, value);
258} 274}
@@ -279,6 +295,9 @@ void pm_qos_update_request(struct pm_qos_request *req,
279 return; 295 return;
280 } 296 }
281 297
298 if (delayed_work_pending(&req->work))
299 cancel_delayed_work_sync(&req->work);
300
282 if (new_value != req->node.prio) 301 if (new_value != req->node.prio)
283 pm_qos_update_target( 302 pm_qos_update_target(
284 pm_qos_array[req->pm_qos_class]->constraints, 303 pm_qos_array[req->pm_qos_class]->constraints,
@@ -287,6 +306,34 @@ void pm_qos_update_request(struct pm_qos_request *req,
287EXPORT_SYMBOL_GPL(pm_qos_update_request); 306EXPORT_SYMBOL_GPL(pm_qos_update_request);
288 307
289/** 308/**
309 * pm_qos_update_request_timeout - modifies an existing qos request temporarily.
310 * @req: handle to the list element holding a pm_qos request to use
311 * @new_value: defines the temporary qos value to apply
312 * @timeout_us: the effective duration of this qos request in usecs.
313 *
314 * After timeout_us, this qos request is cancelled automatically.
315 */
316void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,
317 unsigned long timeout_us)
318{
319 if (!req)
320 return;
321 if (WARN(!pm_qos_request_active(req),
322 "%s called for unknown object.", __func__))
323 return;
324
325 if (delayed_work_pending(&req->work))
326 cancel_delayed_work_sync(&req->work);
327
328 if (new_value != req->node.prio)
329 pm_qos_update_target(
330 pm_qos_array[req->pm_qos_class]->constraints,
331 &req->node, PM_QOS_UPDATE_REQ, new_value);
332
333 schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us));
334}
335
336/**
290 * pm_qos_remove_request - modifies an existing qos request 337 * pm_qos_remove_request - modifies an existing qos request
291 * @req: handle to request list element 338 * @req: handle to request list element
292 * 339 *
@@ -305,6 +352,9 @@ void pm_qos_remove_request(struct pm_qos_request *req)
305 return; 352 return;
306 } 353 }
307 354
355 if (delayed_work_pending(&req->work))
356 cancel_delayed_work_sync(&req->work);
357
308 pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, 358 pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
309 &req->node, PM_QOS_REMOVE_REQ, 359 &req->node, PM_QOS_REMOVE_REQ,
310 PM_QOS_DEFAULT_VALUE); 360 PM_QOS_DEFAULT_VALUE);
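
Using the timeout variant mirrors pm_qos_update_request(), except the constraint falls back to the default on its own instead of needing a second update. For example (illustrative values; the request is assumed to have been added once at init time):

    #include <linux/pm_qos.h>
    #include <linux/time.h>

    static struct pm_qos_request my_req;     /* illustrative */

    static void my_init(void)
    {
            pm_qos_add_request(&my_req, PM_QOS_CPU_DMA_LATENCY,
                               PM_QOS_DEFAULT_VALUE);
    }

    static void on_burst(void)
    {
            /* cap wakeup latency at 50 us for the next 100 ms */
            pm_qos_update_request_timeout(&my_req, 50, 100 * USEC_PER_MSEC);
    }
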
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 88e5c967370..396d262b8fd 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -12,7 +12,6 @@
12#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/kmod.h>
16#include <linux/console.h> 15#include <linux/console.h>
17#include <linux/cpu.h> 16#include <linux/cpu.h>
18#include <linux/syscalls.h> 17#include <linux/syscalls.h>
@@ -102,17 +101,12 @@ static int suspend_prepare(void)
102 if (error) 101 if (error)
103 goto Finish; 102 goto Finish;
104 103
105 error = usermodehelper_disable();
106 if (error)
107 goto Finish;
108
109 error = suspend_freeze_processes(); 104 error = suspend_freeze_processes();
110 if (!error) 105 if (!error)
111 return 0; 106 return 0;
112 107
113 suspend_stats.failed_freeze++; 108 suspend_stats.failed_freeze++;
114 dpm_save_failed_step(SUSPEND_FREEZE); 109 dpm_save_failed_step(SUSPEND_FREEZE);
115 usermodehelper_enable();
116 Finish: 110 Finish:
117 pm_notifier_call_chain(PM_POST_SUSPEND); 111 pm_notifier_call_chain(PM_POST_SUSPEND);
118 pm_restore_console(); 112 pm_restore_console();
@@ -259,7 +253,6 @@ int suspend_devices_and_enter(suspend_state_t state)
259static void suspend_finish(void) 253static void suspend_finish(void)
260{ 254{
261 suspend_thaw_processes(); 255 suspend_thaw_processes();
262 usermodehelper_enable();
263 pm_notifier_call_chain(PM_POST_SUSPEND); 256 pm_notifier_call_chain(PM_POST_SUSPEND);
264 pm_restore_console(); 257 pm_restore_console();
265} 258}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 33c4329205a..91b0fd021a9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -12,7 +12,6 @@
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/syscalls.h> 13#include <linux/syscalls.h>
14#include <linux/reboot.h> 14#include <linux/reboot.h>
15#include <linux/kmod.h>
16#include <linux/string.h> 15#include <linux/string.h>
17#include <linux/device.h> 16#include <linux/device.h>
18#include <linux/miscdevice.h> 17#include <linux/miscdevice.h>
@@ -222,14 +221,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
222 sys_sync(); 221 sys_sync();
223 printk("done.\n"); 222 printk("done.\n");
224 223
225 error = usermodehelper_disable();
226 if (error)
227 break;
228
229 error = freeze_processes(); 224 error = freeze_processes();
230 if (error) 225 if (!error)
231 usermodehelper_enable();
232 else
233 data->frozen = 1; 226 data->frozen = 1;
234 break; 227 break;
235 228
@@ -238,7 +231,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
238 break; 231 break;
239 pm_restore_gfp_mask(); 232 pm_restore_gfp_mask();
240 thaw_processes(); 233 thaw_processes();
241 usermodehelper_enable();
242 data->frozen = 0; 234 data->frozen = 0;
243 break; 235 break;
244 236
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 00ab2ca5ed1..ee8d49b9c30 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -231,26 +231,22 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
231} 231}
232 232
233static int ptrace_attach(struct task_struct *task, long request, 233static int ptrace_attach(struct task_struct *task, long request,
234 unsigned long addr,
234 unsigned long flags) 235 unsigned long flags)
235{ 236{
236 bool seize = (request == PTRACE_SEIZE); 237 bool seize = (request == PTRACE_SEIZE);
237 int retval; 238 int retval;
238 239
239 /*
240 * SEIZE will enable new ptrace behaviors which will be implemented
241 * gradually. SEIZE_DEVEL is used to prevent applications
242 * expecting full SEIZE behaviors trapping on kernel commits which
243 * are still in the process of implementing them.
244 *
245 * Only test programs for new ptrace behaviors being implemented
246 * should set SEIZE_DEVEL. If unset, SEIZE will fail with -EIO.
247 *
248 * Once SEIZE behaviors are completely implemented, this flag and
249 * the following test will be removed.
250 */
251 retval = -EIO; 240 retval = -EIO;
252 if (seize && !(flags & PTRACE_SEIZE_DEVEL)) 241 if (seize) {
253 goto out; 242 if (addr != 0)
243 goto out;
244 if (flags & ~(unsigned long)PTRACE_O_MASK)
245 goto out;
246 flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
247 } else {
248 flags = PT_PTRACED;
249 }
254 250
255 audit_ptrace(task); 251 audit_ptrace(task);
256 252
@@ -262,7 +258,7 @@ static int ptrace_attach(struct task_struct *task, long request,
262 258
263 /* 259 /*
264 * Protect exec's credential calculations against our interference; 260 * Protect exec's credential calculations against our interference;
265 * interference; SUID, SGID and LSM creds get determined differently 261 * SUID, SGID and LSM creds get determined differently
266 * under ptrace. 262 * under ptrace.
267 */ 263 */
268 retval = -ERESTARTNOINTR; 264 retval = -ERESTARTNOINTR;
@@ -282,11 +278,11 @@ static int ptrace_attach(struct task_struct *task, long request,
282 if (task->ptrace) 278 if (task->ptrace)
283 goto unlock_tasklist; 279 goto unlock_tasklist;
284 280
285 task->ptrace = PT_PTRACED;
286 if (seize) 281 if (seize)
287 task->ptrace |= PT_SEIZED; 282 flags |= PT_SEIZED;
288 if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) 283 if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
289 task->ptrace |= PT_PTRACE_CAP; 284 flags |= PT_PTRACE_CAP;
285 task->ptrace = flags;
290 286
291 __ptrace_link(task, current); 287 __ptrace_link(task, current);
292 288
@@ -528,30 +524,18 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
528 524
529static int ptrace_setoptions(struct task_struct *child, unsigned long data) 525static int ptrace_setoptions(struct task_struct *child, unsigned long data)
530{ 526{
531 child->ptrace &= ~PT_TRACE_MASK; 527 unsigned flags;
532 528
533 if (data & PTRACE_O_TRACESYSGOOD) 529 if (data & ~(unsigned long)PTRACE_O_MASK)
534 child->ptrace |= PT_TRACESYSGOOD; 530 return -EINVAL;
535
536 if (data & PTRACE_O_TRACEFORK)
537 child->ptrace |= PT_TRACE_FORK;
538
539 if (data & PTRACE_O_TRACEVFORK)
540 child->ptrace |= PT_TRACE_VFORK;
541
542 if (data & PTRACE_O_TRACECLONE)
543 child->ptrace |= PT_TRACE_CLONE;
544
545 if (data & PTRACE_O_TRACEEXEC)
546 child->ptrace |= PT_TRACE_EXEC;
547
548 if (data & PTRACE_O_TRACEVFORKDONE)
549 child->ptrace |= PT_TRACE_VFORK_DONE;
550 531
551 if (data & PTRACE_O_TRACEEXIT) 532 /* Avoid intermediate state when all opts are cleared */
552 child->ptrace |= PT_TRACE_EXIT; 533 flags = child->ptrace;
534 flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT);
535 flags |= (data << PT_OPT_FLAG_SHIFT);
536 child->ptrace = flags;
553 537
554 return (data & ~PTRACE_O_MASK) ? -EINVAL : 0; 538 return 0;
555} 539}
556 540
557static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) 541static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
@@ -891,7 +875,7 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
891 } 875 }
892 876
893 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { 877 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
894 ret = ptrace_attach(child, request, data); 878 ret = ptrace_attach(child, request, addr, data);
895 /* 879 /*
896 * Some architectures need to do book-keeping after 880 * Some architectures need to do book-keeping after
897 * a ptrace attach. 881 * a ptrace attach.
@@ -1034,7 +1018,7 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
1034 } 1018 }
1035 1019
1036 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { 1020 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
1037 ret = ptrace_attach(child, request, data); 1021 ret = ptrace_attach(child, request, addr, data);
1038 /* 1022 /*
1039 * Some architectures need to do book-keeping after 1023 * Some architectures need to do book-keeping after
1040 * a ptrace attach. 1024 * a ptrace attach.
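
With PTRACE_SEIZE_DEVEL retired, SEIZE's ABI is now fixed: @addr must be zero and @data is simply a PTRACE_O_* option mask, so attaching and configuring a tracee collapses into one call. From userspace (a sketch; pre-3.4 headers may need the PTRACE_SEIZE value defined by hand):

    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>

    #ifndef PTRACE_SEIZE
    #define PTRACE_SEIZE 0x4206       /* value from include/linux/ptrace.h */
    #endif

    static int seize_with_options(pid_t pid)
    {
            /* addr (3rd argument) must be 0; data carries the options */
            if (ptrace(PTRACE_SEIZE, pid, 0L,
                       PTRACE_O_TRACEEXEC | PTRACE_O_TRACEEXIT) < 0) {
                    perror("PTRACE_SEIZE"); /* EIO: bad addr or options */
                    return -1;
            }
            return 0;
    }
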
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index b152f74f02d..6850f53e02d 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -10,7 +10,6 @@
10#include <linux/export.h> 10#include <linux/export.h>
11#include <linux/rwsem.h> 11#include <linux/rwsem.h>
12 12
13#include <asm/system.h>
14#include <linux/atomic.h> 13#include <linux/atomic.h>
15 14
16/* 15/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 503d6426126..4603b9d8f30 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
73#include <linux/init_task.h> 73#include <linux/init_task.h>
74#include <linux/binfmts.h> 74#include <linux/binfmts.h>
75 75
76#include <asm/switch_to.h>
76#include <asm/tlb.h> 77#include <asm/tlb.h>
77#include <asm/irq_regs.h> 78#include <asm/irq_regs.h>
78#include <asm/mutex.h> 79#include <asm/mutex.h>
@@ -1264,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process);
1264 */ 1265 */
1265static int select_fallback_rq(int cpu, struct task_struct *p) 1266static int select_fallback_rq(int cpu, struct task_struct *p)
1266{ 1267{
1267 int dest_cpu;
1268 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); 1268 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
1269 enum { cpuset, possible, fail } state = cpuset;
1270 int dest_cpu;
1269 1271
1270 /* Look for allowed, online CPU in same node. */ 1272 /* Look for allowed, online CPU in same node. */
1271 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) 1273 for_each_cpu(dest_cpu, nodemask) {
1274 if (!cpu_online(dest_cpu))
1275 continue;
1276 if (!cpu_active(dest_cpu))
1277 continue;
1272 if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) 1278 if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
1273 return dest_cpu; 1279 return dest_cpu;
1280 }
1281
1282 for (;;) {
1283 /* Any allowed, online CPU? */
1284 for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
1285 if (!cpu_online(dest_cpu))
1286 continue;
1287 if (!cpu_active(dest_cpu))
1288 continue;
1289 goto out;
1290 }
1274 1291
1275 /* Any allowed, online CPU? */ 1292 switch (state) {
1276 dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); 1293 case cpuset:
1277 if (dest_cpu < nr_cpu_ids) 1294 /* No more Mr. Nice Guy. */
1278 return dest_cpu; 1295 cpuset_cpus_allowed_fallback(p);
1296 state = possible;
1297 break;
1279 1298
1280 /* No more Mr. Nice Guy. */ 1299 case possible:
1281 dest_cpu = cpuset_cpus_allowed_fallback(p); 1300 do_set_cpus_allowed(p, cpu_possible_mask);
1282 /* 1301 state = fail;
1283 * Don't tell them about moving exiting tasks or 1302 break;
1284 * kernel threads (both mm NULL), since they never 1303
1285 * leave kernel. 1304 case fail:
1286 */ 1305 BUG();
1287 if (p->mm && printk_ratelimit()) { 1306 break;
1288 printk_sched("process %d (%s) no longer affine to cpu%d\n", 1307 }
1289 task_pid_nr(p), p->comm, cpu); 1308 }
1309
1310out:
1311 if (state != cpuset) {
1312 /*
1313 * Don't tell them about moving exiting tasks or
1314 * kernel threads (both mm NULL), since they never
1315 * leave kernel.
1316 */
1317 if (p->mm && printk_ratelimit()) {
1318 printk_sched("process %d (%s) no longer affine to cpu%d\n",
1319 task_pid_nr(p), p->comm, cpu);
1320 }
1290 } 1321 }
1291 1322
1292 return dest_cpu; 1323 return dest_cpu;
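
The rewritten select_fallback_rq() replaces the old pair of ad-hoc retries with an explicit escalation, cpuset -> possible -> fail, and only prints the "no longer affine" message once it actually had to widen the mask. The control shape is the standard state-machine-in-a-loop idiom; abstracted (a pattern sketch with hypothetical helpers, not the scheduler code itself):

    #include <linux/bug.h>

    /* hypothetical helpers standing in for the scheduler's mask logic */
    bool find_allowed_online_cpu(int *cpu);
    void widen_to_cpuset_fallback(void);
    void widen_to_possible_mask(void);

    static int pick_fallback_cpu(void)
    {
            enum { try_cpuset, try_possible, fail } state = try_cpuset;
            int cpu;

            for (;;) {
                    if (find_allowed_online_cpu(&cpu))
                            break;                  /* somewhere to run */
                    switch (state) {
                    case try_cpuset:
                            widen_to_cpuset_fallback();
                            state = try_possible;
                            break;
                    case try_possible:
                            widen_to_possible_mask();
                            state = fail;
                            break;
                    case fail:
                            BUG();                  /* nowhere left to run */
                    }
            }
            return cpu;
    }
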
@@ -1933,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
1933 local_irq_enable(); 1964 local_irq_enable();
1934#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 1965#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
1935 finish_lock_switch(rq, prev); 1966 finish_lock_switch(rq, prev);
1967 finish_arch_post_lock_switch();
1936 1968
1937 fire_sched_in_preempt_notifiers(current); 1969 fire_sched_in_preempt_notifiers(current);
1938 if (mm) 1970 if (mm)
@@ -3070,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count);
3070 */ 3102 */
3071static noinline void __schedule_bug(struct task_struct *prev) 3103static noinline void __schedule_bug(struct task_struct *prev)
3072{ 3104{
3073 struct pt_regs *regs = get_irq_regs();
3074
3075 if (oops_in_progress) 3105 if (oops_in_progress)
3076 return; 3106 return;
3077 3107
@@ -3082,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
3082 print_modules(); 3112 print_modules();
3083 if (irqs_disabled()) 3113 if (irqs_disabled())
3084 print_irqtrace_events(prev); 3114 print_irqtrace_events(prev);
3085 3115 dump_stack();
3086 if (regs)
3087 show_regs(regs);
3088 else
3089 dump_stack();
3090} 3116}
3091 3117
3092/* 3118/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94340c7544a..0d97ebdc58f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
416 416
417#endif /* CONFIG_FAIR_GROUP_SCHED */ 417#endif /* CONFIG_FAIR_GROUP_SCHED */
418 418
419static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, 419static __always_inline
420 unsigned long delta_exec); 420void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
421 421
422/************************************************************** 422/**************************************************************
423 * Scheduling class tree data structure manipulation methods: 423 * Scheduling class tree data structure manipulation methods:
@@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
1162 __clear_buddies_skip(se); 1162 __clear_buddies_skip(se);
1163} 1163}
1164 1164
1165static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); 1165static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
1166 1166
1167static void 1167static void
1168dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) 1168dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
1546 resched_task(rq_of(cfs_rq)->curr); 1546 resched_task(rq_of(cfs_rq)->curr);
1547} 1547}
1548 1548
1549static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, 1549static __always_inline
1550 unsigned long delta_exec) 1550void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
1551{ 1551{
1552 if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) 1552 if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
1553 return; 1553 return;
@@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq)
2073} 2073}
2074 2074
2075#else /* CONFIG_CFS_BANDWIDTH */ 2075#else /* CONFIG_CFS_BANDWIDTH */
2076static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, 2076static __always_inline
2077 unsigned long delta_exec) {} 2077void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
2078static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} 2078static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
2079static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} 2079static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
2080static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} 2080static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
2081 2081
2082static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) 2082static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
2083{ 2083{
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b60dad72017..44af55e6d5d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1428next_idx: 1428next_idx:
1429 if (idx >= MAX_RT_PRIO) 1429 if (idx >= MAX_RT_PRIO)
1430 continue; 1430 continue;
1431 if (next && next->prio < idx) 1431 if (next && next->prio <= idx)
1432 continue; 1432 continue;
1433 list_for_each_entry(rt_se, array->queue + idx, run_list) { 1433 list_for_each_entry(rt_se, array->queue + idx, run_list) {
1434 struct task_struct *p; 1434 struct task_struct *p;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 42b1f304b04..fb3acba4d52 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -681,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p)
681#ifndef finish_arch_switch 681#ifndef finish_arch_switch
682# define finish_arch_switch(prev) do { } while (0) 682# define finish_arch_switch(prev) do { } while (0)
683#endif 683#endif
684#ifndef finish_arch_post_lock_switch
685# define finish_arch_post_lock_switch() do { } while (0)
686#endif
684 687
685#ifndef __ARCH_WANT_UNLOCKED_CTXSW 688#ifndef __ARCH_WANT_UNLOCKED_CTXSW
686static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) 689static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
diff --git a/kernel/signal.c b/kernel/signal.c
index e76001ccf5c..17afcaf582d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -36,6 +36,7 @@
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/siginfo.h> 38#include <asm/siginfo.h>
39#include <asm/cacheflush.h>
39#include "audit.h" /* audit_signal_info() */ 40#include "audit.h" /* audit_signal_info() */
40 41
41/* 42/*
@@ -58,21 +59,20 @@ static int sig_handler_ignored(void __user *handler, int sig)
58 (handler == SIG_DFL && sig_kernel_ignore(sig)); 59 (handler == SIG_DFL && sig_kernel_ignore(sig));
59} 60}
60 61
61static int sig_task_ignored(struct task_struct *t, int sig, 62static int sig_task_ignored(struct task_struct *t, int sig, bool force)
62 int from_ancestor_ns)
63{ 63{
64 void __user *handler; 64 void __user *handler;
65 65
66 handler = sig_handler(t, sig); 66 handler = sig_handler(t, sig);
67 67
68 if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && 68 if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
69 handler == SIG_DFL && !from_ancestor_ns) 69 handler == SIG_DFL && !force)
70 return 1; 70 return 1;
71 71
72 return sig_handler_ignored(handler, sig); 72 return sig_handler_ignored(handler, sig);
73} 73}
74 74
75static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) 75static int sig_ignored(struct task_struct *t, int sig, bool force)
76{ 76{
77 /* 77 /*
78 * Blocked signals are never ignored, since the 78 * Blocked signals are never ignored, since the
@@ -82,7 +82,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
82 if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) 82 if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
83 return 0; 83 return 0;
84 84
85 if (!sig_task_ignored(t, sig, from_ancestor_ns)) 85 if (!sig_task_ignored(t, sig, force))
86 return 0; 86 return 0;
87 87
88 /* 88 /*
@@ -855,7 +855,7 @@ static void ptrace_trap_notify(struct task_struct *t)
855 * Returns true if the signal should be actually delivered, otherwise 855 * Returns true if the signal should be actually delivered, otherwise
856 * it should be dropped. 856 * it should be dropped.
857 */ 857 */
858static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) 858static int prepare_signal(int sig, struct task_struct *p, bool force)
859{ 859{
860 struct signal_struct *signal = p->signal; 860 struct signal_struct *signal = p->signal;
861 struct task_struct *t; 861 struct task_struct *t;
@@ -915,7 +915,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
915 } 915 }
916 } 916 }
917 917
918 return !sig_ignored(p, sig, from_ancestor_ns); 918 return !sig_ignored(p, sig, force);
919} 919}
920 920
921/* 921/*
@@ -1059,7 +1059,8 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1059 assert_spin_locked(&t->sighand->siglock); 1059 assert_spin_locked(&t->sighand->siglock);
1060 1060
1061 result = TRACE_SIGNAL_IGNORED; 1061 result = TRACE_SIGNAL_IGNORED;
1062 if (!prepare_signal(sig, t, from_ancestor_ns)) 1062 if (!prepare_signal(sig, t,
1063 from_ancestor_ns || (info == SEND_SIG_FORCED)))
1063 goto ret; 1064 goto ret;
1064 1065
1065 pending = group ? &t->signal->shared_pending : &t->pending; 1066 pending = group ? &t->signal->shared_pending : &t->pending;
@@ -1601,7 +1602,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1601 1602
1602 ret = 1; /* the signal is ignored */ 1603 ret = 1; /* the signal is ignored */
1603 result = TRACE_SIGNAL_IGNORED; 1604 result = TRACE_SIGNAL_IGNORED;
1604 if (!prepare_signal(sig, t, 0)) 1605 if (!prepare_signal(sig, t, false))
1605 goto out; 1606 goto out;
1606 1607
1607 ret = 0; 1608 ret = 0;
diff --git a/kernel/smp.c b/kernel/smp.c
index db197d60489..2f8b10ecf75 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
701 return ret; 701 return ret;
702} 702}
703EXPORT_SYMBOL(on_each_cpu); 703EXPORT_SYMBOL(on_each_cpu);
704
705/**
706 * on_each_cpu_mask(): Run a function on processors specified by
707 * cpumask, which may include the local processor.
708 * @mask: The set of cpus to run on (only runs on online subset).
709 * @func: The function to run. This must be fast and non-blocking.
710 * @info: An arbitrary pointer to pass to the function.
711 * @wait: If true, wait (atomically) until function has completed
712 * on other CPUs.
713 *
714 * If @wait is true, then returns once @func has returned.
715 *
716 * You must not call this function with disabled interrupts or
717 * from a hardware interrupt handler or from a bottom half handler.
718 */
719void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
720 void *info, bool wait)
721{
722 int cpu = get_cpu();
723
724 smp_call_function_many(mask, func, info, wait);
725 if (cpumask_test_cpu(cpu, mask)) {
726 local_irq_disable();
727 func(info);
728 local_irq_enable();
729 }
730 put_cpu();
731}
732EXPORT_SYMBOL(on_each_cpu_mask);
733
734/*
735 * on_each_cpu_cond(): Call a function on each processor for which
736 * the supplied function cond_func returns true, optionally waiting
737 * for all the required CPUs to finish. This may include the local
738 * processor.
739 * @cond_func: A callback function that is passed a cpu id and
740 * the the info parameter. The function is called
741 * with preemption disabled. The function should
742 * return a blooean value indicating whether to IPI
743 * the specified CPU.
744 * @func: The function to run on all applicable CPUs.
745 * This must be fast and non-blocking.
746 * @info: An arbitrary pointer to pass to both functions.
747 * @wait: If true, wait (atomically) until function has
748 * completed on other CPUs.
749 * @gfp_flags: GFP flags to use when allocating the cpumask
750 * used internally by the function.
751 *
752 * The function might sleep if the GFP flags indicate that a
753 * non-atomic allocation is allowed.
754 *
755 * Preemption is disabled to protect against CPUs going offline during the call;
756 * CPUs that come online meanwhile will not be seen or sent an IPI.
757 *
758 * You must not call this function with disabled interrupts or
759 * from a hardware interrupt handler or from a bottom half handler.
760 */
761void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
762 smp_call_func_t func, void *info, bool wait,
763 gfp_t gfp_flags)
764{
765 cpumask_var_t cpus;
766 int cpu, ret;
767
768 might_sleep_if(gfp_flags & __GFP_WAIT);
769
770 if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
771 preempt_disable();
772 for_each_online_cpu(cpu)
773 if (cond_func(cpu, info))
774 cpumask_set_cpu(cpu, cpus);
775 on_each_cpu_mask(cpus, func, info, wait);
776 preempt_enable();
777 free_cpumask_var(cpus);
778 } else {
779 /*
780 * No free cpumask, bother. No matter, we'll
781 * just have to IPI them one by one.
782 */
783 preempt_disable();
784 for_each_online_cpu(cpu)
785 if (cond_func(cpu, info)) {
786 ret = smp_call_function_single(cpu, func,
787 info, wait);
788 WARN_ON_ONCE(ret); /* smp_call_function_single() returns 0 on success */
789 }
790 preempt_enable();
791 }
792}
793EXPORT_SYMBOL(on_each_cpu_cond);
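
A typical consumer of on_each_cpu_cond() keeps a cheap per-cpu predicate and lets the core pick whom to IPI; something like the following (a sketch: the per-cpu flag and both callbacks are illustrative):

    #include <linux/percpu.h>
    #include <linux/smp.h>
    #include <linux/gfp.h>

    static DEFINE_PER_CPU(int, pending_flushes);    /* illustrative flag */

    static bool cpu_has_work(int cpu, void *info)   /* runs preempt-disabled */
    {
            return per_cpu(pending_flushes, cpu) != 0;
    }

    static void do_flush(void *info)    /* IPI context: fast, non-blocking */
    {
            this_cpu_write(pending_flushes, 0);
            /* ... flush this CPU's local state ... */
    }

    static void flush_all_pending(void)
    {
            /* IPI only the CPUs whose predicate is true; wait for them */
            on_each_cpu_cond(cpu_has_work, do_flush, NULL, true, GFP_KERNEL);
    }
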
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 84c7d96918b..5cdd8065a3c 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
163EXPORT_SYMBOL(_raw_spin_lock_bh); 163EXPORT_SYMBOL(_raw_spin_lock_bh);
164#endif 164#endif
165 165
166#ifndef CONFIG_INLINE_SPIN_UNLOCK 166#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) 167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
168{ 168{
169 __raw_spin_unlock(lock); 169 __raw_spin_unlock(lock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 9eb7fcab8df..e7006eb6c1e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
444 magic2 != LINUX_REBOOT_MAGIC2C)) 444 magic2 != LINUX_REBOOT_MAGIC2C))
445 return -EINVAL; 445 return -EINVAL;
446 446
447 /*
448 * If pid namespaces are enabled and the current task is in a child
449 * pid_namespace, the command is handled by reboot_pid_ns() which will
450 * call do_exit().
451 */
452 ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
453 if (ret)
454 return ret;
455
447 /* Instead of trying to make the power_off code look like 456 /* Instead of trying to make the power_off code look like
448 * halt when pm_power_off is not set do it the easy way. 457 * halt when pm_power_off is not set do it the easy way.
449 */ 458 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 11d53046b90..4ab11879aeb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/sysctl.h> 25#include <linux/sysctl.h>
26#include <linux/bitmap.h>
26#include <linux/signal.h> 27#include <linux/signal.h>
27#include <linux/printk.h> 28#include <linux/printk.h>
28#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
@@ -68,6 +69,9 @@
68#include <asm/stacktrace.h> 69#include <asm/stacktrace.h>
69#include <asm/io.h> 70#include <asm/io.h>
70#endif 71#endif
72#ifdef CONFIG_SPARC
73#include <asm/setup.h>
74#endif
71#ifdef CONFIG_BSD_PROCESS_ACCT 75#ifdef CONFIG_BSD_PROCESS_ACCT
72#include <linux/acct.h> 76#include <linux/acct.h>
73#endif 77#endif
@@ -142,7 +146,6 @@ static const int cap_last_cap = CAP_LAST_CAP;
142#include <linux/inotify.h> 146#include <linux/inotify.h>
143#endif 147#endif
144#ifdef CONFIG_SPARC 148#ifdef CONFIG_SPARC
145#include <asm/system.h>
146#endif 149#endif
147 150
148#ifdef CONFIG_SPARC64 151#ifdef CONFIG_SPARC64
@@ -167,7 +170,7 @@ static int proc_taint(struct ctl_table *table, int write,
167#endif 170#endif
168 171
169#ifdef CONFIG_PRINTK 172#ifdef CONFIG_PRINTK
170static int proc_dmesg_restrict(struct ctl_table *table, int write, 173static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
171 void __user *buffer, size_t *lenp, loff_t *ppos); 174 void __user *buffer, size_t *lenp, loff_t *ppos);
172#endif 175#endif
173 176
@@ -193,20 +196,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
193 196
194#endif 197#endif
195 198
196static struct ctl_table root_table[];
197static struct ctl_table_root sysctl_table_root;
198static struct ctl_table_header root_table_header = {
199 {{.count = 1,
200 .ctl_table = root_table,
201 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
202 .root = &sysctl_table_root,
203 .set = &sysctl_table_root.default_set,
204};
205static struct ctl_table_root sysctl_table_root = {
206 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
207 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
208};
209
210static struct ctl_table kern_table[]; 199static struct ctl_table kern_table[];
211static struct ctl_table vm_table[]; 200static struct ctl_table vm_table[];
212static struct ctl_table fs_table[]; 201static struct ctl_table fs_table[];
@@ -223,7 +212,7 @@ int sysctl_legacy_va_layout;
223 212
224/* The default sysctl tables: */ 213/* The default sysctl tables: */
225 214
226static struct ctl_table root_table[] = { 215static struct ctl_table sysctl_base_table[] = {
227 { 216 {
228 .procname = "kernel", 217 .procname = "kernel",
229 .mode = 0555, 218 .mode = 0555,
@@ -714,7 +703,7 @@ static struct ctl_table kern_table[] = {
714 .data = &dmesg_restrict, 703 .data = &dmesg_restrict,
715 .maxlen = sizeof(int), 704 .maxlen = sizeof(int),
716 .mode = 0644, 705 .mode = 0644,
717 .proc_handler = proc_dointvec_minmax, 706 .proc_handler = proc_dointvec_minmax_sysadmin,
718 .extra1 = &zero, 707 .extra1 = &zero,
719 .extra2 = &one, 708 .extra2 = &one,
720 }, 709 },
@@ -723,7 +712,7 @@ static struct ctl_table kern_table[] = {
723 .data = &kptr_restrict, 712 .data = &kptr_restrict,
724 .maxlen = sizeof(int), 713 .maxlen = sizeof(int),
725 .mode = 0644, 714 .mode = 0644,
726 .proc_handler = proc_dmesg_restrict, 715 .proc_handler = proc_dointvec_minmax_sysadmin,
727 .extra1 = &zero, 716 .extra1 = &zero,
728 .extra2 = &two, 717 .extra2 = &two,
729 }, 718 },
@@ -1560,490 +1549,12 @@ static struct ctl_table dev_table[] = {
1560 { } 1549 { }
1561}; 1550};
1562 1551
1563static DEFINE_SPINLOCK(sysctl_lock); 1552int __init sysctl_init(void)
1564
1565/* called under sysctl_lock */
1566static int use_table(struct ctl_table_header *p)
1567{
1568 if (unlikely(p->unregistering))
1569 return 0;
1570 p->used++;
1571 return 1;
1572}
1573
1574/* called under sysctl_lock */
1575static void unuse_table(struct ctl_table_header *p)
1576{
1577 if (!--p->used)
1578 if (unlikely(p->unregistering))
1579 complete(p->unregistering);
1580}
1581
1582/* called under sysctl_lock, will reacquire if has to wait */
1583static void start_unregistering(struct ctl_table_header *p)
1584{
1585 /*
1586 * if p->used is 0, nobody will ever touch that entry again;
1587 * we'll eliminate all paths to it before dropping sysctl_lock
1588 */
1589 if (unlikely(p->used)) {
1590 struct completion wait;
1591 init_completion(&wait);
1592 p->unregistering = &wait;
1593 spin_unlock(&sysctl_lock);
1594 wait_for_completion(&wait);
1595 spin_lock(&sysctl_lock);
1596 } else {
1597 /* anything non-NULL; we'll never dereference it */
1598 p->unregistering = ERR_PTR(-EINVAL);
1599 }
1600 /*
1601 * do not remove from the list until nobody holds it; walking the
1602 * list in do_sysctl() relies on that.
1603 */
1604 list_del_init(&p->ctl_entry);
1605}
1606
1607void sysctl_head_get(struct ctl_table_header *head)
1608{
1609 spin_lock(&sysctl_lock);
1610 head->count++;
1611 spin_unlock(&sysctl_lock);
1612}
1613
1614void sysctl_head_put(struct ctl_table_header *head)
1615{
1616 spin_lock(&sysctl_lock);
1617 if (!--head->count)
1618 kfree_rcu(head, rcu);
1619 spin_unlock(&sysctl_lock);
1620}
1621
1622struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1623{
1624 if (!head)
1625 BUG();
1626 spin_lock(&sysctl_lock);
1627 if (!use_table(head))
1628 head = ERR_PTR(-ENOENT);
1629 spin_unlock(&sysctl_lock);
1630 return head;
1631}
1632
1633void sysctl_head_finish(struct ctl_table_header *head)
1634{
1635 if (!head)
1636 return;
1637 spin_lock(&sysctl_lock);
1638 unuse_table(head);
1639 spin_unlock(&sysctl_lock);
1640}
1641
1642static struct ctl_table_set *
1643lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1644{
1645 struct ctl_table_set *set = &root->default_set;
1646 if (root->lookup)
1647 set = root->lookup(root, namespaces);
1648 return set;
1649}
1650
1651static struct list_head *
1652lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1653{
1654 struct ctl_table_set *set = lookup_header_set(root, namespaces);
1655 return &set->list;
1656}
1657
1658struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1659 struct ctl_table_header *prev)
1660{ 1553{
1661 struct ctl_table_root *root; 1554 register_sysctl_table(sysctl_base_table);
1662 struct list_head *header_list;
1663 struct ctl_table_header *head;
1664 struct list_head *tmp;
1665
1666 spin_lock(&sysctl_lock);
1667 if (prev) {
1668 head = prev;
1669 tmp = &prev->ctl_entry;
1670 unuse_table(prev);
1671 goto next;
1672 }
1673 tmp = &root_table_header.ctl_entry;
1674 for (;;) {
1675 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1676
1677 if (!use_table(head))
1678 goto next;
1679 spin_unlock(&sysctl_lock);
1680 return head;
1681 next:
1682 root = head->root;
1683 tmp = tmp->next;
1684 header_list = lookup_header_list(root, namespaces);
1685 if (tmp != header_list)
1686 continue;
1687
1688 do {
1689 root = list_entry(root->root_list.next,
1690 struct ctl_table_root, root_list);
1691 if (root == &sysctl_table_root)
1692 goto out;
1693 header_list = lookup_header_list(root, namespaces);
1694 } while (list_empty(header_list));
1695 tmp = header_list->next;
1696 }
1697out:
1698 spin_unlock(&sysctl_lock);
1699 return NULL;
1700}
1701
1702struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1703{
1704 return __sysctl_head_next(current->nsproxy, prev);
1705}
1706
1707void register_sysctl_root(struct ctl_table_root *root)
1708{
1709 spin_lock(&sysctl_lock);
1710 list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1711 spin_unlock(&sysctl_lock);
1712}
1713
1714/*
1715 * sysctl_perm does NOT grant the superuser all rights automatically, because
1716 * some sysctl variables are readonly even to root.
1717 */
1718
1719static int test_perm(int mode, int op)
1720{
1721 if (!current_euid())
1722 mode >>= 6;
1723 else if (in_egroup_p(0))
1724 mode >>= 3;
1725 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1726 return 0;
1727 return -EACCES;
1728}
1729
1730int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1731{
1732 int mode;
1733
1734 if (root->permissions)
1735 mode = root->permissions(root, current->nsproxy, table);
1736 else
1737 mode = table->mode;
1738
1739 return test_perm(mode, op);
1740}
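
The comment above deserves a worked example: because test_perm() shifts the mode down rather than short-circuiting for root, a read-only entry stays read-only even for euid 0. Below is a minimal userspace sketch of the same logic; the MAY_* values match include/linux/fs.h, everything else is hypothetical and not kernel code.

#include <stdio.h>

#define MAY_EXEC	0x01
#define MAY_WRITE	0x02
#define MAY_READ	0x04

/* mirror of test_perm(): pick owner/group/other bits, then mask the op */
static int test_perm_sketch(int mode, int op, int euid, int in_egroup0)
{
	if (euid == 0)
		mode >>= 6;		/* root uses the owner bits */
	else if (in_egroup0)
		mode >>= 3;		/* members of group 0 use the group bits */
	if ((op & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
		return 0;
	return -13;			/* -EACCES */
}

int main(void)
{
	/* 0644 as root: owner bits are rw-, so a write passes */
	printf("%d\n", test_perm_sketch(0644, MAY_WRITE, 0, 0));	/* 0 */
	/* 0444 as root: owner bits are r--, so even root's write is refused */
	printf("%d\n", test_perm_sketch(0444, MAY_WRITE, 0, 0));	/* -13 */
	return 0;
}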
1741
1742static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1743{
1744 for (; table->procname; table++) {
1745 table->parent = parent;
1746 if (table->child)
1747 sysctl_set_parent(table, table->child);
1748 }
1749}
1750
1751static __init int sysctl_init(void)
1752{
1753 sysctl_set_parent(NULL, root_table);
1754#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1755 sysctl_check_table(current->nsproxy, root_table);
1756#endif
1757 return 0; 1555 return 0;
1758} 1556}
1759 1557
1760core_initcall(sysctl_init);
1761
1762static struct ctl_table *is_branch_in(struct ctl_table *branch,
1763 struct ctl_table *table)
1764{
1765 struct ctl_table *p;
1766 const char *s = branch->procname;
1767
1768 /* branch should have named subdirectory as its first element */
1769 if (!s || !branch->child)
1770 return NULL;
1771
1772 /* ... and nothing else */
1773 if (branch[1].procname)
1774 return NULL;
1775
1776 /* table should contain subdirectory with the same name */
1777 for (p = table; p->procname; p++) {
1778 if (!p->child)
1779 continue;
1780 if (p->procname && strcmp(p->procname, s) == 0)
1781 return p;
1782 }
1783 return NULL;
1784}
1785
1786/* see if attaching q to p would be an improvement */
1787static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1788{
1789 struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1790 struct ctl_table *next;
1791 int is_better = 0;
1792 int not_in_parent = !p->attached_by;
1793
1794 while ((next = is_branch_in(by, to)) != NULL) {
1795 if (by == q->attached_by)
1796 is_better = 1;
1797 if (to == p->attached_by)
1798 not_in_parent = 1;
1799 by = by->child;
1800 to = next->child;
1801 }
1802
1803 if (is_better && not_in_parent) {
1804 q->attached_by = by;
1805 q->attached_to = to;
1806 q->parent = p;
1807 }
1808}
1809
1810/**
1811 * __register_sysctl_paths - register a sysctl hierarchy
1812 * @root: List of sysctl headers to register on
1813 * @namespaces: Data to compute which lists of sysctl entries are visible
1814 * @path: The path to the directory the sysctl table is in.
1815 * @table: the top-level table structure
1816 *
1817 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1818 * array. A completely 0 filled entry terminates the table.
1819 *
1820 * The members of the &struct ctl_table structure are used as follows:
1821 *
1822 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1823 * enter a sysctl file
1824 *
1825 * data - a pointer to data for use by proc_handler
1826 *
1827 * maxlen - the maximum size in bytes of the data
1828 *
1829 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1830 *
1831 * child - a pointer to the child sysctl table if this entry is a directory, or
1832 * %NULL.
1833 *
1834 * proc_handler - the text handler routine (described below)
1835 *
1836 * de - for internal use by the sysctl routines
1837 *
1838 * extra1, extra2 - extra pointers usable by the proc handler routines
1839 *
1840 * Leaf nodes in the sysctl tree will be represented by a single file
1841 * under /proc; non-leaf nodes will be represented by directories.
1842 *
1843 * sysctl(2) can automatically manage read and write requests through
1844 * the sysctl table. The data and maxlen fields of the ctl_table
1845 * struct enable minimal validation of the values being written, and
1846 * the mode field allows minimal authentication.
1847 *
1848 * There must be a proc_handler routine for any terminal nodes
1849 * mirrored under /proc/sys (non-terminals are handled by a built-in
1850 * directory handler). Several default handlers are available to
1851 * cover common cases -
1852 *
1853 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1854 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1855 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1856 *
1857 * It is the handler's job to read the input buffer from user memory
1858 * and process it. The handler should return 0 on success.
1859 *
1860 * This routine returns %NULL on a failure to register, and a pointer
1861 * to the table header on success.
1862 */
1863struct ctl_table_header *__register_sysctl_paths(
1864 struct ctl_table_root *root,
1865 struct nsproxy *namespaces,
1866 const struct ctl_path *path, struct ctl_table *table)
1867{
1868 struct ctl_table_header *header;
1869 struct ctl_table *new, **prevp;
1870 unsigned int n, npath;
1871 struct ctl_table_set *set;
1872
1873 /* Count the path components */
1874 for (npath = 0; path[npath].procname; ++npath)
1875 ;
1876
1877 /*
1878 * For each path component, allocate a 2-element ctl_table array.
1879 * The first array element will be filled with the sysctl entry
1880 * for this component, the second will be the sentinel (procname == 0).
1881 *
1882 * We allocate everything in one go so that we don't have to
1883 * worry about freeing additional memory in unregister_sysctl_table.
1884 */
1885 header = kzalloc(sizeof(struct ctl_table_header) +
1886 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1887 if (!header)
1888 return NULL;
1889
1890 new = (struct ctl_table *) (header + 1);
1891
1892 /* Now connect the dots */
1893 prevp = &header->ctl_table;
1894 for (n = 0; n < npath; ++n, ++path) {
1895 /* Copy the procname */
1896 new->procname = path->procname;
1897 new->mode = 0555;
1898
1899 *prevp = new;
1900 prevp = &new->child;
1901
1902 new += 2;
1903 }
1904 *prevp = table;
1905 header->ctl_table_arg = table;
1906
1907 INIT_LIST_HEAD(&header->ctl_entry);
1908 header->used = 0;
1909 header->unregistering = NULL;
1910 header->root = root;
1911 sysctl_set_parent(NULL, header->ctl_table);
1912 header->count = 1;
1913#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1914 if (sysctl_check_table(namespaces, header->ctl_table)) {
1915 kfree(header);
1916 return NULL;
1917 }
1918#endif
1919 spin_lock(&sysctl_lock);
1920 header->set = lookup_header_set(root, namespaces);
1921 header->attached_by = header->ctl_table;
1922 header->attached_to = root_table;
1923 header->parent = &root_table_header;
1924 for (set = header->set; set; set = set->parent) {
1925 struct ctl_table_header *p;
1926 list_for_each_entry(p, &set->list, ctl_entry) {
1927 if (p->unregistering)
1928 continue;
1929 try_attach(p, header);
1930 }
1931 }
1932 header->parent->count++;
1933 list_add_tail(&header->ctl_entry, &header->set->list);
1934 spin_unlock(&sysctl_lock);
1935
1936 return header;
1937}
1938
1939/**
1940 * register_sysctl_table_path - register a sysctl table hierarchy
1941 * @path: The path to the directory the sysctl table is in.
1942 * @table: the top-level table structure
1943 *
1944 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1945 * array. A completely 0 filled entry terminates the table.
1946 *
1947 * See __register_sysctl_paths for more details.
1948 */
1949struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1950 struct ctl_table *table)
1951{
1952 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1953 path, table);
1954}
1955
1956/**
1957 * register_sysctl_table - register a sysctl table hierarchy
1958 * @table: the top-level table structure
1959 *
1960 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1961 * array. A completely 0 filled entry terminates the table.
1962 *
1963 * See register_sysctl_paths for more details.
1964 */
1965struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1966{
1967 static const struct ctl_path null_path[] = { {} };
1968
1969 return register_sysctl_paths(null_path, table);
1970}
1971
1972/**
1973 * unregister_sysctl_table - unregister a sysctl table hierarchy
1974 * @header: the header returned from register_sysctl_table
1975 *
1976 * Unregisters the sysctl table and all children. proc entries may not
1977 * actually be removed until they are no longer used by anyone.
1978 */
1979void unregister_sysctl_table(struct ctl_table_header * header)
1980{
1981 might_sleep();
1982
1983 if (header == NULL)
1984 return;
1985
1986 spin_lock(&sysctl_lock);
1987 start_unregistering(header);
1988 if (!--header->parent->count) {
1989 WARN_ON(1);
1990 kfree_rcu(header->parent, rcu);
1991 }
1992 if (!--header->count)
1993 kfree_rcu(header, rcu);
1994 spin_unlock(&sysctl_lock);
1995}
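
A minimal sketch of the registration API documented above, assuming a hypothetical module that wants an integer knob at /proc/sys/kernel/example_value (all "example_*" names are invented for illustration):

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/sysctl.h>

static int example_value;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* completely zero-filled entry terminates the table */
};

static const struct ctl_path example_path[] = {
	{ .procname = "kernel", },
	{ }
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	example_header = register_sysctl_paths(example_path, example_table);
	return example_header ? 0 : -ENOMEM;
}

static void __exit example_sysctl_exit(void)
{
	unregister_sysctl_table(example_header);
}

Note the pairing: register_sysctl_paths() returns the header that unregister_sysctl_table() later takes, and unregistration may sleep until all current users of the table are done.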
1996
1997int sysctl_is_seen(struct ctl_table_header *p)
1998{
1999 struct ctl_table_set *set = p->set;
2000 int res;
2001 spin_lock(&sysctl_lock);
2002 if (p->unregistering)
2003 res = 0;
2004 else if (!set->is_seen)
2005 res = 1;
2006 else
2007 res = set->is_seen(set);
2008 spin_unlock(&sysctl_lock);
2009 return res;
2010}
2011
2012void setup_sysctl_set(struct ctl_table_set *p,
2013 struct ctl_table_set *parent,
2014 int (*is_seen)(struct ctl_table_set *))
2015{
2016 INIT_LIST_HEAD(&p->list);
2017 p->parent = parent ? parent : &sysctl_table_root.default_set;
2018 p->is_seen = is_seen;
2019}
2020
2021#else /* !CONFIG_SYSCTL */
2022struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
2023{
2024 return NULL;
2025}
2026
2027struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2028 struct ctl_table *table)
2029{
2030 return NULL;
2031}
2032
2033void unregister_sysctl_table(struct ctl_table_header * table)
2034{
2035}
2036
2037void setup_sysctl_set(struct ctl_table_set *p,
2038 struct ctl_table_set *parent,
2039 int (*is_seen)(struct ctl_table_set *))
2040{
2041}
2042
2043void sysctl_head_put(struct ctl_table_header *head)
2044{
2045}
2046
2047#endif /* CONFIG_SYSCTL */ 1558#endif /* CONFIG_SYSCTL */
2048 1559
2049/* 1560/*
@@ -2432,7 +1943,7 @@ static int proc_taint(struct ctl_table *table, int write,
2432} 1943}
2433 1944
2434#ifdef CONFIG_PRINTK 1945#ifdef CONFIG_PRINTK
2435static int proc_dmesg_restrict(struct ctl_table *table, int write, 1946static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2436 void __user *buffer, size_t *lenp, loff_t *ppos) 1947 void __user *buffer, size_t *lenp, loff_t *ppos)
2437{ 1948{
2438 if (write && !capable(CAP_SYS_ADMIN)) 1949 if (write && !capable(CAP_SYS_ADMIN))
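
The hunk shows only the opening of the renamed handler; a sketch of the gating pattern it names, assuming the usual proc handler signature, would reject unprivileged writes and then fall through to the ordinary bounded handler:

static int dointvec_minmax_sysadmin_sketch(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	/* only CAP_SYS_ADMIN may change the restriction value... */
	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* ...reads and privileged writes behave like proc_dointvec_minmax */
	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}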
@@ -2885,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2885 } 2396 }
2886 } 2397 }
2887 2398
2888 while (val_a <= val_b) 2399 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2889 set_bit(val_a++, tmp_bitmap);
2890
2891 first = 0; 2400 first = 0;
2892 proc_skip_char(&kbuf, &left, '\n'); 2401 proc_skip_char(&kbuf, &left, '\n');
2893 } 2402 }
@@ -2930,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2930 if (*ppos) 2439 if (*ppos)
2931 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); 2440 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2932 else 2441 else
2933 memcpy(bitmap, tmp_bitmap, 2442 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2934 BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2935 } 2443 }
2936 kfree(tmp_bitmap); 2444 kfree(tmp_bitmap);
2937 *lenp -= left; 2445 *lenp -= left;
@@ -3009,6 +2517,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3009EXPORT_SYMBOL(proc_dostring); 2517EXPORT_SYMBOL(proc_dostring);
3010EXPORT_SYMBOL(proc_doulongvec_minmax); 2518EXPORT_SYMBOL(proc_doulongvec_minmax);
3011EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 2519EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3012EXPORT_SYMBOL(register_sysctl_table);
3013EXPORT_SYMBOL(register_sysctl_paths);
3014EXPORT_SYMBOL(unregister_sysctl_table);
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
deleted file mode 100644
index 362da653813..00000000000
--- a/kernel/sysctl_check.c
+++ /dev/null
@@ -1,160 +0,0 @@
1#include <linux/stat.h>
2#include <linux/sysctl.h>
3#include "../fs/xfs/xfs_sysctl.h"
4#include <linux/sunrpc/debug.h>
5#include <linux/string.h>
6#include <net/ip_vs.h>
7
8
9static int sysctl_depth(struct ctl_table *table)
10{
11 struct ctl_table *tmp;
12 int depth;
13
14 depth = 0;
15 for (tmp = table; tmp->parent; tmp = tmp->parent)
16 depth++;
17
18 return depth;
19}
20
21static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
22{
23 int i;
24
25 for (i = 0; table && i < n; i++)
26 table = table->parent;
27
28 return table;
29}
30
31
32static void sysctl_print_path(struct ctl_table *table)
33{
34 struct ctl_table *tmp;
35 int depth, i;
36 depth = sysctl_depth(table);
37 if (table->procname) {
38 for (i = depth; i >= 0; i--) {
39 tmp = sysctl_parent(table, i);
40 printk("/%s", tmp->procname?tmp->procname:"");
41 }
42 }
43 printk(" ");
44}
45
46static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
47 struct ctl_table *table)
48{
49 struct ctl_table_header *head;
50 struct ctl_table *ref, *test;
51 int depth, cur_depth;
52
53 depth = sysctl_depth(table);
54
55 for (head = __sysctl_head_next(namespaces, NULL); head;
56 head = __sysctl_head_next(namespaces, head)) {
57 cur_depth = depth;
58 ref = head->ctl_table;
59repeat:
60 test = sysctl_parent(table, cur_depth);
61 for (; ref->procname; ref++) {
62 int match = 0;
63 if (cur_depth && !ref->child)
64 continue;
65
66 if (test->procname && ref->procname &&
67 (strcmp(test->procname, ref->procname) == 0))
68 match++;
69
70 if (match) {
71 if (cur_depth != 0) {
72 cur_depth--;
73 ref = ref->child;
74 goto repeat;
75 }
76 goto out;
77 }
78 }
79 }
80 ref = NULL;
81out:
82 sysctl_head_finish(head);
83 return ref;
84}
85
86static void set_fail(const char **fail, struct ctl_table *table, const char *str)
87{
88 if (*fail) {
89 printk(KERN_ERR "sysctl table check failed: ");
90 sysctl_print_path(table);
91 printk(" %s\n", *fail);
92 dump_stack();
93 }
94 *fail = str;
95}
96
97static void sysctl_check_leaf(struct nsproxy *namespaces,
98 struct ctl_table *table, const char **fail)
99{
100 struct ctl_table *ref;
101
102 ref = sysctl_check_lookup(namespaces, table);
103 if (ref && (ref != table))
104 set_fail(fail, table, "Sysctl already exists");
105}
106
107int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
108{
109 int error = 0;
110 for (; table->procname; table++) {
111 const char *fail = NULL;
112
113 if (table->parent) {
114 if (!table->parent->procname)
115 set_fail(&fail, table, "Parent without procname");
116 }
117 if (table->child) {
118 if (table->data)
119 set_fail(&fail, table, "Directory with data?");
120 if (table->maxlen)
121 set_fail(&fail, table, "Directory with maxlen?");
122 if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
123 set_fail(&fail, table, "Writable sysctl directory");
124 if (table->proc_handler)
125 set_fail(&fail, table, "Directory with proc_handler");
126 if (table->extra1)
127 set_fail(&fail, table, "Directory with extra1");
128 if (table->extra2)
129 set_fail(&fail, table, "Directory with extra2");
130 } else {
131 if ((table->proc_handler == proc_dostring) ||
132 (table->proc_handler == proc_dointvec) ||
133 (table->proc_handler == proc_dointvec_minmax) ||
134 (table->proc_handler == proc_dointvec_jiffies) ||
135 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
136 (table->proc_handler == proc_dointvec_ms_jiffies) ||
137 (table->proc_handler == proc_doulongvec_minmax) ||
138 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
139 if (!table->data)
140 set_fail(&fail, table, "No data");
141 if (!table->maxlen)
142 set_fail(&fail, table, "No maxlen");
143 }
144#ifdef CONFIG_PROC_SYSCTL
145 if (!table->proc_handler)
146 set_fail(&fail, table, "No proc_handler");
147#endif
148 sysctl_check_leaf(namespaces, table, &fail);
149 }
150 if (table->mode > 0777)
151 set_fail(&fail, table, "bogus .mode");
152 if (fail) {
153 set_fail(&fail, table, NULL);
154 error = -EINVAL;
155 }
156 if (table->child)
157 error |= sysctl_check_table(namespaces, table->child);
158 }
159 return error;
160}
diff --git a/kernel/time.c b/kernel/time.c
index 73e416db0a1..ba744cf8069 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
163 return error; 163 return error;
164 164
165 if (tz) { 165 if (tz) {
166 /* SMP safe, global irq locking makes it work. */
167 sys_tz = *tz; 166 sys_tz = *tz;
168 update_vsyscall_tz(); 167 update_vsyscall_tz();
169 if (firsttime) { 168 if (firsttime) {
@@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
173 } 172 }
174 } 173 }
175 if (tv) 174 if (tv)
176 {
177 /* SMP safe, again the code in arch/foo/time.c should
178 * globally block out interrupts when it runs.
179 */
180 return do_settimeofday(tv); 175 return do_settimeofday(tv);
181 }
182 return 0; 176 return 0;
183} 177}
184 178
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 2cf9cc7aa10..a20dc8a3c94 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,6 +1,10 @@
1# 1#
2# Timer subsystem related configuration options 2# Timer subsystem related configuration options
3# 3#
4
5# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is
6# only related to the tick functionality. Oneshot clockevent devices
7# are supported independent of this.
4config TICK_ONESHOT 8config TICK_ONESHOT
5 bool 9 bool
6 10
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 8a46f5d6450..8a538c55fc7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev,
96 return 0; 96 return 0;
97} 97}
98 98
99static inline void alarmtimer_rtc_timer_init(void)
100{
101 rtc_timer_init(&rtctimer, NULL, NULL);
102}
103
99static struct class_interface alarmtimer_rtc_interface = { 104static struct class_interface alarmtimer_rtc_interface = {
100 .add_dev = &alarmtimer_rtc_add_device, 105 .add_dev = &alarmtimer_rtc_add_device,
101}; 106};
@@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void)
117#define rtcdev (NULL) 122#define rtcdev (NULL)
118static inline int alarmtimer_rtc_interface_setup(void) { return 0; } 123static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
119static inline void alarmtimer_rtc_interface_remove(void) { } 124static inline void alarmtimer_rtc_interface_remove(void) { }
125static inline void alarmtimer_rtc_timer_init(void) { }
120#endif 126#endif
121 127
122/** 128/**
@@ -783,6 +789,8 @@ static int __init alarmtimer_init(void)
783 .nsleep = alarm_timer_nsleep, 789 .nsleep = alarm_timer_nsleep,
784 }; 790 };
785 791
792 alarmtimer_rtc_timer_init();
793
786 posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); 794 posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
787 posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); 795 posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
788 796
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a45ca167ab2..c9583382141 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
500{ 500{
501 u64 ret; 501 u64 ret;
502 /* 502 /*
503 * We won't try to correct for more then 11% adjustments (110,000 ppm), 503 * We won't try to correct for more than 11% adjustments (110,000 ppm),
504 */ 504 */
505 ret = (u64)cs->mult * 11; 505 ret = (u64)cs->mult * 11;
506 do_div(ret,100); 506 do_div(ret,100);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 6e039b144da..f03fd83b170 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
34static u64 tick_length; 34static u64 tick_length;
35static u64 tick_length_base; 35static u64 tick_length_base;
36 36
37static struct hrtimer leap_timer;
38
39#define MAX_TICKADJ 500LL /* usecs */ 37#define MAX_TICKADJ 500LL /* usecs */
40#define MAX_TICKADJ_SCALED \ 38#define MAX_TICKADJ_SCALED \
41 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) 39 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)
381 379
382 380
383/* 381/*
384 * Leap second processing. If in leap-insert state at the end of the 382 * this routine handles the overflow of the microsecond field
385 * day, the system clock is set back one second; if in leap-delete 383 *
386 * state, the system clock is set ahead one second. 384 * The tricky bits of code to handle the accurate clock support
385 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
386 * They were originally developed for SUN and DEC kernels.
387 * All the kudos should go to Dave for this stuff.
388 *
389 * Also handles leap second processing, and returns leap offset
387 */ 390 */
388static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) 391int second_overflow(unsigned long secs)
389{ 392{
390 enum hrtimer_restart res = HRTIMER_NORESTART; 393 s64 delta;
391 unsigned long flags;
392 int leap = 0; 394 int leap = 0;
395 unsigned long flags;
393 396
394 spin_lock_irqsave(&ntp_lock, flags); 397 spin_lock_irqsave(&ntp_lock, flags);
398
399 /*
400 * Leap second processing. If in leap-insert state at the end of the
401 * day, the system clock is set back one second; if in leap-delete
402 * state, the system clock is set ahead one second.
403 */
395 switch (time_state) { 404 switch (time_state) {
396 case TIME_OK: 405 case TIME_OK:
406 if (time_status & STA_INS)
407 time_state = TIME_INS;
408 else if (time_status & STA_DEL)
409 time_state = TIME_DEL;
397 break; 410 break;
398 case TIME_INS: 411 case TIME_INS:
399 leap = -1; 412 if (secs % 86400 == 0) {
400 time_state = TIME_OOP; 413 leap = -1;
401 printk(KERN_NOTICE 414 time_state = TIME_OOP;
402 "Clock: inserting leap second 23:59:60 UTC\n"); 415 printk(KERN_NOTICE
403 hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); 416 "Clock: inserting leap second 23:59:60 UTC\n");
404 res = HRTIMER_RESTART; 417 }
405 break; 418 break;
406 case TIME_DEL: 419 case TIME_DEL:
407 leap = 1; 420 if ((secs + 1) % 86400 == 0) {
408 time_tai--; 421 leap = 1;
409 time_state = TIME_WAIT; 422 time_tai--;
410 printk(KERN_NOTICE 423 time_state = TIME_WAIT;
411 "Clock: deleting leap second 23:59:59 UTC\n"); 424 printk(KERN_NOTICE
425 "Clock: deleting leap second 23:59:59 UTC\n");
426 }
412 break; 427 break;
413 case TIME_OOP: 428 case TIME_OOP:
414 time_tai++; 429 time_tai++;
415 time_state = TIME_WAIT; 430 time_state = TIME_WAIT;
416 /* fall through */ 431 break;
432
417 case TIME_WAIT: 433 case TIME_WAIT:
418 if (!(time_status & (STA_INS | STA_DEL))) 434 if (!(time_status & (STA_INS | STA_DEL)))
419 time_state = TIME_OK; 435 time_state = TIME_OK;
420 break; 436 break;
421 } 437 }
422 spin_unlock_irqrestore(&ntp_lock, flags);
423 438
424 /*
425 * We have to call this outside of the ntp_lock to keep
426 * the proper locking hierarchy
427 */
428 if (leap)
429 timekeeping_leap_insert(leap);
430
431 return res;
432}
433
434/*
435 * this routine handles the overflow of the microsecond field
436 *
437 * The tricky bits of code to handle the accurate clock support
438 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
439 * They were originally developed for SUN and DEC kernels.
440 * All the kudos should go to Dave for this stuff.
441 */
442void second_overflow(void)
443{
444 s64 delta;
445 unsigned long flags;
446
447 spin_lock_irqsave(&ntp_lock, flags);
448 439
449 /* Bump the maxerror field */ 440 /* Bump the maxerror field */
450 time_maxerror += MAXFREQ / NSEC_PER_USEC; 441 time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,15 +472,17 @@ void second_overflow(void)
481 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) 472 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
482 << NTP_SCALE_SHIFT; 473 << NTP_SCALE_SHIFT;
483 time_adjust = 0; 474 time_adjust = 0;
475
476
477
484out: 478out:
485 spin_unlock_irqrestore(&ntp_lock, flags); 479 spin_unlock_irqrestore(&ntp_lock, flags);
480
481 return leap;
486} 482}
487 483
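
With the hrtimer gone, second_overflow() is driven from the timekeeping accumulation path and merely reports the leap offset. A sketch of the new calling convention, using hypothetical local names (the timekeeping.c hunks further down show the real callers):

/* accumulate whole seconds under the caller's own lock and apply
 * whatever offset second_overflow() reports (-1, 0 or +1) */
while (xtime_nsec >= nsec_per_sec_shifted) {
	int leap;

	xtime_nsec -= nsec_per_sec_shifted;
	xtime_sec++;
	leap = second_overflow(xtime_sec);
	xtime_sec += leap;
}

This also sidesteps the lock-ordering concern the old code documented (it had to call timekeeping_leap_insert() outside ntp_lock): the offset is now applied by the caller that already holds the timekeeping lock.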
488#ifdef CONFIG_GENERIC_CMOS_UPDATE 484#ifdef CONFIG_GENERIC_CMOS_UPDATE
489 485
490/* Disable the cmos update - used by virtualization and embedded */
491int no_sync_cmos_clock __read_mostly;
492
493static void sync_cmos_clock(struct work_struct *work); 486static void sync_cmos_clock(struct work_struct *work);
494 487
495static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); 488static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work)
536 529
537static void notify_cmos_timer(void) 530static void notify_cmos_timer(void)
538{ 531{
539 if (!no_sync_cmos_clock) 532 schedule_delayed_work(&sync_cmos_work, 0);
540 schedule_delayed_work(&sync_cmos_work, 0);
541} 533}
542 534
543#else 535#else
544static inline void notify_cmos_timer(void) { } 536static inline void notify_cmos_timer(void) { }
545#endif 537#endif
546 538
547/*
548 * Start the leap seconds timer:
549 */
550static inline void ntp_start_leap_timer(struct timespec *ts)
551{
552 long now = ts->tv_sec;
553
554 if (time_status & STA_INS) {
555 time_state = TIME_INS;
556 now += 86400 - now % 86400;
557 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
558
559 return;
560 }
561
562 if (time_status & STA_DEL) {
563 time_state = TIME_DEL;
564 now += 86400 - (now + 1) % 86400;
565 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
566 }
567}
568 539
569/* 540/*
570 * Propagate a new txc->status value into the NTP state: 541 * Propagate a new txc->status value into the NTP state:
@@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
589 time_status &= STA_RONLY; 560 time_status &= STA_RONLY;
590 time_status |= txc->status & ~STA_RONLY; 561 time_status |= txc->status & ~STA_RONLY;
591 562
592 switch (time_state) {
593 case TIME_OK:
594 ntp_start_leap_timer(ts);
595 break;
596 case TIME_INS:
597 case TIME_DEL:
598 time_state = TIME_OK;
599 ntp_start_leap_timer(ts);
600 case TIME_WAIT:
601 if (!(time_status & (STA_INS | STA_DEL)))
602 time_state = TIME_OK;
603 break;
604 case TIME_OOP:
605 hrtimer_restart(&leap_timer);
606 break;
607 }
608} 563}
609/* 564/*
610 * Called with the xtime lock held, so we can access and modify 565 * Called with the xtime lock held, so we can access and modify
@@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc)
686 (txc->tick < 900000/USER_HZ || 641 (txc->tick < 900000/USER_HZ ||
687 txc->tick > 1100000/USER_HZ)) 642 txc->tick > 1100000/USER_HZ))
688 return -EINVAL; 643 return -EINVAL;
689
690 if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
691 hrtimer_cancel(&leap_timer);
692 } 644 }
693 645
694 if (txc->modes & ADJ_SETOFFSET) { 646 if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
1010void __init ntp_init(void) 962void __init ntp_init(void)
1011{ 963{
1012 ntp_clear(); 964 ntp_clear();
1013 hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
1014 leap_timer.function = ntp_leap_second;
1015} 965}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index e883f57a3cd..bf57abdc7bd 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void)
575 unsigned long flags; 575 unsigned long flags;
576 576
577 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 577 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
578
579 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
580
578 if (cpumask_empty(tick_get_broadcast_mask())) 581 if (cpumask_empty(tick_get_broadcast_mask()))
579 goto end; 582 goto end;
580 583
581 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
582 bc = tick_broadcast_device.evtdev; 584 bc = tick_broadcast_device.evtdev;
583 if (bc) 585 if (bc)
584 tick_broadcast_setup_oneshot(bc); 586 tick_broadcast_setup_oneshot(bc);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3526038f283..6a3a5b9ff56 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
534 hrtimer_get_expires(&ts->sched_timer), 0)) 534 hrtimer_get_expires(&ts->sched_timer), 0))
535 break; 535 break;
536 } 536 }
537 /* Update jiffies and reread time */ 537 /* Reread time and update jiffies */
538 tick_do_update_jiffies64(now);
539 now = ktime_get(); 538 now = ktime_get();
539 tick_do_update_jiffies64(now);
540 } 540 }
541} 541}
542 542
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 15be32e19c6..d66b21308f7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
184} 184}
185 185
186 186
187void timekeeping_leap_insert(int leapsecond)
188{
189 unsigned long flags;
190
191 write_seqlock_irqsave(&timekeeper.lock, flags);
192 timekeeper.xtime.tv_sec += leapsecond;
193 timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
194 timekeeping_update(false);
195 write_sequnlock_irqrestore(&timekeeper.lock, flags);
196
197}
198
199/** 187/**
200 * timekeeping_forward_now - update clock to the current time 188 * timekeeping_forward_now - update clock to the current time
201 * 189 *
@@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
448static int change_clocksource(void *data) 436static int change_clocksource(void *data)
449{ 437{
450 struct clocksource *new, *old; 438 struct clocksource *new, *old;
439 unsigned long flags;
451 440
452 new = (struct clocksource *) data; 441 new = (struct clocksource *) data;
453 442
443 write_seqlock_irqsave(&timekeeper.lock, flags);
444
454 timekeeping_forward_now(); 445 timekeeping_forward_now();
455 if (!new->enable || new->enable(new) == 0) { 446 if (!new->enable || new->enable(new) == 0) {
456 old = timekeeper.clock; 447 old = timekeeper.clock;
@@ -458,6 +449,10 @@ static int change_clocksource(void *data)
458 if (old->disable) 449 if (old->disable)
459 old->disable(old); 450 old->disable(old);
460 } 451 }
452 timekeeping_update(true);
453
454 write_sequnlock_irqrestore(&timekeeper.lock, flags);
455
461 return 0; 456 return 0;
462} 457}
463 458
@@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset)
827 int adj; 822 int adj;
828 823
829 /* 824 /*
830 * The point of this is to check if the error is greater then half 825 * The point of this is to check if the error is greater than half
831 * an interval. 826 * an interval.
832 * 827 *
833 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. 828 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
@@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset)
835 * Note we subtract one in the shift, so that error is really error*2. 830 * Note we subtract one in the shift, so that error is really error*2.
836 * This "saves" dividing(shifting) interval twice, but keeps the 831 * This "saves" dividing(shifting) interval twice, but keeps the
837 * (error > interval) comparison as still measuring if error is 832 * (error > interval) comparison as still measuring if error is
838 * larger then half an interval. 833 * larger than half an interval.
839 * 834 *
840 * Note: It does not "save" on aggravation when reading the code. 835 * Note: It does not "save" on aggravation when reading the code.
841 */ 836 */
@@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset)
843 if (error > interval) { 838 if (error > interval) {
844 /* 839 /*
845 * We now divide error by 4(via shift), which checks if 840 * We now divide error by 4(via shift), which checks if
846 * the error is greater then twice the interval. 841 * the error is greater than twice the interval.
847 * If it is greater, we need a bigadjust, if its smaller, 842 * If it is greater, we need a bigadjust, if its smaller,
848 * we can adjust by 1. 843 * we can adjust by 1.
849 */ 844 */
@@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset)
874 } else /* No adjustment needed */ 869 } else /* No adjustment needed */
875 return; 870 return;
876 871
877 WARN_ONCE(timekeeper.clock->maxadj && 872 if (unlikely(timekeeper.clock->maxadj &&
878 (timekeeper.mult + adj > timekeeper.clock->mult + 873 (timekeeper.mult + adj >
879 timekeeper.clock->maxadj), 874 timekeeper.clock->mult + timekeeper.clock->maxadj))) {
880 "Adjusting %s more then 11%% (%ld vs %ld)\n", 875 printk_once(KERN_WARNING
876 "Adjusting %s more than 11%% (%ld vs %ld)\n",
881 timekeeper.clock->name, (long)timekeeper.mult + adj, 877 timekeeper.clock->name, (long)timekeeper.mult + adj,
882 (long)timekeeper.clock->mult + 878 (long)timekeeper.clock->mult +
883 timekeeper.clock->maxadj); 879 timekeeper.clock->maxadj);
880 }
884 /* 881 /*
885 * So the following can be confusing. 882 * So the following can be confusing.
886 * 883 *
@@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
952 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; 949 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
953 u64 raw_nsecs; 950 u64 raw_nsecs;
954 951
955 /* If the offset is smaller then a shifted interval, do nothing */ 952 /* If the offset is smaller than a shifted interval, do nothing */
956 if (offset < timekeeper.cycle_interval<<shift) 953 if (offset < timekeeper.cycle_interval<<shift)
957 return offset; 954 return offset;
958 955
@@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
962 959
963 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; 960 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
964 while (timekeeper.xtime_nsec >= nsecps) { 961 while (timekeeper.xtime_nsec >= nsecps) {
962 int leap;
965 timekeeper.xtime_nsec -= nsecps; 963 timekeeper.xtime_nsec -= nsecps;
966 timekeeper.xtime.tv_sec++; 964 timekeeper.xtime.tv_sec++;
967 second_overflow(); 965 leap = second_overflow(timekeeper.xtime.tv_sec);
966 timekeeper.xtime.tv_sec += leap;
968 } 967 }
969 968
970 /* Accumulate raw time */ 969 /* Accumulate raw time */
@@ -1018,13 +1017,13 @@ static void update_wall_time(void)
1018 * With NO_HZ we may have to accumulate many cycle_intervals 1017 * With NO_HZ we may have to accumulate many cycle_intervals
1019 * (think "ticks") worth of time at once. To do this efficiently, 1018 * (think "ticks") worth of time at once. To do this efficiently,
1020 * we calculate the largest doubling multiple of cycle_intervals 1019 * we calculate the largest doubling multiple of cycle_intervals
1021 * that is smaller then the offset. We then accumulate that 1020 * that is smaller than the offset. We then accumulate that
1022 * chunk in one go, and then try to consume the next smaller 1021 * chunk in one go, and then try to consume the next smaller
1023 * doubled multiple. 1022 * doubled multiple.
1024 */ 1023 */
1025 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); 1024 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
1026 shift = max(0, shift); 1025 shift = max(0, shift);
1027 /* Bound shift to one less then what overflows tick_length */ 1026 /* Bound shift to one less than what overflows tick_length */
1028 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; 1027 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
1029 shift = min(shift, maxshift); 1028 shift = min(shift, maxshift);
1030 while (offset >= timekeeper.cycle_interval) { 1029 while (offset >= timekeeper.cycle_interval) {
@@ -1072,12 +1071,14 @@ static void update_wall_time(void)
1072 1071
1073 /* 1072 /*
1074 * Finally, make sure that after the rounding 1073 * Finally, make sure that after the rounding
1075 * xtime.tv_nsec isn't larger then NSEC_PER_SEC 1074 * xtime.tv_nsec isn't larger than NSEC_PER_SEC
1076 */ 1075 */
1077 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { 1076 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
1077 int leap;
1078 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; 1078 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
1079 timekeeper.xtime.tv_sec++; 1079 timekeeper.xtime.tv_sec++;
1080 second_overflow(); 1080 leap = second_overflow(timekeeper.xtime.tv_sec);
1081 timekeeper.xtime.tv_sec += leap;
1081 } 1082 }
1082 1083
1083 timekeeping_update(false); 1084 timekeeping_update(false);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cd3134510f3..a1d2849f247 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,7 +141,7 @@ if FTRACE
141config FUNCTION_TRACER 141config FUNCTION_TRACER
142 bool "Kernel Function Tracer" 142 bool "Kernel Function Tracer"
143 depends on HAVE_FUNCTION_TRACER 143 depends on HAVE_FUNCTION_TRACER
144 select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE 144 select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE
145 select KALLSYMS 145 select KALLSYMS
146 select GENERIC_TRACER 146 select GENERIC_TRACER
147 select CONTEXT_SWITCH_TRACER 147 select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index cdea7b56b0c..c0bd0308741 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -311,13 +311,6 @@ int blk_trace_remove(struct request_queue *q)
311} 311}
312EXPORT_SYMBOL_GPL(blk_trace_remove); 312EXPORT_SYMBOL_GPL(blk_trace_remove);
313 313
314static int blk_dropped_open(struct inode *inode, struct file *filp)
315{
316 filp->private_data = inode->i_private;
317
318 return 0;
319}
320
321static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, 314static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
322 size_t count, loff_t *ppos) 315 size_t count, loff_t *ppos)
323{ 316{
@@ -331,18 +324,11 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
331 324
332static const struct file_operations blk_dropped_fops = { 325static const struct file_operations blk_dropped_fops = {
333 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
334 .open = blk_dropped_open, 327 .open = simple_open,
335 .read = blk_dropped_read, 328 .read = blk_dropped_read,
336 .llseek = default_llseek, 329 .llseek = default_llseek,
337}; 330};
338 331
339static int blk_msg_open(struct inode *inode, struct file *filp)
340{
341 filp->private_data = inode->i_private;
342
343 return 0;
344}
345
346static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, 332static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
347 size_t count, loff_t *ppos) 333 size_t count, loff_t *ppos)
348{ 334{
@@ -371,7 +357,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
371 357
372static const struct file_operations blk_msg_fops = { 358static const struct file_operations blk_msg_fops = {
373 .owner = THIS_MODULE, 359 .owner = THIS_MODULE,
374 .open = blk_msg_open, 360 .open = simple_open,
375 .write = blk_msg_write, 361 .write = blk_msg_write,
376 .llseek = noop_llseek, 362 .llseek = noop_llseek,
377}; 363};
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 867bd1dd2dd..0fa92f677c9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -249,7 +249,8 @@ static void update_ftrace_function(void)
249#else 249#else
250 __ftrace_trace_function = func; 250 __ftrace_trace_function = func;
251#endif 251#endif
252 ftrace_trace_function = ftrace_test_stop_func; 252 ftrace_trace_function =
253 (func == ftrace_stub) ? func : ftrace_test_stop_func;
253#endif 254#endif
254} 255}
255 256
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f5b7b5c1195..cf8d11e91ef 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -154,33 +154,10 @@ enum {
154 154
155static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 155static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
156 156
157#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) 157/* Used for individual buffers (after the counter) */
158 158#define RB_BUFFER_OFF (1 << 20)
159/**
160 * tracing_on - enable all tracing buffers
161 *
162 * This function enables all tracing buffers that may have been
163 * disabled with tracing_off.
164 */
165void tracing_on(void)
166{
167 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
168}
169EXPORT_SYMBOL_GPL(tracing_on);
170 159
171/** 160#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
172 * tracing_off - turn off all tracing buffers
173 *
174 * This function stops all tracing buffers from recording data.
175 * It does not disable any overhead the tracers themselves may
176 * be causing. This function simply causes all recording to
177 * the ring buffers to fail.
178 */
179void tracing_off(void)
180{
181 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
182}
183EXPORT_SYMBOL_GPL(tracing_off);
184 161
185/** 162/**
186 * tracing_off_permanent - permanently disable ring buffers 163 * tracing_off_permanent - permanently disable ring buffers
@@ -193,15 +170,6 @@ void tracing_off_permanent(void)
193 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 170 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
194} 171}
195 172
196/**
197 * tracing_is_on - show state of ring buffers enabled
198 */
199int tracing_is_on(void)
200{
201 return ring_buffer_flags == RB_BUFFERS_ON;
202}
203EXPORT_SYMBOL_GPL(tracing_is_on);
204
205#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 173#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
206#define RB_ALIGNMENT 4U 174#define RB_ALIGNMENT 4U
207#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 175#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
@@ -2619,6 +2587,63 @@ void ring_buffer_record_enable(struct ring_buffer *buffer)
2619EXPORT_SYMBOL_GPL(ring_buffer_record_enable); 2587EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
2620 2588
2621/** 2589/**
2590 * ring_buffer_record_off - stop all writes into the buffer
2591 * @buffer: The ring buffer to stop writes to.
2592 *
2593 * This prevents all writes to the buffer. Any attempt to write
2594 * to the buffer after this will fail and return NULL.
2595 *
2596 * This is different than ring_buffer_record_disable() as
2597 * it works like an on/off switch, whereas the disable() version
2598 * must be paired with an enable().
2599 */
2600void ring_buffer_record_off(struct ring_buffer *buffer)
2601{
2602 unsigned int rd;
2603 unsigned int new_rd;
2604
2605 do {
2606 rd = atomic_read(&buffer->record_disabled);
2607 new_rd = rd | RB_BUFFER_OFF;
2608 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2609}
2610EXPORT_SYMBOL_GPL(ring_buffer_record_off);
2611
2612/**
2613 * ring_buffer_record_on - restart writes into the buffer
2614 * @buffer: The ring buffer to start writes to.
2615 *
2616 * This enables all writes to the buffer that was disabled by
2617 * ring_buffer_record_off().
2618 *
2619 * This is different than ring_buffer_record_enable() as
2620 * it works like an on/off switch, whereas the enable() version
2621 * must be paired with a disable().
2622 */
2623void ring_buffer_record_on(struct ring_buffer *buffer)
2624{
2625 unsigned int rd;
2626 unsigned int new_rd;
2627
2628 do {
2629 rd = atomic_read(&buffer->record_disabled);
2630 new_rd = rd & ~RB_BUFFER_OFF;
2631 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2632}
2633EXPORT_SYMBOL_GPL(ring_buffer_record_on);
2634
2635/**
2636 * ring_buffer_record_is_on - return true if the ring buffer can write
2637 * @buffer: The ring buffer to check
2638 *
2639 * Returns true if the ring buffer is in a state that it accepts writes.
2640 */
2641int ring_buffer_record_is_on(struct ring_buffer *buffer)
2642{
2643 return !atomic_read(&buffer->record_disabled);
2644}
2645
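
A short illustration of how the two mechanisms differ in use, assuming a valid struct ring_buffer *buffer:

/* disable/enable is a nesting counter: calls must pair up */
ring_buffer_record_disable(buffer);
ring_buffer_record_disable(buffer);
ring_buffer_record_enable(buffer);	/* still disabled: counter is 1 */
ring_buffer_record_enable(buffer);	/* recording resumes */

/* off/on is a single switch kept in RB_BUFFER_OFF, above the counter,
 * so it is unaffected by intervening disable/enable pairs */
ring_buffer_record_off(buffer);
/* ring_buffer_record_is_on(buffer) now returns 0; writes fail with NULL */
ring_buffer_record_on(buffer);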
2646/**
2622 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer 2647 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
2623 * @buffer: The ring buffer to stop writes to. 2648 * @buffer: The ring buffer to stop writes to.
2624 * @cpu: The CPU buffer to stop 2649 * @cpu: The CPU buffer to stop
@@ -4039,68 +4064,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
4039} 4064}
4040EXPORT_SYMBOL_GPL(ring_buffer_read_page); 4065EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4041 4066
4042#ifdef CONFIG_TRACING
4043static ssize_t
4044rb_simple_read(struct file *filp, char __user *ubuf,
4045 size_t cnt, loff_t *ppos)
4046{
4047 unsigned long *p = filp->private_data;
4048 char buf[64];
4049 int r;
4050
4051 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
4052 r = sprintf(buf, "permanently disabled\n");
4053 else
4054 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
4055
4056 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4057}
4058
4059static ssize_t
4060rb_simple_write(struct file *filp, const char __user *ubuf,
4061 size_t cnt, loff_t *ppos)
4062{
4063 unsigned long *p = filp->private_data;
4064 unsigned long val;
4065 int ret;
4066
4067 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4068 if (ret)
4069 return ret;
4070
4071 if (val)
4072 set_bit(RB_BUFFERS_ON_BIT, p);
4073 else
4074 clear_bit(RB_BUFFERS_ON_BIT, p);
4075
4076 (*ppos)++;
4077
4078 return cnt;
4079}
4080
4081static const struct file_operations rb_simple_fops = {
4082 .open = tracing_open_generic,
4083 .read = rb_simple_read,
4084 .write = rb_simple_write,
4085 .llseek = default_llseek,
4086};
4087
4088
4089static __init int rb_init_debugfs(void)
4090{
4091 struct dentry *d_tracer;
4092
4093 d_tracer = tracing_init_dentry();
4094
4095 trace_create_file("tracing_on", 0644, d_tracer,
4096 &ring_buffer_flags, &rb_simple_fops);
4097
4098 return 0;
4099}
4100
4101fs_initcall(rb_init_debugfs);
4102#endif
4103
4104#ifdef CONFIG_HOTPLUG_CPU 4067#ifdef CONFIG_HOTPLUG_CPU
4105static int rb_cpu_notify(struct notifier_block *self, 4068static int rb_cpu_notify(struct notifier_block *self,
4106 unsigned long action, void *hcpu) 4069 unsigned long action, void *hcpu)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 10d5503f0d0..ed7b5d1e12f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
36#include <linux/ctype.h> 36#include <linux/ctype.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/poll.h> 38#include <linux/poll.h>
39#include <linux/nmi.h>
39#include <linux/fs.h> 40#include <linux/fs.h>
40 41
41#include "trace.h" 42#include "trace.h"
@@ -352,6 +353,59 @@ static void wakeup_work_handler(struct work_struct *work)
352static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); 353static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
353 354
354/** 355/**
356 * tracing_on - enable tracing buffers
357 *
358 * This function enables tracing buffers that may have been
359 * disabled with tracing_off.
360 */
361void tracing_on(void)
362{
363 if (global_trace.buffer)
364 ring_buffer_record_on(global_trace.buffer);
365 /*
366 * This flag is only looked at when buffers haven't been
367 * allocated yet. We don't really care about the race
368 * between setting this flag and actually turning
369 * on the buffer.
370 */
371 global_trace.buffer_disabled = 0;
372}
373EXPORT_SYMBOL_GPL(tracing_on);
374
375/**
376 * tracing_off - turn off tracing buffers
377 *
378 * This function stops the tracing buffers from recording data.
379 * It does not disable any overhead the tracers themselves may
380 * be causing. This function simply causes all recording to
381 * the ring buffers to fail.
382 */
383void tracing_off(void)
384{
385 if (global_trace.buffer)
386	ring_buffer_record_off(global_trace.buffer);
387 /*
388 * This flag is only looked at when buffers haven't been
389 * allocated yet. We don't really care about the race
390 * between setting this flag and actually turning
391 * on the buffer.
392 */
393 global_trace.buffer_disabled = 1;
394}
395EXPORT_SYMBOL_GPL(tracing_off);
396
397/**
398 * tracing_is_on - show state of ring buffers enabled
399 */
400int tracing_is_on(void)
401{
402 if (global_trace.buffer)
403 return ring_buffer_record_is_on(global_trace.buffer);
404 return !global_trace.buffer_disabled;
405}
406EXPORT_SYMBOL_GPL(tracing_is_on);
407
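
The buffer_disabled flag is what lets these wrappers be called before the ring buffer exists; a sketch of the early-boot sequence they enable (the tracer_alloc_buffers() hunk below replays the flag onto the real buffer):

/* early boot, global_trace.buffer is still NULL */
tracing_off();		/* no buffer yet: only sets buffer_disabled = 1 */

/* later, in tracer_alloc_buffers(), once the buffer is allocated */
if (global_trace.buffer_disabled)
	tracing_off();	/* this time ring_buffer_record_off() runs too */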
408/**
355 * trace_wake_up - wake up tasks waiting for trace input 409 * trace_wake_up - wake up tasks waiting for trace input
356 * 410 *
357 * Schedules a delayed work to wake up any task that is blocked on the 411 * Schedules a delayed work to wake up any task that is blocked on the
@@ -1644,6 +1698,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1644 int cpu_file = iter->cpu_file; 1698 int cpu_file = iter->cpu_file;
1645 u64 next_ts = 0, ts; 1699 u64 next_ts = 0, ts;
1646 int next_cpu = -1; 1700 int next_cpu = -1;
1701 int next_size = 0;
1647 int cpu; 1702 int cpu;
1648 1703
1649 /* 1704 /*
@@ -1675,9 +1730,12 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1675 next_cpu = cpu; 1730 next_cpu = cpu;
1676 next_ts = ts; 1731 next_ts = ts;
1677 next_lost = lost_events; 1732 next_lost = lost_events;
1733 next_size = iter->ent_size;
1678 } 1734 }
1679 } 1735 }
1680 1736
1737 iter->ent_size = next_size;
1738
1681 if (ent_cpu) 1739 if (ent_cpu)
1682 *ent_cpu = next_cpu; 1740 *ent_cpu = next_cpu;
1683 1741
@@ -4567,6 +4625,55 @@ static __init void create_trace_options_dir(void)
4567 create_trace_option_core_file(trace_options[i], i); 4625 create_trace_option_core_file(trace_options[i], i);
4568} 4626}
4569 4627
4628static ssize_t
4629rb_simple_read(struct file *filp, char __user *ubuf,
4630 size_t cnt, loff_t *ppos)
4631{
4632 struct ring_buffer *buffer = filp->private_data;
4633 char buf[64];
4634 int r;
4635
4636 if (buffer)
4637 r = ring_buffer_record_is_on(buffer);
4638 else
4639 r = 0;
4640
4641 r = sprintf(buf, "%d\n", r);
4642
4643 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4644}
4645
4646static ssize_t
4647rb_simple_write(struct file *filp, const char __user *ubuf,
4648 size_t cnt, loff_t *ppos)
4649{
4650 struct ring_buffer *buffer = filp->private_data;
4651 unsigned long val;
4652 int ret;
4653
4654 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4655 if (ret)
4656 return ret;
4657
4658 if (buffer) {
4659 if (val)
4660 ring_buffer_record_on(buffer);
4661 else
4662 ring_buffer_record_off(buffer);
4663 }
4664
4665 (*ppos)++;
4666
4667 return cnt;
4668}
4669
4670static const struct file_operations rb_simple_fops = {
4671 .open = tracing_open_generic,
4672 .read = rb_simple_read,
4673 .write = rb_simple_write,
4674 .llseek = default_llseek,
4675};
4676
4570static __init int tracer_init_debugfs(void) 4677static __init int tracer_init_debugfs(void)
4571{ 4678{
4572 struct dentry *d_tracer; 4679 struct dentry *d_tracer;
@@ -4626,6 +4733,9 @@ static __init int tracer_init_debugfs(void)
4626 trace_create_file("trace_clock", 0644, d_tracer, NULL, 4733 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4627 &trace_clock_fops); 4734 &trace_clock_fops);
4628 4735
4736 trace_create_file("tracing_on", 0644, d_tracer,
4737 global_trace.buffer, &rb_simple_fops);
4738
4629#ifdef CONFIG_DYNAMIC_FTRACE 4739#ifdef CONFIG_DYNAMIC_FTRACE
4630 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4740 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4631 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4741 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4798,6 +4908,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4798 if (ret != TRACE_TYPE_NO_CONSUME) 4908 if (ret != TRACE_TYPE_NO_CONSUME)
4799 trace_consume(&iter); 4909 trace_consume(&iter);
4800 } 4910 }
4911 touch_nmi_watchdog();
4801 4912
4802 trace_printk_seq(&iter.seq); 4913 trace_printk_seq(&iter.seq);
4803 } 4914 }
@@ -4863,6 +4974,8 @@ __init static int tracer_alloc_buffers(void)
4863 goto out_free_cpumask; 4974 goto out_free_cpumask;
4864 } 4975 }
4865 global_trace.entries = ring_buffer_size(global_trace.buffer); 4976 global_trace.entries = ring_buffer_size(global_trace.buffer);
4977 if (global_trace.buffer_disabled)
4978 tracing_off();
4866 4979
4867 4980
4868#ifdef CONFIG_TRACER_MAX_TRACE 4981#ifdef CONFIG_TRACER_MAX_TRACE
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 54faec790bc..95059f091a2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -154,6 +154,7 @@ struct trace_array {
154 struct ring_buffer *buffer; 154 struct ring_buffer *buffer;
155 unsigned long entries; 155 unsigned long entries;
156 int cpu; 156 int cpu;
157 int buffer_disabled;
157 cycle_t time_start; 158 cycle_t time_start;
158 struct task_struct *waiter; 159 struct task_struct *waiter;
159 struct trace_array_cpu *data[NR_CPUS]; 160 struct trace_array_cpu *data[NR_CPUS];
@@ -835,13 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[];
835 filter) 836 filter)
836#include "trace_entries.h" 837#include "trace_entries.h"
837 838
838#ifdef CONFIG_PERF_EVENTS
839#ifdef CONFIG_FUNCTION_TRACER 839#ifdef CONFIG_FUNCTION_TRACER
840int perf_ftrace_event_register(struct ftrace_event_call *call, 840int perf_ftrace_event_register(struct ftrace_event_call *call,
841 enum trace_reg type, void *data); 841 enum trace_reg type, void *data);
842#else 842#else
843#define perf_ftrace_event_register NULL 843#define perf_ftrace_event_register NULL
844#endif /* CONFIG_FUNCTION_TRACER */ 844#endif /* CONFIG_FUNCTION_TRACER */
845#endif /* CONFIG_PERF_EVENTS */
846 845
847#endif /* _LINUX_KERNEL_TRACE_H */ 846#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index d91eb0541b3..4108e1250ca 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -166,6 +166,12 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
166 166
167#define FTRACE_STACK_ENTRIES 8 167#define FTRACE_STACK_ENTRIES 8
168 168
169#ifndef CONFIG_64BIT
170# define IP_FMT "%08lx"
171#else
172# define IP_FMT "%016lx"
173#endif
174
169FTRACE_ENTRY(kernel_stack, stack_entry, 175FTRACE_ENTRY(kernel_stack, stack_entry,
170 176
171 TRACE_STACK, 177 TRACE_STACK,
@@ -175,8 +181,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
175 __dynamic_array(unsigned long, caller ) 181 __dynamic_array(unsigned long, caller )
176 ), 182 ),
177 183
178 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" 184 F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
179 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", 185 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
186 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
180 __entry->caller[0], __entry->caller[1], __entry->caller[2], 187 __entry->caller[0], __entry->caller[1], __entry->caller[2],
181 __entry->caller[3], __entry->caller[4], __entry->caller[5], 188 __entry->caller[3], __entry->caller[4], __entry->caller[5],
182 __entry->caller[6], __entry->caller[7]), 189 __entry->caller[6], __entry->caller[7]),
@@ -193,8 +200,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
193 __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) 200 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
194 ), 201 ),
195 202
196 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" 203 F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
197 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", 204 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
205 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
198 __entry->caller[0], __entry->caller[1], __entry->caller[2], 206 __entry->caller[0], __entry->caller[1], __entry->caller[2],
199 __entry->caller[3], __entry->caller[4], __entry->caller[5], 207 __entry->caller[3], __entry->caller[4], __entry->caller[5],
200 __entry->caller[6], __entry->caller[7]), 208 __entry->caller[6], __entry->caller[7]),
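
IP_FMT works because adjacent C string literals are concatenated at compile time, so the format string picks up the pointer width of the build; a minimal sketch:

#ifndef CONFIG_64BIT
# define IP_FMT "%08lx"		/* 32-bit: 8 hex digits */
#else
# define IP_FMT "%016lx"	/* 64-bit: 16 hex digits */
#endif

/* "\t=> (" IP_FMT ")\n" becomes "\t=> (%08lx)\n" or "\t=> (%016lx)\n" */
printk("\t=> (" IP_FMT ")\n", __entry->caller[0]);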
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 7b46c9bd22a..3dd15e8bc85 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -162,7 +162,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
162#define __dynamic_array(type, item) 162#define __dynamic_array(type, item)
163 163
164#undef F_printk 164#undef F_printk
165#define F_printk(fmt, args...) #fmt ", " __stringify(args) 165#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args)
166 166
167#undef FTRACE_ENTRY_REG 167#undef FTRACE_ENTRY_REG
168#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ 168#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
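
The switch from #fmt to __stringify(fmt) matters precisely because of macros like IP_FMT above: the # operator stringifies its argument before macro expansion, while the two-level helper in include/linux/stringify.h expands it first. A compressed sketch of the difference, assuming a 32-bit build:

#define IP_FMT "%08lx"

#define STR_RAW(x)		#x	/* stringify without expansion */
#define __stringify_1(x...)	#x	/* as in include/linux/stringify.h */
#define __stringify(x...)	__stringify_1(x)

/* STR_RAW("(" IP_FMT ")")     -> "\"(\" IP_FMT \")\""    (IP_FMT frozen)   */
/* __stringify("(" IP_FMT ")") -> "\"(\" \"%08lx\" \")\"" (IP_FMT expanded) */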