Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c            |    1
-rw-r--r--  kernel/auditfilter.c      |   12
-rw-r--r--  kernel/auditsc.c          |   84
-rw-r--r--  kernel/cpu.c              |   16
-rw-r--r--  kernel/cpuset.c           |    5
-rw-r--r--  kernel/exit.c             |    9
-rw-r--r--  kernel/fork.c             |    4
-rw-r--r--  kernel/futex.c            |   21
-rw-r--r--  kernel/hrtimer.c          |    2
-rw-r--r--  kernel/kallsyms.c         |   16
-rw-r--r--  kernel/kmod.c             |  303
-rw-r--r--  kernel/kprobes.c          |    9
-rw-r--r--  kernel/ksysfs.c           |   28
-rw-r--r--  kernel/lockdep.c          | 1501
-rw-r--r--  kernel/lockdep_proc.c     |  301
-rw-r--r--  kernel/module.c           |   10
-rw-r--r--  kernel/mutex.c            |    8
-rw-r--r--  kernel/panic.c            |    5
-rw-r--r--  kernel/power/Kconfig      |   29
-rw-r--r--  kernel/power/disk.c       |  251
-rw-r--r--  kernel/power/main.c       |  108
-rw-r--r--  kernel/power/power.h      |   29
-rw-r--r--  kernel/power/process.c    |   90
-rw-r--r--  kernel/power/swap.c       |   20
-rw-r--r--  kernel/power/user.c       |  154
-rw-r--r--  kernel/ptrace.c           |   21
-rw-r--r--  kernel/rcutorture.c       |    4
-rw-r--r--  kernel/relay.c            |   13
-rw-r--r--  kernel/rtmutex-tester.c   |    1
-rw-r--r--  kernel/rwsem.c            |    8
-rw-r--r--  kernel/sched.c            |   36
-rw-r--r--  kernel/softirq.c          |    3
-rw-r--r--  kernel/softlockup.c       |    2
-rw-r--r--  kernel/spinlock.c         |   32
-rw-r--r--  kernel/sys.c              |   91
-rw-r--r--  kernel/sysctl.c           |   68
-rw-r--r--  kernel/time/timekeeping.c |    4
-rw-r--r--  kernel/time/timer_list.c  |    2
-rw-r--r--  kernel/time/timer_stats.c |    2
-rw-r--r--  kernel/timer.c            |   28
-rw-r--r--  kernel/workqueue.c        |   15
41 files changed, 2256 insertions(+), 1090 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 5ce8851fac..eb0f9165b4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -392,6 +392,7 @@ static int kauditd_thread(void *dummy)
392{ 392{
393 struct sk_buff *skb; 393 struct sk_buff *skb;
394 394
395 set_freezable();
395 while (!kthread_should_stop()) { 396 while (!kthread_should_stop()) {
396 skb = skb_dequeue(&audit_skb_queue); 397 skb = skb_dequeue(&audit_skb_queue);
397 wake_up(&audit_backlog_wait); 398 wake_up(&audit_backlog_wait);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index ce61f42354..1bf093dcff 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1210,8 +1210,8 @@ static inline int audit_add_rule(struct audit_entry *entry,
1210 struct audit_entry *e; 1210 struct audit_entry *e;
1211 struct audit_field *inode_f = entry->rule.inode_f; 1211 struct audit_field *inode_f = entry->rule.inode_f;
1212 struct audit_watch *watch = entry->rule.watch; 1212 struct audit_watch *watch = entry->rule.watch;
1213 struct nameidata *ndp, *ndw; 1213 struct nameidata *ndp = NULL, *ndw = NULL;
1214 int h, err, putnd_needed = 0; 1214 int h, err;
1215#ifdef CONFIG_AUDITSYSCALL 1215#ifdef CONFIG_AUDITSYSCALL
1216 int dont_count = 0; 1216 int dont_count = 0;
1217 1217
@@ -1239,7 +1239,6 @@ static inline int audit_add_rule(struct audit_entry *entry,
1239 err = audit_get_nd(watch->path, &ndp, &ndw); 1239 err = audit_get_nd(watch->path, &ndp, &ndw);
1240 if (err) 1240 if (err)
1241 goto error; 1241 goto error;
1242 putnd_needed = 1;
1243 } 1242 }
1244 1243
1245 mutex_lock(&audit_filter_mutex); 1244 mutex_lock(&audit_filter_mutex);
@@ -1269,14 +1268,11 @@ static inline int audit_add_rule(struct audit_entry *entry,
1269#endif 1268#endif
1270 mutex_unlock(&audit_filter_mutex); 1269 mutex_unlock(&audit_filter_mutex);
1271 1270
1272 if (putnd_needed) 1271 audit_put_nd(ndp, ndw); /* NULL args OK */
1273 audit_put_nd(ndp, ndw);
1274
1275 return 0; 1272 return 0;
1276 1273
1277error: 1274error:
1278 if (putnd_needed) 1275 audit_put_nd(ndp, ndw); /* NULL args OK */
1279 audit_put_nd(ndp, ndw);
1280 if (watch) 1276 if (watch)
1281 audit_put_watch(watch); /* tmp watch, matches initial get */ 1277 audit_put_watch(watch); /* tmp watch, matches initial get */
1282 return err; 1278 return err;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b7640a5f38..145cbb79c4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -153,7 +153,7 @@ struct audit_aux_data_execve {
153 struct audit_aux_data d; 153 struct audit_aux_data d;
154 int argc; 154 int argc;
155 int envc; 155 int envc;
156 char mem[0]; 156 struct mm_struct *mm;
157}; 157};
158 158
159struct audit_aux_data_socketcall { 159struct audit_aux_data_socketcall {
@@ -831,6 +831,55 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
831 return rc; 831 return rc;
832} 832}
833 833
834static void audit_log_execve_info(struct audit_buffer *ab,
835 struct audit_aux_data_execve *axi)
836{
837 int i;
838 long len, ret;
839 const char __user *p = (const char __user *)axi->mm->arg_start;
840 char *buf;
841
842 if (axi->mm != current->mm)
843 return; /* execve failed, no additional info */
844
845 for (i = 0; i < axi->argc; i++, p += len) {
846 len = strnlen_user(p, MAX_ARG_STRLEN);
847 /*
848 * We just created this mm, if we can't find the strings
849 * we just copied into it something is _very_ wrong. Similar
850 * for strings that are too long, we should not have created
851 * any.
852 */
853 if (!len || len > MAX_ARG_STRLEN) {
854 WARN_ON(1);
855 send_sig(SIGKILL, current, 0);
856 }
857
858 buf = kmalloc(len, GFP_KERNEL);
859 if (!buf) {
860 audit_panic("out of memory for argv string\n");
861 break;
862 }
863
864 ret = copy_from_user(buf, p, len);
865 /*
866 * There is no reason for this copy to be short. We just
867 * copied them here, and the mm hasn't been exposed to user-
868 * space yet.
869 */
870 if (!ret) {
871 WARN_ON(1);
872 send_sig(SIGKILL, current, 0);
873 }
874
875 audit_log_format(ab, "a%d=", i);
876 audit_log_untrustedstring(ab, buf);
877 audit_log_format(ab, "\n");
878
879 kfree(buf);
880 }
881}
882
834static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) 883static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
835{ 884{
836 int i, call_panic = 0; 885 int i, call_panic = 0;
@@ -971,13 +1020,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
971 1020
972 case AUDIT_EXECVE: { 1021 case AUDIT_EXECVE: {
973 struct audit_aux_data_execve *axi = (void *)aux; 1022 struct audit_aux_data_execve *axi = (void *)aux;
974 int i; 1023 audit_log_execve_info(ab, axi);
975 const char *p;
976 for (i = 0, p = axi->mem; i < axi->argc; i++) {
977 audit_log_format(ab, "a%d=", i);
978 p = audit_log_untrustedstring(ab, p);
979 audit_log_format(ab, "\n");
980 }
981 break; } 1024 break; }
982 1025
983 case AUDIT_SOCKETCALL: { 1026 case AUDIT_SOCKETCALL: {
@@ -1821,32 +1864,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode
1821 return 0; 1864 return 0;
1822} 1865}
1823 1866
1867int audit_argv_kb = 32;
1868
1824int audit_bprm(struct linux_binprm *bprm) 1869int audit_bprm(struct linux_binprm *bprm)
1825{ 1870{
1826 struct audit_aux_data_execve *ax; 1871 struct audit_aux_data_execve *ax;
1827 struct audit_context *context = current->audit_context; 1872 struct audit_context *context = current->audit_context;
1828 unsigned long p, next;
1829 void *to;
1830 1873
1831 if (likely(!audit_enabled || !context || context->dummy)) 1874 if (likely(!audit_enabled || !context || context->dummy))
1832 return 0; 1875 return 0;
1833 1876
1834 ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, 1877 /*
1835 GFP_KERNEL); 1878 * Even though the stack code doesn't limit the arg+env size any more,
1879 * the audit code requires that _all_ arguments be logged in a single
1880 * netlink skb. Hence cap it :-(
1881 */
1882 if (bprm->argv_len > (audit_argv_kb << 10))
1883 return -E2BIG;
1884
1885 ax = kmalloc(sizeof(*ax), GFP_KERNEL);
1836 if (!ax) 1886 if (!ax)
1837 return -ENOMEM; 1887 return -ENOMEM;
1838 1888
1839 ax->argc = bprm->argc; 1889 ax->argc = bprm->argc;
1840 ax->envc = bprm->envc; 1890 ax->envc = bprm->envc;
1841 for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { 1891 ax->mm = bprm->mm;
1842 struct page *page = bprm->page[p / PAGE_SIZE];
1843 void *kaddr = kmap(page);
1844 next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
1845 memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
1846 to += next - p;
1847 kunmap(page);
1848 }
1849
1850 ax->d.type = AUDIT_EXECVE; 1892 ax->d.type = AUDIT_EXECVE;
1851 ax->d.next = context->aux; 1893 ax->d.next = context->aux;
1852 context->aux = (void *)ax; 1894 context->aux = (void *)ax;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 208cf3497c..181ae70860 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
103 write_unlock_irq(&tasklist_lock); 103 write_unlock_irq(&tasklist_lock);
104} 104}
105 105
106struct take_cpu_down_param {
107 unsigned long mod;
108 void *hcpu;
109};
110
106/* Take this CPU down. */ 111/* Take this CPU down. */
107static int take_cpu_down(void *unused) 112static int take_cpu_down(void *_param)
108{ 113{
114 struct take_cpu_down_param *param = _param;
109 int err; 115 int err;
110 116
117 raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
118 param->hcpu);
111 /* Ensure this CPU doesn't handle any more interrupts. */ 119 /* Ensure this CPU doesn't handle any more interrupts. */
112 err = __cpu_disable(); 120 err = __cpu_disable();
113 if (err < 0) 121 if (err < 0)
@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
127 cpumask_t old_allowed, tmp; 135 cpumask_t old_allowed, tmp;
128 void *hcpu = (void *)(long)cpu; 136 void *hcpu = (void *)(long)cpu;
129 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; 137 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
138 struct take_cpu_down_param tcd_param = {
139 .mod = mod,
140 .hcpu = hcpu,
141 };
130 142
131 if (num_online_cpus() == 1) 143 if (num_online_cpus() == 1)
132 return -EBUSY; 144 return -EBUSY;
@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
153 set_cpus_allowed(current, tmp); 165 set_cpus_allowed(current, tmp);
154 166
155 mutex_lock(&cpu_bitmask_lock); 167 mutex_lock(&cpu_bitmask_lock);
156 p = __stop_machine_run(take_cpu_down, NULL, cpu); 168 p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
157 mutex_unlock(&cpu_bitmask_lock); 169 mutex_unlock(&cpu_bitmask_lock);
158 170
159 if (IS_ERR(p) || cpu_online(cpu)) { 171 if (IS_ERR(p) || cpu_online(cpu)) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 824b1c01f4..57e6448b17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -516,7 +516,7 @@ static void cpuset_release_agent(const char *pathbuf)
516 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 516 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
517 envp[i] = NULL; 517 envp[i] = NULL;
518 518
519 call_usermodehelper(argv[0], argv, envp, 0); 519 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
520 kfree(pathbuf); 520 kfree(pathbuf);
521} 521}
522 522
@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
2138static int cpuset_handle_cpuhp(struct notifier_block *nb, 2138static int cpuset_handle_cpuhp(struct notifier_block *nb,
2139 unsigned long phase, void *cpu) 2139 unsigned long phase, void *cpu)
2140{ 2140{
2141 if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
2142 return NOTIFY_DONE;
2143
2141 common_cpu_mem_hotplug_unplug(); 2144 common_cpu_mem_hotplug_unplug();
2142 return 0; 2145 return 0;
2143} 2146}
diff --git a/kernel/exit.c b/kernel/exit.c
index 57626692cd..464c2b172f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,6 +31,7 @@
31#include <linux/mempolicy.h> 31#include <linux/mempolicy.h>
32#include <linux/taskstats_kern.h> 32#include <linux/taskstats_kern.h>
33#include <linux/delayacct.h> 33#include <linux/delayacct.h>
34#include <linux/freezer.h>
34#include <linux/cpuset.h> 35#include <linux/cpuset.h>
35#include <linux/syscalls.h> 36#include <linux/syscalls.h>
36#include <linux/signal.h> 37#include <linux/signal.h>
@@ -44,6 +45,7 @@
44#include <linux/resource.h> 45#include <linux/resource.h>
45#include <linux/blkdev.h> 46#include <linux/blkdev.h>
46#include <linux/task_io_accounting_ops.h> 47#include <linux/task_io_accounting_ops.h>
48#include <linux/freezer.h>
47 49
48#include <asm/uaccess.h> 50#include <asm/uaccess.h>
49#include <asm/unistd.h> 51#include <asm/unistd.h>
@@ -387,6 +389,11 @@ void daemonize(const char *name, ...)
387 * they would be locked into memory. 389 * they would be locked into memory.
388 */ 390 */
389 exit_mm(current); 391 exit_mm(current);
392 /*
393 * We don't want to have TIF_FREEZE set if the system-wide hibernation
394 * or suspend transition begins right now.
395 */
396 current->flags |= PF_NOFREEZE;
390 397
391 set_special_pids(1, 1); 398 set_special_pids(1, 1);
392 proc_clear_tty(current); 399 proc_clear_tty(current);
@@ -588,6 +595,8 @@ static void exit_mm(struct task_struct * tsk)
588 tsk->mm = NULL; 595 tsk->mm = NULL;
589 up_read(&mm->mmap_sem); 596 up_read(&mm->mmap_sem);
590 enter_lazy_tlb(mm, current); 597 enter_lazy_tlb(mm, current);
598 /* We don't want this task to be frozen prematurely */
599 clear_freeze_flag(tsk);
591 task_unlock(tsk); 600 task_unlock(tsk);
592 mmput(mm); 601 mmput(mm);
593} 602}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7c5c5888e0..4698389982 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -334,6 +334,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
334 atomic_set(&mm->mm_count, 1); 334 atomic_set(&mm->mm_count, 1);
335 init_rwsem(&mm->mmap_sem); 335 init_rwsem(&mm->mmap_sem);
336 INIT_LIST_HEAD(&mm->mmlist); 336 INIT_LIST_HEAD(&mm->mmlist);
337 mm->flags = (current->mm) ? current->mm->flags
338 : MMF_DUMP_FILTER_DEFAULT;
337 mm->core_waiters = 0; 339 mm->core_waiters = 0;
338 mm->nr_ptes = 0; 340 mm->nr_ptes = 0;
339 set_mm_counter(mm, file_rss, 0); 341 set_mm_counter(mm, file_rss, 0);
@@ -923,7 +925,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
923{ 925{
924 unsigned long new_flags = p->flags; 926 unsigned long new_flags = p->flags;
925 927
926 new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); 928 new_flags &= ~PF_SUPERPRIV;
927 new_flags |= PF_FORKNOEXEC; 929 new_flags |= PF_FORKNOEXEC;
928 if (!(clone_flags & CLONE_PTRACE)) 930 if (!(clone_flags & CLONE_PTRACE))
929 p->ptrace = 0; 931 p->ptrace = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
index 5c3f45d07c..a12425051e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -346,15 +346,20 @@ static int futex_handle_fault(unsigned long address,
346 vma = find_vma(mm, address); 346 vma = find_vma(mm, address);
347 if (vma && address >= vma->vm_start && 347 if (vma && address >= vma->vm_start &&
348 (vma->vm_flags & VM_WRITE)) { 348 (vma->vm_flags & VM_WRITE)) {
349 switch (handle_mm_fault(mm, vma, address, 1)) { 349 int fault;
350 case VM_FAULT_MINOR: 350 fault = handle_mm_fault(mm, vma, address, 1);
351 ret = 0; 351 if (unlikely((fault & VM_FAULT_ERROR))) {
352 current->min_flt++; 352#if 0
353 break; 353 /* XXX: let's do this when we verify it is OK */
354 case VM_FAULT_MAJOR: 354 if (ret & VM_FAULT_OOM)
355 ret = -ENOMEM;
356#endif
357 } else {
355 ret = 0; 358 ret = 0;
356 current->maj_flt++; 359 if (fault & VM_FAULT_MAJOR)
357 break; 360 current->maj_flt++;
361 else
362 current->min_flt++;
358 } 363 }
359 } 364 }
360 if (!fshared) 365 if (!fshared)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 23c03f43e1..72d034258b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1406,7 +1406,7 @@ static void migrate_hrtimers(int cpu)
1406static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, 1406static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1407 unsigned long action, void *hcpu) 1407 unsigned long action, void *hcpu)
1408{ 1408{
1409 long cpu = (long)hcpu; 1409 unsigned int cpu = (long)hcpu;
1410 1410
1411 switch (action) { 1411 switch (action) {
1412 1412
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 0d662475dd..474219a419 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -152,7 +152,7 @@ static unsigned int get_symbol_offset(unsigned long pos)
152/* Lookup the address for this symbol. Returns 0 if not found. */ 152/* Lookup the address for this symbol. Returns 0 if not found. */
153unsigned long kallsyms_lookup_name(const char *name) 153unsigned long kallsyms_lookup_name(const char *name)
154{ 154{
155 char namebuf[KSYM_NAME_LEN+1]; 155 char namebuf[KSYM_NAME_LEN];
156 unsigned long i; 156 unsigned long i;
157 unsigned int off; 157 unsigned int off;
158 158
@@ -248,7 +248,7 @@ const char *kallsyms_lookup(unsigned long addr,
248{ 248{
249 const char *msym; 249 const char *msym;
250 250
251 namebuf[KSYM_NAME_LEN] = 0; 251 namebuf[KSYM_NAME_LEN - 1] = 0;
252 namebuf[0] = 0; 252 namebuf[0] = 0;
253 253
254 if (is_ksym_addr(addr)) { 254 if (is_ksym_addr(addr)) {
@@ -265,7 +265,7 @@ const char *kallsyms_lookup(unsigned long addr,
265 /* see if it's in a module */ 265 /* see if it's in a module */
266 msym = module_address_lookup(addr, symbolsize, offset, modname); 266 msym = module_address_lookup(addr, symbolsize, offset, modname);
267 if (msym) 267 if (msym)
268 return strncpy(namebuf, msym, KSYM_NAME_LEN); 268 return strncpy(namebuf, msym, KSYM_NAME_LEN - 1);
269 269
270 return NULL; 270 return NULL;
271} 271}
@@ -273,7 +273,7 @@ const char *kallsyms_lookup(unsigned long addr,
273int lookup_symbol_name(unsigned long addr, char *symname) 273int lookup_symbol_name(unsigned long addr, char *symname)
274{ 274{
275 symname[0] = '\0'; 275 symname[0] = '\0';
276 symname[KSYM_NAME_LEN] = '\0'; 276 symname[KSYM_NAME_LEN - 1] = '\0';
277 277
278 if (is_ksym_addr(addr)) { 278 if (is_ksym_addr(addr)) {
279 unsigned long pos; 279 unsigned long pos;
@@ -291,7 +291,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
291 unsigned long *offset, char *modname, char *name) 291 unsigned long *offset, char *modname, char *name)
292{ 292{
293 name[0] = '\0'; 293 name[0] = '\0';
294 name[KSYM_NAME_LEN] = '\0'; 294 name[KSYM_NAME_LEN - 1] = '\0';
295 295
296 if (is_ksym_addr(addr)) { 296 if (is_ksym_addr(addr)) {
297 unsigned long pos; 297 unsigned long pos;
@@ -312,7 +312,7 @@ int sprint_symbol(char *buffer, unsigned long address)
312 char *modname; 312 char *modname;
313 const char *name; 313 const char *name;
314 unsigned long offset, size; 314 unsigned long offset, size;
315 char namebuf[KSYM_NAME_LEN+1]; 315 char namebuf[KSYM_NAME_LEN];
316 316
317 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); 317 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
318 if (!name) 318 if (!name)
@@ -342,8 +342,8 @@ struct kallsym_iter
342 unsigned long value; 342 unsigned long value;
343 unsigned int nameoff; /* If iterating in core kernel symbols */ 343 unsigned int nameoff; /* If iterating in core kernel symbols */
344 char type; 344 char type;
345 char name[KSYM_NAME_LEN+1]; 345 char name[KSYM_NAME_LEN];
346 char module_name[MODULE_NAME_LEN + 1]; 346 char module_name[MODULE_NAME_LEN];
347 int exported; 347 int exported;
348}; 348};
349 349
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 4d32eb0771..beedbdc646 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -33,6 +33,8 @@
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/resource.h> 35#include <linux/resource.h>
36#include <linux/notifier.h>
37#include <linux/suspend.h>
36#include <asm/uaccess.h> 38#include <asm/uaccess.h>
37 39
38extern int max_threads; 40extern int max_threads;
@@ -119,9 +121,10 @@ struct subprocess_info {
119 char **argv; 121 char **argv;
120 char **envp; 122 char **envp;
121 struct key *ring; 123 struct key *ring;
122 int wait; 124 enum umh_wait wait;
123 int retval; 125 int retval;
124 struct file *stdin; 126 struct file *stdin;
127 void (*cleanup)(char **argv, char **envp);
125}; 128};
126 129
127/* 130/*
@@ -180,6 +183,14 @@ static int ____call_usermodehelper(void *data)
180 do_exit(0); 183 do_exit(0);
181} 184}
182 185
186void call_usermodehelper_freeinfo(struct subprocess_info *info)
187{
188 if (info->cleanup)
189 (*info->cleanup)(info->argv, info->envp);
190 kfree(info);
191}
192EXPORT_SYMBOL(call_usermodehelper_freeinfo);
193
183/* Keventd can't block, but this (a child) can. */ 194/* Keventd can't block, but this (a child) can. */
184static int wait_for_helper(void *data) 195static int wait_for_helper(void *data)
185{ 196{
@@ -216,8 +227,8 @@ static int wait_for_helper(void *data)
216 sub_info->retval = ret; 227 sub_info->retval = ret;
217 } 228 }
218 229
219 if (sub_info->wait < 0) 230 if (sub_info->wait == UMH_NO_WAIT)
220 kfree(sub_info); 231 call_usermodehelper_freeinfo(sub_info);
221 else 232 else
222 complete(sub_info->complete); 233 complete(sub_info->complete);
223 return 0; 234 return 0;
@@ -229,34 +240,204 @@ static void __call_usermodehelper(struct work_struct *work)
229 struct subprocess_info *sub_info = 240 struct subprocess_info *sub_info =
230 container_of(work, struct subprocess_info, work); 241 container_of(work, struct subprocess_info, work);
231 pid_t pid; 242 pid_t pid;
232 int wait = sub_info->wait; 243 enum umh_wait wait = sub_info->wait;
233 244
234 /* CLONE_VFORK: wait until the usermode helper has execve'd 245 /* CLONE_VFORK: wait until the usermode helper has execve'd
235 * successfully We need the data structures to stay around 246 * successfully We need the data structures to stay around
236 * until that is done. */ 247 * until that is done. */
237 if (wait) 248 if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT)
238 pid = kernel_thread(wait_for_helper, sub_info, 249 pid = kernel_thread(wait_for_helper, sub_info,
239 CLONE_FS | CLONE_FILES | SIGCHLD); 250 CLONE_FS | CLONE_FILES | SIGCHLD);
240 else 251 else
241 pid = kernel_thread(____call_usermodehelper, sub_info, 252 pid = kernel_thread(____call_usermodehelper, sub_info,
242 CLONE_VFORK | SIGCHLD); 253 CLONE_VFORK | SIGCHLD);
243 254
244 if (wait < 0) 255 switch (wait) {
245 return; 256 case UMH_NO_WAIT:
257 break;
246 258
247 if (pid < 0) { 259 case UMH_WAIT_PROC:
260 if (pid > 0)
261 break;
248 sub_info->retval = pid; 262 sub_info->retval = pid;
263 /* FALLTHROUGH */
264
265 case UMH_WAIT_EXEC:
249 complete(sub_info->complete); 266 complete(sub_info->complete);
250 } else if (!wait) 267 }
251 complete(sub_info->complete); 268}
269
270#ifdef CONFIG_PM
271/*
272 * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
273 * (used for preventing user land processes from being created after the user
274 * land has been frozen during a system-wide hibernation or suspend operation).
275 */
276static int usermodehelper_disabled;
277
278/* Number of helpers running */
279static atomic_t running_helpers = ATOMIC_INIT(0);
280
281/*
282 * Wait queue head used by usermodehelper_pm_callback() to wait for all running
283 * helpers to finish.
284 */
285static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
286
287/*
288 * Time to wait for running_helpers to become zero before the setting of
289 * usermodehelper_disabled in usermodehelper_pm_callback() fails
290 */
291#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
292
293static int usermodehelper_pm_callback(struct notifier_block *nfb,
294 unsigned long action,
295 void *ignored)
296{
297 long retval;
298
299 switch (action) {
300 case PM_HIBERNATION_PREPARE:
301 case PM_SUSPEND_PREPARE:
302 usermodehelper_disabled = 1;
303 smp_mb();
304 /*
305 * From now on call_usermodehelper_exec() won't start any new
306 * helpers, so it is sufficient if running_helpers turns out to
307 * be zero at one point (it may be increased later, but that
308 * doesn't matter).
309 */
310 retval = wait_event_timeout(running_helpers_waitq,
311 atomic_read(&running_helpers) == 0,
312 RUNNING_HELPERS_TIMEOUT);
313 if (retval) {
314 return NOTIFY_OK;
315 } else {
316 usermodehelper_disabled = 0;
317 return NOTIFY_BAD;
318 }
319 case PM_POST_HIBERNATION:
320 case PM_POST_SUSPEND:
321 usermodehelper_disabled = 0;
322 return NOTIFY_OK;
323 }
324
325 return NOTIFY_DONE;
326}
327
328static void helper_lock(void)
329{
330 atomic_inc(&running_helpers);
331 smp_mb__after_atomic_inc();
332}
333
334static void helper_unlock(void)
335{
336 if (atomic_dec_and_test(&running_helpers))
337 wake_up(&running_helpers_waitq);
338}
339
340static void register_pm_notifier_callback(void)
341{
342 pm_notifier(usermodehelper_pm_callback, 0);
252} 343}
344#else /* CONFIG_PM */
345#define usermodehelper_disabled 0
346
347static inline void helper_lock(void) {}
348static inline void helper_unlock(void) {}
349static inline void register_pm_notifier_callback(void) {}
350#endif /* CONFIG_PM */
253 351
254/** 352/**
255 * call_usermodehelper_keys - start a usermode application 353 * call_usermodehelper_setup - prepare to call a usermode helper
256 * @path: pathname for the application 354 * @path - path to usermode executable
257 * @argv: null-terminated argument list 355 * @argv - arg vector for process
258 * @envp: null-terminated environment list 356 * @envp - environment for process
259 * @session_keyring: session keyring for process (NULL for an empty keyring) 357 *
358 * Returns either NULL on allocation failure, or a subprocess_info
359 * structure. This should be passed to call_usermodehelper_exec to
360 * exec the process and free the structure.
361 */
362struct subprocess_info *call_usermodehelper_setup(char *path,
363 char **argv, char **envp)
364{
365 struct subprocess_info *sub_info;
366 sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC);
367 if (!sub_info)
368 goto out;
369
370 INIT_WORK(&sub_info->work, __call_usermodehelper);
371 sub_info->path = path;
372 sub_info->argv = argv;
373 sub_info->envp = envp;
374
375 out:
376 return sub_info;
377}
378EXPORT_SYMBOL(call_usermodehelper_setup);
379
380/**
381 * call_usermodehelper_setkeys - set the session keys for usermode helper
382 * @info: a subprocess_info returned by call_usermodehelper_setup
383 * @session_keyring: the session keyring for the process
384 */
385void call_usermodehelper_setkeys(struct subprocess_info *info,
386 struct key *session_keyring)
387{
388 info->ring = session_keyring;
389}
390EXPORT_SYMBOL(call_usermodehelper_setkeys);
391
392/**
393 * call_usermodehelper_setcleanup - set a cleanup function
394 * @info: a subprocess_info returned by call_usermodehelper_setup
395 * @cleanup: a cleanup function
396 *
 397 * The cleanup function is called just before the subprocess_info is
 398 * freed. This can be used for freeing the argv and envp. The
 399 * function must be runnable in either a process context or the
 400 * context in which call_usermodehelper_exec is called.
401 */
402void call_usermodehelper_setcleanup(struct subprocess_info *info,
403 void (*cleanup)(char **argv, char **envp))
404{
405 info->cleanup = cleanup;
406}
407EXPORT_SYMBOL(call_usermodehelper_setcleanup);
408
409/**
410 * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
411 * @sub_info: a subprocess_info returned by call_usermodehelper_setup
412 * @filp: set to the write-end of a pipe
413 *
414 * This constructs a pipe, and sets the read end to be the stdin of the
415 * subprocess, and returns the write-end in *@filp.
416 */
417int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
418 struct file **filp)
419{
420 struct file *f;
421
422 f = create_write_pipe();
423 if (IS_ERR(f))
424 return PTR_ERR(f);
425 *filp = f;
426
427 f = create_read_pipe(f);
428 if (IS_ERR(f)) {
429 free_write_pipe(*filp);
430 return PTR_ERR(f);
431 }
432 sub_info->stdin = f;
433
434 return 0;
435}
436EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
437
438/**
439 * call_usermodehelper_exec - start a usermode application
 440 * @sub_info: information about the subprocess
260 * @wait: wait for the application to finish and return status. 441 * @wait: wait for the application to finish and return status.
261 * when -1 don't wait at all, but you get no useful error back when 442 * when -1 don't wait at all, but you get no useful error back when
262 * the program couldn't be exec'ed. This makes it safe to call 443 * the program couldn't be exec'ed. This makes it safe to call
@@ -265,81 +446,70 @@ static void __call_usermodehelper(struct work_struct *work)
265 * Runs a user-space application. The application is started 446 * Runs a user-space application. The application is started
266 * asynchronously if wait is not set, and runs as a child of keventd. 447 * asynchronously if wait is not set, and runs as a child of keventd.
267 * (ie. it runs with full root capabilities). 448 * (ie. it runs with full root capabilities).
268 *
269 * Must be called from process context. Returns a negative error code
270 * if program was not execed successfully, or 0.
271 */ 449 */
272int call_usermodehelper_keys(char *path, char **argv, char **envp, 450int call_usermodehelper_exec(struct subprocess_info *sub_info,
273 struct key *session_keyring, int wait) 451 enum umh_wait wait)
274{ 452{
275 DECLARE_COMPLETION_ONSTACK(done); 453 DECLARE_COMPLETION_ONSTACK(done);
276 struct subprocess_info *sub_info;
277 int retval; 454 int retval;
278 455
279 if (!khelper_wq) 456 helper_lock();
280 return -EBUSY; 457 if (sub_info->path[0] == '\0') {
281 458 retval = 0;
282 if (path[0] == '\0') 459 goto out;
283 return 0; 460 }
284 461
285 sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); 462 if (!khelper_wq || usermodehelper_disabled) {
286 if (!sub_info) 463 retval = -EBUSY;
287 return -ENOMEM; 464 goto out;
465 }
288 466
289 INIT_WORK(&sub_info->work, __call_usermodehelper);
290 sub_info->complete = &done; 467 sub_info->complete = &done;
291 sub_info->path = path;
292 sub_info->argv = argv;
293 sub_info->envp = envp;
294 sub_info->ring = session_keyring;
295 sub_info->wait = wait; 468 sub_info->wait = wait;
296 469
297 queue_work(khelper_wq, &sub_info->work); 470 queue_work(khelper_wq, &sub_info->work);
298 if (wait < 0) /* task has freed sub_info */ 471 if (wait == UMH_NO_WAIT) /* task has freed sub_info */
299 return 0; 472 return 0;
300 wait_for_completion(&done); 473 wait_for_completion(&done);
301 retval = sub_info->retval; 474 retval = sub_info->retval;
302 kfree(sub_info); 475
476 out:
477 call_usermodehelper_freeinfo(sub_info);
478 helper_unlock();
303 return retval; 479 return retval;
304} 480}
305EXPORT_SYMBOL(call_usermodehelper_keys); 481EXPORT_SYMBOL(call_usermodehelper_exec);
306 482
483/**
484 * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
485 * @path: path to usermode executable
486 * @argv: arg vector for process
487 * @envp: environment for process
488 * @filp: set to the write-end of a pipe
489 *
490 * This is a simple wrapper which executes a usermode-helper function
491 * with a pipe as stdin. It is implemented entirely in terms of
492 * lower-level call_usermodehelper_* functions.
493 */
307int call_usermodehelper_pipe(char *path, char **argv, char **envp, 494int call_usermodehelper_pipe(char *path, char **argv, char **envp,
308 struct file **filp) 495 struct file **filp)
309{ 496{
310 DECLARE_COMPLETION(done); 497 struct subprocess_info *sub_info;
311 struct subprocess_info sub_info = { 498 int ret;
312 .work = __WORK_INITIALIZER(sub_info.work,
313 __call_usermodehelper),
314 .complete = &done,
315 .path = path,
316 .argv = argv,
317 .envp = envp,
318 .retval = 0,
319 };
320 struct file *f;
321
322 if (!khelper_wq)
323 return -EBUSY;
324 499
325 if (path[0] == '\0') 500 sub_info = call_usermodehelper_setup(path, argv, envp);
326 return 0; 501 if (sub_info == NULL)
502 return -ENOMEM;
327 503
328 f = create_write_pipe(); 504 ret = call_usermodehelper_stdinpipe(sub_info, filp);
329 if (IS_ERR(f)) 505 if (ret < 0)
330 return PTR_ERR(f); 506 goto out;
331 *filp = f;
332 507
333 f = create_read_pipe(f); 508 return call_usermodehelper_exec(sub_info, 1);
334 if (IS_ERR(f)) {
335 free_write_pipe(*filp);
336 return PTR_ERR(f);
337 }
338 sub_info.stdin = f;
339 509
340 queue_work(khelper_wq, &sub_info.work); 510 out:
341 wait_for_completion(&done); 511 call_usermodehelper_freeinfo(sub_info);
342 return sub_info.retval; 512 return ret;
343} 513}
344EXPORT_SYMBOL(call_usermodehelper_pipe); 514EXPORT_SYMBOL(call_usermodehelper_pipe);
345 515
@@ -347,4 +517,5 @@ void __init usermodehelper_init(void)
347{ 517{
348 khelper_wq = create_singlethread_workqueue("khelper"); 518 khelper_wq = create_singlethread_workqueue("khelper");
349 BUG_ON(!khelper_wq); 519 BUG_ON(!khelper_wq);
520 register_pm_notifier_callback();
350} 521}
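
The kernel/kmod.c changes above split the old call_usermodehelper_keys() into setup/exec primitives. A minimal caller sketch, assuming the API exactly as added in this hunk; run_helper_sketch() is a hypothetical wrapper, not part of the patch:

/* Hypothetical caller of the new split usermode-helper API (sketch only). */
static int run_helper_sketch(char *path, char **argv, char **envp,
			     struct key *keyring)
{
	struct subprocess_info *info;

	info = call_usermodehelper_setup(path, argv, envp);
	if (!info)
		return -ENOMEM;

	/* Optional: attach a session keyring before exec'ing the helper. */
	call_usermodehelper_setkeys(info, keyring);

	/*
	 * call_usermodehelper_exec() frees (or arranges to free) the
	 * subprocess_info on every return path, so no cleanup is needed here.
	 */
	return call_usermodehelper_exec(info, UMH_WAIT_PROC);
}

The reworked call_usermodehelper_pipe() above follows the same pattern, with call_usermodehelper_stdinpipe() in place of the keyring step.
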
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9e47d8c493..3e9f513a72 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -675,9 +675,18 @@ static struct notifier_block kprobe_exceptions_nb = {
675 .priority = 0x7fffffff /* we need to be notified first */ 675 .priority = 0x7fffffff /* we need to be notified first */
676}; 676};
677 677
678unsigned long __weak arch_deref_entry_point(void *entry)
679{
680 return (unsigned long)entry;
681}
678 682
679int __kprobes register_jprobe(struct jprobe *jp) 683int __kprobes register_jprobe(struct jprobe *jp)
680{ 684{
685 unsigned long addr = arch_deref_entry_point(jp->entry);
686
687 if (!kernel_text_address(addr))
688 return -EINVAL;
689
681 /* Todo: Verify probepoint is a function entry point */ 690 /* Todo: Verify probepoint is a function entry point */
682 jp->kp.pre_handler = setjmp_pre_handler; 691 jp->kp.pre_handler = setjmp_pre_handler;
683 jp->kp.break_handler = longjmp_break_handler; 692 jp->kp.break_handler = longjmp_break_handler;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 559deca5ed..2565e1b6dd 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -62,6 +62,28 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page)
62KERNEL_ATTR_RO(kexec_crash_loaded); 62KERNEL_ATTR_RO(kexec_crash_loaded);
63#endif /* CONFIG_KEXEC */ 63#endif /* CONFIG_KEXEC */
64 64
65/*
66 * Make /sys/kernel/notes give the raw contents of our kernel .notes section.
67 */
68extern const char __start_notes __attribute__((weak));
69extern const char __stop_notes __attribute__((weak));
70#define notes_size (&__stop_notes - &__start_notes)
71
72static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr,
73 char *buf, loff_t off, size_t count)
74{
75 memcpy(buf, &__start_notes + off, count);
76 return count;
77}
78
79static struct bin_attribute notes_attr = {
80 .attr = {
81 .name = "notes",
82 .mode = S_IRUGO,
83 },
84 .read = &notes_read,
85};
86
65decl_subsys(kernel, NULL, NULL); 87decl_subsys(kernel, NULL, NULL);
66EXPORT_SYMBOL_GPL(kernel_subsys); 88EXPORT_SYMBOL_GPL(kernel_subsys);
67 89
@@ -88,6 +110,12 @@ static int __init ksysfs_init(void)
88 error = sysfs_create_group(&kernel_subsys.kobj, 110 error = sysfs_create_group(&kernel_subsys.kobj,
89 &kernel_attr_group); 111 &kernel_attr_group);
90 112
113 if (!error && notes_size > 0) {
114 notes_attr.size = notes_size;
115 error = sysfs_create_bin_file(&kernel_subsys.kobj,
116 &notes_attr);
117 }
118
91 return error; 119 return error;
92} 120}
93 121
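
The new /sys/kernel/notes attribute added above exposes the raw contents of the kernel's .notes ELF section. A hypothetical userspace reader, for illustration only (plain POSIX I/O, not part of the patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/notes", O_RDONLY);

	if (fd < 0) {
		perror("open /sys/kernel/notes");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* raw ELF note records */
	close(fd);
	return 0;
}
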
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 1a5ff2211d..734da579ad 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -5,7 +5,8 @@
5 * 5 *
6 * Started by Ingo Molnar: 6 * Started by Ingo Molnar:
7 * 7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * 10 *
10 * this code maps all the lock dependencies as they occur in a live kernel 11 * this code maps all the lock dependencies as they occur in a live kernel
11 * and will warn about the following classes of locking bugs: 12 * and will warn about the following classes of locking bugs:
@@ -37,11 +38,26 @@
37#include <linux/debug_locks.h> 38#include <linux/debug_locks.h>
38#include <linux/irqflags.h> 39#include <linux/irqflags.h>
39#include <linux/utsname.h> 40#include <linux/utsname.h>
41#include <linux/hash.h>
40 42
41#include <asm/sections.h> 43#include <asm/sections.h>
42 44
43#include "lockdep_internals.h" 45#include "lockdep_internals.h"
44 46
47#ifdef CONFIG_PROVE_LOCKING
48int prove_locking = 1;
49module_param(prove_locking, int, 0644);
50#else
51#define prove_locking 0
52#endif
53
54#ifdef CONFIG_LOCK_STAT
55int lock_stat = 1;
56module_param(lock_stat, int, 0644);
57#else
58#define lock_stat 0
59#endif
60
45/* 61/*
46 * lockdep_lock: protects the lockdep graph, the hashes and the 62 * lockdep_lock: protects the lockdep graph, the hashes and the
47 * class/list/hash allocators. 63 * class/list/hash allocators.
@@ -96,23 +112,6 @@ unsigned long nr_list_entries;
96static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; 112static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
97 113
98/* 114/*
99 * Allocate a lockdep entry. (assumes the graph_lock held, returns
100 * with NULL on failure)
101 */
102static struct lock_list *alloc_list_entry(void)
103{
104 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
105 if (!debug_locks_off_graph_unlock())
106 return NULL;
107
108 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
109 printk("turning off the locking correctness validator.\n");
110 return NULL;
111 }
112 return list_entries + nr_list_entries++;
113}
114
115/*
116 * All data structures here are protected by the global debug_lock. 115 * All data structures here are protected by the global debug_lock.
117 * 116 *
118 * Mutex key structs only get allocated, once during bootup, and never 117 * Mutex key structs only get allocated, once during bootup, and never
@@ -121,6 +120,117 @@ static struct lock_list *alloc_list_entry(void)
121unsigned long nr_lock_classes; 120unsigned long nr_lock_classes;
122static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 121static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
123 122
123#ifdef CONFIG_LOCK_STAT
124static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
125
126static int lock_contention_point(struct lock_class *class, unsigned long ip)
127{
128 int i;
129
130 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
131 if (class->contention_point[i] == 0) {
132 class->contention_point[i] = ip;
133 break;
134 }
135 if (class->contention_point[i] == ip)
136 break;
137 }
138
139 return i;
140}
141
142static void lock_time_inc(struct lock_time *lt, s64 time)
143{
144 if (time > lt->max)
145 lt->max = time;
146
147 if (time < lt->min || !lt->min)
148 lt->min = time;
149
150 lt->total += time;
151 lt->nr++;
152}
153
154static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
155{
156 dst->min += src->min;
157 dst->max += src->max;
158 dst->total += src->total;
159 dst->nr += src->nr;
160}
161
162struct lock_class_stats lock_stats(struct lock_class *class)
163{
164 struct lock_class_stats stats;
165 int cpu, i;
166
167 memset(&stats, 0, sizeof(struct lock_class_stats));
168 for_each_possible_cpu(cpu) {
169 struct lock_class_stats *pcs =
170 &per_cpu(lock_stats, cpu)[class - lock_classes];
171
172 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
173 stats.contention_point[i] += pcs->contention_point[i];
174
175 lock_time_add(&pcs->read_waittime, &stats.read_waittime);
176 lock_time_add(&pcs->write_waittime, &stats.write_waittime);
177
178 lock_time_add(&pcs->read_holdtime, &stats.read_holdtime);
179 lock_time_add(&pcs->write_holdtime, &stats.write_holdtime);
180
181 for (i = 0; i < ARRAY_SIZE(stats.bounces); i++)
182 stats.bounces[i] += pcs->bounces[i];
183 }
184
185 return stats;
186}
187
188void clear_lock_stats(struct lock_class *class)
189{
190 int cpu;
191
192 for_each_possible_cpu(cpu) {
193 struct lock_class_stats *cpu_stats =
194 &per_cpu(lock_stats, cpu)[class - lock_classes];
195
196 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
197 }
198 memset(class->contention_point, 0, sizeof(class->contention_point));
199}
200
201static struct lock_class_stats *get_lock_stats(struct lock_class *class)
202{
203 return &get_cpu_var(lock_stats)[class - lock_classes];
204}
205
206static void put_lock_stats(struct lock_class_stats *stats)
207{
208 put_cpu_var(lock_stats);
209}
210
211static void lock_release_holdtime(struct held_lock *hlock)
212{
213 struct lock_class_stats *stats;
214 s64 holdtime;
215
216 if (!lock_stat)
217 return;
218
219 holdtime = sched_clock() - hlock->holdtime_stamp;
220
221 stats = get_lock_stats(hlock->class);
222 if (hlock->read)
223 lock_time_inc(&stats->read_holdtime, holdtime);
224 else
225 lock_time_inc(&stats->write_holdtime, holdtime);
226 put_lock_stats(stats);
227}
228#else
229static inline void lock_release_holdtime(struct held_lock *hlock)
230{
231}
232#endif
233
124/* 234/*
125 * We keep a global list of all lock classes. The list only grows, 235 * We keep a global list of all lock classes. The list only grows,
126 * never shrinks. The list is only accessed with the lockdep 236 * never shrinks. The list is only accessed with the lockdep
@@ -133,24 +243,18 @@ LIST_HEAD(all_lock_classes);
133 */ 243 */
134#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) 244#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
135#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) 245#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
136#define CLASSHASH_MASK (CLASSHASH_SIZE - 1) 246#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS)
137#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK)
138#define classhashentry(key) (classhash_table + __classhashfn((key))) 247#define classhashentry(key) (classhash_table + __classhashfn((key)))
139 248
140static struct list_head classhash_table[CLASSHASH_SIZE]; 249static struct list_head classhash_table[CLASSHASH_SIZE];
141 250
142unsigned long nr_lock_chains;
143static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
144
145/* 251/*
146 * We put the lock dependency chains into a hash-table as well, to cache 252 * We put the lock dependency chains into a hash-table as well, to cache
147 * their existence: 253 * their existence:
148 */ 254 */
149#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) 255#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
150#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) 256#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
151#define CHAINHASH_MASK (CHAINHASH_SIZE - 1) 257#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS)
152#define __chainhashfn(chain) \
153 (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK)
154#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) 258#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
155 259
156static struct list_head chainhash_table[CHAINHASH_SIZE]; 260static struct list_head chainhash_table[CHAINHASH_SIZE];
@@ -223,26 +327,6 @@ static int verbose(struct lock_class *class)
223 return 0; 327 return 0;
224} 328}
225 329
226#ifdef CONFIG_TRACE_IRQFLAGS
227
228static int hardirq_verbose(struct lock_class *class)
229{
230#if HARDIRQ_VERBOSE
231 return class_filter(class);
232#endif
233 return 0;
234}
235
236static int softirq_verbose(struct lock_class *class)
237{
238#if SOFTIRQ_VERBOSE
239 return class_filter(class);
240#endif
241 return 0;
242}
243
244#endif
245
246/* 330/*
247 * Stack-trace: tightly packed array of stack backtrace 331 * Stack-trace: tightly packed array of stack backtrace
248 * addresses. Protected by the graph_lock. 332 * addresses. Protected by the graph_lock.
@@ -291,6 +375,11 @@ unsigned int max_recursion_depth;
291 * about it later on, in lockdep_info(). 375 * about it later on, in lockdep_info().
292 */ 376 */
293static int lockdep_init_error; 377static int lockdep_init_error;
378static unsigned long lockdep_init_trace_data[20];
379static struct stack_trace lockdep_init_trace = {
380 .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
381 .entries = lockdep_init_trace_data,
382};
294 383
295/* 384/*
296 * Various lockdep statistics: 385 * Various lockdep statistics:
@@ -379,7 +468,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4
379 468
380static void print_lock_name(struct lock_class *class) 469static void print_lock_name(struct lock_class *class)
381{ 470{
382 char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4; 471 char str[KSYM_NAME_LEN], c1, c2, c3, c4;
383 const char *name; 472 const char *name;
384 473
385 get_usage_chars(class, &c1, &c2, &c3, &c4); 474 get_usage_chars(class, &c1, &c2, &c3, &c4);
@@ -401,7 +490,7 @@ static void print_lock_name(struct lock_class *class)
401static void print_lockdep_cache(struct lockdep_map *lock) 490static void print_lockdep_cache(struct lockdep_map *lock)
402{ 491{
403 const char *name; 492 const char *name;
404 char str[KSYM_NAME_LEN + 1]; 493 char str[KSYM_NAME_LEN];
405 494
406 name = lock->name; 495 name = lock->name;
407 if (!name) 496 if (!name)
@@ -482,6 +571,262 @@ static void print_lock_dependencies(struct lock_class *class, int depth)
482 } 571 }
483} 572}
484 573
574static void print_kernel_version(void)
575{
576 printk("%s %.*s\n", init_utsname()->release,
577 (int)strcspn(init_utsname()->version, " "),
578 init_utsname()->version);
579}
580
581static int very_verbose(struct lock_class *class)
582{
583#if VERY_VERBOSE
584 return class_filter(class);
585#endif
586 return 0;
587}
588
589/*
590 * Is this the address of a static object:
591 */
592static int static_obj(void *obj)
593{
594 unsigned long start = (unsigned long) &_stext,
595 end = (unsigned long) &_end,
596 addr = (unsigned long) obj;
597#ifdef CONFIG_SMP
598 int i;
599#endif
600
601 /*
602 * static variable?
603 */
604 if ((addr >= start) && (addr < end))
605 return 1;
606
607#ifdef CONFIG_SMP
608 /*
609 * percpu var?
610 */
611 for_each_possible_cpu(i) {
612 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
613 end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
614 + per_cpu_offset(i);
615
616 if ((addr >= start) && (addr < end))
617 return 1;
618 }
619#endif
620
621 /*
622 * module var?
623 */
624 return is_module_address(addr);
625}
626
627/*
628 * To make lock name printouts unique, we calculate a unique
629 * class->name_version generation counter:
630 */
631static int count_matching_names(struct lock_class *new_class)
632{
633 struct lock_class *class;
634 int count = 0;
635
636 if (!new_class->name)
637 return 0;
638
639 list_for_each_entry(class, &all_lock_classes, lock_entry) {
640 if (new_class->key - new_class->subclass == class->key)
641 return class->name_version;
642 if (class->name && !strcmp(class->name, new_class->name))
643 count = max(count, class->name_version);
644 }
645
646 return count + 1;
647}
648
649/*
650 * Register a lock's class in the hash-table, if the class is not present
651 * yet. Otherwise we look it up. We cache the result in the lock object
652 * itself, so actual lookup of the hash should be once per lock object.
653 */
654static inline struct lock_class *
655look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
656{
657 struct lockdep_subclass_key *key;
658 struct list_head *hash_head;
659 struct lock_class *class;
660
661#ifdef CONFIG_DEBUG_LOCKDEP
662 /*
663 * If the architecture calls into lockdep before initializing
664 * the hashes then we'll warn about it later. (we cannot printk
665 * right now)
666 */
667 if (unlikely(!lockdep_initialized)) {
668 lockdep_init();
669 lockdep_init_error = 1;
670 save_stack_trace(&lockdep_init_trace);
671 }
672#endif
673
674 /*
675 * Static locks do not have their class-keys yet - for them the key
676 * is the lock object itself:
677 */
678 if (unlikely(!lock->key))
679 lock->key = (void *)lock;
680
681 /*
682 * NOTE: the class-key must be unique. For dynamic locks, a static
683 * lock_class_key variable is passed in through the mutex_init()
684 * (or spin_lock_init()) call - which acts as the key. For static
685 * locks we use the lock object itself as the key.
686 */
687 BUILD_BUG_ON(sizeof(struct lock_class_key) >
688 sizeof(struct lockdep_map));
689
690 key = lock->key->subkeys + subclass;
691
692 hash_head = classhashentry(key);
693
694 /*
695 * We can walk the hash lockfree, because the hash only
696 * grows, and we are careful when adding entries to the end:
697 */
698 list_for_each_entry(class, hash_head, hash_entry) {
699 if (class->key == key) {
700 WARN_ON_ONCE(class->name != lock->name);
701 return class;
702 }
703 }
704
705 return NULL;
706}
707
708/*
709 * Register a lock's class in the hash-table, if the class is not present
710 * yet. Otherwise we look it up. We cache the result in the lock object
711 * itself, so actual lookup of the hash should be once per lock object.
712 */
713static inline struct lock_class *
714register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
715{
716 struct lockdep_subclass_key *key;
717 struct list_head *hash_head;
718 struct lock_class *class;
719 unsigned long flags;
720
721 class = look_up_lock_class(lock, subclass);
722 if (likely(class))
723 return class;
724
725 /*
726 * Debug-check: all keys must be persistent!
727 */
728 if (!static_obj(lock->key)) {
729 debug_locks_off();
730 printk("INFO: trying to register non-static key.\n");
731 printk("the code is fine but needs lockdep annotation.\n");
732 printk("turning off the locking correctness validator.\n");
733 dump_stack();
734
735 return NULL;
736 }
737
738 key = lock->key->subkeys + subclass;
739 hash_head = classhashentry(key);
740
741 raw_local_irq_save(flags);
742 if (!graph_lock()) {
743 raw_local_irq_restore(flags);
744 return NULL;
745 }
746 /*
747 * We have to do the hash-walk again, to avoid races
748 * with another CPU:
749 */
750 list_for_each_entry(class, hash_head, hash_entry)
751 if (class->key == key)
752 goto out_unlock_set;
753 /*
754 * Allocate a new key from the static array, and add it to
755 * the hash:
756 */
757 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
758 if (!debug_locks_off_graph_unlock()) {
759 raw_local_irq_restore(flags);
760 return NULL;
761 }
762 raw_local_irq_restore(flags);
763
764 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
765 printk("turning off the locking correctness validator.\n");
766 return NULL;
767 }
768 class = lock_classes + nr_lock_classes++;
769 debug_atomic_inc(&nr_unused_locks);
770 class->key = key;
771 class->name = lock->name;
772 class->subclass = subclass;
773 INIT_LIST_HEAD(&class->lock_entry);
774 INIT_LIST_HEAD(&class->locks_before);
775 INIT_LIST_HEAD(&class->locks_after);
776 class->name_version = count_matching_names(class);
777 /*
778 * We use RCU's safe list-add method to make
779 * parallel walking of the hash-list safe:
780 */
781 list_add_tail_rcu(&class->hash_entry, hash_head);
782
783 if (verbose(class)) {
784 graph_unlock();
785 raw_local_irq_restore(flags);
786
787 printk("\nnew class %p: %s", class->key, class->name);
788 if (class->name_version > 1)
789 printk("#%d", class->name_version);
790 printk("\n");
791 dump_stack();
792
793 raw_local_irq_save(flags);
794 if (!graph_lock()) {
795 raw_local_irq_restore(flags);
796 return NULL;
797 }
798 }
799out_unlock_set:
800 graph_unlock();
801 raw_local_irq_restore(flags);
802
803 if (!subclass || force)
804 lock->class_cache = class;
805
806 if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass))
807 return NULL;
808
809 return class;
810}
811
812#ifdef CONFIG_PROVE_LOCKING
813/*
814 * Allocate a lockdep entry. (assumes the graph_lock held, returns
815 * with NULL on failure)
816 */
817static struct lock_list *alloc_list_entry(void)
818{
819 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
820 if (!debug_locks_off_graph_unlock())
821 return NULL;
822
823 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
824 printk("turning off the locking correctness validator.\n");
825 return NULL;
826 }
827 return list_entries + nr_list_entries++;
828}
829
485/* 830/*
486 * Add a new dependency to the head of the list: 831 * Add a new dependency to the head of the list:
487 */ 832 */
@@ -542,13 +887,6 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth)
542 return 0; 887 return 0;
543} 888}
544 889
545static void print_kernel_version(void)
546{
547 printk("%s %.*s\n", init_utsname()->release,
548 (int)strcspn(init_utsname()->version, " "),
549 init_utsname()->version);
550}
551
552/* 890/*
553 * When a circular dependency is detected, print the 891 * When a circular dependency is detected, print the
554 * header first: 892 * header first:
@@ -640,15 +978,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
640 return 1; 978 return 1;
641} 979}
642 980
643static int very_verbose(struct lock_class *class)
644{
645#if VERY_VERBOSE
646 return class_filter(class);
647#endif
648 return 0;
649}
650#ifdef CONFIG_TRACE_IRQFLAGS 981#ifdef CONFIG_TRACE_IRQFLAGS
651
652/* 982/*
653 * Forwards and backwards subgraph searching, for the purposes of 983 * Forwards and backwards subgraph searching, for the purposes of
654 * proving that two subgraphs can be connected by a new dependency 984 * proving that two subgraphs can be connected by a new dependency
@@ -821,6 +1151,78 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
821 bit_backwards, bit_forwards, irqclass); 1151 bit_backwards, bit_forwards, irqclass);
822} 1152}
823 1153
1154static int
1155check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1156 struct held_lock *next)
1157{
1158 /*
1159 * Prove that the new dependency does not connect a hardirq-safe
1160 * lock with a hardirq-unsafe lock - to achieve this we search
1161 * the backwards-subgraph starting at <prev>, and the
1162 * forwards-subgraph starting at <next>:
1163 */
1164 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
1165 LOCK_ENABLED_HARDIRQS, "hard"))
1166 return 0;
1167
1168 /*
1169 * Prove that the new dependency does not connect a hardirq-safe-read
1170 * lock with a hardirq-unsafe lock - to achieve this we search
1171 * the backwards-subgraph starting at <prev>, and the
1172 * forwards-subgraph starting at <next>:
1173 */
1174 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
1175 LOCK_ENABLED_HARDIRQS, "hard-read"))
1176 return 0;
1177
1178 /*
1179 * Prove that the new dependency does not connect a softirq-safe
1180 * lock with a softirq-unsafe lock - to achieve this we search
1181 * the backwards-subgraph starting at <prev>, and the
1182 * forwards-subgraph starting at <next>:
1183 */
1184 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
1185 LOCK_ENABLED_SOFTIRQS, "soft"))
1186 return 0;
1187 /*
1188 * Prove that the new dependency does not connect a softirq-safe-read
1189 * lock with a softirq-unsafe lock - to achieve this we search
1190 * the backwards-subgraph starting at <prev>, and the
1191 * forwards-subgraph starting at <next>:
1192 */
1193 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
1194 LOCK_ENABLED_SOFTIRQS, "soft"))
1195 return 0;
1196
1197 return 1;
1198}
1199
1200static void inc_chains(void)
1201{
1202 if (current->hardirq_context)
1203 nr_hardirq_chains++;
1204 else {
1205 if (current->softirq_context)
1206 nr_softirq_chains++;
1207 else
1208 nr_process_chains++;
1209 }
1210}
1211
1212#else
1213
1214static inline int
1215check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1216 struct held_lock *next)
1217{
1218 return 1;
1219}
1220
1221static inline void inc_chains(void)
1222{
1223 nr_process_chains++;
1224}
1225
824#endif 1226#endif
825 1227
826static int 1228static int
@@ -922,47 +1324,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
922 if (!(check_noncircular(next->class, 0))) 1324 if (!(check_noncircular(next->class, 0)))
923 return print_circular_bug_tail(); 1325 return print_circular_bug_tail();
924 1326
925#ifdef CONFIG_TRACE_IRQFLAGS 1327 if (!check_prev_add_irq(curr, prev, next))
926 /*
927 * Prove that the new dependency does not connect a hardirq-safe
928 * lock with a hardirq-unsafe lock - to achieve this we search
929 * the backwards-subgraph starting at <prev>, and the
930 * forwards-subgraph starting at <next>:
931 */
932 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
933 LOCK_ENABLED_HARDIRQS, "hard"))
934 return 0; 1328 return 0;
935 1329
936 /* 1330 /*
937 * Prove that the new dependency does not connect a hardirq-safe-read
938 * lock with a hardirq-unsafe lock - to achieve this we search
939 * the backwards-subgraph starting at <prev>, and the
940 * forwards-subgraph starting at <next>:
941 */
942 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
943 LOCK_ENABLED_HARDIRQS, "hard-read"))
944 return 0;
945
946 /*
947 * Prove that the new dependency does not connect a softirq-safe
948 * lock with a softirq-unsafe lock - to achieve this we search
949 * the backwards-subgraph starting at <prev>, and the
950 * forwards-subgraph starting at <next>:
951 */
952 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
953 LOCK_ENABLED_SOFTIRQS, "soft"))
954 return 0;
955 /*
956 * Prove that the new dependency does not connect a softirq-safe-read
957 * lock with a softirq-unsafe lock - to achieve this we search
958 * the backwards-subgraph starting at <prev>, and the
959 * forwards-subgraph starting at <next>:
960 */
961 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
962 LOCK_ENABLED_SOFTIRQS, "soft"))
963 return 0;
964#endif
965 /*
966 * For recursive read-locks we do all the dependency checks, 1331 * For recursive read-locks we do all the dependency checks,
967 * but we dont store read-triggered dependencies (only 1332 * but we dont store read-triggered dependencies (only
968 * write-triggered dependencies). This ensures that only the 1333 * write-triggered dependencies). This ensures that only the
@@ -1088,224 +1453,8 @@ out_bug:
1088 return 0; 1453 return 0;
1089} 1454}
1090 1455
1091 1456unsigned long nr_lock_chains;
1092/* 1457static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
1093 * Is this the address of a static object:
1094 */
1095static int static_obj(void *obj)
1096{
1097 unsigned long start = (unsigned long) &_stext,
1098 end = (unsigned long) &_end,
1099 addr = (unsigned long) obj;
1100#ifdef CONFIG_SMP
1101 int i;
1102#endif
1103
1104 /*
1105 * static variable?
1106 */
1107 if ((addr >= start) && (addr < end))
1108 return 1;
1109
1110#ifdef CONFIG_SMP
1111 /*
1112 * percpu var?
1113 */
1114 for_each_possible_cpu(i) {
1115 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
1116 end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
1117 + per_cpu_offset(i);
1118
1119 if ((addr >= start) && (addr < end))
1120 return 1;
1121 }
1122#endif
1123
1124 /*
1125 * module var?
1126 */
1127 return is_module_address(addr);
1128}
1129
1130/*
1131 * To make lock name printouts unique, we calculate a unique
1132 * class->name_version generation counter:
1133 */
1134static int count_matching_names(struct lock_class *new_class)
1135{
1136 struct lock_class *class;
1137 int count = 0;
1138
1139 if (!new_class->name)
1140 return 0;
1141
1142 list_for_each_entry(class, &all_lock_classes, lock_entry) {
1143 if (new_class->key - new_class->subclass == class->key)
1144 return class->name_version;
1145 if (class->name && !strcmp(class->name, new_class->name))
1146 count = max(count, class->name_version);
1147 }
1148
1149 return count + 1;
1150}
1151
1152/*
1153 * Register a lock's class in the hash-table, if the class is not present
1154 * yet. Otherwise we look it up. We cache the result in the lock object
1155 * itself, so actual lookup of the hash should be once per lock object.
1156 */
1157static inline struct lock_class *
1158look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
1159{
1160 struct lockdep_subclass_key *key;
1161 struct list_head *hash_head;
1162 struct lock_class *class;
1163
1164#ifdef CONFIG_DEBUG_LOCKDEP
1165 /*
1166 * If the architecture calls into lockdep before initializing
1167 * the hashes then we'll warn about it later. (we cannot printk
1168 * right now)
1169 */
1170 if (unlikely(!lockdep_initialized)) {
1171 lockdep_init();
1172 lockdep_init_error = 1;
1173 }
1174#endif
1175
1176 /*
1177 * Static locks do not have their class-keys yet - for them the key
1178 * is the lock object itself:
1179 */
1180 if (unlikely(!lock->key))
1181 lock->key = (void *)lock;
1182
1183 /*
1184 * NOTE: the class-key must be unique. For dynamic locks, a static
1185 * lock_class_key variable is passed in through the mutex_init()
1186 * (or spin_lock_init()) call - which acts as the key. For static
1187 * locks we use the lock object itself as the key.
1188 */
1189 BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class));
1190
1191 key = lock->key->subkeys + subclass;
1192
1193 hash_head = classhashentry(key);
1194
1195 /*
1196 * We can walk the hash lockfree, because the hash only
1197 * grows, and we are careful when adding entries to the end:
1198 */
1199 list_for_each_entry(class, hash_head, hash_entry)
1200 if (class->key == key)
1201 return class;
1202
1203 return NULL;
1204}
1205
1206/*
1207 * Register a lock's class in the hash-table, if the class is not present
1208 * yet. Otherwise we look it up. We cache the result in the lock object
1209 * itself, so actual lookup of the hash should be once per lock object.
1210 */
1211static inline struct lock_class *
1212register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
1213{
1214 struct lockdep_subclass_key *key;
1215 struct list_head *hash_head;
1216 struct lock_class *class;
1217 unsigned long flags;
1218
1219 class = look_up_lock_class(lock, subclass);
1220 if (likely(class))
1221 return class;
1222
1223 /*
1224 * Debug-check: all keys must be persistent!
1225 */
1226 if (!static_obj(lock->key)) {
1227 debug_locks_off();
1228 printk("INFO: trying to register non-static key.\n");
1229 printk("the code is fine but needs lockdep annotation.\n");
1230 printk("turning off the locking correctness validator.\n");
1231 dump_stack();
1232
1233 return NULL;
1234 }
1235
1236 key = lock->key->subkeys + subclass;
1237 hash_head = classhashentry(key);
1238
1239 raw_local_irq_save(flags);
1240 if (!graph_lock()) {
1241 raw_local_irq_restore(flags);
1242 return NULL;
1243 }
1244 /*
1245 * We have to do the hash-walk again, to avoid races
1246 * with another CPU:
1247 */
1248 list_for_each_entry(class, hash_head, hash_entry)
1249 if (class->key == key)
1250 goto out_unlock_set;
1251 /*
1252 * Allocate a new key from the static array, and add it to
1253 * the hash:
1254 */
1255 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
1256 if (!debug_locks_off_graph_unlock()) {
1257 raw_local_irq_restore(flags);
1258 return NULL;
1259 }
1260 raw_local_irq_restore(flags);
1261
1262 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
1263 printk("turning off the locking correctness validator.\n");
1264 return NULL;
1265 }
1266 class = lock_classes + nr_lock_classes++;
1267 debug_atomic_inc(&nr_unused_locks);
1268 class->key = key;
1269 class->name = lock->name;
1270 class->subclass = subclass;
1271 INIT_LIST_HEAD(&class->lock_entry);
1272 INIT_LIST_HEAD(&class->locks_before);
1273 INIT_LIST_HEAD(&class->locks_after);
1274 class->name_version = count_matching_names(class);
1275 /*
1276 * We use RCU's safe list-add method to make
1277 * parallel walking of the hash-list safe:
1278 */
1279 list_add_tail_rcu(&class->hash_entry, hash_head);
1280
1281 if (verbose(class)) {
1282 graph_unlock();
1283 raw_local_irq_restore(flags);
1284
1285 printk("\nnew class %p: %s", class->key, class->name);
1286 if (class->name_version > 1)
1287 printk("#%d", class->name_version);
1288 printk("\n");
1289 dump_stack();
1290
1291 raw_local_irq_save(flags);
1292 if (!graph_lock()) {
1293 raw_local_irq_restore(flags);
1294 return NULL;
1295 }
1296 }
1297out_unlock_set:
1298 graph_unlock();
1299 raw_local_irq_restore(flags);
1300
1301 if (!subclass || force)
1302 lock->class_cache = class;
1303
1304 if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass))
1305 return NULL;
1306
1307 return class;
1308}
1309 1458
1310/* 1459/*
1311 * Look up a dependency chain. If the key is not present yet then 1460 * Look up a dependency chain. If the key is not present yet then
@@ -1366,21 +1515,72 @@ cache_hit:
1366 chain->chain_key = chain_key; 1515 chain->chain_key = chain_key;
1367 list_add_tail_rcu(&chain->entry, hash_head); 1516 list_add_tail_rcu(&chain->entry, hash_head);
1368 debug_atomic_inc(&chain_lookup_misses); 1517 debug_atomic_inc(&chain_lookup_misses);
1369#ifdef CONFIG_TRACE_IRQFLAGS 1518 inc_chains();
1370 if (current->hardirq_context) 1519
1371 nr_hardirq_chains++; 1520 return 1;
1372 else { 1521}
1373 if (current->softirq_context) 1522
1374 nr_softirq_chains++; 1523static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
1375 else 1524 struct held_lock *hlock, int chain_head)
1376 nr_process_chains++; 1525{
1377 } 1526 /*
1378#else 1527 * Trylock needs to maintain the stack of held locks, but it
1379 nr_process_chains++; 1528 * does not add new dependencies, because trylock can be done
1380#endif 1529 * in any order.
1530 *
1531 * We look up the chain_key and do the O(N^2) check and update of
1532 * the dependencies only if this is a new dependency chain.
1533 * (If lookup_chain_cache() returns with 1 it acquires
1534 * graph_lock for us)
1535 */
1536 if (!hlock->trylock && (hlock->check == 2) &&
1537 lookup_chain_cache(curr->curr_chain_key, hlock->class)) {
1538 /*
1539 * Check whether last held lock:
1540 *
1541 * - is irq-safe, if this lock is irq-unsafe
1542 * - is softirq-safe, if this lock is hardirq-unsafe
1543 *
1544 * And check whether the new lock's dependency graph
1545 * could lead back to the previous lock.
1546 *
1547 * Any of these scenarios could lead to a deadlock, so we
1548 * run all of the validations below:
1549 */
1550 int ret = check_deadlock(curr, hlock, lock, hlock->read);
1551
1552 if (!ret)
1553 return 0;
1554 /*
1555 * Mark recursive read, as we jump over it when
1556 * building dependencies (just like we jump over
1557 * trylock entries):
1558 */
1559 if (ret == 2)
1560 hlock->read = 2;
1561 /*
1562 * Add dependency only if this lock is not the head
1563 * of the chain, and if it's not a secondary read-lock:
1564 */
1565 if (!chain_head && ret != 2)
1566 if (!check_prevs_add(curr, hlock))
1567 return 0;
1568 graph_unlock();
1569 } else
1570 /* after lookup_chain_cache(): */
1571 if (unlikely(!debug_locks))
1572 return 0;
1381 1573
1382 return 1; 1574 return 1;
1383} 1575}
1576#else
1577static inline int validate_chain(struct task_struct *curr,
1578 struct lockdep_map *lock, struct held_lock *hlock,
1579 int chain_head)
1580{
1581 return 1;
1582}
1583#endif
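
The validate_chain()/lookup_chain_cache() pair above avoids redoing the expensive dependency checks by hashing the stack of held-lock class keys into a chain key and caching keys it has already validated. A rough userspace sketch of that idea follows, with a simplified mixing function and a collision-ignoring table standing in for the kernel's iterate_chain_key() and lock_chains[].

    #include <stdio.h>
    #include <stdint.h>

    #define CHAIN_TABLE_SIZE 1024

    static uint64_t chain_table[CHAIN_TABLE_SIZE];	/* zero == empty slot */

    /* simplified stand-in for iterate_chain_key() */
    static uint64_t iterate_key(uint64_t key, uint64_t class_id)
    {
    	key ^= class_id + 0x9e3779b97f4a7c15ULL;
    	key *= 0xff51afd7ed558ccdULL;
    	return key;
    }

    /* returns 1 when the chain is new and needs the full O(N^2) validation */
    static int lookup_chain(uint64_t chain_key)
    {
    	size_t slot = chain_key % CHAIN_TABLE_SIZE;

    	if (chain_table[slot] == chain_key)
    		return 0;			/* cache hit */
    	chain_table[slot] = chain_key;		/* sketch ignores collisions */
    	return 1;
    }

    int main(void)
    {
    	uint64_t ids[] = { 3, 7, 42 };		/* pretend lock class ids */
    	uint64_t key = 0;
    	size_t i;

    	for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++) {
    		key = iterate_key(key, ids[i]);
    		printf("depth %zu: chain_key=%016llx -> %s\n", i + 1,
    		       (unsigned long long)key,
    		       lookup_chain(key) ? "validate" : "cached");
    	}
    	return 0;
    }
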
1384 1584
1385/* 1585/*
1386 * We are building curr_chain_key incrementally, so double-check 1586 * We are building curr_chain_key incrementally, so double-check
@@ -1425,6 +1625,57 @@ static void check_chain_key(struct task_struct *curr)
1425#endif 1625#endif
1426} 1626}
1427 1627
1628static int
1629print_usage_bug(struct task_struct *curr, struct held_lock *this,
1630 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
1631{
1632 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1633 return 0;
1634
1635 printk("\n=================================\n");
1636 printk( "[ INFO: inconsistent lock state ]\n");
1637 print_kernel_version();
1638 printk( "---------------------------------\n");
1639
1640 printk("inconsistent {%s} -> {%s} usage.\n",
1641 usage_str[prev_bit], usage_str[new_bit]);
1642
1643 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1644 curr->comm, curr->pid,
1645 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1646 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1647 trace_hardirqs_enabled(curr),
1648 trace_softirqs_enabled(curr));
1649 print_lock(this);
1650
1651 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1652 print_stack_trace(this->class->usage_traces + prev_bit, 1);
1653
1654 print_irqtrace_events(curr);
1655 printk("\nother info that might help us debug this:\n");
1656 lockdep_print_held_locks(curr);
1657
1658 printk("\nstack backtrace:\n");
1659 dump_stack();
1660
1661 return 0;
1662}
1663
1664/*
1665 * Print out an error if an invalid bit is set:
1666 */
1667static inline int
1668valid_state(struct task_struct *curr, struct held_lock *this,
1669 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1670{
1671 if (unlikely(this->class->usage_mask & (1 << bad_bit)))
1672 return print_usage_bug(curr, this, bad_bit, new_bit);
1673 return 1;
1674}
1675
1676static int mark_lock(struct task_struct *curr, struct held_lock *this,
1677 enum lock_usage_bit new_bit);
1678
1428#ifdef CONFIG_TRACE_IRQFLAGS 1679#ifdef CONFIG_TRACE_IRQFLAGS
1429 1680
1430/* 1681/*
@@ -1518,90 +1769,30 @@ void print_irqtrace_events(struct task_struct *curr)
1518 print_ip_sym(curr->softirq_disable_ip); 1769 print_ip_sym(curr->softirq_disable_ip);
1519} 1770}
1520 1771
1521#endif 1772static int hardirq_verbose(struct lock_class *class)
1522
1523static int
1524print_usage_bug(struct task_struct *curr, struct held_lock *this,
1525 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
1526{ 1773{
1527 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1774#if HARDIRQ_VERBOSE
1528 return 0; 1775 return class_filter(class);
1529 1776#endif
1530 printk("\n=================================\n");
1531 printk( "[ INFO: inconsistent lock state ]\n");
1532 print_kernel_version();
1533 printk( "---------------------------------\n");
1534
1535 printk("inconsistent {%s} -> {%s} usage.\n",
1536 usage_str[prev_bit], usage_str[new_bit]);
1537
1538 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1539 curr->comm, curr->pid,
1540 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1541 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1542 trace_hardirqs_enabled(curr),
1543 trace_softirqs_enabled(curr));
1544 print_lock(this);
1545
1546 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1547 print_stack_trace(this->class->usage_traces + prev_bit, 1);
1548
1549 print_irqtrace_events(curr);
1550 printk("\nother info that might help us debug this:\n");
1551 lockdep_print_held_locks(curr);
1552
1553 printk("\nstack backtrace:\n");
1554 dump_stack();
1555
1556 return 0; 1777 return 0;
1557} 1778}
1558 1779
1559/* 1780static int softirq_verbose(struct lock_class *class)
1560 * Print out an error if an invalid bit is set:
1561 */
1562static inline int
1563valid_state(struct task_struct *curr, struct held_lock *this,
1564 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1565{ 1781{
1566 if (unlikely(this->class->usage_mask & (1 << bad_bit))) 1782#if SOFTIRQ_VERBOSE
1567 return print_usage_bug(curr, this, bad_bit, new_bit); 1783 return class_filter(class);
1568 return 1; 1784#endif
1785 return 0;
1569} 1786}
1570 1787
1571#define STRICT_READ_CHECKS 1 1788#define STRICT_READ_CHECKS 1
1572 1789
1573/* 1790static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1574 * Mark a lock with a usage bit, and validate the state transition: 1791 enum lock_usage_bit new_bit)
1575 */
1576static int mark_lock(struct task_struct *curr, struct held_lock *this,
1577 enum lock_usage_bit new_bit)
1578{ 1792{
1579 unsigned int new_mask = 1 << new_bit, ret = 1; 1793 int ret = 1;
1580
1581 /*
1582 * If already set then do not dirty the cacheline,
1583 * nor do any checks:
1584 */
1585 if (likely(this->class->usage_mask & new_mask))
1586 return 1;
1587
1588 if (!graph_lock())
1589 return 0;
1590 /*
1591 * Make sure we didnt race:
1592 */
1593 if (unlikely(this->class->usage_mask & new_mask)) {
1594 graph_unlock();
1595 return 1;
1596 }
1597
1598 this->class->usage_mask |= new_mask;
1599 1794
1600 if (!save_trace(this->class->usage_traces + new_bit)) 1795 switch(new_bit) {
1601 return 0;
1602
1603 switch (new_bit) {
1604#ifdef CONFIG_TRACE_IRQFLAGS
1605 case LOCK_USED_IN_HARDIRQ: 1796 case LOCK_USED_IN_HARDIRQ:
1606 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) 1797 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1607 return 0; 1798 return 0;
@@ -1760,37 +1951,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
1760 if (softirq_verbose(this->class)) 1951 if (softirq_verbose(this->class))
1761 ret = 2; 1952 ret = 2;
1762 break; 1953 break;
1763#endif
1764 case LOCK_USED:
1765 /*
1766 * Add it to the global list of classes:
1767 */
1768 list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
1769 debug_atomic_dec(&nr_unused_locks);
1770 break;
1771 default: 1954 default:
1772 if (!debug_locks_off_graph_unlock())
1773 return 0;
1774 WARN_ON(1); 1955 WARN_ON(1);
1775 return 0; 1956 break;
1776 }
1777
1778 graph_unlock();
1779
1780 /*
1781 * We must printk outside of the graph_lock:
1782 */
1783 if (ret == 2) {
1784 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
1785 print_lock(this);
1786 print_irqtrace_events(curr);
1787 dump_stack();
1788 } 1957 }
1789 1958
1790 return ret; 1959 return ret;
1791} 1960}
1792 1961
1793#ifdef CONFIG_TRACE_IRQFLAGS
1794/* 1962/*
1795 * Mark all held locks with a usage bit: 1963 * Mark all held locks with a usage bit:
1796 */ 1964 */
@@ -1973,9 +2141,176 @@ void trace_softirqs_off(unsigned long ip)
1973 debug_atomic_inc(&redundant_softirqs_off); 2141 debug_atomic_inc(&redundant_softirqs_off);
1974} 2142}
1975 2143
2144static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2145{
2146 /*
2147 * If non-trylock use in a hardirq or softirq context, then
2148 * mark the lock as used in these contexts:
2149 */
2150 if (!hlock->trylock) {
2151 if (hlock->read) {
2152 if (curr->hardirq_context)
2153 if (!mark_lock(curr, hlock,
2154 LOCK_USED_IN_HARDIRQ_READ))
2155 return 0;
2156 if (curr->softirq_context)
2157 if (!mark_lock(curr, hlock,
2158 LOCK_USED_IN_SOFTIRQ_READ))
2159 return 0;
2160 } else {
2161 if (curr->hardirq_context)
2162 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
2163 return 0;
2164 if (curr->softirq_context)
2165 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ))
2166 return 0;
2167 }
2168 }
2169 if (!hlock->hardirqs_off) {
2170 if (hlock->read) {
2171 if (!mark_lock(curr, hlock,
2172 LOCK_ENABLED_HARDIRQS_READ))
2173 return 0;
2174 if (curr->softirqs_enabled)
2175 if (!mark_lock(curr, hlock,
2176 LOCK_ENABLED_SOFTIRQS_READ))
2177 return 0;
2178 } else {
2179 if (!mark_lock(curr, hlock,
2180 LOCK_ENABLED_HARDIRQS))
2181 return 0;
2182 if (curr->softirqs_enabled)
2183 if (!mark_lock(curr, hlock,
2184 LOCK_ENABLED_SOFTIRQS))
2185 return 0;
2186 }
2187 }
2188
2189 return 1;
2190}
2191
2192static int separate_irq_context(struct task_struct *curr,
2193 struct held_lock *hlock)
2194{
2195 unsigned int depth = curr->lockdep_depth;
2196
2197 /*
2198 * Keep track of points where we cross into an interrupt context:
2199 */
2200 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2201 curr->softirq_context;
2202 if (depth) {
2203 struct held_lock *prev_hlock;
2204
2205 prev_hlock = curr->held_locks + depth-1;
2206 /*
2207 * If we cross into another context, reset the
2208 * hash key (this also prevents the checking and the
2209 * adding of the dependency to 'prev'):
2210 */
2211 if (prev_hlock->irq_context != hlock->irq_context)
2212 return 1;
2213 }
2214 return 0;
2215}
2216
2217#else
2218
2219static inline
2220int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
2221 enum lock_usage_bit new_bit)
2222{
2223 WARN_ON(1);
2224 return 1;
2225}
2226
2227static inline int mark_irqflags(struct task_struct *curr,
2228 struct held_lock *hlock)
2229{
2230 return 1;
2231}
2232
2233static inline int separate_irq_context(struct task_struct *curr,
2234 struct held_lock *hlock)
2235{
2236 return 0;
2237}
2238
1976#endif 2239#endif
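
separate_irq_context() above packs the hardirq/softirq state into one small tag per held lock, so detecting a context crossing is a single comparison against the previous entry. A sketch of the encoding; plain booleans stand in here for the kernel's context counters.

    #include <stdio.h>

    static unsigned int irq_context(int in_hardirq, int in_softirq)
    {
    	return 2 * (in_hardirq ? 1 : 0) + (in_softirq ? 1 : 0);
    }

    int main(void)
    {
    	unsigned int prev = irq_context(0, 0);	/* process context */
    	unsigned int curr = irq_context(0, 1);	/* softirq context */

    	if (prev != curr)
    		printf("context changed (%u -> %u): start a new chain\n",
    		       prev, curr);
    	return 0;
    }
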
1977 2240
1978/* 2241/*
2242 * Mark a lock with a usage bit, and validate the state transition:
2243 */
2244static int mark_lock(struct task_struct *curr, struct held_lock *this,
2245 enum lock_usage_bit new_bit)
2246{
2247 unsigned int new_mask = 1 << new_bit, ret = 1;
2248
2249 /*
2250 * If already set then do not dirty the cacheline,
2251 * nor do any checks:
2252 */
2253 if (likely(this->class->usage_mask & new_mask))
2254 return 1;
2255
2256 if (!graph_lock())
2257 return 0;
2258 /*
2259 * Make sure we didn't race:
2260 */
2261 if (unlikely(this->class->usage_mask & new_mask)) {
2262 graph_unlock();
2263 return 1;
2264 }
2265
2266 this->class->usage_mask |= new_mask;
2267
2268 if (!save_trace(this->class->usage_traces + new_bit))
2269 return 0;
2270
2271 switch (new_bit) {
2272 case LOCK_USED_IN_HARDIRQ:
2273 case LOCK_USED_IN_SOFTIRQ:
2274 case LOCK_USED_IN_HARDIRQ_READ:
2275 case LOCK_USED_IN_SOFTIRQ_READ:
2276 case LOCK_ENABLED_HARDIRQS:
2277 case LOCK_ENABLED_SOFTIRQS:
2278 case LOCK_ENABLED_HARDIRQS_READ:
2279 case LOCK_ENABLED_SOFTIRQS_READ:
2280 ret = mark_lock_irq(curr, this, new_bit);
2281 if (!ret)
2282 return 0;
2283 break;
2284 case LOCK_USED:
2285 /*
2286 * Add it to the global list of classes:
2287 */
2288 list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
2289 debug_atomic_dec(&nr_unused_locks);
2290 break;
2291 default:
2292 if (!debug_locks_off_graph_unlock())
2293 return 0;
2294 WARN_ON(1);
2295 return 0;
2296 }
2297
2298 graph_unlock();
2299
2300 /*
2301 * We must printk outside of the graph_lock:
2302 */
2303 if (ret == 2) {
2304 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
2305 print_lock(this);
2306 print_irqtrace_events(curr);
2307 dump_stack();
2308 }
2309
2310 return ret;
2311}
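
mark_lock() above uses the classic test, lock, re-test shape: the common case (bit already set) touches nothing, and the re-check under graph_lock() keeps two racing CPUs from doing the one-time bookkeeping twice. A userspace sketch of that pattern with pthreads; the names are illustrative, not kernel API.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t graph_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned int usage_mask;

    /* returns 1 if this call performed the one-time bookkeeping */
    static int mark_bit(unsigned int new_mask)
    {
    	int did_work = 0;

    	if (usage_mask & new_mask)		/* fast path: already set */
    		return 0;

    	pthread_mutex_lock(&graph_lock);
    	if (!(usage_mask & new_mask)) {		/* re-check: we may have raced */
    		usage_mask |= new_mask;
    		did_work = 1;			/* save_trace() etc. goes here */
    	}
    	pthread_mutex_unlock(&graph_lock);
    	return did_work;
    }

    int main(void)
    {
    	printf("first mark:  %d\n", mark_bit(1 << 3));	/* prints 1 */
    	printf("second mark: %d\n", mark_bit(1 << 3));	/* prints 0 */
    	return 0;
    }
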
2312
2313/*
1979 * Initialize a lock instance's lock-class mapping info: 2314 * Initialize a lock instance's lock-class mapping info:
1980 */ 2315 */
1981void lockdep_init_map(struct lockdep_map *lock, const char *name, 2316void lockdep_init_map(struct lockdep_map *lock, const char *name,
@@ -1999,6 +2334,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
1999 lock->name = name; 2334 lock->name = name;
2000 lock->key = key; 2335 lock->key = key;
2001 lock->class_cache = NULL; 2336 lock->class_cache = NULL;
2337#ifdef CONFIG_LOCK_STAT
2338 lock->cpu = raw_smp_processor_id();
2339#endif
2002 if (subclass) 2340 if (subclass)
2003 register_lock_class(lock, subclass, 1); 2341 register_lock_class(lock, subclass, 1);
2004} 2342}
@@ -2020,6 +2358,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2020 int chain_head = 0; 2358 int chain_head = 0;
2021 u64 chain_key; 2359 u64 chain_key;
2022 2360
2361 if (!prove_locking)
2362 check = 1;
2363
2023 if (unlikely(!debug_locks)) 2364 if (unlikely(!debug_locks))
2024 return 0; 2365 return 0;
2025 2366
@@ -2070,57 +2411,18 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2070 hlock->read = read; 2411 hlock->read = read;
2071 hlock->check = check; 2412 hlock->check = check;
2072 hlock->hardirqs_off = hardirqs_off; 2413 hlock->hardirqs_off = hardirqs_off;
2073 2414#ifdef CONFIG_LOCK_STAT
2074 if (check != 2) 2415 hlock->waittime_stamp = 0;
2075 goto out_calc_hash; 2416 hlock->holdtime_stamp = sched_clock();
2076#ifdef CONFIG_TRACE_IRQFLAGS
2077 /*
2078 * If non-trylock use in a hardirq or softirq context, then
2079 * mark the lock as used in these contexts:
2080 */
2081 if (!trylock) {
2082 if (read) {
2083 if (curr->hardirq_context)
2084 if (!mark_lock(curr, hlock,
2085 LOCK_USED_IN_HARDIRQ_READ))
2086 return 0;
2087 if (curr->softirq_context)
2088 if (!mark_lock(curr, hlock,
2089 LOCK_USED_IN_SOFTIRQ_READ))
2090 return 0;
2091 } else {
2092 if (curr->hardirq_context)
2093 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
2094 return 0;
2095 if (curr->softirq_context)
2096 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ))
2097 return 0;
2098 }
2099 }
2100 if (!hardirqs_off) {
2101 if (read) {
2102 if (!mark_lock(curr, hlock,
2103 LOCK_ENABLED_HARDIRQS_READ))
2104 return 0;
2105 if (curr->softirqs_enabled)
2106 if (!mark_lock(curr, hlock,
2107 LOCK_ENABLED_SOFTIRQS_READ))
2108 return 0;
2109 } else {
2110 if (!mark_lock(curr, hlock,
2111 LOCK_ENABLED_HARDIRQS))
2112 return 0;
2113 if (curr->softirqs_enabled)
2114 if (!mark_lock(curr, hlock,
2115 LOCK_ENABLED_SOFTIRQS))
2116 return 0;
2117 }
2118 }
2119#endif 2417#endif
2418
2419 if (check == 2 && !mark_irqflags(curr, hlock))
2420 return 0;
2421
2120 /* mark it as used: */ 2422 /* mark it as used: */
2121 if (!mark_lock(curr, hlock, LOCK_USED)) 2423 if (!mark_lock(curr, hlock, LOCK_USED))
2122 return 0; 2424 return 0;
2123out_calc_hash: 2425
2124 /* 2426 /*
2125 * Calculate the chain hash: it's the combined hash of all the 2427
2126 * lock keys along the dependency chain. We save the hash value 2428 * lock keys along the dependency chain. We save the hash value
@@ -2143,77 +2445,15 @@ out_calc_hash:
2143 } 2445 }
2144 2446
2145 hlock->prev_chain_key = chain_key; 2447 hlock->prev_chain_key = chain_key;
2146 2448 if (separate_irq_context(curr, hlock)) {
2147#ifdef CONFIG_TRACE_IRQFLAGS 2449 chain_key = 0;
2148 /* 2450 chain_head = 1;
2149 * Keep track of points where we cross into an interrupt context:
2150 */
2151 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2152 curr->softirq_context;
2153 if (depth) {
2154 struct held_lock *prev_hlock;
2155
2156 prev_hlock = curr->held_locks + depth-1;
2157 /*
2158 * If we cross into another context, reset the
2159 * hash key (this also prevents the checking and the
2160 * adding of the dependency to 'prev'):
2161 */
2162 if (prev_hlock->irq_context != hlock->irq_context) {
2163 chain_key = 0;
2164 chain_head = 1;
2165 }
2166 } 2451 }
2167#endif
2168 chain_key = iterate_chain_key(chain_key, id); 2452 chain_key = iterate_chain_key(chain_key, id);
2169 curr->curr_chain_key = chain_key; 2453 curr->curr_chain_key = chain_key;
2170 2454
2171 /* 2455 if (!validate_chain(curr, lock, hlock, chain_head))
2172 * Trylock needs to maintain the stack of held locks, but it 2456 return 0;
2173 * does not add new dependencies, because trylock can be done
2174 * in any order.
2175 *
2176 * We look up the chain_key and do the O(N^2) check and update of
2177 * the dependencies only if this is a new dependency chain.
2178 * (If lookup_chain_cache() returns with 1 it acquires
2179 * graph_lock for us)
2180 */
2181 if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) {
2182 /*
2183 * Check whether last held lock:
2184 *
2185 * - is irq-safe, if this lock is irq-unsafe
2186 * - is softirq-safe, if this lock is hardirq-unsafe
2187 *
2188 * And check whether the new lock's dependency graph
2189 * could lead back to the previous lock.
2190 *
2191 * any of these scenarios could lead to a deadlock. If
2192 * All validations
2193 */
2194 int ret = check_deadlock(curr, hlock, lock, read);
2195
2196 if (!ret)
2197 return 0;
2198 /*
2199 * Mark recursive read, as we jump over it when
2200 * building dependencies (just like we jump over
2201 * trylock entries):
2202 */
2203 if (ret == 2)
2204 hlock->read = 2;
2205 /*
2206 * Add dependency only if this lock is not the head
2207 * of the chain, and if it's not a secondary read-lock:
2208 */
2209 if (!chain_head && ret != 2)
2210 if (!check_prevs_add(curr, hlock))
2211 return 0;
2212 graph_unlock();
2213 } else
2214 /* after lookup_chain_cache(): */
2215 if (unlikely(!debug_locks))
2216 return 0;
2217 2457
2218 curr->lockdep_depth++; 2458 curr->lockdep_depth++;
2219 check_chain_key(curr); 2459 check_chain_key(curr);
@@ -2315,6 +2555,8 @@ lock_release_non_nested(struct task_struct *curr,
2315 return print_unlock_inbalance_bug(curr, lock, ip); 2555 return print_unlock_inbalance_bug(curr, lock, ip);
2316 2556
2317found_it: 2557found_it:
2558 lock_release_holdtime(hlock);
2559
2318 /* 2560 /*
2319 * We have the right lock to unlock, 'hlock' points to it. 2561 * We have the right lock to unlock, 'hlock' points to it.
2320 * Now we remove it from the stack, and add back the other 2562 * Now we remove it from the stack, and add back the other
@@ -2367,6 +2609,8 @@ static int lock_release_nested(struct task_struct *curr,
2367 2609
2368 curr->curr_chain_key = hlock->prev_chain_key; 2610 curr->curr_chain_key = hlock->prev_chain_key;
2369 2611
2612 lock_release_holdtime(hlock);
2613
2370#ifdef CONFIG_DEBUG_LOCKDEP 2614#ifdef CONFIG_DEBUG_LOCKDEP
2371 hlock->prev_chain_key = 0; 2615 hlock->prev_chain_key = 0;
2372 hlock->class = NULL; 2616 hlock->class = NULL;
@@ -2441,6 +2685,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2441{ 2685{
2442 unsigned long flags; 2686 unsigned long flags;
2443 2687
2688 if (unlikely(!lock_stat && !prove_locking))
2689 return;
2690
2444 if (unlikely(current->lockdep_recursion)) 2691 if (unlikely(current->lockdep_recursion))
2445 return; 2692 return;
2446 2693
@@ -2460,6 +2707,9 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2460{ 2707{
2461 unsigned long flags; 2708 unsigned long flags;
2462 2709
2710 if (unlikely(!lock_stat && !prove_locking))
2711 return;
2712
2463 if (unlikely(current->lockdep_recursion)) 2713 if (unlikely(current->lockdep_recursion))
2464 return; 2714 return;
2465 2715
@@ -2473,6 +2723,166 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2473 2723
2474EXPORT_SYMBOL_GPL(lock_release); 2724EXPORT_SYMBOL_GPL(lock_release);
2475 2725
2726#ifdef CONFIG_LOCK_STAT
2727static int
2728print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
2729 unsigned long ip)
2730{
2731 if (!debug_locks_off())
2732 return 0;
2733 if (debug_locks_silent)
2734 return 0;
2735
2736 printk("\n=================================\n");
2737 printk( "[ BUG: bad contention detected! ]\n");
2738 printk( "---------------------------------\n");
2739 printk("%s/%d is trying to contend lock (",
2740 curr->comm, curr->pid);
2741 print_lockdep_cache(lock);
2742 printk(") at:\n");
2743 print_ip_sym(ip);
2744 printk("but there are no locks held!\n");
2745 printk("\nother info that might help us debug this:\n");
2746 lockdep_print_held_locks(curr);
2747
2748 printk("\nstack backtrace:\n");
2749 dump_stack();
2750
2751 return 0;
2752}
2753
2754static void
2755__lock_contended(struct lockdep_map *lock, unsigned long ip)
2756{
2757 struct task_struct *curr = current;
2758 struct held_lock *hlock, *prev_hlock;
2759 struct lock_class_stats *stats;
2760 unsigned int depth;
2761 int i, point;
2762
2763 depth = curr->lockdep_depth;
2764 if (DEBUG_LOCKS_WARN_ON(!depth))
2765 return;
2766
2767 prev_hlock = NULL;
2768 for (i = depth-1; i >= 0; i--) {
2769 hlock = curr->held_locks + i;
2770 /*
2771 * We must not cross into another context:
2772 */
2773 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2774 break;
2775 if (hlock->instance == lock)
2776 goto found_it;
2777 prev_hlock = hlock;
2778 }
2779 print_lock_contention_bug(curr, lock, ip);
2780 return;
2781
2782found_it:
2783 hlock->waittime_stamp = sched_clock();
2784
2785 point = lock_contention_point(hlock->class, ip);
2786
2787 stats = get_lock_stats(hlock->class);
2788 if (point < ARRAY_SIZE(stats->contention_point))
2789 stats->contention_point[point]++;
2790 if (lock->cpu != smp_processor_id())
2791 stats->bounces[bounce_contended + !!hlock->read]++;
2792 put_lock_stats(stats);
2793}
2794
2795static void
2796__lock_acquired(struct lockdep_map *lock)
2797{
2798 struct task_struct *curr = current;
2799 struct held_lock *hlock, *prev_hlock;
2800 struct lock_class_stats *stats;
2801 unsigned int depth;
2802 u64 now;
2803 s64 waittime = 0;
2804 int i, cpu;
2805
2806 depth = curr->lockdep_depth;
2807 if (DEBUG_LOCKS_WARN_ON(!depth))
2808 return;
2809
2810 prev_hlock = NULL;
2811 for (i = depth-1; i >= 0; i--) {
2812 hlock = curr->held_locks + i;
2813 /*
2814 * We must not cross into another context:
2815 */
2816 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2817 break;
2818 if (hlock->instance == lock)
2819 goto found_it;
2820 prev_hlock = hlock;
2821 }
2822 print_lock_contention_bug(curr, lock, _RET_IP_);
2823 return;
2824
2825found_it:
2826 cpu = smp_processor_id();
2827 if (hlock->waittime_stamp) {
2828 now = sched_clock();
2829 waittime = now - hlock->waittime_stamp;
2830 hlock->holdtime_stamp = now;
2831 }
2832
2833 stats = get_lock_stats(hlock->class);
2834 if (waittime) {
2835 if (hlock->read)
2836 lock_time_inc(&stats->read_waittime, waittime);
2837 else
2838 lock_time_inc(&stats->write_waittime, waittime);
2839 }
2840 if (lock->cpu != cpu)
2841 stats->bounces[bounce_acquired + !!hlock->read]++;
2842 put_lock_stats(stats);
2843
2844 lock->cpu = cpu;
2845}
2846
2847void lock_contended(struct lockdep_map *lock, unsigned long ip)
2848{
2849 unsigned long flags;
2850
2851 if (unlikely(!lock_stat))
2852 return;
2853
2854 if (unlikely(current->lockdep_recursion))
2855 return;
2856
2857 raw_local_irq_save(flags);
2858 check_flags(flags);
2859 current->lockdep_recursion = 1;
2860 __lock_contended(lock, ip);
2861 current->lockdep_recursion = 0;
2862 raw_local_irq_restore(flags);
2863}
2864EXPORT_SYMBOL_GPL(lock_contended);
2865
2866void lock_acquired(struct lockdep_map *lock)
2867{
2868 unsigned long flags;
2869
2870 if (unlikely(!lock_stat))
2871 return;
2872
2873 if (unlikely(current->lockdep_recursion))
2874 return;
2875
2876 raw_local_irq_save(flags);
2877 check_flags(flags);
2878 current->lockdep_recursion = 1;
2879 __lock_acquired(lock);
2880 current->lockdep_recursion = 0;
2881 raw_local_irq_restore(flags);
2882}
2883EXPORT_SYMBOL_GPL(lock_acquired);
2884#endif
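
The CONFIG_LOCK_STAT hooks above boil down to timestamp bookkeeping: __lock_contended() stamps the moment waiting starts, and __lock_acquired() turns that into a wait time folded into per-class nr/min/max/total counters. A standalone sketch of that accounting, with clock_gettime() standing in for sched_clock() and an invented lock_time helper; the lock_release_holdtime() calls added in the release paths presumably do the analogous thing for hold times.

    #define _POSIX_C_SOURCE 200809L
    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    struct lock_time {				/* per-class accumulator */
    	uint64_t nr;
    	int64_t min, max, total;		/* nanoseconds */
    };

    static int64_t now_ns(void)			/* stands in for sched_clock() */
    {
    	struct timespec ts;

    	clock_gettime(CLOCK_MONOTONIC, &ts);
    	return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    static void lock_time_inc(struct lock_time *lt, int64_t t)
    {
    	if (!lt->nr || t < lt->min)
    		lt->min = t;
    	if (t > lt->max)
    		lt->max = t;
    	lt->total += t;
    	lt->nr++;
    }

    int main(void)
    {
    	struct lock_time waittime = { 0, 0, 0, 0 };
    	int64_t waittime_stamp;

    	waittime_stamp = now_ns();			/* as in __lock_contended() */
    	/* ... the task would block on the lock here ... */
    	lock_time_inc(&waittime, now_ns() - waittime_stamp);	/* __lock_acquired() */

    	printf("waits=%llu total=%lldns\n",
    	       (unsigned long long)waittime.nr, (long long)waittime.total);
    	return 0;
    }
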
2885
2476/* 2886/*
2477 * Used by the testsuite, sanitize the validator state 2887 * Used by the testsuite, sanitize the validator state
2478 * after a simulated failure: 2888 * after a simulated failure:
@@ -2636,8 +3046,11 @@ void __init lockdep_info(void)
2636 sizeof(struct held_lock) * MAX_LOCK_DEPTH); 3046 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
2637 3047
2638#ifdef CONFIG_DEBUG_LOCKDEP 3048#ifdef CONFIG_DEBUG_LOCKDEP
2639 if (lockdep_init_error) 3049 if (lockdep_init_error) {
2640 printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); 3050 printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n");
3051 printk("Call stack leading to lockdep invocation was:\n");
3052 print_stack_trace(&lockdep_init_trace, 0);
3053 }
2641#endif 3054#endif
2642} 3055}
2643 3056
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 58f35e586e..9f17af4a24 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -5,7 +5,8 @@
5 * 5 *
6 * Started by Ingo Molnar: 6 * Started by Ingo Molnar:
7 * 7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * 10 *
10 * Code for /proc/lockdep and /proc/lockdep_stats: 11 * Code for /proc/lockdep and /proc/lockdep_stats:
11 * 12 *
@@ -15,6 +16,10 @@
15#include <linux/seq_file.h> 16#include <linux/seq_file.h>
16#include <linux/kallsyms.h> 17#include <linux/kallsyms.h>
17#include <linux/debug_locks.h> 18#include <linux/debug_locks.h>
19#include <linux/vmalloc.h>
20#include <linux/sort.h>
21#include <asm/uaccess.h>
22#include <asm/div64.h>
18 23
19#include "lockdep_internals.h" 24#include "lockdep_internals.h"
20 25
@@ -271,8 +276,10 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
271 if (nr_list_entries) 276 if (nr_list_entries)
272 factor = sum_forward_deps / nr_list_entries; 277 factor = sum_forward_deps / nr_list_entries;
273 278
279#ifdef CONFIG_PROVE_LOCKING
274 seq_printf(m, " dependency chains: %11lu [max: %lu]\n", 280 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
275 nr_lock_chains, MAX_LOCKDEP_CHAINS); 281 nr_lock_chains, MAX_LOCKDEP_CHAINS);
282#endif
276 283
277#ifdef CONFIG_TRACE_IRQFLAGS 284#ifdef CONFIG_TRACE_IRQFLAGS
278 seq_printf(m, " in-hardirq chains: %11u\n", 285 seq_printf(m, " in-hardirq chains: %11u\n",
@@ -342,6 +349,292 @@ static const struct file_operations proc_lockdep_stats_operations = {
342 .release = seq_release, 349 .release = seq_release,
343}; 350};
344 351
352#ifdef CONFIG_LOCK_STAT
353
354struct lock_stat_data {
355 struct lock_class *class;
356 struct lock_class_stats stats;
357};
358
359struct lock_stat_seq {
360 struct lock_stat_data *iter;
361 struct lock_stat_data *iter_end;
362 struct lock_stat_data stats[MAX_LOCKDEP_KEYS];
363};
364
365/*
366 * sort on absolute number of contentions
367 */
368static int lock_stat_cmp(const void *l, const void *r)
369{
370 const struct lock_stat_data *dl = l, *dr = r;
371 unsigned long nl, nr;
372
373 nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr;
374 nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr;
375
376 return nr - nl;
377}
378
379static void seq_line(struct seq_file *m, char c, int offset, int length)
380{
381 int i;
382
383 for (i = 0; i < offset; i++)
384 seq_puts(m, " ");
385 for (i = 0; i < length; i++)
386 seq_printf(m, "%c", c);
387 seq_puts(m, "\n");
388}
389
390static void snprint_time(char *buf, size_t bufsiz, s64 nr)
391{
392 unsigned long rem;
393
394 rem = do_div(nr, 1000); /* XXX: do_div_signed */
395 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10);
396}
397
398static void seq_time(struct seq_file *m, s64 time)
399{
400 char num[15];
401
402 snprint_time(num, sizeof(num), time);
403 seq_printf(m, " %14s", num);
404}
405
406static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
407{
408 seq_printf(m, "%14lu", lt->nr);
409 seq_time(m, lt->min);
410 seq_time(m, lt->max);
411 seq_time(m, lt->total);
412}
413
414static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
415{
416 char name[39];
417 struct lock_class *class;
418 struct lock_class_stats *stats;
419 int i, namelen;
420
421 class = data->class;
422 stats = &data->stats;
423
424 namelen = 38;
425 if (class->name_version > 1)
426 namelen -= 2; /* XXX truncates versions > 9 */
427 if (class->subclass)
428 namelen -= 2;
429
430 if (!class->name) {
431 char str[KSYM_NAME_LEN];
432 const char *key_name;
433
434 key_name = __get_key_name(class->key, str);
435 snprintf(name, namelen, "%s", key_name);
436 } else {
437 snprintf(name, namelen, "%s", class->name);
438 }
439 namelen = strlen(name);
440 if (class->name_version > 1) {
441 snprintf(name+namelen, 3, "#%d", class->name_version);
442 namelen += 2;
443 }
444 if (class->subclass) {
445 snprintf(name+namelen, 3, "/%d", class->subclass);
446 namelen += 2;
447 }
448
449 if (stats->write_holdtime.nr) {
450 if (stats->read_holdtime.nr)
451 seq_printf(m, "%38s-W:", name);
452 else
453 seq_printf(m, "%40s:", name);
454
455 seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
456 seq_lock_time(m, &stats->write_waittime);
457 seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
458 seq_lock_time(m, &stats->write_holdtime);
459 seq_puts(m, "\n");
460 }
461
462 if (stats->read_holdtime.nr) {
463 seq_printf(m, "%38s-R:", name);
464 seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]);
465 seq_lock_time(m, &stats->read_waittime);
466 seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]);
467 seq_lock_time(m, &stats->read_holdtime);
468 seq_puts(m, "\n");
469 }
470
471 if (stats->read_waittime.nr + stats->write_waittime.nr == 0)
472 return;
473
474 if (stats->read_holdtime.nr)
475 namelen += 2;
476
477 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
478 char sym[KSYM_SYMBOL_LEN];
479 char ip[32];
480
481 if (class->contention_point[i] == 0)
482 break;
483
484 if (!i)
485 seq_line(m, '-', 40-namelen, namelen);
486
487 sprint_symbol(sym, class->contention_point[i]);
488 snprintf(ip, sizeof(ip), "[<%p>]",
489 (void *)class->contention_point[i]);
490 seq_printf(m, "%40s %14lu %29s %s\n", name,
491 stats->contention_point[i],
492 ip, sym);
493 }
494 if (i) {
495 seq_puts(m, "\n");
496 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
497 seq_puts(m, "\n");
498 }
499}
500
501static void seq_header(struct seq_file *m)
502{
503 seq_printf(m, "lock_stat version 0.2\n");
504 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
505 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
506 "%14s %14s\n",
507 "class name",
508 "con-bounces",
509 "contentions",
510 "waittime-min",
511 "waittime-max",
512 "waittime-total",
513 "acq-bounces",
514 "acquisitions",
515 "holdtime-min",
516 "holdtime-max",
517 "holdtime-total");
518 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
519 seq_printf(m, "\n");
520}
521
522static void *ls_start(struct seq_file *m, loff_t *pos)
523{
524 struct lock_stat_seq *data = m->private;
525
526 if (data->iter == data->stats)
527 seq_header(m);
528
529 if (data->iter == data->iter_end)
530 data->iter = NULL;
531
532 return data->iter;
533}
534
535static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
536{
537 struct lock_stat_seq *data = m->private;
538
539 (*pos)++;
540
541 data->iter = v;
542 data->iter++;
543 if (data->iter == data->iter_end)
544 data->iter = NULL;
545
546 return data->iter;
547}
548
549static void ls_stop(struct seq_file *m, void *v)
550{
551}
552
553static int ls_show(struct seq_file *m, void *v)
554{
555 struct lock_stat_seq *data = m->private;
556
557 seq_stats(m, data->iter);
558 return 0;
559}
560
561static struct seq_operations lockstat_ops = {
562 .start = ls_start,
563 .next = ls_next,
564 .stop = ls_stop,
565 .show = ls_show,
566};
567
568static int lock_stat_open(struct inode *inode, struct file *file)
569{
570 int res;
571 struct lock_class *class;
572 struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq));
573
574 if (!data)
575 return -ENOMEM;
576
577 res = seq_open(file, &lockstat_ops);
578 if (!res) {
579 struct lock_stat_data *iter = data->stats;
580 struct seq_file *m = file->private_data;
581
582 data->iter = iter;
583 list_for_each_entry(class, &all_lock_classes, lock_entry) {
584 iter->class = class;
585 iter->stats = lock_stats(class);
586 iter++;
587 }
588 data->iter_end = iter;
589
590 sort(data->stats, data->iter_end - data->iter,
591 sizeof(struct lock_stat_data),
592 lock_stat_cmp, NULL);
593
594 m->private = data;
595 } else
596 vfree(data);
597
598 return res;
599}
600
601static ssize_t lock_stat_write(struct file *file, const char __user *buf,
602 size_t count, loff_t *ppos)
603{
604 struct lock_class *class;
605 char c;
606
607 if (count) {
608 if (get_user(c, buf))
609 return -EFAULT;
610
611 if (c != '0')
612 return count;
613
614 list_for_each_entry(class, &all_lock_classes, lock_entry)
615 clear_lock_stats(class);
616 }
617 return count;
618}
619
620static int lock_stat_release(struct inode *inode, struct file *file)
621{
622 struct seq_file *seq = file->private_data;
623
624 vfree(seq->private);
625 seq->private = NULL;
626 return seq_release(inode, file);
627}
628
629static const struct file_operations proc_lock_stat_operations = {
630 .open = lock_stat_open,
631 .write = lock_stat_write,
632 .read = seq_read,
633 .llseek = seq_lseek,
634 .release = lock_stat_release,
635};
636#endif /* CONFIG_LOCK_STAT */
637
345static int __init lockdep_proc_init(void) 638static int __init lockdep_proc_init(void)
346{ 639{
347 struct proc_dir_entry *entry; 640 struct proc_dir_entry *entry;
@@ -354,6 +647,12 @@ static int __init lockdep_proc_init(void)
354 if (entry) 647 if (entry)
355 entry->proc_fops = &proc_lockdep_stats_operations; 648 entry->proc_fops = &proc_lockdep_stats_operations;
356 649
650#ifdef CONFIG_LOCK_STAT
651 entry = create_proc_entry("lock_stat", S_IRUSR, NULL);
652 if (entry)
653 entry->proc_fops = &proc_lock_stat_operations;
654#endif
655
357 return 0; 656 return 0;
358} 657}
359 658
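
With CONFIG_LOCK_STAT enabled, the /proc/lock_stat entry created above can be read for the per-class table printed by seq_stats(), and reset by writing '0' to it (see lock_stat_write()). A minimal reader/reset example; run it as root, since the entry is created with S_IRUSR.

    #include <stdio.h>

    int main(void)
    {
    	char line[512];
    	FILE *f = fopen("/proc/lock_stat", "r");
    	int i;

    	if (!f) {
    		perror("/proc/lock_stat");
    		return 1;
    	}
    	/* dump the header and the first few per-class rows */
    	for (i = 0; i < 10 && fgets(line, sizeof(line), f); i++)
    		fputs(line, stdout);
    	fclose(f);

    	/* reset the counters, as lock_stat_write() allows */
    	f = fopen("/proc/lock_stat", "w");
    	if (f) {
    		fputc('0', f);
    		fclose(f);
    	}
    	return 0;
    }
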
diff --git a/kernel/module.c b/kernel/module.c
index 539fed9ac8..33c04ad511 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2133,7 +2133,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
2133 sym = get_ksymbol(mod, addr, NULL, NULL); 2133 sym = get_ksymbol(mod, addr, NULL, NULL);
2134 if (!sym) 2134 if (!sym)
2135 goto out; 2135 goto out;
2136 strlcpy(symname, sym, KSYM_NAME_LEN + 1); 2136 strlcpy(symname, sym, KSYM_NAME_LEN);
2137 mutex_unlock(&module_mutex); 2137 mutex_unlock(&module_mutex);
2138 return 0; 2138 return 0;
2139 } 2139 }
@@ -2158,9 +2158,9 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
2158 if (!sym) 2158 if (!sym)
2159 goto out; 2159 goto out;
2160 if (modname) 2160 if (modname)
2161 strlcpy(modname, mod->name, MODULE_NAME_LEN + 1); 2161 strlcpy(modname, mod->name, MODULE_NAME_LEN);
2162 if (name) 2162 if (name)
2163 strlcpy(name, sym, KSYM_NAME_LEN + 1); 2163 strlcpy(name, sym, KSYM_NAME_LEN);
2164 mutex_unlock(&module_mutex); 2164 mutex_unlock(&module_mutex);
2165 return 0; 2165 return 0;
2166 } 2166 }
@@ -2181,8 +2181,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2181 *value = mod->symtab[symnum].st_value; 2181 *value = mod->symtab[symnum].st_value;
2182 *type = mod->symtab[symnum].st_info; 2182 *type = mod->symtab[symnum].st_info;
2183 strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, 2183 strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
2184 KSYM_NAME_LEN + 1); 2184 KSYM_NAME_LEN);
2185 strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); 2185 strlcpy(module_name, mod->name, MODULE_NAME_LEN);
2186 *exported = is_exported(name, mod); 2186 *exported = is_exported(name, mod);
2187 mutex_unlock(&module_mutex); 2187 mutex_unlock(&module_mutex);
2188 return 0; 2188 return 0;
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 303eab1848..691b86564d 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -139,6 +139,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
139 list_add_tail(&waiter.list, &lock->wait_list); 139 list_add_tail(&waiter.list, &lock->wait_list);
140 waiter.task = task; 140 waiter.task = task;
141 141
142 old_val = atomic_xchg(&lock->count, -1);
143 if (old_val == 1)
144 goto done;
145
146 lock_contended(&lock->dep_map, _RET_IP_);
147
142 for (;;) { 148 for (;;) {
143 /* 149 /*
144 * Lets try to take the lock again - this is needed even if 150 * Lets try to take the lock again - this is needed even if
@@ -174,6 +180,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
174 spin_lock_mutex(&lock->wait_lock, flags); 180 spin_lock_mutex(&lock->wait_lock, flags);
175 } 181 }
176 182
183done:
184 lock_acquired(&lock->dep_map);
177 /* got the lock - rejoice! */ 185 /* got the lock - rejoice! */
178 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 186 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
179 debug_mutex_set_owner(lock, task_thread_info(task)); 187 debug_mutex_set_owner(lock, task_thread_info(task));
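
The mutex.c hunk above brackets the slowpath with the new hooks: a fast atomic_xchg() attempt first, lock_contended() once we know we have to wait, and lock_acquired() when the lock is finally ours. The same shape in a userspace sketch, with pthread_mutex_trylock() as the fast path and the hooks reduced to printouts; this illustrates hook placement only, not the kernel mutex.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

    static void note_contended(void) { printf("contended\n"); }	/* lock_contended() */
    static void note_acquired(void)  { printf("acquired\n"); }	/* lock_acquired() */

    static void instrumented_lock(void)
    {
    	if (pthread_mutex_trylock(&m) == 0) {	/* fast path, no contention */
    		note_acquired();
    		return;
    	}
    	note_contended();			/* we are about to wait */
    	pthread_mutex_lock(&m);			/* the slowpath wait */
    	note_acquired();			/* lock is finally ours */
    }

    int main(void)
    {
    	instrumented_lock();
    	pthread_mutex_unlock(&m);
    	return 0;
    }
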
diff --git a/kernel/panic.c b/kernel/panic.c
index 623d182825..f64f4c1ac1 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -159,14 +159,15 @@ const char *print_tainted(void)
159{ 159{
160 static char buf[20]; 160 static char buf[20];
161 if (tainted) { 161 if (tainted) {
162 snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", 162 snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c",
163 tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', 163 tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
164 tainted & TAINT_FORCED_MODULE ? 'F' : ' ', 164 tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
165 tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', 165 tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
166 tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', 166 tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
167 tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', 167 tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
168 tainted & TAINT_BAD_PAGE ? 'B' : ' ', 168 tainted & TAINT_BAD_PAGE ? 'B' : ' ',
169 tainted & TAINT_USER ? 'U' : ' '); 169 tainted & TAINT_USER ? 'U' : ' ',
170 tainted & TAINT_DIE ? 'D' : ' ');
170 } 171 }
171 else 172 else
172 snprintf(buf, sizeof(buf), "Not tainted"); 173 snprintf(buf, sizeof(buf), "Not tainted");
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 495b7d4dd3..7358609e47 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -33,13 +33,20 @@ config PM_DEBUG
33 bool "Power Management Debug Support" 33 bool "Power Management Debug Support"
34 depends on PM 34 depends on PM
35 ---help--- 35 ---help---
36 This option enables verbose debugging support in the Power Management 36 This option enables various debugging support in the Power Management
37 code. This is helpful when debugging and reporting various PM bugs, 37 code. This is helpful when debugging and reporting PM bugs, like
38 like suspend support. 38 suspend support.
39
40config PM_VERBOSE
41 bool "Verbose Power Management debugging"
42 depends on PM_DEBUG
43 default n
44 ---help---
45 This option enables verbose messages from the Power Management code.
39 46
40config DISABLE_CONSOLE_SUSPEND 47config DISABLE_CONSOLE_SUSPEND
41 bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" 48 bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
42 depends on PM && PM_DEBUG 49 depends on PM_DEBUG
43 default n 50 default n
44 ---help--- 51 ---help---
45 This option turns off the console suspend mechanism that prevents 52 This option turns off the console suspend mechanism that prevents
@@ -50,7 +57,7 @@ config DISABLE_CONSOLE_SUSPEND
50 57
51config PM_TRACE 58config PM_TRACE
52 bool "Suspend/resume event tracing" 59 bool "Suspend/resume event tracing"
53 depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL 60 depends on PM_DEBUG && X86_32 && EXPERIMENTAL
54 default n 61 default n
55 ---help--- 62 ---help---
56 This enables some cheesy code to save the last PM event point in the 63 This enables some cheesy code to save the last PM event point in the
@@ -65,18 +72,6 @@ config PM_TRACE
65 CAUTION: this option will cause your machine's real-time clock to be 72 CAUTION: this option will cause your machine's real-time clock to be
66 set to an invalid time after a resume. 73 set to an invalid time after a resume.
67 74
68config PM_SYSFS_DEPRECATED
69 bool "Driver model /sys/devices/.../power/state files (DEPRECATED)"
70 depends on PM && SYSFS
71 default n
72 help
73 The driver model started out with a sysfs file intended to provide
74 a userspace hook for device power management. This feature has never
75 worked very well, except for limited testing purposes, and so it will
76 be removed. It's not clear that a generic mechanism could really
77 handle the wide variability of device power states; any replacements
78 are likely to be bus or driver specific.
79
80config SOFTWARE_SUSPEND 75config SOFTWARE_SUSPEND
81 bool "Software Suspend (Hibernation)" 76 bool "Software Suspend (Hibernation)"
82 depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) 77 depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f445b9cd60..324ac0188c 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -45,7 +45,7 @@ enum {
45 45
46static int hibernation_mode = HIBERNATION_SHUTDOWN; 46static int hibernation_mode = HIBERNATION_SHUTDOWN;
47 47
48struct hibernation_ops *hibernation_ops; 48static struct hibernation_ops *hibernation_ops;
49 49
50/** 50/**
51 * hibernation_set_ops - set the global hibernate operations 51 * hibernation_set_ops - set the global hibernate operations
@@ -54,7 +54,8 @@ struct hibernation_ops *hibernation_ops;
54 54
55void hibernation_set_ops(struct hibernation_ops *ops) 55void hibernation_set_ops(struct hibernation_ops *ops)
56{ 56{
57 if (ops && !(ops->prepare && ops->enter && ops->finish)) { 57 if (ops && !(ops->prepare && ops->enter && ops->finish
58 && ops->pre_restore && ops->restore_cleanup)) {
58 WARN_ON(1); 59 WARN_ON(1);
59 return; 60 return;
60 } 61 }
@@ -74,9 +75,9 @@ void hibernation_set_ops(struct hibernation_ops *ops)
74 * platform driver if so configured and return an error code if it fails 75 * platform driver if so configured and return an error code if it fails
75 */ 76 */
76 77
77static int platform_prepare(void) 78static int platform_prepare(int platform_mode)
78{ 79{
79 return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? 80 return (platform_mode && hibernation_ops) ?
80 hibernation_ops->prepare() : 0; 81 hibernation_ops->prepare() : 0;
81} 82}
82 83
@@ -85,13 +86,145 @@ static int platform_prepare(void)
85 * using the platform driver (must be called after platform_prepare()) 86 * using the platform driver (must be called after platform_prepare())
86 */ 87 */
87 88
88static void platform_finish(void) 89static void platform_finish(int platform_mode)
89{ 90{
90 if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) 91 if (platform_mode && hibernation_ops)
91 hibernation_ops->finish(); 92 hibernation_ops->finish();
92} 93}
93 94
94/** 95/**
96 * platform_pre_restore - prepare the platform for the restoration from a
97 * hibernation image. If the restore fails after this function has been
98 * called, platform_restore_cleanup() must be called.
99 */
100
101static int platform_pre_restore(int platform_mode)
102{
103 return (platform_mode && hibernation_ops) ?
104 hibernation_ops->pre_restore() : 0;
105}
106
107/**
108 * platform_restore_cleanup - switch the platform to the normal mode of
109 * operation after a failing restore. If platform_pre_restore() has been
110 * called before the failing restore, this function must be called too,
111 * regardless of the result of platform_pre_restore().
112 */
113
114static void platform_restore_cleanup(int platform_mode)
115{
116 if (platform_mode && hibernation_ops)
117 hibernation_ops->restore_cleanup();
118}
119
120/**
121 * hibernation_snapshot - quiesce devices and create the hibernation
122 * snapshot image.
123 * @platform_mode - if set, use the platform driver, if available, to
124 * prepare the platform firmware for the power transition.
125 *
126 * Must be called with pm_mutex held
127 */
128
129int hibernation_snapshot(int platform_mode)
130{
131 int error;
132
133 /* Free memory before shutting down devices. */
134 error = swsusp_shrink_memory();
135 if (error)
136 return error;
137
138 suspend_console();
139 error = device_suspend(PMSG_FREEZE);
140 if (error)
141 goto Resume_console;
142
143 error = platform_prepare(platform_mode);
144 if (error)
145 goto Resume_devices;
146
147 error = disable_nonboot_cpus();
148 if (!error) {
149 if (hibernation_mode != HIBERNATION_TEST) {
150 in_suspend = 1;
151 error = swsusp_suspend();
152 /* Control returns here after successful restore */
153 } else {
154 printk("swsusp debug: Waiting for 5 seconds.\n");
155 mdelay(5000);
156 }
157 }
158 enable_nonboot_cpus();
159 Resume_devices:
160 platform_finish(platform_mode);
161 device_resume();
162 Resume_console:
163 resume_console();
164 return error;
165}
166
167/**
168 * hibernation_restore - quiesce devices and restore the hibernation
169 * snapshot image. If successful, control returns in hibernation_snapshot()
170 * @platform_mode - if set, use the platform driver, if available, to
171 * prepare the platform firmware for the transition.
172 *
173 * Must be called with pm_mutex held
174 */
175
176int hibernation_restore(int platform_mode)
177{
178 int error;
179
180 pm_prepare_console();
181 suspend_console();
182 error = device_suspend(PMSG_PRETHAW);
183 if (error)
184 goto Finish;
185
186 error = platform_pre_restore(platform_mode);
187 if (!error) {
188 error = disable_nonboot_cpus();
189 if (!error)
190 error = swsusp_resume();
191 enable_nonboot_cpus();
192 }
193 platform_restore_cleanup(platform_mode);
194 device_resume();
195 Finish:
196 resume_console();
197 pm_restore_console();
198 return error;
199}
200
201/**
202 * hibernation_platform_enter - enter the hibernation state using the
203 * platform driver (if available)
204 */
205
206int hibernation_platform_enter(void)
207{
208 int error;
209
210 if (hibernation_ops) {
211 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
212 /*
213 * We have cancelled the power transition by running
214 * hibernation_ops->finish() before saving the image, so we
215 * should let the firmware know that we're going to enter the
216 * sleep state after all
217 */
218 error = hibernation_ops->prepare();
219 if (!error)
220 error = hibernation_ops->enter();
221 } else {
222 error = -ENOSYS;
223 }
224 return error;
225}
226
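
platform_prepare(), platform_finish(), platform_pre_restore() and platform_restore_cleanup() above all follow one guard pattern: call into hibernation_ops only when platform mode was requested and an ops structure is registered, otherwise report success and do nothing. A standalone sketch of that pattern with an invented ops struct, not the real hibernation_ops.

    #include <stdio.h>

    struct platform_ops {
    	int (*prepare)(void);
    	void (*finish)(void);
    };

    static struct platform_ops *registered_ops;

    static int demo_prepare(void) { printf("platform prepare\n"); return 0; }
    static void demo_finish(void) { printf("platform finish\n"); }

    static int platform_prepare(int platform_mode)
    {
    	return (platform_mode && registered_ops) ?
    		registered_ops->prepare() : 0;
    }

    static void platform_finish(int platform_mode)
    {
    	if (platform_mode && registered_ops)
    		registered_ops->finish();
    }

    int main(void)
    {
    	static struct platform_ops ops = { demo_prepare, demo_finish };

    	/* nothing registered yet: both helpers are harmless no-ops */
    	if (!platform_prepare(1))
    		platform_finish(1);

    	registered_ops = &ops;			/* like hibernation_set_ops() */
    	if (!platform_prepare(1))
    		platform_finish(1);
    	return 0;
    }
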
227/**
95 * power_down - Shut the machine down for hibernation. 228 * power_down - Shut the machine down for hibernation.
96 * 229 *
97 * Use the platform driver, if configured so; otherwise try 230 * Use the platform driver, if configured so; otherwise try
@@ -111,11 +244,7 @@ static void power_down(void)
111 kernel_restart(NULL); 244 kernel_restart(NULL);
112 break; 245 break;
113 case HIBERNATION_PLATFORM: 246 case HIBERNATION_PLATFORM:
114 if (hibernation_ops) { 247 hibernation_platform_enter();
115 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
116 hibernation_ops->enter();
117 break;
118 }
119 } 248 }
120 kernel_halt(); 249 kernel_halt();
121 /* 250 /*
@@ -152,9 +281,16 @@ int hibernate(void)
152{ 281{
153 int error; 282 int error;
154 283
284 mutex_lock(&pm_mutex);
155 /* The snapshot device should not be opened while we're running */ 285 /* The snapshot device should not be opened while we're running */
156 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) 286 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
157 return -EBUSY; 287 error = -EBUSY;
288 goto Unlock;
289 }
290
291 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
292 if (error)
293 goto Exit;
158 294
159 /* Allocate memory management structures */ 295 /* Allocate memory management structures */
160 error = create_basic_memory_bitmaps(); 296 error = create_basic_memory_bitmaps();
@@ -165,75 +301,35 @@ int hibernate(void)
165 if (error) 301 if (error)
166 goto Finish; 302 goto Finish;
167 303
168 mutex_lock(&pm_mutex);
169 if (hibernation_mode == HIBERNATION_TESTPROC) { 304 if (hibernation_mode == HIBERNATION_TESTPROC) {
170 printk("swsusp debug: Waiting for 5 seconds.\n"); 305 printk("swsusp debug: Waiting for 5 seconds.\n");
171 mdelay(5000); 306 mdelay(5000);
172 goto Thaw; 307 goto Thaw;
173 } 308 }
309 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
310 if (in_suspend && !error) {
311 unsigned int flags = 0;
174 312
175 /* Free memory before shutting down devices. */ 313 if (hibernation_mode == HIBERNATION_PLATFORM)
176 error = swsusp_shrink_memory(); 314 flags |= SF_PLATFORM_MODE;
177 if (error)
178 goto Thaw;
179
180 error = platform_prepare();
181 if (error)
182 goto Thaw;
183
184 suspend_console();
185 error = device_suspend(PMSG_FREEZE);
186 if (error) {
187 printk(KERN_ERR "PM: Some devices failed to suspend\n");
188 goto Resume_devices;
189 }
190 error = disable_nonboot_cpus();
191 if (error)
192 goto Enable_cpus;
193
194 if (hibernation_mode == HIBERNATION_TEST) {
195 printk("swsusp debug: Waiting for 5 seconds.\n");
196 mdelay(5000);
197 goto Enable_cpus;
198 }
199
200 pr_debug("PM: snapshotting memory.\n");
201 in_suspend = 1;
202 error = swsusp_suspend();
203 if (error)
204 goto Enable_cpus;
205
206 if (in_suspend) {
207 enable_nonboot_cpus();
208 platform_finish();
209 device_resume();
210 resume_console();
211 pr_debug("PM: writing image.\n"); 315 pr_debug("PM: writing image.\n");
212 error = swsusp_write(); 316 error = swsusp_write(flags);
317 swsusp_free();
213 if (!error) 318 if (!error)
214 power_down(); 319 power_down();
215 else {
216 swsusp_free();
217 goto Thaw;
218 }
219 } else { 320 } else {
220 pr_debug("PM: Image restored successfully.\n"); 321 pr_debug("PM: Image restored successfully.\n");
322 swsusp_free();
221 } 323 }
222
223 swsusp_free();
224 Enable_cpus:
225 enable_nonboot_cpus();
226 Resume_devices:
227 platform_finish();
228 device_resume();
229 resume_console();
230 Thaw: 324 Thaw:
231 mutex_unlock(&pm_mutex);
232 unprepare_processes(); 325 unprepare_processes();
233 Finish: 326 Finish:
234 free_basic_memory_bitmaps(); 327 free_basic_memory_bitmaps();
235 Exit: 328 Exit:
329 pm_notifier_call_chain(PM_POST_HIBERNATION);
236 atomic_inc(&snapshot_device_available); 330 atomic_inc(&snapshot_device_available);
331 Unlock:
332 mutex_unlock(&pm_mutex);
237 return error; 333 return error;
238} 334}
239 335
@@ -253,6 +349,7 @@ int hibernate(void)
253static int software_resume(void) 349static int software_resume(void)
254{ 350{
255 int error; 351 int error;
352 unsigned int flags;
256 353
257 mutex_lock(&pm_mutex); 354 mutex_lock(&pm_mutex);
258 if (!swsusp_resume_device) { 355 if (!swsusp_resume_device) {
@@ -300,30 +397,12 @@ static int software_resume(void)
300 397
301 pr_debug("PM: Reading swsusp image.\n"); 398 pr_debug("PM: Reading swsusp image.\n");
302 399
303 error = swsusp_read(); 400 error = swsusp_read(&flags);
304 if (error) {
305 swsusp_free();
306 goto Thaw;
307 }
308
309 pr_debug("PM: Preparing devices for restore.\n");
310
311 suspend_console();
312 error = device_suspend(PMSG_PRETHAW);
313 if (error)
314 goto Free;
315
316 error = disable_nonboot_cpus();
317 if (!error) 401 if (!error)
318 swsusp_resume(); 402 hibernation_restore(flags & SF_PLATFORM_MODE);
319 403
320 enable_nonboot_cpus();
321 Free:
322 swsusp_free();
323 device_resume();
324 resume_console();
325 Thaw:
326 printk(KERN_ERR "PM: Restore failed, recovering.\n"); 404 printk(KERN_ERR "PM: Restore failed, recovering.\n");
405 swsusp_free();
327 unprepare_processes(); 406 unprepare_processes();
328 Done: 407 Done:
329 free_basic_memory_bitmaps(); 408 free_basic_memory_bitmaps();
@@ -333,7 +412,7 @@ static int software_resume(void)
333 Unlock: 412 Unlock:
334 mutex_unlock(&pm_mutex); 413 mutex_unlock(&pm_mutex);
335 pr_debug("PM: Resume from disk failed.\n"); 414 pr_debug("PM: Resume from disk failed.\n");
336 return 0; 415 return error;
337} 416}
338 417
339late_initcall(software_resume); 418late_initcall(software_resume);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index fc45ed2262..32147b57c3 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -23,6 +23,8 @@
23 23
24#include "power.h" 24#include "power.h"
25 25
26BLOCKING_NOTIFIER_HEAD(pm_chain_head);
27
26/*This is just an arbitrary number */ 28/*This is just an arbitrary number */
27#define FREE_PAGE_NUMBER (100) 29#define FREE_PAGE_NUMBER (100)
28 30
@@ -63,14 +65,11 @@ static inline void pm_finish(suspend_state_t state)
63 65
64/** 66/**
65 * suspend_prepare - Do prep work before entering low-power state. 67 * suspend_prepare - Do prep work before entering low-power state.
66 * @state: State we're entering.
67 * 68 *
68 * This is common code that is called for each state that we're 69 * This is common code that is called for each state that we're entering.
69 * entering. Allocate a console, stop all processes, then make sure 70 * Run suspend notifiers, allocate a console and stop all processes.
70 * the platform can enter the requested state.
71 */ 71 */
72 72static int suspend_prepare(void)
73static int suspend_prepare(suspend_state_t state)
74{ 73{
75 int error; 74 int error;
76 unsigned int free_pages; 75 unsigned int free_pages;
@@ -78,6 +77,10 @@ static int suspend_prepare(suspend_state_t state)
78 if (!pm_ops || !pm_ops->enter) 77 if (!pm_ops || !pm_ops->enter)
79 return -EPERM; 78 return -EPERM;
80 79
80 error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
81 if (error)
82 goto Finish;
83
81 pm_prepare_console(); 84 pm_prepare_console();
82 85
83 if (freeze_processes()) { 86 if (freeze_processes()) {
@@ -85,46 +88,23 @@ static int suspend_prepare(suspend_state_t state)
85 goto Thaw; 88 goto Thaw;
86 } 89 }
87 90
88 if ((free_pages = global_page_state(NR_FREE_PAGES)) 91 free_pages = global_page_state(NR_FREE_PAGES);
89 < FREE_PAGE_NUMBER) { 92 if (free_pages < FREE_PAGE_NUMBER) {
90 pr_debug("PM: free some memory\n"); 93 pr_debug("PM: free some memory\n");
91 shrink_all_memory(FREE_PAGE_NUMBER - free_pages); 94 shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
92 if (nr_free_pages() < FREE_PAGE_NUMBER) { 95 if (nr_free_pages() < FREE_PAGE_NUMBER) {
93 error = -ENOMEM; 96 error = -ENOMEM;
94 printk(KERN_ERR "PM: No enough memory\n"); 97 printk(KERN_ERR "PM: No enough memory\n");
95 goto Thaw;
96 } 98 }
97 } 99 }
98
99 if (pm_ops->set_target) {
100 error = pm_ops->set_target(state);
101 if (error)
102 goto Thaw;
103 }
104 suspend_console();
105 error = device_suspend(PMSG_SUSPEND);
106 if (error) {
107 printk(KERN_ERR "Some devices failed to suspend\n");
108 goto Resume_console;
109 }
110 if (pm_ops->prepare) {
111 if ((error = pm_ops->prepare(state)))
112 goto Resume_devices;
113 }
114
115 error = disable_nonboot_cpus();
116 if (!error) 100 if (!error)
117 return 0; 101 return 0;
118 102
119 enable_nonboot_cpus();
120 pm_finish(state);
121 Resume_devices:
122 device_resume();
123 Resume_console:
124 resume_console();
125 Thaw: 103 Thaw:
126 thaw_processes(); 104 thaw_processes();
127 pm_restore_console(); 105 pm_restore_console();
106 Finish:
107 pm_notifier_call_chain(PM_POST_SUSPEND);
128 return error; 108 return error;
129} 109}
130 110
@@ -140,6 +120,12 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
140 local_irq_enable(); 120 local_irq_enable();
141} 121}
142 122
123/**
124 * suspend_enter - enter the desired system sleep state.
125 * @state: state to enter
126 *
127 * This function should be called after devices have been suspended.
128 */
143int suspend_enter(suspend_state_t state) 129int suspend_enter(suspend_state_t state)
144{ 130{
145 int error = 0; 131 int error = 0;
@@ -159,23 +145,58 @@ int suspend_enter(suspend_state_t state)
159 return error; 145 return error;
160} 146}
161 147
148/**
149 * suspend_devices_and_enter - suspend devices and enter the desired system sleep
150 * state.
151 * @state: state to enter
152 */
153int suspend_devices_and_enter(suspend_state_t state)
154{
155 int error;
156
157 if (!pm_ops)
158 return -ENOSYS;
159
160 if (pm_ops->set_target) {
161 error = pm_ops->set_target(state);
162 if (error)
163 return error;
164 }
165 suspend_console();
166 error = device_suspend(PMSG_SUSPEND);
167 if (error) {
168 printk(KERN_ERR "Some devices failed to suspend\n");
169 goto Resume_console;
170 }
171 if (pm_ops->prepare) {
172 error = pm_ops->prepare(state);
173 if (error)
174 goto Resume_devices;
175 }
176 error = disable_nonboot_cpus();
177 if (!error)
178 suspend_enter(state);
179
180 enable_nonboot_cpus();
181 pm_finish(state);
182 Resume_devices:
183 device_resume();
184 Resume_console:
185 resume_console();
186 return error;
187}
162 188
163/** 189/**
164 * suspend_finish - Do final work before exiting suspend sequence. 190 * suspend_finish - Do final work before exiting suspend sequence.
165 * @state: State we're coming out of.
166 * 191 *
167 * Call platform code to clean up, restart processes, and free the 192 * Call platform code to clean up, restart processes, and free the
168 * console that we've allocated. This is not called for suspend-to-disk. 193 * console that we've allocated. This is not called for suspend-to-disk.
169 */ 194 */
170 195static void suspend_finish(void)
171static void suspend_finish(suspend_state_t state)
172{ 196{
173 enable_nonboot_cpus();
174 pm_finish(state);
175 device_resume();
176 resume_console();
177 thaw_processes(); 197 thaw_processes();
178 pm_restore_console(); 198 pm_restore_console();
199 pm_notifier_call_chain(PM_POST_SUSPEND);
179} 200}
180 201
181 202
@@ -207,7 +228,6 @@ static inline int valid_state(suspend_state_t state)
207 * Then, do the setup for suspend, enter the state, and cleaup (after 228 * Then, do the setup for suspend, enter the state, and cleaup (after
208 * we've woken up). 229 * we've woken up).
209 */ 230 */
210
211static int enter_state(suspend_state_t state) 231static int enter_state(suspend_state_t state)
212{ 232{
213 int error; 233 int error;
@@ -218,14 +238,14 @@ static int enter_state(suspend_state_t state)
218 return -EBUSY; 238 return -EBUSY;
219 239
220 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); 240 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
221 if ((error = suspend_prepare(state))) 241 if ((error = suspend_prepare()))
222 goto Unlock; 242 goto Unlock;
223 243
224 pr_debug("PM: Entering %s sleep\n", pm_states[state]); 244 pr_debug("PM: Entering %s sleep\n", pm_states[state]);
225 error = suspend_enter(state); 245 error = suspend_devices_and_enter(state);
226 246
227 pr_debug("PM: Finishing wakeup.\n"); 247 pr_debug("PM: Finishing wakeup.\n");
228 suspend_finish(state); 248 suspend_finish();
229 Unlock: 249 Unlock:
230 mutex_unlock(&pm_mutex); 250 mutex_unlock(&pm_mutex);
231 return error; 251 return error;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 5138148710..5f24c786f8 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -25,7 +25,10 @@ struct swsusp_info {
25 */ 25 */
26#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) 26#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT)
27 27
28extern struct hibernation_ops *hibernation_ops; 28/* kernel/power/disk.c */
29extern int hibernation_snapshot(int platform_mode);
30extern int hibernation_restore(int platform_mode);
31extern int hibernation_platform_enter(void);
29#endif 32#endif
30 33
31extern int pfn_is_nosave(unsigned long); 34extern int pfn_is_nosave(unsigned long);
@@ -152,16 +155,34 @@ extern sector_t alloc_swapdev_block(int swap);
152extern void free_all_swap_pages(int swap); 155extern void free_all_swap_pages(int swap);
153extern int swsusp_swap_in_use(void); 156extern int swsusp_swap_in_use(void);
154 157
158/*
159 * Flags that can be passed from the hibernating kernel to the "boot" kernel in
160 * the image header.
161 */
162#define SF_PLATFORM_MODE 1
163
164/* kernel/power/disk.c */
155extern int swsusp_check(void); 165extern int swsusp_check(void);
156extern int swsusp_shrink_memory(void); 166extern int swsusp_shrink_memory(void);
157extern void swsusp_free(void); 167extern void swsusp_free(void);
158extern int swsusp_suspend(void); 168extern int swsusp_suspend(void);
159extern int swsusp_resume(void); 169extern int swsusp_resume(void);
160extern int swsusp_read(void); 170extern int swsusp_read(unsigned int *flags_p);
161extern int swsusp_write(void); 171extern int swsusp_write(unsigned int flags);
162extern void swsusp_close(void); 172extern void swsusp_close(void);
163extern int suspend_enter(suspend_state_t state);
164 173
165struct timeval; 174struct timeval;
175/* kernel/power/swsusp.c */
166extern void swsusp_show_speed(struct timeval *, struct timeval *, 176extern void swsusp_show_speed(struct timeval *, struct timeval *,
167 unsigned int, char *); 177 unsigned int, char *);
178
179/* kernel/power/main.c */
180extern int suspend_enter(suspend_state_t state);
181extern int suspend_devices_and_enter(suspend_state_t state);
182extern struct blocking_notifier_head pm_chain_head;
183
184static inline int pm_notifier_call_chain(unsigned long val)
185{
186 return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
187 == NOTIFY_BAD) ? -EINVAL : 0;
188}
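
pm_chain_head and pm_notifier_call_chain() above are the plumbing behind the new PM_HIBERNATION_PREPARE/PM_POST_HIBERNATION and PM_SUSPEND_PREPARE/PM_POST_SUSPEND events. A minimal sketch of built-in code hooking these events; the callback name is illustrative and the event constants are assumed to come from <linux/notifier.h>:

#include <linux/init.h>
#include <linux/notifier.h>	/* PM_* event constants assumed to live here */

/* Normally comes from kernel/power/power.h (hunk above); repeated for the sketch. */
extern struct blocking_notifier_head pm_chain_head;

/* Hypothetical callback reacting to the new PM notifier events. */
static int example_pm_callback(struct notifier_block *nb,
			       unsigned long event, void *unused)
{
	switch (event) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		/* quiesce private state before tasks are frozen */
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		/* resume normal operation after tasks are thawed */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_pm_nb = {
	.notifier_call = example_pm_callback,
};

static int __init example_pm_init(void)
{
	return blocking_notifier_chain_register(&pm_chain_head, &example_pm_nb);
}
late_initcall(example_pm_init);
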
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e0233d8422..3434940a3d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -40,7 +40,7 @@ static inline void frozen_process(void)
40 current->flags |= PF_FROZEN; 40 current->flags |= PF_FROZEN;
41 wmb(); 41 wmb();
42 } 42 }
43 clear_tsk_thread_flag(current, TIF_FREEZE); 43 clear_freeze_flag(current);
44} 44}
45 45
46/* Refrigerator is place where frozen processes are stored :-). */ 46/* Refrigerator is place where frozen processes are stored :-). */
@@ -72,20 +72,19 @@ void refrigerator(void)
72 schedule(); 72 schedule();
73 } 73 }
74 pr_debug("%s left refrigerator\n", current->comm); 74 pr_debug("%s left refrigerator\n", current->comm);
75 current->state = save; 75 __set_current_state(save);
76} 76}
77 77
78static inline void freeze_process(struct task_struct *p) 78static void freeze_task(struct task_struct *p)
79{ 79{
80 unsigned long flags; 80 unsigned long flags;
81 81
82 if (!freezing(p)) { 82 if (!freezing(p)) {
83 rmb(); 83 rmb();
84 if (!frozen(p)) { 84 if (!frozen(p)) {
85 set_freeze_flag(p);
85 if (p->state == TASK_STOPPED) 86 if (p->state == TASK_STOPPED)
86 force_sig_specific(SIGSTOP, p); 87 force_sig_specific(SIGSTOP, p);
87
88 freeze(p);
89 spin_lock_irqsave(&p->sighand->siglock, flags); 88 spin_lock_irqsave(&p->sighand->siglock, flags);
90 signal_wake_up(p, p->state == TASK_STOPPED); 89 signal_wake_up(p, p->state == TASK_STOPPED);
91 spin_unlock_irqrestore(&p->sighand->siglock, flags); 90 spin_unlock_irqrestore(&p->sighand->siglock, flags);
@@ -99,19 +98,14 @@ static void cancel_freezing(struct task_struct *p)
99 98
100 if (freezing(p)) { 99 if (freezing(p)) {
101 pr_debug(" clean up: %s\n", p->comm); 100 pr_debug(" clean up: %s\n", p->comm);
102 do_not_freeze(p); 101 clear_freeze_flag(p);
103 spin_lock_irqsave(&p->sighand->siglock, flags); 102 spin_lock_irqsave(&p->sighand->siglock, flags);
104 recalc_sigpending_and_wake(p); 103 recalc_sigpending_and_wake(p);
105 spin_unlock_irqrestore(&p->sighand->siglock, flags); 104 spin_unlock_irqrestore(&p->sighand->siglock, flags);
106 } 105 }
107} 106}
108 107
109static inline int is_user_space(struct task_struct *p) 108static int try_to_freeze_tasks(int freeze_user_space)
110{
111 return p->mm && !(p->flags & PF_BORROWED_MM);
112}
113
114static unsigned int try_to_freeze_tasks(int freeze_user_space)
115{ 109{
116 struct task_struct *g, *p; 110 struct task_struct *g, *p;
117 unsigned long end_time; 111 unsigned long end_time;
@@ -122,26 +116,40 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
122 todo = 0; 116 todo = 0;
123 read_lock(&tasklist_lock); 117 read_lock(&tasklist_lock);
124 do_each_thread(g, p) { 118 do_each_thread(g, p) {
125 if (!freezeable(p)) 119 if (frozen(p) || !freezeable(p))
126 continue; 120 continue;
127 121
128 if (frozen(p)) 122 if (freeze_user_space) {
129 continue; 123 if (p->state == TASK_TRACED &&
130 124 frozen(p->parent)) {
131 if (p->state == TASK_TRACED && frozen(p->parent)) { 125 cancel_freezing(p);
132 cancel_freezing(p); 126 continue;
133 continue; 127 }
128 /*
129 * Kernel threads should not have TIF_FREEZE set
130 * at this point, so we must ensure that either
131 * p->mm is not NULL *and* PF_BORROWED_MM is
132 * unset, or TIF_FREEZE is left unset.
133 * The task_lock() is necessary to prevent races
134 * with exit_mm() or use_mm()/unuse_mm() from
135 * occurring.
136 */
137 task_lock(p);
138 if (!p->mm || (p->flags & PF_BORROWED_MM)) {
139 task_unlock(p);
140 continue;
141 }
142 freeze_task(p);
143 task_unlock(p);
144 } else {
145 freeze_task(p);
134 } 146 }
135 if (freeze_user_space && !is_user_space(p))
136 continue;
137
138 freeze_process(p);
139 if (!freezer_should_skip(p)) 147 if (!freezer_should_skip(p))
140 todo++; 148 todo++;
141 } while_each_thread(g, p); 149 } while_each_thread(g, p);
142 read_unlock(&tasklist_lock); 150 read_unlock(&tasklist_lock);
143 yield(); /* Yield is okay here */ 151 yield(); /* Yield is okay here */
144 if (todo && time_after(jiffies, end_time)) 152 if (time_after(jiffies, end_time))
145 break; 153 break;
146 } while (todo); 154 } while (todo);
147 155
@@ -152,49 +160,41 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
152 * but it cleans up leftover PF_FREEZE requests. 160 * but it cleans up leftover PF_FREEZE requests.
153 */ 161 */
154 printk("\n"); 162 printk("\n");
155 printk(KERN_ERR "Stopping %s timed out after %d seconds " 163 printk(KERN_ERR "Freezing of %s timed out after %d seconds "
156 "(%d tasks refusing to freeze):\n", 164 "(%d tasks refusing to freeze):\n",
157 freeze_user_space ? "user space processes" : 165 freeze_user_space ? "user space " : "tasks ",
158 "kernel threads",
159 TIMEOUT / HZ, todo); 166 TIMEOUT / HZ, todo);
167 show_state();
160 read_lock(&tasklist_lock); 168 read_lock(&tasklist_lock);
161 do_each_thread(g, p) { 169 do_each_thread(g, p) {
162 if (freeze_user_space && !is_user_space(p))
163 continue;
164
165 task_lock(p); 170 task_lock(p);
166 if (freezeable(p) && !frozen(p) && 171 if (freezing(p) && !freezer_should_skip(p))
167 !freezer_should_skip(p))
168 printk(KERN_ERR " %s\n", p->comm); 172 printk(KERN_ERR " %s\n", p->comm);
169
170 cancel_freezing(p); 173 cancel_freezing(p);
171 task_unlock(p); 174 task_unlock(p);
172 } while_each_thread(g, p); 175 } while_each_thread(g, p);
173 read_unlock(&tasklist_lock); 176 read_unlock(&tasklist_lock);
174 } 177 }
175 178
176 return todo; 179 return todo ? -EBUSY : 0;
177} 180}
178 181
179/** 182/**
180 * freeze_processes - tell processes to enter the refrigerator 183 * freeze_processes - tell processes to enter the refrigerator
181 *
182 * Returns 0 on success, or the number of processes that didn't freeze,
183 * although they were told to.
184 */ 184 */
185int freeze_processes(void) 185int freeze_processes(void)
186{ 186{
187 unsigned int nr_unfrozen; 187 int error;
188 188
189 printk("Stopping tasks ... "); 189 printk("Stopping tasks ... ");
190 nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); 190 error = try_to_freeze_tasks(FREEZER_USER_SPACE);
191 if (nr_unfrozen) 191 if (error)
192 return nr_unfrozen; 192 return error;
193 193
194 sys_sync(); 194 sys_sync();
195 nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); 195 error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS);
196 if (nr_unfrozen) 196 if (error)
197 return nr_unfrozen; 197 return error;
198 198
199 printk("done.\n"); 199 printk("done.\n");
200 BUG_ON(in_atomic()); 200 BUG_ON(in_atomic());
@@ -210,7 +210,7 @@ static void thaw_tasks(int thaw_user_space)
210 if (!freezeable(p)) 210 if (!freezeable(p))
211 continue; 211 continue;
212 212
213 if (is_user_space(p) == !thaw_user_space) 213 if (!p->mm == thaw_user_space)
214 continue; 214 continue;
215 215
216 thaw_process(p); 216 thaw_process(p);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 8b1a1b8371..917aba1005 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -33,8 +33,9 @@ extern char resume_file[];
33#define SWSUSP_SIG "S1SUSPEND" 33#define SWSUSP_SIG "S1SUSPEND"
34 34
35struct swsusp_header { 35struct swsusp_header {
36 char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; 36 char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
37 sector_t image; 37 sector_t image;
38 unsigned int flags; /* Flags to pass to the "boot" kernel */
38 char orig_sig[10]; 39 char orig_sig[10];
39 char sig[10]; 40 char sig[10];
40} __attribute__((packed)); 41} __attribute__((packed));
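
The extra sizeof(int) in the reserved[] arithmetic above makes room for the new flags word while keeping the signature block at the end of the page. A quick compile-time sanity check of that layout, sketched as something that would sit next to the struct in kernel/power/swap.c:

/* Would live in kernel/power/swap.c, next to the struct definition above. */
static inline void swsusp_header_layout_check(void)
{
	/*
	 * reserved[] is PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int):
	 * 20 bytes for orig_sig[10] + sig[10], sizeof(sector_t) for image,
	 * sizeof(int) for the new flags word. Packed together they must
	 * still fill exactly one page, keeping the signature at its end.
	 */
	BUILD_BUG_ON(sizeof(struct swsusp_header) != PAGE_SIZE);
}
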
@@ -138,7 +139,7 @@ static int wait_on_bio_chain(struct bio **bio_chain)
138 * Saving part 139 * Saving part
139 */ 140 */
140 141
141static int mark_swapfiles(sector_t start) 142static int mark_swapfiles(sector_t start, unsigned int flags)
142{ 143{
143 int error; 144 int error;
144 145
@@ -148,6 +149,7 @@ static int mark_swapfiles(sector_t start)
148 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); 149 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
149 memcpy(swsusp_header->sig,SWSUSP_SIG, 10); 150 memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
150 swsusp_header->image = start; 151 swsusp_header->image = start;
152 swsusp_header->flags = flags;
151 error = bio_write_page(swsusp_resume_block, 153 error = bio_write_page(swsusp_resume_block,
152 swsusp_header, NULL); 154 swsusp_header, NULL);
153 } else { 155 } else {
@@ -369,6 +371,7 @@ static int enough_swap(unsigned int nr_pages)
369 371
370/** 372/**
371 * swsusp_write - Write entire image and metadata. 373 * swsusp_write - Write entire image and metadata.
374 * @flags: flags to pass to the "boot" kernel in the image header
372 * 375 *
373 * It is important _NOT_ to umount filesystems at this point. We want 376 * It is important _NOT_ to umount filesystems at this point. We want
374 * them synced (in case something goes wrong) but we DO not want to mark 377 * them synced (in case something goes wrong) but we DO not want to mark
@@ -376,7 +379,7 @@ static int enough_swap(unsigned int nr_pages)
376 * correctly, we'll mark system clean, anyway.) 379 * correctly, we'll mark system clean, anyway.)
377 */ 380 */
378 381
379int swsusp_write(void) 382int swsusp_write(unsigned int flags)
380{ 383{
381 struct swap_map_handle handle; 384 struct swap_map_handle handle;
382 struct snapshot_handle snapshot; 385 struct snapshot_handle snapshot;
@@ -415,7 +418,7 @@ int swsusp_write(void)
415 if (!error) { 418 if (!error) {
416 flush_swap_writer(&handle); 419 flush_swap_writer(&handle);
417 printk("S"); 420 printk("S");
418 error = mark_swapfiles(start); 421 error = mark_swapfiles(start, flags);
419 printk("|\n"); 422 printk("|\n");
420 } 423 }
421 } 424 }
@@ -540,13 +543,20 @@ static int load_image(struct swap_map_handle *handle,
540 return error; 543 return error;
541} 544}
542 545
543int swsusp_read(void) 546/**
547 * swsusp_read - read the hibernation image.
548 * @flags_p: flags passed by the "frozen" kernel in the image header should
549 * be written into this memory location
550 */
551
552int swsusp_read(unsigned int *flags_p)
544{ 553{
545 int error; 554 int error;
546 struct swap_map_handle handle; 555 struct swap_map_handle handle;
547 struct snapshot_handle snapshot; 556 struct snapshot_handle snapshot;
548 struct swsusp_info *header; 557 struct swsusp_info *header;
549 558
559 *flags_p = swsusp_header->flags;
550 if (IS_ERR(resume_bdev)) { 560 if (IS_ERR(resume_bdev)) {
551 pr_debug("swsusp: block device not initialised\n"); 561 pr_debug("swsusp: block device not initialised\n");
552 return PTR_ERR(resume_bdev); 562 return PTR_ERR(resume_bdev);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index d65305b515..bd0723a7df 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -128,92 +128,6 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
128 return res; 128 return res;
129} 129}
130 130
131static inline int platform_prepare(void)
132{
133 int error = 0;
134
135 if (hibernation_ops)
136 error = hibernation_ops->prepare();
137
138 return error;
139}
140
141static inline void platform_finish(void)
142{
143 if (hibernation_ops)
144 hibernation_ops->finish();
145}
146
147static inline int snapshot_suspend(int platform_suspend)
148{
149 int error;
150
151 mutex_lock(&pm_mutex);
152 /* Free memory before shutting down devices. */
153 error = swsusp_shrink_memory();
154 if (error)
155 goto Finish;
156
157 if (platform_suspend) {
158 error = platform_prepare();
159 if (error)
160 goto Finish;
161 }
162 suspend_console();
163 error = device_suspend(PMSG_FREEZE);
164 if (error)
165 goto Resume_devices;
166
167 error = disable_nonboot_cpus();
168 if (!error) {
169 in_suspend = 1;
170 error = swsusp_suspend();
171 }
172 enable_nonboot_cpus();
173 Resume_devices:
174 if (platform_suspend)
175 platform_finish();
176
177 device_resume();
178 resume_console();
179 Finish:
180 mutex_unlock(&pm_mutex);
181 return error;
182}
183
184static inline int snapshot_restore(int platform_suspend)
185{
186 int error;
187
188 mutex_lock(&pm_mutex);
189 pm_prepare_console();
190 if (platform_suspend) {
191 error = platform_prepare();
192 if (error)
193 goto Finish;
194 }
195 suspend_console();
196 error = device_suspend(PMSG_PRETHAW);
197 if (error)
198 goto Resume_devices;
199
200 error = disable_nonboot_cpus();
201 if (!error)
202 error = swsusp_resume();
203
204 enable_nonboot_cpus();
205 Resume_devices:
206 if (platform_suspend)
207 platform_finish();
208
209 device_resume();
210 resume_console();
211 Finish:
212 pm_restore_console();
213 mutex_unlock(&pm_mutex);
214 return error;
215}
216
217static int snapshot_ioctl(struct inode *inode, struct file *filp, 131static int snapshot_ioctl(struct inode *inode, struct file *filp,
218 unsigned int cmd, unsigned long arg) 132 unsigned int cmd, unsigned long arg)
219{ 133{
@@ -237,10 +151,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
237 if (data->frozen) 151 if (data->frozen)
238 break; 152 break;
239 mutex_lock(&pm_mutex); 153 mutex_lock(&pm_mutex);
240 if (freeze_processes()) { 154 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
241 thaw_processes(); 155 if (!error) {
242 error = -EBUSY; 156 error = freeze_processes();
157 if (error)
158 thaw_processes();
243 } 159 }
160 if (error)
161 pm_notifier_call_chain(PM_POST_HIBERNATION);
244 mutex_unlock(&pm_mutex); 162 mutex_unlock(&pm_mutex);
245 if (!error) 163 if (!error)
246 data->frozen = 1; 164 data->frozen = 1;
@@ -251,6 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
251 break; 169 break;
252 mutex_lock(&pm_mutex); 170 mutex_lock(&pm_mutex);
253 thaw_processes(); 171 thaw_processes();
172 pm_notifier_call_chain(PM_POST_HIBERNATION);
254 mutex_unlock(&pm_mutex); 173 mutex_unlock(&pm_mutex);
255 data->frozen = 0; 174 data->frozen = 0;
256 break; 175 break;
@@ -260,7 +179,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
260 error = -EPERM; 179 error = -EPERM;
261 break; 180 break;
262 } 181 }
263 error = snapshot_suspend(data->platform_suspend); 182 error = hibernation_snapshot(data->platform_suspend);
264 if (!error) 183 if (!error)
265 error = put_user(in_suspend, (unsigned int __user *)arg); 184 error = put_user(in_suspend, (unsigned int __user *)arg);
266 if (!error) 185 if (!error)
@@ -274,7 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
274 error = -EPERM; 193 error = -EPERM;
275 break; 194 break;
276 } 195 }
277 error = snapshot_restore(data->platform_suspend); 196 error = hibernation_restore(data->platform_suspend);
278 break; 197 break;
279 198
280 case SNAPSHOT_FREE: 199 case SNAPSHOT_FREE:
@@ -336,47 +255,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
336 break; 255 break;
337 256
338 case SNAPSHOT_S2RAM: 257 case SNAPSHOT_S2RAM:
339 if (!pm_ops) {
340 error = -ENOSYS;
341 break;
342 }
343
344 if (!data->frozen) { 258 if (!data->frozen) {
345 error = -EPERM; 259 error = -EPERM;
346 break; 260 break;
347 } 261 }
348
349 if (!mutex_trylock(&pm_mutex)) { 262 if (!mutex_trylock(&pm_mutex)) {
350 error = -EBUSY; 263 error = -EBUSY;
351 break; 264 break;
352 } 265 }
353 266 /*
354 if (pm_ops->prepare) { 267 * Tasks are frozen and the notifiers have been called with
355 error = pm_ops->prepare(PM_SUSPEND_MEM); 268 * PM_HIBERNATION_PREPARE
356 if (error) 269 */
357 goto OutS3; 270 error = suspend_devices_and_enter(PM_SUSPEND_MEM);
358 }
359
360 /* Put devices to sleep */
361 suspend_console();
362 error = device_suspend(PMSG_SUSPEND);
363 if (error) {
364 printk(KERN_ERR "Failed to suspend some devices.\n");
365 } else {
366 error = disable_nonboot_cpus();
367 if (!error) {
368 /* Enter S3, system is already frozen */
369 suspend_enter(PM_SUSPEND_MEM);
370 enable_nonboot_cpus();
371 }
372 /* Wake up devices */
373 device_resume();
374 }
375 resume_console();
376 if (pm_ops->finish)
377 pm_ops->finish(PM_SUSPEND_MEM);
378
379 OutS3:
380 mutex_unlock(&pm_mutex); 271 mutex_unlock(&pm_mutex);
381 break; 272 break;
382 273
@@ -386,19 +277,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
386 switch (arg) { 277 switch (arg) {
387 278
388 case PMOPS_PREPARE: 279 case PMOPS_PREPARE:
389 if (hibernation_ops) { 280 data->platform_suspend = 1;
390 data->platform_suspend = 1; 281 error = 0;
391 error = 0;
392 } else {
393 error = -ENOSYS;
394 }
395 break; 282 break;
396 283
397 case PMOPS_ENTER: 284 case PMOPS_ENTER:
398 if (data->platform_suspend) { 285 if (data->platform_suspend)
399 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); 286 error = hibernation_platform_enter();
400 error = hibernation_ops->enter(); 287
401 }
402 break; 288 break;
403 289
404 case PMOPS_FINISH: 290 case PMOPS_FINISH:
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index b1d11f1c7c..82a558b655 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -142,7 +142,7 @@ static int may_attach(struct task_struct *task)
142 return -EPERM; 142 return -EPERM;
143 smp_rmb(); 143 smp_rmb();
144 if (task->mm) 144 if (task->mm)
145 dumpable = task->mm->dumpable; 145 dumpable = get_dumpable(task->mm);
146 if (!dumpable && !capable(CAP_SYS_PTRACE)) 146 if (!dumpable && !capable(CAP_SYS_PTRACE))
147 return -EPERM; 147 return -EPERM;
148 148
@@ -490,3 +490,22 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
490 return ret; 490 return ret;
491} 491}
492#endif /* __ARCH_SYS_PTRACE */ 492#endif /* __ARCH_SYS_PTRACE */
493
494int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data)
495{
496 unsigned long tmp;
497 int copied;
498
499 copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
500 if (copied != sizeof(tmp))
501 return -EIO;
502 return put_user(tmp, (unsigned long __user *)data);
503}
504
505int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
506{
507 int copied;
508
509 copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
510 return (copied == sizeof(data)) ? 0 : -EIO;
511}
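
generic_ptrace_peekdata() and generic_ptrace_pokedata() let architectures stop open-coding access_process_vm() for the common requests. A sketch of how an arch_ptrace() switch might delegate to them; the surrounding function is illustrative, not part of this patch, and the prototypes are assumed to be visible via <linux/ptrace.h>:

#include <linux/errno.h>
#include <linux/ptrace.h>	/* PTRACE_* requests; generic_ptrace_*() protos assumed */
#include <linux/sched.h>

/* Illustrative: the data-access arms of an architecture's ptrace handler. */
static long example_arch_ptrace(struct task_struct *child, long request,
				long addr, long data)
{
	switch (request) {
	case PTRACE_PEEKTEXT:	/* text and data share an address space here */
	case PTRACE_PEEKDATA:
		return generic_ptrace_peekdata(child, addr, data);
	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
		return generic_ptrace_pokedata(child, addr, data);
	default:
		return -EIO;	/* everything else stays arch-specific */
	}
}
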
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 55ba82a85a..ddff332477 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -40,6 +40,7 @@
40#include <linux/moduleparam.h> 40#include <linux/moduleparam.h>
41#include <linux/percpu.h> 41#include <linux/percpu.h>
42#include <linux/notifier.h> 42#include <linux/notifier.h>
43#include <linux/freezer.h>
43#include <linux/cpu.h> 44#include <linux/cpu.h>
44#include <linux/random.h> 45#include <linux/random.h>
45#include <linux/delay.h> 46#include <linux/delay.h>
@@ -518,7 +519,6 @@ rcu_torture_writer(void *arg)
518 519
519 VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); 520 VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
520 set_user_nice(current, 19); 521 set_user_nice(current, 19);
521 current->flags |= PF_NOFREEZE;
522 522
523 do { 523 do {
524 schedule_timeout_uninterruptible(1); 524 schedule_timeout_uninterruptible(1);
@@ -558,7 +558,6 @@ rcu_torture_fakewriter(void *arg)
558 558
559 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); 559 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
560 set_user_nice(current, 19); 560 set_user_nice(current, 19);
561 current->flags |= PF_NOFREEZE;
562 561
563 do { 562 do {
564 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 563 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
@@ -589,7 +588,6 @@ rcu_torture_reader(void *arg)
589 588
590 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 589 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
591 set_user_nice(current, 19); 590 set_user_nice(current, 19);
592 current->flags |= PF_NOFREEZE;
593 591
594 do { 592 do {
595 idx = cur_ops->readlock(); 593 idx = cur_ops->readlock();
diff --git a/kernel/relay.c b/kernel/relay.c
index a615a8f513..510fbbd7b5 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -80,7 +80,7 @@ static struct vm_operations_struct relay_file_mmap_ops = {
80 * 80 *
81 * Caller should already have grabbed mmap_sem. 81 * Caller should already have grabbed mmap_sem.
82 */ 82 */
83int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) 83static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
84{ 84{
85 unsigned long length = vma->vm_end - vma->vm_start; 85 unsigned long length = vma->vm_end - vma->vm_start;
86 struct file *filp = vma->vm_file; 86 struct file *filp = vma->vm_file;
@@ -145,7 +145,7 @@ depopulate:
145 * 145 *
146 * Returns channel buffer if successful, %NULL otherwise. 146 * Returns channel buffer if successful, %NULL otherwise.
147 */ 147 */
148struct rchan_buf *relay_create_buf(struct rchan *chan) 148static struct rchan_buf *relay_create_buf(struct rchan *chan)
149{ 149{
150 struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); 150 struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
151 if (!buf) 151 if (!buf)
@@ -175,7 +175,7 @@ free_buf:
175 * 175 *
176 * Should only be called from kref_put(). 176 * Should only be called from kref_put().
177 */ 177 */
178void relay_destroy_channel(struct kref *kref) 178static void relay_destroy_channel(struct kref *kref)
179{ 179{
180 struct rchan *chan = container_of(kref, struct rchan, kref); 180 struct rchan *chan = container_of(kref, struct rchan, kref);
181 kfree(chan); 181 kfree(chan);
@@ -185,7 +185,7 @@ void relay_destroy_channel(struct kref *kref)
185 * relay_destroy_buf - destroy an rchan_buf struct and associated buffer 185 * relay_destroy_buf - destroy an rchan_buf struct and associated buffer
186 * @buf: the buffer struct 186 * @buf: the buffer struct
187 */ 187 */
188void relay_destroy_buf(struct rchan_buf *buf) 188static void relay_destroy_buf(struct rchan_buf *buf)
189{ 189{
190 struct rchan *chan = buf->chan; 190 struct rchan *chan = buf->chan;
191 unsigned int i; 191 unsigned int i;
@@ -210,7 +210,7 @@ void relay_destroy_buf(struct rchan_buf *buf)
210 * rchan_buf_struct and the channel buffer. Should only be called from 210 * rchan_buf_struct and the channel buffer. Should only be called from
211 * kref_put(). 211 * kref_put().
212 */ 212 */
213void relay_remove_buf(struct kref *kref) 213static void relay_remove_buf(struct kref *kref)
214{ 214{
215 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); 215 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
216 buf->chan->cb->remove_buf_file(buf->dentry); 216 buf->chan->cb->remove_buf_file(buf->dentry);
@@ -223,11 +223,10 @@ void relay_remove_buf(struct kref *kref)
223 * 223 *
224 * Returns 1 if the buffer is empty, 0 otherwise. 224 * Returns 1 if the buffer is empty, 0 otherwise.
225 */ 225 */
226int relay_buf_empty(struct rchan_buf *buf) 226static int relay_buf_empty(struct rchan_buf *buf)
227{ 227{
228 return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; 228 return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
229} 229}
230EXPORT_SYMBOL_GPL(relay_buf_empty);
231 230
232/** 231/**
233 * relay_buf_full - boolean, is the channel buffer full? 232 * relay_buf_full - boolean, is the channel buffer full?
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 015fc633c9..e3055ba691 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -260,6 +260,7 @@ static int test_func(void *data)
260 int ret; 260 int ret;
261 261
262 current->flags |= PF_MUTEX_TESTER; 262 current->flags |= PF_MUTEX_TESTER;
263 set_freezable();
263 allow_signal(SIGHUP); 264 allow_signal(SIGHUP);
264 265
265 for(;;) { 266 for(;;) {
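
With PF_NOFREEZE removed elsewhere in this series, kernel threads are non-freezable by default and opt in with set_freezable(), as test_func() now does. The expected loop shape, sketched with illustrative names:

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* Illustrative main loop for a freezable kernel thread. */
static int example_thread_fn(void *unused)
{
	set_freezable();		/* threads are non-freezable by default now */

	while (!kthread_should_stop()) {
		try_to_freeze();	/* park here while the system suspends */

		/* ... one unit of work ... */

		schedule_timeout_interruptible(HZ);
	}
	return 0;
}
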
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 9a87886b02..1ec620c030 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -20,7 +20,7 @@ void down_read(struct rw_semaphore *sem)
20 might_sleep(); 20 might_sleep();
21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); 21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
22 22
23 __down_read(sem); 23 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
24} 24}
25 25
26EXPORT_SYMBOL(down_read); 26EXPORT_SYMBOL(down_read);
@@ -47,7 +47,7 @@ void down_write(struct rw_semaphore *sem)
47 might_sleep(); 47 might_sleep();
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49 49
50 __down_write(sem); 50 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
51} 51}
52 52
53EXPORT_SYMBOL(down_write); 53EXPORT_SYMBOL(down_write);
@@ -111,7 +111,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
111 might_sleep(); 111 might_sleep();
112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); 112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
113 113
114 __down_read(sem); 114 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
115} 115}
116 116
117EXPORT_SYMBOL(down_read_nested); 117EXPORT_SYMBOL(down_read_nested);
@@ -130,7 +130,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
130 might_sleep(); 130 might_sleep();
131 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 131 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
132 132
133 __down_write_nested(sem, subclass); 133 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
134} 134}
135 135
136EXPORT_SYMBOL(down_write_nested); 136EXPORT_SYMBOL(down_write_nested);
diff --git a/kernel/sched.c b/kernel/sched.c
index 1c8076676e..93cf241cfb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -301,7 +301,7 @@ struct rq {
301 struct lock_class_key rq_lock_key; 301 struct lock_class_key rq_lock_key;
302}; 302};
303 303
304static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; 304static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
305static DEFINE_MUTEX(sched_hotcpu_mutex); 305static DEFINE_MUTEX(sched_hotcpu_mutex);
306 306
307static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) 307static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
@@ -379,6 +379,23 @@ static inline unsigned long long rq_clock(struct rq *rq)
379#define task_rq(p) cpu_rq(task_cpu(p)) 379#define task_rq(p) cpu_rq(task_cpu(p))
380#define cpu_curr(cpu) (cpu_rq(cpu)->curr) 380#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
381 381
382/*
383 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
384 * clock constructed from sched_clock():
385 */
386unsigned long long cpu_clock(int cpu)
387{
388 struct rq *rq = cpu_rq(cpu);
389 unsigned long long now;
390 unsigned long flags;
391
392 spin_lock_irqsave(&rq->lock, flags);
393 now = rq_clock(rq);
394 spin_unlock_irqrestore(&rq->lock, flags);
395
396 return now;
397}
398
382#ifdef CONFIG_FAIR_GROUP_SCHED 399#ifdef CONFIG_FAIR_GROUP_SCHED
383/* Change a task's ->cfs_rq if it moves across CPUs */ 400/* Change a task's ->cfs_rq if it moves across CPUs */
384static inline void set_task_cfs_rq(struct task_struct *p) 401static inline void set_task_cfs_rq(struct task_struct *p)
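
cpu_clock() above trades accuracy for speed: it is only as coherent as the per-runqueue clock it samples under rq->lock. A sketch of the intended use, cheap duration measurement pinned to one CPU; illustrative only, assuming the extern declaration this series adds to <linux/sched.h>:

#include <linux/kernel.h>
#include <linux/sched.h>	/* cpu_clock() declaration assumed here */
#include <linux/smp.h>

/* Illustrative: time a short section using the per-cpu sched clock. */
static void example_measure_section(void)
{
	int cpu = get_cpu();			/* disable preemption, get this CPU */
	unsigned long long t0 = cpu_clock(cpu);

	/* ... section being timed ... */

	printk(KERN_DEBUG "section took %llu ns\n", cpu_clock(cpu) - t0);
	put_cpu();
}
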
@@ -2235,7 +2252,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2235 2252
2236 rq = cpu_rq(i); 2253 rq = cpu_rq(i);
2237 2254
2238 if (*sd_idle && !idle_cpu(i)) 2255 if (*sd_idle && rq->nr_running)
2239 *sd_idle = 0; 2256 *sd_idle = 0;
2240 2257
2241 /* Bias balancing toward cpus of our domain */ 2258 /* Bias balancing toward cpus of our domain */
@@ -2257,9 +2274,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2257 /* 2274 /*
2258 * First idle cpu or the first cpu(busiest) in this sched group 2275 * First idle cpu or the first cpu(busiest) in this sched group
2259 * is eligible for doing load balancing at this and above 2276 * is eligible for doing load balancing at this and above
2260 * domains. 2277 * domains. In the newly idle case, we will allow all the cpu's
2278 * to do the newly idle load balance.
2261 */ 2279 */
2262 if (local_group && balance_cpu != this_cpu && balance) { 2280 if (idle != CPU_NEWLY_IDLE && local_group &&
2281 balance_cpu != this_cpu && balance) {
2263 *balance = 0; 2282 *balance = 0;
2264 goto ret; 2283 goto ret;
2265 } 2284 }
@@ -2677,6 +2696,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2677 unsigned long imbalance; 2696 unsigned long imbalance;
2678 int nr_moved = 0; 2697 int nr_moved = 0;
2679 int sd_idle = 0; 2698 int sd_idle = 0;
2699 int all_pinned = 0;
2680 cpumask_t cpus = CPU_MASK_ALL; 2700 cpumask_t cpus = CPU_MASK_ALL;
2681 2701
2682 /* 2702 /*
@@ -2715,10 +2735,11 @@ redo:
2715 double_lock_balance(this_rq, busiest); 2735 double_lock_balance(this_rq, busiest);
2716 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2736 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2717 minus_1_or_zero(busiest->nr_running), 2737 minus_1_or_zero(busiest->nr_running),
2718 imbalance, sd, CPU_NEWLY_IDLE, NULL); 2738 imbalance, sd, CPU_NEWLY_IDLE,
2739 &all_pinned);
2719 spin_unlock(&busiest->lock); 2740 spin_unlock(&busiest->lock);
2720 2741
2721 if (!nr_moved) { 2742 if (unlikely(all_pinned)) {
2722 cpu_clear(cpu_of(busiest), cpus); 2743 cpu_clear(cpu_of(busiest), cpus);
2723 if (!cpus_empty(cpus)) 2744 if (!cpus_empty(cpus))
2724 goto redo; 2745 goto redo;
@@ -4912,8 +4933,6 @@ static int migration_thread(void *data)
4912 struct migration_req *req; 4933 struct migration_req *req;
4913 struct list_head *head; 4934 struct list_head *head;
4914 4935
4915 try_to_freeze();
4916
4917 spin_lock_irq(&rq->lock); 4936 spin_lock_irq(&rq->lock);
4918 4937
4919 if (cpu_is_offline(cpu)) { 4938 if (cpu_is_offline(cpu)) {
@@ -5147,7 +5166,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5147 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); 5166 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
5148 if (IS_ERR(p)) 5167 if (IS_ERR(p))
5149 return NOTIFY_BAD; 5168 return NOTIFY_BAD;
5150 p->flags |= PF_NOFREEZE;
5151 kthread_bind(p, cpu); 5169 kthread_bind(p, cpu);
5152 /* Must be high prio: stop_machine expects to yield to it. */ 5170 /* Must be high prio: stop_machine expects to yield to it. */
5153 rq = task_rq_lock(p, &flags); 5171 rq = task_rq_lock(p, &flags);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8de2677901..0f546ddea4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -14,6 +14,7 @@
14#include <linux/notifier.h> 14#include <linux/notifier.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/cpu.h> 16#include <linux/cpu.h>
17#include <linux/freezer.h>
17#include <linux/kthread.h> 18#include <linux/kthread.h>
18#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
19#include <linux/smp.h> 20#include <linux/smp.h>
@@ -488,8 +489,6 @@ void __init softirq_init(void)
488 489
489static int ksoftirqd(void * __bind_cpu) 490static int ksoftirqd(void * __bind_cpu)
490{ 491{
491 current->flags |= PF_NOFREEZE;
492
493 set_current_state(TASK_INTERRUPTIBLE); 492 set_current_state(TASK_INTERRUPTIBLE);
494 493
495 while (!kthread_should_stop()) { 494 while (!kthread_should_stop()) {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 0131e296ff..708d4882c0 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -10,6 +10,7 @@
10#include <linux/cpu.h> 10#include <linux/cpu.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/freezer.h>
13#include <linux/kthread.h> 14#include <linux/kthread.h>
14#include <linux/notifier.h> 15#include <linux/notifier.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -116,7 +117,6 @@ static int watchdog(void * __bind_cpu)
116 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 117 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
117 118
118 sched_setscheduler(current, SCHED_FIFO, &param); 119 sched_setscheduler(current, SCHED_FIFO, &param);
119 current->flags |= PF_NOFREEZE;
120 120
121 /* initialize timestamp */ 121 /* initialize timestamp */
122 touch_softlockup_watchdog(); 122 touch_softlockup_watchdog();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 2c6c2bf855..cd72424c26 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -72,7 +72,7 @@ void __lockfunc _read_lock(rwlock_t *lock)
72{ 72{
73 preempt_disable(); 73 preempt_disable();
74 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); 74 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
75 _raw_read_lock(lock); 75 LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
76} 76}
77EXPORT_SYMBOL(_read_lock); 77EXPORT_SYMBOL(_read_lock);
78 78
@@ -88,8 +88,8 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
88 * _raw_spin_lock_flags() code, because lockdep assumes 88 * _raw_spin_lock_flags() code, because lockdep assumes
89 * that interrupts are not re-enabled during lock-acquire: 89 * that interrupts are not re-enabled during lock-acquire:
90 */ 90 */
91#ifdef CONFIG_PROVE_LOCKING 91#ifdef CONFIG_LOCKDEP
92 _raw_spin_lock(lock); 92 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
93#else 93#else
94 _raw_spin_lock_flags(lock, &flags); 94 _raw_spin_lock_flags(lock, &flags);
95#endif 95#endif
@@ -102,7 +102,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
102 local_irq_disable(); 102 local_irq_disable();
103 preempt_disable(); 103 preempt_disable();
104 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); 104 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
105 _raw_spin_lock(lock); 105 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
106} 106}
107EXPORT_SYMBOL(_spin_lock_irq); 107EXPORT_SYMBOL(_spin_lock_irq);
108 108
@@ -111,7 +111,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
111 local_bh_disable(); 111 local_bh_disable();
112 preempt_disable(); 112 preempt_disable();
113 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); 113 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
114 _raw_spin_lock(lock); 114 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
115} 115}
116EXPORT_SYMBOL(_spin_lock_bh); 116EXPORT_SYMBOL(_spin_lock_bh);
117 117
@@ -122,7 +122,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
122 local_irq_save(flags); 122 local_irq_save(flags);
123 preempt_disable(); 123 preempt_disable();
124 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); 124 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
125 _raw_read_lock(lock); 125 LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
126 return flags; 126 return flags;
127} 127}
128EXPORT_SYMBOL(_read_lock_irqsave); 128EXPORT_SYMBOL(_read_lock_irqsave);
@@ -132,7 +132,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
132 local_irq_disable(); 132 local_irq_disable();
133 preempt_disable(); 133 preempt_disable();
134 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); 134 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
135 _raw_read_lock(lock); 135 LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
136} 136}
137EXPORT_SYMBOL(_read_lock_irq); 137EXPORT_SYMBOL(_read_lock_irq);
138 138
@@ -141,7 +141,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
141 local_bh_disable(); 141 local_bh_disable();
142 preempt_disable(); 142 preempt_disable();
143 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); 143 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
144 _raw_read_lock(lock); 144 LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
145} 145}
146EXPORT_SYMBOL(_read_lock_bh); 146EXPORT_SYMBOL(_read_lock_bh);
147 147
@@ -152,7 +152,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
152 local_irq_save(flags); 152 local_irq_save(flags);
153 preempt_disable(); 153 preempt_disable();
154 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); 154 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
155 _raw_write_lock(lock); 155 LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
156 return flags; 156 return flags;
157} 157}
158EXPORT_SYMBOL(_write_lock_irqsave); 158EXPORT_SYMBOL(_write_lock_irqsave);
@@ -162,7 +162,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
162 local_irq_disable(); 162 local_irq_disable();
163 preempt_disable(); 163 preempt_disable();
164 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); 164 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
165 _raw_write_lock(lock); 165 LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
166} 166}
167EXPORT_SYMBOL(_write_lock_irq); 167EXPORT_SYMBOL(_write_lock_irq);
168 168
@@ -171,7 +171,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
171 local_bh_disable(); 171 local_bh_disable();
172 preempt_disable(); 172 preempt_disable();
173 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); 173 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
174 _raw_write_lock(lock); 174 LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
175} 175}
176EXPORT_SYMBOL(_write_lock_bh); 176EXPORT_SYMBOL(_write_lock_bh);
177 177
@@ -179,7 +179,7 @@ void __lockfunc _spin_lock(spinlock_t *lock)
179{ 179{
180 preempt_disable(); 180 preempt_disable();
181 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); 181 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
182 _raw_spin_lock(lock); 182 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
183} 183}
184 184
185EXPORT_SYMBOL(_spin_lock); 185EXPORT_SYMBOL(_spin_lock);
@@ -188,7 +188,7 @@ void __lockfunc _write_lock(rwlock_t *lock)
188{ 188{
189 preempt_disable(); 189 preempt_disable();
190 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); 190 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
191 _raw_write_lock(lock); 191 LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
192} 192}
193 193
194EXPORT_SYMBOL(_write_lock); 194EXPORT_SYMBOL(_write_lock);
@@ -289,7 +289,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
289{ 289{
290 preempt_disable(); 290 preempt_disable();
291 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); 291 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
292 _raw_spin_lock(lock); 292 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
293} 293}
294 294
295EXPORT_SYMBOL(_spin_lock_nested); 295EXPORT_SYMBOL(_spin_lock_nested);
@@ -305,8 +305,8 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas
305 * _raw_spin_lock_flags() code, because lockdep assumes 305 * _raw_spin_lock_flags() code, because lockdep assumes
306 * that interrupts are not re-enabled during lock-acquire: 306 * that interrupts are not re-enabled during lock-acquire:
307 */ 307 */
308#ifdef CONFIG_PROVE_SPIN_LOCKING 308#ifdef CONFIG_LOCKDEP
309 _raw_spin_lock(lock); 309 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
310#else 310#else
311 _raw_spin_lock_flags(lock, &flags); 311 _raw_spin_lock_flags(lock, &flags);
312#endif 312#endif
diff --git a/kernel/sys.c b/kernel/sys.c
index 4d141ae3e8..08562f4197 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -100,6 +100,13 @@ struct pid *cad_pid;
100EXPORT_SYMBOL(cad_pid); 100EXPORT_SYMBOL(cad_pid);
101 101
102/* 102/*
103 * If set, this is used for preparing the system to power off.
104 */
105
106void (*pm_power_off_prepare)(void);
107EXPORT_SYMBOL(pm_power_off_prepare);
108
109/*
103 * Notifier list for kernel code which wants to be called 110 * Notifier list for kernel code which wants to be called
104 * at shutdown. This is used to stop any idling DMA operations 111 * at shutdown. This is used to stop any idling DMA operations
105 * and the like. 112 * and the like.
@@ -867,6 +874,8 @@ EXPORT_SYMBOL_GPL(kernel_halt);
867void kernel_power_off(void) 874void kernel_power_off(void)
868{ 875{
869 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 876 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
877 if (pm_power_off_prepare)
878 pm_power_off_prepare();
870 printk(KERN_EMERG "Power down.\n"); 879 printk(KERN_EMERG "Power down.\n");
871 machine_power_off(); 880 machine_power_off();
872} 881}
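
pm_power_off_prepare gives platform code a hook that runs after kernel_shutdown_prepare() but before machine_power_off(). A hypothetical provider is sketched below; the extern declaration is assumed to live in <linux/pm.h>, since only the definition in kernel/sys.c is shown here:

#include <linux/init.h>

/* Declaration assumed to be exported via <linux/pm.h>; the patch above only
 * shows the definition and EXPORT_SYMBOL in kernel/sys.c. */
extern void (*pm_power_off_prepare)(void);

/* Hypothetical platform hook: firmware-specific preparation for power-off. */
static void example_power_off_prepare(void)
{
	/* e.g. arm the embedded controller / firmware for the final power cut */
}

static int __init example_platform_init(void)
{
	pm_power_off_prepare = example_power_off_prepare;
	return 0;
}
late_initcall(example_platform_init);
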
@@ -1027,7 +1036,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
1027 return -EPERM; 1036 return -EPERM;
1028 } 1037 }
1029 if (new_egid != old_egid) { 1038 if (new_egid != old_egid) {
1030 current->mm->dumpable = suid_dumpable; 1039 set_dumpable(current->mm, suid_dumpable);
1031 smp_wmb(); 1040 smp_wmb();
1032 } 1041 }
1033 if (rgid != (gid_t) -1 || 1042 if (rgid != (gid_t) -1 ||
@@ -1057,13 +1066,13 @@ asmlinkage long sys_setgid(gid_t gid)
1057 1066
1058 if (capable(CAP_SETGID)) { 1067 if (capable(CAP_SETGID)) {
1059 if (old_egid != gid) { 1068 if (old_egid != gid) {
1060 current->mm->dumpable = suid_dumpable; 1069 set_dumpable(current->mm, suid_dumpable);
1061 smp_wmb(); 1070 smp_wmb();
1062 } 1071 }
1063 current->gid = current->egid = current->sgid = current->fsgid = gid; 1072 current->gid = current->egid = current->sgid = current->fsgid = gid;
1064 } else if ((gid == current->gid) || (gid == current->sgid)) { 1073 } else if ((gid == current->gid) || (gid == current->sgid)) {
1065 if (old_egid != gid) { 1074 if (old_egid != gid) {
1066 current->mm->dumpable = suid_dumpable; 1075 set_dumpable(current->mm, suid_dumpable);
1067 smp_wmb(); 1076 smp_wmb();
1068 } 1077 }
1069 current->egid = current->fsgid = gid; 1078 current->egid = current->fsgid = gid;
@@ -1094,7 +1103,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
1094 switch_uid(new_user); 1103 switch_uid(new_user);
1095 1104
1096 if (dumpclear) { 1105 if (dumpclear) {
1097 current->mm->dumpable = suid_dumpable; 1106 set_dumpable(current->mm, suid_dumpable);
1098 smp_wmb(); 1107 smp_wmb();
1099 } 1108 }
1100 current->uid = new_ruid; 1109 current->uid = new_ruid;
@@ -1150,7 +1159,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
1150 return -EAGAIN; 1159 return -EAGAIN;
1151 1160
1152 if (new_euid != old_euid) { 1161 if (new_euid != old_euid) {
1153 current->mm->dumpable = suid_dumpable; 1162 set_dumpable(current->mm, suid_dumpable);
1154 smp_wmb(); 1163 smp_wmb();
1155 } 1164 }
1156 current->fsuid = current->euid = new_euid; 1165 current->fsuid = current->euid = new_euid;
@@ -1200,7 +1209,7 @@ asmlinkage long sys_setuid(uid_t uid)
1200 return -EPERM; 1209 return -EPERM;
1201 1210
1202 if (old_euid != uid) { 1211 if (old_euid != uid) {
1203 current->mm->dumpable = suid_dumpable; 1212 set_dumpable(current->mm, suid_dumpable);
1204 smp_wmb(); 1213 smp_wmb();
1205 } 1214 }
1206 current->fsuid = current->euid = uid; 1215 current->fsuid = current->euid = uid;
@@ -1245,7 +1254,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
1245 } 1254 }
1246 if (euid != (uid_t) -1) { 1255 if (euid != (uid_t) -1) {
1247 if (euid != current->euid) { 1256 if (euid != current->euid) {
1248 current->mm->dumpable = suid_dumpable; 1257 set_dumpable(current->mm, suid_dumpable);
1249 smp_wmb(); 1258 smp_wmb();
1250 } 1259 }
1251 current->euid = euid; 1260 current->euid = euid;
@@ -1295,7 +1304,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
1295 } 1304 }
1296 if (egid != (gid_t) -1) { 1305 if (egid != (gid_t) -1) {
1297 if (egid != current->egid) { 1306 if (egid != current->egid) {
1298 current->mm->dumpable = suid_dumpable; 1307 set_dumpable(current->mm, suid_dumpable);
1299 smp_wmb(); 1308 smp_wmb();
1300 } 1309 }
1301 current->egid = egid; 1310 current->egid = egid;
@@ -1341,7 +1350,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
1341 uid == current->suid || uid == current->fsuid || 1350 uid == current->suid || uid == current->fsuid ||
1342 capable(CAP_SETUID)) { 1351 capable(CAP_SETUID)) {
1343 if (uid != old_fsuid) { 1352 if (uid != old_fsuid) {
1344 current->mm->dumpable = suid_dumpable; 1353 set_dumpable(current->mm, suid_dumpable);
1345 smp_wmb(); 1354 smp_wmb();
1346 } 1355 }
1347 current->fsuid = uid; 1356 current->fsuid = uid;
@@ -1370,7 +1379,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
1370 gid == current->sgid || gid == current->fsgid || 1379 gid == current->sgid || gid == current->fsgid ||
1371 capable(CAP_SETGID)) { 1380 capable(CAP_SETGID)) {
1372 if (gid != old_fsgid) { 1381 if (gid != old_fsgid) {
1373 current->mm->dumpable = suid_dumpable; 1382 set_dumpable(current->mm, suid_dumpable);
1374 smp_wmb(); 1383 smp_wmb();
1375 } 1384 }
1376 current->fsgid = gid; 1385 current->fsgid = gid;
@@ -2167,14 +2176,14 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
2167 error = put_user(current->pdeath_signal, (int __user *)arg2); 2176 error = put_user(current->pdeath_signal, (int __user *)arg2);
2168 break; 2177 break;
2169 case PR_GET_DUMPABLE: 2178 case PR_GET_DUMPABLE:
2170 error = current->mm->dumpable; 2179 error = get_dumpable(current->mm);
2171 break; 2180 break;
2172 case PR_SET_DUMPABLE: 2181 case PR_SET_DUMPABLE:
2173 if (arg2 < 0 || arg2 > 1) { 2182 if (arg2 < 0 || arg2 > 1) {
2174 error = -EINVAL; 2183 error = -EINVAL;
2175 break; 2184 break;
2176 } 2185 }
2177 current->mm->dumpable = arg2; 2186 set_dumpable(current->mm, arg2);
2178 break; 2187 break;
2179 2188
2180 case PR_SET_UNALIGN: 2189 case PR_SET_UNALIGN:
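
With this conversion, sys_prctl() goes through the get_dumpable()/set_dumpable() accessors instead of touching mm->dumpable directly; the userspace-visible PR_GET_DUMPABLE/PR_SET_DUMPABLE behaviour is unchanged. A minimal userspace check of that interface (illustrative, not part of the patch):

	#include <stdio.h>
	#include <sys/prctl.h>

	/* Illustrative only: exercise the prctl interface that the hunk
	 * above now routes through get_dumpable()/set_dumpable(). */
	int main(void)
	{
		if (prctl(PR_SET_DUMPABLE, 1) != 0)
			perror("PR_SET_DUMPABLE");
		/* PR_GET_DUMPABLE returns the current value (0 or 1 here) */
		printf("dumpable = %d\n", prctl(PR_GET_DUMPABLE));
		return 0;
	}
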
@@ -2286,3 +2295,61 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
2286 } 2295 }
2287 return err ? -EFAULT : 0; 2296 return err ? -EFAULT : 0;
2288} 2297}
2298
2299char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
2300
2301static void argv_cleanup(char **argv, char **envp)
2302{
2303 argv_free(argv);
2304}
2305
2306/**
2307 * orderly_poweroff - Trigger an orderly system poweroff
2308 * @force: force poweroff if command execution fails
2309 *
2310 * This may be called from any context to trigger a system shutdown.
2311 * If the orderly shutdown fails, it will force an immediate shutdown.
2312 */
2313int orderly_poweroff(bool force)
2314{
2315 int argc;
2316 char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
2317 static char *envp[] = {
2318 "HOME=/",
2319 "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
2320 NULL
2321 };
2322 int ret = -ENOMEM;
2323 struct subprocess_info *info;
2324
2325 if (argv == NULL) {
2326 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
2327 __func__, poweroff_cmd);
2328 goto out;
2329 }
2330
2331 info = call_usermodehelper_setup(argv[0], argv, envp);
2332 if (info == NULL) {
2333 argv_free(argv);
2334 goto out;
2335 }
2336
2337 call_usermodehelper_setcleanup(info, argv_cleanup);
2338
2339 ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
2340
2341 out:
2342 if (ret && force) {
2343 printk(KERN_WARNING "Failed to start orderly shutdown: "
2344 "forcing the issue\n");
2345
2346 /* I guess this should try to kick off some daemon to
2347 sync and poweroff asap. Or not even bother syncing
2348 if we're doing an emergency shutdown? */
2349 emergency_sync();
2350 kernel_power_off();
2351 }
2352
2353 return ret;
2354}
2355EXPORT_SYMBOL_GPL(orderly_poweroff);
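
For callers, the usermodehelper plumbing above is hidden behind the single exported function. A minimal sketch of how a hypothetical driver might use it; the header carrying the prototype is an assumption, based on the <linux/reboot.h> include added to sysctl.c below:

	#include <linux/reboot.h>	/* assumed to carry the orderly_poweroff() prototype */

	/* Hypothetical over-temperature handler: ask userspace (via
	 * poweroff_cmd) to shut down cleanly; with force == true the
	 * kernel powers off immediately if the helper cannot be started. */
	static void example_critical_temp(void)
	{
		orderly_poweroff(true);
	}
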
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2ce7acf841..222299844a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -29,6 +29,7 @@
29#include <linux/utsname.h> 29#include <linux/utsname.h>
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/fs.h>
32#include <linux/init.h> 33#include <linux/init.h>
33#include <linux/kernel.h> 34#include <linux/kernel.h>
34#include <linux/kobject.h> 35#include <linux/kobject.h>
@@ -45,13 +46,11 @@
45#include <linux/syscalls.h> 46#include <linux/syscalls.h>
46#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
47#include <linux/acpi.h> 48#include <linux/acpi.h>
49#include <linux/reboot.h>
48 50
49#include <asm/uaccess.h> 51#include <asm/uaccess.h>
50#include <asm/processor.h> 52#include <asm/processor.h>
51 53
52extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53 void __user *buffer, size_t *lenp, loff_t *ppos);
54
55#ifdef CONFIG_X86 54#ifdef CONFIG_X86
56#include <asm/nmi.h> 55#include <asm/nmi.h>
57#include <asm/stacktrace.h> 56#include <asm/stacktrace.h>
@@ -79,6 +78,7 @@ extern int percpu_pagelist_fraction;
79extern int compat_log; 78extern int compat_log;
80extern int maps_protect; 79extern int maps_protect;
81extern int sysctl_stat_interval; 80extern int sysctl_stat_interval;
81extern int audit_argv_kb;
82 82
83/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 83/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
84static int maxolduid = 65535; 84static int maxolduid = 65535;
@@ -161,6 +161,8 @@ extern ctl_table inotify_table[];
161int sysctl_legacy_va_layout; 161int sysctl_legacy_va_layout;
162#endif 162#endif
163 163
164extern int prove_locking;
165extern int lock_stat;
164 166
165/* The default sysctl tables: */ 167/* The default sysctl tables: */
166 168
@@ -282,6 +284,26 @@ static ctl_table kern_table[] = {
282 .mode = 0644, 284 .mode = 0644,
283 .proc_handler = &proc_dointvec, 285 .proc_handler = &proc_dointvec,
284 }, 286 },
287#ifdef CONFIG_PROVE_LOCKING
288 {
289 .ctl_name = CTL_UNNUMBERED,
290 .procname = "prove_locking",
291 .data = &prove_locking,
292 .maxlen = sizeof(int),
293 .mode = 0644,
294 .proc_handler = &proc_dointvec,
295 },
296#endif
297#ifdef CONFIG_LOCK_STAT
298 {
299 .ctl_name = CTL_UNNUMBERED,
300 .procname = "lock_stat",
301 .data = &lock_stat,
302 .maxlen = sizeof(int),
303 .mode = 0644,
304 .proc_handler = &proc_dointvec,
305 },
306#endif
285 { 307 {
286 .ctl_name = CTL_UNNUMBERED, 308 .ctl_name = CTL_UNNUMBERED,
287 .procname = "sched_features", 309 .procname = "sched_features",
@@ -307,6 +329,16 @@ static ctl_table kern_table[] = {
307 .mode = 0644, 329 .mode = 0644,
308 .proc_handler = &proc_dointvec, 330 .proc_handler = &proc_dointvec,
309 }, 331 },
332#ifdef CONFIG_AUDITSYSCALL
333 {
334 .ctl_name = CTL_UNNUMBERED,
335 .procname = "audit_argv_kb",
336 .data = &audit_argv_kb,
337 .maxlen = sizeof(int),
338 .mode = 0644,
339 .proc_handler = &proc_dointvec,
340 },
341#endif
310 { 342 {
311 .ctl_name = KERN_CORE_PATTERN, 343 .ctl_name = KERN_CORE_PATTERN,
312 .procname = "core_pattern", 344 .procname = "core_pattern",
@@ -661,7 +693,7 @@ static ctl_table kern_table[] = {
661 { 693 {
662 .ctl_name = KERN_ACPI_VIDEO_FLAGS, 694 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
663 .procname = "acpi_video_flags", 695 .procname = "acpi_video_flags",
664 .data = &acpi_video_flags, 696 .data = &acpi_realmode_flags,
665 .maxlen = sizeof (unsigned long), 697 .maxlen = sizeof (unsigned long),
666 .mode = 0644, 698 .mode = 0644,
667 .proc_handler = &proc_doulongvec_minmax, 699 .proc_handler = &proc_doulongvec_minmax,
@@ -707,13 +739,26 @@ static ctl_table kern_table[] = {
707 .proc_handler = &proc_dointvec, 739 .proc_handler = &proc_dointvec,
708 }, 740 },
709#endif 741#endif
710 742 {
743 .ctl_name = CTL_UNNUMBERED,
744 .procname = "poweroff_cmd",
745 .data = &poweroff_cmd,
746 .maxlen = POWEROFF_CMD_PATH_LEN,
747 .mode = 0644,
748 .proc_handler = &proc_dostring,
749 .strategy = &sysctl_string,
750 },
751/*
752 * NOTE: do not add new entries to this table unless you have read
753 * Documentation/sysctl/ctl_unnumbered.txt
754 */
711 { .ctl_name = 0 } 755 { .ctl_name = 0 }
712}; 756};
713 757
714/* Constants for minimum and maximum testing in vm_table. 758/* Constants for minimum and maximum testing in vm_table.
715 We use these as one-element integer vectors. */ 759 We use these as one-element integer vectors. */
716static int zero; 760static int zero;
761static int two = 2;
717static int one_hundred = 100; 762static int one_hundred = 100;
718 763
719 764
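
The new poweroff_cmd entry lives in kern_table and is CTL_UNNUMBERED, so it should be reachable only through procfs, presumably as /proc/sys/kernel/poweroff_cmd. An illustrative userspace snippet (the path is inferred from kern_table, not spelled out in the patch) that points the helper at a custom script:

	#include <stdio.h>

	int main(void)
	{
		/* Path assumed from kern_table above. */
		FILE *f = fopen("/proc/sys/kernel/poweroff_cmd", "w");

		if (!f) {
			perror("poweroff_cmd");
			return 1;
		}
		fputs("/usr/local/sbin/my-poweroff", f);	/* hypothetical script */
		return fclose(f) ? 1 : 0;
	}
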
@@ -826,6 +871,14 @@ static ctl_table vm_table[] = {
826 .mode = 0644, 871 .mode = 0644,
827 .proc_handler = &proc_dointvec, 872 .proc_handler = &proc_dointvec,
828 }, 873 },
874 {
875 .ctl_name = CTL_UNNUMBERED,
876 .procname = "hugepages_treat_as_movable",
877 .data = &hugepages_treat_as_movable,
878 .maxlen = sizeof(int),
879 .mode = 0644,
880 .proc_handler = &hugetlb_treat_movable_handler,
881 },
829#endif 882#endif
830 { 883 {
831 .ctl_name = VM_LOWMEM_RESERVE_RATIO, 884 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
@@ -1096,7 +1149,10 @@ static ctl_table fs_table[] = {
1096 .data = &lease_break_time, 1149 .data = &lease_break_time,
1097 .maxlen = sizeof(int), 1150 .maxlen = sizeof(int),
1098 .mode = 0644, 1151 .mode = 0644,
1099 .proc_handler = &proc_dointvec, 1152 .proc_handler = &proc_dointvec_minmax,
1153 .strategy = &sysctl_intvec,
1154 .extra1 = &zero,
1155 .extra2 = &two,
1100 }, 1156 },
1101 { 1157 {
1102 .ctl_name = FS_AIO_NR, 1158 .ctl_name = FS_AIO_NR,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 728cedfd3c..8969877661 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -401,7 +401,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
401 * this is optimized for the most common adjustments of -1,0,1, 401 * this is optimized for the most common adjustments of -1,0,1,
402 * for other values we can do a bit more work. 402 * for other values we can do a bit more work.
403 */ 403 */
404static void clocksource_adjust(struct clocksource *clock, s64 offset) 404static void clocksource_adjust(s64 offset)
405{ 405{
406 s64 error, interval = clock->cycle_interval; 406 s64 error, interval = clock->cycle_interval;
407 int adj; 407 int adj;
@@ -476,7 +476,7 @@ void update_wall_time(void)
476 } 476 }
477 477
478 /* correct the clock when NTP error is too big */ 478 /* correct the clock when NTP error is too big */
479 clocksource_adjust(clock, offset); 479 clocksource_adjust(offset);
480 480
481 /* store full nanoseconds into xtime */ 481 /* store full nanoseconds into xtime */
482 xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; 482 xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 8bbcfb77f7..e5edc3a22a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -38,7 +38,7 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
38 38
39static void print_name_offset(struct seq_file *m, void *sym) 39static void print_name_offset(struct seq_file *m, void *sym)
40{ 40{
41 char symname[KSYM_NAME_LEN+1]; 41 char symname[KSYM_NAME_LEN];
42 42
43 if (lookup_symbol_name((unsigned long)sym, symname) < 0) 43 if (lookup_symbol_name((unsigned long)sym, symname) < 0)
44 SEQ_printf(m, "<%p>", sym); 44 SEQ_printf(m, "<%p>", sym);
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 9b8a826236..8ed62fda16 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -269,7 +269,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
269 269
270static void print_name_offset(struct seq_file *m, unsigned long addr) 270static void print_name_offset(struct seq_file *m, unsigned long addr)
271{ 271{
272 char symname[KSYM_NAME_LEN+1]; 272 char symname[KSYM_NAME_LEN];
273 273
274 if (lookup_symbol_name(addr, symname) < 0) 274 if (lookup_symbol_name(addr, symname) < 0)
275 seq_printf(m, "<%p>", (void *)addr); 275 seq_printf(m, "<%p>", (void *)addr);
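
Both print_name_offset() buffers shrink by one byte because, as part of the kallsyms update in this series, KSYM_NAME_LEN is taken to include room for the trailing '\0'. A self-contained sketch of the resulting sizing convention (the helper name is illustrative):

	#include <linux/kallsyms.h>
	#include <linux/kernel.h>

	/* Illustrative helper: KSYM_NAME_LEN already accounts for the
	 * terminating NUL, so no "+1" is needed when sizing the buffer. */
	static void report_symbol(unsigned long addr)
	{
		char symname[KSYM_NAME_LEN];

		if (lookup_symbol_name(addr, symname) < 0)
			printk(KERN_INFO "<%p>\n", (void *)addr);
		else
			printk(KERN_INFO "%s\n", symname);
	}
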
diff --git a/kernel/timer.c b/kernel/timer.c
index 1258371e0d..d1e8b975c7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -103,14 +103,14 @@ static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
103static inline void timer_set_deferrable(struct timer_list *timer) 103static inline void timer_set_deferrable(struct timer_list *timer)
104{ 104{
105 timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | 105 timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
106 TBASE_DEFERRABLE_FLAG)); 106 TBASE_DEFERRABLE_FLAG));
107} 107}
108 108
109static inline void 109static inline void
110timer_set_base(struct timer_list *timer, tvec_base_t *new_base) 110timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
111{ 111{
112 timer->base = (tvec_base_t *)((unsigned long)(new_base) | 112 timer->base = (tvec_base_t *)((unsigned long)(new_base) |
113 tbase_get_deferrable(timer->base)); 113 tbase_get_deferrable(timer->base));
114} 114}
115 115
116/** 116/**
@@ -445,10 +445,10 @@ EXPORT_SYMBOL(__mod_timer);
445void add_timer_on(struct timer_list *timer, int cpu) 445void add_timer_on(struct timer_list *timer, int cpu)
446{ 446{
447 tvec_base_t *base = per_cpu(tvec_bases, cpu); 447 tvec_base_t *base = per_cpu(tvec_bases, cpu);
448 unsigned long flags; 448 unsigned long flags;
449 449
450 timer_stats_timer_set_start_info(timer); 450 timer_stats_timer_set_start_info(timer);
451 BUG_ON(timer_pending(timer) || !timer->function); 451 BUG_ON(timer_pending(timer) || !timer->function);
452 spin_lock_irqsave(&base->lock, flags); 452 spin_lock_irqsave(&base->lock, flags);
453 timer_set_base(timer, base); 453 timer_set_base(timer, base);
454 internal_add_timer(base, timer); 454 internal_add_timer(base, timer);
@@ -627,7 +627,7 @@ static inline void __run_timers(tvec_base_t *base)
627 while (time_after_eq(jiffies, base->timer_jiffies)) { 627 while (time_after_eq(jiffies, base->timer_jiffies)) {
628 struct list_head work_list; 628 struct list_head work_list;
629 struct list_head *head = &work_list; 629 struct list_head *head = &work_list;
630 int index = base->timer_jiffies & TVR_MASK; 630 int index = base->timer_jiffies & TVR_MASK;
631 631
632 /* 632 /*
633 * Cascade timers: 633 * Cascade timers:
@@ -644,8 +644,8 @@ static inline void __run_timers(tvec_base_t *base)
644 unsigned long data; 644 unsigned long data;
645 645
646 timer = list_first_entry(head, struct timer_list,entry); 646 timer = list_first_entry(head, struct timer_list,entry);
647 fn = timer->function; 647 fn = timer->function;
648 data = timer->data; 648 data = timer->data;
649 649
650 timer_stats_account_timer(timer); 650 timer_stats_account_timer(timer);
651 651
@@ -689,8 +689,8 @@ static unsigned long __next_timer_interrupt(tvec_base_t *base)
689 index = slot = timer_jiffies & TVR_MASK; 689 index = slot = timer_jiffies & TVR_MASK;
690 do { 690 do {
691 list_for_each_entry(nte, base->tv1.vec + slot, entry) { 691 list_for_each_entry(nte, base->tv1.vec + slot, entry) {
692 if (tbase_get_deferrable(nte->base)) 692 if (tbase_get_deferrable(nte->base))
693 continue; 693 continue;
694 694
695 found = 1; 695 found = 1;
696 expires = nte->expires; 696 expires = nte->expires;
@@ -834,7 +834,7 @@ void update_process_times(int user_tick)
834 if (rcu_pending(cpu)) 834 if (rcu_pending(cpu))
835 rcu_check_callbacks(cpu, user_tick); 835 rcu_check_callbacks(cpu, user_tick);
836 scheduler_tick(); 836 scheduler_tick();
837 run_posix_cpu_timers(p); 837 run_posix_cpu_timers(p);
838} 838}
839 839
840/* 840/*
@@ -909,7 +909,7 @@ static inline void update_times(unsigned long ticks)
909 update_wall_time(); 909 update_wall_time();
910 calc_load(ticks); 910 calc_load(ticks);
911} 911}
912 912
913/* 913/*
914 * The 64-bit jiffies value is not atomic - you MUST NOT read it 914 * The 64-bit jiffies value is not atomic - you MUST NOT read it
915 * without sampling the sequence number in xtime_lock. 915 * without sampling the sequence number in xtime_lock.
@@ -1105,7 +1105,7 @@ asmlinkage long sys_gettid(void)
1105/** 1105/**
1106 * do_sysinfo - fill in sysinfo struct 1106 * do_sysinfo - fill in sysinfo struct
1107 * @info: pointer to buffer to fill 1107 * @info: pointer to buffer to fill
1108 */ 1108 */
1109int do_sysinfo(struct sysinfo *info) 1109int do_sysinfo(struct sysinfo *info)
1110{ 1110{
1111 unsigned long mem_total, sav_total; 1111 unsigned long mem_total, sav_total;
@@ -1221,7 +1221,8 @@ static int __devinit init_timers_cpu(int cpu)
1221 /* 1221 /*
1222 * The APs use this path later in boot 1222 * The APs use this path later in boot
1223 */ 1223 */
1224 base = kmalloc_node(sizeof(*base), GFP_KERNEL, 1224 base = kmalloc_node(sizeof(*base),
1225 GFP_KERNEL | __GFP_ZERO,
1225 cpu_to_node(cpu)); 1226 cpu_to_node(cpu));
1226 if (!base) 1227 if (!base)
1227 return -ENOMEM; 1228 return -ENOMEM;
@@ -1232,7 +1233,6 @@ static int __devinit init_timers_cpu(int cpu)
1232 kfree(base); 1233 kfree(base);
1233 return -ENOMEM; 1234 return -ENOMEM;
1234 } 1235 }
1235 memset(base, 0, sizeof(*base));
1236 per_cpu(tvec_bases, cpu) = base; 1236 per_cpu(tvec_bases, cpu) = base;
1237 } else { 1237 } else {
1238 /* 1238 /*
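
The init_timers_cpu() hunk replaces kmalloc_node() plus memset() with a single zeroing allocation. The same idiom in isolation (function name is illustrative):

	#include <linux/slab.h>

	/* Illustrative: request zeroed, node-local memory in one call
	 * instead of following kmalloc_node() with memset(). */
	static void *alloc_zeroed_on_node(size_t size, int node)
	{
		return kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node);
	}
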
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d7d3fa3072..58e5c152a6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -282,8 +282,8 @@ static int worker_thread(void *__cwq)
282 struct cpu_workqueue_struct *cwq = __cwq; 282 struct cpu_workqueue_struct *cwq = __cwq;
283 DEFINE_WAIT(wait); 283 DEFINE_WAIT(wait);
284 284
285 if (!cwq->wq->freezeable) 285 if (cwq->wq->freezeable)
286 current->flags |= PF_NOFREEZE; 286 set_freezable();
287 287
288 set_user_nice(current, -5); 288 set_user_nice(current, -5);
289 289
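
Rather than marking non-freezeable workers PF_NOFREEZE, worker_thread() now opts freezeable workqueues in explicitly with set_freezable(), reflecting the freezer rework in this series where kernel threads are not freezable unless they ask to be. A minimal sketch of that pattern for an ordinary kthread (the loop body is illustrative):

	#include <linux/freezer.h>
	#include <linux/kthread.h>

	/* Illustrative freezable kthread: opt in with set_freezable(),
	 * then honour freeze requests from the main loop. */
	static int example_thread(void *unused)
	{
		set_freezable();
		while (!kthread_should_stop()) {
			try_to_freeze();
			/* ... do work ... */
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}
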
@@ -752,18 +752,17 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
752 if (cwq->thread == NULL) 752 if (cwq->thread == NULL)
753 return; 753 return;
754 754
755 flush_cpu_workqueue(cwq);
755 /* 756 /*
756 * If the caller is CPU_DEAD the single flush_cpu_workqueue() 757 * If the caller is CPU_DEAD and cwq->worklist was not empty,
757 * is not enough, a concurrent flush_workqueue() can insert a 758 * a concurrent flush_workqueue() can insert a barrier after us.
758 * barrier after us. 759 * However, in that case run_workqueue() won't return and check
760 * kthread_should_stop() until it flushes all work_struct's.
759 * When ->worklist becomes empty it is safe to exit because no 761 * When ->worklist becomes empty it is safe to exit because no
760 * more work_structs can be queued on this cwq: flush_workqueue 762 * more work_structs can be queued on this cwq: flush_workqueue
761 * checks list_empty(), and a "normal" queue_work() can't use 763 * checks list_empty(), and a "normal" queue_work() can't use
762 * a dead CPU. 764 * a dead CPU.
763 */ 765 */
764 while (flush_cpu_workqueue(cwq))
765 ;
766
767 kthread_stop(cwq->thread); 766 kthread_stop(cwq->thread);
768 cwq->thread = NULL; 767 cwq->thread = NULL;
769} 768}