Diffstat (limited to 'kernel')
41 files changed, 2256 insertions, 1090 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 5ce8851fac..eb0f9165b4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -392,6 +392,7 @@ static int kauditd_thread(void *dummy) | |||
392 | { | 392 | { |
393 | struct sk_buff *skb; | 393 | struct sk_buff *skb; |
394 | 394 | ||
395 | set_freezable(); | ||
395 | while (!kthread_should_stop()) { | 396 | while (!kthread_should_stop()) { |
396 | skb = skb_dequeue(&audit_skb_queue); | 397 | skb = skb_dequeue(&audit_skb_queue); |
397 | wake_up(&audit_backlog_wait); | 398 | wake_up(&audit_backlog_wait); |
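kernel/audit.c only gains the set_freezable() call here; kauditd now opts in to the freezer instead of relying on kernel threads being non-freezable by default. A minimal sketch of the freezable-kthread pattern this relies on (illustrative only, not part of the commit; the thread is expected to call try_to_freeze() at a safe point in its loop):

#include <linux/kthread.h>
#include <linux/freezer.h>

static int example_thread(void *unused)
{
	set_freezable();			/* allow the freezer to stop this kthread */
	while (!kthread_should_stop()) {
		/* ... dequeue work, sleep on a waitqueue ... */
		try_to_freeze();		/* park here during suspend/hibernation */
	}
	return 0;
}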
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index ce61f42354..1bf093dcff 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
@@ -1210,8 +1210,8 @@ static inline int audit_add_rule(struct audit_entry *entry, | |||
1210 | struct audit_entry *e; | 1210 | struct audit_entry *e; |
1211 | struct audit_field *inode_f = entry->rule.inode_f; | 1211 | struct audit_field *inode_f = entry->rule.inode_f; |
1212 | struct audit_watch *watch = entry->rule.watch; | 1212 | struct audit_watch *watch = entry->rule.watch; |
1213 | struct nameidata *ndp, *ndw; | 1213 | struct nameidata *ndp = NULL, *ndw = NULL; |
1214 | int h, err, putnd_needed = 0; | 1214 | int h, err; |
1215 | #ifdef CONFIG_AUDITSYSCALL | 1215 | #ifdef CONFIG_AUDITSYSCALL |
1216 | int dont_count = 0; | 1216 | int dont_count = 0; |
1217 | 1217 | ||
@@ -1239,7 +1239,6 @@ static inline int audit_add_rule(struct audit_entry *entry, | |||
1239 | err = audit_get_nd(watch->path, &ndp, &ndw); | 1239 | err = audit_get_nd(watch->path, &ndp, &ndw); |
1240 | if (err) | 1240 | if (err) |
1241 | goto error; | 1241 | goto error; |
1242 | putnd_needed = 1; | ||
1243 | } | 1242 | } |
1244 | 1243 | ||
1245 | mutex_lock(&audit_filter_mutex); | 1244 | mutex_lock(&audit_filter_mutex); |
@@ -1269,14 +1268,11 @@ static inline int audit_add_rule(struct audit_entry *entry, | |||
1269 | #endif | 1268 | #endif |
1270 | mutex_unlock(&audit_filter_mutex); | 1269 | mutex_unlock(&audit_filter_mutex); |
1271 | 1270 | ||
1272 | if (putnd_needed) | 1271 | audit_put_nd(ndp, ndw); /* NULL args OK */ |
1273 | audit_put_nd(ndp, ndw); | ||
1274 | |||
1275 | return 0; | 1272 | return 0; |
1276 | 1273 | ||
1277 | error: | 1274 | error: |
1278 | if (putnd_needed) | 1275 | audit_put_nd(ndp, ndw); /* NULL args OK */ |
1279 | audit_put_nd(ndp, ndw); | ||
1280 | if (watch) | 1276 | if (watch) |
1281 | audit_put_watch(watch); /* tmp watch, matches initial get */ | 1277 | audit_put_watch(watch); /* tmp watch, matches initial get */ |
1282 | return err; | 1278 | return err; |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b7640a5f38..145cbb79c4 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -153,7 +153,7 @@ struct audit_aux_data_execve { | |||
153 | struct audit_aux_data d; | 153 | struct audit_aux_data d; |
154 | int argc; | 154 | int argc; |
155 | int envc; | 155 | int envc; |
156 | char mem[0]; | 156 | struct mm_struct *mm; |
157 | }; | 157 | }; |
158 | 158 | ||
159 | struct audit_aux_data_socketcall { | 159 | struct audit_aux_data_socketcall { |
@@ -831,6 +831,55 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, | |||
831 | return rc; | 831 | return rc; |
832 | } | 832 | } |
833 | 833 | ||
834 | static void audit_log_execve_info(struct audit_buffer *ab, | ||
835 | struct audit_aux_data_execve *axi) | ||
836 | { | ||
837 | int i; | ||
838 | long len, ret; | ||
839 | const char __user *p = (const char __user *)axi->mm->arg_start; | ||
840 | char *buf; | ||
841 | |||
842 | if (axi->mm != current->mm) | ||
843 | return; /* execve failed, no additional info */ | ||
844 | |||
845 | for (i = 0; i < axi->argc; i++, p += len) { | ||
846 | len = strnlen_user(p, MAX_ARG_STRLEN); | ||
847 | /* | ||
848 | * We just created this mm, if we can't find the strings | ||
849 | * we just copied into it something is _very_ wrong. Similar | ||
850 | * for strings that are too long, we should not have created | ||
851 | * any. | ||
852 | */ | ||
853 | if (!len || len > MAX_ARG_STRLEN) { | ||
854 | WARN_ON(1); | ||
855 | send_sig(SIGKILL, current, 0); | ||
856 | } | ||
857 | |||
858 | buf = kmalloc(len, GFP_KERNEL); | ||
859 | if (!buf) { | ||
860 | audit_panic("out of memory for argv string\n"); | ||
861 | break; | ||
862 | } | ||
863 | |||
864 | ret = copy_from_user(buf, p, len); | ||
865 | /* | ||
866 | * There is no reason for this copy to be short. We just | ||
867 | * copied them here, and the mm hasn't been exposed to user- | ||
868 | * space yet. | ||
869 | */ | ||
870 | if (ret) { | ||
871 | WARN_ON(1); | ||
872 | send_sig(SIGKILL, current, 0); | ||
873 | } | ||
874 | |||
875 | audit_log_format(ab, "a%d=", i); | ||
876 | audit_log_untrustedstring(ab, buf); | ||
877 | audit_log_format(ab, "\n"); | ||
878 | |||
879 | kfree(buf); | ||
880 | } | ||
881 | } | ||
882 | |||
834 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) | 883 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) |
835 | { | 884 | { |
836 | int i, call_panic = 0; | 885 | int i, call_panic = 0; |
@@ -971,13 +1020,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
971 | 1020 | ||
972 | case AUDIT_EXECVE: { | 1021 | case AUDIT_EXECVE: { |
973 | struct audit_aux_data_execve *axi = (void *)aux; | 1022 | struct audit_aux_data_execve *axi = (void *)aux; |
974 | int i; | 1023 | audit_log_execve_info(ab, axi); |
975 | const char *p; | ||
976 | for (i = 0, p = axi->mem; i < axi->argc; i++) { | ||
977 | audit_log_format(ab, "a%d=", i); | ||
978 | p = audit_log_untrustedstring(ab, p); | ||
979 | audit_log_format(ab, "\n"); | ||
980 | } | ||
981 | break; } | 1024 | break; } |
982 | 1025 | ||
983 | case AUDIT_SOCKETCALL: { | 1026 | case AUDIT_SOCKETCALL: { |
@@ -1821,32 +1864,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode | |||
1821 | return 0; | 1864 | return 0; |
1822 | } | 1865 | } |
1823 | 1866 | ||
1867 | int audit_argv_kb = 32; | ||
1868 | |||
1824 | int audit_bprm(struct linux_binprm *bprm) | 1869 | int audit_bprm(struct linux_binprm *bprm) |
1825 | { | 1870 | { |
1826 | struct audit_aux_data_execve *ax; | 1871 | struct audit_aux_data_execve *ax; |
1827 | struct audit_context *context = current->audit_context; | 1872 | struct audit_context *context = current->audit_context; |
1828 | unsigned long p, next; | ||
1829 | void *to; | ||
1830 | 1873 | ||
1831 | if (likely(!audit_enabled || !context || context->dummy)) | 1874 | if (likely(!audit_enabled || !context || context->dummy)) |
1832 | return 0; | 1875 | return 0; |
1833 | 1876 | ||
1834 | ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, | 1877 | /* |
1835 | GFP_KERNEL); | 1878 | * Even though the stack code doesn't limit the arg+env size any more, |
1879 | * the audit code requires that _all_ arguments be logged in a single | ||
1880 | * netlink skb. Hence cap it :-( | ||
1881 | */ | ||
1882 | if (bprm->argv_len > (audit_argv_kb << 10)) | ||
1883 | return -E2BIG; | ||
1884 | |||
1885 | ax = kmalloc(sizeof(*ax), GFP_KERNEL); | ||
1836 | if (!ax) | 1886 | if (!ax) |
1837 | return -ENOMEM; | 1887 | return -ENOMEM; |
1838 | 1888 | ||
1839 | ax->argc = bprm->argc; | 1889 | ax->argc = bprm->argc; |
1840 | ax->envc = bprm->envc; | 1890 | ax->envc = bprm->envc; |
1841 | for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { | 1891 | ax->mm = bprm->mm; |
1842 | struct page *page = bprm->page[p / PAGE_SIZE]; | ||
1843 | void *kaddr = kmap(page); | ||
1844 | next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); | ||
1845 | memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); | ||
1846 | to += next - p; | ||
1847 | kunmap(page); | ||
1848 | } | ||
1849 | |||
1850 | ax->d.type = AUDIT_EXECVE; | 1892 | ax->d.type = AUDIT_EXECVE; |
1851 | ax->d.next = context->aux; | 1893 | ax->d.next = context->aux; |
1852 | context->aux = (void *)ax; | 1894 | context->aux = (void *)ax; |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 208cf3497c..181ae70860 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu) | |||
103 | write_unlock_irq(&tasklist_lock); | 103 | write_unlock_irq(&tasklist_lock); |
104 | } | 104 | } |
105 | 105 | ||
106 | struct take_cpu_down_param { | ||
107 | unsigned long mod; | ||
108 | void *hcpu; | ||
109 | }; | ||
110 | |||
106 | /* Take this CPU down. */ | 111 | /* Take this CPU down. */ |
107 | static int take_cpu_down(void *unused) | 112 | static int take_cpu_down(void *_param) |
108 | { | 113 | { |
114 | struct take_cpu_down_param *param = _param; | ||
109 | int err; | 115 | int err; |
110 | 116 | ||
117 | raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, | ||
118 | param->hcpu); | ||
111 | /* Ensure this CPU doesn't handle any more interrupts. */ | 119 | /* Ensure this CPU doesn't handle any more interrupts. */ |
112 | err = __cpu_disable(); | 120 | err = __cpu_disable(); |
113 | if (err < 0) | 121 | if (err < 0) |
@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
127 | cpumask_t old_allowed, tmp; | 135 | cpumask_t old_allowed, tmp; |
128 | void *hcpu = (void *)(long)cpu; | 136 | void *hcpu = (void *)(long)cpu; |
129 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | 137 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; |
138 | struct take_cpu_down_param tcd_param = { | ||
139 | .mod = mod, | ||
140 | .hcpu = hcpu, | ||
141 | }; | ||
130 | 142 | ||
131 | if (num_online_cpus() == 1) | 143 | if (num_online_cpus() == 1) |
132 | return -EBUSY; | 144 | return -EBUSY; |
@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
153 | set_cpus_allowed(current, tmp); | 165 | set_cpus_allowed(current, tmp); |
154 | 166 | ||
155 | mutex_lock(&cpu_bitmask_lock); | 167 | mutex_lock(&cpu_bitmask_lock); |
156 | p = __stop_machine_run(take_cpu_down, NULL, cpu); | 168 | p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); |
157 | mutex_unlock(&cpu_bitmask_lock); | 169 | mutex_unlock(&cpu_bitmask_lock); |
158 | 170 | ||
159 | if (IS_ERR(p) || cpu_online(cpu)) { | 171 | if (IS_ERR(p) || cpu_online(cpu)) { |
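__stop_machine_run() forwards only one opaque pointer to its callback, so take_cpu_down() now receives both the notifier modifier and the cpu cookie through a small parameter block. A sketch of that idiom with illustrative names:

/* bundle everything the stop_machine callback needs into one struct */
struct cb_args {
	unsigned long mod;
	void *hcpu;
};

static int callback(void *_args)
{
	struct cb_args *args = _args;	/* unpack on the callee side */

	/* ... use args->mod and args->hcpu ... */
	return 0;
}

/*
 * Caller side, mirroring _cpu_down() above:
 *	struct cb_args args = { .mod = mod, .hcpu = hcpu };
 *	p = __stop_machine_run(callback, &args, cpu);
 */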
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 824b1c01f4..57e6448b17 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -516,7 +516,7 @@ static void cpuset_release_agent(const char *pathbuf) | |||
516 | envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | 516 | envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; |
517 | envp[i] = NULL; | 517 | envp[i] = NULL; |
518 | 518 | ||
519 | call_usermodehelper(argv[0], argv, envp, 0); | 519 | call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); |
520 | kfree(pathbuf); | 520 | kfree(pathbuf); |
521 | } | 521 | } |
522 | 522 | ||
@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
2138 | static int cpuset_handle_cpuhp(struct notifier_block *nb, | 2138 | static int cpuset_handle_cpuhp(struct notifier_block *nb, |
2139 | unsigned long phase, void *cpu) | 2139 | unsigned long phase, void *cpu) |
2140 | { | 2140 | { |
2141 | if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) | ||
2142 | return NOTIFY_DONE; | ||
2143 | |||
2141 | common_cpu_mem_hotplug_unplug(); | 2144 | common_cpu_mem_hotplug_unplug(); |
2142 | return 0; | 2145 | return 0; |
2143 | } | 2146 | } |
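The CPU_DYING notification that cpuset now skips is the one raised from take_cpu_down() in kernel/cpu.c above; it runs in stop_machine context, where sleeping is not allowed, so callbacks that need to do blocking work must ignore it. A hedged sketch of such a hotplug callback (names are illustrative):

static int my_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	if (action == CPU_DYING || action == CPU_DYING_FROZEN)
		return NOTIFY_DONE;	/* atomic context: nothing sleepable here */

	/* ... blocking rebuild work for the other hotplug phases ... */
	return NOTIFY_OK;
}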
diff --git a/kernel/exit.c b/kernel/exit.c index 57626692cd..464c2b172f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/mempolicy.h> | 31 | #include <linux/mempolicy.h> |
32 | #include <linux/taskstats_kern.h> | 32 | #include <linux/taskstats_kern.h> |
33 | #include <linux/delayacct.h> | 33 | #include <linux/delayacct.h> |
34 | #include <linux/freezer.h> | ||
34 | #include <linux/cpuset.h> | 35 | #include <linux/cpuset.h> |
35 | #include <linux/syscalls.h> | 36 | #include <linux/syscalls.h> |
36 | #include <linux/signal.h> | 37 | #include <linux/signal.h> |
@@ -44,6 +45,7 @@ | |||
44 | #include <linux/resource.h> | 45 | #include <linux/resource.h> |
45 | #include <linux/blkdev.h> | 46 | #include <linux/blkdev.h> |
46 | #include <linux/task_io_accounting_ops.h> | 47 | #include <linux/task_io_accounting_ops.h> |
48 | #include <linux/freezer.h> | ||
47 | 49 | ||
48 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
49 | #include <asm/unistd.h> | 51 | #include <asm/unistd.h> |
@@ -387,6 +389,11 @@ void daemonize(const char *name, ...) | |||
387 | * they would be locked into memory. | 389 | * they would be locked into memory. |
388 | */ | 390 | */ |
389 | exit_mm(current); | 391 | exit_mm(current); |
392 | /* | ||
393 | * We don't want to have TIF_FREEZE set if the system-wide hibernation | ||
394 | * or suspend transition begins right now. | ||
395 | */ | ||
396 | current->flags |= PF_NOFREEZE; | ||
390 | 397 | ||
391 | set_special_pids(1, 1); | 398 | set_special_pids(1, 1); |
392 | proc_clear_tty(current); | 399 | proc_clear_tty(current); |
@@ -588,6 +595,8 @@ static void exit_mm(struct task_struct * tsk) | |||
588 | tsk->mm = NULL; | 595 | tsk->mm = NULL; |
589 | up_read(&mm->mmap_sem); | 596 | up_read(&mm->mmap_sem); |
590 | enter_lazy_tlb(mm, current); | 597 | enter_lazy_tlb(mm, current); |
598 | /* We don't want this task to be frozen prematurely */ | ||
599 | clear_freeze_flag(tsk); | ||
591 | task_unlock(tsk); | 600 | task_unlock(tsk); |
592 | mmput(mm); | 601 | mmput(mm); |
593 | } | 602 | } |
diff --git a/kernel/fork.c b/kernel/fork.c index 7c5c5888e0..4698389982 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -334,6 +334,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm) | |||
334 | atomic_set(&mm->mm_count, 1); | 334 | atomic_set(&mm->mm_count, 1); |
335 | init_rwsem(&mm->mmap_sem); | 335 | init_rwsem(&mm->mmap_sem); |
336 | INIT_LIST_HEAD(&mm->mmlist); | 336 | INIT_LIST_HEAD(&mm->mmlist); |
337 | mm->flags = (current->mm) ? current->mm->flags | ||
338 | : MMF_DUMP_FILTER_DEFAULT; | ||
337 | mm->core_waiters = 0; | 339 | mm->core_waiters = 0; |
338 | mm->nr_ptes = 0; | 340 | mm->nr_ptes = 0; |
339 | set_mm_counter(mm, file_rss, 0); | 341 | set_mm_counter(mm, file_rss, 0); |
@@ -923,7 +925,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) | |||
923 | { | 925 | { |
924 | unsigned long new_flags = p->flags; | 926 | unsigned long new_flags = p->flags; |
925 | 927 | ||
926 | new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); | 928 | new_flags &= ~PF_SUPERPRIV; |
927 | new_flags |= PF_FORKNOEXEC; | 929 | new_flags |= PF_FORKNOEXEC; |
928 | if (!(clone_flags & CLONE_PTRACE)) | 930 | if (!(clone_flags & CLONE_PTRACE)) |
929 | p->ptrace = 0; | 931 | p->ptrace = 0; |
diff --git a/kernel/futex.c b/kernel/futex.c index 5c3f45d07c..a12425051e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -346,15 +346,20 @@ static int futex_handle_fault(unsigned long address, | |||
346 | vma = find_vma(mm, address); | 346 | vma = find_vma(mm, address); |
347 | if (vma && address >= vma->vm_start && | 347 | if (vma && address >= vma->vm_start && |
348 | (vma->vm_flags & VM_WRITE)) { | 348 | (vma->vm_flags & VM_WRITE)) { |
349 | switch (handle_mm_fault(mm, vma, address, 1)) { | 349 | int fault; |
350 | case VM_FAULT_MINOR: | 350 | fault = handle_mm_fault(mm, vma, address, 1); |
351 | ret = 0; | 351 | if (unlikely((fault & VM_FAULT_ERROR))) { |
352 | current->min_flt++; | 352 | #if 0 |
353 | break; | 353 | /* XXX: let's do this when we verify it is OK */ |
354 | case VM_FAULT_MAJOR: | 354 | if (ret & VM_FAULT_OOM) |
355 | ret = -ENOMEM; | ||
356 | #endif | ||
357 | } else { | ||
355 | ret = 0; | 358 | ret = 0; |
356 | current->maj_flt++; | 359 | if (fault & VM_FAULT_MAJOR) |
357 | break; | 360 | current->maj_flt++; |
361 | else | ||
362 | current->min_flt++; | ||
358 | } | 363 | } |
359 | } | 364 | } |
360 | if (!fshared) | 365 | if (!fshared) |
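This hunk tracks handle_mm_fault() switching from the VM_FAULT_MINOR/VM_FAULT_MAJOR return values to a bitmask, so callers now test flag bits instead of switching on an enum. A condensed sketch of the new convention (the error mapping shown is an assumption; the hunk above deliberately keeps its -ENOMEM branch disabled for now):

static int fault_in_writable(struct mm_struct *mm, struct vm_area_struct *vma,
			     unsigned long address)
{
	int fault = handle_mm_fault(mm, vma, address, 1);	/* 1 == write access */

	if (fault & VM_FAULT_ERROR)
		return (fault & VM_FAULT_OOM) ? -ENOMEM : -EFAULT;
	if (fault & VM_FAULT_MAJOR)
		current->maj_flt++;		/* I/O was needed to satisfy the fault */
	else
		current->min_flt++;
	return 0;
}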
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 23c03f43e1..72d034258b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1406,7 +1406,7 @@ static void migrate_hrtimers(int cpu) | |||
1406 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | 1406 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, |
1407 | unsigned long action, void *hcpu) | 1407 | unsigned long action, void *hcpu) |
1408 | { | 1408 | { |
1409 | long cpu = (long)hcpu; | 1409 | unsigned int cpu = (long)hcpu; |
1410 | 1410 | ||
1411 | switch (action) { | 1411 | switch (action) { |
1412 | 1412 | ||
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 0d662475dd..474219a419 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -152,7 +152,7 @@ static unsigned int get_symbol_offset(unsigned long pos) | |||
152 | /* Lookup the address for this symbol. Returns 0 if not found. */ | 152 | /* Lookup the address for this symbol. Returns 0 if not found. */ |
153 | unsigned long kallsyms_lookup_name(const char *name) | 153 | unsigned long kallsyms_lookup_name(const char *name) |
154 | { | 154 | { |
155 | char namebuf[KSYM_NAME_LEN+1]; | 155 | char namebuf[KSYM_NAME_LEN]; |
156 | unsigned long i; | 156 | unsigned long i; |
157 | unsigned int off; | 157 | unsigned int off; |
158 | 158 | ||
@@ -248,7 +248,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
248 | { | 248 | { |
249 | const char *msym; | 249 | const char *msym; |
250 | 250 | ||
251 | namebuf[KSYM_NAME_LEN] = 0; | 251 | namebuf[KSYM_NAME_LEN - 1] = 0; |
252 | namebuf[0] = 0; | 252 | namebuf[0] = 0; |
253 | 253 | ||
254 | if (is_ksym_addr(addr)) { | 254 | if (is_ksym_addr(addr)) { |
@@ -265,7 +265,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
265 | /* see if it's in a module */ | 265 | /* see if it's in a module */ |
266 | msym = module_address_lookup(addr, symbolsize, offset, modname); | 266 | msym = module_address_lookup(addr, symbolsize, offset, modname); |
267 | if (msym) | 267 | if (msym) |
268 | return strncpy(namebuf, msym, KSYM_NAME_LEN); | 268 | return strncpy(namebuf, msym, KSYM_NAME_LEN - 1); |
269 | 269 | ||
270 | return NULL; | 270 | return NULL; |
271 | } | 271 | } |
@@ -273,7 +273,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
273 | int lookup_symbol_name(unsigned long addr, char *symname) | 273 | int lookup_symbol_name(unsigned long addr, char *symname) |
274 | { | 274 | { |
275 | symname[0] = '\0'; | 275 | symname[0] = '\0'; |
276 | symname[KSYM_NAME_LEN] = '\0'; | 276 | symname[KSYM_NAME_LEN - 1] = '\0'; |
277 | 277 | ||
278 | if (is_ksym_addr(addr)) { | 278 | if (is_ksym_addr(addr)) { |
279 | unsigned long pos; | 279 | unsigned long pos; |
@@ -291,7 +291,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
291 | unsigned long *offset, char *modname, char *name) | 291 | unsigned long *offset, char *modname, char *name) |
292 | { | 292 | { |
293 | name[0] = '\0'; | 293 | name[0] = '\0'; |
294 | name[KSYM_NAME_LEN] = '\0'; | 294 | name[KSYM_NAME_LEN - 1] = '\0'; |
295 | 295 | ||
296 | if (is_ksym_addr(addr)) { | 296 | if (is_ksym_addr(addr)) { |
297 | unsigned long pos; | 297 | unsigned long pos; |
@@ -312,7 +312,7 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
312 | char *modname; | 312 | char *modname; |
313 | const char *name; | 313 | const char *name; |
314 | unsigned long offset, size; | 314 | unsigned long offset, size; |
315 | char namebuf[KSYM_NAME_LEN+1]; | 315 | char namebuf[KSYM_NAME_LEN]; |
316 | 316 | ||
317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | 317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); |
318 | if (!name) | 318 | if (!name) |
@@ -342,8 +342,8 @@ struct kallsym_iter | |||
342 | unsigned long value; | 342 | unsigned long value; |
343 | unsigned int nameoff; /* If iterating in core kernel symbols */ | 343 | unsigned int nameoff; /* If iterating in core kernel symbols */ |
344 | char type; | 344 | char type; |
345 | char name[KSYM_NAME_LEN+1]; | 345 | char name[KSYM_NAME_LEN]; |
346 | char module_name[MODULE_NAME_LEN + 1]; | 346 | char module_name[MODULE_NAME_LEN]; |
347 | int exported; | 347 | int exported; |
348 | }; | 348 | }; |
349 | 349 | ||
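All of the kallsyms changes follow from KSYM_NAME_LEN being redefined to include the terminating NUL, so buffers are sized with it directly and the last writable index is KSYM_NAME_LEN - 1. A small illustrative caller under that assumption:

static void show_symbol(unsigned long address)
{
	unsigned long size, offset;
	char *modname;
	char namebuf[KSYM_NAME_LEN];	/* already counts the trailing NUL */
	const char *name;

	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
	if (name)
		printk("%s+%#lx/%#lx\n", name, offset, size);
}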
diff --git a/kernel/kmod.c b/kernel/kmod.c index 4d32eb0771..beedbdc646 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/resource.h> | 35 | #include <linux/resource.h> |
36 | #include <linux/notifier.h> | ||
37 | #include <linux/suspend.h> | ||
36 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
37 | 39 | ||
38 | extern int max_threads; | 40 | extern int max_threads; |
@@ -119,9 +121,10 @@ struct subprocess_info { | |||
119 | char **argv; | 121 | char **argv; |
120 | char **envp; | 122 | char **envp; |
121 | struct key *ring; | 123 | struct key *ring; |
122 | int wait; | 124 | enum umh_wait wait; |
123 | int retval; | 125 | int retval; |
124 | struct file *stdin; | 126 | struct file *stdin; |
127 | void (*cleanup)(char **argv, char **envp); | ||
125 | }; | 128 | }; |
126 | 129 | ||
127 | /* | 130 | /* |
@@ -180,6 +183,14 @@ static int ____call_usermodehelper(void *data) | |||
180 | do_exit(0); | 183 | do_exit(0); |
181 | } | 184 | } |
182 | 185 | ||
186 | void call_usermodehelper_freeinfo(struct subprocess_info *info) | ||
187 | { | ||
188 | if (info->cleanup) | ||
189 | (*info->cleanup)(info->argv, info->envp); | ||
190 | kfree(info); | ||
191 | } | ||
192 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); | ||
193 | |||
183 | /* Keventd can't block, but this (a child) can. */ | 194 | /* Keventd can't block, but this (a child) can. */ |
184 | static int wait_for_helper(void *data) | 195 | static int wait_for_helper(void *data) |
185 | { | 196 | { |
@@ -216,8 +227,8 @@ static int wait_for_helper(void *data) | |||
216 | sub_info->retval = ret; | 227 | sub_info->retval = ret; |
217 | } | 228 | } |
218 | 229 | ||
219 | if (sub_info->wait < 0) | 230 | if (sub_info->wait == UMH_NO_WAIT) |
220 | kfree(sub_info); | 231 | call_usermodehelper_freeinfo(sub_info); |
221 | else | 232 | else |
222 | complete(sub_info->complete); | 233 | complete(sub_info->complete); |
223 | return 0; | 234 | return 0; |
@@ -229,34 +240,204 @@ static void __call_usermodehelper(struct work_struct *work) | |||
229 | struct subprocess_info *sub_info = | 240 | struct subprocess_info *sub_info = |
230 | container_of(work, struct subprocess_info, work); | 241 | container_of(work, struct subprocess_info, work); |
231 | pid_t pid; | 242 | pid_t pid; |
232 | int wait = sub_info->wait; | 243 | enum umh_wait wait = sub_info->wait; |
233 | 244 | ||
234 | /* CLONE_VFORK: wait until the usermode helper has execve'd | 245 | /* CLONE_VFORK: wait until the usermode helper has execve'd |
235 | * successfully We need the data structures to stay around | 246 | * successfully We need the data structures to stay around |
236 | * until that is done. */ | 247 | * until that is done. */ |
237 | if (wait) | 248 | if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT) |
238 | pid = kernel_thread(wait_for_helper, sub_info, | 249 | pid = kernel_thread(wait_for_helper, sub_info, |
239 | CLONE_FS | CLONE_FILES | SIGCHLD); | 250 | CLONE_FS | CLONE_FILES | SIGCHLD); |
240 | else | 251 | else |
241 | pid = kernel_thread(____call_usermodehelper, sub_info, | 252 | pid = kernel_thread(____call_usermodehelper, sub_info, |
242 | CLONE_VFORK | SIGCHLD); | 253 | CLONE_VFORK | SIGCHLD); |
243 | 254 | ||
244 | if (wait < 0) | 255 | switch (wait) { |
245 | return; | 256 | case UMH_NO_WAIT: |
257 | break; | ||
246 | 258 | ||
247 | if (pid < 0) { | 259 | case UMH_WAIT_PROC: |
260 | if (pid > 0) | ||
261 | break; | ||
248 | sub_info->retval = pid; | 262 | sub_info->retval = pid; |
263 | /* FALLTHROUGH */ | ||
264 | |||
265 | case UMH_WAIT_EXEC: | ||
249 | complete(sub_info->complete); | 266 | complete(sub_info->complete); |
250 | } else if (!wait) | 267 | } |
251 | complete(sub_info->complete); | 268 | } |
269 | |||
270 | #ifdef CONFIG_PM | ||
271 | /* | ||
272 | * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY | ||
273 | * (used for preventing user land processes from being created after the user | ||
274 | * land has been frozen during a system-wide hibernation or suspend operation). | ||
275 | */ | ||
276 | static int usermodehelper_disabled; | ||
277 | |||
278 | /* Number of helpers running */ | ||
279 | static atomic_t running_helpers = ATOMIC_INIT(0); | ||
280 | |||
281 | /* | ||
282 | * Wait queue head used by usermodehelper_pm_callback() to wait for all running | ||
283 | * helpers to finish. | ||
284 | */ | ||
285 | static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); | ||
286 | |||
287 | /* | ||
288 | * Time to wait for running_helpers to become zero before the setting of | ||
289 | * usermodehelper_disabled in usermodehelper_pm_callback() fails | ||
290 | */ | ||
291 | #define RUNNING_HELPERS_TIMEOUT (5 * HZ) | ||
292 | |||
293 | static int usermodehelper_pm_callback(struct notifier_block *nfb, | ||
294 | unsigned long action, | ||
295 | void *ignored) | ||
296 | { | ||
297 | long retval; | ||
298 | |||
299 | switch (action) { | ||
300 | case PM_HIBERNATION_PREPARE: | ||
301 | case PM_SUSPEND_PREPARE: | ||
302 | usermodehelper_disabled = 1; | ||
303 | smp_mb(); | ||
304 | /* | ||
305 | * From now on call_usermodehelper_exec() won't start any new | ||
306 | * helpers, so it is sufficient if running_helpers turns out to | ||
307 | * be zero at one point (it may be increased later, but that | ||
308 | * doesn't matter). | ||
309 | */ | ||
310 | retval = wait_event_timeout(running_helpers_waitq, | ||
311 | atomic_read(&running_helpers) == 0, | ||
312 | RUNNING_HELPERS_TIMEOUT); | ||
313 | if (retval) { | ||
314 | return NOTIFY_OK; | ||
315 | } else { | ||
316 | usermodehelper_disabled = 0; | ||
317 | return NOTIFY_BAD; | ||
318 | } | ||
319 | case PM_POST_HIBERNATION: | ||
320 | case PM_POST_SUSPEND: | ||
321 | usermodehelper_disabled = 0; | ||
322 | return NOTIFY_OK; | ||
323 | } | ||
324 | |||
325 | return NOTIFY_DONE; | ||
326 | } | ||
327 | |||
328 | static void helper_lock(void) | ||
329 | { | ||
330 | atomic_inc(&running_helpers); | ||
331 | smp_mb__after_atomic_inc(); | ||
332 | } | ||
333 | |||
334 | static void helper_unlock(void) | ||
335 | { | ||
336 | if (atomic_dec_and_test(&running_helpers)) | ||
337 | wake_up(&running_helpers_waitq); | ||
338 | } | ||
339 | |||
340 | static void register_pm_notifier_callback(void) | ||
341 | { | ||
342 | pm_notifier(usermodehelper_pm_callback, 0); | ||
252 | } | 343 | } |
344 | #else /* CONFIG_PM */ | ||
345 | #define usermodehelper_disabled 0 | ||
346 | |||
347 | static inline void helper_lock(void) {} | ||
348 | static inline void helper_unlock(void) {} | ||
349 | static inline void register_pm_notifier_callback(void) {} | ||
350 | #endif /* CONFIG_PM */ | ||
253 | 351 | ||
254 | /** | 352 | /** |
255 | * call_usermodehelper_keys - start a usermode application | 353 | * call_usermodehelper_setup - prepare to call a usermode helper |
256 | * @path: pathname for the application | 354 | * @path - path to usermode executable |
257 | * @argv: null-terminated argument list | 355 | * @argv - arg vector for process |
258 | * @envp: null-terminated environment list | 356 | * @envp - environment for process |
259 | * @session_keyring: session keyring for process (NULL for an empty keyring) | 357 | * |
358 | * Returns either NULL on allocation failure, or a subprocess_info | ||
359 | * structure. This should be passed to call_usermodehelper_exec to | ||
360 | * exec the process and free the structure. | ||
361 | */ | ||
362 | struct subprocess_info *call_usermodehelper_setup(char *path, | ||
363 | char **argv, char **envp) | ||
364 | { | ||
365 | struct subprocess_info *sub_info; | ||
366 | sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); | ||
367 | if (!sub_info) | ||
368 | goto out; | ||
369 | |||
370 | INIT_WORK(&sub_info->work, __call_usermodehelper); | ||
371 | sub_info->path = path; | ||
372 | sub_info->argv = argv; | ||
373 | sub_info->envp = envp; | ||
374 | |||
375 | out: | ||
376 | return sub_info; | ||
377 | } | ||
378 | EXPORT_SYMBOL(call_usermodehelper_setup); | ||
379 | |||
380 | /** | ||
381 | * call_usermodehelper_setkeys - set the session keys for usermode helper | ||
382 | * @info: a subprocess_info returned by call_usermodehelper_setup | ||
383 | * @session_keyring: the session keyring for the process | ||
384 | */ | ||
385 | void call_usermodehelper_setkeys(struct subprocess_info *info, | ||
386 | struct key *session_keyring) | ||
387 | { | ||
388 | info->ring = session_keyring; | ||
389 | } | ||
390 | EXPORT_SYMBOL(call_usermodehelper_setkeys); | ||
391 | |||
392 | /** | ||
393 | * call_usermodehelper_setcleanup - set a cleanup function | ||
394 | * @info: a subprocess_info returned by call_usermodehelper_setup | ||
395 | * @cleanup: a cleanup function | ||
396 | * | ||
397 | * The cleanup function is called just before the subprocess_info is about to | ||
398 | * be freed. This can be used for freeing the argv and envp. The | ||
399 | * function must be runnable in either a process context or the | ||
400 | * context in which call_usermodehelper_exec is called. | ||
401 | */ | ||
402 | void call_usermodehelper_setcleanup(struct subprocess_info *info, | ||
403 | void (*cleanup)(char **argv, char **envp)) | ||
404 | { | ||
405 | info->cleanup = cleanup; | ||
406 | } | ||
407 | EXPORT_SYMBOL(call_usermodehelper_setcleanup); | ||
408 | |||
409 | /** | ||
410 | * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin | ||
411 | * @sub_info: a subprocess_info returned by call_usermodehelper_setup | ||
412 | * @filp: set to the write-end of a pipe | ||
413 | * | ||
414 | * This constructs a pipe, and sets the read end to be the stdin of the | ||
415 | * subprocess, and returns the write-end in *@filp. | ||
416 | */ | ||
417 | int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, | ||
418 | struct file **filp) | ||
419 | { | ||
420 | struct file *f; | ||
421 | |||
422 | f = create_write_pipe(); | ||
423 | if (IS_ERR(f)) | ||
424 | return PTR_ERR(f); | ||
425 | *filp = f; | ||
426 | |||
427 | f = create_read_pipe(f); | ||
428 | if (IS_ERR(f)) { | ||
429 | free_write_pipe(*filp); | ||
430 | return PTR_ERR(f); | ||
431 | } | ||
432 | sub_info->stdin = f; | ||
433 | |||
434 | return 0; | ||
435 | } | ||
436 | EXPORT_SYMBOL(call_usermodehelper_stdinpipe); | ||
437 | |||
438 | /** | ||
439 | * call_usermodehelper_exec - start a usermode application | ||
440 | * @sub_info: information about the subprocess | ||
260 | * @wait: wait for the application to finish and return status. | 441 | * @wait: wait for the application to finish and return status. |
261 | * when -1 don't wait at all, but you get no useful error back when | 442 | * when -1 don't wait at all, but you get no useful error back when |
262 | * the program couldn't be exec'ed. This makes it safe to call | 443 | * the program couldn't be exec'ed. This makes it safe to call |
@@ -265,81 +446,70 @@ static void __call_usermodehelper(struct work_struct *work) | |||
265 | * Runs a user-space application. The application is started | 446 | * Runs a user-space application. The application is started |
266 | * asynchronously if wait is not set, and runs as a child of keventd. | 447 | * asynchronously if wait is not set, and runs as a child of keventd. |
267 | * (ie. it runs with full root capabilities). | 448 | * (ie. it runs with full root capabilities). |
268 | * | ||
269 | * Must be called from process context. Returns a negative error code | ||
270 | * if program was not execed successfully, or 0. | ||
271 | */ | 449 | */ |
272 | int call_usermodehelper_keys(char *path, char **argv, char **envp, | 450 | int call_usermodehelper_exec(struct subprocess_info *sub_info, |
273 | struct key *session_keyring, int wait) | 451 | enum umh_wait wait) |
274 | { | 452 | { |
275 | DECLARE_COMPLETION_ONSTACK(done); | 453 | DECLARE_COMPLETION_ONSTACK(done); |
276 | struct subprocess_info *sub_info; | ||
277 | int retval; | 454 | int retval; |
278 | 455 | ||
279 | if (!khelper_wq) | 456 | helper_lock(); |
280 | return -EBUSY; | 457 | if (sub_info->path[0] == '\0') { |
281 | 458 | retval = 0; | |
282 | if (path[0] == '\0') | 459 | goto out; |
283 | return 0; | 460 | } |
284 | 461 | ||
285 | sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); | 462 | if (!khelper_wq || usermodehelper_disabled) { |
286 | if (!sub_info) | 463 | retval = -EBUSY; |
287 | return -ENOMEM; | 464 | goto out; |
465 | } | ||
288 | 466 | ||
289 | INIT_WORK(&sub_info->work, __call_usermodehelper); | ||
290 | sub_info->complete = &done; | 467 | sub_info->complete = &done; |
291 | sub_info->path = path; | ||
292 | sub_info->argv = argv; | ||
293 | sub_info->envp = envp; | ||
294 | sub_info->ring = session_keyring; | ||
295 | sub_info->wait = wait; | 468 | sub_info->wait = wait; |
296 | 469 | ||
297 | queue_work(khelper_wq, &sub_info->work); | 470 | queue_work(khelper_wq, &sub_info->work); |
298 | if (wait < 0) /* task has freed sub_info */ | 471 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ |
299 | return 0; | 472 | return 0; |
300 | wait_for_completion(&done); | 473 | wait_for_completion(&done); |
301 | retval = sub_info->retval; | 474 | retval = sub_info->retval; |
302 | kfree(sub_info); | 475 | |
476 | out: | ||
477 | call_usermodehelper_freeinfo(sub_info); | ||
478 | helper_unlock(); | ||
303 | return retval; | 479 | return retval; |
304 | } | 480 | } |
305 | EXPORT_SYMBOL(call_usermodehelper_keys); | 481 | EXPORT_SYMBOL(call_usermodehelper_exec); |
306 | 482 | ||
483 | /** | ||
484 | * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin | ||
485 | * @path: path to usermode executable | ||
486 | * @argv: arg vector for process | ||
487 | * @envp: environment for process | ||
488 | * @filp: set to the write-end of a pipe | ||
489 | * | ||
490 | * This is a simple wrapper which executes a usermode-helper function | ||
491 | * with a pipe as stdin. It is implemented entirely in terms of | ||
492 | * lower-level call_usermodehelper_* functions. | ||
493 | */ | ||
307 | int call_usermodehelper_pipe(char *path, char **argv, char **envp, | 494 | int call_usermodehelper_pipe(char *path, char **argv, char **envp, |
308 | struct file **filp) | 495 | struct file **filp) |
309 | { | 496 | { |
310 | DECLARE_COMPLETION(done); | 497 | struct subprocess_info *sub_info; |
311 | struct subprocess_info sub_info = { | 498 | int ret; |
312 | .work = __WORK_INITIALIZER(sub_info.work, | ||
313 | __call_usermodehelper), | ||
314 | .complete = &done, | ||
315 | .path = path, | ||
316 | .argv = argv, | ||
317 | .envp = envp, | ||
318 | .retval = 0, | ||
319 | }; | ||
320 | struct file *f; | ||
321 | |||
322 | if (!khelper_wq) | ||
323 | return -EBUSY; | ||
324 | 499 | ||
325 | if (path[0] == '\0') | 500 | sub_info = call_usermodehelper_setup(path, argv, envp); |
326 | return 0; | 501 | if (sub_info == NULL) |
502 | return -ENOMEM; | ||
327 | 503 | ||
328 | f = create_write_pipe(); | 504 | ret = call_usermodehelper_stdinpipe(sub_info, filp); |
329 | if (IS_ERR(f)) | 505 | if (ret < 0) |
330 | return PTR_ERR(f); | 506 | goto out; |
331 | *filp = f; | ||
332 | 507 | ||
333 | f = create_read_pipe(f); | 508 | return call_usermodehelper_exec(sub_info, 1); |
334 | if (IS_ERR(f)) { | ||
335 | free_write_pipe(*filp); | ||
336 | return PTR_ERR(f); | ||
337 | } | ||
338 | sub_info.stdin = f; | ||
339 | 509 | ||
340 | queue_work(khelper_wq, &sub_info.work); | 510 | out: |
341 | wait_for_completion(&done); | 511 | call_usermodehelper_freeinfo(sub_info); |
342 | return sub_info.retval; | 512 | return ret; |
343 | } | 513 | } |
344 | EXPORT_SYMBOL(call_usermodehelper_pipe); | 514 | EXPORT_SYMBOL(call_usermodehelper_pipe); |
345 | 515 | ||
@@ -347,4 +517,5 @@ void __init usermodehelper_init(void) | |||
347 | { | 517 | { |
348 | khelper_wq = create_singlethread_workqueue("khelper"); | 518 | khelper_wq = create_singlethread_workqueue("khelper"); |
349 | BUG_ON(!khelper_wq); | 519 | BUG_ON(!khelper_wq); |
520 | register_pm_notifier_callback(); | ||
350 | } | 521 | } |
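Taken together, the kmod.c changes replace the monolithic call_usermodehelper_keys() with a setup/exec pair plus optional setkeys/setcleanup/stdinpipe steps, and gate execution on the new PM notifier. A hedged usage sketch of the new interface; the helper path is made up and the keyring step is optional:

static int run_example_helper(struct key *keyring)
{
	struct subprocess_info *info;
	char *argv[] = { "/sbin/example-helper", NULL };	/* hypothetical path */
	char *envp[] = { "HOME=/",
			 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

	info = call_usermodehelper_setup(argv[0], argv, envp);
	if (!info)
		return -ENOMEM;

	call_usermodehelper_setkeys(info, keyring);		/* optional */

	/* exec frees @info on both the success and the error path */
	return call_usermodehelper_exec(info, UMH_WAIT_EXEC);
}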
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9e47d8c493..3e9f513a72 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -675,9 +675,18 @@ static struct notifier_block kprobe_exceptions_nb = { | |||
675 | .priority = 0x7fffffff /* we need to be notified first */ | 675 | .priority = 0x7fffffff /* we need to be notified first */ |
676 | }; | 676 | }; |
677 | 677 | ||
678 | unsigned long __weak arch_deref_entry_point(void *entry) | ||
679 | { | ||
680 | return (unsigned long)entry; | ||
681 | } | ||
678 | 682 | ||
679 | int __kprobes register_jprobe(struct jprobe *jp) | 683 | int __kprobes register_jprobe(struct jprobe *jp) |
680 | { | 684 | { |
685 | unsigned long addr = arch_deref_entry_point(jp->entry); | ||
686 | |||
687 | if (!kernel_text_address(addr)) | ||
688 | return -EINVAL; | ||
689 | |||
681 | /* Todo: Verify probepoint is a function entry point */ | 690 | /* Todo: Verify probepoint is a function entry point */ |
682 | jp->kp.pre_handler = setjmp_pre_handler; | 691 | jp->kp.pre_handler = setjmp_pre_handler; |
683 | jp->kp.break_handler = longjmp_break_handler; | 692 | jp->kp.break_handler = longjmp_break_handler; |
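arch_deref_entry_point() is added as a __weak hook so that architectures whose C function pointers are really descriptors can hand register_jprobe() the actual text address before the kernel_text_address() sanity check. A hedged sketch of what an override could look like (the descriptor layout is purely illustrative):

unsigned long arch_deref_entry_point(void *entry)
{
	/* assumed ABI: the pointer addresses a descriptor whose first
	 * word holds the real entry address */
	return ((unsigned long *)entry)[0];
}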
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 559deca5ed..2565e1b6dd 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -62,6 +62,28 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page) | |||
62 | KERNEL_ATTR_RO(kexec_crash_loaded); | 62 | KERNEL_ATTR_RO(kexec_crash_loaded); |
63 | #endif /* CONFIG_KEXEC */ | 63 | #endif /* CONFIG_KEXEC */ |
64 | 64 | ||
65 | /* | ||
66 | * Make /sys/kernel/notes give the raw contents of our kernel .notes section. | ||
67 | */ | ||
68 | extern const char __start_notes __attribute__((weak)); | ||
69 | extern const char __stop_notes __attribute__((weak)); | ||
70 | #define notes_size (&__stop_notes - &__start_notes) | ||
71 | |||
72 | static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, | ||
73 | char *buf, loff_t off, size_t count) | ||
74 | { | ||
75 | memcpy(buf, &__start_notes + off, count); | ||
76 | return count; | ||
77 | } | ||
78 | |||
79 | static struct bin_attribute notes_attr = { | ||
80 | .attr = { | ||
81 | .name = "notes", | ||
82 | .mode = S_IRUGO, | ||
83 | }, | ||
84 | .read = ¬es_read, | ||
85 | }; | ||
86 | |||
65 | decl_subsys(kernel, NULL, NULL); | 87 | decl_subsys(kernel, NULL, NULL); |
66 | EXPORT_SYMBOL_GPL(kernel_subsys); | 88 | EXPORT_SYMBOL_GPL(kernel_subsys); |
67 | 89 | ||
@@ -88,6 +110,12 @@ static int __init ksysfs_init(void) | |||
88 | error = sysfs_create_group(&kernel_subsys.kobj, | 110 | error = sysfs_create_group(&kernel_subsys.kobj, |
89 | &kernel_attr_group); | 111 | &kernel_attr_group); |
90 | 112 | ||
113 | if (!error && notes_size > 0) { | ||
114 | notes_attr.size = notes_size; | ||
115 | error = sysfs_create_bin_file(&kernel_subsys.kobj, | ||
116 | ¬es_attr); | ||
117 | } | ||
118 | |||
91 | return error; | 119 | return error; |
92 | } | 120 | } |
93 | 121 | ||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 1a5ff2211d..734da579ad 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -5,7 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Started by Ingo Molnar: | 6 | * Started by Ingo Molnar: |
7 | * | 7 | * |
8 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | 10 | * |
10 | * this code maps all the lock dependencies as they occur in a live kernel | 11 | * this code maps all the lock dependencies as they occur in a live kernel |
11 | * and will warn about the following classes of locking bugs: | 12 | * and will warn about the following classes of locking bugs: |
@@ -37,11 +38,26 @@ | |||
37 | #include <linux/debug_locks.h> | 38 | #include <linux/debug_locks.h> |
38 | #include <linux/irqflags.h> | 39 | #include <linux/irqflags.h> |
39 | #include <linux/utsname.h> | 40 | #include <linux/utsname.h> |
41 | #include <linux/hash.h> | ||
40 | 42 | ||
41 | #include <asm/sections.h> | 43 | #include <asm/sections.h> |
42 | 44 | ||
43 | #include "lockdep_internals.h" | 45 | #include "lockdep_internals.h" |
44 | 46 | ||
47 | #ifdef CONFIG_PROVE_LOCKING | ||
48 | int prove_locking = 1; | ||
49 | module_param(prove_locking, int, 0644); | ||
50 | #else | ||
51 | #define prove_locking 0 | ||
52 | #endif | ||
53 | |||
54 | #ifdef CONFIG_LOCK_STAT | ||
55 | int lock_stat = 1; | ||
56 | module_param(lock_stat, int, 0644); | ||
57 | #else | ||
58 | #define lock_stat 0 | ||
59 | #endif | ||
60 | |||
45 | /* | 61 | /* |
46 | * lockdep_lock: protects the lockdep graph, the hashes and the | 62 | * lockdep_lock: protects the lockdep graph, the hashes and the |
47 | * class/list/hash allocators. | 63 | * class/list/hash allocators. |
@@ -96,23 +112,6 @@ unsigned long nr_list_entries; | |||
96 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; | 112 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; |
97 | 113 | ||
98 | /* | 114 | /* |
99 | * Allocate a lockdep entry. (assumes the graph_lock held, returns | ||
100 | * with NULL on failure) | ||
101 | */ | ||
102 | static struct lock_list *alloc_list_entry(void) | ||
103 | { | ||
104 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { | ||
105 | if (!debug_locks_off_graph_unlock()) | ||
106 | return NULL; | ||
107 | |||
108 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); | ||
109 | printk("turning off the locking correctness validator.\n"); | ||
110 | return NULL; | ||
111 | } | ||
112 | return list_entries + nr_list_entries++; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * All data structures here are protected by the global debug_lock. | 115 | * All data structures here are protected by the global debug_lock. |
117 | * | 116 | * |
118 | * Mutex key structs only get allocated, once during bootup, and never | 117 | * Mutex key structs only get allocated, once during bootup, and never |
@@ -121,6 +120,117 @@ static struct lock_list *alloc_list_entry(void) | |||
121 | unsigned long nr_lock_classes; | 120 | unsigned long nr_lock_classes; |
122 | static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; | 121 | static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; |
123 | 122 | ||
123 | #ifdef CONFIG_LOCK_STAT | ||
124 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); | ||
125 | |||
126 | static int lock_contention_point(struct lock_class *class, unsigned long ip) | ||
127 | { | ||
128 | int i; | ||
129 | |||
130 | for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { | ||
131 | if (class->contention_point[i] == 0) { | ||
132 | class->contention_point[i] = ip; | ||
133 | break; | ||
134 | } | ||
135 | if (class->contention_point[i] == ip) | ||
136 | break; | ||
137 | } | ||
138 | |||
139 | return i; | ||
140 | } | ||
141 | |||
142 | static void lock_time_inc(struct lock_time *lt, s64 time) | ||
143 | { | ||
144 | if (time > lt->max) | ||
145 | lt->max = time; | ||
146 | |||
147 | if (time < lt->min || !lt->min) | ||
148 | lt->min = time; | ||
149 | |||
150 | lt->total += time; | ||
151 | lt->nr++; | ||
152 | } | ||
153 | |||
154 | static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) | ||
155 | { | ||
156 | dst->min += src->min; | ||
157 | dst->max += src->max; | ||
158 | dst->total += src->total; | ||
159 | dst->nr += src->nr; | ||
160 | } | ||
161 | |||
162 | struct lock_class_stats lock_stats(struct lock_class *class) | ||
163 | { | ||
164 | struct lock_class_stats stats; | ||
165 | int cpu, i; | ||
166 | |||
167 | memset(&stats, 0, sizeof(struct lock_class_stats)); | ||
168 | for_each_possible_cpu(cpu) { | ||
169 | struct lock_class_stats *pcs = | ||
170 | &per_cpu(lock_stats, cpu)[class - lock_classes]; | ||
171 | |||
172 | for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) | ||
173 | stats.contention_point[i] += pcs->contention_point[i]; | ||
174 | |||
175 | lock_time_add(&pcs->read_waittime, &stats.read_waittime); | ||
176 | lock_time_add(&pcs->write_waittime, &stats.write_waittime); | ||
177 | |||
178 | lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); | ||
179 | lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); | ||
180 | |||
181 | for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) | ||
182 | stats.bounces[i] += pcs->bounces[i]; | ||
183 | } | ||
184 | |||
185 | return stats; | ||
186 | } | ||
187 | |||
188 | void clear_lock_stats(struct lock_class *class) | ||
189 | { | ||
190 | int cpu; | ||
191 | |||
192 | for_each_possible_cpu(cpu) { | ||
193 | struct lock_class_stats *cpu_stats = | ||
194 | &per_cpu(lock_stats, cpu)[class - lock_classes]; | ||
195 | |||
196 | memset(cpu_stats, 0, sizeof(struct lock_class_stats)); | ||
197 | } | ||
198 | memset(class->contention_point, 0, sizeof(class->contention_point)); | ||
199 | } | ||
200 | |||
201 | static struct lock_class_stats *get_lock_stats(struct lock_class *class) | ||
202 | { | ||
203 | return &get_cpu_var(lock_stats)[class - lock_classes]; | ||
204 | } | ||
205 | |||
206 | static void put_lock_stats(struct lock_class_stats *stats) | ||
207 | { | ||
208 | put_cpu_var(lock_stats); | ||
209 | } | ||
210 | |||
211 | static void lock_release_holdtime(struct held_lock *hlock) | ||
212 | { | ||
213 | struct lock_class_stats *stats; | ||
214 | s64 holdtime; | ||
215 | |||
216 | if (!lock_stat) | ||
217 | return; | ||
218 | |||
219 | holdtime = sched_clock() - hlock->holdtime_stamp; | ||
220 | |||
221 | stats = get_lock_stats(hlock->class); | ||
222 | if (hlock->read) | ||
223 | lock_time_inc(&stats->read_holdtime, holdtime); | ||
224 | else | ||
225 | lock_time_inc(&stats->write_holdtime, holdtime); | ||
226 | put_lock_stats(stats); | ||
227 | } | ||
228 | #else | ||
229 | static inline void lock_release_holdtime(struct held_lock *hlock) | ||
230 | { | ||
231 | } | ||
232 | #endif | ||
233 | |||
124 | /* | 234 | /* |
125 | * We keep a global list of all lock classes. The list only grows, | 235 | * We keep a global list of all lock classes. The list only grows, |
126 | * never shrinks. The list is only accessed with the lockdep | 236 | * never shrinks. The list is only accessed with the lockdep |
@@ -133,24 +243,18 @@ LIST_HEAD(all_lock_classes); | |||
133 | */ | 243 | */ |
134 | #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) | 244 | #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) |
135 | #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) | 245 | #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) |
136 | #define CLASSHASH_MASK (CLASSHASH_SIZE - 1) | 246 | #define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) |
137 | #define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) | ||
138 | #define classhashentry(key) (classhash_table + __classhashfn((key))) | 247 | #define classhashentry(key) (classhash_table + __classhashfn((key))) |
139 | 248 | ||
140 | static struct list_head classhash_table[CLASSHASH_SIZE]; | 249 | static struct list_head classhash_table[CLASSHASH_SIZE]; |
141 | 250 | ||
142 | unsigned long nr_lock_chains; | ||
143 | static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; | ||
144 | |||
145 | /* | 251 | /* |
146 | * We put the lock dependency chains into a hash-table as well, to cache | 252 | * We put the lock dependency chains into a hash-table as well, to cache |
147 | * their existence: | 253 | * their existence: |
148 | */ | 254 | */ |
149 | #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) | 255 | #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) |
150 | #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) | 256 | #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) |
151 | #define CHAINHASH_MASK (CHAINHASH_SIZE - 1) | 257 | #define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) |
152 | #define __chainhashfn(chain) \ | ||
153 | (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) | ||
154 | #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) | 258 | #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) |
155 | 259 | ||
156 | static struct list_head chainhash_table[CHAINHASH_SIZE]; | 260 | static struct list_head chainhash_table[CHAINHASH_SIZE]; |
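The open-coded fold-and-mask hashes for the class and chain tables give way to the generic hash_long() from <linux/hash.h>, which already yields an index of the requested bit width, so the *_MASK constants disappear. A tiny illustrative helper built on the definitions above:

#include <linux/hash.h>

/* bucket index lies in [0, 1UL << CLASSHASH_BITS); no explicit masking needed */
static struct list_head *class_bucket(const void *key)
{
	return classhash_table + hash_long((unsigned long)key, CLASSHASH_BITS);
}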
@@ -223,26 +327,6 @@ static int verbose(struct lock_class *class) | |||
223 | return 0; | 327 | return 0; |
224 | } | 328 | } |
225 | 329 | ||
226 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
227 | |||
228 | static int hardirq_verbose(struct lock_class *class) | ||
229 | { | ||
230 | #if HARDIRQ_VERBOSE | ||
231 | return class_filter(class); | ||
232 | #endif | ||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | static int softirq_verbose(struct lock_class *class) | ||
237 | { | ||
238 | #if SOFTIRQ_VERBOSE | ||
239 | return class_filter(class); | ||
240 | #endif | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | #endif | ||
245 | |||
246 | /* | 330 | /* |
247 | * Stack-trace: tightly packed array of stack backtrace | 331 | * Stack-trace: tightly packed array of stack backtrace |
248 | * addresses. Protected by the graph_lock. | 332 | * addresses. Protected by the graph_lock. |
@@ -291,6 +375,11 @@ unsigned int max_recursion_depth; | |||
291 | * about it later on, in lockdep_info(). | 375 | * about it later on, in lockdep_info(). |
292 | */ | 376 | */ |
293 | static int lockdep_init_error; | 377 | static int lockdep_init_error; |
378 | static unsigned long lockdep_init_trace_data[20]; | ||
379 | static struct stack_trace lockdep_init_trace = { | ||
380 | .max_entries = ARRAY_SIZE(lockdep_init_trace_data), | ||
381 | .entries = lockdep_init_trace_data, | ||
382 | }; | ||
294 | 383 | ||
295 | /* | 384 | /* |
296 | * Various lockdep statistics: | 385 | * Various lockdep statistics: |
@@ -379,7 +468,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4 | |||
379 | 468 | ||
380 | static void print_lock_name(struct lock_class *class) | 469 | static void print_lock_name(struct lock_class *class) |
381 | { | 470 | { |
382 | char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4; | 471 | char str[KSYM_NAME_LEN], c1, c2, c3, c4; |
383 | const char *name; | 472 | const char *name; |
384 | 473 | ||
385 | get_usage_chars(class, &c1, &c2, &c3, &c4); | 474 | get_usage_chars(class, &c1, &c2, &c3, &c4); |
@@ -401,7 +490,7 @@ static void print_lock_name(struct lock_class *class) | |||
401 | static void print_lockdep_cache(struct lockdep_map *lock) | 490 | static void print_lockdep_cache(struct lockdep_map *lock) |
402 | { | 491 | { |
403 | const char *name; | 492 | const char *name; |
404 | char str[KSYM_NAME_LEN + 1]; | 493 | char str[KSYM_NAME_LEN]; |
405 | 494 | ||
406 | name = lock->name; | 495 | name = lock->name; |
407 | if (!name) | 496 | if (!name) |
@@ -482,6 +571,262 @@ static void print_lock_dependencies(struct lock_class *class, int depth) | |||
482 | } | 571 | } |
483 | } | 572 | } |
484 | 573 | ||
574 | static void print_kernel_version(void) | ||
575 | { | ||
576 | printk("%s %.*s\n", init_utsname()->release, | ||
577 | (int)strcspn(init_utsname()->version, " "), | ||
578 | init_utsname()->version); | ||
579 | } | ||
580 | |||
581 | static int very_verbose(struct lock_class *class) | ||
582 | { | ||
583 | #if VERY_VERBOSE | ||
584 | return class_filter(class); | ||
585 | #endif | ||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * Is this the address of a static object: | ||
591 | */ | ||
592 | static int static_obj(void *obj) | ||
593 | { | ||
594 | unsigned long start = (unsigned long) &_stext, | ||
595 | end = (unsigned long) &_end, | ||
596 | addr = (unsigned long) obj; | ||
597 | #ifdef CONFIG_SMP | ||
598 | int i; | ||
599 | #endif | ||
600 | |||
601 | /* | ||
602 | * static variable? | ||
603 | */ | ||
604 | if ((addr >= start) && (addr < end)) | ||
605 | return 1; | ||
606 | |||
607 | #ifdef CONFIG_SMP | ||
608 | /* | ||
609 | * percpu var? | ||
610 | */ | ||
611 | for_each_possible_cpu(i) { | ||
612 | start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); | ||
613 | end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM | ||
614 | + per_cpu_offset(i); | ||
615 | |||
616 | if ((addr >= start) && (addr < end)) | ||
617 | return 1; | ||
618 | } | ||
619 | #endif | ||
620 | |||
621 | /* | ||
622 | * module var? | ||
623 | */ | ||
624 | return is_module_address(addr); | ||
625 | } | ||
626 | |||
627 | /* | ||
628 | * To make lock name printouts unique, we calculate a unique | ||
629 | * class->name_version generation counter: | ||
630 | */ | ||
631 | static int count_matching_names(struct lock_class *new_class) | ||
632 | { | ||
633 | struct lock_class *class; | ||
634 | int count = 0; | ||
635 | |||
636 | if (!new_class->name) | ||
637 | return 0; | ||
638 | |||
639 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
640 | if (new_class->key - new_class->subclass == class->key) | ||
641 | return class->name_version; | ||
642 | if (class->name && !strcmp(class->name, new_class->name)) | ||
643 | count = max(count, class->name_version); | ||
644 | } | ||
645 | |||
646 | return count + 1; | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | * Register a lock's class in the hash-table, if the class is not present | ||
651 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
652 | * itself, so actual lookup of the hash should be once per lock object. | ||
653 | */ | ||
654 | static inline struct lock_class * | ||
655 | look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | ||
656 | { | ||
657 | struct lockdep_subclass_key *key; | ||
658 | struct list_head *hash_head; | ||
659 | struct lock_class *class; | ||
660 | |||
661 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
662 | /* | ||
663 | * If the architecture calls into lockdep before initializing | ||
664 | * the hashes then we'll warn about it later. (we cannot printk | ||
665 | * right now) | ||
666 | */ | ||
667 | if (unlikely(!lockdep_initialized)) { | ||
668 | lockdep_init(); | ||
669 | lockdep_init_error = 1; | ||
670 | save_stack_trace(&lockdep_init_trace); | ||
671 | } | ||
672 | #endif | ||
673 | |||
674 | /* | ||
675 | * Static locks do not have their class-keys yet - for them the key | ||
676 | * is the lock object itself: | ||
677 | */ | ||
678 | if (unlikely(!lock->key)) | ||
679 | lock->key = (void *)lock; | ||
680 | |||
681 | /* | ||
682 | * NOTE: the class-key must be unique. For dynamic locks, a static | ||
683 | * lock_class_key variable is passed in through the mutex_init() | ||
684 | * (or spin_lock_init()) call - which acts as the key. For static | ||
685 | * locks we use the lock object itself as the key. | ||
686 | */ | ||
687 | BUILD_BUG_ON(sizeof(struct lock_class_key) > | ||
688 | sizeof(struct lockdep_map)); | ||
689 | |||
690 | key = lock->key->subkeys + subclass; | ||
691 | |||
692 | hash_head = classhashentry(key); | ||
693 | |||
694 | /* | ||
695 | * We can walk the hash lockfree, because the hash only | ||
696 | * grows, and we are careful when adding entries to the end: | ||
697 | */ | ||
698 | list_for_each_entry(class, hash_head, hash_entry) { | ||
699 | if (class->key == key) { | ||
700 | WARN_ON_ONCE(class->name != lock->name); | ||
701 | return class; | ||
702 | } | ||
703 | } | ||
704 | |||
705 | return NULL; | ||
706 | } | ||
707 | |||
708 | /* | ||
709 | * Register a lock's class in the hash-table, if the class is not present | ||
710 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
711 | * itself, so actual lookup of the hash should be once per lock object. | ||
712 | */ | ||
713 | static inline struct lock_class * | ||
714 | register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | ||
715 | { | ||
716 | struct lockdep_subclass_key *key; | ||
717 | struct list_head *hash_head; | ||
718 | struct lock_class *class; | ||
719 | unsigned long flags; | ||
720 | |||
721 | class = look_up_lock_class(lock, subclass); | ||
722 | if (likely(class)) | ||
723 | return class; | ||
724 | |||
725 | /* | ||
726 | * Debug-check: all keys must be persistent! | ||
727 | */ | ||
728 | if (!static_obj(lock->key)) { | ||
729 | debug_locks_off(); | ||
730 | printk("INFO: trying to register non-static key.\n"); | ||
731 | printk("the code is fine but needs lockdep annotation.\n"); | ||
732 | printk("turning off the locking correctness validator.\n"); | ||
733 | dump_stack(); | ||
734 | |||
735 | return NULL; | ||
736 | } | ||
737 | |||
738 | key = lock->key->subkeys + subclass; | ||
739 | hash_head = classhashentry(key); | ||
740 | |||
741 | raw_local_irq_save(flags); | ||
742 | if (!graph_lock()) { | ||
743 | raw_local_irq_restore(flags); | ||
744 | return NULL; | ||
745 | } | ||
746 | /* | ||
747 | * We have to do the hash-walk again, to avoid races | ||
748 | * with another CPU: | ||
749 | */ | ||
750 | list_for_each_entry(class, hash_head, hash_entry) | ||
751 | if (class->key == key) | ||
752 | goto out_unlock_set; | ||
753 | /* | ||
754 | * Allocate a new key from the static array, and add it to | ||
755 | * the hash: | ||
756 | */ | ||
757 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | ||
758 | if (!debug_locks_off_graph_unlock()) { | ||
759 | raw_local_irq_restore(flags); | ||
760 | return NULL; | ||
761 | } | ||
762 | raw_local_irq_restore(flags); | ||
763 | |||
764 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | ||
765 | printk("turning off the locking correctness validator.\n"); | ||
766 | return NULL; | ||
767 | } | ||
768 | class = lock_classes + nr_lock_classes++; | ||
769 | debug_atomic_inc(&nr_unused_locks); | ||
770 | class->key = key; | ||
771 | class->name = lock->name; | ||
772 | class->subclass = subclass; | ||
773 | INIT_LIST_HEAD(&class->lock_entry); | ||
774 | INIT_LIST_HEAD(&class->locks_before); | ||
775 | INIT_LIST_HEAD(&class->locks_after); | ||
776 | class->name_version = count_matching_names(class); | ||
777 | /* | ||
778 | * We use RCU's safe list-add method to make | ||
779 | * parallel walking of the hash-list safe: | ||
780 | */ | ||
781 | list_add_tail_rcu(&class->hash_entry, hash_head); | ||
782 | |||
783 | if (verbose(class)) { | ||
784 | graph_unlock(); | ||
785 | raw_local_irq_restore(flags); | ||
786 | |||
787 | printk("\nnew class %p: %s", class->key, class->name); | ||
788 | if (class->name_version > 1) | ||
789 | printk("#%d", class->name_version); | ||
790 | printk("\n"); | ||
791 | dump_stack(); | ||
792 | |||
793 | raw_local_irq_save(flags); | ||
794 | if (!graph_lock()) { | ||
795 | raw_local_irq_restore(flags); | ||
796 | return NULL; | ||
797 | } | ||
798 | } | ||
799 | out_unlock_set: | ||
800 | graph_unlock(); | ||
801 | raw_local_irq_restore(flags); | ||
802 | |||
803 | if (!subclass || force) | ||
804 | lock->class_cache = class; | ||
805 | |||
806 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | ||
807 | return NULL; | ||
808 | |||
809 | return class; | ||
810 | } | ||
811 | |||
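The register_lock_class() path above follows a double-checked registration pattern: a lock-free walk of the hash bucket first, and only on a miss is the graph lock taken, the bucket re-walked, and a new class carved out of a fixed static array. Below is a minimal user-space sketch of the same idea; the names (klass, klass_register, graph_mutex) are invented, a pthread mutex stands in for the graph lock, and the unsynchronized reader walk glosses over the RCU publication (list_add_tail_rcu) that the real code relies on.

#include <pthread.h>
#include <stddef.h>

#define MAX_CLASSES 64

struct klass {
	const void *key;		/* identity of the class */
	struct klass *next;		/* one chained hash bucket, for brevity */
};

static struct klass classes[MAX_CLASSES];	/* static pool, like lock_classes[] */
static unsigned int nr_classes;
static struct klass *bucket;
static pthread_mutex_t graph_mutex = PTHREAD_MUTEX_INITIALIZER;

static struct klass *klass_lookup(const void *key)
{
	struct klass *c;

	for (c = bucket; c; c = c->next)	/* readers walk without the mutex */
		if (c->key == key)
			return c;
	return NULL;
}

static struct klass *klass_register(const void *key)
{
	struct klass *c = klass_lookup(key);

	if (c)
		return c;			/* fast path: already registered */

	pthread_mutex_lock(&graph_mutex);
	c = klass_lookup(key);			/* re-check: another thread may have won */
	if (!c && nr_classes < MAX_CLASSES) {
		c = &classes[nr_classes++];	/* carve a class out of the static pool */
		c->key = key;
		c->next = bucket;
		bucket = c;			/* publish it for later lookups */
	}
	pthread_mutex_unlock(&graph_mutex);
	return c;				/* NULL once the pool is exhausted */
}

As in the kernel code, the fixed-size pool means registration fails loudly (MAX_LOCKDEP_KEYS too low) rather than allocating dynamically from within the locking validator.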
812 | #ifdef CONFIG_PROVE_LOCKING | ||
813 | /* | ||
814 | * Allocate a lockdep entry. (assumes the graph_lock held, returns | ||
815 | * with NULL on failure) | ||
816 | */ | ||
817 | static struct lock_list *alloc_list_entry(void) | ||
818 | { | ||
819 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { | ||
820 | if (!debug_locks_off_graph_unlock()) | ||
821 | return NULL; | ||
822 | |||
823 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); | ||
824 | printk("turning off the locking correctness validator.\n"); | ||
825 | return NULL; | ||
826 | } | ||
827 | return list_entries + nr_list_entries++; | ||
828 | } | ||
829 | |||
485 | /* | 830 | /* |
486 | * Add a new dependency to the head of the list: | 831 | * Add a new dependency to the head of the list: |
487 | */ | 832 | */ |
@@ -542,13 +887,6 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth) | |||
542 | return 0; | 887 | return 0; |
543 | } | 888 | } |
544 | 889 | ||
545 | static void print_kernel_version(void) | ||
546 | { | ||
547 | printk("%s %.*s\n", init_utsname()->release, | ||
548 | (int)strcspn(init_utsname()->version, " "), | ||
549 | init_utsname()->version); | ||
550 | } | ||
551 | |||
552 | /* | 890 | /* |
553 | * When a circular dependency is detected, print the | 891 | * When a circular dependency is detected, print the |
554 | * header first: | 892 | * header first: |
@@ -640,15 +978,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) | |||
640 | return 1; | 978 | return 1; |
641 | } | 979 | } |
642 | 980 | ||
643 | static int very_verbose(struct lock_class *class) | ||
644 | { | ||
645 | #if VERY_VERBOSE | ||
646 | return class_filter(class); | ||
647 | #endif | ||
648 | return 0; | ||
649 | } | ||
650 | #ifdef CONFIG_TRACE_IRQFLAGS | 981 | #ifdef CONFIG_TRACE_IRQFLAGS |
651 | |||
652 | /* | 982 | /* |
653 | * Forwards and backwards subgraph searching, for the purposes of | 983 | * Forwards and backwards subgraph searching, for the purposes of |
654 | * proving that two subgraphs can be connected by a new dependency | 984 | * proving that two subgraphs can be connected by a new dependency |
@@ -821,6 +1151,78 @@ check_usage(struct task_struct *curr, struct held_lock *prev, | |||
821 | bit_backwards, bit_forwards, irqclass); | 1151 | bit_backwards, bit_forwards, irqclass); |
822 | } | 1152 | } |
823 | 1153 | ||
1154 | static int | ||
1155 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | ||
1156 | struct held_lock *next) | ||
1157 | { | ||
1158 | /* | ||
1159 | * Prove that the new dependency does not connect a hardirq-safe | ||
1160 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
1161 | * the backwards-subgraph starting at <prev>, and the | ||
1162 | * forwards-subgraph starting at <next>: | ||
1163 | */ | ||
1164 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, | ||
1165 | LOCK_ENABLED_HARDIRQS, "hard")) | ||
1166 | return 0; | ||
1167 | |||
1168 | /* | ||
1169 | * Prove that the new dependency does not connect a hardirq-safe-read | ||
1170 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
1171 | * the backwards-subgraph starting at <prev>, and the | ||
1172 | * forwards-subgraph starting at <next>: | ||
1173 | */ | ||
1174 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, | ||
1175 | LOCK_ENABLED_HARDIRQS, "hard-read")) | ||
1176 | return 0; | ||
1177 | |||
1178 | /* | ||
1179 | * Prove that the new dependency does not connect a softirq-safe | ||
1180 | * lock with a softirq-unsafe lock - to achieve this we search | ||
1181 | * the backwards-subgraph starting at <prev>, and the | ||
1182 | * forwards-subgraph starting at <next>: | ||
1183 | */ | ||
1184 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, | ||
1185 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
1186 | return 0; | ||
1187 | /* | ||
1188 | * Prove that the new dependency does not connect a softirq-safe-read | ||
1189 | * lock with a softirq-unsafe lock - to achieve this we search | ||
1190 | * the backwards-subgraph starting at <prev>, and the | ||
1191 | * forwards-subgraph starting at <next>: | ||
1192 | */ | ||
1193 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, | ||
1194 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
1195 | return 0; | ||
1196 | |||
1197 | return 1; | ||
1198 | } | ||
1199 | |||
1200 | static void inc_chains(void) | ||
1201 | { | ||
1202 | if (current->hardirq_context) | ||
1203 | nr_hardirq_chains++; | ||
1204 | else { | ||
1205 | if (current->softirq_context) | ||
1206 | nr_softirq_chains++; | ||
1207 | else | ||
1208 | nr_process_chains++; | ||
1209 | } | ||
1210 | } | ||
1211 | |||
1212 | #else | ||
1213 | |||
1214 | static inline int | ||
1215 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | ||
1216 | struct held_lock *next) | ||
1217 | { | ||
1218 | return 1; | ||
1219 | } | ||
1220 | |||
1221 | static inline void inc_chains(void) | ||
1222 | { | ||
1223 | nr_process_chains++; | ||
1224 | } | ||
1225 | |||
824 | #endif | 1226 | #endif |
825 | 1227 | ||
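The point of the check_prev_add_irq()/inc_chains() split above is that the CONFIG_TRACE_IRQFLAGS-only logic now lives behind stubs, so the callers that follow need no inline #ifdefs. A tiny illustrative sketch of that stub pattern, with an invented option name CONFIG_FOO:

#ifdef CONFIG_FOO
static int check_foo_usage(int value)
{
	return value > 0;		/* the real, option-specific validation */
}
#else
static inline int check_foo_usage(int value)
{
	return 1;			/* option off: always succeeds */
}
#endif

/* callers stay identical either way: */
static int do_check(int value)
{
	if (!check_foo_usage(value))
		return 0;
	return 1;
}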
826 | static int | 1228 | static int |
@@ -922,47 +1324,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
922 | if (!(check_noncircular(next->class, 0))) | 1324 | if (!(check_noncircular(next->class, 0))) |
923 | return print_circular_bug_tail(); | 1325 | return print_circular_bug_tail(); |
924 | 1326 | ||
925 | #ifdef CONFIG_TRACE_IRQFLAGS | 1327 | if (!check_prev_add_irq(curr, prev, next)) |
926 | /* | ||
927 | * Prove that the new dependency does not connect a hardirq-safe | ||
928 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
929 | * the backwards-subgraph starting at <prev>, and the | ||
930 | * forwards-subgraph starting at <next>: | ||
931 | */ | ||
932 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, | ||
933 | LOCK_ENABLED_HARDIRQS, "hard")) | ||
934 | return 0; | 1328 | return 0; |
935 | 1329 | ||
936 | /* | 1330 | /* |
937 | * Prove that the new dependency does not connect a hardirq-safe-read | ||
938 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
939 | * the backwards-subgraph starting at <prev>, and the | ||
940 | * forwards-subgraph starting at <next>: | ||
941 | */ | ||
942 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, | ||
943 | LOCK_ENABLED_HARDIRQS, "hard-read")) | ||
944 | return 0; | ||
945 | |||
946 | /* | ||
947 | * Prove that the new dependency does not connect a softirq-safe | ||
948 | * lock with a softirq-unsafe lock - to achieve this we search | ||
949 | * the backwards-subgraph starting at <prev>, and the | ||
950 | * forwards-subgraph starting at <next>: | ||
951 | */ | ||
952 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, | ||
953 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
954 | return 0; | ||
955 | /* | ||
956 | * Prove that the new dependency does not connect a softirq-safe-read | ||
957 | * lock with a softirq-unsafe lock - to achieve this we search | ||
958 | * the backwards-subgraph starting at <prev>, and the | ||
959 | * forwards-subgraph starting at <next>: | ||
960 | */ | ||
961 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, | ||
962 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
963 | return 0; | ||
964 | #endif | ||
965 | /* | ||
966 | * For recursive read-locks we do all the dependency checks, | 1331 | * For recursive read-locks we do all the dependency checks, |
967 | * but we dont store read-triggered dependencies (only | 1332 | * but we dont store read-triggered dependencies (only |
968 | * write-triggered dependencies). This ensures that only the | 1333 | * write-triggered dependencies). This ensures that only the |
@@ -1088,224 +1453,8 @@ out_bug: | |||
1088 | return 0; | 1453 | return 0; |
1089 | } | 1454 | } |
1090 | 1455 | ||
1091 | 1456 | unsigned long nr_lock_chains; | |
1092 | /* | 1457 | static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; |
1093 | * Is this the address of a static object: | ||
1094 | */ | ||
1095 | static int static_obj(void *obj) | ||
1096 | { | ||
1097 | unsigned long start = (unsigned long) &_stext, | ||
1098 | end = (unsigned long) &_end, | ||
1099 | addr = (unsigned long) obj; | ||
1100 | #ifdef CONFIG_SMP | ||
1101 | int i; | ||
1102 | #endif | ||
1103 | |||
1104 | /* | ||
1105 | * static variable? | ||
1106 | */ | ||
1107 | if ((addr >= start) && (addr < end)) | ||
1108 | return 1; | ||
1109 | |||
1110 | #ifdef CONFIG_SMP | ||
1111 | /* | ||
1112 | * percpu var? | ||
1113 | */ | ||
1114 | for_each_possible_cpu(i) { | ||
1115 | start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); | ||
1116 | end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM | ||
1117 | + per_cpu_offset(i); | ||
1118 | |||
1119 | if ((addr >= start) && (addr < end)) | ||
1120 | return 1; | ||
1121 | } | ||
1122 | #endif | ||
1123 | |||
1124 | /* | ||
1125 | * module var? | ||
1126 | */ | ||
1127 | return is_module_address(addr); | ||
1128 | } | ||
1129 | |||
1130 | /* | ||
1131 | * To make lock name printouts unique, we calculate a unique | ||
1132 | * class->name_version generation counter: | ||
1133 | */ | ||
1134 | static int count_matching_names(struct lock_class *new_class) | ||
1135 | { | ||
1136 | struct lock_class *class; | ||
1137 | int count = 0; | ||
1138 | |||
1139 | if (!new_class->name) | ||
1140 | return 0; | ||
1141 | |||
1142 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
1143 | if (new_class->key - new_class->subclass == class->key) | ||
1144 | return class->name_version; | ||
1145 | if (class->name && !strcmp(class->name, new_class->name)) | ||
1146 | count = max(count, class->name_version); | ||
1147 | } | ||
1148 | |||
1149 | return count + 1; | ||
1150 | } | ||
1151 | |||
1152 | /* | ||
1153 | * Register a lock's class in the hash-table, if the class is not present | ||
1154 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
1155 | * itself, so actual lookup of the hash should be once per lock object. | ||
1156 | */ | ||
1157 | static inline struct lock_class * | ||
1158 | look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | ||
1159 | { | ||
1160 | struct lockdep_subclass_key *key; | ||
1161 | struct list_head *hash_head; | ||
1162 | struct lock_class *class; | ||
1163 | |||
1164 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
1165 | /* | ||
1166 | * If the architecture calls into lockdep before initializing | ||
1167 | * the hashes then we'll warn about it later. (we cannot printk | ||
1168 | * right now) | ||
1169 | */ | ||
1170 | if (unlikely(!lockdep_initialized)) { | ||
1171 | lockdep_init(); | ||
1172 | lockdep_init_error = 1; | ||
1173 | } | ||
1174 | #endif | ||
1175 | |||
1176 | /* | ||
1177 | * Static locks do not have their class-keys yet - for them the key | ||
1178 | * is the lock object itself: | ||
1179 | */ | ||
1180 | if (unlikely(!lock->key)) | ||
1181 | lock->key = (void *)lock; | ||
1182 | |||
1183 | /* | ||
1184 | * NOTE: the class-key must be unique. For dynamic locks, a static | ||
1185 | * lock_class_key variable is passed in through the mutex_init() | ||
1186 | * (or spin_lock_init()) call - which acts as the key. For static | ||
1187 | * locks we use the lock object itself as the key. | ||
1188 | */ | ||
1189 | BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); | ||
1190 | |||
1191 | key = lock->key->subkeys + subclass; | ||
1192 | |||
1193 | hash_head = classhashentry(key); | ||
1194 | |||
1195 | /* | ||
1196 | * We can walk the hash lockfree, because the hash only | ||
1197 | * grows, and we are careful when adding entries to the end: | ||
1198 | */ | ||
1199 | list_for_each_entry(class, hash_head, hash_entry) | ||
1200 | if (class->key == key) | ||
1201 | return class; | ||
1202 | |||
1203 | return NULL; | ||
1204 | } | ||
1205 | |||
1206 | /* | ||
1207 | * Register a lock's class in the hash-table, if the class is not present | ||
1208 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
1209 | * itself, so actual lookup of the hash should be once per lock object. | ||
1210 | */ | ||
1211 | static inline struct lock_class * | ||
1212 | register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | ||
1213 | { | ||
1214 | struct lockdep_subclass_key *key; | ||
1215 | struct list_head *hash_head; | ||
1216 | struct lock_class *class; | ||
1217 | unsigned long flags; | ||
1218 | |||
1219 | class = look_up_lock_class(lock, subclass); | ||
1220 | if (likely(class)) | ||
1221 | return class; | ||
1222 | |||
1223 | /* | ||
1224 | * Debug-check: all keys must be persistent! | ||
1225 | */ | ||
1226 | if (!static_obj(lock->key)) { | ||
1227 | debug_locks_off(); | ||
1228 | printk("INFO: trying to register non-static key.\n"); | ||
1229 | printk("the code is fine but needs lockdep annotation.\n"); | ||
1230 | printk("turning off the locking correctness validator.\n"); | ||
1231 | dump_stack(); | ||
1232 | |||
1233 | return NULL; | ||
1234 | } | ||
1235 | |||
1236 | key = lock->key->subkeys + subclass; | ||
1237 | hash_head = classhashentry(key); | ||
1238 | |||
1239 | raw_local_irq_save(flags); | ||
1240 | if (!graph_lock()) { | ||
1241 | raw_local_irq_restore(flags); | ||
1242 | return NULL; | ||
1243 | } | ||
1244 | /* | ||
1245 | * We have to do the hash-walk again, to avoid races | ||
1246 | * with another CPU: | ||
1247 | */ | ||
1248 | list_for_each_entry(class, hash_head, hash_entry) | ||
1249 | if (class->key == key) | ||
1250 | goto out_unlock_set; | ||
1251 | /* | ||
1252 | * Allocate a new key from the static array, and add it to | ||
1253 | * the hash: | ||
1254 | */ | ||
1255 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | ||
1256 | if (!debug_locks_off_graph_unlock()) { | ||
1257 | raw_local_irq_restore(flags); | ||
1258 | return NULL; | ||
1259 | } | ||
1260 | raw_local_irq_restore(flags); | ||
1261 | |||
1262 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | ||
1263 | printk("turning off the locking correctness validator.\n"); | ||
1264 | return NULL; | ||
1265 | } | ||
1266 | class = lock_classes + nr_lock_classes++; | ||
1267 | debug_atomic_inc(&nr_unused_locks); | ||
1268 | class->key = key; | ||
1269 | class->name = lock->name; | ||
1270 | class->subclass = subclass; | ||
1271 | INIT_LIST_HEAD(&class->lock_entry); | ||
1272 | INIT_LIST_HEAD(&class->locks_before); | ||
1273 | INIT_LIST_HEAD(&class->locks_after); | ||
1274 | class->name_version = count_matching_names(class); | ||
1275 | /* | ||
1276 | * We use RCU's safe list-add method to make | ||
1277 | * parallel walking of the hash-list safe: | ||
1278 | */ | ||
1279 | list_add_tail_rcu(&class->hash_entry, hash_head); | ||
1280 | |||
1281 | if (verbose(class)) { | ||
1282 | graph_unlock(); | ||
1283 | raw_local_irq_restore(flags); | ||
1284 | |||
1285 | printk("\nnew class %p: %s", class->key, class->name); | ||
1286 | if (class->name_version > 1) | ||
1287 | printk("#%d", class->name_version); | ||
1288 | printk("\n"); | ||
1289 | dump_stack(); | ||
1290 | |||
1291 | raw_local_irq_save(flags); | ||
1292 | if (!graph_lock()) { | ||
1293 | raw_local_irq_restore(flags); | ||
1294 | return NULL; | ||
1295 | } | ||
1296 | } | ||
1297 | out_unlock_set: | ||
1298 | graph_unlock(); | ||
1299 | raw_local_irq_restore(flags); | ||
1300 | |||
1301 | if (!subclass || force) | ||
1302 | lock->class_cache = class; | ||
1303 | |||
1304 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | ||
1305 | return NULL; | ||
1306 | |||
1307 | return class; | ||
1308 | } | ||
1309 | 1458 | ||
1310 | /* | 1459 | /* |
1311 | * Look up a dependency chain. If the key is not present yet then | 1460 | * Look up a dependency chain. If the key is not present yet then |
@@ -1366,21 +1515,72 @@ cache_hit: | |||
1366 | chain->chain_key = chain_key; | 1515 | chain->chain_key = chain_key; |
1367 | list_add_tail_rcu(&chain->entry, hash_head); | 1516 | list_add_tail_rcu(&chain->entry, hash_head); |
1368 | debug_atomic_inc(&chain_lookup_misses); | 1517 | debug_atomic_inc(&chain_lookup_misses); |
1369 | #ifdef CONFIG_TRACE_IRQFLAGS | 1518 | inc_chains(); |
1370 | if (current->hardirq_context) | 1519 | |
1371 | nr_hardirq_chains++; | 1520 | return 1; |
1372 | else { | 1521 | } |
1373 | if (current->softirq_context) | 1522 | |
1374 | nr_softirq_chains++; | 1523 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, |
1375 | else | 1524 | struct held_lock *hlock, int chain_head) |
1376 | nr_process_chains++; | 1525 | { |
1377 | } | 1526 | /* |
1378 | #else | 1527 | * Trylock needs to maintain the stack of held locks, but it |
1379 | nr_process_chains++; | 1528 | * does not add new dependencies, because trylock can be done |
1380 | #endif | 1529 | * in any order. |
1530 | * | ||
1531 | * We look up the chain_key and do the O(N^2) check and update of | ||
1532 | * the dependencies only if this is a new dependency chain. | ||
1533 | * (If lookup_chain_cache() returns with 1 it acquires | ||
1534 | * graph_lock for us) | ||
1535 | */ | ||
1536 | if (!hlock->trylock && (hlock->check == 2) && | ||
1537 | lookup_chain_cache(curr->curr_chain_key, hlock->class)) { | ||
1538 | /* | ||
1539 | * Check whether last held lock: | ||
1540 | * | ||
1541 | * - is irq-safe, if this lock is irq-unsafe | ||
1542 | * - is softirq-safe, if this lock is hardirq-unsafe | ||
1543 | * | ||
1544 | * And check whether the new lock's dependency graph | ||
1545 | * could lead back to the previous lock. | ||
1546 | * | ||
1547 | * any of these scenarios could lead to a deadlock. If all | ||
1548 | * validations pass, we go on to add the new dependency. | ||
1549 | */ | ||
1550 | int ret = check_deadlock(curr, hlock, lock, hlock->read); | ||
1551 | |||
1552 | if (!ret) | ||
1553 | return 0; | ||
1554 | /* | ||
1555 | * Mark recursive read, as we jump over it when | ||
1556 | * building dependencies (just like we jump over | ||
1557 | * trylock entries): | ||
1558 | */ | ||
1559 | if (ret == 2) | ||
1560 | hlock->read = 2; | ||
1561 | /* | ||
1562 | * Add dependency only if this lock is not the head | ||
1563 | * of the chain, and if it's not a secondary read-lock: | ||
1564 | */ | ||
1565 | if (!chain_head && ret != 2) | ||
1566 | if (!check_prevs_add(curr, hlock)) | ||
1567 | return 0; | ||
1568 | graph_unlock(); | ||
1569 | } else | ||
1570 | /* after lookup_chain_cache(): */ | ||
1571 | if (unlikely(!debug_locks)) | ||
1572 | return 0; | ||
1381 | 1573 | ||
1382 | return 1; | 1574 | return 1; |
1383 | } | 1575 | } |
1576 | #else | ||
1577 | static inline int validate_chain(struct task_struct *curr, | ||
1578 | struct lockdep_map *lock, struct held_lock *hlock, | ||
1579 | int chain_head) | ||
1580 | { | ||
1581 | return 1; | ||
1582 | } | ||
1583 | #endif | ||
1384 | 1584 | ||
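validate_chain() above is keyed by curr_chain_key, a single 64-bit value into which the class id of every held lock has been folded, so a whole lock stack can be recognised with one hash lookup. A hedged sketch of that folding idea follows; the mixing function is illustrative, not the one iterate_chain_key() actually uses.

#include <stdint.h>

/* fold one more class id into the running chain key (illustrative mix) */
static uint64_t fold_chain_key(uint64_t chain_key, uint64_t class_id)
{
	chain_key = (chain_key << 13) | (chain_key >> 51);	/* rotate left by 13 */
	return chain_key ^ class_id;
}

/*
 * e.g. a stack of three held classes, outermost first:
 *	key = fold_chain_key(fold_chain_key(fold_chain_key(0, idA), idB), idC);
 */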
1385 | /* | 1585 | /* |
1386 | * We are building curr_chain_key incrementally, so double-check | 1586 | * We are building curr_chain_key incrementally, so double-check |
@@ -1425,6 +1625,57 @@ static void check_chain_key(struct task_struct *curr) | |||
1425 | #endif | 1625 | #endif |
1426 | } | 1626 | } |
1427 | 1627 | ||
1628 | static int | ||
1629 | print_usage_bug(struct task_struct *curr, struct held_lock *this, | ||
1630 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) | ||
1631 | { | ||
1632 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
1633 | return 0; | ||
1634 | |||
1635 | printk("\n=================================\n"); | ||
1636 | printk( "[ INFO: inconsistent lock state ]\n"); | ||
1637 | print_kernel_version(); | ||
1638 | printk( "---------------------------------\n"); | ||
1639 | |||
1640 | printk("inconsistent {%s} -> {%s} usage.\n", | ||
1641 | usage_str[prev_bit], usage_str[new_bit]); | ||
1642 | |||
1643 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", | ||
1644 | curr->comm, curr->pid, | ||
1645 | trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, | ||
1646 | trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, | ||
1647 | trace_hardirqs_enabled(curr), | ||
1648 | trace_softirqs_enabled(curr)); | ||
1649 | print_lock(this); | ||
1650 | |||
1651 | printk("{%s} state was registered at:\n", usage_str[prev_bit]); | ||
1652 | print_stack_trace(this->class->usage_traces + prev_bit, 1); | ||
1653 | |||
1654 | print_irqtrace_events(curr); | ||
1655 | printk("\nother info that might help us debug this:\n"); | ||
1656 | lockdep_print_held_locks(curr); | ||
1657 | |||
1658 | printk("\nstack backtrace:\n"); | ||
1659 | dump_stack(); | ||
1660 | |||
1661 | return 0; | ||
1662 | } | ||
1663 | |||
1664 | /* | ||
1665 | * Print out an error if an invalid bit is set: | ||
1666 | */ | ||
1667 | static inline int | ||
1668 | valid_state(struct task_struct *curr, struct held_lock *this, | ||
1669 | enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) | ||
1670 | { | ||
1671 | if (unlikely(this->class->usage_mask & (1 << bad_bit))) | ||
1672 | return print_usage_bug(curr, this, bad_bit, new_bit); | ||
1673 | return 1; | ||
1674 | } | ||
1675 | |||
1676 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
1677 | enum lock_usage_bit new_bit); | ||
1678 | |||
1428 | #ifdef CONFIG_TRACE_IRQFLAGS | 1679 | #ifdef CONFIG_TRACE_IRQFLAGS |
1429 | 1680 | ||
1430 | /* | 1681 | /* |
@@ -1518,90 +1769,30 @@ void print_irqtrace_events(struct task_struct *curr) | |||
1518 | print_ip_sym(curr->softirq_disable_ip); | 1769 | print_ip_sym(curr->softirq_disable_ip); |
1519 | } | 1770 | } |
1520 | 1771 | ||
1521 | #endif | 1772 | static int hardirq_verbose(struct lock_class *class) |
1522 | |||
1523 | static int | ||
1524 | print_usage_bug(struct task_struct *curr, struct held_lock *this, | ||
1525 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) | ||
1526 | { | 1773 | { |
1527 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1774 | #if HARDIRQ_VERBOSE |
1528 | return 0; | 1775 | return class_filter(class); |
1529 | 1776 | #endif | |
1530 | printk("\n=================================\n"); | ||
1531 | printk( "[ INFO: inconsistent lock state ]\n"); | ||
1532 | print_kernel_version(); | ||
1533 | printk( "---------------------------------\n"); | ||
1534 | |||
1535 | printk("inconsistent {%s} -> {%s} usage.\n", | ||
1536 | usage_str[prev_bit], usage_str[new_bit]); | ||
1537 | |||
1538 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", | ||
1539 | curr->comm, curr->pid, | ||
1540 | trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, | ||
1541 | trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, | ||
1542 | trace_hardirqs_enabled(curr), | ||
1543 | trace_softirqs_enabled(curr)); | ||
1544 | print_lock(this); | ||
1545 | |||
1546 | printk("{%s} state was registered at:\n", usage_str[prev_bit]); | ||
1547 | print_stack_trace(this->class->usage_traces + prev_bit, 1); | ||
1548 | |||
1549 | print_irqtrace_events(curr); | ||
1550 | printk("\nother info that might help us debug this:\n"); | ||
1551 | lockdep_print_held_locks(curr); | ||
1552 | |||
1553 | printk("\nstack backtrace:\n"); | ||
1554 | dump_stack(); | ||
1555 | |||
1556 | return 0; | 1777 | return 0; |
1557 | } | 1778 | } |
1558 | 1779 | ||
1559 | /* | 1780 | static int softirq_verbose(struct lock_class *class) |
1560 | * Print out an error if an invalid bit is set: | ||
1561 | */ | ||
1562 | static inline int | ||
1563 | valid_state(struct task_struct *curr, struct held_lock *this, | ||
1564 | enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) | ||
1565 | { | 1781 | { |
1566 | if (unlikely(this->class->usage_mask & (1 << bad_bit))) | 1782 | #if SOFTIRQ_VERBOSE |
1567 | return print_usage_bug(curr, this, bad_bit, new_bit); | 1783 | return class_filter(class); |
1568 | return 1; | 1784 | #endif |
1785 | return 0; | ||
1569 | } | 1786 | } |
1570 | 1787 | ||
1571 | #define STRICT_READ_CHECKS 1 | 1788 | #define STRICT_READ_CHECKS 1 |
1572 | 1789 | ||
1573 | /* | 1790 | static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, |
1574 | * Mark a lock with a usage bit, and validate the state transition: | 1791 | enum lock_usage_bit new_bit) |
1575 | */ | ||
1576 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
1577 | enum lock_usage_bit new_bit) | ||
1578 | { | 1792 | { |
1579 | unsigned int new_mask = 1 << new_bit, ret = 1; | 1793 | int ret = 1; |
1580 | |||
1581 | /* | ||
1582 | * If already set then do not dirty the cacheline, | ||
1583 | * nor do any checks: | ||
1584 | */ | ||
1585 | if (likely(this->class->usage_mask & new_mask)) | ||
1586 | return 1; | ||
1587 | |||
1588 | if (!graph_lock()) | ||
1589 | return 0; | ||
1590 | /* | ||
1591 | * Make sure we didnt race: | ||
1592 | */ | ||
1593 | if (unlikely(this->class->usage_mask & new_mask)) { | ||
1594 | graph_unlock(); | ||
1595 | return 1; | ||
1596 | } | ||
1597 | |||
1598 | this->class->usage_mask |= new_mask; | ||
1599 | 1794 | ||
1600 | if (!save_trace(this->class->usage_traces + new_bit)) | 1795 | switch(new_bit) { |
1601 | return 0; | ||
1602 | |||
1603 | switch (new_bit) { | ||
1604 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1605 | case LOCK_USED_IN_HARDIRQ: | 1796 | case LOCK_USED_IN_HARDIRQ: |
1606 | if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) | 1797 | if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) |
1607 | return 0; | 1798 | return 0; |
@@ -1760,37 +1951,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
1760 | if (softirq_verbose(this->class)) | 1951 | if (softirq_verbose(this->class)) |
1761 | ret = 2; | 1952 | ret = 2; |
1762 | break; | 1953 | break; |
1763 | #endif | ||
1764 | case LOCK_USED: | ||
1765 | /* | ||
1766 | * Add it to the global list of classes: | ||
1767 | */ | ||
1768 | list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); | ||
1769 | debug_atomic_dec(&nr_unused_locks); | ||
1770 | break; | ||
1771 | default: | 1954 | default: |
1772 | if (!debug_locks_off_graph_unlock()) | ||
1773 | return 0; | ||
1774 | WARN_ON(1); | 1955 | WARN_ON(1); |
1775 | return 0; | 1956 | break; |
1776 | } | ||
1777 | |||
1778 | graph_unlock(); | ||
1779 | |||
1780 | /* | ||
1781 | * We must printk outside of the graph_lock: | ||
1782 | */ | ||
1783 | if (ret == 2) { | ||
1784 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); | ||
1785 | print_lock(this); | ||
1786 | print_irqtrace_events(curr); | ||
1787 | dump_stack(); | ||
1788 | } | 1957 | } |
1789 | 1958 | ||
1790 | return ret; | 1959 | return ret; |
1791 | } | 1960 | } |
1792 | 1961 | ||
1793 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1794 | /* | 1962 | /* |
1795 | * Mark all held locks with a usage bit: | 1963 | * Mark all held locks with a usage bit: |
1796 | */ | 1964 | */ |
@@ -1973,9 +2141,176 @@ void trace_softirqs_off(unsigned long ip) | |||
1973 | debug_atomic_inc(&redundant_softirqs_off); | 2141 | debug_atomic_inc(&redundant_softirqs_off); |
1974 | } | 2142 | } |
1975 | 2143 | ||
2144 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | ||
2145 | { | ||
2146 | /* | ||
2147 | * If non-trylock use in a hardirq or softirq context, then | ||
2148 | * mark the lock as used in these contexts: | ||
2149 | */ | ||
2150 | if (!hlock->trylock) { | ||
2151 | if (hlock->read) { | ||
2152 | if (curr->hardirq_context) | ||
2153 | if (!mark_lock(curr, hlock, | ||
2154 | LOCK_USED_IN_HARDIRQ_READ)) | ||
2155 | return 0; | ||
2156 | if (curr->softirq_context) | ||
2157 | if (!mark_lock(curr, hlock, | ||
2158 | LOCK_USED_IN_SOFTIRQ_READ)) | ||
2159 | return 0; | ||
2160 | } else { | ||
2161 | if (curr->hardirq_context) | ||
2162 | if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) | ||
2163 | return 0; | ||
2164 | if (curr->softirq_context) | ||
2165 | if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) | ||
2166 | return 0; | ||
2167 | } | ||
2168 | } | ||
2169 | if (!hlock->hardirqs_off) { | ||
2170 | if (hlock->read) { | ||
2171 | if (!mark_lock(curr, hlock, | ||
2172 | LOCK_ENABLED_HARDIRQS_READ)) | ||
2173 | return 0; | ||
2174 | if (curr->softirqs_enabled) | ||
2175 | if (!mark_lock(curr, hlock, | ||
2176 | LOCK_ENABLED_SOFTIRQS_READ)) | ||
2177 | return 0; | ||
2178 | } else { | ||
2179 | if (!mark_lock(curr, hlock, | ||
2180 | LOCK_ENABLED_HARDIRQS)) | ||
2181 | return 0; | ||
2182 | if (curr->softirqs_enabled) | ||
2183 | if (!mark_lock(curr, hlock, | ||
2184 | LOCK_ENABLED_SOFTIRQS)) | ||
2185 | return 0; | ||
2186 | } | ||
2187 | } | ||
2188 | |||
2189 | return 1; | ||
2190 | } | ||
2191 | |||
2192 | static int separate_irq_context(struct task_struct *curr, | ||
2193 | struct held_lock *hlock) | ||
2194 | { | ||
2195 | unsigned int depth = curr->lockdep_depth; | ||
2196 | |||
2197 | /* | ||
2198 | * Keep track of points where we cross into an interrupt context: | ||
2199 | */ | ||
2200 | hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + | ||
2201 | curr->softirq_context; | ||
2202 | if (depth) { | ||
2203 | struct held_lock *prev_hlock; | ||
2204 | |||
2205 | prev_hlock = curr->held_locks + depth-1; | ||
2206 | /* | ||
2207 | * If we cross into another context, reset the | ||
2208 | * hash key (this also prevents the checking and the | ||
2209 | * adding of the dependency to 'prev'): | ||
2210 | */ | ||
2211 | if (prev_hlock->irq_context != hlock->irq_context) | ||
2212 | return 1; | ||
2213 | } | ||
2214 | return 0; | ||
2215 | } | ||
2216 | |||
2217 | #else | ||
2218 | |||
2219 | static inline | ||
2220 | int mark_lock_irq(struct task_struct *curr, struct held_lock *this, | ||
2221 | enum lock_usage_bit new_bit) | ||
2222 | { | ||
2223 | WARN_ON(1); | ||
2224 | return 1; | ||
2225 | } | ||
2226 | |||
2227 | static inline int mark_irqflags(struct task_struct *curr, | ||
2228 | struct held_lock *hlock) | ||
2229 | { | ||
2230 | return 1; | ||
2231 | } | ||
2232 | |||
2233 | static inline int separate_irq_context(struct task_struct *curr, | ||
2234 | struct held_lock *hlock) | ||
2235 | { | ||
2236 | return 0; | ||
2237 | } | ||
2238 | |||
1976 | #endif | 2239 | #endif |
1977 | 2240 | ||
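separate_irq_context() above encodes where a lock was taken into hlock->irq_context and starts a fresh chain whenever that value changes between consecutive held locks. A small sketch of the encoding, simplified to boolean inputs (the real code adds curr->softirq_context directly):

/* 0 = process context, 1 = softirq, 2 or 3 = hardirq (possibly over softirq) */
static unsigned int encode_irq_context(int in_hardirq, int in_softirq)
{
	return 2 * (in_hardirq ? 1 : 0) + (in_softirq ? 1 : 0);
}

/* a chain boundary is simply a change of this value between held locks */
static int crossed_context(unsigned int prev_ctx, unsigned int curr_ctx)
{
	return prev_ctx != curr_ctx;
}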
1978 | /* | 2241 | /* |
2242 | * Mark a lock with a usage bit, and validate the state transition: | ||
2243 | */ | ||
2244 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2245 | enum lock_usage_bit new_bit) | ||
2246 | { | ||
2247 | unsigned int new_mask = 1 << new_bit, ret = 1; | ||
2248 | |||
2249 | /* | ||
2250 | * If already set then do not dirty the cacheline, | ||
2251 | * nor do any checks: | ||
2252 | */ | ||
2253 | if (likely(this->class->usage_mask & new_mask)) | ||
2254 | return 1; | ||
2255 | |||
2256 | if (!graph_lock()) | ||
2257 | return 0; | ||
2258 | /* | ||
2259 | * Make sure we didnt race: | ||
2260 | */ | ||
2261 | if (unlikely(this->class->usage_mask & new_mask)) { | ||
2262 | graph_unlock(); | ||
2263 | return 1; | ||
2264 | } | ||
2265 | |||
2266 | this->class->usage_mask |= new_mask; | ||
2267 | |||
2268 | if (!save_trace(this->class->usage_traces + new_bit)) | ||
2269 | return 0; | ||
2270 | |||
2271 | switch (new_bit) { | ||
2272 | case LOCK_USED_IN_HARDIRQ: | ||
2273 | case LOCK_USED_IN_SOFTIRQ: | ||
2274 | case LOCK_USED_IN_HARDIRQ_READ: | ||
2275 | case LOCK_USED_IN_SOFTIRQ_READ: | ||
2276 | case LOCK_ENABLED_HARDIRQS: | ||
2277 | case LOCK_ENABLED_SOFTIRQS: | ||
2278 | case LOCK_ENABLED_HARDIRQS_READ: | ||
2279 | case LOCK_ENABLED_SOFTIRQS_READ: | ||
2280 | ret = mark_lock_irq(curr, this, new_bit); | ||
2281 | if (!ret) | ||
2282 | return 0; | ||
2283 | break; | ||
2284 | case LOCK_USED: | ||
2285 | /* | ||
2286 | * Add it to the global list of classes: | ||
2287 | */ | ||
2288 | list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); | ||
2289 | debug_atomic_dec(&nr_unused_locks); | ||
2290 | break; | ||
2291 | default: | ||
2292 | if (!debug_locks_off_graph_unlock()) | ||
2293 | return 0; | ||
2294 | WARN_ON(1); | ||
2295 | return 0; | ||
2296 | } | ||
2297 | |||
2298 | graph_unlock(); | ||
2299 | |||
2300 | /* | ||
2301 | * We must printk outside of the graph_lock: | ||
2302 | */ | ||
2303 | if (ret == 2) { | ||
2304 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); | ||
2305 | print_lock(this); | ||
2306 | print_irqtrace_events(curr); | ||
2307 | dump_stack(); | ||
2308 | } | ||
2309 | |||
2310 | return ret; | ||
2311 | } | ||
2312 | |||
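mark_lock() above is structured so the common case stays cheap: if the usage bit is already set it returns without taking the graph lock or dirtying the cacheline, and only a genuinely new bit goes through the locked slow path that re-checks, records a stack trace, and dispatches to mark_lock_irq(). A reduced model of just the bitmask part (the locking and trace recording are deliberately omitted):

static int mark_usage_bit(unsigned int *usage_mask, unsigned int new_bit)
{
	unsigned int new_mask = 1u << new_bit;

	if (*usage_mask & new_mask)
		return 1;		/* already recorded: nothing to do */

	*usage_mask |= new_mask;	/* record the new usage bit */
	return 1;
}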
2313 | /* | ||
1979 | * Initialize a lock instance's lock-class mapping info: | 2314 | * Initialize a lock instance's lock-class mapping info: |
1980 | */ | 2315 | */ |
1981 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2316 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
@@ -1999,6 +2334,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
1999 | lock->name = name; | 2334 | lock->name = name; |
2000 | lock->key = key; | 2335 | lock->key = key; |
2001 | lock->class_cache = NULL; | 2336 | lock->class_cache = NULL; |
2337 | #ifdef CONFIG_LOCK_STAT | ||
2338 | lock->cpu = raw_smp_processor_id(); | ||
2339 | #endif | ||
2002 | if (subclass) | 2340 | if (subclass) |
2003 | register_lock_class(lock, subclass, 1); | 2341 | register_lock_class(lock, subclass, 1); |
2004 | } | 2342 | } |
@@ -2020,6 +2358,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2020 | int chain_head = 0; | 2358 | int chain_head = 0; |
2021 | u64 chain_key; | 2359 | u64 chain_key; |
2022 | 2360 | ||
2361 | if (!prove_locking) | ||
2362 | check = 1; | ||
2363 | |||
2023 | if (unlikely(!debug_locks)) | 2364 | if (unlikely(!debug_locks)) |
2024 | return 0; | 2365 | return 0; |
2025 | 2366 | ||
@@ -2070,57 +2411,18 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2070 | hlock->read = read; | 2411 | hlock->read = read; |
2071 | hlock->check = check; | 2412 | hlock->check = check; |
2072 | hlock->hardirqs_off = hardirqs_off; | 2413 | hlock->hardirqs_off = hardirqs_off; |
2073 | 2414 | #ifdef CONFIG_LOCK_STAT | |
2074 | if (check != 2) | 2415 | hlock->waittime_stamp = 0; |
2075 | goto out_calc_hash; | 2416 | hlock->holdtime_stamp = sched_clock(); |
2076 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
2077 | /* | ||
2078 | * If non-trylock use in a hardirq or softirq context, then | ||
2079 | * mark the lock as used in these contexts: | ||
2080 | */ | ||
2081 | if (!trylock) { | ||
2082 | if (read) { | ||
2083 | if (curr->hardirq_context) | ||
2084 | if (!mark_lock(curr, hlock, | ||
2085 | LOCK_USED_IN_HARDIRQ_READ)) | ||
2086 | return 0; | ||
2087 | if (curr->softirq_context) | ||
2088 | if (!mark_lock(curr, hlock, | ||
2089 | LOCK_USED_IN_SOFTIRQ_READ)) | ||
2090 | return 0; | ||
2091 | } else { | ||
2092 | if (curr->hardirq_context) | ||
2093 | if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) | ||
2094 | return 0; | ||
2095 | if (curr->softirq_context) | ||
2096 | if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) | ||
2097 | return 0; | ||
2098 | } | ||
2099 | } | ||
2100 | if (!hardirqs_off) { | ||
2101 | if (read) { | ||
2102 | if (!mark_lock(curr, hlock, | ||
2103 | LOCK_ENABLED_HARDIRQS_READ)) | ||
2104 | return 0; | ||
2105 | if (curr->softirqs_enabled) | ||
2106 | if (!mark_lock(curr, hlock, | ||
2107 | LOCK_ENABLED_SOFTIRQS_READ)) | ||
2108 | return 0; | ||
2109 | } else { | ||
2110 | if (!mark_lock(curr, hlock, | ||
2111 | LOCK_ENABLED_HARDIRQS)) | ||
2112 | return 0; | ||
2113 | if (curr->softirqs_enabled) | ||
2114 | if (!mark_lock(curr, hlock, | ||
2115 | LOCK_ENABLED_SOFTIRQS)) | ||
2116 | return 0; | ||
2117 | } | ||
2118 | } | ||
2119 | #endif | 2417 | #endif |
2418 | |||
2419 | if (check == 2 && !mark_irqflags(curr, hlock)) | ||
2420 | return 0; | ||
2421 | |||
2120 | /* mark it as used: */ | 2422 | /* mark it as used: */ |
2121 | if (!mark_lock(curr, hlock, LOCK_USED)) | 2423 | if (!mark_lock(curr, hlock, LOCK_USED)) |
2122 | return 0; | 2424 | return 0; |
2123 | out_calc_hash: | 2425 | |
2124 | /* | 2426 | /* |
2125 | * Calculate the chain hash: it's the combined has of all the | 2427 | * Calculate the chain hash: it's the combined has of all the |
2126 | * lock keys along the dependency chain. We save the hash value | 2428 | * lock keys along the dependency chain. We save the hash value |
@@ -2143,77 +2445,15 @@ out_calc_hash: | |||
2143 | } | 2445 | } |
2144 | 2446 | ||
2145 | hlock->prev_chain_key = chain_key; | 2447 | hlock->prev_chain_key = chain_key; |
2146 | 2448 | if (separate_irq_context(curr, hlock)) { | |
2147 | #ifdef CONFIG_TRACE_IRQFLAGS | 2449 | chain_key = 0; |
2148 | /* | 2450 | chain_head = 1; |
2149 | * Keep track of points where we cross into an interrupt context: | ||
2150 | */ | ||
2151 | hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + | ||
2152 | curr->softirq_context; | ||
2153 | if (depth) { | ||
2154 | struct held_lock *prev_hlock; | ||
2155 | |||
2156 | prev_hlock = curr->held_locks + depth-1; | ||
2157 | /* | ||
2158 | * If we cross into another context, reset the | ||
2159 | * hash key (this also prevents the checking and the | ||
2160 | * adding of the dependency to 'prev'): | ||
2161 | */ | ||
2162 | if (prev_hlock->irq_context != hlock->irq_context) { | ||
2163 | chain_key = 0; | ||
2164 | chain_head = 1; | ||
2165 | } | ||
2166 | } | 2451 | } |
2167 | #endif | ||
2168 | chain_key = iterate_chain_key(chain_key, id); | 2452 | chain_key = iterate_chain_key(chain_key, id); |
2169 | curr->curr_chain_key = chain_key; | 2453 | curr->curr_chain_key = chain_key; |
2170 | 2454 | ||
2171 | /* | 2455 | if (!validate_chain(curr, lock, hlock, chain_head)) |
2172 | * Trylock needs to maintain the stack of held locks, but it | 2456 | return 0; |
2173 | * does not add new dependencies, because trylock can be done | ||
2174 | * in any order. | ||
2175 | * | ||
2176 | * We look up the chain_key and do the O(N^2) check and update of | ||
2177 | * the dependencies only if this is a new dependency chain. | ||
2178 | * (If lookup_chain_cache() returns with 1 it acquires | ||
2179 | * graph_lock for us) | ||
2180 | */ | ||
2181 | if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) { | ||
2182 | /* | ||
2183 | * Check whether last held lock: | ||
2184 | * | ||
2185 | * - is irq-safe, if this lock is irq-unsafe | ||
2186 | * - is softirq-safe, if this lock is hardirq-unsafe | ||
2187 | * | ||
2188 | * And check whether the new lock's dependency graph | ||
2189 | * could lead back to the previous lock. | ||
2190 | * | ||
2191 | * any of these scenarios could lead to a deadlock. If | ||
2192 | * All validations | ||
2193 | */ | ||
2194 | int ret = check_deadlock(curr, hlock, lock, read); | ||
2195 | |||
2196 | if (!ret) | ||
2197 | return 0; | ||
2198 | /* | ||
2199 | * Mark recursive read, as we jump over it when | ||
2200 | * building dependencies (just like we jump over | ||
2201 | * trylock entries): | ||
2202 | */ | ||
2203 | if (ret == 2) | ||
2204 | hlock->read = 2; | ||
2205 | /* | ||
2206 | * Add dependency only if this lock is not the head | ||
2207 | * of the chain, and if it's not a secondary read-lock: | ||
2208 | */ | ||
2209 | if (!chain_head && ret != 2) | ||
2210 | if (!check_prevs_add(curr, hlock)) | ||
2211 | return 0; | ||
2212 | graph_unlock(); | ||
2213 | } else | ||
2214 | /* after lookup_chain_cache(): */ | ||
2215 | if (unlikely(!debug_locks)) | ||
2216 | return 0; | ||
2217 | 2457 | ||
2218 | curr->lockdep_depth++; | 2458 | curr->lockdep_depth++; |
2219 | check_chain_key(curr); | 2459 | check_chain_key(curr); |
@@ -2315,6 +2555,8 @@ lock_release_non_nested(struct task_struct *curr, | |||
2315 | return print_unlock_inbalance_bug(curr, lock, ip); | 2555 | return print_unlock_inbalance_bug(curr, lock, ip); |
2316 | 2556 | ||
2317 | found_it: | 2557 | found_it: |
2558 | lock_release_holdtime(hlock); | ||
2559 | |||
2318 | /* | 2560 | /* |
2319 | * We have the right lock to unlock, 'hlock' points to it. | 2561 | * We have the right lock to unlock, 'hlock' points to it. |
2320 | * Now we remove it from the stack, and add back the other | 2562 | * Now we remove it from the stack, and add back the other |
@@ -2367,6 +2609,8 @@ static int lock_release_nested(struct task_struct *curr, | |||
2367 | 2609 | ||
2368 | curr->curr_chain_key = hlock->prev_chain_key; | 2610 | curr->curr_chain_key = hlock->prev_chain_key; |
2369 | 2611 | ||
2612 | lock_release_holdtime(hlock); | ||
2613 | |||
2370 | #ifdef CONFIG_DEBUG_LOCKDEP | 2614 | #ifdef CONFIG_DEBUG_LOCKDEP |
2371 | hlock->prev_chain_key = 0; | 2615 | hlock->prev_chain_key = 0; |
2372 | hlock->class = NULL; | 2616 | hlock->class = NULL; |
@@ -2441,6 +2685,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2441 | { | 2685 | { |
2442 | unsigned long flags; | 2686 | unsigned long flags; |
2443 | 2687 | ||
2688 | if (unlikely(!lock_stat && !prove_locking)) | ||
2689 | return; | ||
2690 | |||
2444 | if (unlikely(current->lockdep_recursion)) | 2691 | if (unlikely(current->lockdep_recursion)) |
2445 | return; | 2692 | return; |
2446 | 2693 | ||
@@ -2460,6 +2707,9 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
2460 | { | 2707 | { |
2461 | unsigned long flags; | 2708 | unsigned long flags; |
2462 | 2709 | ||
2710 | if (unlikely(!lock_stat && !prove_locking)) | ||
2711 | return; | ||
2712 | |||
2463 | if (unlikely(current->lockdep_recursion)) | 2713 | if (unlikely(current->lockdep_recursion)) |
2464 | return; | 2714 | return; |
2465 | 2715 | ||
@@ -2473,6 +2723,166 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
2473 | 2723 | ||
2474 | EXPORT_SYMBOL_GPL(lock_release); | 2724 | EXPORT_SYMBOL_GPL(lock_release); |
2475 | 2725 | ||
2726 | #ifdef CONFIG_LOCK_STAT | ||
2727 | static int | ||
2728 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | ||
2729 | unsigned long ip) | ||
2730 | { | ||
2731 | if (!debug_locks_off()) | ||
2732 | return 0; | ||
2733 | if (debug_locks_silent) | ||
2734 | return 0; | ||
2735 | |||
2736 | printk("\n=================================\n"); | ||
2737 | printk( "[ BUG: bad contention detected! ]\n"); | ||
2738 | printk( "---------------------------------\n"); | ||
2739 | printk("%s/%d is trying to contend lock (", | ||
2740 | curr->comm, curr->pid); | ||
2741 | print_lockdep_cache(lock); | ||
2742 | printk(") at:\n"); | ||
2743 | print_ip_sym(ip); | ||
2744 | printk("but there are no locks held!\n"); | ||
2745 | printk("\nother info that might help us debug this:\n"); | ||
2746 | lockdep_print_held_locks(curr); | ||
2747 | |||
2748 | printk("\nstack backtrace:\n"); | ||
2749 | dump_stack(); | ||
2750 | |||
2751 | return 0; | ||
2752 | } | ||
2753 | |||
2754 | static void | ||
2755 | __lock_contended(struct lockdep_map *lock, unsigned long ip) | ||
2756 | { | ||
2757 | struct task_struct *curr = current; | ||
2758 | struct held_lock *hlock, *prev_hlock; | ||
2759 | struct lock_class_stats *stats; | ||
2760 | unsigned int depth; | ||
2761 | int i, point; | ||
2762 | |||
2763 | depth = curr->lockdep_depth; | ||
2764 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
2765 | return; | ||
2766 | |||
2767 | prev_hlock = NULL; | ||
2768 | for (i = depth-1; i >= 0; i--) { | ||
2769 | hlock = curr->held_locks + i; | ||
2770 | /* | ||
2771 | * We must not cross into another context: | ||
2772 | */ | ||
2773 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
2774 | break; | ||
2775 | if (hlock->instance == lock) | ||
2776 | goto found_it; | ||
2777 | prev_hlock = hlock; | ||
2778 | } | ||
2779 | print_lock_contention_bug(curr, lock, ip); | ||
2780 | return; | ||
2781 | |||
2782 | found_it: | ||
2783 | hlock->waittime_stamp = sched_clock(); | ||
2784 | |||
2785 | point = lock_contention_point(hlock->class, ip); | ||
2786 | |||
2787 | stats = get_lock_stats(hlock->class); | ||
2788 | if (point < ARRAY_SIZE(stats->contention_point)) | ||
2789 | stats->contention_point[point]++; | ||
2790 | if (lock->cpu != smp_processor_id()) | ||
2791 | stats->bounces[bounce_contended + !!hlock->read]++; | ||
2792 | put_lock_stats(stats); | ||
2793 | } | ||
2794 | |||
2795 | static void | ||
2796 | __lock_acquired(struct lockdep_map *lock) | ||
2797 | { | ||
2798 | struct task_struct *curr = current; | ||
2799 | struct held_lock *hlock, *prev_hlock; | ||
2800 | struct lock_class_stats *stats; | ||
2801 | unsigned int depth; | ||
2802 | u64 now; | ||
2803 | s64 waittime = 0; | ||
2804 | int i, cpu; | ||
2805 | |||
2806 | depth = curr->lockdep_depth; | ||
2807 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
2808 | return; | ||
2809 | |||
2810 | prev_hlock = NULL; | ||
2811 | for (i = depth-1; i >= 0; i--) { | ||
2812 | hlock = curr->held_locks + i; | ||
2813 | /* | ||
2814 | * We must not cross into another context: | ||
2815 | */ | ||
2816 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
2817 | break; | ||
2818 | if (hlock->instance == lock) | ||
2819 | goto found_it; | ||
2820 | prev_hlock = hlock; | ||
2821 | } | ||
2822 | print_lock_contention_bug(curr, lock, _RET_IP_); | ||
2823 | return; | ||
2824 | |||
2825 | found_it: | ||
2826 | cpu = smp_processor_id(); | ||
2827 | if (hlock->waittime_stamp) { | ||
2828 | now = sched_clock(); | ||
2829 | waittime = now - hlock->waittime_stamp; | ||
2830 | hlock->holdtime_stamp = now; | ||
2831 | } | ||
2832 | |||
2833 | stats = get_lock_stats(hlock->class); | ||
2834 | if (waittime) { | ||
2835 | if (hlock->read) | ||
2836 | lock_time_inc(&stats->read_waittime, waittime); | ||
2837 | else | ||
2838 | lock_time_inc(&stats->write_waittime, waittime); | ||
2839 | } | ||
2840 | if (lock->cpu != cpu) | ||
2841 | stats->bounces[bounce_acquired + !!hlock->read]++; | ||
2842 | put_lock_stats(stats); | ||
2843 | |||
2844 | lock->cpu = cpu; | ||
2845 | } | ||
2846 | |||
2847 | void lock_contended(struct lockdep_map *lock, unsigned long ip) | ||
2848 | { | ||
2849 | unsigned long flags; | ||
2850 | |||
2851 | if (unlikely(!lock_stat)) | ||
2852 | return; | ||
2853 | |||
2854 | if (unlikely(current->lockdep_recursion)) | ||
2855 | return; | ||
2856 | |||
2857 | raw_local_irq_save(flags); | ||
2858 | check_flags(flags); | ||
2859 | current->lockdep_recursion = 1; | ||
2860 | __lock_contended(lock, ip); | ||
2861 | current->lockdep_recursion = 0; | ||
2862 | raw_local_irq_restore(flags); | ||
2863 | } | ||
2864 | EXPORT_SYMBOL_GPL(lock_contended); | ||
2865 | |||
2866 | void lock_acquired(struct lockdep_map *lock) | ||
2867 | { | ||
2868 | unsigned long flags; | ||
2869 | |||
2870 | if (unlikely(!lock_stat)) | ||
2871 | return; | ||
2872 | |||
2873 | if (unlikely(current->lockdep_recursion)) | ||
2874 | return; | ||
2875 | |||
2876 | raw_local_irq_save(flags); | ||
2877 | check_flags(flags); | ||
2878 | current->lockdep_recursion = 1; | ||
2879 | __lock_acquired(lock); | ||
2880 | current->lockdep_recursion = 0; | ||
2881 | raw_local_irq_restore(flags); | ||
2882 | } | ||
2883 | EXPORT_SYMBOL_GPL(lock_acquired); | ||
2884 | #endif | ||
2885 | |||
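The CONFIG_LOCK_STAT hooks above do their accounting with two timestamps per held lock: lock_contended() stamps the start of the wait, lock_acquired() turns that into a waittime sample and restarts the hold timer, and lock_release_holdtime() (not shown in this hunk) closes the hold interval. A user-space sketch of the same bookkeeping, with invented names and clock_gettime() standing in for sched_clock():

#include <stdint.h>
#include <time.h>

struct lock_times {
	uint64_t waittime_stamp;	/* set when we started waiting */
	uint64_t holdtime_stamp;	/* set when we got the lock */
	uint64_t total_wait;		/* accumulated wait time, in ns */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static void note_contended(struct lock_times *lt)
{
	lt->waittime_stamp = now_ns();		/* wait starts here */
}

static void note_acquired(struct lock_times *lt)
{
	uint64_t now = now_ns();

	if (lt->waittime_stamp) {
		lt->total_wait += now - lt->waittime_stamp;
		lt->waittime_stamp = 0;
	}
	lt->holdtime_stamp = now;		/* hold time is closed at release */
}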
2476 | /* | 2886 | /* |
2477 | * Used by the testsuite, sanitize the validator state | 2887 | * Used by the testsuite, sanitize the validator state |
2478 | * after a simulated failure: | 2888 | * after a simulated failure: |
@@ -2636,8 +3046,11 @@ void __init lockdep_info(void) | |||
2636 | sizeof(struct held_lock) * MAX_LOCK_DEPTH); | 3046 | sizeof(struct held_lock) * MAX_LOCK_DEPTH); |
2637 | 3047 | ||
2638 | #ifdef CONFIG_DEBUG_LOCKDEP | 3048 | #ifdef CONFIG_DEBUG_LOCKDEP |
2639 | if (lockdep_init_error) | 3049 | if (lockdep_init_error) { |
2640 | printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); | 3050 | printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n"); |
3051 | printk("Call stack leading to lockdep invocation was:\n"); | ||
3052 | print_stack_trace(&lockdep_init_trace, 0); | ||
3053 | } | ||
2641 | #endif | 3054 | #endif |
2642 | } | 3055 | } |
2643 | 3056 | ||
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 58f35e586e..9f17af4a24 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -5,7 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Started by Ingo Molnar: | 6 | * Started by Ingo Molnar: |
7 | * | 7 | * |
8 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | 10 | * |
10 | * Code for /proc/lockdep and /proc/lockdep_stats: | 11 | * Code for /proc/lockdep and /proc/lockdep_stats: |
11 | * | 12 | * |
@@ -15,6 +16,10 @@ | |||
15 | #include <linux/seq_file.h> | 16 | #include <linux/seq_file.h> |
16 | #include <linux/kallsyms.h> | 17 | #include <linux/kallsyms.h> |
17 | #include <linux/debug_locks.h> | 18 | #include <linux/debug_locks.h> |
19 | #include <linux/vmalloc.h> | ||
20 | #include <linux/sort.h> | ||
21 | #include <asm/uaccess.h> | ||
22 | #include <asm/div64.h> | ||
18 | 23 | ||
19 | #include "lockdep_internals.h" | 24 | #include "lockdep_internals.h" |
20 | 25 | ||
@@ -271,8 +276,10 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
271 | if (nr_list_entries) | 276 | if (nr_list_entries) |
272 | factor = sum_forward_deps / nr_list_entries; | 277 | factor = sum_forward_deps / nr_list_entries; |
273 | 278 | ||
279 | #ifdef CONFIG_PROVE_LOCKING | ||
274 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", | 280 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", |
275 | nr_lock_chains, MAX_LOCKDEP_CHAINS); | 281 | nr_lock_chains, MAX_LOCKDEP_CHAINS); |
282 | #endif | ||
276 | 283 | ||
277 | #ifdef CONFIG_TRACE_IRQFLAGS | 284 | #ifdef CONFIG_TRACE_IRQFLAGS |
278 | seq_printf(m, " in-hardirq chains: %11u\n", | 285 | seq_printf(m, " in-hardirq chains: %11u\n", |
@@ -342,6 +349,292 @@ static const struct file_operations proc_lockdep_stats_operations = { | |||
342 | .release = seq_release, | 349 | .release = seq_release, |
343 | }; | 350 | }; |
344 | 351 | ||
352 | #ifdef CONFIG_LOCK_STAT | ||
353 | |||
354 | struct lock_stat_data { | ||
355 | struct lock_class *class; | ||
356 | struct lock_class_stats stats; | ||
357 | }; | ||
358 | |||
359 | struct lock_stat_seq { | ||
360 | struct lock_stat_data *iter; | ||
361 | struct lock_stat_data *iter_end; | ||
362 | struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; | ||
363 | }; | ||
364 | |||
365 | /* | ||
366 | * sort on absolute number of contentions | ||
367 | */ | ||
368 | static int lock_stat_cmp(const void *l, const void *r) | ||
369 | { | ||
370 | const struct lock_stat_data *dl = l, *dr = r; | ||
371 | unsigned long nl, nr; | ||
372 | |||
373 | nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; | ||
374 | nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; | ||
375 | |||
376 | return nr - nl; | ||
377 | } | ||
378 | |||
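One caveat with lock_stat_cmp() above: it sums unsigned long counters and returns their difference as an int, which can wrap or truncate for very large contention counts. A defensive three-way variant (illustrative only, reusing the lock_stat_data and lock_time types introduced in this hunk) preserves the same descending order without the narrowing:

static int lock_stat_cmp_safe(const void *l, const void *r)
{
	const struct lock_stat_data *dl = l, *dr = r;
	unsigned long nl = dl->stats.read_waittime.nr +
			   dl->stats.write_waittime.nr;
	unsigned long nr = dr->stats.read_waittime.nr +
			   dr->stats.write_waittime.nr;

	if (nr > nl)
		return 1;	/* same ordering as nr - nl, without the cast */
	if (nr < nl)
		return -1;
	return 0;
}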
379 | static void seq_line(struct seq_file *m, char c, int offset, int length) | ||
380 | { | ||
381 | int i; | ||
382 | |||
383 | for (i = 0; i < offset; i++) | ||
384 | seq_puts(m, " "); | ||
385 | for (i = 0; i < length; i++) | ||
386 | seq_printf(m, "%c", c); | ||
387 | seq_puts(m, "\n"); | ||
388 | } | ||
389 | |||
390 | static void snprint_time(char *buf, size_t bufsiz, s64 nr) | ||
391 | { | ||
392 | unsigned long rem; | ||
393 | |||
394 | rem = do_div(nr, 1000); /* XXX: do_div_signed */ | ||
395 | snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); | ||
396 | } | ||
397 | |||
398 | static void seq_time(struct seq_file *m, s64 time) | ||
399 | { | ||
400 | char num[15]; | ||
401 | |||
402 | snprint_time(num, sizeof(num), time); | ||
403 | seq_printf(m, " %14s", num); | ||
404 | } | ||
405 | |||
406 | static void seq_lock_time(struct seq_file *m, struct lock_time *lt) | ||
407 | { | ||
408 | seq_printf(m, "%14lu", lt->nr); | ||
409 | seq_time(m, lt->min); | ||
410 | seq_time(m, lt->max); | ||
411 | seq_time(m, lt->total); | ||
412 | } | ||
413 | |||
414 | static void seq_stats(struct seq_file *m, struct lock_stat_data *data) | ||
415 | { | ||
416 | char name[39]; | ||
417 | struct lock_class *class; | ||
418 | struct lock_class_stats *stats; | ||
419 | int i, namelen; | ||
420 | |||
421 | class = data->class; | ||
422 | stats = &data->stats; | ||
423 | |||
424 | namelen = 38; | ||
425 | if (class->name_version > 1) | ||
426 | namelen -= 2; /* XXX truncates versions > 9 */ | ||
427 | if (class->subclass) | ||
428 | namelen -= 2; | ||
429 | |||
430 | if (!class->name) { | ||
431 | char str[KSYM_NAME_LEN]; | ||
432 | const char *key_name; | ||
433 | |||
434 | key_name = __get_key_name(class->key, str); | ||
435 | snprintf(name, namelen, "%s", key_name); | ||
436 | } else { | ||
437 | snprintf(name, namelen, "%s", class->name); | ||
438 | } | ||
439 | namelen = strlen(name); | ||
440 | if (class->name_version > 1) { | ||
441 | snprintf(name+namelen, 3, "#%d", class->name_version); | ||
442 | namelen += 2; | ||
443 | } | ||
444 | if (class->subclass) { | ||
445 | snprintf(name+namelen, 3, "/%d", class->subclass); | ||
446 | namelen += 2; | ||
447 | } | ||
448 | |||
449 | if (stats->write_holdtime.nr) { | ||
450 | if (stats->read_holdtime.nr) | ||
451 | seq_printf(m, "%38s-W:", name); | ||
452 | else | ||
453 | seq_printf(m, "%40s:", name); | ||
454 | |||
455 | seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); | ||
456 | seq_lock_time(m, &stats->write_waittime); | ||
457 | seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); | ||
458 | seq_lock_time(m, &stats->write_holdtime); | ||
459 | seq_puts(m, "\n"); | ||
460 | } | ||
461 | |||
462 | if (stats->read_holdtime.nr) { | ||
463 | seq_printf(m, "%38s-R:", name); | ||
464 | seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); | ||
465 | seq_lock_time(m, &stats->read_waittime); | ||
466 | seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); | ||
467 | seq_lock_time(m, &stats->read_holdtime); | ||
468 | seq_puts(m, "\n"); | ||
469 | } | ||
470 | |||
471 | if (stats->read_waittime.nr + stats->write_waittime.nr == 0) | ||
472 | return; | ||
473 | |||
474 | if (stats->read_holdtime.nr) | ||
475 | namelen += 2; | ||
476 | |||
477 | for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { | ||
478 | char sym[KSYM_SYMBOL_LEN]; | ||
479 | char ip[32]; | ||
480 | |||
481 | if (class->contention_point[i] == 0) | ||
482 | break; | ||
483 | |||
484 | if (!i) | ||
485 | seq_line(m, '-', 40-namelen, namelen); | ||
486 | |||
487 | sprint_symbol(sym, class->contention_point[i]); | ||
488 | snprintf(ip, sizeof(ip), "[<%p>]", | ||
489 | (void *)class->contention_point[i]); | ||
490 | seq_printf(m, "%40s %14lu %29s %s\n", name, | ||
491 | stats->contention_point[i], | ||
492 | ip, sym); | ||
493 | } | ||
494 | if (i) { | ||
495 | seq_puts(m, "\n"); | ||
496 | seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); | ||
497 | seq_puts(m, "\n"); | ||
498 | } | ||
499 | } | ||
500 | |||
501 | static void seq_header(struct seq_file *m) | ||
502 | { | ||
503 | seq_printf(m, "lock_stat version 0.2\n"); | ||
504 | seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); | ||
505 | seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " | ||
506 | "%14s %14s\n", | ||
507 | "class name", | ||
508 | "con-bounces", | ||
509 | "contentions", | ||
510 | "waittime-min", | ||
511 | "waittime-max", | ||
512 | "waittime-total", | ||
513 | "acq-bounces", | ||
514 | "acquisitions", | ||
515 | "holdtime-min", | ||
516 | "holdtime-max", | ||
517 | "holdtime-total"); | ||
518 | seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); | ||
519 | seq_printf(m, "\n"); | ||
520 | } | ||
521 | |||
522 | static void *ls_start(struct seq_file *m, loff_t *pos) | ||
523 | { | ||
524 | struct lock_stat_seq *data = m->private; | ||
525 | |||
526 | if (data->iter == data->stats) | ||
527 | seq_header(m); | ||
528 | |||
529 | if (data->iter == data->iter_end) | ||
530 | data->iter = NULL; | ||
531 | |||
532 | return data->iter; | ||
533 | } | ||
534 | |||
535 | static void *ls_next(struct seq_file *m, void *v, loff_t *pos) | ||
536 | { | ||
537 | struct lock_stat_seq *data = m->private; | ||
538 | |||
539 | (*pos)++; | ||
540 | |||
541 | data->iter = v; | ||
542 | data->iter++; | ||
543 | if (data->iter == data->iter_end) | ||
544 | data->iter = NULL; | ||
545 | |||
546 | return data->iter; | ||
547 | } | ||
548 | |||
549 | static void ls_stop(struct seq_file *m, void *v) | ||
550 | { | ||
551 | } | ||
552 | |||
553 | static int ls_show(struct seq_file *m, void *v) | ||
554 | { | ||
555 | struct lock_stat_seq *data = m->private; | ||
556 | |||
557 | seq_stats(m, data->iter); | ||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static struct seq_operations lockstat_ops = { | ||
562 | .start = ls_start, | ||
563 | .next = ls_next, | ||
564 | .stop = ls_stop, | ||
565 | .show = ls_show, | ||
566 | }; | ||
567 | |||
568 | static int lock_stat_open(struct inode *inode, struct file *file) | ||
569 | { | ||
570 | int res; | ||
571 | struct lock_class *class; | ||
572 | struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); | ||
573 | |||
574 | if (!data) | ||
575 | return -ENOMEM; | ||
576 | |||
577 | res = seq_open(file, &lockstat_ops); | ||
578 | if (!res) { | ||
579 | struct lock_stat_data *iter = data->stats; | ||
580 | struct seq_file *m = file->private_data; | ||
581 | |||
582 | data->iter = iter; | ||
583 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
584 | iter->class = class; | ||
585 | iter->stats = lock_stats(class); | ||
586 | iter++; | ||
587 | } | ||
588 | data->iter_end = iter; | ||
589 | |||
590 | sort(data->stats, data->iter_end - data->iter, | ||
591 | sizeof(struct lock_stat_data), | ||
592 | lock_stat_cmp, NULL); | ||
593 | |||
594 | m->private = data; | ||
595 | } else | ||
596 | vfree(data); | ||
597 | |||
598 | return res; | ||
599 | } | ||
600 | |||
601 | static ssize_t lock_stat_write(struct file *file, const char __user *buf, | ||
602 | size_t count, loff_t *ppos) | ||
603 | { | ||
604 | struct lock_class *class; | ||
605 | char c; | ||
606 | |||
607 | if (count) { | ||
608 | if (get_user(c, buf)) | ||
609 | return -EFAULT; | ||
610 | |||
611 | if (c != '0') | ||
612 | return count; | ||
613 | |||
614 | list_for_each_entry(class, &all_lock_classes, lock_entry) | ||
615 | clear_lock_stats(class); | ||
616 | } | ||
617 | return count; | ||
618 | } | ||
619 | |||
620 | static int lock_stat_release(struct inode *inode, struct file *file) | ||
621 | { | ||
622 | struct seq_file *seq = file->private_data; | ||
623 | |||
624 | vfree(seq->private); | ||
625 | seq->private = NULL; | ||
626 | return seq_release(inode, file); | ||
627 | } | ||
628 | |||
629 | static const struct file_operations proc_lock_stat_operations = { | ||
630 | .open = lock_stat_open, | ||
631 | .write = lock_stat_write, | ||
632 | .read = seq_read, | ||
633 | .llseek = seq_lseek, | ||
634 | .release = lock_stat_release, | ||
635 | }; | ||
636 | #endif /* CONFIG_LOCK_STAT */ | ||
637 | |||
345 | static int __init lockdep_proc_init(void) | 638 | static int __init lockdep_proc_init(void) |
346 | { | 639 | { |
347 | struct proc_dir_entry *entry; | 640 | struct proc_dir_entry *entry; |
@@ -354,6 +647,12 @@ static int __init lockdep_proc_init(void) | |||
354 | if (entry) | 647 | if (entry) |
355 | entry->proc_fops = &proc_lockdep_stats_operations; | 648 | entry->proc_fops = &proc_lockdep_stats_operations; |
356 | 649 | ||
650 | #ifdef CONFIG_LOCK_STAT | ||
651 | entry = create_proc_entry("lock_stat", S_IRUSR, NULL); | ||
652 | if (entry) | ||
653 | entry->proc_fops = &proc_lock_stat_operations; | ||
654 | #endif | ||
655 | |||
357 | return 0; | 656 | return 0; |
358 | } | 657 | } |
359 | 658 | ||
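The lock_stat code above exposes the per-class statistics through /proc/lock_stat: reading the file prints the header plus one row (or two, for read/write classes) per lock class, and writing a string that starts with '0' clears every counter. A minimal user-space sketch of that interaction, not part of the patch and assuming CONFIG_LOCK_STAT is enabled:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/lock_stat", "w");
		char line[512];

		if (f) {			/* any input beginning with '0' resets the counters */
			fputs("0\n", f);
			fclose(f);
		}

		f = fopen("/proc/lock_stat", "r");	/* header, then one or two rows per class */
		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}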
diff --git a/kernel/module.c b/kernel/module.c index 539fed9ac8..33c04ad511 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2133,7 +2133,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
2133 | sym = get_ksymbol(mod, addr, NULL, NULL); | 2133 | sym = get_ksymbol(mod, addr, NULL, NULL); |
2134 | if (!sym) | 2134 | if (!sym) |
2135 | goto out; | 2135 | goto out; |
2136 | strlcpy(symname, sym, KSYM_NAME_LEN + 1); | 2136 | strlcpy(symname, sym, KSYM_NAME_LEN); |
2137 | mutex_unlock(&module_mutex); | 2137 | mutex_unlock(&module_mutex); |
2138 | return 0; | 2138 | return 0; |
2139 | } | 2139 | } |
@@ -2158,9 +2158,9 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
2158 | if (!sym) | 2158 | if (!sym) |
2159 | goto out; | 2159 | goto out; |
2160 | if (modname) | 2160 | if (modname) |
2161 | strlcpy(modname, mod->name, MODULE_NAME_LEN + 1); | 2161 | strlcpy(modname, mod->name, MODULE_NAME_LEN); |
2162 | if (name) | 2162 | if (name) |
2163 | strlcpy(name, sym, KSYM_NAME_LEN + 1); | 2163 | strlcpy(name, sym, KSYM_NAME_LEN); |
2164 | mutex_unlock(&module_mutex); | 2164 | mutex_unlock(&module_mutex); |
2165 | return 0; | 2165 | return 0; |
2166 | } | 2166 | } |
@@ -2181,8 +2181,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | |||
2181 | *value = mod->symtab[symnum].st_value; | 2181 | *value = mod->symtab[symnum].st_value; |
2182 | *type = mod->symtab[symnum].st_info; | 2182 | *type = mod->symtab[symnum].st_info; |
2183 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, | 2183 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, |
2184 | KSYM_NAME_LEN + 1); | 2184 | KSYM_NAME_LEN); |
2185 | strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); | 2185 | strlcpy(module_name, mod->name, MODULE_NAME_LEN); |
2186 | *exported = is_exported(name, mod); | 2186 | *exported = is_exported(name, mod); |
2187 | mutex_unlock(&module_mutex); | 2187 | mutex_unlock(&module_mutex); |
2188 | return 0; | 2188 | return 0; |
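The strlcpy() size fixes above matter because strlcpy(), unlike strncpy(), always NUL-terminates within the given size; passing KSYM_NAME_LEN + 1 for a KSYM_NAME_LEN-sized buffer therefore allowed a one-byte overrun. A small stand-alone illustration of the truncation semantics, using a local strlcpy-style helper rather than the kernel's implementation:

	#include <stdio.h>
	#include <string.h>

	/* behaves like strlcpy(): copies at most size-1 bytes and NUL-terminates */
	static size_t copy_trunc(char *dst, const char *src, size_t size)
	{
		size_t len = strlen(src);

		if (size) {
			size_t n = len >= size ? size - 1 : len;

			memcpy(dst, src, n);
			dst[n] = '\0';
		}
		return len;		/* source length, so callers can detect truncation */
	}

	int main(void)
	{
		char name[8];

		copy_trunc(name, "a_rather_long_symbol", sizeof(name));
		printf("%s\n", name);	/* "a_rathe": truncated, never past the buffer */
		return 0;
	}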
diff --git a/kernel/mutex.c b/kernel/mutex.c index 303eab1848..691b86564d 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -139,6 +139,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
139 | list_add_tail(&waiter.list, &lock->wait_list); | 139 | list_add_tail(&waiter.list, &lock->wait_list); |
140 | waiter.task = task; | 140 | waiter.task = task; |
141 | 141 | ||
142 | old_val = atomic_xchg(&lock->count, -1); | ||
143 | if (old_val == 1) | ||
144 | goto done; | ||
145 | |||
146 | lock_contended(&lock->dep_map, _RET_IP_); | ||
147 | |||
142 | for (;;) { | 148 | for (;;) { |
143 | /* | 149 | /* |
144 | * Lets try to take the lock again - this is needed even if | 150 | * Lets try to take the lock again - this is needed even if |
@@ -174,6 +180,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
174 | spin_lock_mutex(&lock->wait_lock, flags); | 180 | spin_lock_mutex(&lock->wait_lock, flags); |
175 | } | 181 | } |
176 | 182 | ||
183 | done: | ||
184 | lock_acquired(&lock->dep_map); | ||
177 | /* got the lock - rejoice! */ | 185 | /* got the lock - rejoice! */ |
178 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); | 186 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); |
179 | debug_mutex_set_owner(lock, task_thread_info(task)); | 187 | debug_mutex_set_owner(lock, task_thread_info(task)); |
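The mutex.c hunk shows the general pattern for feeding lock_stat from a sleeping lock: lock_contended() is called once, just before the task starts waiting, and lock_acquired() once the lock is finally taken, including the early-exit case where the xchg wins the race. A hedged sketch of the same pattern around a hypothetical primitive; my_lock, its count field and wait_for_unlock() are illustrative placeholders, only the two lockdep hooks are real:

	static void my_lock_slowpath(struct my_lock *lock)
	{
		/* one more attempt before recording contention */
		if (atomic_xchg(&lock->count, -1) == 1)
			goto acquired;

		lock_contended(&lock->dep_map, _RET_IP_);	/* waittime starts here */

		while (atomic_xchg(&lock->count, -1) != 1)
			wait_for_unlock(lock);			/* assumed sleep/wake helper */

	acquired:
		lock_acquired(&lock->dep_map);			/* holdtime bookkeeping starts here */
	}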
diff --git a/kernel/panic.c b/kernel/panic.c index 623d182825..f64f4c1ac1 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -159,14 +159,15 @@ const char *print_tainted(void) | |||
159 | { | 159 | { |
160 | static char buf[20]; | 160 | static char buf[20]; |
161 | if (tainted) { | 161 | if (tainted) { |
162 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", | 162 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c", |
163 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', | 163 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', |
164 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', | 164 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', |
165 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', | 165 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', |
166 | tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', | 166 | tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', |
167 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', | 167 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', |
168 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', | 168 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', |
169 | tainted & TAINT_USER ? 'U' : ' '); | 169 | tainted & TAINT_USER ? 'U' : ' ', |
170 | tainted & TAINT_DIE ? 'D' : ' '); | ||
170 | } | 171 | } |
171 | else | 172 | else |
172 | snprintf(buf, sizeof(buf), "Not tainted"); | 173 | snprintf(buf, sizeof(buf), "Not tainted"); |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 495b7d4dd3..7358609e47 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -33,13 +33,20 @@ config PM_DEBUG | |||
33 | bool "Power Management Debug Support" | 33 | bool "Power Management Debug Support" |
34 | depends on PM | 34 | depends on PM |
35 | ---help--- | 35 | ---help--- |
36 | This option enables verbose debugging support in the Power Management | 36 | This option enables various debugging support in the Power Management |
37 | code. This is helpful when debugging and reporting various PM bugs, | 37 | code. This is helpful when debugging and reporting PM bugs, like |
38 | like suspend support. | 38 | suspend support. |
39 | |||
40 | config PM_VERBOSE | ||
41 | bool "Verbose Power Management debugging" | ||
42 | depends on PM_DEBUG | ||
43 | default n | ||
44 | ---help--- | ||
45 | This option enables verbose messages from the Power Management code. | ||
39 | 46 | ||
40 | config DISABLE_CONSOLE_SUSPEND | 47 | config DISABLE_CONSOLE_SUSPEND |
41 | bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" | 48 | bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" |
42 | depends on PM && PM_DEBUG | 49 | depends on PM_DEBUG |
43 | default n | 50 | default n |
44 | ---help--- | 51 | ---help--- |
45 | This option turns off the console suspend mechanism that prevents | 52 | This option turns off the console suspend mechanism that prevents |
@@ -50,7 +57,7 @@ config DISABLE_CONSOLE_SUSPEND | |||
50 | 57 | ||
51 | config PM_TRACE | 58 | config PM_TRACE |
52 | bool "Suspend/resume event tracing" | 59 | bool "Suspend/resume event tracing" |
53 | depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL | 60 | depends on PM_DEBUG && X86_32 && EXPERIMENTAL |
54 | default n | 61 | default n |
55 | ---help--- | 62 | ---help--- |
56 | This enables some cheesy code to save the last PM event point in the | 63 | This enables some cheesy code to save the last PM event point in the |
@@ -65,18 +72,6 @@ config PM_TRACE | |||
65 | CAUTION: this option will cause your machine's real-time clock to be | 72 | CAUTION: this option will cause your machine's real-time clock to be |
66 | set to an invalid time after a resume. | 73 | set to an invalid time after a resume. |
67 | 74 | ||
68 | config PM_SYSFS_DEPRECATED | ||
69 | bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" | ||
70 | depends on PM && SYSFS | ||
71 | default n | ||
72 | help | ||
73 | The driver model started out with a sysfs file intended to provide | ||
74 | a userspace hook for device power management. This feature has never | ||
75 | worked very well, except for limited testing purposes, and so it will | ||
76 | be removed. It's not clear that a generic mechanism could really | ||
77 | handle the wide variability of device power states; any replacements | ||
78 | are likely to be bus or driver specific. | ||
79 | |||
80 | config SOFTWARE_SUSPEND | 75 | config SOFTWARE_SUSPEND |
81 | bool "Software Suspend (Hibernation)" | 76 | bool "Software Suspend (Hibernation)" |
82 | depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) | 77 | depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f445b9cd60..324ac0188c 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -45,7 +45,7 @@ enum { | |||
45 | 45 | ||
46 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | 46 | static int hibernation_mode = HIBERNATION_SHUTDOWN; |
47 | 47 | ||
48 | struct hibernation_ops *hibernation_ops; | 48 | static struct hibernation_ops *hibernation_ops; |
49 | 49 | ||
50 | /** | 50 | /** |
51 | * hibernation_set_ops - set the global hibernate operations | 51 | * hibernation_set_ops - set the global hibernate operations |
@@ -54,7 +54,8 @@ struct hibernation_ops *hibernation_ops; | |||
54 | 54 | ||
55 | void hibernation_set_ops(struct hibernation_ops *ops) | 55 | void hibernation_set_ops(struct hibernation_ops *ops) |
56 | { | 56 | { |
57 | if (ops && !(ops->prepare && ops->enter && ops->finish)) { | 57 | if (ops && !(ops->prepare && ops->enter && ops->finish |
58 | && ops->pre_restore && ops->restore_cleanup)) { | ||
58 | WARN_ON(1); | 59 | WARN_ON(1); |
59 | return; | 60 | return; |
60 | } | 61 | } |
@@ -74,9 +75,9 @@ void hibernation_set_ops(struct hibernation_ops *ops) | |||
74 | * platform driver if so configured and return an error code if it fails | 75 | * platform driver if so configured and return an error code if it fails |
75 | */ | 76 | */ |
76 | 77 | ||
77 | static int platform_prepare(void) | 78 | static int platform_prepare(int platform_mode) |
78 | { | 79 | { |
79 | return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? | 80 | return (platform_mode && hibernation_ops) ? |
80 | hibernation_ops->prepare() : 0; | 81 | hibernation_ops->prepare() : 0; |
81 | } | 82 | } |
82 | 83 | ||
@@ -85,13 +86,145 @@ static int platform_prepare(void) | |||
85 | * using the platform driver (must be called after platform_prepare()) | 86 | * using the platform driver (must be called after platform_prepare()) |
86 | */ | 87 | */ |
87 | 88 | ||
88 | static void platform_finish(void) | 89 | static void platform_finish(int platform_mode) |
89 | { | 90 | { |
90 | if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) | 91 | if (platform_mode && hibernation_ops) |
91 | hibernation_ops->finish(); | 92 | hibernation_ops->finish(); |
92 | } | 93 | } |
93 | 94 | ||
94 | /** | 95 | /** |
96 | * platform_pre_restore - prepare the platform for the restoration from a | ||
97 | * hibernation image. If the restore fails after this function has been | ||
98 | * called, platform_restore_cleanup() must be called. | ||
99 | */ | ||
100 | |||
101 | static int platform_pre_restore(int platform_mode) | ||
102 | { | ||
103 | return (platform_mode && hibernation_ops) ? | ||
104 | hibernation_ops->pre_restore() : 0; | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * platform_restore_cleanup - switch the platform to the normal mode of | ||
109 | * operation after a failing restore. If platform_pre_restore() has been | ||
110 | * called before the failing restore, this function must be called too, | ||
111 | * regardless of the result of platform_pre_restore(). | ||
112 | */ | ||
113 | |||
114 | static void platform_restore_cleanup(int platform_mode) | ||
115 | { | ||
116 | if (platform_mode && hibernation_ops) | ||
117 | hibernation_ops->restore_cleanup(); | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * hibernation_snapshot - quiesce devices and create the hibernation | ||
122 | * snapshot image. | ||
123 | * @platform_mode - if set, use the platform driver, if available, to | ||
124 | * prepare the platform firmware for the power transition. | ||
125 | * | ||
126 | * Must be called with pm_mutex held | ||
127 | */ | ||
128 | |||
129 | int hibernation_snapshot(int platform_mode) | ||
130 | { | ||
131 | int error; | ||
132 | |||
133 | /* Free memory before shutting down devices. */ | ||
134 | error = swsusp_shrink_memory(); | ||
135 | if (error) | ||
136 | return error; | ||
137 | |||
138 | suspend_console(); | ||
139 | error = device_suspend(PMSG_FREEZE); | ||
140 | if (error) | ||
141 | goto Resume_console; | ||
142 | |||
143 | error = platform_prepare(platform_mode); | ||
144 | if (error) | ||
145 | goto Resume_devices; | ||
146 | |||
147 | error = disable_nonboot_cpus(); | ||
148 | if (!error) { | ||
149 | if (hibernation_mode != HIBERNATION_TEST) { | ||
150 | in_suspend = 1; | ||
151 | error = swsusp_suspend(); | ||
152 | /* Control returns here after successful restore */ | ||
153 | } else { | ||
154 | printk("swsusp debug: Waiting for 5 seconds.\n"); | ||
155 | mdelay(5000); | ||
156 | } | ||
157 | } | ||
158 | enable_nonboot_cpus(); | ||
159 | Resume_devices: | ||
160 | platform_finish(platform_mode); | ||
161 | device_resume(); | ||
162 | Resume_console: | ||
163 | resume_console(); | ||
164 | return error; | ||
165 | } | ||
166 | |||
167 | /** | ||
168 | * hibernation_restore - quiesce devices and restore the hibernation | ||
169 | * snapshot image. If successful, control returns in hibernation_snapshot() | ||
170 | * @platform_mode - if set, use the platform driver, if available, to | ||
171 | * prepare the platform firmware for the transition. | ||
172 | * | ||
173 | * Must be called with pm_mutex held | ||
174 | */ | ||
175 | |||
176 | int hibernation_restore(int platform_mode) | ||
177 | { | ||
178 | int error; | ||
179 | |||
180 | pm_prepare_console(); | ||
181 | suspend_console(); | ||
182 | error = device_suspend(PMSG_PRETHAW); | ||
183 | if (error) | ||
184 | goto Finish; | ||
185 | |||
186 | error = platform_pre_restore(platform_mode); | ||
187 | if (!error) { | ||
188 | error = disable_nonboot_cpus(); | ||
189 | if (!error) | ||
190 | error = swsusp_resume(); | ||
191 | enable_nonboot_cpus(); | ||
192 | } | ||
193 | platform_restore_cleanup(platform_mode); | ||
194 | device_resume(); | ||
195 | Finish: | ||
196 | resume_console(); | ||
197 | pm_restore_console(); | ||
198 | return error; | ||
199 | } | ||
200 | |||
201 | /** | ||
202 | * hibernation_platform_enter - enter the hibernation state using the | ||
203 | * platform driver (if available) | ||
204 | */ | ||
205 | |||
206 | int hibernation_platform_enter(void) | ||
207 | { | ||
208 | int error; | ||
209 | |||
210 | if (hibernation_ops) { | ||
211 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | ||
212 | /* | ||
213 | * We have cancelled the power transition by running | ||
214 | * hibernation_ops->finish() before saving the image, so we | ||
215 | * should let the firmware know that we're going to enter the | ||
216 | * sleep state after all | ||
217 | */ | ||
218 | error = hibernation_ops->prepare(); | ||
219 | if (!error) | ||
220 | error = hibernation_ops->enter(); | ||
221 | } else { | ||
222 | error = -ENOSYS; | ||
223 | } | ||
224 | return error; | ||
225 | } | ||
226 | |||
227 | /** | ||
95 | * power_down - Shut the machine down for hibernation. | 228 | * power_down - Shut the machine down for hibernation. |
96 | * | 229 | * |
97 | * Use the platform driver, if configured so; otherwise try | 230 | * Use the platform driver, if configured so; otherwise try |
@@ -111,11 +244,7 @@ static void power_down(void) | |||
111 | kernel_restart(NULL); | 244 | kernel_restart(NULL); |
112 | break; | 245 | break; |
113 | case HIBERNATION_PLATFORM: | 246 | case HIBERNATION_PLATFORM: |
114 | if (hibernation_ops) { | 247 | hibernation_platform_enter(); |
115 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | ||
116 | hibernation_ops->enter(); | ||
117 | break; | ||
118 | } | ||
119 | } | 248 | } |
120 | kernel_halt(); | 249 | kernel_halt(); |
121 | /* | 250 | /* |
@@ -152,9 +281,16 @@ int hibernate(void) | |||
152 | { | 281 | { |
153 | int error; | 282 | int error; |
154 | 283 | ||
284 | mutex_lock(&pm_mutex); | ||
155 | /* The snapshot device should not be opened while we're running */ | 285 | /* The snapshot device should not be opened while we're running */ |
156 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) | 286 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { |
157 | return -EBUSY; | 287 | error = -EBUSY; |
288 | goto Unlock; | ||
289 | } | ||
290 | |||
291 | error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); | ||
292 | if (error) | ||
293 | goto Exit; | ||
158 | 294 | ||
159 | /* Allocate memory management structures */ | 295 | /* Allocate memory management structures */ |
160 | error = create_basic_memory_bitmaps(); | 296 | error = create_basic_memory_bitmaps(); |
@@ -165,75 +301,35 @@ int hibernate(void) | |||
165 | if (error) | 301 | if (error) |
166 | goto Finish; | 302 | goto Finish; |
167 | 303 | ||
168 | mutex_lock(&pm_mutex); | ||
169 | if (hibernation_mode == HIBERNATION_TESTPROC) { | 304 | if (hibernation_mode == HIBERNATION_TESTPROC) { |
170 | printk("swsusp debug: Waiting for 5 seconds.\n"); | 305 | printk("swsusp debug: Waiting for 5 seconds.\n"); |
171 | mdelay(5000); | 306 | mdelay(5000); |
172 | goto Thaw; | 307 | goto Thaw; |
173 | } | 308 | } |
309 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); | ||
310 | if (in_suspend && !error) { | ||
311 | unsigned int flags = 0; | ||
174 | 312 | ||
175 | /* Free memory before shutting down devices. */ | 313 | if (hibernation_mode == HIBERNATION_PLATFORM) |
176 | error = swsusp_shrink_memory(); | 314 | flags |= SF_PLATFORM_MODE; |
177 | if (error) | ||
178 | goto Thaw; | ||
179 | |||
180 | error = platform_prepare(); | ||
181 | if (error) | ||
182 | goto Thaw; | ||
183 | |||
184 | suspend_console(); | ||
185 | error = device_suspend(PMSG_FREEZE); | ||
186 | if (error) { | ||
187 | printk(KERN_ERR "PM: Some devices failed to suspend\n"); | ||
188 | goto Resume_devices; | ||
189 | } | ||
190 | error = disable_nonboot_cpus(); | ||
191 | if (error) | ||
192 | goto Enable_cpus; | ||
193 | |||
194 | if (hibernation_mode == HIBERNATION_TEST) { | ||
195 | printk("swsusp debug: Waiting for 5 seconds.\n"); | ||
196 | mdelay(5000); | ||
197 | goto Enable_cpus; | ||
198 | } | ||
199 | |||
200 | pr_debug("PM: snapshotting memory.\n"); | ||
201 | in_suspend = 1; | ||
202 | error = swsusp_suspend(); | ||
203 | if (error) | ||
204 | goto Enable_cpus; | ||
205 | |||
206 | if (in_suspend) { | ||
207 | enable_nonboot_cpus(); | ||
208 | platform_finish(); | ||
209 | device_resume(); | ||
210 | resume_console(); | ||
211 | pr_debug("PM: writing image.\n"); | 315 | pr_debug("PM: writing image.\n"); |
212 | error = swsusp_write(); | 316 | error = swsusp_write(flags); |
317 | swsusp_free(); | ||
213 | if (!error) | 318 | if (!error) |
214 | power_down(); | 319 | power_down(); |
215 | else { | ||
216 | swsusp_free(); | ||
217 | goto Thaw; | ||
218 | } | ||
219 | } else { | 320 | } else { |
220 | pr_debug("PM: Image restored successfully.\n"); | 321 | pr_debug("PM: Image restored successfully.\n"); |
322 | swsusp_free(); | ||
221 | } | 323 | } |
222 | |||
223 | swsusp_free(); | ||
224 | Enable_cpus: | ||
225 | enable_nonboot_cpus(); | ||
226 | Resume_devices: | ||
227 | platform_finish(); | ||
228 | device_resume(); | ||
229 | resume_console(); | ||
230 | Thaw: | 324 | Thaw: |
231 | mutex_unlock(&pm_mutex); | ||
232 | unprepare_processes(); | 325 | unprepare_processes(); |
233 | Finish: | 326 | Finish: |
234 | free_basic_memory_bitmaps(); | 327 | free_basic_memory_bitmaps(); |
235 | Exit: | 328 | Exit: |
329 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
236 | atomic_inc(&snapshot_device_available); | 330 | atomic_inc(&snapshot_device_available); |
331 | Unlock: | ||
332 | mutex_unlock(&pm_mutex); | ||
237 | return error; | 333 | return error; |
238 | } | 334 | } |
239 | 335 | ||
@@ -253,6 +349,7 @@ int hibernate(void) | |||
253 | static int software_resume(void) | 349 | static int software_resume(void) |
254 | { | 350 | { |
255 | int error; | 351 | int error; |
352 | unsigned int flags; | ||
256 | 353 | ||
257 | mutex_lock(&pm_mutex); | 354 | mutex_lock(&pm_mutex); |
258 | if (!swsusp_resume_device) { | 355 | if (!swsusp_resume_device) { |
@@ -300,30 +397,12 @@ static int software_resume(void) | |||
300 | 397 | ||
301 | pr_debug("PM: Reading swsusp image.\n"); | 398 | pr_debug("PM: Reading swsusp image.\n"); |
302 | 399 | ||
303 | error = swsusp_read(); | 400 | error = swsusp_read(&flags); |
304 | if (error) { | ||
305 | swsusp_free(); | ||
306 | goto Thaw; | ||
307 | } | ||
308 | |||
309 | pr_debug("PM: Preparing devices for restore.\n"); | ||
310 | |||
311 | suspend_console(); | ||
312 | error = device_suspend(PMSG_PRETHAW); | ||
313 | if (error) | ||
314 | goto Free; | ||
315 | |||
316 | error = disable_nonboot_cpus(); | ||
317 | if (!error) | 401 | if (!error) |
318 | swsusp_resume(); | 402 | hibernation_restore(flags & SF_PLATFORM_MODE); |
319 | 403 | ||
320 | enable_nonboot_cpus(); | ||
321 | Free: | ||
322 | swsusp_free(); | ||
323 | device_resume(); | ||
324 | resume_console(); | ||
325 | Thaw: | ||
326 | printk(KERN_ERR "PM: Restore failed, recovering.\n"); | 404 | printk(KERN_ERR "PM: Restore failed, recovering.\n"); |
405 | swsusp_free(); | ||
327 | unprepare_processes(); | 406 | unprepare_processes(); |
328 | Done: | 407 | Done: |
329 | free_basic_memory_bitmaps(); | 408 | free_basic_memory_bitmaps(); |
@@ -333,7 +412,7 @@ static int software_resume(void) | |||
333 | Unlock: | 412 | Unlock: |
334 | mutex_unlock(&pm_mutex); | 413 | mutex_unlock(&pm_mutex); |
335 | pr_debug("PM: Resume from disk failed.\n"); | 414 | pr_debug("PM: Resume from disk failed.\n"); |
336 | return 0; | 415 | return error; |
337 | } | 416 | } |
338 | 417 | ||
339 | late_initcall(software_resume); | 418 | late_initcall(software_resume); |
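With the disk.c rework, hibernation_set_ops() refuses partially-filled operation sets: a platform driver now has to supply pre_restore and restore_cleanup in addition to prepare/enter/finish. A hedged sketch of a registering driver; the acme_* callbacks are hypothetical stand-ins for firmware calls:

	static int acme_prepare(void)		{ return 0; }	/* arm firmware for the power-off */
	static int acme_enter(void)		{ return 0; }	/* actually enter the sleep state */
	static void acme_finish(void)		{ }		/* undo prepare() */
	static int acme_pre_restore(void)	{ return 0; }	/* before loading the image */
	static void acme_restore_cleanup(void)	{ }		/* after a failed restore */

	static struct hibernation_ops acme_hibernation_ops = {
		.prepare	 = acme_prepare,
		.enter		 = acme_enter,
		.finish		 = acme_finish,
		.pre_restore	 = acme_pre_restore,
		.restore_cleanup = acme_restore_cleanup,
	};

	static int __init acme_pm_init(void)
	{
		/* rejected with a WARN_ON() unless all five callbacks are set */
		hibernation_set_ops(&acme_hibernation_ops);
		return 0;
	}
	late_initcall(acme_pm_init);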
diff --git a/kernel/power/main.c b/kernel/power/main.c index fc45ed2262..32147b57c3 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -23,6 +23,8 @@ | |||
23 | 23 | ||
24 | #include "power.h" | 24 | #include "power.h" |
25 | 25 | ||
26 | BLOCKING_NOTIFIER_HEAD(pm_chain_head); | ||
27 | |||
26 | /*This is just an arbitrary number */ | 28 | /*This is just an arbitrary number */ |
27 | #define FREE_PAGE_NUMBER (100) | 29 | #define FREE_PAGE_NUMBER (100) |
28 | 30 | ||
@@ -63,14 +65,11 @@ static inline void pm_finish(suspend_state_t state) | |||
63 | 65 | ||
64 | /** | 66 | /** |
65 | * suspend_prepare - Do prep work before entering low-power state. | 67 | * suspend_prepare - Do prep work before entering low-power state. |
66 | * @state: State we're entering. | ||
67 | * | 68 | * |
68 | * This is common code that is called for each state that we're | 69 | * This is common code that is called for each state that we're entering. |
69 | * entering. Allocate a console, stop all processes, then make sure | 70 | * Run suspend notifiers, allocate a console and stop all processes. |
70 | * the platform can enter the requested state. | ||
71 | */ | 71 | */ |
72 | 72 | static int suspend_prepare(void) | |
73 | static int suspend_prepare(suspend_state_t state) | ||
74 | { | 73 | { |
75 | int error; | 74 | int error; |
76 | unsigned int free_pages; | 75 | unsigned int free_pages; |
@@ -78,6 +77,10 @@ static int suspend_prepare(suspend_state_t state) | |||
78 | if (!pm_ops || !pm_ops->enter) | 77 | if (!pm_ops || !pm_ops->enter) |
79 | return -EPERM; | 78 | return -EPERM; |
80 | 79 | ||
80 | error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); | ||
81 | if (error) | ||
82 | goto Finish; | ||
83 | |||
81 | pm_prepare_console(); | 84 | pm_prepare_console(); |
82 | 85 | ||
83 | if (freeze_processes()) { | 86 | if (freeze_processes()) { |
@@ -85,46 +88,23 @@ static int suspend_prepare(suspend_state_t state) | |||
85 | goto Thaw; | 88 | goto Thaw; |
86 | } | 89 | } |
87 | 90 | ||
88 | if ((free_pages = global_page_state(NR_FREE_PAGES)) | 91 | free_pages = global_page_state(NR_FREE_PAGES); |
89 | < FREE_PAGE_NUMBER) { | 92 | if (free_pages < FREE_PAGE_NUMBER) { |
90 | pr_debug("PM: free some memory\n"); | 93 | pr_debug("PM: free some memory\n"); |
91 | shrink_all_memory(FREE_PAGE_NUMBER - free_pages); | 94 | shrink_all_memory(FREE_PAGE_NUMBER - free_pages); |
92 | if (nr_free_pages() < FREE_PAGE_NUMBER) { | 95 | if (nr_free_pages() < FREE_PAGE_NUMBER) { |
93 | error = -ENOMEM; | 96 | error = -ENOMEM; |
94 | printk(KERN_ERR "PM: No enough memory\n"); | 97 | printk(KERN_ERR "PM: No enough memory\n"); |
95 | goto Thaw; | ||
96 | } | 98 | } |
97 | } | 99 | } |
98 | |||
99 | if (pm_ops->set_target) { | ||
100 | error = pm_ops->set_target(state); | ||
101 | if (error) | ||
102 | goto Thaw; | ||
103 | } | ||
104 | suspend_console(); | ||
105 | error = device_suspend(PMSG_SUSPEND); | ||
106 | if (error) { | ||
107 | printk(KERN_ERR "Some devices failed to suspend\n"); | ||
108 | goto Resume_console; | ||
109 | } | ||
110 | if (pm_ops->prepare) { | ||
111 | if ((error = pm_ops->prepare(state))) | ||
112 | goto Resume_devices; | ||
113 | } | ||
114 | |||
115 | error = disable_nonboot_cpus(); | ||
116 | if (!error) | 100 | if (!error) |
117 | return 0; | 101 | return 0; |
118 | 102 | ||
119 | enable_nonboot_cpus(); | ||
120 | pm_finish(state); | ||
121 | Resume_devices: | ||
122 | device_resume(); | ||
123 | Resume_console: | ||
124 | resume_console(); | ||
125 | Thaw: | 103 | Thaw: |
126 | thaw_processes(); | 104 | thaw_processes(); |
127 | pm_restore_console(); | 105 | pm_restore_console(); |
106 | Finish: | ||
107 | pm_notifier_call_chain(PM_POST_SUSPEND); | ||
128 | return error; | 108 | return error; |
129 | } | 109 | } |
130 | 110 | ||
@@ -140,6 +120,12 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) | |||
140 | local_irq_enable(); | 120 | local_irq_enable(); |
141 | } | 121 | } |
142 | 122 | ||
123 | /** | ||
124 | * suspend_enter - enter the desired system sleep state. | ||
125 | * @state: state to enter | ||
126 | * | ||
127 | * This function should be called after devices have been suspended. | ||
128 | */ | ||
143 | int suspend_enter(suspend_state_t state) | 129 | int suspend_enter(suspend_state_t state) |
144 | { | 130 | { |
145 | int error = 0; | 131 | int error = 0; |
@@ -159,23 +145,58 @@ int suspend_enter(suspend_state_t state) | |||
159 | return error; | 145 | return error; |
160 | } | 146 | } |
161 | 147 | ||
148 | /** | ||
149 | * suspend_devices_and_enter - suspend devices and enter the desired system sleep | ||
150 | * state. | ||
151 | * @state: state to enter | ||
152 | */ | ||
153 | int suspend_devices_and_enter(suspend_state_t state) | ||
154 | { | ||
155 | int error; | ||
156 | |||
157 | if (!pm_ops) | ||
158 | return -ENOSYS; | ||
159 | |||
160 | if (pm_ops->set_target) { | ||
161 | error = pm_ops->set_target(state); | ||
162 | if (error) | ||
163 | return error; | ||
164 | } | ||
165 | suspend_console(); | ||
166 | error = device_suspend(PMSG_SUSPEND); | ||
167 | if (error) { | ||
168 | printk(KERN_ERR "Some devices failed to suspend\n"); | ||
169 | goto Resume_console; | ||
170 | } | ||
171 | if (pm_ops->prepare) { | ||
172 | error = pm_ops->prepare(state); | ||
173 | if (error) | ||
174 | goto Resume_devices; | ||
175 | } | ||
176 | error = disable_nonboot_cpus(); | ||
177 | if (!error) | ||
178 | suspend_enter(state); | ||
179 | |||
180 | enable_nonboot_cpus(); | ||
181 | pm_finish(state); | ||
182 | Resume_devices: | ||
183 | device_resume(); | ||
184 | Resume_console: | ||
185 | resume_console(); | ||
186 | return error; | ||
187 | } | ||
162 | 188 | ||
163 | /** | 189 | /** |
164 | * suspend_finish - Do final work before exiting suspend sequence. | 190 | * suspend_finish - Do final work before exiting suspend sequence. |
165 | * @state: State we're coming out of. | ||
166 | * | 191 | * |
167 | * Call platform code to clean up, restart processes, and free the | 192 | * Call platform code to clean up, restart processes, and free the |
168 | * console that we've allocated. This is not called for suspend-to-disk. | 193 | * console that we've allocated. This is not called for suspend-to-disk. |
169 | */ | 194 | */ |
170 | 195 | static void suspend_finish(void) | |
171 | static void suspend_finish(suspend_state_t state) | ||
172 | { | 196 | { |
173 | enable_nonboot_cpus(); | ||
174 | pm_finish(state); | ||
175 | device_resume(); | ||
176 | resume_console(); | ||
177 | thaw_processes(); | 197 | thaw_processes(); |
178 | pm_restore_console(); | 198 | pm_restore_console(); |
199 | pm_notifier_call_chain(PM_POST_SUSPEND); | ||
179 | } | 200 | } |
180 | 201 | ||
181 | 202 | ||
@@ -207,7 +228,6 @@ static inline int valid_state(suspend_state_t state) | |||
207 | * Then, do the setup for suspend, enter the state, and clean up (after | 228 | * Then, do the setup for suspend, enter the state, and clean up (after |
208 | * we've woken up). | 229 | * we've woken up). |
209 | */ | 230 | */ |
210 | |||
211 | static int enter_state(suspend_state_t state) | 231 | static int enter_state(suspend_state_t state) |
212 | { | 232 | { |
213 | int error; | 233 | int error; |
@@ -218,14 +238,14 @@ static int enter_state(suspend_state_t state) | |||
218 | return -EBUSY; | 238 | return -EBUSY; |
219 | 239 | ||
220 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); | 240 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); |
221 | if ((error = suspend_prepare(state))) | 241 | if ((error = suspend_prepare())) |
222 | goto Unlock; | 242 | goto Unlock; |
223 | 243 | ||
224 | pr_debug("PM: Entering %s sleep\n", pm_states[state]); | 244 | pr_debug("PM: Entering %s sleep\n", pm_states[state]); |
225 | error = suspend_enter(state); | 245 | error = suspend_devices_and_enter(state); |
226 | 246 | ||
227 | pr_debug("PM: Finishing wakeup.\n"); | 247 | pr_debug("PM: Finishing wakeup.\n"); |
228 | suspend_finish(state); | 248 | suspend_finish(); |
229 | Unlock: | 249 | Unlock: |
230 | mutex_unlock(&pm_mutex); | 250 | mutex_unlock(&pm_mutex); |
231 | return error; | 251 | return error; |
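For suspend-to-RAM the reshuffled main.c keeps the same driver contract: suspend_devices_and_enter() consults pm_ops->set_target, ->prepare, ->enter and ->finish around the device suspend/resume calls. A hedged sketch of a minimal provider, assuming the usual pm_set_ops() registration helper and the callback prototypes implied by the calls above; the acme_s2ram_* bodies are placeholders:

	static int acme_s2ram_set_target(suspend_state_t state)	{ return 0; }
	static int acme_s2ram_prepare(suspend_state_t state)		{ return 0; }
	static int acme_s2ram_enter(suspend_state_t state)		{ return 0; }
	static int acme_s2ram_finish(suspend_state_t state)		{ return 0; }

	static struct pm_ops acme_pm_ops = {
		.set_target	= acme_s2ram_set_target,	/* called before devices are suspended */
		.prepare	= acme_s2ram_prepare,		/* called after device_suspend() */
		.enter		= acme_s2ram_enter,		/* the actual low-power entry */
		.finish		= acme_s2ram_finish,		/* cleanup on the way out */
	};

	static int __init acme_suspend_init(void)
	{
		pm_set_ops(&acme_pm_ops);			/* assumed registration helper */
		return 0;
	}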
diff --git a/kernel/power/power.h b/kernel/power/power.h index 5138148710..5f24c786f8 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -25,7 +25,10 @@ struct swsusp_info { | |||
25 | */ | 25 | */ |
26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) | 26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) |
27 | 27 | ||
28 | extern struct hibernation_ops *hibernation_ops; | 28 | /* kernel/power/disk.c */ |
29 | extern int hibernation_snapshot(int platform_mode); | ||
30 | extern int hibernation_restore(int platform_mode); | ||
31 | extern int hibernation_platform_enter(void); | ||
29 | #endif | 32 | #endif |
30 | 33 | ||
31 | extern int pfn_is_nosave(unsigned long); | 34 | extern int pfn_is_nosave(unsigned long); |
@@ -152,16 +155,34 @@ extern sector_t alloc_swapdev_block(int swap); | |||
152 | extern void free_all_swap_pages(int swap); | 155 | extern void free_all_swap_pages(int swap); |
153 | extern int swsusp_swap_in_use(void); | 156 | extern int swsusp_swap_in_use(void); |
154 | 157 | ||
158 | /* | ||
159 | * Flags that can be passed from the hibernating kernel to the "boot" kernel in | ||
160 | * the image header. | ||
161 | */ | ||
162 | #define SF_PLATFORM_MODE 1 | ||
163 | |||
164 | /* kernel/power/disk.c */ | ||
155 | extern int swsusp_check(void); | 165 | extern int swsusp_check(void); |
156 | extern int swsusp_shrink_memory(void); | 166 | extern int swsusp_shrink_memory(void); |
157 | extern void swsusp_free(void); | 167 | extern void swsusp_free(void); |
158 | extern int swsusp_suspend(void); | 168 | extern int swsusp_suspend(void); |
159 | extern int swsusp_resume(void); | 169 | extern int swsusp_resume(void); |
160 | extern int swsusp_read(void); | 170 | extern int swsusp_read(unsigned int *flags_p); |
161 | extern int swsusp_write(void); | 171 | extern int swsusp_write(unsigned int flags); |
162 | extern void swsusp_close(void); | 172 | extern void swsusp_close(void); |
163 | extern int suspend_enter(suspend_state_t state); | ||
164 | 173 | ||
165 | struct timeval; | 174 | struct timeval; |
175 | /* kernel/power/swsusp.c */ | ||
166 | extern void swsusp_show_speed(struct timeval *, struct timeval *, | 176 | extern void swsusp_show_speed(struct timeval *, struct timeval *, |
167 | unsigned int, char *); | 177 | unsigned int, char *); |
178 | |||
179 | /* kernel/power/main.c */ | ||
180 | extern int suspend_enter(suspend_state_t state); | ||
181 | extern int suspend_devices_and_enter(suspend_state_t state); | ||
182 | extern struct blocking_notifier_head pm_chain_head; | ||
183 | |||
184 | static inline int pm_notifier_call_chain(unsigned long val) | ||
185 | { | ||
186 | return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) | ||
187 | == NOTIFY_BAD) ? -EINVAL : 0; | ||
188 | } | ||
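pm_notifier_call_chain() above turns a NOTIFY_BAD reply into -EINVAL, so any subscriber on pm_chain_head can veto a suspend or hibernation before tasks are frozen. A hedged sketch of such a subscriber; it assumes a registration helper wired to pm_chain_head (register_pm_notifier(), say), and acme_quiesce()/acme_resume_work() are placeholders:

	static int acme_pm_event(struct notifier_block *nb,
				 unsigned long event, void *unused)
	{
		switch (event) {
		case PM_HIBERNATION_PREPARE:
		case PM_SUSPEND_PREPARE:
			/* returning NOTIFY_BAD aborts the transition */
			return acme_quiesce() ? NOTIFY_BAD : NOTIFY_OK;
		case PM_POST_HIBERNATION:
		case PM_POST_SUSPEND:
			acme_resume_work();
			return NOTIFY_OK;
		}
		return NOTIFY_DONE;
	}

	static struct notifier_block acme_pm_notifier = {
		.notifier_call = acme_pm_event,
	};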
diff --git a/kernel/power/process.c b/kernel/power/process.c index e0233d8422..3434940a3d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -40,7 +40,7 @@ static inline void frozen_process(void) | |||
40 | current->flags |= PF_FROZEN; | 40 | current->flags |= PF_FROZEN; |
41 | wmb(); | 41 | wmb(); |
42 | } | 42 | } |
43 | clear_tsk_thread_flag(current, TIF_FREEZE); | 43 | clear_freeze_flag(current); |
44 | } | 44 | } |
45 | 45 | ||
46 | /* Refrigerator is place where frozen processes are stored :-). */ | 46 | /* Refrigerator is place where frozen processes are stored :-). */ |
@@ -72,20 +72,19 @@ void refrigerator(void) | |||
72 | schedule(); | 72 | schedule(); |
73 | } | 73 | } |
74 | pr_debug("%s left refrigerator\n", current->comm); | 74 | pr_debug("%s left refrigerator\n", current->comm); |
75 | current->state = save; | 75 | __set_current_state(save); |
76 | } | 76 | } |
77 | 77 | ||
78 | static inline void freeze_process(struct task_struct *p) | 78 | static void freeze_task(struct task_struct *p) |
79 | { | 79 | { |
80 | unsigned long flags; | 80 | unsigned long flags; |
81 | 81 | ||
82 | if (!freezing(p)) { | 82 | if (!freezing(p)) { |
83 | rmb(); | 83 | rmb(); |
84 | if (!frozen(p)) { | 84 | if (!frozen(p)) { |
85 | set_freeze_flag(p); | ||
85 | if (p->state == TASK_STOPPED) | 86 | if (p->state == TASK_STOPPED) |
86 | force_sig_specific(SIGSTOP, p); | 87 | force_sig_specific(SIGSTOP, p); |
87 | |||
88 | freeze(p); | ||
89 | spin_lock_irqsave(&p->sighand->siglock, flags); | 88 | spin_lock_irqsave(&p->sighand->siglock, flags); |
90 | signal_wake_up(p, p->state == TASK_STOPPED); | 89 | signal_wake_up(p, p->state == TASK_STOPPED); |
91 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 90 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
@@ -99,19 +98,14 @@ static void cancel_freezing(struct task_struct *p) | |||
99 | 98 | ||
100 | if (freezing(p)) { | 99 | if (freezing(p)) { |
101 | pr_debug(" clean up: %s\n", p->comm); | 100 | pr_debug(" clean up: %s\n", p->comm); |
102 | do_not_freeze(p); | 101 | clear_freeze_flag(p); |
103 | spin_lock_irqsave(&p->sighand->siglock, flags); | 102 | spin_lock_irqsave(&p->sighand->siglock, flags); |
104 | recalc_sigpending_and_wake(p); | 103 | recalc_sigpending_and_wake(p); |
105 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 104 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
106 | } | 105 | } |
107 | } | 106 | } |
108 | 107 | ||
109 | static inline int is_user_space(struct task_struct *p) | 108 | static int try_to_freeze_tasks(int freeze_user_space) |
110 | { | ||
111 | return p->mm && !(p->flags & PF_BORROWED_MM); | ||
112 | } | ||
113 | |||
114 | static unsigned int try_to_freeze_tasks(int freeze_user_space) | ||
115 | { | 109 | { |
116 | struct task_struct *g, *p; | 110 | struct task_struct *g, *p; |
117 | unsigned long end_time; | 111 | unsigned long end_time; |
@@ -122,26 +116,40 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) | |||
122 | todo = 0; | 116 | todo = 0; |
123 | read_lock(&tasklist_lock); | 117 | read_lock(&tasklist_lock); |
124 | do_each_thread(g, p) { | 118 | do_each_thread(g, p) { |
125 | if (!freezeable(p)) | 119 | if (frozen(p) || !freezeable(p)) |
126 | continue; | 120 | continue; |
127 | 121 | ||
128 | if (frozen(p)) | 122 | if (freeze_user_space) { |
129 | continue; | 123 | if (p->state == TASK_TRACED && |
130 | 124 | frozen(p->parent)) { | |
131 | if (p->state == TASK_TRACED && frozen(p->parent)) { | 125 | cancel_freezing(p); |
132 | cancel_freezing(p); | 126 | continue; |
133 | continue; | 127 | } |
128 | /* | ||
129 | * Kernel threads should not have TIF_FREEZE set | ||
130 | * at this point, so we must ensure that either | ||
131 | * p->mm is not NULL *and* PF_BORROWED_MM is | ||
131 | * unset, or TIF_FREEZE is left unset. | ||
133 | * The task_lock() is necessary to prevent races | ||
134 | * with exit_mm() or use_mm()/unuse_mm() from | ||
134 | * occurring. | ||
136 | */ | ||
137 | task_lock(p); | ||
138 | if (!p->mm || (p->flags & PF_BORROWED_MM)) { | ||
139 | task_unlock(p); | ||
140 | continue; | ||
141 | } | ||
142 | freeze_task(p); | ||
143 | task_unlock(p); | ||
144 | } else { | ||
145 | freeze_task(p); | ||
134 | } | 146 | } |
135 | if (freeze_user_space && !is_user_space(p)) | ||
136 | continue; | ||
137 | |||
138 | freeze_process(p); | ||
139 | if (!freezer_should_skip(p)) | 147 | if (!freezer_should_skip(p)) |
140 | todo++; | 148 | todo++; |
141 | } while_each_thread(g, p); | 149 | } while_each_thread(g, p); |
142 | read_unlock(&tasklist_lock); | 150 | read_unlock(&tasklist_lock); |
143 | yield(); /* Yield is okay here */ | 151 | yield(); /* Yield is okay here */ |
144 | if (todo && time_after(jiffies, end_time)) | 152 | if (time_after(jiffies, end_time)) |
145 | break; | 153 | break; |
146 | } while (todo); | 154 | } while (todo); |
147 | 155 | ||
@@ -152,49 +160,41 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) | |||
152 | * but it cleans up leftover PF_FREEZE requests. | 160 | * but it cleans up leftover PF_FREEZE requests. |
153 | */ | 161 | */ |
154 | printk("\n"); | 162 | printk("\n"); |
155 | printk(KERN_ERR "Stopping %s timed out after %d seconds " | 163 | printk(KERN_ERR "Freezing of %s timed out after %d seconds " |
156 | "(%d tasks refusing to freeze):\n", | 164 | "(%d tasks refusing to freeze):\n", |
157 | freeze_user_space ? "user space processes" : | 165 | freeze_user_space ? "user space " : "tasks ", |
158 | "kernel threads", | ||
159 | TIMEOUT / HZ, todo); | 166 | TIMEOUT / HZ, todo); |
167 | show_state(); | ||
160 | read_lock(&tasklist_lock); | 168 | read_lock(&tasklist_lock); |
161 | do_each_thread(g, p) { | 169 | do_each_thread(g, p) { |
162 | if (freeze_user_space && !is_user_space(p)) | ||
163 | continue; | ||
164 | |||
165 | task_lock(p); | 170 | task_lock(p); |
166 | if (freezeable(p) && !frozen(p) && | 171 | if (freezing(p) && !freezer_should_skip(p)) |
167 | !freezer_should_skip(p)) | ||
168 | printk(KERN_ERR " %s\n", p->comm); | 172 | printk(KERN_ERR " %s\n", p->comm); |
169 | |||
170 | cancel_freezing(p); | 173 | cancel_freezing(p); |
171 | task_unlock(p); | 174 | task_unlock(p); |
172 | } while_each_thread(g, p); | 175 | } while_each_thread(g, p); |
173 | read_unlock(&tasklist_lock); | 176 | read_unlock(&tasklist_lock); |
174 | } | 177 | } |
175 | 178 | ||
176 | return todo; | 179 | return todo ? -EBUSY : 0; |
177 | } | 180 | } |
178 | 181 | ||
179 | /** | 182 | /** |
180 | * freeze_processes - tell processes to enter the refrigerator | 183 | * freeze_processes - tell processes to enter the refrigerator |
181 | * | ||
182 | * Returns 0 on success, or the number of processes that didn't freeze, | ||
183 | * although they were told to. | ||
184 | */ | 184 | */ |
185 | int freeze_processes(void) | 185 | int freeze_processes(void) |
186 | { | 186 | { |
187 | unsigned int nr_unfrozen; | 187 | int error; |
188 | 188 | ||
189 | printk("Stopping tasks ... "); | 189 | printk("Stopping tasks ... "); |
190 | nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); | 190 | error = try_to_freeze_tasks(FREEZER_USER_SPACE); |
191 | if (nr_unfrozen) | 191 | if (error) |
192 | return nr_unfrozen; | 192 | return error; |
193 | 193 | ||
194 | sys_sync(); | 194 | sys_sync(); |
195 | nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); | 195 | error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); |
196 | if (nr_unfrozen) | 196 | if (error) |
197 | return nr_unfrozen; | 197 | return error; |
198 | 198 | ||
199 | printk("done.\n"); | 199 | printk("done.\n"); |
200 | BUG_ON(in_atomic()); | 200 | BUG_ON(in_atomic()); |
@@ -210,7 +210,7 @@ static void thaw_tasks(int thaw_user_space) | |||
210 | if (!freezeable(p)) | 210 | if (!freezeable(p)) |
211 | continue; | 211 | continue; |
212 | 212 | ||
213 | if (is_user_space(p) == !thaw_user_space) | 213 | if (!p->mm == thaw_user_space) |
214 | continue; | 214 | continue; |
215 | 215 | ||
216 | thaw_process(p); | 216 | thaw_process(p); |
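The freezer rework above only counts a task against the timeout if it is actually marked for freezing and does not skip the freezer, so long-running kernel threads still have to cooperate explicitly. A hedged sketch of the expected loop shape for such a thread, assuming the set_freezable() opt-in helper from this patch series; acme_do_work() is a placeholder:

	static int acme_thread(void *unused)
	{
		set_freezable();		/* opt in: kernel threads are not freezable by default */

		while (!kthread_should_stop()) {
			try_to_freeze();	/* park in the refrigerator across suspend/hibernate */
			acme_do_work();
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}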
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8b1a1b8371..917aba1005 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -33,8 +33,9 @@ extern char resume_file[]; | |||
33 | #define SWSUSP_SIG "S1SUSPEND" | 33 | #define SWSUSP_SIG "S1SUSPEND" |
34 | 34 | ||
35 | struct swsusp_header { | 35 | struct swsusp_header { |
36 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; | 36 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; |
37 | sector_t image; | 37 | sector_t image; |
38 | unsigned int flags; /* Flags to pass to the "boot" kernel */ | ||
38 | char orig_sig[10]; | 39 | char orig_sig[10]; |
39 | char sig[10]; | 40 | char sig[10]; |
40 | } __attribute__((packed)); | 41 | } __attribute__((packed)); |
@@ -138,7 +139,7 @@ static int wait_on_bio_chain(struct bio **bio_chain) | |||
138 | * Saving part | 139 | * Saving part |
139 | */ | 140 | */ |
140 | 141 | ||
141 | static int mark_swapfiles(sector_t start) | 142 | static int mark_swapfiles(sector_t start, unsigned int flags) |
142 | { | 143 | { |
143 | int error; | 144 | int error; |
144 | 145 | ||
@@ -148,6 +149,7 @@ static int mark_swapfiles(sector_t start) | |||
148 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); | 149 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); |
149 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); | 150 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); |
150 | swsusp_header->image = start; | 151 | swsusp_header->image = start; |
152 | swsusp_header->flags = flags; | ||
151 | error = bio_write_page(swsusp_resume_block, | 153 | error = bio_write_page(swsusp_resume_block, |
152 | swsusp_header, NULL); | 154 | swsusp_header, NULL); |
153 | } else { | 155 | } else { |
@@ -369,6 +371,7 @@ static int enough_swap(unsigned int nr_pages) | |||
369 | 371 | ||
370 | /** | 372 | /** |
371 | * swsusp_write - Write entire image and metadata. | 373 | * swsusp_write - Write entire image and metadata. |
374 | * @flags: flags to pass to the "boot" kernel in the image header | ||
372 | * | 375 | * |
373 | * It is important _NOT_ to umount filesystems at this point. We want | 376 | * It is important _NOT_ to umount filesystems at this point. We want |
374 | * them synced (in case something goes wrong) but we DO not want to mark | 377 | * them synced (in case something goes wrong) but we DO not want to mark |
@@ -376,7 +379,7 @@ static int enough_swap(unsigned int nr_pages) | |||
376 | * correctly, we'll mark system clean, anyway.) | 379 | * correctly, we'll mark system clean, anyway.) |
377 | */ | 380 | */ |
378 | 381 | ||
379 | int swsusp_write(void) | 382 | int swsusp_write(unsigned int flags) |
380 | { | 383 | { |
381 | struct swap_map_handle handle; | 384 | struct swap_map_handle handle; |
382 | struct snapshot_handle snapshot; | 385 | struct snapshot_handle snapshot; |
@@ -415,7 +418,7 @@ int swsusp_write(void) | |||
415 | if (!error) { | 418 | if (!error) { |
416 | flush_swap_writer(&handle); | 419 | flush_swap_writer(&handle); |
417 | printk("S"); | 420 | printk("S"); |
418 | error = mark_swapfiles(start); | 421 | error = mark_swapfiles(start, flags); |
419 | printk("|\n"); | 422 | printk("|\n"); |
420 | } | 423 | } |
421 | } | 424 | } |
@@ -540,13 +543,20 @@ static int load_image(struct swap_map_handle *handle, | |||
540 | return error; | 543 | return error; |
541 | } | 544 | } |
542 | 545 | ||
543 | int swsusp_read(void) | 546 | /** |
547 | * swsusp_read - read the hibernation image. | ||
548 | * @flags_p: flags passed by the "frozen" kernel in the image header should | ||
549 | * be written into this memeory location | ||
550 | */ | ||
551 | |||
552 | int swsusp_read(unsigned int *flags_p) | ||
544 | { | 553 | { |
545 | int error; | 554 | int error; |
546 | struct swap_map_handle handle; | 555 | struct swap_map_handle handle; |
547 | struct snapshot_handle snapshot; | 556 | struct snapshot_handle snapshot; |
548 | struct swsusp_info *header; | 557 | struct swsusp_info *header; |
549 | 558 | ||
559 | *flags_p = swsusp_header->flags; | ||
550 | if (IS_ERR(resume_bdev)) { | 560 | if (IS_ERR(resume_bdev)) { |
551 | pr_debug("swsusp: block device not initialised\n"); | 561 | pr_debug("swsusp: block device not initialised\n"); |
552 | return PTR_ERR(resume_bdev); | 562 | return PTR_ERR(resume_bdev); |
diff --git a/kernel/power/user.c b/kernel/power/user.c index d65305b515..bd0723a7df 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -128,92 +128,6 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, | |||
128 | return res; | 128 | return res; |
129 | } | 129 | } |
130 | 130 | ||
131 | static inline int platform_prepare(void) | ||
132 | { | ||
133 | int error = 0; | ||
134 | |||
135 | if (hibernation_ops) | ||
136 | error = hibernation_ops->prepare(); | ||
137 | |||
138 | return error; | ||
139 | } | ||
140 | |||
141 | static inline void platform_finish(void) | ||
142 | { | ||
143 | if (hibernation_ops) | ||
144 | hibernation_ops->finish(); | ||
145 | } | ||
146 | |||
147 | static inline int snapshot_suspend(int platform_suspend) | ||
148 | { | ||
149 | int error; | ||
150 | |||
151 | mutex_lock(&pm_mutex); | ||
152 | /* Free memory before shutting down devices. */ | ||
153 | error = swsusp_shrink_memory(); | ||
154 | if (error) | ||
155 | goto Finish; | ||
156 | |||
157 | if (platform_suspend) { | ||
158 | error = platform_prepare(); | ||
159 | if (error) | ||
160 | goto Finish; | ||
161 | } | ||
162 | suspend_console(); | ||
163 | error = device_suspend(PMSG_FREEZE); | ||
164 | if (error) | ||
165 | goto Resume_devices; | ||
166 | |||
167 | error = disable_nonboot_cpus(); | ||
168 | if (!error) { | ||
169 | in_suspend = 1; | ||
170 | error = swsusp_suspend(); | ||
171 | } | ||
172 | enable_nonboot_cpus(); | ||
173 | Resume_devices: | ||
174 | if (platform_suspend) | ||
175 | platform_finish(); | ||
176 | |||
177 | device_resume(); | ||
178 | resume_console(); | ||
179 | Finish: | ||
180 | mutex_unlock(&pm_mutex); | ||
181 | return error; | ||
182 | } | ||
183 | |||
184 | static inline int snapshot_restore(int platform_suspend) | ||
185 | { | ||
186 | int error; | ||
187 | |||
188 | mutex_lock(&pm_mutex); | ||
189 | pm_prepare_console(); | ||
190 | if (platform_suspend) { | ||
191 | error = platform_prepare(); | ||
192 | if (error) | ||
193 | goto Finish; | ||
194 | } | ||
195 | suspend_console(); | ||
196 | error = device_suspend(PMSG_PRETHAW); | ||
197 | if (error) | ||
198 | goto Resume_devices; | ||
199 | |||
200 | error = disable_nonboot_cpus(); | ||
201 | if (!error) | ||
202 | error = swsusp_resume(); | ||
203 | |||
204 | enable_nonboot_cpus(); | ||
205 | Resume_devices: | ||
206 | if (platform_suspend) | ||
207 | platform_finish(); | ||
208 | |||
209 | device_resume(); | ||
210 | resume_console(); | ||
211 | Finish: | ||
212 | pm_restore_console(); | ||
213 | mutex_unlock(&pm_mutex); | ||
214 | return error; | ||
215 | } | ||
216 | |||
217 | static int snapshot_ioctl(struct inode *inode, struct file *filp, | 131 | static int snapshot_ioctl(struct inode *inode, struct file *filp, |
218 | unsigned int cmd, unsigned long arg) | 132 | unsigned int cmd, unsigned long arg) |
219 | { | 133 | { |
@@ -237,10 +151,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
237 | if (data->frozen) | 151 | if (data->frozen) |
238 | break; | 152 | break; |
239 | mutex_lock(&pm_mutex); | 153 | mutex_lock(&pm_mutex); |
240 | if (freeze_processes()) { | 154 | error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); |
241 | thaw_processes(); | 155 | if (!error) { |
242 | error = -EBUSY; | 156 | error = freeze_processes(); |
157 | if (error) | ||
158 | thaw_processes(); | ||
243 | } | 159 | } |
160 | if (error) | ||
161 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
244 | mutex_unlock(&pm_mutex); | 162 | mutex_unlock(&pm_mutex); |
245 | if (!error) | 163 | if (!error) |
246 | data->frozen = 1; | 164 | data->frozen = 1; |
@@ -251,6 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
251 | break; | 169 | break; |
252 | mutex_lock(&pm_mutex); | 170 | mutex_lock(&pm_mutex); |
253 | thaw_processes(); | 171 | thaw_processes(); |
172 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
254 | mutex_unlock(&pm_mutex); | 173 | mutex_unlock(&pm_mutex); |
255 | data->frozen = 0; | 174 | data->frozen = 0; |
256 | break; | 175 | break; |
@@ -260,7 +179,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
260 | error = -EPERM; | 179 | error = -EPERM; |
261 | break; | 180 | break; |
262 | } | 181 | } |
263 | error = snapshot_suspend(data->platform_suspend); | 182 | error = hibernation_snapshot(data->platform_suspend); |
264 | if (!error) | 183 | if (!error) |
265 | error = put_user(in_suspend, (unsigned int __user *)arg); | 184 | error = put_user(in_suspend, (unsigned int __user *)arg); |
266 | if (!error) | 185 | if (!error) |
@@ -274,7 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
274 | error = -EPERM; | 193 | error = -EPERM; |
275 | break; | 194 | break; |
276 | } | 195 | } |
277 | error = snapshot_restore(data->platform_suspend); | 196 | error = hibernation_restore(data->platform_suspend); |
278 | break; | 197 | break; |
279 | 198 | ||
280 | case SNAPSHOT_FREE: | 199 | case SNAPSHOT_FREE: |
@@ -336,47 +255,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
336 | break; | 255 | break; |
337 | 256 | ||
338 | case SNAPSHOT_S2RAM: | 257 | case SNAPSHOT_S2RAM: |
339 | if (!pm_ops) { | ||
340 | error = -ENOSYS; | ||
341 | break; | ||
342 | } | ||
343 | |||
344 | if (!data->frozen) { | 258 | if (!data->frozen) { |
345 | error = -EPERM; | 259 | error = -EPERM; |
346 | break; | 260 | break; |
347 | } | 261 | } |
348 | |||
349 | if (!mutex_trylock(&pm_mutex)) { | 262 | if (!mutex_trylock(&pm_mutex)) { |
350 | error = -EBUSY; | 263 | error = -EBUSY; |
351 | break; | 264 | break; |
352 | } | 265 | } |
353 | 266 | /* | |
354 | if (pm_ops->prepare) { | 267 | * Tasks are frozen and the notifiers have been called with |
355 | error = pm_ops->prepare(PM_SUSPEND_MEM); | 268 | * PM_HIBERNATION_PREPARE |
356 | if (error) | 269 | */ |
357 | goto OutS3; | 270 | error = suspend_devices_and_enter(PM_SUSPEND_MEM); |
358 | } | ||
359 | |||
360 | /* Put devices to sleep */ | ||
361 | suspend_console(); | ||
362 | error = device_suspend(PMSG_SUSPEND); | ||
363 | if (error) { | ||
364 | printk(KERN_ERR "Failed to suspend some devices.\n"); | ||
365 | } else { | ||
366 | error = disable_nonboot_cpus(); | ||
367 | if (!error) { | ||
368 | /* Enter S3, system is already frozen */ | ||
369 | suspend_enter(PM_SUSPEND_MEM); | ||
370 | enable_nonboot_cpus(); | ||
371 | } | ||
372 | /* Wake up devices */ | ||
373 | device_resume(); | ||
374 | } | ||
375 | resume_console(); | ||
376 | if (pm_ops->finish) | ||
377 | pm_ops->finish(PM_SUSPEND_MEM); | ||
378 | |||
379 | OutS3: | ||
380 | mutex_unlock(&pm_mutex); | 271 | mutex_unlock(&pm_mutex); |
381 | break; | 272 | break; |
382 | 273 | ||
@@ -386,19 +277,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
386 | switch (arg) { | 277 | switch (arg) { |
387 | 278 | ||
388 | case PMOPS_PREPARE: | 279 | case PMOPS_PREPARE: |
389 | if (hibernation_ops) { | 280 | data->platform_suspend = 1; |
390 | data->platform_suspend = 1; | 281 | error = 0; |
391 | error = 0; | ||
392 | } else { | ||
393 | error = -ENOSYS; | ||
394 | } | ||
395 | break; | 282 | break; |
396 | 283 | ||
397 | case PMOPS_ENTER: | 284 | case PMOPS_ENTER: |
398 | if (data->platform_suspend) { | 285 | if (data->platform_suspend) |
399 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | 286 | error = hibernation_platform_enter(); |
400 | error = hibernation_ops->enter(); | 287 | |
401 | } | ||
402 | break; | 288 | break; |
403 | 289 | ||
404 | case PMOPS_FINISH: | 290 | case PMOPS_FINISH: |
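The SNAPSHOT_S2RAM branch no longer open-codes the suspend sequence; it hands the whole thing to suspend_devices_and_enter(). A rough, non-authoritative sketch of that sequence, reconstructed only from the lines removed above (the real helper also handles the platform hooks and error paths differently):

static int suspend_devices_and_enter_sketch(suspend_state_t state)
{
	int error = 0;

	if (pm_ops && pm_ops->prepare) {	/* optional platform prepare hook */
		error = pm_ops->prepare(state);
		if (error)
			return error;
	}
	suspend_console();			/* quiesce console output */
	error = device_suspend(PMSG_SUSPEND);	/* put devices to sleep */
	if (!error) {
		error = disable_nonboot_cpus();
		if (!error) {
			suspend_enter(state);	/* actually enter the sleep state */
			enable_nonboot_cpus();
		}
		device_resume();		/* wake devices back up */
	}
	resume_console();
	if (pm_ops && pm_ops->finish)
		pm_ops->finish(state);		/* optional platform finish hook */
	return error;
}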
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b1d11f1c7c..82a558b655 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -142,7 +142,7 @@ static int may_attach(struct task_struct *task) | |||
142 | return -EPERM; | 142 | return -EPERM; |
143 | smp_rmb(); | 143 | smp_rmb(); |
144 | if (task->mm) | 144 | if (task->mm) |
145 | dumpable = task->mm->dumpable; | 145 | dumpable = get_dumpable(task->mm); |
146 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | 146 | if (!dumpable && !capable(CAP_SYS_PTRACE)) |
147 | return -EPERM; | 147 | return -EPERM; |
148 | 148 | ||
@@ -490,3 +490,22 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | |||
490 | return ret; | 490 | return ret; |
491 | } | 491 | } |
492 | #endif /* __ARCH_SYS_PTRACE */ | 492 | #endif /* __ARCH_SYS_PTRACE */ |
493 | |||
494 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | ||
495 | { | ||
496 | unsigned long tmp; | ||
497 | int copied; | ||
498 | |||
499 | copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0); | ||
500 | if (copied != sizeof(tmp)) | ||
501 | return -EIO; | ||
502 | return put_user(tmp, (unsigned long __user *)data); | ||
503 | } | ||
504 | |||
505 | int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | ||
506 | { | ||
507 | int copied; | ||
508 | |||
509 | copied = access_process_vm(tsk, addr, &data, sizeof(data), 1); | ||
510 | return (copied == sizeof(data)) ? 0 : -EIO; | ||
511 | } | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 55ba82a85a..ddff332477 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/moduleparam.h> | 40 | #include <linux/moduleparam.h> |
41 | #include <linux/percpu.h> | 41 | #include <linux/percpu.h> |
42 | #include <linux/notifier.h> | 42 | #include <linux/notifier.h> |
43 | #include <linux/freezer.h> | ||
43 | #include <linux/cpu.h> | 44 | #include <linux/cpu.h> |
44 | #include <linux/random.h> | 45 | #include <linux/random.h> |
45 | #include <linux/delay.h> | 46 | #include <linux/delay.h> |
@@ -518,7 +519,6 @@ rcu_torture_writer(void *arg) | |||
518 | 519 | ||
519 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); | 520 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); |
520 | set_user_nice(current, 19); | 521 | set_user_nice(current, 19); |
521 | current->flags |= PF_NOFREEZE; | ||
522 | 522 | ||
523 | do { | 523 | do { |
524 | schedule_timeout_uninterruptible(1); | 524 | schedule_timeout_uninterruptible(1); |
@@ -558,7 +558,6 @@ rcu_torture_fakewriter(void *arg) | |||
558 | 558 | ||
559 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); | 559 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); |
560 | set_user_nice(current, 19); | 560 | set_user_nice(current, 19); |
561 | current->flags |= PF_NOFREEZE; | ||
562 | 561 | ||
563 | do { | 562 | do { |
564 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); | 563 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); |
@@ -589,7 +588,6 @@ rcu_torture_reader(void *arg) | |||
589 | 588 | ||
590 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); | 589 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); |
591 | set_user_nice(current, 19); | 590 | set_user_nice(current, 19); |
592 | current->flags |= PF_NOFREEZE; | ||
593 | 591 | ||
594 | do { | 592 | do { |
595 | idx = cur_ops->readlock(); | 593 | idx = cur_ops->readlock(); |
diff --git a/kernel/relay.c b/kernel/relay.c index a615a8f513..510fbbd7b5 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -80,7 +80,7 @@ static struct vm_operations_struct relay_file_mmap_ops = { | |||
80 | * | 80 | * |
81 | * Caller should already have grabbed mmap_sem. | 81 | * Caller should already have grabbed mmap_sem. |
82 | */ | 82 | */ |
83 | int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) | 83 | static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) |
84 | { | 84 | { |
85 | unsigned long length = vma->vm_end - vma->vm_start; | 85 | unsigned long length = vma->vm_end - vma->vm_start; |
86 | struct file *filp = vma->vm_file; | 86 | struct file *filp = vma->vm_file; |
@@ -145,7 +145,7 @@ depopulate: | |||
145 | * | 145 | * |
146 | * Returns channel buffer if successful, %NULL otherwise. | 146 | * Returns channel buffer if successful, %NULL otherwise. |
147 | */ | 147 | */ |
148 | struct rchan_buf *relay_create_buf(struct rchan *chan) | 148 | static struct rchan_buf *relay_create_buf(struct rchan *chan) |
149 | { | 149 | { |
150 | struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); | 150 | struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); |
151 | if (!buf) | 151 | if (!buf) |
@@ -175,7 +175,7 @@ free_buf: | |||
175 | * | 175 | * |
176 | * Should only be called from kref_put(). | 176 | * Should only be called from kref_put(). |
177 | */ | 177 | */ |
178 | void relay_destroy_channel(struct kref *kref) | 178 | static void relay_destroy_channel(struct kref *kref) |
179 | { | 179 | { |
180 | struct rchan *chan = container_of(kref, struct rchan, kref); | 180 | struct rchan *chan = container_of(kref, struct rchan, kref); |
181 | kfree(chan); | 181 | kfree(chan); |
@@ -185,7 +185,7 @@ void relay_destroy_channel(struct kref *kref) | |||
185 | * relay_destroy_buf - destroy an rchan_buf struct and associated buffer | 185 | * relay_destroy_buf - destroy an rchan_buf struct and associated buffer |
186 | * @buf: the buffer struct | 186 | * @buf: the buffer struct |
187 | */ | 187 | */ |
188 | void relay_destroy_buf(struct rchan_buf *buf) | 188 | static void relay_destroy_buf(struct rchan_buf *buf) |
189 | { | 189 | { |
190 | struct rchan *chan = buf->chan; | 190 | struct rchan *chan = buf->chan; |
191 | unsigned int i; | 191 | unsigned int i; |
@@ -210,7 +210,7 @@ void relay_destroy_buf(struct rchan_buf *buf) | |||
210 | * rchan_buf_struct and the channel buffer. Should only be called from | 210 | * rchan_buf_struct and the channel buffer. Should only be called from |
211 | * kref_put(). | 211 | * kref_put(). |
212 | */ | 212 | */ |
213 | void relay_remove_buf(struct kref *kref) | 213 | static void relay_remove_buf(struct kref *kref) |
214 | { | 214 | { |
215 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); | 215 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); |
216 | buf->chan->cb->remove_buf_file(buf->dentry); | 216 | buf->chan->cb->remove_buf_file(buf->dentry); |
@@ -223,11 +223,10 @@ void relay_remove_buf(struct kref *kref) | |||
223 | * | 223 | * |
224 | * Returns 1 if the buffer is empty, 0 otherwise. | 224 | * Returns 1 if the buffer is empty, 0 otherwise. |
225 | */ | 225 | */ |
226 | int relay_buf_empty(struct rchan_buf *buf) | 226 | static int relay_buf_empty(struct rchan_buf *buf) |
227 | { | 227 | { |
228 | return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; | 228 | return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; |
229 | } | 229 | } |
230 | EXPORT_SYMBOL_GPL(relay_buf_empty); | ||
231 | 230 | ||
232 | /** | 231 | /** |
233 | * relay_buf_full - boolean, is the channel buffer full? | 232 | * relay_buf_full - boolean, is the channel buffer full? |
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 015fc633c9..e3055ba691 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
@@ -260,6 +260,7 @@ static int test_func(void *data) | |||
260 | int ret; | 260 | int ret; |
261 | 261 | ||
262 | current->flags |= PF_MUTEX_TESTER; | 262 | current->flags |= PF_MUTEX_TESTER; |
263 | set_freezable(); | ||
263 | allow_signal(SIGHUP); | 264 | allow_signal(SIGHUP); |
264 | 265 | ||
265 | for(;;) { | 266 | for(;;) { |
diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9a87886b02..1ec620c030 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c | |||
@@ -20,7 +20,7 @@ void down_read(struct rw_semaphore *sem) | |||
20 | might_sleep(); | 20 | might_sleep(); |
21 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | 21 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); |
22 | 22 | ||
23 | __down_read(sem); | 23 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
24 | } | 24 | } |
25 | 25 | ||
26 | EXPORT_SYMBOL(down_read); | 26 | EXPORT_SYMBOL(down_read); |
@@ -47,7 +47,7 @@ void down_write(struct rw_semaphore *sem) | |||
47 | might_sleep(); | 47 | might_sleep(); |
48 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 48 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); |
49 | 49 | ||
50 | __down_write(sem); | 50 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
51 | } | 51 | } |
52 | 52 | ||
53 | EXPORT_SYMBOL(down_write); | 53 | EXPORT_SYMBOL(down_write); |
@@ -111,7 +111,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) | |||
111 | might_sleep(); | 111 | might_sleep(); |
112 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | 112 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); |
113 | 113 | ||
114 | __down_read(sem); | 114 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
115 | } | 115 | } |
116 | 116 | ||
117 | EXPORT_SYMBOL(down_read_nested); | 117 | EXPORT_SYMBOL(down_read_nested); |
@@ -130,7 +130,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) | |||
130 | might_sleep(); | 130 | might_sleep(); |
131 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 131 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
132 | 132 | ||
133 | __down_write_nested(sem, subclass); | 133 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
134 | } | 134 | } |
135 | 135 | ||
136 | EXPORT_SYMBOL(down_write_nested); | 136 | EXPORT_SYMBOL(down_write_nested); |
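down_read()/down_write() (and the spinlock/rwlock paths further down) now acquire through LOCK_CONTENDED so the lock-statistics side of lockdep can see which acquisitions hit the slow path. A paraphrased sketch of that wrapper's shape, not the verbatim lockdep macro, assuming the lock_contended()/lock_acquired() hooks take the lock's dep_map:

#define LOCK_CONTENDED_SKETCH(_lock, try, lock)			\
do {								\
	if (!try(_lock)) {					\
		/* fast path failed: record the contention ... */ \
		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
		lock(_lock);					\
	}							\
	/* ... and note when the lock was finally taken */	\
	lock_acquired(&(_lock)->dep_map);			\
} while (0)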
diff --git a/kernel/sched.c b/kernel/sched.c index 1c8076676e..93cf241cfb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -301,7 +301,7 @@ struct rq { | |||
301 | struct lock_class_key rq_lock_key; | 301 | struct lock_class_key rq_lock_key; |
302 | }; | 302 | }; |
303 | 303 | ||
304 | static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; | 304 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
305 | static DEFINE_MUTEX(sched_hotcpu_mutex); | 305 | static DEFINE_MUTEX(sched_hotcpu_mutex); |
306 | 306 | ||
307 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) | 307 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) |
@@ -379,6 +379,23 @@ static inline unsigned long long rq_clock(struct rq *rq) | |||
379 | #define task_rq(p) cpu_rq(task_cpu(p)) | 379 | #define task_rq(p) cpu_rq(task_cpu(p)) |
380 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 380 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
381 | 381 | ||
382 | /* | ||
383 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | ||
384 | * clock constructed from sched_clock(): | ||
385 | */ | ||
386 | unsigned long long cpu_clock(int cpu) | ||
387 | { | ||
388 | struct rq *rq = cpu_rq(cpu); | ||
389 | unsigned long long now; | ||
390 | unsigned long flags; | ||
391 | |||
392 | spin_lock_irqsave(&rq->lock, flags); | ||
393 | now = rq_clock(rq); | ||
394 | spin_unlock_irqrestore(&rq->lock, flags); | ||
395 | |||
396 | return now; | ||
397 | } | ||
398 | |||
382 | #ifdef CONFIG_FAIR_GROUP_SCHED | 399 | #ifdef CONFIG_FAIR_GROUP_SCHED |
383 | /* Change a task's ->cfs_rq if it moves across CPUs */ | 400 | /* Change a task's ->cfs_rq if it moves across CPUs */ |
384 | static inline void set_task_cfs_rq(struct task_struct *p) | 401 | static inline void set_task_cfs_rq(struct task_struct *p) |
@@ -2235,7 +2252,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2235 | 2252 | ||
2236 | rq = cpu_rq(i); | 2253 | rq = cpu_rq(i); |
2237 | 2254 | ||
2238 | if (*sd_idle && !idle_cpu(i)) | 2255 | if (*sd_idle && rq->nr_running) |
2239 | *sd_idle = 0; | 2256 | *sd_idle = 0; |
2240 | 2257 | ||
2241 | /* Bias balancing toward cpus of our domain */ | 2258 | /* Bias balancing toward cpus of our domain */ |
@@ -2257,9 +2274,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2257 | /* | 2274 | /* |
2258 | * First idle cpu or the first cpu(busiest) in this sched group | 2275 | * First idle cpu or the first cpu(busiest) in this sched group |
2259 | * is eligible for doing load balancing at this and above | 2276 | * is eligible for doing load balancing at this and above |
2260 | * domains. | 2277 | * domains. In the newly idle case, we will allow all the cpu's |
2278 | * to do the newly idle load balance. | ||
2261 | */ | 2279 | */ |
2262 | if (local_group && balance_cpu != this_cpu && balance) { | 2280 | if (idle != CPU_NEWLY_IDLE && local_group && |
2281 | balance_cpu != this_cpu && balance) { | ||
2263 | *balance = 0; | 2282 | *balance = 0; |
2264 | goto ret; | 2283 | goto ret; |
2265 | } | 2284 | } |
@@ -2677,6 +2696,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
2677 | unsigned long imbalance; | 2696 | unsigned long imbalance; |
2678 | int nr_moved = 0; | 2697 | int nr_moved = 0; |
2679 | int sd_idle = 0; | 2698 | int sd_idle = 0; |
2699 | int all_pinned = 0; | ||
2680 | cpumask_t cpus = CPU_MASK_ALL; | 2700 | cpumask_t cpus = CPU_MASK_ALL; |
2681 | 2701 | ||
2682 | /* | 2702 | /* |
@@ -2715,10 +2735,11 @@ redo: | |||
2715 | double_lock_balance(this_rq, busiest); | 2735 | double_lock_balance(this_rq, busiest); |
2716 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2736 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
2717 | minus_1_or_zero(busiest->nr_running), | 2737 | minus_1_or_zero(busiest->nr_running), |
2718 | imbalance, sd, CPU_NEWLY_IDLE, NULL); | 2738 | imbalance, sd, CPU_NEWLY_IDLE, |
2739 | &all_pinned); | ||
2719 | spin_unlock(&busiest->lock); | 2740 | spin_unlock(&busiest->lock); |
2720 | 2741 | ||
2721 | if (!nr_moved) { | 2742 | if (unlikely(all_pinned)) { |
2722 | cpu_clear(cpu_of(busiest), cpus); | 2743 | cpu_clear(cpu_of(busiest), cpus); |
2723 | if (!cpus_empty(cpus)) | 2744 | if (!cpus_empty(cpus)) |
2724 | goto redo; | 2745 | goto redo; |
@@ -4912,8 +4933,6 @@ static int migration_thread(void *data) | |||
4912 | struct migration_req *req; | 4933 | struct migration_req *req; |
4913 | struct list_head *head; | 4934 | struct list_head *head; |
4914 | 4935 | ||
4915 | try_to_freeze(); | ||
4916 | |||
4917 | spin_lock_irq(&rq->lock); | 4936 | spin_lock_irq(&rq->lock); |
4918 | 4937 | ||
4919 | if (cpu_is_offline(cpu)) { | 4938 | if (cpu_is_offline(cpu)) { |
@@ -5147,7 +5166,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5147 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); | 5166 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); |
5148 | if (IS_ERR(p)) | 5167 | if (IS_ERR(p)) |
5149 | return NOTIFY_BAD; | 5168 | return NOTIFY_BAD; |
5150 | p->flags |= PF_NOFREEZE; | ||
5151 | kthread_bind(p, cpu); | 5169 | kthread_bind(p, cpu); |
5152 | /* Must be high prio: stop_machine expects to yield to it. */ | 5170 | /* Must be high prio: stop_machine expects to yield to it. */ |
5153 | rq = task_rq_lock(p, &flags); | 5171 | rq = task_rq_lock(p, &flags); |
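cpu_clock(), added above, gives callers a nanosecond-scale per-cpu timestamp derived from sched_clock() without their having to take the runqueue lock themselves. An illustrative caller (all names other than cpu_clock() are made up; task migration between the two reads is ignored for the sake of the example):

static void time_something(void)
{
	int cpu = raw_smp_processor_id();
	unsigned long long t0, t1;

	t0 = cpu_clock(cpu);
	do_something();			/* hypothetical work being timed */
	t1 = cpu_clock(cpu);
	printk(KERN_DEBUG "do_something() took %llu ns on cpu %d\n", t1 - t0, cpu);
}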
diff --git a/kernel/softirq.c b/kernel/softirq.c index 8de2677901..0f546ddea4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/notifier.h> | 14 | #include <linux/notifier.h> |
15 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
17 | #include <linux/freezer.h> | ||
17 | #include <linux/kthread.h> | 18 | #include <linux/kthread.h> |
18 | #include <linux/rcupdate.h> | 19 | #include <linux/rcupdate.h> |
19 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
@@ -488,8 +489,6 @@ void __init softirq_init(void) | |||
488 | 489 | ||
489 | static int ksoftirqd(void * __bind_cpu) | 490 | static int ksoftirqd(void * __bind_cpu) |
490 | { | 491 | { |
491 | current->flags |= PF_NOFREEZE; | ||
492 | |||
493 | set_current_state(TASK_INTERRUPTIBLE); | 492 | set_current_state(TASK_INTERRUPTIBLE); |
494 | 493 | ||
495 | while (!kthread_should_stop()) { | 494 | while (!kthread_should_stop()) { |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 0131e296ff..708d4882c0 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
13 | #include <linux/freezer.h> | ||
13 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
14 | #include <linux/notifier.h> | 15 | #include <linux/notifier.h> |
15 | #include <linux/module.h> | 16 | #include <linux/module.h> |
@@ -116,7 +117,6 @@ static int watchdog(void * __bind_cpu) | |||
116 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 117 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
117 | 118 | ||
118 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 119 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
119 | current->flags |= PF_NOFREEZE; | ||
120 | 120 | ||
121 | /* initialize timestamp */ | 121 | /* initialize timestamp */ |
122 | touch_softlockup_watchdog(); | 122 | touch_softlockup_watchdog(); |
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c6c2bf855..cd72424c26 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
@@ -72,7 +72,7 @@ void __lockfunc _read_lock(rwlock_t *lock) | |||
72 | { | 72 | { |
73 | preempt_disable(); | 73 | preempt_disable(); |
74 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 74 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
75 | _raw_read_lock(lock); | 75 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
76 | } | 76 | } |
77 | EXPORT_SYMBOL(_read_lock); | 77 | EXPORT_SYMBOL(_read_lock); |
78 | 78 | ||
@@ -88,8 +88,8 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) | |||
88 | * _raw_spin_lock_flags() code, because lockdep assumes | 88 | * _raw_spin_lock_flags() code, because lockdep assumes |
89 | * that interrupts are not re-enabled during lock-acquire: | 89 | * that interrupts are not re-enabled during lock-acquire: |
90 | */ | 90 | */ |
91 | #ifdef CONFIG_PROVE_LOCKING | 91 | #ifdef CONFIG_LOCKDEP |
92 | _raw_spin_lock(lock); | 92 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
93 | #else | 93 | #else |
94 | _raw_spin_lock_flags(lock, &flags); | 94 | _raw_spin_lock_flags(lock, &flags); |
95 | #endif | 95 | #endif |
@@ -102,7 +102,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) | |||
102 | local_irq_disable(); | 102 | local_irq_disable(); |
103 | preempt_disable(); | 103 | preempt_disable(); |
104 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 104 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
105 | _raw_spin_lock(lock); | 105 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
106 | } | 106 | } |
107 | EXPORT_SYMBOL(_spin_lock_irq); | 107 | EXPORT_SYMBOL(_spin_lock_irq); |
108 | 108 | ||
@@ -111,7 +111,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) | |||
111 | local_bh_disable(); | 111 | local_bh_disable(); |
112 | preempt_disable(); | 112 | preempt_disable(); |
113 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 113 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
114 | _raw_spin_lock(lock); | 114 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
115 | } | 115 | } |
116 | EXPORT_SYMBOL(_spin_lock_bh); | 116 | EXPORT_SYMBOL(_spin_lock_bh); |
117 | 117 | ||
@@ -122,7 +122,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) | |||
122 | local_irq_save(flags); | 122 | local_irq_save(flags); |
123 | preempt_disable(); | 123 | preempt_disable(); |
124 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 124 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
125 | _raw_read_lock(lock); | 125 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
126 | return flags; | 126 | return flags; |
127 | } | 127 | } |
128 | EXPORT_SYMBOL(_read_lock_irqsave); | 128 | EXPORT_SYMBOL(_read_lock_irqsave); |
@@ -132,7 +132,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) | |||
132 | local_irq_disable(); | 132 | local_irq_disable(); |
133 | preempt_disable(); | 133 | preempt_disable(); |
134 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 134 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
135 | _raw_read_lock(lock); | 135 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
136 | } | 136 | } |
137 | EXPORT_SYMBOL(_read_lock_irq); | 137 | EXPORT_SYMBOL(_read_lock_irq); |
138 | 138 | ||
@@ -141,7 +141,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) | |||
141 | local_bh_disable(); | 141 | local_bh_disable(); |
142 | preempt_disable(); | 142 | preempt_disable(); |
143 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 143 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
144 | _raw_read_lock(lock); | 144 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
145 | } | 145 | } |
146 | EXPORT_SYMBOL(_read_lock_bh); | 146 | EXPORT_SYMBOL(_read_lock_bh); |
147 | 147 | ||
@@ -152,7 +152,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) | |||
152 | local_irq_save(flags); | 152 | local_irq_save(flags); |
153 | preempt_disable(); | 153 | preempt_disable(); |
154 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 154 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
155 | _raw_write_lock(lock); | 155 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
156 | return flags; | 156 | return flags; |
157 | } | 157 | } |
158 | EXPORT_SYMBOL(_write_lock_irqsave); | 158 | EXPORT_SYMBOL(_write_lock_irqsave); |
@@ -162,7 +162,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) | |||
162 | local_irq_disable(); | 162 | local_irq_disable(); |
163 | preempt_disable(); | 163 | preempt_disable(); |
164 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 164 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
165 | _raw_write_lock(lock); | 165 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
166 | } | 166 | } |
167 | EXPORT_SYMBOL(_write_lock_irq); | 167 | EXPORT_SYMBOL(_write_lock_irq); |
168 | 168 | ||
@@ -171,7 +171,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) | |||
171 | local_bh_disable(); | 171 | local_bh_disable(); |
172 | preempt_disable(); | 172 | preempt_disable(); |
173 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 173 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
174 | _raw_write_lock(lock); | 174 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
175 | } | 175 | } |
176 | EXPORT_SYMBOL(_write_lock_bh); | 176 | EXPORT_SYMBOL(_write_lock_bh); |
177 | 177 | ||
@@ -179,7 +179,7 @@ void __lockfunc _spin_lock(spinlock_t *lock) | |||
179 | { | 179 | { |
180 | preempt_disable(); | 180 | preempt_disable(); |
181 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 181 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
182 | _raw_spin_lock(lock); | 182 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
183 | } | 183 | } |
184 | 184 | ||
185 | EXPORT_SYMBOL(_spin_lock); | 185 | EXPORT_SYMBOL(_spin_lock); |
@@ -188,7 +188,7 @@ void __lockfunc _write_lock(rwlock_t *lock) | |||
188 | { | 188 | { |
189 | preempt_disable(); | 189 | preempt_disable(); |
190 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 190 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
191 | _raw_write_lock(lock); | 191 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
192 | } | 192 | } |
193 | 193 | ||
194 | EXPORT_SYMBOL(_write_lock); | 194 | EXPORT_SYMBOL(_write_lock); |
@@ -289,7 +289,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) | |||
289 | { | 289 | { |
290 | preempt_disable(); | 290 | preempt_disable(); |
291 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | 291 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); |
292 | _raw_spin_lock(lock); | 292 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
293 | } | 293 | } |
294 | 294 | ||
295 | EXPORT_SYMBOL(_spin_lock_nested); | 295 | EXPORT_SYMBOL(_spin_lock_nested); |
@@ -305,8 +305,8 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas | |||
305 | * _raw_spin_lock_flags() code, because lockdep assumes | 305 | * _raw_spin_lock_flags() code, because lockdep assumes |
306 | * that interrupts are not re-enabled during lock-acquire: | 306 | * that interrupts are not re-enabled during lock-acquire: |
307 | */ | 307 | */ |
308 | #ifdef CONFIG_PROVE_SPIN_LOCKING | 308 | #ifdef CONFIG_LOCKDEP |
309 | _raw_spin_lock(lock); | 309 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
310 | #else | 310 | #else |
311 | _raw_spin_lock_flags(lock, &flags); | 311 | _raw_spin_lock_flags(lock, &flags); |
312 | #endif | 312 | #endif |
diff --git a/kernel/sys.c b/kernel/sys.c index 4d141ae3e8..08562f4197 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -100,6 +100,13 @@ struct pid *cad_pid; | |||
100 | EXPORT_SYMBOL(cad_pid); | 100 | EXPORT_SYMBOL(cad_pid); |
101 | 101 | ||
102 | /* | 102 | /* |
103 | * If set, this is used for preparing the system to power off. | ||
104 | */ | ||
105 | |||
106 | void (*pm_power_off_prepare)(void); | ||
107 | EXPORT_SYMBOL(pm_power_off_prepare); | ||
108 | |||
109 | /* | ||
103 | * Notifier list for kernel code which wants to be called | 110 | * Notifier list for kernel code which wants to be called |
104 | * at shutdown. This is used to stop any idling DMA operations | 111 | * at shutdown. This is used to stop any idling DMA operations |
105 | * and the like. | 112 | * and the like. |
@@ -867,6 +874,8 @@ EXPORT_SYMBOL_GPL(kernel_halt); | |||
867 | void kernel_power_off(void) | 874 | void kernel_power_off(void) |
868 | { | 875 | { |
869 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | 876 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); |
877 | if (pm_power_off_prepare) | ||
878 | pm_power_off_prepare(); | ||
870 | printk(KERN_EMERG "Power down.\n"); | 879 | printk(KERN_EMERG "Power down.\n"); |
871 | machine_power_off(); | 880 | machine_power_off(); |
872 | } | 881 | } |
@@ -1027,7 +1036,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) | |||
1027 | return -EPERM; | 1036 | return -EPERM; |
1028 | } | 1037 | } |
1029 | if (new_egid != old_egid) { | 1038 | if (new_egid != old_egid) { |
1030 | current->mm->dumpable = suid_dumpable; | 1039 | set_dumpable(current->mm, suid_dumpable); |
1031 | smp_wmb(); | 1040 | smp_wmb(); |
1032 | } | 1041 | } |
1033 | if (rgid != (gid_t) -1 || | 1042 | if (rgid != (gid_t) -1 || |
@@ -1057,13 +1066,13 @@ asmlinkage long sys_setgid(gid_t gid) | |||
1057 | 1066 | ||
1058 | if (capable(CAP_SETGID)) { | 1067 | if (capable(CAP_SETGID)) { |
1059 | if (old_egid != gid) { | 1068 | if (old_egid != gid) { |
1060 | current->mm->dumpable = suid_dumpable; | 1069 | set_dumpable(current->mm, suid_dumpable); |
1061 | smp_wmb(); | 1070 | smp_wmb(); |
1062 | } | 1071 | } |
1063 | current->gid = current->egid = current->sgid = current->fsgid = gid; | 1072 | current->gid = current->egid = current->sgid = current->fsgid = gid; |
1064 | } else if ((gid == current->gid) || (gid == current->sgid)) { | 1073 | } else if ((gid == current->gid) || (gid == current->sgid)) { |
1065 | if (old_egid != gid) { | 1074 | if (old_egid != gid) { |
1066 | current->mm->dumpable = suid_dumpable; | 1075 | set_dumpable(current->mm, suid_dumpable); |
1067 | smp_wmb(); | 1076 | smp_wmb(); |
1068 | } | 1077 | } |
1069 | current->egid = current->fsgid = gid; | 1078 | current->egid = current->fsgid = gid; |
@@ -1094,7 +1103,7 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
1094 | switch_uid(new_user); | 1103 | switch_uid(new_user); |
1095 | 1104 | ||
1096 | if (dumpclear) { | 1105 | if (dumpclear) { |
1097 | current->mm->dumpable = suid_dumpable; | 1106 | set_dumpable(current->mm, suid_dumpable); |
1098 | smp_wmb(); | 1107 | smp_wmb(); |
1099 | } | 1108 | } |
1100 | current->uid = new_ruid; | 1109 | current->uid = new_ruid; |
@@ -1150,7 +1159,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
1150 | return -EAGAIN; | 1159 | return -EAGAIN; |
1151 | 1160 | ||
1152 | if (new_euid != old_euid) { | 1161 | if (new_euid != old_euid) { |
1153 | current->mm->dumpable = suid_dumpable; | 1162 | set_dumpable(current->mm, suid_dumpable); |
1154 | smp_wmb(); | 1163 | smp_wmb(); |
1155 | } | 1164 | } |
1156 | current->fsuid = current->euid = new_euid; | 1165 | current->fsuid = current->euid = new_euid; |
@@ -1200,7 +1209,7 @@ asmlinkage long sys_setuid(uid_t uid) | |||
1200 | return -EPERM; | 1209 | return -EPERM; |
1201 | 1210 | ||
1202 | if (old_euid != uid) { | 1211 | if (old_euid != uid) { |
1203 | current->mm->dumpable = suid_dumpable; | 1212 | set_dumpable(current->mm, suid_dumpable); |
1204 | smp_wmb(); | 1213 | smp_wmb(); |
1205 | } | 1214 | } |
1206 | current->fsuid = current->euid = uid; | 1215 | current->fsuid = current->euid = uid; |
@@ -1245,7 +1254,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) | |||
1245 | } | 1254 | } |
1246 | if (euid != (uid_t) -1) { | 1255 | if (euid != (uid_t) -1) { |
1247 | if (euid != current->euid) { | 1256 | if (euid != current->euid) { |
1248 | current->mm->dumpable = suid_dumpable; | 1257 | set_dumpable(current->mm, suid_dumpable); |
1249 | smp_wmb(); | 1258 | smp_wmb(); |
1250 | } | 1259 | } |
1251 | current->euid = euid; | 1260 | current->euid = euid; |
@@ -1295,7 +1304,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) | |||
1295 | } | 1304 | } |
1296 | if (egid != (gid_t) -1) { | 1305 | if (egid != (gid_t) -1) { |
1297 | if (egid != current->egid) { | 1306 | if (egid != current->egid) { |
1298 | current->mm->dumpable = suid_dumpable; | 1307 | set_dumpable(current->mm, suid_dumpable); |
1299 | smp_wmb(); | 1308 | smp_wmb(); |
1300 | } | 1309 | } |
1301 | current->egid = egid; | 1310 | current->egid = egid; |
@@ -1341,7 +1350,7 @@ asmlinkage long sys_setfsuid(uid_t uid) | |||
1341 | uid == current->suid || uid == current->fsuid || | 1350 | uid == current->suid || uid == current->fsuid || |
1342 | capable(CAP_SETUID)) { | 1351 | capable(CAP_SETUID)) { |
1343 | if (uid != old_fsuid) { | 1352 | if (uid != old_fsuid) { |
1344 | current->mm->dumpable = suid_dumpable; | 1353 | set_dumpable(current->mm, suid_dumpable); |
1345 | smp_wmb(); | 1354 | smp_wmb(); |
1346 | } | 1355 | } |
1347 | current->fsuid = uid; | 1356 | current->fsuid = uid; |
@@ -1370,7 +1379,7 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
1370 | gid == current->sgid || gid == current->fsgid || | 1379 | gid == current->sgid || gid == current->fsgid || |
1371 | capable(CAP_SETGID)) { | 1380 | capable(CAP_SETGID)) { |
1372 | if (gid != old_fsgid) { | 1381 | if (gid != old_fsgid) { |
1373 | current->mm->dumpable = suid_dumpable; | 1382 | set_dumpable(current->mm, suid_dumpable); |
1374 | smp_wmb(); | 1383 | smp_wmb(); |
1375 | } | 1384 | } |
1376 | current->fsgid = gid; | 1385 | current->fsgid = gid; |
@@ -2167,14 +2176,14 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
2167 | error = put_user(current->pdeath_signal, (int __user *)arg2); | 2176 | error = put_user(current->pdeath_signal, (int __user *)arg2); |
2168 | break; | 2177 | break; |
2169 | case PR_GET_DUMPABLE: | 2178 | case PR_GET_DUMPABLE: |
2170 | error = current->mm->dumpable; | 2179 | error = get_dumpable(current->mm); |
2171 | break; | 2180 | break; |
2172 | case PR_SET_DUMPABLE: | 2181 | case PR_SET_DUMPABLE: |
2173 | if (arg2 < 0 || arg2 > 1) { | 2182 | if (arg2 < 0 || arg2 > 1) { |
2174 | error = -EINVAL; | 2183 | error = -EINVAL; |
2175 | break; | 2184 | break; |
2176 | } | 2185 | } |
2177 | current->mm->dumpable = arg2; | 2186 | set_dumpable(current->mm, arg2); |
2178 | break; | 2187 | break; |
2179 | 2188 | ||
2180 | case PR_SET_UNALIGN: | 2189 | case PR_SET_UNALIGN: |
@@ -2286,3 +2295,61 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, | |||
2286 | } | 2295 | } |
2287 | return err ? -EFAULT : 0; | 2296 | return err ? -EFAULT : 0; |
2288 | } | 2297 | } |
2298 | |||
2299 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | ||
2300 | |||
2301 | static void argv_cleanup(char **argv, char **envp) | ||
2302 | { | ||
2303 | argv_free(argv); | ||
2304 | } | ||
2305 | |||
2306 | /** | ||
2307 | * orderly_poweroff - Trigger an orderly system poweroff | ||
2308 | * @force: force poweroff if command execution fails | ||
2309 | * | ||
2310 | * This may be called from any context to trigger a system shutdown. | ||
2311 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
2312 | */ | ||
2313 | int orderly_poweroff(bool force) | ||
2314 | { | ||
2315 | int argc; | ||
2316 | char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); | ||
2317 | static char *envp[] = { | ||
2318 | "HOME=/", | ||
2319 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | ||
2320 | NULL | ||
2321 | }; | ||
2322 | int ret = -ENOMEM; | ||
2323 | struct subprocess_info *info; | ||
2324 | |||
2325 | if (argv == NULL) { | ||
2326 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | ||
2327 | __func__, poweroff_cmd); | ||
2328 | goto out; | ||
2329 | } | ||
2330 | |||
2331 | info = call_usermodehelper_setup(argv[0], argv, envp); | ||
2332 | if (info == NULL) { | ||
2333 | argv_free(argv); | ||
2334 | goto out; | ||
2335 | } | ||
2336 | |||
2337 | call_usermodehelper_setcleanup(info, argv_cleanup); | ||
2338 | |||
2339 | ret = call_usermodehelper_exec(info, UMH_NO_WAIT); | ||
2340 | |||
2341 | out: | ||
2342 | if (ret && force) { | ||
2343 | printk(KERN_WARNING "Failed to start orderly shutdown: " | ||
2344 | "forcing the issue\n"); | ||
2345 | |||
2346 | /* I guess this should try to kick off some daemon to | ||
2347 | sync and poweroff asap. Or not even bother syncing | ||
2348 | if we're doing an emergency shutdown? */ | ||
2349 | emergency_sync(); | ||
2350 | kernel_power_off(); | ||
2351 | } | ||
2352 | |||
2353 | return ret; | ||
2354 | } | ||
2355 | EXPORT_SYMBOL_GPL(orderly_poweroff); | ||
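orderly_poweroff() runs the userspace command in poweroff_cmd through the usermode helper and, when force is true, falls back to emergency_sync() plus kernel_power_off() if that fails. A hypothetical caller, for instance a driver reacting to a critical condition:

static void handle_critical_overheat(void)	/* made-up example function */
{
	printk(KERN_CRIT "temperature critical, requesting shutdown\n");
	orderly_poweroff(true);	/* force: power off even if /sbin/poweroff cannot run */
}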
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2ce7acf841..222299844a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/utsname.h> | 29 | #include <linux/utsname.h> |
30 | #include <linux/capability.h> | 30 | #include <linux/capability.h> |
31 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
32 | #include <linux/fs.h> | ||
32 | #include <linux/init.h> | 33 | #include <linux/init.h> |
33 | #include <linux/kernel.h> | 34 | #include <linux/kernel.h> |
34 | #include <linux/kobject.h> | 35 | #include <linux/kobject.h> |
@@ -45,13 +46,11 @@ | |||
45 | #include <linux/syscalls.h> | 46 | #include <linux/syscalls.h> |
46 | #include <linux/nfs_fs.h> | 47 | #include <linux/nfs_fs.h> |
47 | #include <linux/acpi.h> | 48 | #include <linux/acpi.h> |
49 | #include <linux/reboot.h> | ||
48 | 50 | ||
49 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
50 | #include <asm/processor.h> | 52 | #include <asm/processor.h> |
51 | 53 | ||
52 | extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | ||
53 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
54 | |||
55 | #ifdef CONFIG_X86 | 54 | #ifdef CONFIG_X86 |
56 | #include <asm/nmi.h> | 55 | #include <asm/nmi.h> |
57 | #include <asm/stacktrace.h> | 56 | #include <asm/stacktrace.h> |
@@ -79,6 +78,7 @@ extern int percpu_pagelist_fraction; | |||
79 | extern int compat_log; | 78 | extern int compat_log; |
80 | extern int maps_protect; | 79 | extern int maps_protect; |
81 | extern int sysctl_stat_interval; | 80 | extern int sysctl_stat_interval; |
81 | extern int audit_argv_kb; | ||
82 | 82 | ||
83 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 83 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
84 | static int maxolduid = 65535; | 84 | static int maxolduid = 65535; |
@@ -161,6 +161,8 @@ extern ctl_table inotify_table[]; | |||
161 | int sysctl_legacy_va_layout; | 161 | int sysctl_legacy_va_layout; |
162 | #endif | 162 | #endif |
163 | 163 | ||
164 | extern int prove_locking; | ||
165 | extern int lock_stat; | ||
164 | 166 | ||
165 | /* The default sysctl tables: */ | 167 | /* The default sysctl tables: */ |
166 | 168 | ||
@@ -282,6 +284,26 @@ static ctl_table kern_table[] = { | |||
282 | .mode = 0644, | 284 | .mode = 0644, |
283 | .proc_handler = &proc_dointvec, | 285 | .proc_handler = &proc_dointvec, |
284 | }, | 286 | }, |
287 | #ifdef CONFIG_PROVE_LOCKING | ||
288 | { | ||
289 | .ctl_name = CTL_UNNUMBERED, | ||
290 | .procname = "prove_locking", | ||
291 | .data = &prove_locking, | ||
292 | .maxlen = sizeof(int), | ||
293 | .mode = 0644, | ||
294 | .proc_handler = &proc_dointvec, | ||
295 | }, | ||
296 | #endif | ||
297 | #ifdef CONFIG_LOCK_STAT | ||
298 | { | ||
299 | .ctl_name = CTL_UNNUMBERED, | ||
300 | .procname = "lock_stat", | ||
301 | .data = &lock_stat, | ||
302 | .maxlen = sizeof(int), | ||
303 | .mode = 0644, | ||
304 | .proc_handler = &proc_dointvec, | ||
305 | }, | ||
306 | #endif | ||
285 | { | 307 | { |
286 | .ctl_name = CTL_UNNUMBERED, | 308 | .ctl_name = CTL_UNNUMBERED, |
287 | .procname = "sched_features", | 309 | .procname = "sched_features", |
@@ -307,6 +329,16 @@ static ctl_table kern_table[] = { | |||
307 | .mode = 0644, | 329 | .mode = 0644, |
308 | .proc_handler = &proc_dointvec, | 330 | .proc_handler = &proc_dointvec, |
309 | }, | 331 | }, |
332 | #ifdef CONFIG_AUDITSYSCALL | ||
333 | { | ||
334 | .ctl_name = CTL_UNNUMBERED, | ||
335 | .procname = "audit_argv_kb", | ||
336 | .data = &audit_argv_kb, | ||
337 | .maxlen = sizeof(int), | ||
338 | .mode = 0644, | ||
339 | .proc_handler = &proc_dointvec, | ||
340 | }, | ||
341 | #endif | ||
310 | { | 342 | { |
311 | .ctl_name = KERN_CORE_PATTERN, | 343 | .ctl_name = KERN_CORE_PATTERN, |
312 | .procname = "core_pattern", | 344 | .procname = "core_pattern", |
@@ -661,7 +693,7 @@ static ctl_table kern_table[] = { | |||
661 | { | 693 | { |
662 | .ctl_name = KERN_ACPI_VIDEO_FLAGS, | 694 | .ctl_name = KERN_ACPI_VIDEO_FLAGS, |
663 | .procname = "acpi_video_flags", | 695 | .procname = "acpi_video_flags", |
664 | .data = &acpi_video_flags, | 696 | .data = &acpi_realmode_flags, |
665 | .maxlen = sizeof (unsigned long), | 697 | .maxlen = sizeof (unsigned long), |
666 | .mode = 0644, | 698 | .mode = 0644, |
667 | .proc_handler = &proc_doulongvec_minmax, | 699 | .proc_handler = &proc_doulongvec_minmax, |
@@ -707,13 +739,26 @@ static ctl_table kern_table[] = { | |||
707 | .proc_handler = &proc_dointvec, | 739 | .proc_handler = &proc_dointvec, |
708 | }, | 740 | }, |
709 | #endif | 741 | #endif |
710 | 742 | { | |
743 | .ctl_name = CTL_UNNUMBERED, | ||
744 | .procname = "poweroff_cmd", | ||
745 | .data = &poweroff_cmd, | ||
746 | .maxlen = POWEROFF_CMD_PATH_LEN, | ||
747 | .mode = 0644, | ||
748 | .proc_handler = &proc_dostring, | ||
749 | .strategy = &sysctl_string, | ||
750 | }, | ||
751 | /* | ||
752 | * NOTE: do not add new entries to this table unless you have read | ||
753 | * Documentation/sysctl/ctl_unnumbered.txt | ||
754 | */ | ||
711 | { .ctl_name = 0 } | 755 | { .ctl_name = 0 } |
712 | }; | 756 | }; |
713 | 757 | ||
714 | /* Constants for minimum and maximum testing in vm_table. | 758 | /* Constants for minimum and maximum testing in vm_table. |
715 | We use these as one-element integer vectors. */ | 759 | We use these as one-element integer vectors. */ |
716 | static int zero; | 760 | static int zero; |
761 | static int two = 2; | ||
717 | static int one_hundred = 100; | 762 | static int one_hundred = 100; |
718 | 763 | ||
719 | 764 | ||
@@ -826,6 +871,14 @@ static ctl_table vm_table[] = { | |||
826 | .mode = 0644, | 871 | .mode = 0644, |
827 | .proc_handler = &proc_dointvec, | 872 | .proc_handler = &proc_dointvec, |
828 | }, | 873 | }, |
874 | { | ||
875 | .ctl_name = CTL_UNNUMBERED, | ||
876 | .procname = "hugepages_treat_as_movable", | ||
877 | .data = &hugepages_treat_as_movable, | ||
878 | .maxlen = sizeof(int), | ||
879 | .mode = 0644, | ||
880 | .proc_handler = &hugetlb_treat_movable_handler, | ||
881 | }, | ||
829 | #endif | 882 | #endif |
830 | { | 883 | { |
831 | .ctl_name = VM_LOWMEM_RESERVE_RATIO, | 884 | .ctl_name = VM_LOWMEM_RESERVE_RATIO, |
@@ -1096,7 +1149,10 @@ static ctl_table fs_table[] = { | |||
1096 | .data = &lease_break_time, | 1149 | .data = &lease_break_time, |
1097 | .maxlen = sizeof(int), | 1150 | .maxlen = sizeof(int), |
1098 | .mode = 0644, | 1151 | .mode = 0644, |
1099 | .proc_handler = &proc_dointvec, | 1152 | .proc_handler = &proc_dointvec_minmax, |
1153 | .strategy = &sysctl_intvec, | ||
1154 | .extra1 = &zero, | ||
1155 | .extra2 = &two, | ||
1100 | }, | 1156 | }, |
1101 | { | 1157 | { |
1102 | .ctl_name = FS_AIO_NR, | 1158 | .ctl_name = FS_AIO_NR, |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 728cedfd3c..8969877661 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -401,7 +401,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, | |||
401 | * this is optimized for the most common adjustments of -1,0,1, | 401 | * this is optimized for the most common adjustments of -1,0,1, |
402 | * for other values we can do a bit more work. | 402 | * for other values we can do a bit more work. |
403 | */ | 403 | */ |
404 | static void clocksource_adjust(struct clocksource *clock, s64 offset) | 404 | static void clocksource_adjust(s64 offset) |
405 | { | 405 | { |
406 | s64 error, interval = clock->cycle_interval; | 406 | s64 error, interval = clock->cycle_interval; |
407 | int adj; | 407 | int adj; |
@@ -476,7 +476,7 @@ void update_wall_time(void) | |||
476 | } | 476 | } |
477 | 477 | ||
478 | /* correct the clock when NTP error is too big */ | 478 | /* correct the clock when NTP error is too big */ |
479 | clocksource_adjust(clock, offset); | 479 | clocksource_adjust(offset); |
480 | 480 | ||
481 | /* store full nanoseconds into xtime */ | 481 | /* store full nanoseconds into xtime */ |
482 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; | 482 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 8bbcfb77f7..e5edc3a22a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -38,7 +38,7 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | |||
38 | 38 | ||
39 | static void print_name_offset(struct seq_file *m, void *sym) | 39 | static void print_name_offset(struct seq_file *m, void *sym) |
40 | { | 40 | { |
41 | char symname[KSYM_NAME_LEN+1]; | 41 | char symname[KSYM_NAME_LEN]; |
42 | 42 | ||
43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) | 43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) |
44 | SEQ_printf(m, "<%p>", sym); | 44 | SEQ_printf(m, "<%p>", sym); |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 9b8a826236..8ed62fda16 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -269,7 +269,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
269 | 269 | ||
270 | static void print_name_offset(struct seq_file *m, unsigned long addr) | 270 | static void print_name_offset(struct seq_file *m, unsigned long addr) |
271 | { | 271 | { |
272 | char symname[KSYM_NAME_LEN+1]; | 272 | char symname[KSYM_NAME_LEN]; |
273 | 273 | ||
274 | if (lookup_symbol_name(addr, symname) < 0) | 274 | if (lookup_symbol_name(addr, symname) < 0) |
275 | seq_printf(m, "<%p>", (void *)addr); | 275 | seq_printf(m, "<%p>", (void *)addr); |
diff --git a/kernel/timer.c b/kernel/timer.c index 1258371e0d..d1e8b975c7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -103,14 +103,14 @@ static inline tvec_base_t *tbase_get_base(tvec_base_t *base) | |||
103 | static inline void timer_set_deferrable(struct timer_list *timer) | 103 | static inline void timer_set_deferrable(struct timer_list *timer) |
104 | { | 104 | { |
105 | timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | | 105 | timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | |
106 | TBASE_DEFERRABLE_FLAG)); | 106 | TBASE_DEFERRABLE_FLAG)); |
107 | } | 107 | } |
108 | 108 | ||
109 | static inline void | 109 | static inline void |
110 | timer_set_base(struct timer_list *timer, tvec_base_t *new_base) | 110 | timer_set_base(struct timer_list *timer, tvec_base_t *new_base) |
111 | { | 111 | { |
112 | timer->base = (tvec_base_t *)((unsigned long)(new_base) | | 112 | timer->base = (tvec_base_t *)((unsigned long)(new_base) | |
113 | tbase_get_deferrable(timer->base)); | 113 | tbase_get_deferrable(timer->base)); |
114 | } | 114 | } |
115 | 115 | ||
116 | /** | 116 | /** |
@@ -445,10 +445,10 @@ EXPORT_SYMBOL(__mod_timer); | |||
445 | void add_timer_on(struct timer_list *timer, int cpu) | 445 | void add_timer_on(struct timer_list *timer, int cpu) |
446 | { | 446 | { |
447 | tvec_base_t *base = per_cpu(tvec_bases, cpu); | 447 | tvec_base_t *base = per_cpu(tvec_bases, cpu); |
448 | unsigned long flags; | 448 | unsigned long flags; |
449 | 449 | ||
450 | timer_stats_timer_set_start_info(timer); | 450 | timer_stats_timer_set_start_info(timer); |
451 | BUG_ON(timer_pending(timer) || !timer->function); | 451 | BUG_ON(timer_pending(timer) || !timer->function); |
452 | spin_lock_irqsave(&base->lock, flags); | 452 | spin_lock_irqsave(&base->lock, flags); |
453 | timer_set_base(timer, base); | 453 | timer_set_base(timer, base); |
454 | internal_add_timer(base, timer); | 454 | internal_add_timer(base, timer); |
@@ -627,7 +627,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
627 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 627 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
628 | struct list_head work_list; | 628 | struct list_head work_list; |
629 | struct list_head *head = &work_list; | 629 | struct list_head *head = &work_list; |
630 | int index = base->timer_jiffies & TVR_MASK; | 630 | int index = base->timer_jiffies & TVR_MASK; |
631 | 631 | ||
632 | /* | 632 | /* |
633 | * Cascade timers: | 633 | * Cascade timers: |
@@ -644,8 +644,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
644 | unsigned long data; | 644 | unsigned long data; |
645 | 645 | ||
646 | timer = list_first_entry(head, struct timer_list,entry); | 646 | timer = list_first_entry(head, struct timer_list,entry); |
647 | fn = timer->function; | 647 | fn = timer->function; |
648 | data = timer->data; | 648 | data = timer->data; |
649 | 649 | ||
650 | timer_stats_account_timer(timer); | 650 | timer_stats_account_timer(timer); |
651 | 651 | ||
@@ -689,8 +689,8 @@ static unsigned long __next_timer_interrupt(tvec_base_t *base) | |||
689 | index = slot = timer_jiffies & TVR_MASK; | 689 | index = slot = timer_jiffies & TVR_MASK; |
690 | do { | 690 | do { |
691 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { | 691 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
692 | if (tbase_get_deferrable(nte->base)) | 692 | if (tbase_get_deferrable(nte->base)) |
693 | continue; | 693 | continue; |
694 | 694 | ||
695 | found = 1; | 695 | found = 1; |
696 | expires = nte->expires; | 696 | expires = nte->expires; |
@@ -834,7 +834,7 @@ void update_process_times(int user_tick) | |||
834 | if (rcu_pending(cpu)) | 834 | if (rcu_pending(cpu)) |
835 | rcu_check_callbacks(cpu, user_tick); | 835 | rcu_check_callbacks(cpu, user_tick); |
836 | scheduler_tick(); | 836 | scheduler_tick(); |
837 | run_posix_cpu_timers(p); | 837 | run_posix_cpu_timers(p); |
838 | } | 838 | } |
839 | 839 | ||
840 | /* | 840 | /* |
@@ -909,7 +909,7 @@ static inline void update_times(unsigned long ticks) | |||
909 | update_wall_time(); | 909 | update_wall_time(); |
910 | calc_load(ticks); | 910 | calc_load(ticks); |
911 | } | 911 | } |
912 | 912 | ||
913 | /* | 913 | /* |
914 | * The 64-bit jiffies value is not atomic - you MUST NOT read it | 914 | * The 64-bit jiffies value is not atomic - you MUST NOT read it |
915 | * without sampling the sequence number in xtime_lock. | 915 | * without sampling the sequence number in xtime_lock. |
@@ -1105,7 +1105,7 @@ asmlinkage long sys_gettid(void) | |||
1105 | /** | 1105 | /** |
1106 | * do_sysinfo - fill in sysinfo struct | 1106 | * do_sysinfo - fill in sysinfo struct |
1107 | * @info: pointer to buffer to fill | 1107 | * @info: pointer to buffer to fill |
1108 | */ | 1108 | */ |
1109 | int do_sysinfo(struct sysinfo *info) | 1109 | int do_sysinfo(struct sysinfo *info) |
1110 | { | 1110 | { |
1111 | unsigned long mem_total, sav_total; | 1111 | unsigned long mem_total, sav_total; |
@@ -1221,7 +1221,8 @@ static int __devinit init_timers_cpu(int cpu) | |||
1221 | /* | 1221 | /* |
1222 | * The APs use this path later in boot | 1222 | * The APs use this path later in boot |
1223 | */ | 1223 | */ |
1224 | base = kmalloc_node(sizeof(*base), GFP_KERNEL, | 1224 | base = kmalloc_node(sizeof(*base), |
1225 | GFP_KERNEL | __GFP_ZERO, | ||
1225 | cpu_to_node(cpu)); | 1226 | cpu_to_node(cpu)); |
1226 | if (!base) | 1227 | if (!base) |
1227 | return -ENOMEM; | 1228 | return -ENOMEM; |
@@ -1232,7 +1233,6 @@ static int __devinit init_timers_cpu(int cpu) | |||
1232 | kfree(base); | 1233 | kfree(base); |
1233 | return -ENOMEM; | 1234 | return -ENOMEM; |
1234 | } | 1235 | } |
1235 | memset(base, 0, sizeof(*base)); | ||
1236 | per_cpu(tvec_bases, cpu) = base; | 1236 | per_cpu(tvec_bases, cpu) = base; |
1237 | } else { | 1237 | } else { |
1238 | /* | 1238 | /* |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d7d3fa3072..58e5c152a6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -282,8 +282,8 @@ static int worker_thread(void *__cwq) | |||
282 | struct cpu_workqueue_struct *cwq = __cwq; | 282 | struct cpu_workqueue_struct *cwq = __cwq; |
283 | DEFINE_WAIT(wait); | 283 | DEFINE_WAIT(wait); |
284 | 284 | ||
285 | if (!cwq->wq->freezeable) | 285 | if (cwq->wq->freezeable) |
286 | current->flags |= PF_NOFREEZE; | 286 | set_freezable(); |
287 | 287 | ||
288 | set_user_nice(current, -5); | 288 | set_user_nice(current, -5); |
289 | 289 | ||
@@ -752,18 +752,17 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
752 | if (cwq->thread == NULL) | 752 | if (cwq->thread == NULL) |
753 | return; | 753 | return; |
754 | 754 | ||
755 | flush_cpu_workqueue(cwq); | ||
755 | /* | 756 | /* |
756 | * If the caller is CPU_DEAD the single flush_cpu_workqueue() | 757 | * If the caller is CPU_DEAD and cwq->worklist was not empty, |
757 | * is not enough, a concurrent flush_workqueue() can insert a | 758 | * a concurrent flush_workqueue() can insert a barrier after us. |
758 | * barrier after us. | 759 | * However, in that case run_workqueue() won't return and check |
760 | * kthread_should_stop() until it flushes all work_struct's. | ||
759 | * When ->worklist becomes empty it is safe to exit because no | 761 | * When ->worklist becomes empty it is safe to exit because no |
760 | * more work_structs can be queued on this cwq: flush_workqueue | 762 | * more work_structs can be queued on this cwq: flush_workqueue |
761 | * checks list_empty(), and a "normal" queue_work() can't use | 763 | * checks list_empty(), and a "normal" queue_work() can't use |
762 | * a dead CPU. | 764 | * a dead CPU. |
763 | */ | 765 | */ |
764 | while (flush_cpu_workqueue(cwq)) | ||
765 | ; | ||
766 | |||
767 | kthread_stop(cwq->thread); | 766 | kthread_stop(cwq->thread); |
768 | cwq->thread = NULL; | 767 | cwq->thread = NULL; |
769 | } | 768 | } |
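The common thread in the kthread changes above (kauditd, rcutorture, rtmutex-tester, ksoftirqd, the softlockup watchdog, the migration and worker threads) is that kernel threads no longer set PF_NOFREEZE: they are non-freezable by default, and the ones that should freeze opt in with set_freezable(). A minimal sketch of that opt-in pattern, with a placeholder loop body:

static int example_kthread(void *unused)
{
	set_freezable();		/* opt in; kthreads now default to non-freezable */

	while (!kthread_should_stop()) {
		try_to_freeze();	/* park here during suspend/hibernation */
		do_work();		/* hypothetical per-iteration work */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}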