aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/auditsc.c3
-rw-r--r--kernel/capability.c21
-rw-r--r--kernel/cpu.c7
-rw-r--r--kernel/exit.c82
-rw-r--r--kernel/irq/proc.c96
-rw-r--r--kernel/kexec.c66
-rw-r--r--kernel/lockdep.c309
-rw-r--r--kernel/lockdep_internals.h19
-rw-r--r--kernel/lockdep_proc.c48
-rw-r--r--kernel/module.c4
-rw-r--r--kernel/nsproxy.c1
-rw-r--r--kernel/pid_namespace.c3
-rw-r--r--kernel/pm_qos_params.c25
-rw-r--r--kernel/posix-timers.c19
-rw-r--r--kernel/power/disk.c13
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/swap.c1
-rw-r--r--kernel/ptrace.c5
-rw-r--r--kernel/rcupdate.c1
-rw-r--r--kernel/resource.c88
-rw-r--r--kernel/sched.c77
-rw-r--r--kernel/sched_clock.c224
-rw-r--r--kernel/sched_fair.c21
-rw-r--r--kernel/sched_features.h2
-rw-r--r--kernel/sched_rt.c23
-rw-r--r--kernel/signal.c6
-rw-r--r--kernel/smp.c68
-rw-r--r--kernel/softlockup.c3
-rw-r--r--kernel/spinlock.c12
-rw-r--r--kernel/stop_machine.c1
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/sysctl.c1
-rw-r--r--kernel/time/tick-sched.c8
-rw-r--r--kernel/user_namespace.c1
-rw-r--r--kernel/utsname.c1
-rw-r--r--kernel/utsname_sysctl.c1
-rw-r--r--kernel/workqueue.c24
38 files changed, 767 insertions, 534 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 382dd5a8b2d7..94fabd534b03 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
55 default 1000 if HZ_1000 55 default 1000 if HZ_1000
56 56
57config SCHED_HRTICK 57config SCHED_HRTICK
58 def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS 58 def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 972f8e61d36a..59cedfb040e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -243,10 +243,11 @@ static inline int open_arg(int flags, int mask)
243 243
244static int audit_match_perm(struct audit_context *ctx, int mask) 244static int audit_match_perm(struct audit_context *ctx, int mask)
245{ 245{
246 unsigned n;
246 if (unlikely(!ctx)) 247 if (unlikely(!ctx))
247 return 0; 248 return 0;
248 249
249 unsigned n = ctx->major; 250 n = ctx->major;
250 switch (audit_classify_syscall(ctx->arch, n)) { 251 switch (audit_classify_syscall(ctx->arch, n)) {
251 case 0: /* native */ 252 case 0: /* native */
252 if ((mask & AUDIT_PERM_WRITE) && 253 if ((mask & AUDIT_PERM_WRITE) &&
diff --git a/kernel/capability.c b/kernel/capability.c
index 0101e847603e..33e51e78c2d8 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -486,17 +486,22 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
486 return ret; 486 return ret;
487} 487}
488 488
489int __capable(struct task_struct *t, int cap) 489/**
490 * capable - Determine if the current task has a superior capability in effect
491 * @cap: The capability to be tested for
492 *
493 * Return true if the current task has the given superior capability currently
494 * available for use, false if not.
495 *
496 * This sets PF_SUPERPRIV on the task if the capability is available on the
497 * assumption that it's about to be used.
498 */
499int capable(int cap)
490{ 500{
491 if (security_capable(t, cap) == 0) { 501 if (has_capability(current, cap)) {
492 t->flags |= PF_SUPERPRIV; 502 current->flags |= PF_SUPERPRIV;
493 return 1; 503 return 1;
494 } 504 }
495 return 0; 505 return 0;
496} 506}
497
498int capable(int cap)
499{
500 return __capable(current, cap);
501}
502EXPORT_SYMBOL(capable); 507EXPORT_SYMBOL(capable);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e202a68d1cc1..f17e9854c246 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -349,6 +349,8 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
349 goto out_notify; 349 goto out_notify;
350 BUG_ON(!cpu_online(cpu)); 350 BUG_ON(!cpu_online(cpu));
351 351
352 cpu_set(cpu, cpu_active_map);
353
352 /* Now call notifier in preparation. */ 354 /* Now call notifier in preparation. */
353 raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); 355 raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
354 356
@@ -367,7 +369,7 @@ int __cpuinit cpu_up(unsigned int cpu)
367 if (!cpu_isset(cpu, cpu_possible_map)) { 369 if (!cpu_isset(cpu, cpu_possible_map)) {
368 printk(KERN_ERR "can't online cpu %d because it is not " 370 printk(KERN_ERR "can't online cpu %d because it is not "
369 "configured as may-hotadd at boot time\n", cpu); 371 "configured as may-hotadd at boot time\n", cpu);
370#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390) 372#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
371 printk(KERN_ERR "please check additional_cpus= boot " 373 printk(KERN_ERR "please check additional_cpus= boot "
372 "parameter\n"); 374 "parameter\n");
373#endif 375#endif
@@ -383,9 +385,6 @@ int __cpuinit cpu_up(unsigned int cpu)
383 385
384 err = _cpu_up(cpu, 0); 386 err = _cpu_up(cpu, 0);
385 387
386 if (cpu_online(cpu))
387 cpu_set(cpu, cpu_active_map);
388
389out: 388out:
390 cpu_maps_update_done(); 389 cpu_maps_update_done();
391 return err; 390 return err;
diff --git a/kernel/exit.c b/kernel/exit.c
index 38ec40630149..25ed2ad986df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -831,26 +831,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
831 * the child reaper process (ie "init") in our pid 831 * the child reaper process (ie "init") in our pid
832 * space. 832 * space.
833 */ 833 */
834static struct task_struct *find_new_reaper(struct task_struct *father)
835{
836 struct pid_namespace *pid_ns = task_active_pid_ns(father);
837 struct task_struct *thread;
838
839 thread = father;
840 while_each_thread(father, thread) {
841 if (thread->flags & PF_EXITING)
842 continue;
843 if (unlikely(pid_ns->child_reaper == father))
844 pid_ns->child_reaper = thread;
845 return thread;
846 }
847
848 if (unlikely(pid_ns->child_reaper == father)) {
849 write_unlock_irq(&tasklist_lock);
850 if (unlikely(pid_ns == &init_pid_ns))
851 panic("Attempted to kill init!");
852
853 zap_pid_ns_processes(pid_ns);
854 write_lock_irq(&tasklist_lock);
855 /*
856 * We can not clear ->child_reaper or leave it alone.
857 * There may be stealth EXIT_DEAD tasks on ->children,
858 * forget_original_parent() must move them somewhere.
859 */
860 pid_ns->child_reaper = init_pid_ns.child_reaper;
861 }
862
863 return pid_ns->child_reaper;
864}
865
834static void forget_original_parent(struct task_struct *father) 866static void forget_original_parent(struct task_struct *father)
835{ 867{
836 struct task_struct *p, *n, *reaper = father; 868 struct task_struct *p, *n, *reaper;
837 LIST_HEAD(ptrace_dead); 869 LIST_HEAD(ptrace_dead);
838 870
839 write_lock_irq(&tasklist_lock); 871 write_lock_irq(&tasklist_lock);
840 872 reaper = find_new_reaper(father);
841 /* 873 /*
842 * First clean up ptrace if we were using it. 874 * First clean up ptrace if we were using it.
843 */ 875 */
844 ptrace_exit(father, &ptrace_dead); 876 ptrace_exit(father, &ptrace_dead);
845 877
846 do {
847 reaper = next_thread(reaper);
848 if (reaper == father) {
849 reaper = task_child_reaper(father);
850 break;
851 }
852 } while (reaper->flags & PF_EXITING);
853
854 list_for_each_entry_safe(p, n, &father->children, sibling) { 878 list_for_each_entry_safe(p, n, &father->children, sibling) {
855 p->real_parent = reaper; 879 p->real_parent = reaper;
856 if (p->parent == father) { 880 if (p->parent == father) {
@@ -918,8 +942,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
918 942
919 /* mt-exec, de_thread() is waiting for us */ 943 /* mt-exec, de_thread() is waiting for us */
920 if (thread_group_leader(tsk) && 944 if (thread_group_leader(tsk) &&
921 tsk->signal->notify_count < 0 && 945 tsk->signal->group_exit_task &&
922 tsk->signal->group_exit_task) 946 tsk->signal->notify_count < 0)
923 wake_up_process(tsk->signal->group_exit_task); 947 wake_up_process(tsk->signal->group_exit_task);
924 948
925 write_unlock_irq(&tasklist_lock); 949 write_unlock_irq(&tasklist_lock);
@@ -959,39 +983,6 @@ static void check_stack_usage(void)
959static inline void check_stack_usage(void) {} 983static inline void check_stack_usage(void) {}
960#endif 984#endif
961 985
962static inline void exit_child_reaper(struct task_struct *tsk)
963{
964 if (likely(tsk->group_leader != task_child_reaper(tsk)))
965 return;
966
967 if (tsk->nsproxy->pid_ns == &init_pid_ns)
968 panic("Attempted to kill init!");
969
970 /*
971 * @tsk is the last thread in the 'cgroup-init' and is exiting.
972 * Terminate all remaining processes in the namespace and reap them
973 * before exiting @tsk.
974 *
975 * Note that @tsk (last thread of cgroup-init) may not necessarily
976 * be the child-reaper (i.e main thread of cgroup-init) of the
977 * namespace i.e the child_reaper may have already exited.
978 *
979 * Even after a child_reaper exits, we let it inherit orphaned children,
980 * because, pid_ns->child_reaper remains valid as long as there is
981 * at least one living sub-thread in the cgroup init.
982
983 * This living sub-thread of the cgroup-init will be notified when
984 * a child inherited by the 'child-reaper' exits (do_notify_parent()
985 * uses __group_send_sig_info()). Further, when reaping child processes,
986 * do_wait() iterates over children of all living sub threads.
987
988 * i.e even though 'child_reaper' thread is listed as the parent of the
989 * orphaned children, any living sub-thread in the cgroup-init can
990 * perform the role of the child_reaper.
991 */
992 zap_pid_ns_processes(tsk->nsproxy->pid_ns);
993}
994
995NORET_TYPE void do_exit(long code) 986NORET_TYPE void do_exit(long code)
996{ 987{
997 struct task_struct *tsk = current; 988 struct task_struct *tsk = current;
@@ -1051,7 +1042,6 @@ NORET_TYPE void do_exit(long code)
1051 } 1042 }
1052 group_dead = atomic_dec_and_test(&tsk->signal->live); 1043 group_dead = atomic_dec_and_test(&tsk->signal->live);
1053 if (group_dead) { 1044 if (group_dead) {
1054 exit_child_reaper(tsk);
1055 hrtimer_cancel(&tsk->signal->real_timer); 1045 hrtimer_cancel(&tsk->signal->real_timer);
1056 exit_itimers(tsk->signal); 1046 exit_itimers(tsk->signal);
1057 } 1047 }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c6d35d68ee9..a09dd29c2fd7 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/irq.h> 9#include <linux/irq.h>
10#include <linux/proc_fs.h> 10#include <linux/proc_fs.h>
11#include <linux/seq_file.h>
11#include <linux/interrupt.h> 12#include <linux/interrupt.h>
12 13
13#include "internals.h" 14#include "internals.h"
@@ -16,23 +17,18 @@ static struct proc_dir_entry *root_irq_dir;
16 17
17#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
18 19
19static int irq_affinity_read_proc(char *page, char **start, off_t off, 20static int irq_affinity_proc_show(struct seq_file *m, void *v)
20 int count, int *eof, void *data)
21{ 21{
22 struct irq_desc *desc = irq_desc + (long)data; 22 struct irq_desc *desc = irq_desc + (long)m->private;
23 cpumask_t *mask = &desc->affinity; 23 cpumask_t *mask = &desc->affinity;
24 int len;
25 24
26#ifdef CONFIG_GENERIC_PENDING_IRQ 25#ifdef CONFIG_GENERIC_PENDING_IRQ
27 if (desc->status & IRQ_MOVE_PENDING) 26 if (desc->status & IRQ_MOVE_PENDING)
28 mask = &desc->pending_mask; 27 mask = &desc->pending_mask;
29#endif 28#endif
30 len = cpumask_scnprintf(page, count, *mask); 29 seq_cpumask(m, mask);
31 30 seq_putc(m, '\n');
32 if (count - len < 2) 31 return 0;
33 return -EINVAL;
34 len += sprintf(page + len, "\n");
35 return len;
36} 32}
37 33
38#ifndef is_affinity_mask_valid 34#ifndef is_affinity_mask_valid
@@ -40,11 +36,12 @@ static int irq_affinity_read_proc(char *page, char **start, off_t off,
40#endif 36#endif
41 37
42int no_irq_affinity; 38int no_irq_affinity;
43static int irq_affinity_write_proc(struct file *file, const char __user *buffer, 39static ssize_t irq_affinity_proc_write(struct file *file,
44 unsigned long count, void *data) 40 const char __user *buffer, size_t count, loff_t *pos)
45{ 41{
46 unsigned int irq = (int)(long)data, full_count = count, err; 42 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
47 cpumask_t new_value; 43 cpumask_t new_value;
44 int err;
48 45
49 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || 46 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
50 irq_balancing_disabled(irq)) 47 irq_balancing_disabled(irq))
@@ -65,28 +62,38 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
65 if (!cpus_intersects(new_value, cpu_online_map)) 62 if (!cpus_intersects(new_value, cpu_online_map))
66 /* Special case for empty set - allow the architecture 63 /* Special case for empty set - allow the architecture
67 code to set default SMP affinity. */ 64 code to set default SMP affinity. */
68 return irq_select_affinity(irq) ? -EINVAL : full_count; 65 return irq_select_affinity(irq) ? -EINVAL : count;
69 66
70 irq_set_affinity(irq, new_value); 67 irq_set_affinity(irq, new_value);
71 68
72 return full_count; 69 return count;
73} 70}
74 71
75static int default_affinity_read(char *page, char **start, off_t off, 72static int irq_affinity_proc_open(struct inode *inode, struct file *file)
76 int count, int *eof, void *data)
77{ 73{
78 int len = cpumask_scnprintf(page, count, irq_default_affinity); 74 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
79 if (count - len < 2)
80 return -EINVAL;
81 len += sprintf(page + len, "\n");
82 return len;
83} 75}
84 76
85static int default_affinity_write(struct file *file, const char __user *buffer, 77static const struct file_operations irq_affinity_proc_fops = {
86 unsigned long count, void *data) 78 .open = irq_affinity_proc_open,
79 .read = seq_read,
80 .llseek = seq_lseek,
81 .release = single_release,
82 .write = irq_affinity_proc_write,
83};
84
85static int default_affinity_show(struct seq_file *m, void *v)
86{
87 seq_cpumask(m, &irq_default_affinity);
88 seq_putc(m, '\n');
89 return 0;
90}
91
92static ssize_t default_affinity_write(struct file *file,
93 const char __user *buffer, size_t count, loff_t *ppos)
87{ 94{
88 unsigned int full_count = count, err;
89 cpumask_t new_value; 95 cpumask_t new_value;
96 int err;
90 97
91 err = cpumask_parse_user(buffer, count, new_value); 98 err = cpumask_parse_user(buffer, count, new_value);
92 if (err) 99 if (err)
@@ -105,8 +112,21 @@ static int default_affinity_write(struct file *file, const char __user *buffer,
105 112
106 irq_default_affinity = new_value; 113 irq_default_affinity = new_value;
107 114
108 return full_count; 115 return count;
109} 116}
117
118static int default_affinity_open(struct inode *inode, struct file *file)
119{
120 return single_open(file, default_affinity_show, NULL);
121}
122
123static const struct file_operations default_affinity_proc_fops = {
124 .open = default_affinity_open,
125 .read = seq_read,
126 .llseek = seq_lseek,
127 .release = single_release,
128 .write = default_affinity_write,
129};
110#endif 130#endif
111 131
112static int irq_spurious_read(char *page, char **start, off_t off, 132static int irq_spurious_read(char *page, char **start, off_t off,
@@ -178,16 +198,9 @@ void register_irq_proc(unsigned int irq)
178 irq_desc[irq].dir = proc_mkdir(name, root_irq_dir); 198 irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
179 199
180#ifdef CONFIG_SMP 200#ifdef CONFIG_SMP
181 { 201 /* create /proc/irq/<irq>/smp_affinity */
182 /* create /proc/irq/<irq>/smp_affinity */ 202 proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
183 entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir); 203 &irq_affinity_proc_fops, (void *)(long)irq);
184
185 if (entry) {
186 entry->data = (void *)(long)irq;
187 entry->read_proc = irq_affinity_read_proc;
188 entry->write_proc = irq_affinity_write_proc;
189 }
190 }
191#endif 204#endif
192 205
193 entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir); 206 entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
@@ -208,15 +221,8 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
208void register_default_affinity_proc(void) 221void register_default_affinity_proc(void)
209{ 222{
210#ifdef CONFIG_SMP 223#ifdef CONFIG_SMP
211 struct proc_dir_entry *entry; 224 proc_create("irq/default_smp_affinity", 0600, NULL,
212 225 &default_affinity_proc_fops);
213 /* create /proc/irq/default_smp_affinity */
214 entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
215 if (entry) {
216 entry->data = NULL;
217 entry->read_proc = default_affinity_read;
218 entry->write_proc = default_affinity_write;
219 }
220#endif 226#endif
221} 227}
222 228
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8a4370e2a34..59f3f0df35d4 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -12,7 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/kexec.h> 14#include <linux/kexec.h>
15#include <linux/spinlock.h> 15#include <linux/mutex.h>
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
@@ -77,7 +77,7 @@ int kexec_should_crash(struct task_struct *p)
77 * 77 *
78 * The code for the transition from the current kernel to the 78 * The code for the transition from the current kernel to the
79 * new kernel is placed in the control_code_buffer, whose size 79 * new kernel is placed in the control_code_buffer, whose size
80 * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single 80 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
81 * page of memory is necessary, but some architectures require more. 81 * page of memory is necessary, but some architectures require more.
82 * Because this memory must be identity mapped in the transition from 82 * Because this memory must be identity mapped in the transition from
83 * virtual to physical addresses it must live in the range 83 * virtual to physical addresses it must live in the range
@@ -242,7 +242,7 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
242 */ 242 */
243 result = -ENOMEM; 243 result = -ENOMEM;
244 image->control_code_page = kimage_alloc_control_pages(image, 244 image->control_code_page = kimage_alloc_control_pages(image,
245 get_order(KEXEC_CONTROL_CODE_SIZE)); 245 get_order(KEXEC_CONTROL_PAGE_SIZE));
246 if (!image->control_code_page) { 246 if (!image->control_code_page) {
247 printk(KERN_ERR "Could not allocate control_code_buffer\n"); 247 printk(KERN_ERR "Could not allocate control_code_buffer\n");
248 goto out; 248 goto out;
@@ -317,7 +317,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
317 */ 317 */
318 result = -ENOMEM; 318 result = -ENOMEM;
319 image->control_code_page = kimage_alloc_control_pages(image, 319 image->control_code_page = kimage_alloc_control_pages(image,
320 get_order(KEXEC_CONTROL_CODE_SIZE)); 320 get_order(KEXEC_CONTROL_PAGE_SIZE));
321 if (!image->control_code_page) { 321 if (!image->control_code_page) {
322 printk(KERN_ERR "Could not allocate control_code_buffer\n"); 322 printk(KERN_ERR "Could not allocate control_code_buffer\n");
323 goto out; 323 goto out;
@@ -924,19 +924,14 @@ static int kimage_load_segment(struct kimage *image,
924 */ 924 */
925struct kimage *kexec_image; 925struct kimage *kexec_image;
926struct kimage *kexec_crash_image; 926struct kimage *kexec_crash_image;
927/* 927
928 * A home grown binary mutex. 928static DEFINE_MUTEX(kexec_mutex);
929 * Nothing can wait so this mutex is safe to use
930 * in interrupt context :)
931 */
932static int kexec_lock;
933 929
934asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, 930asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
935 struct kexec_segment __user *segments, 931 struct kexec_segment __user *segments,
936 unsigned long flags) 932 unsigned long flags)
937{ 933{
938 struct kimage **dest_image, *image; 934 struct kimage **dest_image, *image;
939 int locked;
940 int result; 935 int result;
941 936
942 /* We only trust the superuser with rebooting the system. */ 937 /* We only trust the superuser with rebooting the system. */
@@ -972,8 +967,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
972 * 967 *
973 * KISS: always take the mutex. 968 * KISS: always take the mutex.
974 */ 969 */
975 locked = xchg(&kexec_lock, 1); 970 if (!mutex_trylock(&kexec_mutex))
976 if (locked)
977 return -EBUSY; 971 return -EBUSY;
978 972
979 dest_image = &kexec_image; 973 dest_image = &kexec_image;
@@ -1015,8 +1009,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
1015 image = xchg(dest_image, image); 1009 image = xchg(dest_image, image);
1016 1010
1017out: 1011out:
1018 locked = xchg(&kexec_lock, 0); /* Release the mutex */ 1012 mutex_unlock(&kexec_mutex);
1019 BUG_ON(!locked);
1020 kimage_free(image); 1013 kimage_free(image);
1021 1014
1022 return result; 1015 return result;
@@ -1063,10 +1056,7 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
1063 1056
1064void crash_kexec(struct pt_regs *regs) 1057void crash_kexec(struct pt_regs *regs)
1065{ 1058{
1066 int locked; 1059 /* Take the kexec_mutex here to prevent sys_kexec_load
1067
1068
1069 /* Take the kexec_lock here to prevent sys_kexec_load
1070 * running on one cpu from replacing the crash kernel 1060 * running on one cpu from replacing the crash kernel
1071 * we are using after a panic on a different cpu. 1061 * we are using after a panic on a different cpu.
1072 * 1062 *
@@ -1074,8 +1064,7 @@ void crash_kexec(struct pt_regs *regs)
1074 * of memory the xchg(&kexec_crash_image) would be 1064 * of memory the xchg(&kexec_crash_image) would be
1075 * sufficient. But since I reuse the memory... 1065 * sufficient. But since I reuse the memory...
1076 */ 1066 */
1077 locked = xchg(&kexec_lock, 1); 1067 if (mutex_trylock(&kexec_mutex)) {
1078 if (!locked) {
1079 if (kexec_crash_image) { 1068 if (kexec_crash_image) {
1080 struct pt_regs fixed_regs; 1069 struct pt_regs fixed_regs;
1081 crash_setup_regs(&fixed_regs, regs); 1070 crash_setup_regs(&fixed_regs, regs);
@@ -1083,8 +1072,7 @@ void crash_kexec(struct pt_regs *regs)
1083 machine_crash_shutdown(&fixed_regs); 1072 machine_crash_shutdown(&fixed_regs);
1084 machine_kexec(kexec_crash_image); 1073 machine_kexec(kexec_crash_image);
1085 } 1074 }
1086 locked = xchg(&kexec_lock, 0); 1075 mutex_unlock(&kexec_mutex);
1087 BUG_ON(!locked);
1088 } 1076 }
1089} 1077}
1090 1078
@@ -1426,25 +1414,23 @@ static int __init crash_save_vmcoreinfo_init(void)
1426 1414
1427module_init(crash_save_vmcoreinfo_init) 1415module_init(crash_save_vmcoreinfo_init)
1428 1416
1429/** 1417/*
1430 * kernel_kexec - reboot the system 1418 * Move into place and start executing a preloaded standalone
1431 * 1419 * executable. If nothing was preloaded return an error.
1432 * Move into place and start executing a preloaded standalone
1433 * executable. If nothing was preloaded return an error.
1434 */ 1420 */
1435int kernel_kexec(void) 1421int kernel_kexec(void)
1436{ 1422{
1437 int error = 0; 1423 int error = 0;
1438 1424
1439 if (xchg(&kexec_lock, 1)) 1425 if (!mutex_trylock(&kexec_mutex))
1440 return -EBUSY; 1426 return -EBUSY;
1441 if (!kexec_image) { 1427 if (!kexec_image) {
1442 error = -EINVAL; 1428 error = -EINVAL;
1443 goto Unlock; 1429 goto Unlock;
1444 } 1430 }
1445 1431
1446 if (kexec_image->preserve_context) {
1447#ifdef CONFIG_KEXEC_JUMP 1432#ifdef CONFIG_KEXEC_JUMP
1433 if (kexec_image->preserve_context) {
1448 mutex_lock(&pm_mutex); 1434 mutex_lock(&pm_mutex);
1449 pm_prepare_console(); 1435 pm_prepare_console();
1450 error = freeze_processes(); 1436 error = freeze_processes();
@@ -1459,6 +1445,7 @@ int kernel_kexec(void)
1459 error = disable_nonboot_cpus(); 1445 error = disable_nonboot_cpus();
1460 if (error) 1446 if (error)
1461 goto Resume_devices; 1447 goto Resume_devices;
1448 device_pm_lock();
1462 local_irq_disable(); 1449 local_irq_disable();
1463 /* At this point, device_suspend() has been called, 1450 /* At this point, device_suspend() has been called,
1464 * but *not* device_power_down(). We *must* 1451 * but *not* device_power_down(). We *must*
@@ -1470,26 +1457,22 @@ int kernel_kexec(void)
1470 error = device_power_down(PMSG_FREEZE); 1457 error = device_power_down(PMSG_FREEZE);
1471 if (error) 1458 if (error)
1472 goto Enable_irqs; 1459 goto Enable_irqs;
1473 save_processor_state(); 1460 } else
1474#endif 1461#endif
1475 } else { 1462 {
1476 blocking_notifier_call_chain(&reboot_notifier_list, 1463 kernel_restart_prepare(NULL);
1477 SYS_RESTART, NULL);
1478 system_state = SYSTEM_RESTART;
1479 device_shutdown();
1480 sysdev_shutdown();
1481 printk(KERN_EMERG "Starting new kernel\n"); 1464 printk(KERN_EMERG "Starting new kernel\n");
1482 machine_shutdown(); 1465 machine_shutdown();
1483 } 1466 }
1484 1467
1485 machine_kexec(kexec_image); 1468 machine_kexec(kexec_image);
1486 1469
1487 if (kexec_image->preserve_context) {
1488#ifdef CONFIG_KEXEC_JUMP 1470#ifdef CONFIG_KEXEC_JUMP
1489 restore_processor_state(); 1471 if (kexec_image->preserve_context) {
1490 device_power_up(PMSG_RESTORE); 1472 device_power_up(PMSG_RESTORE);
1491 Enable_irqs: 1473 Enable_irqs:
1492 local_irq_enable(); 1474 local_irq_enable();
1475 device_pm_unlock();
1493 enable_nonboot_cpus(); 1476 enable_nonboot_cpus();
1494 Resume_devices: 1477 Resume_devices:
1495 device_resume(PMSG_RESTORE); 1478 device_resume(PMSG_RESTORE);
@@ -1499,11 +1482,10 @@ int kernel_kexec(void)
1499 Restore_console: 1482 Restore_console:
1500 pm_restore_console(); 1483 pm_restore_console();
1501 mutex_unlock(&pm_mutex); 1484 mutex_unlock(&pm_mutex);
1502#endif
1503 } 1485 }
1486#endif
1504 1487
1505 Unlock: 1488 Unlock:
1506 xchg(&kexec_lock, 0); 1489 mutex_unlock(&kexec_mutex);
1507
1508 return error; 1490 return error;
1509} 1491}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index d38a64362973..dbda475b13bd 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -124,6 +124,15 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
124unsigned long nr_lock_classes; 124unsigned long nr_lock_classes;
125static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 125static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
126 126
127static inline struct lock_class *hlock_class(struct held_lock *hlock)
128{
129 if (!hlock->class_idx) {
130 DEBUG_LOCKS_WARN_ON(1);
131 return NULL;
132 }
133 return lock_classes + hlock->class_idx - 1;
134}
135
127#ifdef CONFIG_LOCK_STAT 136#ifdef CONFIG_LOCK_STAT
128static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 137static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
129 138
@@ -222,7 +231,7 @@ static void lock_release_holdtime(struct held_lock *hlock)
222 231
223 holdtime = sched_clock() - hlock->holdtime_stamp; 232 holdtime = sched_clock() - hlock->holdtime_stamp;
224 233
225 stats = get_lock_stats(hlock->class); 234 stats = get_lock_stats(hlock_class(hlock));
226 if (hlock->read) 235 if (hlock->read)
227 lock_time_inc(&stats->read_holdtime, holdtime); 236 lock_time_inc(&stats->read_holdtime, holdtime);
228 else 237 else
@@ -372,6 +381,19 @@ unsigned int nr_process_chains;
372unsigned int max_lockdep_depth; 381unsigned int max_lockdep_depth;
373unsigned int max_recursion_depth; 382unsigned int max_recursion_depth;
374 383
384static unsigned int lockdep_dependency_gen_id;
385
386static bool lockdep_dependency_visit(struct lock_class *source,
387 unsigned int depth)
388{
389 if (!depth)
390 lockdep_dependency_gen_id++;
391 if (source->dep_gen_id == lockdep_dependency_gen_id)
392 return true;
393 source->dep_gen_id = lockdep_dependency_gen_id;
394 return false;
395}
396
375#ifdef CONFIG_DEBUG_LOCKDEP 397#ifdef CONFIG_DEBUG_LOCKDEP
376/* 398/*
377 * We cannot printk in early bootup code. Not even early_printk() 399 * We cannot printk in early bootup code. Not even early_printk()
@@ -505,7 +527,7 @@ static void print_lockdep_cache(struct lockdep_map *lock)
505 527
506static void print_lock(struct held_lock *hlock) 528static void print_lock(struct held_lock *hlock)
507{ 529{
508 print_lock_name(hlock->class); 530 print_lock_name(hlock_class(hlock));
509 printk(", at: "); 531 printk(", at: ");
510 print_ip_sym(hlock->acquire_ip); 532 print_ip_sym(hlock->acquire_ip);
511} 533}
@@ -558,6 +580,9 @@ static void print_lock_dependencies(struct lock_class *class, int depth)
558{ 580{
559 struct lock_list *entry; 581 struct lock_list *entry;
560 582
583 if (lockdep_dependency_visit(class, depth))
584 return;
585
561 if (DEBUG_LOCKS_WARN_ON(depth >= 20)) 586 if (DEBUG_LOCKS_WARN_ON(depth >= 20))
562 return; 587 return;
563 588
@@ -850,11 +875,11 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
850 if (!entry) 875 if (!entry)
851 return 0; 876 return 0;
852 877
853 entry->class = this;
854 entry->distance = distance;
855 if (!save_trace(&entry->trace)) 878 if (!save_trace(&entry->trace))
856 return 0; 879 return 0;
857 880
881 entry->class = this;
882 entry->distance = distance;
858 /* 883 /*
859 * Since we never remove from the dependency list, the list can 884 * Since we never remove from the dependency list, the list can
860 * be walked lockless by other CPUs, it's only allocation 885 * be walked lockless by other CPUs, it's only allocation
@@ -932,7 +957,7 @@ static noinline int print_circular_bug_tail(void)
932 if (debug_locks_silent) 957 if (debug_locks_silent)
933 return 0; 958 return 0;
934 959
935 this.class = check_source->class; 960 this.class = hlock_class(check_source);
936 if (!save_trace(&this.trace)) 961 if (!save_trace(&this.trace))
937 return 0; 962 return 0;
938 963
@@ -959,6 +984,67 @@ static int noinline print_infinite_recursion_bug(void)
959 return 0; 984 return 0;
960} 985}
961 986
987unsigned long __lockdep_count_forward_deps(struct lock_class *class,
988 unsigned int depth)
989{
990 struct lock_list *entry;
991 unsigned long ret = 1;
992
993 if (lockdep_dependency_visit(class, depth))
994 return 0;
995
996 /*
997 * Recurse this class's dependency list:
998 */
999 list_for_each_entry(entry, &class->locks_after, entry)
1000 ret += __lockdep_count_forward_deps(entry->class, depth + 1);
1001
1002 return ret;
1003}
1004
1005unsigned long lockdep_count_forward_deps(struct lock_class *class)
1006{
1007 unsigned long ret, flags;
1008
1009 local_irq_save(flags);
1010 __raw_spin_lock(&lockdep_lock);
1011 ret = __lockdep_count_forward_deps(class, 0);
1012 __raw_spin_unlock(&lockdep_lock);
1013 local_irq_restore(flags);
1014
1015 return ret;
1016}
1017
1018unsigned long __lockdep_count_backward_deps(struct lock_class *class,
1019 unsigned int depth)
1020{
1021 struct lock_list *entry;
1022 unsigned long ret = 1;
1023
1024 if (lockdep_dependency_visit(class, depth))
1025 return 0;
1026 /*
1027 * Recurse this class's dependency list:
1028 */
1029 list_for_each_entry(entry, &class->locks_before, entry)
1030 ret += __lockdep_count_backward_deps(entry->class, depth + 1);
1031
1032 return ret;
1033}
1034
1035unsigned long lockdep_count_backward_deps(struct lock_class *class)
1036{
1037 unsigned long ret, flags;
1038
1039 local_irq_save(flags);
1040 __raw_spin_lock(&lockdep_lock);
1041 ret = __lockdep_count_backward_deps(class, 0);
1042 __raw_spin_unlock(&lockdep_lock);
1043 local_irq_restore(flags);
1044
1045 return ret;
1046}
1047
962/* 1048/*
963 * Prove that the dependency graph starting at <entry> can not 1049 * Prove that the dependency graph starting at <entry> can not
964 * lead to <target>. Print an error and return 0 if it does. 1050 * lead to <target>. Print an error and return 0 if it does.
@@ -968,6 +1054,9 @@ check_noncircular(struct lock_class *source, unsigned int depth)
968{ 1054{
969 struct lock_list *entry; 1055 struct lock_list *entry;
970 1056
1057 if (lockdep_dependency_visit(source, depth))
1058 return 1;
1059
971 debug_atomic_inc(&nr_cyclic_check_recursions); 1060 debug_atomic_inc(&nr_cyclic_check_recursions);
972 if (depth > max_recursion_depth) 1061 if (depth > max_recursion_depth)
973 max_recursion_depth = depth; 1062 max_recursion_depth = depth;
@@ -977,7 +1066,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
977 * Check this lock's dependency list: 1066 * Check this lock's dependency list:
978 */ 1067 */
979 list_for_each_entry(entry, &source->locks_after, entry) { 1068 list_for_each_entry(entry, &source->locks_after, entry) {
980 if (entry->class == check_target->class) 1069 if (entry->class == hlock_class(check_target))
981 return print_circular_bug_header(entry, depth+1); 1070 return print_circular_bug_header(entry, depth+1);
982 debug_atomic_inc(&nr_cyclic_checks); 1071 debug_atomic_inc(&nr_cyclic_checks);
983 if (!check_noncircular(entry->class, depth+1)) 1072 if (!check_noncircular(entry->class, depth+1))
@@ -1011,6 +1100,9 @@ find_usage_forwards(struct lock_class *source, unsigned int depth)
1011 struct lock_list *entry; 1100 struct lock_list *entry;
1012 int ret; 1101 int ret;
1013 1102
1103 if (lockdep_dependency_visit(source, depth))
1104 return 1;
1105
1014 if (depth > max_recursion_depth) 1106 if (depth > max_recursion_depth)
1015 max_recursion_depth = depth; 1107 max_recursion_depth = depth;
1016 if (depth >= RECURSION_LIMIT) 1108 if (depth >= RECURSION_LIMIT)
@@ -1050,6 +1142,9 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
1050 struct lock_list *entry; 1142 struct lock_list *entry;
1051 int ret; 1143 int ret;
1052 1144
1145 if (lockdep_dependency_visit(source, depth))
1146 return 1;
1147
1053 if (!__raw_spin_is_locked(&lockdep_lock)) 1148 if (!__raw_spin_is_locked(&lockdep_lock))
1054 return DEBUG_LOCKS_WARN_ON(1); 1149 return DEBUG_LOCKS_WARN_ON(1);
1055 1150
@@ -1064,6 +1159,11 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
1064 return 2; 1159 return 2;
1065 } 1160 }
1066 1161
1162 if (!source && debug_locks_off_graph_unlock()) {
1163 WARN_ON(1);
1164 return 0;
1165 }
1166
1067 /* 1167 /*
1068 * Check this lock's dependency list: 1168 * Check this lock's dependency list:
1069 */ 1169 */
@@ -1103,9 +1203,9 @@ print_bad_irq_dependency(struct task_struct *curr,
1103 printk("\nand this task is already holding:\n"); 1203 printk("\nand this task is already holding:\n");
1104 print_lock(prev); 1204 print_lock(prev);
1105 printk("which would create a new lock dependency:\n"); 1205 printk("which would create a new lock dependency:\n");
1106 print_lock_name(prev->class); 1206 print_lock_name(hlock_class(prev));
1107 printk(" ->"); 1207 printk(" ->");
1108 print_lock_name(next->class); 1208 print_lock_name(hlock_class(next));
1109 printk("\n"); 1209 printk("\n");
1110 1210
1111 printk("\nbut this new dependency connects a %s-irq-safe lock:\n", 1211 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
@@ -1146,12 +1246,12 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
1146 1246
1147 find_usage_bit = bit_backwards; 1247 find_usage_bit = bit_backwards;
1148 /* fills in <backwards_match> */ 1248 /* fills in <backwards_match> */
1149 ret = find_usage_backwards(prev->class, 0); 1249 ret = find_usage_backwards(hlock_class(prev), 0);
1150 if (!ret || ret == 1) 1250 if (!ret || ret == 1)
1151 return ret; 1251 return ret;
1152 1252
1153 find_usage_bit = bit_forwards; 1253 find_usage_bit = bit_forwards;
1154 ret = find_usage_forwards(next->class, 0); 1254 ret = find_usage_forwards(hlock_class(next), 0);
1155 if (!ret || ret == 1) 1255 if (!ret || ret == 1)
1156 return ret; 1256 return ret;
1157 /* ret == 2 */ 1257 /* ret == 2 */
@@ -1272,18 +1372,32 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
1272 struct lockdep_map *next_instance, int read) 1372 struct lockdep_map *next_instance, int read)
1273{ 1373{
1274 struct held_lock *prev; 1374 struct held_lock *prev;
1375 struct held_lock *nest = NULL;
1275 int i; 1376 int i;
1276 1377
1277 for (i = 0; i < curr->lockdep_depth; i++) { 1378 for (i = 0; i < curr->lockdep_depth; i++) {
1278 prev = curr->held_locks + i; 1379 prev = curr->held_locks + i;
1279 if (prev->class != next->class) 1380
1381 if (prev->instance == next->nest_lock)
1382 nest = prev;
1383
1384 if (hlock_class(prev) != hlock_class(next))
1280 continue; 1385 continue;
1386
1281 /* 1387 /*
1282 * Allow read-after-read recursion of the same 1388 * Allow read-after-read recursion of the same
1283 * lock class (i.e. read_lock(lock)+read_lock(lock)): 1389 * lock class (i.e. read_lock(lock)+read_lock(lock)):
1284 */ 1390 */
1285 if ((read == 2) && prev->read) 1391 if ((read == 2) && prev->read)
1286 return 2; 1392 return 2;
1393
1394 /*
1395 * We're holding the nest_lock, which serializes this lock's
1396 * nesting behaviour.
1397 */
1398 if (nest)
1399 return 2;
1400
1287 return print_deadlock_bug(curr, prev, next); 1401 return print_deadlock_bug(curr, prev, next);
1288 } 1402 }
1289 return 1; 1403 return 1;
@@ -1329,7 +1443,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1329 */ 1443 */
1330 check_source = next; 1444 check_source = next;
1331 check_target = prev; 1445 check_target = prev;
1332 if (!(check_noncircular(next->class, 0))) 1446 if (!(check_noncircular(hlock_class(next), 0)))
1333 return print_circular_bug_tail(); 1447 return print_circular_bug_tail();
1334 1448
1335 if (!check_prev_add_irq(curr, prev, next)) 1449 if (!check_prev_add_irq(curr, prev, next))
@@ -1353,8 +1467,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1353 * chains - the second one will be new, but L1 already has 1467 * chains - the second one will be new, but L1 already has
1354 * L2 added to its dependency list, due to the first chain.) 1468 * L2 added to its dependency list, due to the first chain.)
1355 */ 1469 */
1356 list_for_each_entry(entry, &prev->class->locks_after, entry) { 1470 list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) {
1357 if (entry->class == next->class) { 1471 if (entry->class == hlock_class(next)) {
1358 if (distance == 1) 1472 if (distance == 1)
1359 entry->distance = 1; 1473 entry->distance = 1;
1360 return 2; 1474 return 2;
@@ -1365,26 +1479,28 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1365 * Ok, all validations passed, add the new lock 1479 * Ok, all validations passed, add the new lock
1366 * to the previous lock's dependency list: 1480 * to the previous lock's dependency list:
1367 */ 1481 */
1368 ret = add_lock_to_list(prev->class, next->class, 1482 ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
1369 &prev->class->locks_after, next->acquire_ip, distance); 1483 &hlock_class(prev)->locks_after,
1484 next->acquire_ip, distance);
1370 1485
1371 if (!ret) 1486 if (!ret)
1372 return 0; 1487 return 0;
1373 1488
1374 ret = add_lock_to_list(next->class, prev->class, 1489 ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
1375 &next->class->locks_before, next->acquire_ip, distance); 1490 &hlock_class(next)->locks_before,
1491 next->acquire_ip, distance);
1376 if (!ret) 1492 if (!ret)
1377 return 0; 1493 return 0;
1378 1494
1379 /* 1495 /*
1380 * Debugging printouts: 1496 * Debugging printouts:
1381 */ 1497 */
1382 if (verbose(prev->class) || verbose(next->class)) { 1498 if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
1383 graph_unlock(); 1499 graph_unlock();
1384 printk("\n new dependency: "); 1500 printk("\n new dependency: ");
1385 print_lock_name(prev->class); 1501 print_lock_name(hlock_class(prev));
1386 printk(" => "); 1502 printk(" => ");
1387 print_lock_name(next->class); 1503 print_lock_name(hlock_class(next));
1388 printk("\n"); 1504 printk("\n");
1389 dump_stack(); 1505 dump_stack();
1390 return graph_lock(); 1506 return graph_lock();
@@ -1481,7 +1597,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
1481 struct held_lock *hlock, 1597 struct held_lock *hlock,
1482 u64 chain_key) 1598 u64 chain_key)
1483{ 1599{
1484 struct lock_class *class = hlock->class; 1600 struct lock_class *class = hlock_class(hlock);
1485 struct list_head *hash_head = chainhashentry(chain_key); 1601 struct list_head *hash_head = chainhashentry(chain_key);
1486 struct lock_chain *chain; 1602 struct lock_chain *chain;
1487 struct held_lock *hlock_curr, *hlock_next; 1603 struct held_lock *hlock_curr, *hlock_next;
@@ -1554,7 +1670,7 @@ cache_hit:
1554 if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { 1670 if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
1555 chain->base = cn; 1671 chain->base = cn;
1556 for (j = 0; j < chain->depth - 1; j++, i++) { 1672 for (j = 0; j < chain->depth - 1; j++, i++) {
1557 int lock_id = curr->held_locks[i].class - lock_classes; 1673 int lock_id = curr->held_locks[i].class_idx - 1;
1558 chain_hlocks[chain->base + j] = lock_id; 1674 chain_hlocks[chain->base + j] = lock_id;
1559 } 1675 }
1560 chain_hlocks[chain->base + j] = class - lock_classes; 1676 chain_hlocks[chain->base + j] = class - lock_classes;
@@ -1643,14 +1759,13 @@ static void check_chain_key(struct task_struct *curr)
1643 hlock = curr->held_locks + i; 1759 hlock = curr->held_locks + i;
1644 if (chain_key != hlock->prev_chain_key) { 1760 if (chain_key != hlock->prev_chain_key) {
1645 debug_locks_off(); 1761 debug_locks_off();
1646 printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n", 1762 WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n",
1647 curr->lockdep_depth, i, 1763 curr->lockdep_depth, i,
1648 (unsigned long long)chain_key, 1764 (unsigned long long)chain_key,
1649 (unsigned long long)hlock->prev_chain_key); 1765 (unsigned long long)hlock->prev_chain_key);
1650 WARN_ON(1);
1651 return; 1766 return;
1652 } 1767 }
1653 id = hlock->class - lock_classes; 1768 id = hlock->class_idx - 1;
1654 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) 1769 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
1655 return; 1770 return;
1656 1771
@@ -1662,11 +1777,10 @@ static void check_chain_key(struct task_struct *curr)
1662 } 1777 }
1663 if (chain_key != curr->curr_chain_key) { 1778 if (chain_key != curr->curr_chain_key) {
1664 debug_locks_off(); 1779 debug_locks_off();
1665 printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n", 1780 WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n",
1666 curr->lockdep_depth, i, 1781 curr->lockdep_depth, i,
1667 (unsigned long long)chain_key, 1782 (unsigned long long)chain_key,
1668 (unsigned long long)curr->curr_chain_key); 1783 (unsigned long long)curr->curr_chain_key);
1669 WARN_ON(1);
1670 } 1784 }
1671#endif 1785#endif
1672} 1786}
@@ -1695,7 +1809,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
1695 print_lock(this); 1809 print_lock(this);
1696 1810
1697 printk("{%s} state was registered at:\n", usage_str[prev_bit]); 1811 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1698 print_stack_trace(this->class->usage_traces + prev_bit, 1); 1812 print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
1699 1813
1700 print_irqtrace_events(curr); 1814 print_irqtrace_events(curr);
1701 printk("\nother info that might help us debug this:\n"); 1815 printk("\nother info that might help us debug this:\n");
@@ -1714,7 +1828,7 @@ static inline int
1714valid_state(struct task_struct *curr, struct held_lock *this, 1828valid_state(struct task_struct *curr, struct held_lock *this,
1715 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) 1829 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1716{ 1830{
1717 if (unlikely(this->class->usage_mask & (1 << bad_bit))) 1831 if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit)))
1718 return print_usage_bug(curr, this, bad_bit, new_bit); 1832 return print_usage_bug(curr, this, bad_bit, new_bit);
1719 return 1; 1833 return 1;
1720} 1834}
@@ -1753,7 +1867,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1753 lockdep_print_held_locks(curr); 1867 lockdep_print_held_locks(curr);
1754 1868
1755 printk("\nthe first lock's dependencies:\n"); 1869 printk("\nthe first lock's dependencies:\n");
1756 print_lock_dependencies(this->class, 0); 1870 print_lock_dependencies(hlock_class(this), 0);
1757 1871
1758 printk("\nthe second lock's dependencies:\n"); 1872 printk("\nthe second lock's dependencies:\n");
1759 print_lock_dependencies(other, 0); 1873 print_lock_dependencies(other, 0);
@@ -1776,7 +1890,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
1776 1890
1777 find_usage_bit = bit; 1891 find_usage_bit = bit;
1778 /* fills in <forwards_match> */ 1892 /* fills in <forwards_match> */
1779 ret = find_usage_forwards(this->class, 0); 1893 ret = find_usage_forwards(hlock_class(this), 0);
1780 if (!ret || ret == 1) 1894 if (!ret || ret == 1)
1781 return ret; 1895 return ret;
1782 1896
@@ -1795,7 +1909,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
1795 1909
1796 find_usage_bit = bit; 1910 find_usage_bit = bit;
1797 /* fills in <backwards_match> */ 1911 /* fills in <backwards_match> */
1798 ret = find_usage_backwards(this->class, 0); 1912 ret = find_usage_backwards(hlock_class(this), 0);
1799 if (!ret || ret == 1) 1913 if (!ret || ret == 1)
1800 return ret; 1914 return ret;
1801 1915
@@ -1861,7 +1975,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1861 LOCK_ENABLED_HARDIRQS_READ, "hard-read")) 1975 LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
1862 return 0; 1976 return 0;
1863#endif 1977#endif
1864 if (hardirq_verbose(this->class)) 1978 if (hardirq_verbose(hlock_class(this)))
1865 ret = 2; 1979 ret = 2;
1866 break; 1980 break;
1867 case LOCK_USED_IN_SOFTIRQ: 1981 case LOCK_USED_IN_SOFTIRQ:
@@ -1886,7 +2000,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1886 LOCK_ENABLED_SOFTIRQS_READ, "soft-read")) 2000 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
1887 return 0; 2001 return 0;
1888#endif 2002#endif
1889 if (softirq_verbose(this->class)) 2003 if (softirq_verbose(hlock_class(this)))
1890 ret = 2; 2004 ret = 2;
1891 break; 2005 break;
1892 case LOCK_USED_IN_HARDIRQ_READ: 2006 case LOCK_USED_IN_HARDIRQ_READ:
@@ -1899,7 +2013,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1899 if (!check_usage_forwards(curr, this, 2013 if (!check_usage_forwards(curr, this,
1900 LOCK_ENABLED_HARDIRQS, "hard")) 2014 LOCK_ENABLED_HARDIRQS, "hard"))
1901 return 0; 2015 return 0;
1902 if (hardirq_verbose(this->class)) 2016 if (hardirq_verbose(hlock_class(this)))
1903 ret = 2; 2017 ret = 2;
1904 break; 2018 break;
1905 case LOCK_USED_IN_SOFTIRQ_READ: 2019 case LOCK_USED_IN_SOFTIRQ_READ:
@@ -1912,7 +2026,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1912 if (!check_usage_forwards(curr, this, 2026 if (!check_usage_forwards(curr, this,
1913 LOCK_ENABLED_SOFTIRQS, "soft")) 2027 LOCK_ENABLED_SOFTIRQS, "soft"))
1914 return 0; 2028 return 0;
1915 if (softirq_verbose(this->class)) 2029 if (softirq_verbose(hlock_class(this)))
1916 ret = 2; 2030 ret = 2;
1917 break; 2031 break;
1918 case LOCK_ENABLED_HARDIRQS: 2032 case LOCK_ENABLED_HARDIRQS:
@@ -1938,7 +2052,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1938 LOCK_USED_IN_HARDIRQ_READ, "hard-read")) 2052 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
1939 return 0; 2053 return 0;
1940#endif 2054#endif
1941 if (hardirq_verbose(this->class)) 2055 if (hardirq_verbose(hlock_class(this)))
1942 ret = 2; 2056 ret = 2;
1943 break; 2057 break;
1944 case LOCK_ENABLED_SOFTIRQS: 2058 case LOCK_ENABLED_SOFTIRQS:
@@ -1964,7 +2078,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1964 LOCK_USED_IN_SOFTIRQ_READ, "soft-read")) 2078 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
1965 return 0; 2079 return 0;
1966#endif 2080#endif
1967 if (softirq_verbose(this->class)) 2081 if (softirq_verbose(hlock_class(this)))
1968 ret = 2; 2082 ret = 2;
1969 break; 2083 break;
1970 case LOCK_ENABLED_HARDIRQS_READ: 2084 case LOCK_ENABLED_HARDIRQS_READ:
@@ -1979,7 +2093,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1979 LOCK_USED_IN_HARDIRQ, "hard")) 2093 LOCK_USED_IN_HARDIRQ, "hard"))
1980 return 0; 2094 return 0;
1981#endif 2095#endif
1982 if (hardirq_verbose(this->class)) 2096 if (hardirq_verbose(hlock_class(this)))
1983 ret = 2; 2097 ret = 2;
1984 break; 2098 break;
1985 case LOCK_ENABLED_SOFTIRQS_READ: 2099 case LOCK_ENABLED_SOFTIRQS_READ:
@@ -1994,7 +2108,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1994 LOCK_USED_IN_SOFTIRQ, "soft")) 2108 LOCK_USED_IN_SOFTIRQ, "soft"))
1995 return 0; 2109 return 0;
1996#endif 2110#endif
1997 if (softirq_verbose(this->class)) 2111 if (softirq_verbose(hlock_class(this)))
1998 ret = 2; 2112 ret = 2;
1999 break; 2113 break;
2000 default: 2114 default:
@@ -2310,7 +2424,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2310 * If already set then do not dirty the cacheline, 2424 * If already set then do not dirty the cacheline,
2311 * nor do any checks: 2425 * nor do any checks:
2312 */ 2426 */
2313 if (likely(this->class->usage_mask & new_mask)) 2427 if (likely(hlock_class(this)->usage_mask & new_mask))
2314 return 1; 2428 return 1;
2315 2429
2316 if (!graph_lock()) 2430 if (!graph_lock())
@@ -2318,14 +2432,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2318 /* 2432 /*
2319 * Make sure we didnt race: 2433 * Make sure we didnt race:
2320 */ 2434 */
2321 if (unlikely(this->class->usage_mask & new_mask)) { 2435 if (unlikely(hlock_class(this)->usage_mask & new_mask)) {
2322 graph_unlock(); 2436 graph_unlock();
2323 return 1; 2437 return 1;
2324 } 2438 }
2325 2439
2326 this->class->usage_mask |= new_mask; 2440 hlock_class(this)->usage_mask |= new_mask;
2327 2441
2328 if (!save_trace(this->class->usage_traces + new_bit)) 2442 if (!save_trace(hlock_class(this)->usage_traces + new_bit))
2329 return 0; 2443 return 0;
2330 2444
2331 switch (new_bit) { 2445 switch (new_bit) {
@@ -2405,7 +2519,7 @@ EXPORT_SYMBOL_GPL(lockdep_init_map);
2405 */ 2519 */
2406static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2520static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2407 int trylock, int read, int check, int hardirqs_off, 2521 int trylock, int read, int check, int hardirqs_off,
2408 unsigned long ip) 2522 struct lockdep_map *nest_lock, unsigned long ip)
2409{ 2523{
2410 struct task_struct *curr = current; 2524 struct task_struct *curr = current;
2411 struct lock_class *class = NULL; 2525 struct lock_class *class = NULL;
@@ -2459,14 +2573,16 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2459 return 0; 2573 return 0;
2460 2574
2461 hlock = curr->held_locks + depth; 2575 hlock = curr->held_locks + depth;
2462 2576 if (DEBUG_LOCKS_WARN_ON(!class))
2463 hlock->class = class; 2577 return 0;
2578 hlock->class_idx = class - lock_classes + 1;
2464 hlock->acquire_ip = ip; 2579 hlock->acquire_ip = ip;
2465 hlock->instance = lock; 2580 hlock->instance = lock;
2581 hlock->nest_lock = nest_lock;
2466 hlock->trylock = trylock; 2582 hlock->trylock = trylock;
2467 hlock->read = read; 2583 hlock->read = read;
2468 hlock->check = check; 2584 hlock->check = check;
2469 hlock->hardirqs_off = hardirqs_off; 2585 hlock->hardirqs_off = !!hardirqs_off;
2470#ifdef CONFIG_LOCK_STAT 2586#ifdef CONFIG_LOCK_STAT
2471 hlock->waittime_stamp = 0; 2587 hlock->waittime_stamp = 0;
2472 hlock->holdtime_stamp = sched_clock(); 2588 hlock->holdtime_stamp = sched_clock();
@@ -2574,6 +2690,55 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2574 return 1; 2690 return 1;
2575} 2691}
2576 2692
2693static int
2694__lock_set_subclass(struct lockdep_map *lock,
2695 unsigned int subclass, unsigned long ip)
2696{
2697 struct task_struct *curr = current;
2698 struct held_lock *hlock, *prev_hlock;
2699 struct lock_class *class;
2700 unsigned int depth;
2701 int i;
2702
2703 depth = curr->lockdep_depth;
2704 if (DEBUG_LOCKS_WARN_ON(!depth))
2705 return 0;
2706
2707 prev_hlock = NULL;
2708 for (i = depth-1; i >= 0; i--) {
2709 hlock = curr->held_locks + i;
2710 /*
2711 * We must not cross into another context:
2712 */
2713 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2714 break;
2715 if (hlock->instance == lock)
2716 goto found_it;
2717 prev_hlock = hlock;
2718 }
2719 return print_unlock_inbalance_bug(curr, lock, ip);
2720
2721found_it:
2722 class = register_lock_class(lock, subclass, 0);
2723 hlock->class_idx = class - lock_classes + 1;
2724
2725 curr->lockdep_depth = i;
2726 curr->curr_chain_key = hlock->prev_chain_key;
2727
2728 for (; i < depth; i++) {
2729 hlock = curr->held_locks + i;
2730 if (!__lock_acquire(hlock->instance,
2731 hlock_class(hlock)->subclass, hlock->trylock,
2732 hlock->read, hlock->check, hlock->hardirqs_off,
2733 hlock->nest_lock, hlock->acquire_ip))
2734 return 0;
2735 }
2736
2737 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
2738 return 0;
2739 return 1;
2740}
2741
2577/* 2742/*
2578 * Remove the lock to the list of currently held locks in a 2743 * Remove the lock to the list of currently held locks in a
2579 * potentially non-nested (out of order) manner. This is a 2744 * potentially non-nested (out of order) manner. This is a
@@ -2624,9 +2789,9 @@ found_it:
2624 for (i++; i < depth; i++) { 2789 for (i++; i < depth; i++) {
2625 hlock = curr->held_locks + i; 2790 hlock = curr->held_locks + i;
2626 if (!__lock_acquire(hlock->instance, 2791 if (!__lock_acquire(hlock->instance,
2627 hlock->class->subclass, hlock->trylock, 2792 hlock_class(hlock)->subclass, hlock->trylock,
2628 hlock->read, hlock->check, hlock->hardirqs_off, 2793 hlock->read, hlock->check, hlock->hardirqs_off,
2629 hlock->acquire_ip)) 2794 hlock->nest_lock, hlock->acquire_ip))
2630 return 0; 2795 return 0;
2631 } 2796 }
2632 2797
@@ -2669,7 +2834,7 @@ static int lock_release_nested(struct task_struct *curr,
2669 2834
2670#ifdef CONFIG_DEBUG_LOCKDEP 2835#ifdef CONFIG_DEBUG_LOCKDEP
2671 hlock->prev_chain_key = 0; 2836 hlock->prev_chain_key = 0;
2672 hlock->class = NULL; 2837 hlock->class_idx = 0;
2673 hlock->acquire_ip = 0; 2838 hlock->acquire_ip = 0;
2674 hlock->irq_context = 0; 2839 hlock->irq_context = 0;
2675#endif 2840#endif
@@ -2738,18 +2903,36 @@ static void check_flags(unsigned long flags)
2738#endif 2903#endif
2739} 2904}
2740 2905
2906void
2907lock_set_subclass(struct lockdep_map *lock,
2908 unsigned int subclass, unsigned long ip)
2909{
2910 unsigned long flags;
2911
2912 if (unlikely(current->lockdep_recursion))
2913 return;
2914
2915 raw_local_irq_save(flags);
2916 current->lockdep_recursion = 1;
2917 check_flags(flags);
2918 if (__lock_set_subclass(lock, subclass, ip))
2919 check_chain_key(current);
2920 current->lockdep_recursion = 0;
2921 raw_local_irq_restore(flags);
2922}
2923
2924EXPORT_SYMBOL_GPL(lock_set_subclass);
2925
2741/* 2926/*
2742 * We are not always called with irqs disabled - do that here, 2927 * We are not always called with irqs disabled - do that here,
2743 * and also avoid lockdep recursion: 2928 * and also avoid lockdep recursion:
2744 */ 2929 */
2745void lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2930void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2746 int trylock, int read, int check, unsigned long ip) 2931 int trylock, int read, int check,
2932 struct lockdep_map *nest_lock, unsigned long ip)
2747{ 2933{
2748 unsigned long flags; 2934 unsigned long flags;
2749 2935
2750 if (unlikely(!lock_stat && !prove_locking))
2751 return;
2752
2753 if (unlikely(current->lockdep_recursion)) 2936 if (unlikely(current->lockdep_recursion))
2754 return; 2937 return;
2755 2938
@@ -2758,7 +2941,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2758 2941
2759 current->lockdep_recursion = 1; 2942 current->lockdep_recursion = 1;
2760 __lock_acquire(lock, subclass, trylock, read, check, 2943 __lock_acquire(lock, subclass, trylock, read, check,
2761 irqs_disabled_flags(flags), ip); 2944 irqs_disabled_flags(flags), nest_lock, ip);
2762 current->lockdep_recursion = 0; 2945 current->lockdep_recursion = 0;
2763 raw_local_irq_restore(flags); 2946 raw_local_irq_restore(flags);
2764} 2947}
@@ -2770,9 +2953,6 @@ void lock_release(struct lockdep_map *lock, int nested,
2770{ 2953{
2771 unsigned long flags; 2954 unsigned long flags;
2772 2955
2773 if (unlikely(!lock_stat && !prove_locking))
2774 return;
2775
2776 if (unlikely(current->lockdep_recursion)) 2956 if (unlikely(current->lockdep_recursion))
2777 return; 2957 return;
2778 2958
@@ -2845,11 +3025,11 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
2845found_it: 3025found_it:
2846 hlock->waittime_stamp = sched_clock(); 3026 hlock->waittime_stamp = sched_clock();
2847 3027
2848 point = lock_contention_point(hlock->class, ip); 3028 point = lock_contention_point(hlock_class(hlock), ip);
2849 3029
2850 stats = get_lock_stats(hlock->class); 3030 stats = get_lock_stats(hlock_class(hlock));
2851 if (point < ARRAY_SIZE(stats->contention_point)) 3031 if (point < ARRAY_SIZE(stats->contention_point))
2852 stats->contention_point[i]++; 3032 stats->contention_point[point]++;
2853 if (lock->cpu != smp_processor_id()) 3033 if (lock->cpu != smp_processor_id())
2854 stats->bounces[bounce_contended + !!hlock->read]++; 3034 stats->bounces[bounce_contended + !!hlock->read]++;
2855 put_lock_stats(stats); 3035 put_lock_stats(stats);
@@ -2893,7 +3073,7 @@ found_it:
2893 hlock->holdtime_stamp = now; 3073 hlock->holdtime_stamp = now;
2894 } 3074 }
2895 3075
2896 stats = get_lock_stats(hlock->class); 3076 stats = get_lock_stats(hlock_class(hlock));
2897 if (waittime) { 3077 if (waittime) {
2898 if (hlock->read) 3078 if (hlock->read)
2899 lock_time_inc(&stats->read_waittime, waittime); 3079 lock_time_inc(&stats->read_waittime, waittime);
@@ -2988,6 +3168,7 @@ static void zap_class(struct lock_class *class)
2988 list_del_rcu(&class->hash_entry); 3168 list_del_rcu(&class->hash_entry);
2989 list_del_rcu(&class->lock_entry); 3169 list_del_rcu(&class->lock_entry);
2990 3170
3171 class->key = NULL;
2991} 3172}
2992 3173
2993static inline int within(const void *addr, void *start, unsigned long size) 3174static inline int within(const void *addr, void *start, unsigned long size)
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index c3600a091a28..56b196932c08 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -17,9 +17,6 @@
17 */ 17 */
18#define MAX_LOCKDEP_ENTRIES 8192UL 18#define MAX_LOCKDEP_ENTRIES 8192UL
19 19
20#define MAX_LOCKDEP_KEYS_BITS 11
21#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
22
23#define MAX_LOCKDEP_CHAINS_BITS 14 20#define MAX_LOCKDEP_CHAINS_BITS 14
24#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) 21#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
25 22
@@ -53,6 +50,22 @@ extern unsigned int nr_process_chains;
53extern unsigned int max_lockdep_depth; 50extern unsigned int max_lockdep_depth;
54extern unsigned int max_recursion_depth; 51extern unsigned int max_recursion_depth;
55 52
53#ifdef CONFIG_PROVE_LOCKING
54extern unsigned long lockdep_count_forward_deps(struct lock_class *);
55extern unsigned long lockdep_count_backward_deps(struct lock_class *);
56#else
57static inline unsigned long
58lockdep_count_forward_deps(struct lock_class *class)
59{
60 return 0;
61}
62static inline unsigned long
63lockdep_count_backward_deps(struct lock_class *class)
64{
65 return 0;
66}
67#endif
68
56#ifdef CONFIG_DEBUG_LOCKDEP 69#ifdef CONFIG_DEBUG_LOCKDEP
57/* 70/*
58 * Various lockdep statistics: 71 * Various lockdep statistics:
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 9b0e940e2545..20dbcbf9c7dd 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -63,34 +63,6 @@ static void l_stop(struct seq_file *m, void *v)
63{ 63{
64} 64}
65 65
66static unsigned long count_forward_deps(struct lock_class *class)
67{
68 struct lock_list *entry;
69 unsigned long ret = 1;
70
71 /*
72 * Recurse this class's dependency list:
73 */
74 list_for_each_entry(entry, &class->locks_after, entry)
75 ret += count_forward_deps(entry->class);
76
77 return ret;
78}
79
80static unsigned long count_backward_deps(struct lock_class *class)
81{
82 struct lock_list *entry;
83 unsigned long ret = 1;
84
85 /*
86 * Recurse this class's dependency list:
87 */
88 list_for_each_entry(entry, &class->locks_before, entry)
89 ret += count_backward_deps(entry->class);
90
91 return ret;
92}
93
94static void print_name(struct seq_file *m, struct lock_class *class) 66static void print_name(struct seq_file *m, struct lock_class *class)
95{ 67{
96 char str[128]; 68 char str[128];
@@ -110,7 +82,6 @@ static void print_name(struct seq_file *m, struct lock_class *class)
110 82
111static int l_show(struct seq_file *m, void *v) 83static int l_show(struct seq_file *m, void *v)
112{ 84{
113 unsigned long nr_forward_deps, nr_backward_deps;
114 struct lock_class *class = v; 85 struct lock_class *class = v;
115 struct lock_list *entry; 86 struct lock_list *entry;
116 char c1, c2, c3, c4; 87 char c1, c2, c3, c4;
@@ -124,11 +95,10 @@ static int l_show(struct seq_file *m, void *v)
124#ifdef CONFIG_DEBUG_LOCKDEP 95#ifdef CONFIG_DEBUG_LOCKDEP
125 seq_printf(m, " OPS:%8ld", class->ops); 96 seq_printf(m, " OPS:%8ld", class->ops);
126#endif 97#endif
127 nr_forward_deps = count_forward_deps(class); 98#ifdef CONFIG_PROVE_LOCKING
128 seq_printf(m, " FD:%5ld", nr_forward_deps); 99 seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
129 100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
130 nr_backward_deps = count_backward_deps(class); 101#endif
131 seq_printf(m, " BD:%5ld", nr_backward_deps);
132 102
133 get_usage_chars(class, &c1, &c2, &c3, &c4); 103 get_usage_chars(class, &c1, &c2, &c3, &c4);
134 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); 104 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
@@ -229,6 +199,9 @@ static int lc_show(struct seq_file *m, void *v)
229 199
230 for (i = 0; i < chain->depth; i++) { 200 for (i = 0; i < chain->depth; i++) {
231 class = lock_chain_get_class(chain, i); 201 class = lock_chain_get_class(chain, i);
202 if (!class->key)
203 continue;
204
232 seq_printf(m, "[%p] ", class->key); 205 seq_printf(m, "[%p] ", class->key);
233 print_name(m, class); 206 print_name(m, class);
234 seq_puts(m, "\n"); 207 seq_puts(m, "\n");
@@ -350,7 +323,9 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
350 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
351 nr_hardirq_read_unsafe++; 324 nr_hardirq_read_unsafe++;
352 325
353 sum_forward_deps += count_forward_deps(class); 326#ifdef CONFIG_PROVE_LOCKING
327 sum_forward_deps += lockdep_count_forward_deps(class);
328#endif
354 } 329 }
355#ifdef CONFIG_DEBUG_LOCKDEP 330#ifdef CONFIG_DEBUG_LOCKDEP
356 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused); 331 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
@@ -497,8 +472,9 @@ static void snprint_time(char *buf, size_t bufsiz, s64 nr)
497{ 472{
498 unsigned long rem; 473 unsigned long rem;
499 474
475 nr += 5; /* for display rounding */
500 rem = do_div(nr, 1000); /* XXX: do_div_signed */ 476 rem = do_div(nr, 1000); /* XXX: do_div_signed */
501 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); 477 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10);
502} 478}
503 479
504static void seq_time(struct seq_file *m, s64 time) 480static void seq_time(struct seq_file *m, s64 time)
diff --git a/kernel/module.c b/kernel/module.c
index 61d212120df4..9db11911e04b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1799,7 +1799,7 @@ static void *module_alloc_update_bounds(unsigned long size)
1799 1799
1800/* Allocate and load the module: note that size of section 0 is always 1800/* Allocate and load the module: note that size of section 0 is always
1801 zero, and we rely on this for optional sections. */ 1801 zero, and we rely on this for optional sections. */
1802static struct module *load_module(void __user *umod, 1802static noinline struct module *load_module(void __user *umod,
1803 unsigned long len, 1803 unsigned long len,
1804 const char __user *uargs) 1804 const char __user *uargs)
1805{ 1805{
@@ -2288,7 +2288,7 @@ sys_init_module(void __user *umod,
2288 2288
2289 /* Start the module */ 2289 /* Start the module */
2290 if (mod->init != NULL) 2290 if (mod->init != NULL)
2291 ret = mod->init(); 2291 ret = do_one_initcall(mod->init);
2292 if (ret < 0) { 2292 if (ret < 0) {
2293 /* Init routine failed: abort. Try to protect us from 2293 /* Init routine failed: abort. Try to protect us from
2294 buggy refcounters. */ 2294 buggy refcounters. */
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 21575fc46d05..1d3ef29a2583 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -14,7 +14,6 @@
14 */ 14 */
15 15
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/version.h>
18#include <linux/nsproxy.h> 17#include <linux/nsproxy.h>
19#include <linux/init_task.h> 18#include <linux/init_task.h>
20#include <linux/mnt_namespace.h> 19#include <linux/mnt_namespace.h>
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index ea567b78d1aa..fab8ea86fac3 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -179,9 +179,6 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
179 rc = sys_wait4(-1, NULL, __WALL, NULL); 179 rc = sys_wait4(-1, NULL, __WALL, NULL);
180 } while (rc != -ECHILD); 180 } while (rc != -ECHILD);
181 181
182
183 /* Child reaper for the pid namespace is going away */
184 pid_ns->child_reaper = NULL;
185 acct_exit_ns(pid_ns); 182 acct_exit_ns(pid_ns);
186 return; 183 return;
187} 184}
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index da9c2dda6a4e..dfdec524d1b7 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -43,7 +43,7 @@
43#include <linux/uaccess.h> 43#include <linux/uaccess.h>
44 44
45/* 45/*
46 * locking rule: all changes to target_value or requirements or notifiers lists 46 * locking rule: all changes to requirements or notifiers lists
47 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock 47 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
48 * held, taken with _irqsave. One lock to rule them all 48 * held, taken with _irqsave. One lock to rule them all
49 */ 49 */
@@ -66,7 +66,7 @@ struct pm_qos_object {
66 struct miscdevice pm_qos_power_miscdev; 66 struct miscdevice pm_qos_power_miscdev;
67 char *name; 67 char *name;
68 s32 default_value; 68 s32 default_value;
69 s32 target_value; 69 atomic_t target_value;
70 s32 (*comparitor)(s32, s32); 70 s32 (*comparitor)(s32, s32);
71}; 71};
72 72
@@ -77,7 +77,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
77 .notifiers = &cpu_dma_lat_notifier, 77 .notifiers = &cpu_dma_lat_notifier,
78 .name = "cpu_dma_latency", 78 .name = "cpu_dma_latency",
79 .default_value = 2000 * USEC_PER_SEC, 79 .default_value = 2000 * USEC_PER_SEC,
80 .target_value = 2000 * USEC_PER_SEC, 80 .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
81 .comparitor = min_compare 81 .comparitor = min_compare
82}; 82};
83 83
@@ -87,7 +87,7 @@ static struct pm_qos_object network_lat_pm_qos = {
87 .notifiers = &network_lat_notifier, 87 .notifiers = &network_lat_notifier,
88 .name = "network_latency", 88 .name = "network_latency",
89 .default_value = 2000 * USEC_PER_SEC, 89 .default_value = 2000 * USEC_PER_SEC,
90 .target_value = 2000 * USEC_PER_SEC, 90 .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
91 .comparitor = min_compare 91 .comparitor = min_compare
92}; 92};
93 93
@@ -99,7 +99,7 @@ static struct pm_qos_object network_throughput_pm_qos = {
99 .notifiers = &network_throughput_notifier, 99 .notifiers = &network_throughput_notifier,
100 .name = "network_throughput", 100 .name = "network_throughput",
101 .default_value = 0, 101 .default_value = 0,
102 .target_value = 0, 102 .target_value = ATOMIC_INIT(0),
103 .comparitor = max_compare 103 .comparitor = max_compare
104}; 104};
105 105
@@ -150,11 +150,11 @@ static void update_target(int target)
150 extreme_value = pm_qos_array[target]->comparitor( 150 extreme_value = pm_qos_array[target]->comparitor(
151 extreme_value, node->value); 151 extreme_value, node->value);
152 } 152 }
153 if (pm_qos_array[target]->target_value != extreme_value) { 153 if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
154 call_notifier = 1; 154 call_notifier = 1;
155 pm_qos_array[target]->target_value = extreme_value; 155 atomic_set(&pm_qos_array[target]->target_value, extreme_value);
156 pr_debug(KERN_ERR "new target for qos %d is %d\n", target, 156 pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
157 pm_qos_array[target]->target_value); 157 atomic_read(&pm_qos_array[target]->target_value));
158 } 158 }
159 spin_unlock_irqrestore(&pm_qos_lock, flags); 159 spin_unlock_irqrestore(&pm_qos_lock, flags);
160 160
@@ -193,14 +193,7 @@ static int find_pm_qos_object_by_minor(int minor)
193 */ 193 */
194int pm_qos_requirement(int pm_qos_class) 194int pm_qos_requirement(int pm_qos_class)
195{ 195{
196 int ret_val; 196 return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
197 unsigned long flags;
198
199 spin_lock_irqsave(&pm_qos_lock, flags);
200 ret_val = pm_qos_array[pm_qos_class]->target_value;
201 spin_unlock_irqrestore(&pm_qos_lock, flags);
202
203 return ret_val;
204} 197}
205EXPORT_SYMBOL_GPL(pm_qos_requirement); 198EXPORT_SYMBOL_GPL(pm_qos_requirement);
206 199
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9a21681aa80f..e36d5798cbff 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -289,21 +289,29 @@ void do_schedule_next_timer(struct siginfo *info)
289 else 289 else
290 schedule_next_timer(timr); 290 schedule_next_timer(timr);
291 291
292 info->si_overrun = timr->it_overrun_last; 292 info->si_overrun += timr->it_overrun_last;
293 } 293 }
294 294
295 if (timr) 295 if (timr)
296 unlock_timer(timr, flags); 296 unlock_timer(timr, flags);
297} 297}
298 298
299int posix_timer_event(struct k_itimer *timr,int si_private) 299int posix_timer_event(struct k_itimer *timr, int si_private)
300{ 300{
301 memset(&timr->sigq->info, 0, sizeof(siginfo_t)); 301 /*
302 * FIXME: if ->sigq is queued we can race with
303 * dequeue_signal()->do_schedule_next_timer().
304 *
305 * If dequeue_signal() sees the "right" value of
306 * si_sys_private it calls do_schedule_next_timer().
307 * We re-queue ->sigq and drop ->it_lock().
308 * do_schedule_next_timer() locks the timer
309 * and re-schedules it while ->sigq is pending.
310 * Not really bad, but not what we want.
311 */
302 timr->sigq->info.si_sys_private = si_private; 312 timr->sigq->info.si_sys_private = si_private;
303 /* Send signal to the process that owns this timer.*/
304 313
305 timr->sigq->info.si_signo = timr->it_sigev_signo; 314 timr->sigq->info.si_signo = timr->it_sigev_signo;
306 timr->sigq->info.si_errno = 0;
307 timr->sigq->info.si_code = SI_TIMER; 315 timr->sigq->info.si_code = SI_TIMER;
308 timr->sigq->info.si_tid = timr->it_id; 316 timr->sigq->info.si_tid = timr->it_id;
309 timr->sigq->info.si_value = timr->it_sigev_value; 317 timr->sigq->info.si_value = timr->it_sigev_value;
@@ -435,6 +443,7 @@ static struct k_itimer * alloc_posix_timer(void)
435 kmem_cache_free(posix_timers_cache, tmr); 443 kmem_cache_free(posix_timers_cache, tmr);
436 tmr = NULL; 444 tmr = NULL;
437 } 445 }
446 memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
438 return tmr; 447 return tmr;
439} 448}
440 449
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f011e0870b52..bbd85c60f741 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -21,6 +21,7 @@
21#include <linux/console.h> 21#include <linux/console.h>
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <linux/freezer.h> 23#include <linux/freezer.h>
24#include <linux/ftrace.h>
24 25
25#include "power.h" 26#include "power.h"
26 27
@@ -255,7 +256,7 @@ static int create_image(int platform_mode)
255 256
256int hibernation_snapshot(int platform_mode) 257int hibernation_snapshot(int platform_mode)
257{ 258{
258 int error; 259 int error, ftrace_save;
259 260
260 /* Free memory before shutting down devices. */ 261 /* Free memory before shutting down devices. */
261 error = swsusp_shrink_memory(); 262 error = swsusp_shrink_memory();
@@ -267,6 +268,7 @@ int hibernation_snapshot(int platform_mode)
267 goto Close; 268 goto Close;
268 269
269 suspend_console(); 270 suspend_console();
271 ftrace_save = __ftrace_enabled_save();
270 error = device_suspend(PMSG_FREEZE); 272 error = device_suspend(PMSG_FREEZE);
271 if (error) 273 if (error)
272 goto Recover_platform; 274 goto Recover_platform;
@@ -296,6 +298,7 @@ int hibernation_snapshot(int platform_mode)
296 Resume_devices: 298 Resume_devices:
297 device_resume(in_suspend ? 299 device_resume(in_suspend ?
298 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 300 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
301 __ftrace_enabled_restore(ftrace_save);
299 resume_console(); 302 resume_console();
300 Close: 303 Close:
301 platform_end(platform_mode); 304 platform_end(platform_mode);
@@ -366,10 +369,11 @@ static int resume_target_kernel(void)
366 369
367int hibernation_restore(int platform_mode) 370int hibernation_restore(int platform_mode)
368{ 371{
369 int error; 372 int error, ftrace_save;
370 373
371 pm_prepare_console(); 374 pm_prepare_console();
372 suspend_console(); 375 suspend_console();
376 ftrace_save = __ftrace_enabled_save();
373 error = device_suspend(PMSG_QUIESCE); 377 error = device_suspend(PMSG_QUIESCE);
374 if (error) 378 if (error)
375 goto Finish; 379 goto Finish;
@@ -384,6 +388,7 @@ int hibernation_restore(int platform_mode)
384 platform_restore_cleanup(platform_mode); 388 platform_restore_cleanup(platform_mode);
385 device_resume(PMSG_RECOVER); 389 device_resume(PMSG_RECOVER);
386 Finish: 390 Finish:
391 __ftrace_enabled_restore(ftrace_save);
387 resume_console(); 392 resume_console();
388 pm_restore_console(); 393 pm_restore_console();
389 return error; 394 return error;
@@ -396,7 +401,7 @@ int hibernation_restore(int platform_mode)
396 401
397int hibernation_platform_enter(void) 402int hibernation_platform_enter(void)
398{ 403{
399 int error; 404 int error, ftrace_save;
400 405
401 if (!hibernation_ops) 406 if (!hibernation_ops)
402 return -ENOSYS; 407 return -ENOSYS;
@@ -411,6 +416,7 @@ int hibernation_platform_enter(void)
411 goto Close; 416 goto Close;
412 417
413 suspend_console(); 418 suspend_console();
419 ftrace_save = __ftrace_enabled_save();
414 error = device_suspend(PMSG_HIBERNATE); 420 error = device_suspend(PMSG_HIBERNATE);
415 if (error) { 421 if (error) {
416 if (hibernation_ops->recover) 422 if (hibernation_ops->recover)
@@ -445,6 +451,7 @@ int hibernation_platform_enter(void)
445 hibernation_ops->finish(); 451 hibernation_ops->finish();
446 Resume_devices: 452 Resume_devices:
447 device_resume(PMSG_RESTORE); 453 device_resume(PMSG_RESTORE);
454 __ftrace_enabled_restore(ftrace_save);
448 resume_console(); 455 resume_console();
449 Close: 456 Close:
450 hibernation_ops->end(); 457 hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0b7476f5d2a6..540b16b68565 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -21,6 +21,7 @@
21#include <linux/freezer.h> 21#include <linux/freezer.h>
22#include <linux/vmstat.h> 22#include <linux/vmstat.h>
23#include <linux/syscalls.h> 23#include <linux/syscalls.h>
24#include <linux/ftrace.h>
24 25
25#include "power.h" 26#include "power.h"
26 27
@@ -310,7 +311,7 @@ static int suspend_enter(suspend_state_t state)
310 */ 311 */
311int suspend_devices_and_enter(suspend_state_t state) 312int suspend_devices_and_enter(suspend_state_t state)
312{ 313{
313 int error; 314 int error, ftrace_save;
314 315
315 if (!suspend_ops) 316 if (!suspend_ops)
316 return -ENOSYS; 317 return -ENOSYS;
@@ -321,6 +322,7 @@ int suspend_devices_and_enter(suspend_state_t state)
321 goto Close; 322 goto Close;
322 } 323 }
323 suspend_console(); 324 suspend_console();
325 ftrace_save = __ftrace_enabled_save();
324 suspend_test_start(); 326 suspend_test_start();
325 error = device_suspend(PMSG_SUSPEND); 327 error = device_suspend(PMSG_SUSPEND);
326 if (error) { 328 if (error) {
@@ -352,6 +354,7 @@ int suspend_devices_and_enter(suspend_state_t state)
352 suspend_test_start(); 354 suspend_test_start();
353 device_resume(PMSG_RESUME); 355 device_resume(PMSG_RESUME);
354 suspend_test_finish("resume devices"); 356 suspend_test_finish("resume devices");
357 __ftrace_enabled_restore(ftrace_save);
355 resume_console(); 358 resume_console();
356 Close: 359 Close:
357 if (suspend_ops->end) 360 if (suspend_ops->end)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a0abf9a463f9..80ccac849e46 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -14,7 +14,6 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/utsname.h> 16#include <linux/utsname.h>
17#include <linux/version.h>
18#include <linux/delay.h> 17#include <linux/delay.h>
19#include <linux/bitops.h> 18#include <linux/bitops.h>
20#include <linux/genhd.h> 19#include <linux/genhd.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 082b3fcb32a0..356699a96d56 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -140,7 +140,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
140 if (!dumpable && !capable(CAP_SYS_PTRACE)) 140 if (!dumpable && !capable(CAP_SYS_PTRACE))
141 return -EPERM; 141 return -EPERM;
142 142
143 return security_ptrace(current, task, mode); 143 return security_ptrace_may_access(task, mode);
144} 144}
145 145
146bool ptrace_may_access(struct task_struct *task, unsigned int mode) 146bool ptrace_may_access(struct task_struct *task, unsigned int mode)
@@ -499,8 +499,7 @@ repeat:
499 goto repeat; 499 goto repeat;
500 } 500 }
501 501
502 ret = security_ptrace(current->parent, current, 502 ret = security_ptrace_traceme(current->parent);
503 PTRACE_MODE_ATTACH);
504 503
505 /* 504 /*
506 * Set the ptrace bit in the process ptrace flags. 505 * Set the ptrace bit in the process ptrace flags.
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f14f372cf6f5..467d5940f624 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -77,6 +77,7 @@ void wakeme_after_rcu(struct rcu_head *head)
77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
78 * and may be nested. 78 * and may be nested.
79 */ 79 */
80void synchronize_rcu(void); /* Makes kernel-doc tools happy */
80synchronize_rcu_xxx(synchronize_rcu, call_rcu) 81synchronize_rcu_xxx(synchronize_rcu, call_rcu)
81EXPORT_SYMBOL_GPL(synchronize_rcu); 82EXPORT_SYMBOL_GPL(synchronize_rcu);
82 83
diff --git a/kernel/resource.c b/kernel/resource.c
index f5b518eabefe..03d796c1b2e9 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -362,35 +362,21 @@ int allocate_resource(struct resource *root, struct resource *new,
362 362
363EXPORT_SYMBOL(allocate_resource); 363EXPORT_SYMBOL(allocate_resource);
364 364
365/** 365/*
366 * insert_resource - Inserts a resource in the resource tree 366 * Insert a resource into the resource tree. If successful, return NULL,
367 * @parent: parent of the new resource 367 * otherwise return the conflicting resource (compare to __request_resource())
368 * @new: new resource to insert
369 *
370 * Returns 0 on success, -EBUSY if the resource can't be inserted.
371 *
372 * This function is equivalent to request_resource when no conflict
373 * happens. If a conflict happens, and the conflicting resources
374 * entirely fit within the range of the new resource, then the new
375 * resource is inserted and the conflicting resources become children of
376 * the new resource.
377 */ 368 */
378int insert_resource(struct resource *parent, struct resource *new) 369static struct resource * __insert_resource(struct resource *parent, struct resource *new)
379{ 370{
380 int result;
381 struct resource *first, *next; 371 struct resource *first, *next;
382 372
383 write_lock(&resource_lock);
384
385 for (;; parent = first) { 373 for (;; parent = first) {
386 result = 0;
387 first = __request_resource(parent, new); 374 first = __request_resource(parent, new);
388 if (!first) 375 if (!first)
389 goto out; 376 return first;
390 377
391 result = -EBUSY;
392 if (first == parent) 378 if (first == parent)
393 goto out; 379 return first;
394 380
395 if ((first->start > new->start) || (first->end < new->end)) 381 if ((first->start > new->start) || (first->end < new->end))
396 break; 382 break;
@@ -401,15 +387,13 @@ int insert_resource(struct resource *parent, struct resource *new)
401 for (next = first; ; next = next->sibling) { 387 for (next = first; ; next = next->sibling) {
402 /* Partial overlap? Bad, and unfixable */ 388 /* Partial overlap? Bad, and unfixable */
403 if (next->start < new->start || next->end > new->end) 389 if (next->start < new->start || next->end > new->end)
404 goto out; 390 return next;
405 if (!next->sibling) 391 if (!next->sibling)
406 break; 392 break;
407 if (next->sibling->start > new->end) 393 if (next->sibling->start > new->end)
408 break; 394 break;
409 } 395 }
410 396
411 result = 0;
412
413 new->parent = parent; 397 new->parent = parent;
414 new->sibling = next->sibling; 398 new->sibling = next->sibling;
415 new->child = first; 399 new->child = first;
@@ -426,10 +410,64 @@ int insert_resource(struct resource *parent, struct resource *new)
426 next = next->sibling; 410 next = next->sibling;
427 next->sibling = new; 411 next->sibling = new;
428 } 412 }
413 return NULL;
414}
429 415
430 out: 416/**
417 * insert_resource - Inserts a resource in the resource tree
418 * @parent: parent of the new resource
419 * @new: new resource to insert
420 *
421 * Returns 0 on success, -EBUSY if the resource can't be inserted.
422 *
423 * This function is equivalent to request_resource when no conflict
424 * happens. If a conflict happens, and the conflicting resources
425 * entirely fit within the range of the new resource, then the new
426 * resource is inserted and the conflicting resources become children of
427 * the new resource.
428 */
429int insert_resource(struct resource *parent, struct resource *new)
430{
431 struct resource *conflict;
432
433 write_lock(&resource_lock);
434 conflict = __insert_resource(parent, new);
435 write_unlock(&resource_lock);
436 return conflict ? -EBUSY : 0;
437}
438
439/**
440 * insert_resource_expand_to_fit - Insert a resource into the resource tree
441 * @root: root resource descriptor
442 * @new: new resource to insert
443 *
444 * Insert a resource into the resource tree, possibly expanding it in order
445 * to make it encompass any conflicting resources.
446 */
447void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
448{
449 if (new->parent)
450 return;
451
452 write_lock(&resource_lock);
453 for (;;) {
454 struct resource *conflict;
455
456 conflict = __insert_resource(root, new);
457 if (!conflict)
458 break;
459 if (conflict == root)
460 break;
461
462 /* Ok, expand resource to cover the conflict, then try again .. */
463 if (conflict->start < new->start)
464 new->start = conflict->start;
465 if (conflict->end > new->end)
466 new->end = conflict->end;
467
468 printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name);
469 }
431 write_unlock(&resource_lock); 470 write_unlock(&resource_lock);
432 return result;
433} 471}
434 472
435/** 473/**
diff --git a/kernel/sched.c b/kernel/sched.c
index 04160d277e7a..9a1ddb84e26d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -600,7 +600,6 @@ struct rq {
600 /* BKL stats */ 600 /* BKL stats */
601 unsigned int bkl_count; 601 unsigned int bkl_count;
602#endif 602#endif
603 struct lock_class_key rq_lock_key;
604}; 603};
605 604
606static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 605static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -809,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
809 808
810/* 809/*
811 * ratelimit for updating the group shares. 810 * ratelimit for updating the group shares.
812 * default: 0.5ms 811 * default: 0.25ms
813 */ 812 */
814const_debug unsigned int sysctl_sched_shares_ratelimit = 500000; 813unsigned int sysctl_sched_shares_ratelimit = 250000;
815 814
816/* 815/*
817 * period over which we measure -rt task cpu usage in us. 816 * period over which we measure -rt task cpu usage in us.
@@ -834,7 +833,7 @@ static inline u64 global_rt_period(void)
834 833
835static inline u64 global_rt_runtime(void) 834static inline u64 global_rt_runtime(void)
836{ 835{
837 if (sysctl_sched_rt_period < 0) 836 if (sysctl_sched_rt_runtime < 0)
838 return RUNTIME_INF; 837 return RUNTIME_INF;
839 838
840 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; 839 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
@@ -2759,10 +2758,10 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
2759 } else { 2758 } else {
2760 if (rq1 < rq2) { 2759 if (rq1 < rq2) {
2761 spin_lock(&rq1->lock); 2760 spin_lock(&rq1->lock);
2762 spin_lock(&rq2->lock); 2761 spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
2763 } else { 2762 } else {
2764 spin_lock(&rq2->lock); 2763 spin_lock(&rq2->lock);
2765 spin_lock(&rq1->lock); 2764 spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
2766 } 2765 }
2767 } 2766 }
2768 update_rq_clock(rq1); 2767 update_rq_clock(rq1);
@@ -2805,14 +2804,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2805 if (busiest < this_rq) { 2804 if (busiest < this_rq) {
2806 spin_unlock(&this_rq->lock); 2805 spin_unlock(&this_rq->lock);
2807 spin_lock(&busiest->lock); 2806 spin_lock(&busiest->lock);
2808 spin_lock(&this_rq->lock); 2807 spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
2809 ret = 1; 2808 ret = 1;
2810 } else 2809 } else
2811 spin_lock(&busiest->lock); 2810 spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
2812 } 2811 }
2813 return ret; 2812 return ret;
2814} 2813}
2815 2814
2815static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2816 __releases(busiest->lock)
2817{
2818 spin_unlock(&busiest->lock);
2819 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
2820}
2821
2816/* 2822/*
2817 * If dest_cpu is allowed for this process, migrate the task to it. 2823 * If dest_cpu is allowed for this process, migrate the task to it.
2818 * This is accomplished by forcing the cpu_allowed mask to only 2824 * This is accomplished by forcing the cpu_allowed mask to only
@@ -3637,7 +3643,7 @@ redo:
3637 ld_moved = move_tasks(this_rq, this_cpu, busiest, 3643 ld_moved = move_tasks(this_rq, this_cpu, busiest,
3638 imbalance, sd, CPU_NEWLY_IDLE, 3644 imbalance, sd, CPU_NEWLY_IDLE,
3639 &all_pinned); 3645 &all_pinned);
3640 spin_unlock(&busiest->lock); 3646 double_unlock_balance(this_rq, busiest);
3641 3647
3642 if (unlikely(all_pinned)) { 3648 if (unlikely(all_pinned)) {
3643 cpu_clear(cpu_of(busiest), *cpus); 3649 cpu_clear(cpu_of(busiest), *cpus);
@@ -3752,7 +3758,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3752 else 3758 else
3753 schedstat_inc(sd, alb_failed); 3759 schedstat_inc(sd, alb_failed);
3754 } 3760 }
3755 spin_unlock(&target_rq->lock); 3761 double_unlock_balance(busiest_rq, target_rq);
3756} 3762}
3757 3763
3758#ifdef CONFIG_NO_HZ 3764#ifdef CONFIG_NO_HZ
@@ -4663,6 +4669,52 @@ int __sched wait_for_completion_killable(struct completion *x)
4663} 4669}
4664EXPORT_SYMBOL(wait_for_completion_killable); 4670EXPORT_SYMBOL(wait_for_completion_killable);
4665 4671
4672/**
4673 * try_wait_for_completion - try to decrement a completion without blocking
4674 * @x: completion structure
4675 *
4676 * Returns: 0 if a decrement cannot be done without blocking
4677 * 1 if a decrement succeeded.
4678 *
4679 * If a completion is being used as a counting completion,
4680 * attempt to decrement the counter without blocking. This
4681 * enables us to avoid waiting if the resource the completion
4682 * is protecting is not available.
4683 */
4684bool try_wait_for_completion(struct completion *x)
4685{
4686 int ret = 1;
4687
4688 spin_lock_irq(&x->wait.lock);
4689 if (!x->done)
4690 ret = 0;
4691 else
4692 x->done--;
4693 spin_unlock_irq(&x->wait.lock);
4694 return ret;
4695}
4696EXPORT_SYMBOL(try_wait_for_completion);
4697
4698/**
4699 * completion_done - Test to see if a completion has any waiters
4700 * @x: completion structure
4701 *
4702 * Returns: 0 if ->done is zero (waiters, if any, are still blocked)
4703 * 1 if ->done is non-zero (a waiter would not block).
4704 *
4705 */
4706bool completion_done(struct completion *x)
4707{
4708 int ret = 1;
4709
4710 spin_lock_irq(&x->wait.lock);
4711 if (!x->done)
4712 ret = 0;
4713 spin_unlock_irq(&x->wait.lock);
4714 return ret;
4715}
4716EXPORT_SYMBOL(completion_done);
4717
4666static long __sched 4718static long __sched
4667sleep_on_common(wait_queue_head_t *q, int state, long timeout) 4719sleep_on_common(wait_queue_head_t *q, int state, long timeout)
4668{ 4720{
@@ -5734,6 +5786,8 @@ static inline void sched_init_granularity(void)
5734 sysctl_sched_latency = limit; 5786 sysctl_sched_latency = limit;
5735 5787
5736 sysctl_sched_wakeup_granularity *= factor; 5788 sysctl_sched_wakeup_granularity *= factor;
5789
5790 sysctl_sched_shares_ratelimit *= factor;
5737} 5791}
5738 5792
5739#ifdef CONFIG_SMP 5793#ifdef CONFIG_SMP
@@ -8000,7 +8054,6 @@ void __init sched_init(void)
8000 8054
8001 rq = cpu_rq(i); 8055 rq = cpu_rq(i);
8002 spin_lock_init(&rq->lock); 8056 spin_lock_init(&rq->lock);
8003 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
8004 rq->nr_running = 0; 8057 rq->nr_running = 0;
8005 init_cfs_rq(&rq->cfs, rq); 8058 init_cfs_rq(&rq->cfs, rq);
8006 init_rt_rq(&rq->rt, rq); 8059 init_rt_rq(&rq->rt, rq);
@@ -8457,8 +8510,8 @@ struct task_group *sched_create_group(struct task_group *parent)
8457 WARN_ON(!parent); /* root should already exist */ 8510 WARN_ON(!parent); /* root should already exist */
8458 8511
8459 tg->parent = parent; 8512 tg->parent = parent;
8460 list_add_rcu(&tg->siblings, &parent->children);
8461 INIT_LIST_HEAD(&tg->children); 8513 INIT_LIST_HEAD(&tg->children);
8514 list_add_rcu(&tg->siblings, &parent->children);
8462 spin_unlock_irqrestore(&task_group_lock, flags); 8515 spin_unlock_irqrestore(&task_group_lock, flags);
8463 8516
8464 return tg; 8517 return tg;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 22ed55d1167f..e8ab096ddfe3 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -12,19 +12,17 @@
12 * 12 *
13 * Create a semi stable clock from a mixture of other events, including: 13 * Create a semi stable clock from a mixture of other events, including:
14 * - gtod 14 * - gtod
15 * - jiffies
16 * - sched_clock() 15 * - sched_clock()
17 * - explicit idle events 16 * - explicit idle events
18 * 17 *
19 * We use gtod as base and the unstable clock deltas. The deltas are filtered, 18 * We use gtod as base and the unstable clock deltas. The deltas are filtered,
20 * making it monotonic and keeping it within an expected window. This window 19 * making it monotonic and keeping it within an expected window.
21 * is set up using jiffies.
22 * 20 *
23 * Furthermore, explicit sleep and wakeup hooks allow us to account for time 21 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
24 * that is otherwise invisible (TSC gets stopped). 22 * that is otherwise invisible (TSC gets stopped).
25 * 23 *
26 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat 24 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
27 * consistent between cpus (never more than 1 jiffies difference). 25 * consistent between cpus (never more than 2 jiffies difference).
28 */ 26 */
29#include <linux/sched.h> 27#include <linux/sched.h>
30#include <linux/percpu.h> 28#include <linux/percpu.h>
@@ -32,13 +30,19 @@
32#include <linux/ktime.h> 30#include <linux/ktime.h>
33#include <linux/module.h> 31#include <linux/module.h>
34 32
33/*
34 * Scheduler clock - returns current time in nanosec units.
35 * This is the default implementation.
36 * Architectures and sub-architectures can override this.
37 */
38unsigned long long __attribute__((weak)) sched_clock(void)
39{
40 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
41}
35 42
36#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 43static __read_mostly int sched_clock_running;
37 44
38#define MULTI_SHIFT 15 45#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
39/* Max is double, Min is 1/2 */
40#define MAX_MULTI (2LL << MULTI_SHIFT)
41#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
42 46
43struct sched_clock_data { 47struct sched_clock_data {
44 /* 48 /*
@@ -48,15 +52,9 @@ struct sched_clock_data {
48 */ 52 */
49 raw_spinlock_t lock; 53 raw_spinlock_t lock;
50 54
51 unsigned long tick_jiffies;
52 u64 prev_raw;
53 u64 tick_raw; 55 u64 tick_raw;
54 u64 tick_gtod; 56 u64 tick_gtod;
55 u64 clock; 57 u64 clock;
56 s64 multi;
57#ifdef CONFIG_NO_HZ
58 int check_max;
59#endif
60}; 58};
61 59
62static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); 60static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
@@ -71,121 +69,69 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
71 return &per_cpu(sched_clock_data, cpu); 69 return &per_cpu(sched_clock_data, cpu);
72} 70}
73 71
74static __read_mostly int sched_clock_running;
75
76void sched_clock_init(void) 72void sched_clock_init(void)
77{ 73{
78 u64 ktime_now = ktime_to_ns(ktime_get()); 74 u64 ktime_now = ktime_to_ns(ktime_get());
79 unsigned long now_jiffies = jiffies;
80 int cpu; 75 int cpu;
81 76
82 for_each_possible_cpu(cpu) { 77 for_each_possible_cpu(cpu) {
83 struct sched_clock_data *scd = cpu_sdc(cpu); 78 struct sched_clock_data *scd = cpu_sdc(cpu);
84 79
85 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 80 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
86 scd->tick_jiffies = now_jiffies;
87 scd->prev_raw = 0;
88 scd->tick_raw = 0; 81 scd->tick_raw = 0;
89 scd->tick_gtod = ktime_now; 82 scd->tick_gtod = ktime_now;
90 scd->clock = ktime_now; 83 scd->clock = ktime_now;
91 scd->multi = 1 << MULTI_SHIFT;
92#ifdef CONFIG_NO_HZ
93 scd->check_max = 1;
94#endif
95 } 84 }
96 85
97 sched_clock_running = 1; 86 sched_clock_running = 1;
98} 87}
99 88
100#ifdef CONFIG_NO_HZ
101/* 89/*
102 * The dynamic ticks makes the delta jiffies inaccurate. This 90 * min,max except they take wrapping into account
103 * prevents us from checking the maximum time update.
104 * Disable the maximum check during stopped ticks.
105 */ 91 */
106void sched_clock_tick_stop(int cpu)
107{
108 struct sched_clock_data *scd = cpu_sdc(cpu);
109
110 scd->check_max = 0;
111}
112 92
113void sched_clock_tick_start(int cpu) 93static inline u64 wrap_min(u64 x, u64 y)
114{ 94{
115 struct sched_clock_data *scd = cpu_sdc(cpu); 95 return (s64)(x - y) < 0 ? x : y;
116
117 scd->check_max = 1;
118} 96}
119 97
120static int check_max(struct sched_clock_data *scd) 98static inline u64 wrap_max(u64 x, u64 y)
121{ 99{
122 return scd->check_max; 100 return (s64)(x - y) > 0 ? x : y;
123} 101}
124#else
125static int check_max(struct sched_clock_data *scd)
126{
127 return 1;
128}
129#endif /* CONFIG_NO_HZ */
130 102
131/* 103/*
132 * update the percpu scd from the raw @now value 104 * update the percpu scd from the raw @now value
133 * 105 *
134 * - filter out backward motion 106 * - filter out backward motion
135 * - use jiffies to generate a min,max window to clip the raw values 107 * - use the GTOD tick value to create a window to filter crazy TSC values
136 */ 108 */
137static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *time) 109static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
138{ 110{
139 unsigned long now_jiffies = jiffies; 111 s64 delta = now - scd->tick_raw;
140 long delta_jiffies = now_jiffies - scd->tick_jiffies; 112 u64 clock, min_clock, max_clock;
141 u64 clock = scd->clock;
142 u64 min_clock, max_clock;
143 s64 delta = now - scd->prev_raw;
144 113
145 WARN_ON_ONCE(!irqs_disabled()); 114 WARN_ON_ONCE(!irqs_disabled());
146 115
147 /* 116 if (unlikely(delta < 0))
148 * At schedule tick the clock can be just under the gtod. We don't 117 delta = 0;
149 * want to push it too prematurely.
150 */
151 min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
152 if (min_clock > TICK_NSEC)
153 min_clock -= TICK_NSEC / 2;
154
155 if (unlikely(delta < 0)) {
156 clock++;
157 goto out;
158 }
159 118
160 /* 119 /*
161 * The clock must stay within a jiffie of the gtod. 120 * scd->clock = clamp(scd->tick_gtod + delta,
162 * But since we may be at the start of a jiffy or the end of one 121 * max(scd->tick_gtod, scd->clock),
163 * we add another jiffy buffer. 122 * scd->tick_gtod + TICK_NSEC);
164 */ 123 */
165 max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
166 124
167 delta *= scd->multi; 125 clock = scd->tick_gtod + delta;
168 delta >>= MULTI_SHIFT; 126 min_clock = wrap_max(scd->tick_gtod, scd->clock);
127 max_clock = scd->tick_gtod + TICK_NSEC;
169 128
170 if (unlikely(clock + delta > max_clock) && check_max(scd)) { 129 clock = wrap_max(clock, min_clock);
171 if (clock < max_clock) 130 clock = wrap_min(clock, max_clock);
172 clock = max_clock;
173 else
174 clock++;
175 } else {
176 clock += delta;
177 }
178 131
179 out: 132 scd->clock = clock;
180 if (unlikely(clock < min_clock))
181 clock = min_clock;
182 133
183 if (time) 134 return scd->clock;
184 *time = clock;
185 else {
186 scd->prev_raw = now;
187 scd->clock = clock;
188 }
189} 135}
190 136
191static void lock_double_clock(struct sched_clock_data *data1, 137static void lock_double_clock(struct sched_clock_data *data1,
@@ -203,7 +149,7 @@ static void lock_double_clock(struct sched_clock_data *data1,
203u64 sched_clock_cpu(int cpu) 149u64 sched_clock_cpu(int cpu)
204{ 150{
205 struct sched_clock_data *scd = cpu_sdc(cpu); 151 struct sched_clock_data *scd = cpu_sdc(cpu);
206 u64 now, clock; 152 u64 now, clock, this_clock, remote_clock;
207 153
208 if (unlikely(!sched_clock_running)) 154 if (unlikely(!sched_clock_running))
209 return 0ull; 155 return 0ull;
@@ -212,43 +158,44 @@ u64 sched_clock_cpu(int cpu)
212 now = sched_clock(); 158 now = sched_clock();
213 159
214 if (cpu != raw_smp_processor_id()) { 160 if (cpu != raw_smp_processor_id()) {
215 /*
216 * in order to update a remote cpu's clock based on our
217 * unstable raw time rebase it against:
218 * tick_raw (offset between raw counters)
219 * tick_gotd (tick offset between cpus)
220 */
221 struct sched_clock_data *my_scd = this_scd(); 161 struct sched_clock_data *my_scd = this_scd();
222 162
223 lock_double_clock(scd, my_scd); 163 lock_double_clock(scd, my_scd);
224 164
225 now -= my_scd->tick_raw; 165 this_clock = __update_sched_clock(my_scd, now);
226 now += scd->tick_raw; 166 remote_clock = scd->clock;
227 167
228 now += my_scd->tick_gtod; 168 /*
229 now -= scd->tick_gtod; 169 * Use the opportunity that we have both locks
170 * taken to couple the two clocks: we take the
171 * larger time as the latest time for both
172 * runqueues. (this creates monotonic movement)
173 */
174 if (likely((s64)(remote_clock - this_clock) < 0)) {
175 clock = this_clock;
176 scd->clock = clock;
177 } else {
178 /*
179 * Should be rare, but possible:
180 */
181 clock = remote_clock;
182 my_scd->clock = remote_clock;
183 }
230 184
231 __raw_spin_unlock(&my_scd->lock); 185 __raw_spin_unlock(&my_scd->lock);
232
233 __update_sched_clock(scd, now, &clock);
234
235 __raw_spin_unlock(&scd->lock);
236
237 } else { 186 } else {
238 __raw_spin_lock(&scd->lock); 187 __raw_spin_lock(&scd->lock);
239 __update_sched_clock(scd, now, NULL); 188 clock = __update_sched_clock(scd, now);
240 clock = scd->clock;
241 __raw_spin_unlock(&scd->lock);
242 } 189 }
243 190
191 __raw_spin_unlock(&scd->lock);
192
244 return clock; 193 return clock;
245} 194}
246 195
247void sched_clock_tick(void) 196void sched_clock_tick(void)
248{ 197{
249 struct sched_clock_data *scd = this_scd(); 198 struct sched_clock_data *scd = this_scd();
250 unsigned long now_jiffies = jiffies;
251 s64 mult, delta_gtod, delta_raw;
252 u64 now, now_gtod; 199 u64 now, now_gtod;
253 200
254 if (unlikely(!sched_clock_running)) 201 if (unlikely(!sched_clock_running))
@@ -260,29 +207,9 @@ void sched_clock_tick(void)
260 now = sched_clock(); 207 now = sched_clock();
261 208
262 __raw_spin_lock(&scd->lock); 209 __raw_spin_lock(&scd->lock);
263 __update_sched_clock(scd, now, NULL);
264 /*
265 * update tick_gtod after __update_sched_clock() because that will
266 * already observe 1 new jiffy; adding a new tick_gtod to that would
267 * increase the clock 2 jiffies.
268 */
269 delta_gtod = now_gtod - scd->tick_gtod;
270 delta_raw = now - scd->tick_raw;
271
272 if ((long)delta_raw > 0) {
273 mult = delta_gtod << MULTI_SHIFT;
274 do_div(mult, delta_raw);
275 scd->multi = mult;
276 if (scd->multi > MAX_MULTI)
277 scd->multi = MAX_MULTI;
278 else if (scd->multi < MIN_MULTI)
279 scd->multi = MIN_MULTI;
280 } else
281 scd->multi = 1 << MULTI_SHIFT;
282
283 scd->tick_raw = now; 210 scd->tick_raw = now;
284 scd->tick_gtod = now_gtod; 211 scd->tick_gtod = now_gtod;
285 scd->tick_jiffies = now_jiffies; 212 __update_sched_clock(scd, now);
286 __raw_spin_unlock(&scd->lock); 213 __raw_spin_unlock(&scd->lock);
287} 214}
288 215
@@ -300,37 +227,28 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
300 */ 227 */
301void sched_clock_idle_wakeup_event(u64 delta_ns) 228void sched_clock_idle_wakeup_event(u64 delta_ns)
302{ 229{
303 struct sched_clock_data *scd = this_scd(); 230 sched_clock_tick();
304 u64 now = sched_clock();
305
306 /*
307 * Override the previous timestamp and ignore all
308 * sched_clock() deltas that occured while we idled,
309 * and use the PM-provided delta_ns to advance the
310 * rq clock:
311 */
312 __raw_spin_lock(&scd->lock);
313 scd->prev_raw = now;
314 scd->clock += delta_ns;
315 scd->multi = 1 << MULTI_SHIFT;
316 __raw_spin_unlock(&scd->lock);
317
318 touch_softlockup_watchdog(); 231 touch_softlockup_watchdog();
319} 232}
320EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 233EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
321 234
322#endif 235#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
323 236
324/* 237void sched_clock_init(void)
325 * Scheduler clock - returns current time in nanosec units.
326 * This is default implementation.
327 * Architectures and sub-architectures can override this.
328 */
329unsigned long long __attribute__((weak)) sched_clock(void)
330{ 238{
331 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); 239 sched_clock_running = 1;
332} 240}
333 241
242u64 sched_clock_cpu(int cpu)
243{
244 if (unlikely(!sched_clock_running))
245 return 0;
246
247 return sched_clock();
248}
249
250#endif
251
334unsigned long long cpu_clock(int cpu) 252unsigned long long cpu_clock(int cpu)
335{ 253{
336 unsigned long long clock; 254 unsigned long long clock;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cf2cd6ce4cb2..fb8994c6d4bb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -899,7 +899,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
899 * doesn't make sense. Rely on vruntime for fairness. 899 * doesn't make sense. Rely on vruntime for fairness.
900 */ 900 */
901 if (rq->curr != p) 901 if (rq->curr != p)
902 delta = max(10000LL, delta); 902 delta = max_t(s64, 10000LL, delta);
903 903
904 hrtick_start(rq, delta); 904 hrtick_start(rq, delta);
905 } 905 }
@@ -1442,18 +1442,23 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
1442 struct task_struct *p = NULL; 1442 struct task_struct *p = NULL;
1443 struct sched_entity *se; 1443 struct sched_entity *se;
1444 1444
1445 while (next != &cfs_rq->tasks) { 1445 if (next == &cfs_rq->tasks)
1446 return NULL;
1447
1448 /* Skip over entities that are not tasks */
1449 do {
1446 se = list_entry(next, struct sched_entity, group_node); 1450 se = list_entry(next, struct sched_entity, group_node);
1447 next = next->next; 1451 next = next->next;
1452 } while (next != &cfs_rq->tasks && !entity_is_task(se));
1448 1453
1449 /* Skip over entities that are not tasks */ 1454 if (next == &cfs_rq->tasks)
1450 if (entity_is_task(se)) { 1455 return NULL;
1451 p = task_of(se);
1452 break;
1453 }
1454 }
1455 1456
1456 cfs_rq->balance_iterator = next; 1457 cfs_rq->balance_iterator = next;
1458
1459 if (entity_is_task(se))
1460 p = task_of(se);
1461
1457 return p; 1462 return p;
1458} 1463}
1459 1464
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 862b06bd560a..9353ca78154e 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -8,6 +8,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1)
8SCHED_FEAT(HRTICK, 1) 8SCHED_FEAT(HRTICK, 1)
9SCHED_FEAT(DOUBLE_TICK, 0) 9SCHED_FEAT(DOUBLE_TICK, 0)
10SCHED_FEAT(ASYM_GRAN, 1) 10SCHED_FEAT(ASYM_GRAN, 1)
11SCHED_FEAT(LB_BIAS, 0) 11SCHED_FEAT(LB_BIAS, 1)
12SCHED_FEAT(LB_WAKEUP_UPDATE, 1) 12SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 908c04f9dad0..552310798dad 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -199,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
199 199
200static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 200static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
201{ 201{
202 if (rt_rq->rt_nr_running)
203 resched_task(rq_of_rt_rq(rt_rq)->curr);
202} 204}
203 205
204static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 206static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -298,7 +300,7 @@ static void __disable_runtime(struct rq *rq)
298 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 300 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
299 s64 diff; 301 s64 diff;
300 302
301 if (iter == rt_rq) 303 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
302 continue; 304 continue;
303 305
304 spin_lock(&iter->rt_runtime_lock); 306 spin_lock(&iter->rt_runtime_lock);
@@ -438,9 +440,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
438{ 440{
439 u64 runtime = sched_rt_runtime(rt_rq); 441 u64 runtime = sched_rt_runtime(rt_rq);
440 442
441 if (runtime == RUNTIME_INF)
442 return 0;
443
444 if (rt_rq->rt_throttled) 443 if (rt_rq->rt_throttled)
445 return rt_rq_throttled(rt_rq); 444 return rt_rq_throttled(rt_rq);
446 445
@@ -491,9 +490,11 @@ static void update_curr_rt(struct rq *rq)
491 rt_rq = rt_rq_of_se(rt_se); 490 rt_rq = rt_rq_of_se(rt_se);
492 491
493 spin_lock(&rt_rq->rt_runtime_lock); 492 spin_lock(&rt_rq->rt_runtime_lock);
494 rt_rq->rt_time += delta_exec; 493 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
495 if (sched_rt_runtime_exceeded(rt_rq)) 494 rt_rq->rt_time += delta_exec;
496 resched_task(curr); 495 if (sched_rt_runtime_exceeded(rt_rq))
496 resched_task(curr);
497 }
497 spin_unlock(&rt_rq->rt_runtime_lock); 498 spin_unlock(&rt_rq->rt_runtime_lock);
498 } 499 }
499} 500}
@@ -861,6 +862,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
861#define RT_MAX_TRIES 3 862#define RT_MAX_TRIES 3
862 863
863static int double_lock_balance(struct rq *this_rq, struct rq *busiest); 864static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
865static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
866
864static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); 867static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
865 868
866static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 869static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
@@ -1022,7 +1025,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1022 break; 1025 break;
1023 1026
1024 /* try again */ 1027 /* try again */
1025 spin_unlock(&lowest_rq->lock); 1028 double_unlock_balance(rq, lowest_rq);
1026 lowest_rq = NULL; 1029 lowest_rq = NULL;
1027 } 1030 }
1028 1031
@@ -1091,7 +1094,7 @@ static int push_rt_task(struct rq *rq)
1091 1094
1092 resched_task(lowest_rq->curr); 1095 resched_task(lowest_rq->curr);
1093 1096
1094 spin_unlock(&lowest_rq->lock); 1097 double_unlock_balance(rq, lowest_rq);
1095 1098
1096 ret = 1; 1099 ret = 1;
1097out: 1100out:
@@ -1197,7 +1200,7 @@ static int pull_rt_task(struct rq *this_rq)
1197 1200
1198 } 1201 }
1199 skip: 1202 skip:
1200 spin_unlock(&src_rq->lock); 1203 double_unlock_balance(this_rq, src_rq);
1201 } 1204 }
1202 1205
1203 return ret; 1206 return ret;
diff --git a/kernel/signal.c b/kernel/signal.c
index 954f77d7e3bc..e661b01d340f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1304,6 +1304,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1304 q->info.si_overrun++; 1304 q->info.si_overrun++;
1305 goto out; 1305 goto out;
1306 } 1306 }
1307 q->info.si_overrun = 0;
1307 1308
1308 signalfd_notify(t, sig); 1309 signalfd_notify(t, sig);
1309 pending = group ? &t->signal->shared_pending : &t->pending; 1310 pending = group ? &t->signal->shared_pending : &t->pending;
@@ -1337,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1337 struct siginfo info; 1338 struct siginfo info;
1338 unsigned long flags; 1339 unsigned long flags;
1339 struct sighand_struct *psig; 1340 struct sighand_struct *psig;
1341 int ret = sig;
1340 1342
1341 BUG_ON(sig == -1); 1343 BUG_ON(sig == -1);
1342 1344
@@ -1401,7 +1403,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1401 * is implementation-defined: we do (if you don't want 1403 * is implementation-defined: we do (if you don't want
1402 * it, just use SIG_IGN instead). 1404 * it, just use SIG_IGN instead).
1403 */ 1405 */
1404 tsk->exit_signal = -1; 1406 ret = tsk->exit_signal = -1;
1405 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) 1407 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
1406 sig = -1; 1408 sig = -1;
1407 } 1409 }
@@ -1410,7 +1412,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1410 __wake_up_parent(tsk, tsk->parent); 1412 __wake_up_parent(tsk, tsk->parent);
1411 spin_unlock_irqrestore(&psig->siglock, flags); 1413 spin_unlock_irqrestore(&psig->siglock, flags);
1412 1414
1413 return sig; 1415 return ret;
1414} 1416}
1415 1417
1416static void do_notify_parent_cldstop(struct task_struct *tsk, int why) 1418static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
diff --git a/kernel/smp.c b/kernel/smp.c
index 96fc7c0edc59..f362a8553777 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -135,7 +135,8 @@ void generic_smp_call_function_interrupt(void)
135 */ 135 */
136 smp_wmb(); 136 smp_wmb();
137 data->csd.flags &= ~CSD_FLAG_WAIT; 137 data->csd.flags &= ~CSD_FLAG_WAIT;
138 } else 138 }
139 if (data->csd.flags & CSD_FLAG_ALLOC)
139 call_rcu(&data->rcu_head, rcu_free_call_data); 140 call_rcu(&data->rcu_head, rcu_free_call_data);
140 } 141 }
141 rcu_read_unlock(); 142 rcu_read_unlock();
@@ -209,8 +210,10 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
209{ 210{
210 struct call_single_data d; 211 struct call_single_data d;
211 unsigned long flags; 212 unsigned long flags;
212 /* prevent preemption and reschedule on another processor */ 213 /* prevent preemption and reschedule on another processor,
214 as well as CPU removal */
213 int me = get_cpu(); 215 int me = get_cpu();
216 int err = 0;
214 217
215 /* Can deadlock when called with interrupts disabled */ 218 /* Can deadlock when called with interrupts disabled */
216 WARN_ON(irqs_disabled()); 219 WARN_ON(irqs_disabled());
@@ -219,7 +222,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
219 local_irq_save(flags); 222 local_irq_save(flags);
220 func(info); 223 func(info);
221 local_irq_restore(flags); 224 local_irq_restore(flags);
222 } else { 225 } else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
223 struct call_single_data *data = NULL; 226 struct call_single_data *data = NULL;
224 227
225 if (!wait) { 228 if (!wait) {
@@ -235,10 +238,12 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
235 data->func = func; 238 data->func = func;
236 data->info = info; 239 data->info = info;
237 generic_exec_single(cpu, data); 240 generic_exec_single(cpu, data);
241 } else {
242 err = -ENXIO; /* CPU not online */
238 } 243 }
239 244
240 put_cpu(); 245 put_cpu();
241 return 0; 246 return err;
242} 247}
243EXPORT_SYMBOL(smp_call_function_single); 248EXPORT_SYMBOL(smp_call_function_single);
244 249
@@ -260,6 +265,42 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
260 generic_exec_single(cpu, data); 265 generic_exec_single(cpu, data);
261} 266}
262 267
268/* Dummy function */
269static void quiesce_dummy(void *unused)
270{
271}
272
273/*
274 * Ensure stack based data used in call function mask is safe to free.
275 *
276 * This is needed by smp_call_function_mask when using on-stack data, because
277 * a single call function queue is shared by all CPUs, and any CPU may pick up
278 * the data item on the queue at any time before it is deleted. So we need to
279 * ensure that all CPUs have transitioned through a quiescent state after
280 * this call.
281 *
282 * This is a very slow function, implemented by sending synchronous IPIs to
283 * all possible CPUs. For this reason, we have to alloc data rather than use
284 * stack based data even in the case of synchronous calls. The stack based
285 * data is then just used for deadlock/oom fallback which will be very rare.
286 *
287 * If a faster scheme can be made, we could go back to preferring stack based
288 * data -- the data allocation/free is non-zero cost.
289 */
290static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
291{
292 struct call_single_data data;
293 int cpu;
294
295 data.func = quiesce_dummy;
296 data.info = NULL;
297
298 for_each_cpu_mask(cpu, mask) {
299 data.flags = CSD_FLAG_WAIT;
300 generic_exec_single(cpu, &data);
301 }
302}
303
263/** 304/**
264 * smp_call_function_mask(): Run a function on a set of other CPUs. 305 * smp_call_function_mask(): Run a function on a set of other CPUs.
265 * @mask: The set of cpus to run on. 306 * @mask: The set of cpus to run on.
@@ -285,6 +326,7 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
285 cpumask_t allbutself; 326 cpumask_t allbutself;
286 unsigned long flags; 327 unsigned long flags;
287 int cpu, num_cpus; 328 int cpu, num_cpus;
329 int slowpath = 0;
288 330
289 /* Can deadlock when called with interrupts disabled */ 331 /* Can deadlock when called with interrupts disabled */
290 WARN_ON(irqs_disabled()); 332 WARN_ON(irqs_disabled());
@@ -306,15 +348,16 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
306 return smp_call_function_single(cpu, func, info, wait); 348 return smp_call_function_single(cpu, func, info, wait);
307 } 349 }
308 350
309 if (!wait) { 351 data = kmalloc(sizeof(*data), GFP_ATOMIC);
310 data = kmalloc(sizeof(*data), GFP_ATOMIC); 352 if (data) {
311 if (data) 353 data->csd.flags = CSD_FLAG_ALLOC;
312 data->csd.flags = CSD_FLAG_ALLOC; 354 if (wait)
313 } 355 data->csd.flags |= CSD_FLAG_WAIT;
314 if (!data) { 356 } else {
315 data = &d; 357 data = &d;
316 data->csd.flags = CSD_FLAG_WAIT; 358 data->csd.flags = CSD_FLAG_WAIT;
317 wait = 1; 359 wait = 1;
360 slowpath = 1;
318 } 361 }
319 362
320 spin_lock_init(&data->lock); 363 spin_lock_init(&data->lock);
@@ -331,8 +374,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
331 arch_send_call_function_ipi(mask); 374 arch_send_call_function_ipi(mask);
332 375
333 /* optionally wait for the CPUs to complete */ 376 /* optionally wait for the CPUs to complete */
334 if (wait) 377 if (wait) {
335 csd_flag_wait(&data->csd); 378 csd_flag_wait(&data->csd);
379 if (unlikely(slowpath))
380 smp_call_function_mask_quiesce_stack(mask);
381 }
336 382
337 return 0; 383 return 0;
338} 384}
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index b75b492fbfcf..cb838ee93a82 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -233,7 +233,8 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
233 do_each_thread(g, t) { 233 do_each_thread(g, t) {
234 if (!--max_count) 234 if (!--max_count)
235 goto unlock; 235 goto unlock;
236 if (t->state & TASK_UNINTERRUPTIBLE) 236 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
237 if (t->state == TASK_UNINTERRUPTIBLE)
237 check_hung_task(t, now); 238 check_hung_task(t, now);
238 } while_each_thread(g, t); 239 } while_each_thread(g, t);
239 unlock: 240 unlock:
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index a1fb54c93cdd..29ab20749dd3 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -290,8 +290,8 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
290 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); 290 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
291 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); 291 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
292} 292}
293
294EXPORT_SYMBOL(_spin_lock_nested); 293EXPORT_SYMBOL(_spin_lock_nested);
294
295unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) 295unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
296{ 296{
297 unsigned long flags; 297 unsigned long flags;
@@ -311,9 +311,17 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas
311#endif 311#endif
312 return flags; 312 return flags;
313} 313}
314
315EXPORT_SYMBOL(_spin_lock_irqsave_nested); 314EXPORT_SYMBOL(_spin_lock_irqsave_nested);
316 315
316void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
317 struct lockdep_map *nest_lock)
318{
319 preempt_disable();
320 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
321 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
322}
323EXPORT_SYMBOL(_spin_lock_nest_lock);
324
317#endif 325#endif
318 326
319void __lockfunc _spin_unlock(spinlock_t *lock) 327void __lockfunc _spin_unlock(spinlock_t *lock)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e446c7c7d6a9..af3c7cea258b 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -65,7 +65,6 @@ static void ack_state(void)
65static int stop_cpu(struct stop_machine_data *smdata) 65static int stop_cpu(struct stop_machine_data *smdata)
66{ 66{
67 enum stopmachine_state curstate = STOPMACHINE_NONE; 67 enum stopmachine_state curstate = STOPMACHINE_NONE;
68 int uninitialized_var(ret);
69 68
70 /* Simple state machine */ 69 /* Simple state machine */
71 do { 70 do {
diff --git a/kernel/sys.c b/kernel/sys.c
index c01858090a98..038a7bc0901d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -169,9 +169,9 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
169 pgrp = find_vpid(who); 169 pgrp = find_vpid(who);
170 else 170 else
171 pgrp = task_pgrp(current); 171 pgrp = task_pgrp(current);
172 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 172 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
173 error = set_one_prio(p, niceval, error); 173 error = set_one_prio(p, niceval, error);
174 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 174 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
175 break; 175 break;
176 case PRIO_USER: 176 case PRIO_USER:
177 user = current->user; 177 user = current->user;
@@ -229,11 +229,11 @@ asmlinkage long sys_getpriority(int which, int who)
229 pgrp = find_vpid(who); 229 pgrp = find_vpid(who);
230 else 230 else
231 pgrp = task_pgrp(current); 231 pgrp = task_pgrp(current);
232 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 232 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
233 niceval = 20 - task_nice(p); 233 niceval = 20 - task_nice(p);
234 if (niceval > retval) 234 if (niceval > retval)
235 retval = niceval; 235 retval = niceval;
236 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 236 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
237 break; 237 break;
238 case PRIO_USER: 238 case PRIO_USER:
239 user = current->user; 239 user = current->user;
@@ -274,7 +274,7 @@ void emergency_restart(void)
274} 274}
275EXPORT_SYMBOL_GPL(emergency_restart); 275EXPORT_SYMBOL_GPL(emergency_restart);
276 276
277static void kernel_restart_prepare(char *cmd) 277void kernel_restart_prepare(char *cmd)
278{ 278{
279 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 279 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
280 system_state = SYSTEM_RESTART; 280 system_state = SYSTEM_RESTART;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index fe4713347275..50ec0886fa3d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -159,6 +159,7 @@ static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *
159static struct ctl_table root_table[]; 159static struct ctl_table root_table[];
160static struct ctl_table_root sysctl_table_root; 160static struct ctl_table_root sysctl_table_root;
161static struct ctl_table_header root_table_header = { 161static struct ctl_table_header root_table_header = {
162 .count = 1,
162 .ctl_table = root_table, 163 .ctl_table = root_table,
163 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), 164 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
164 .root = &sysctl_table_root, 165 .root = &sysctl_table_root,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 825b4c00fe44..7a46bde78c66 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -289,7 +289,6 @@ void tick_nohz_stop_sched_tick(int inidle)
289 ts->tick_stopped = 1; 289 ts->tick_stopped = 1;
290 ts->idle_jiffies = last_jiffies; 290 ts->idle_jiffies = last_jiffies;
291 rcu_enter_nohz(); 291 rcu_enter_nohz();
292 sched_clock_tick_stop(cpu);
293 } 292 }
294 293
295 /* 294 /*
@@ -392,7 +391,6 @@ void tick_nohz_restart_sched_tick(void)
392 select_nohz_load_balancer(0); 391 select_nohz_load_balancer(0);
393 now = ktime_get(); 392 now = ktime_get();
394 tick_do_update_jiffies64(now); 393 tick_do_update_jiffies64(now);
395 sched_clock_tick_start(cpu);
396 cpu_clear(cpu, nohz_cpu_mask); 394 cpu_clear(cpu, nohz_cpu_mask);
397 395
398 /* 396 /*
@@ -645,17 +643,21 @@ void tick_setup_sched_timer(void)
645 ts->nohz_mode = NOHZ_MODE_HIGHRES; 643 ts->nohz_mode = NOHZ_MODE_HIGHRES;
646#endif 644#endif
647} 645}
646#endif /* HIGH_RES_TIMERS */
648 647
648#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
649void tick_cancel_sched_timer(int cpu) 649void tick_cancel_sched_timer(int cpu)
650{ 650{
651 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 651 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
652 652
653# ifdef CONFIG_HIGH_RES_TIMERS
653 if (ts->sched_timer.base) 654 if (ts->sched_timer.base)
654 hrtimer_cancel(&ts->sched_timer); 655 hrtimer_cancel(&ts->sched_timer);
656# endif
655 657
656 ts->nohz_mode = NOHZ_MODE_INACTIVE; 658 ts->nohz_mode = NOHZ_MODE_INACTIVE;
657} 659}
658#endif /* HIGH_RES_TIMERS */ 660#endif
659 661
660/** 662/**
661 * Async notification about clocksource changes 663 * Async notification about clocksource changes
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a9ab0596de44..532858fa5b88 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/version.h>
10#include <linux/nsproxy.h> 9#include <linux/nsproxy.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 64d398f12444..815237a55af8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -12,7 +12,6 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/uts.h> 13#include <linux/uts.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/version.h>
16#include <linux/err.h> 15#include <linux/err.h>
17#include <linux/slab.h> 16#include <linux/slab.h>
18 17
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index fe3a56c2256d..4ab9659d269e 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -12,7 +12,6 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/uts.h> 13#include <linux/uts.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/version.h>
16#include <linux/sysctl.h> 15#include <linux/sysctl.h>
17 16
18static void *get_uts(ctl_table *table, int write) 17static void *get_uts(ctl_table *table, int write)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4a26a1382df0..4048e92aa04f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -290,11 +290,11 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
290 290
291 BUG_ON(get_wq_data(work) != cwq); 291 BUG_ON(get_wq_data(work) != cwq);
292 work_clear_pending(work); 292 work_clear_pending(work);
293 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 293 lock_map_acquire(&cwq->wq->lockdep_map);
294 lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_); 294 lock_map_acquire(&lockdep_map);
295 f(work); 295 f(work);
296 lock_release(&lockdep_map, 1, _THIS_IP_); 296 lock_map_release(&lockdep_map);
297 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); 297 lock_map_release(&cwq->wq->lockdep_map);
298 298
299 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { 299 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
300 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " 300 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
@@ -413,8 +413,8 @@ void flush_workqueue(struct workqueue_struct *wq)
413 int cpu; 413 int cpu;
414 414
415 might_sleep(); 415 might_sleep();
416 lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 416 lock_map_acquire(&wq->lockdep_map);
417 lock_release(&wq->lockdep_map, 1, _THIS_IP_); 417 lock_map_release(&wq->lockdep_map);
418 for_each_cpu_mask_nr(cpu, *cpu_map) 418 for_each_cpu_mask_nr(cpu, *cpu_map)
419 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); 419 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
420} 420}
@@ -441,8 +441,8 @@ int flush_work(struct work_struct *work)
441 if (!cwq) 441 if (!cwq)
442 return 0; 442 return 0;
443 443
444 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 444 lock_map_acquire(&cwq->wq->lockdep_map);
445 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); 445 lock_map_release(&cwq->wq->lockdep_map);
446 446
447 prev = NULL; 447 prev = NULL;
448 spin_lock_irq(&cwq->lock); 448 spin_lock_irq(&cwq->lock);
@@ -536,8 +536,8 @@ static void wait_on_work(struct work_struct *work)
536 536
537 might_sleep(); 537 might_sleep();
538 538
539 lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 539 lock_map_acquire(&work->lockdep_map);
540 lock_release(&work->lockdep_map, 1, _THIS_IP_); 540 lock_map_release(&work->lockdep_map);
541 541
542 cwq = get_wq_data(work); 542 cwq = get_wq_data(work);
543 if (!cwq) 543 if (!cwq)
@@ -872,8 +872,8 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
872 if (cwq->thread == NULL) 872 if (cwq->thread == NULL)
873 return; 873 return;
874 874
875 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 875 lock_map_acquire(&cwq->wq->lockdep_map);
876 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); 876 lock_map_release(&cwq->wq->lockdep_map);
877 877
878 flush_cpu_workqueue(cwq); 878 flush_cpu_workqueue(cwq);
879 /* 879 /*