aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2006-07-05 13:13:03 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2006-07-05 13:13:03 -0400
commit5e66dd6d66ffe758b39b6dcadf2330753ee1159b (patch)
treea72cdcff4448e4af9425cc213ddf56ab23e697fe /kernel
parent026477c1141b67e98e3bd8bdedb7d4b88a3ecd09 (diff)
parentca78f6baca863afe2e6a244a0fe94b3a70211d46 (diff)
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile8
-rw-r--r--kernel/capability.c8
-rw-r--r--kernel/exit.c40
-rw-r--r--kernel/fork.c51
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/irq/chip.c5
-rw-r--r--kernel/irq/handle.c18
-rw-r--r--kernel/irq/manage.c6
-rw-r--r--kernel/kmod.c2
-rw-r--r--kernel/lockdep.c2702
-rw-r--r--kernel/lockdep_internals.h78
-rw-r--r--kernel/lockdep_proc.c345
-rw-r--r--kernel/module.c26
-rw-r--r--kernel/mutex-debug.c399
-rw-r--r--kernel/mutex-debug.h94
-rw-r--r--kernel/mutex.c74
-rw-r--r--kernel/mutex.h19
-rw-r--r--kernel/pid.c6
-rw-r--r--kernel/printk.c23
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rcupdate.c4
-rw-r--r--kernel/rtmutex-debug.c307
-rw-r--r--kernel/rtmutex-debug.h8
-rw-r--r--kernel/rtmutex-tester.c4
-rw-r--r--kernel/rtmutex.c57
-rw-r--r--kernel/rtmutex.h3
-rw-r--r--kernel/rwsem.c147
-rw-r--r--kernel/sched.c748
-rw-r--r--kernel/softirq.c141
-rw-r--r--kernel/spinlock.c79
-rw-r--r--kernel/stacktrace.c24
-rw-r--r--kernel/stop_machine.c17
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/timer.c13
-rw-r--r--kernel/wait.c4
-rw-r--r--kernel/workqueue.c59
37 files changed, 4269 insertions, 1301 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182f6f61..47dbcd570cd8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,10 +8,15 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o 11 hrtimer.o rwsem.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-y += time/ 14obj-y += time/
14obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 15obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
16obj-$(CONFIG_LOCKDEP) += lockdep.o
17ifeq ($(CONFIG_PROC_FS),y)
18obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
19endif
15obj-$(CONFIG_FUTEX) += futex.o 20obj-$(CONFIG_FUTEX) += futex.o
16ifeq ($(CONFIG_COMPAT),y) 21ifeq ($(CONFIG_COMPAT),y)
17obj-$(CONFIG_FUTEX) += futex_compat.o 22obj-$(CONFIG_FUTEX) += futex_compat.o
@@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
22obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 27obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
23obj-$(CONFIG_SMP) += cpu.o spinlock.o 28obj-$(CONFIG_SMP) += cpu.o spinlock.o
24obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 29obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
30obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
25obj-$(CONFIG_UID16) += uid16.o 31obj-$(CONFIG_UID16) += uid16.o
26obj-$(CONFIG_MODULES) += module.o 32obj-$(CONFIG_MODULES) += module.o
27obj-$(CONFIG_KALLSYMS) += kallsyms.o 33obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/capability.c b/kernel/capability.c
index 1a4d8a40d3f9..c7685ad00a97 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
46 int ret = 0; 46 int ret = 0;
47 pid_t pid; 47 pid_t pid;
48 __u32 version; 48 __u32 version;
49 task_t *target; 49 struct task_struct *target;
50 struct __user_cap_data_struct data; 50 struct __user_cap_data_struct data;
51 51
52 if (get_user(version, &header->version)) 52 if (get_user(version, &header->version))
@@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
96 kernel_cap_t *inheritable, 96 kernel_cap_t *inheritable,
97 kernel_cap_t *permitted) 97 kernel_cap_t *permitted)
98{ 98{
99 task_t *g, *target; 99 struct task_struct *g, *target;
100 int ret = -EPERM; 100 int ret = -EPERM;
101 int found = 0; 101 int found = 0;
102 102
@@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
128 kernel_cap_t *inheritable, 128 kernel_cap_t *inheritable,
129 kernel_cap_t *permitted) 129 kernel_cap_t *permitted)
130{ 130{
131 task_t *g, *target; 131 struct task_struct *g, *target;
132 int ret = -EPERM; 132 int ret = -EPERM;
133 int found = 0; 133 int found = 0;
134 134
@@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
172{ 172{
173 kernel_cap_t inheritable, permitted, effective; 173 kernel_cap_t inheritable, permitted, effective;
174 __u32 version; 174 __u32 version;
175 task_t *target; 175 struct task_struct *target;
176 int ret; 176 int ret;
177 pid_t pid; 177 pid_t pid;
178 178
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f7ef2258553..6664c084783d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
134 134
135void release_task(struct task_struct * p) 135void release_task(struct task_struct * p)
136{ 136{
137 struct task_struct *leader;
137 int zap_leader; 138 int zap_leader;
138 task_t *leader;
139repeat: 139repeat:
140 atomic_dec(&p->user->processes); 140 atomic_dec(&p->user->processes);
141 write_lock_irq(&tasklist_lock); 141 write_lock_irq(&tasklist_lock);
@@ -209,7 +209,7 @@ out:
209 * 209 *
210 * "I ask you, have you ever known what it is to be an orphan?" 210 * "I ask you, have you ever known what it is to be an orphan?"
211 */ 211 */
212static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) 212static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
213{ 213{
214 struct task_struct *p; 214 struct task_struct *p;
215 int ret = 1; 215 int ret = 1;
@@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)
582 mmput(mm); 582 mmput(mm);
583} 583}
584 584
585static inline void choose_new_parent(task_t *p, task_t *reaper) 585static inline void
586choose_new_parent(struct task_struct *p, struct task_struct *reaper)
586{ 587{
587 /* 588 /*
588 * Make sure we're not reparenting to ourselves and that 589 * Make sure we're not reparenting to ourselves and that
@@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)
592 p->real_parent = reaper; 593 p->real_parent = reaper;
593} 594}
594 595
595static void reparent_thread(task_t *p, task_t *father, int traced) 596static void
597reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
596{ 598{
597 /* We don't want people slaying init. */ 599 /* We don't want people slaying init. */
598 if (p->exit_signal != -1) 600 if (p->exit_signal != -1)
@@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
656 * group, and if no such member exists, give it to 658 * group, and if no such member exists, give it to
657 * the global child reaper process (ie "init") 659 * the global child reaper process (ie "init")
658 */ 660 */
659static void forget_original_parent(struct task_struct * father, 661static void
660 struct list_head *to_release) 662forget_original_parent(struct task_struct *father, struct list_head *to_release)
661{ 663{
662 struct task_struct *p, *reaper = father; 664 struct task_struct *p, *reaper = father;
663 struct list_head *_p, *_n; 665 struct list_head *_p, *_n;
@@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,
680 */ 682 */
681 list_for_each_safe(_p, _n, &father->children) { 683 list_for_each_safe(_p, _n, &father->children) {
682 int ptrace; 684 int ptrace;
683 p = list_entry(_p,struct task_struct,sibling); 685 p = list_entry(_p, struct task_struct, sibling);
684 686
685 ptrace = p->ptrace; 687 ptrace = p->ptrace;
686 688
@@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,
709 list_add(&p->ptrace_list, to_release); 711 list_add(&p->ptrace_list, to_release);
710 } 712 }
711 list_for_each_safe(_p, _n, &father->ptrace_children) { 713 list_for_each_safe(_p, _n, &father->ptrace_children) {
712 p = list_entry(_p,struct task_struct,ptrace_list); 714 p = list_entry(_p, struct task_struct, ptrace_list);
713 choose_new_parent(p, reaper); 715 choose_new_parent(p, reaper);
714 reparent_thread(p, father, 1); 716 reparent_thread(p, father, 1);
715 } 717 }
@@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)
829 831
830 list_for_each_safe(_p, _n, &ptrace_dead) { 832 list_for_each_safe(_p, _n, &ptrace_dead) {
831 list_del_init(_p); 833 list_del_init(_p);
832 t = list_entry(_p,struct task_struct,ptrace_list); 834 t = list_entry(_p, struct task_struct, ptrace_list);
833 release_task(t); 835 release_task(t);
834 } 836 }
835 837
@@ -933,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code)
933 if (unlikely(current->pi_state_cache)) 935 if (unlikely(current->pi_state_cache))
934 kfree(current->pi_state_cache); 936 kfree(current->pi_state_cache);
935 /* 937 /*
936 * If DEBUG_MUTEXES is on, make sure we are holding no locks: 938 * Make sure we are holding no locks:
937 */ 939 */
938 mutex_debug_check_no_locks_held(tsk); 940 debug_check_no_locks_held(tsk);
939 rt_mutex_debug_check_no_locks_held(tsk);
940 941
941 if (tsk->io_context) 942 if (tsk->io_context)
942 exit_io_context(); 943 exit_io_context();
@@ -1011,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)
1011 do_group_exit((error_code & 0xff) << 8); 1012 do_group_exit((error_code & 0xff) << 8);
1012} 1013}
1013 1014
1014static int eligible_child(pid_t pid, int options, task_t *p) 1015static int eligible_child(pid_t pid, int options, struct task_struct *p)
1015{ 1016{
1016 if (pid > 0) { 1017 if (pid > 0) {
1017 if (p->pid != pid) 1018 if (p->pid != pid)
@@ -1052,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)
1052 return 1; 1053 return 1;
1053} 1054}
1054 1055
1055static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, 1056static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1056 int why, int status, 1057 int why, int status,
1057 struct siginfo __user *infop, 1058 struct siginfo __user *infop,
1058 struct rusage __user *rusagep) 1059 struct rusage __user *rusagep)
1059{ 1060{
1060 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1061 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
1062
1061 put_task_struct(p); 1063 put_task_struct(p);
1062 if (!retval) 1064 if (!retval)
1063 retval = put_user(SIGCHLD, &infop->si_signo); 1065 retval = put_user(SIGCHLD, &infop->si_signo);
@@ -1082,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
1082 * the lock and this task is uninteresting. If we return nonzero, we have 1084 * the lock and this task is uninteresting. If we return nonzero, we have
1083 * released the lock and the system call should return. 1085 * released the lock and the system call should return.
1084 */ 1086 */
1085static int wait_task_zombie(task_t *p, int noreap, 1087static int wait_task_zombie(struct task_struct *p, int noreap,
1086 struct siginfo __user *infop, 1088 struct siginfo __user *infop,
1087 int __user *stat_addr, struct rusage __user *ru) 1089 int __user *stat_addr, struct rusage __user *ru)
1088{ 1090{
@@ -1244,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,
1244 * the lock and this task is uninteresting. If we return nonzero, we have 1246 * the lock and this task is uninteresting. If we return nonzero, we have
1245 * released the lock and the system call should return. 1247 * released the lock and the system call should return.
1246 */ 1248 */
1247static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, 1249static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1248 struct siginfo __user *infop, 1250 int noreap, struct siginfo __user *infop,
1249 int __user *stat_addr, struct rusage __user *ru) 1251 int __user *stat_addr, struct rusage __user *ru)
1250{ 1252{
1251 int retval, exit_code; 1253 int retval, exit_code;
@@ -1359,7 +1361,7 @@ bail_ref:
1359 * the lock and this task is uninteresting. If we return nonzero, we have 1361 * the lock and this task is uninteresting. If we return nonzero, we have
1360 * released the lock and the system call should return. 1362 * released the lock and the system call should return.
1361 */ 1363 */
1362static int wait_task_continued(task_t *p, int noreap, 1364static int wait_task_continued(struct task_struct *p, int noreap,
1363 struct siginfo __user *infop, 1365 struct siginfo __user *infop,
1364 int __user *stat_addr, struct rusage __user *ru) 1366 int __user *stat_addr, struct rusage __user *ru)
1365{ 1367{
@@ -1445,7 +1447,7 @@ repeat:
1445 int ret; 1447 int ret;
1446 1448
1447 list_for_each(_p,&tsk->children) { 1449 list_for_each(_p,&tsk->children) {
1448 p = list_entry(_p,struct task_struct,sibling); 1450 p = list_entry(_p, struct task_struct, sibling);
1449 1451
1450 ret = eligible_child(pid, options, p); 1452 ret = eligible_child(pid, options, p);
1451 if (!ret) 1453 if (!ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9064bf9e131b..56e4e07e45f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
193 193
194 down_write(&oldmm->mmap_sem); 194 down_write(&oldmm->mmap_sem);
195 flush_cache_mm(oldmm); 195 flush_cache_mm(oldmm);
196 down_write(&mm->mmap_sem); 196 /*
197 * Not linked in yet - no deadlock potential:
198 */
199 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
197 200
198 mm->locked_vm = 0; 201 mm->locked_vm = 0;
199 mm->mmap = NULL; 202 mm->mmap = NULL;
@@ -919,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)
919 spin_lock_init(&p->pi_lock); 922 spin_lock_init(&p->pi_lock);
920 plist_head_init(&p->pi_waiters, &p->pi_lock); 923 plist_head_init(&p->pi_waiters, &p->pi_lock);
921 p->pi_blocked_on = NULL; 924 p->pi_blocked_on = NULL;
922# ifdef CONFIG_DEBUG_RT_MUTEXES
923 spin_lock_init(&p->held_list_lock);
924 INIT_LIST_HEAD(&p->held_list_head);
925# endif
926#endif 925#endif
927} 926}
928 927
@@ -934,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)
934 * parts of the process environment (as per the clone 933 * parts of the process environment (as per the clone
935 * flags). The actual kick-off is left to the caller. 934 * flags). The actual kick-off is left to the caller.
936 */ 935 */
937static task_t *copy_process(unsigned long clone_flags, 936static struct task_struct *copy_process(unsigned long clone_flags,
938 unsigned long stack_start, 937 unsigned long stack_start,
939 struct pt_regs *regs, 938 struct pt_regs *regs,
940 unsigned long stack_size, 939 unsigned long stack_size,
941 int __user *parent_tidptr, 940 int __user *parent_tidptr,
942 int __user *child_tidptr, 941 int __user *child_tidptr,
943 int pid) 942 int pid)
944{ 943{
945 int retval; 944 int retval;
946 struct task_struct *p = NULL; 945 struct task_struct *p = NULL;
@@ -972,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags,
972 if (!p) 971 if (!p)
973 goto fork_out; 972 goto fork_out;
974 973
974#ifdef CONFIG_TRACE_IRQFLAGS
975 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
976 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
977#endif
975 retval = -EAGAIN; 978 retval = -EAGAIN;
976 if (atomic_read(&p->user->processes) >= 979 if (atomic_read(&p->user->processes) >=
977 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 980 p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1046,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags,
1046 } 1049 }
1047 mpol_fix_fork_child_flag(p); 1050 mpol_fix_fork_child_flag(p);
1048#endif 1051#endif
1052#ifdef CONFIG_TRACE_IRQFLAGS
1053 p->irq_events = 0;
1054 p->hardirqs_enabled = 0;
1055 p->hardirq_enable_ip = 0;
1056 p->hardirq_enable_event = 0;
1057 p->hardirq_disable_ip = _THIS_IP_;
1058 p->hardirq_disable_event = 0;
1059 p->softirqs_enabled = 1;
1060 p->softirq_enable_ip = _THIS_IP_;
1061 p->softirq_enable_event = 0;
1062 p->softirq_disable_ip = 0;
1063 p->softirq_disable_event = 0;
1064 p->hardirq_context = 0;
1065 p->softirq_context = 0;
1066#endif
1067#ifdef CONFIG_LOCKDEP
1068 p->lockdep_depth = 0; /* no locks held yet */
1069 p->curr_chain_key = 0;
1070 p->lockdep_recursion = 0;
1071#endif
1049 1072
1050 rt_mutex_init_task(p); 1073 rt_mutex_init_task(p);
1051 1074
@@ -1271,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1271 return regs; 1294 return regs;
1272} 1295}
1273 1296
1274task_t * __devinit fork_idle(int cpu) 1297struct task_struct * __devinit fork_idle(int cpu)
1275{ 1298{
1276 task_t *task; 1299 struct task_struct *task;
1277 struct pt_regs regs; 1300 struct pt_regs regs;
1278 1301
1279 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0); 1302 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
diff --git a/kernel/futex.c b/kernel/futex.c
index 15caf93e4a43..1dc98e4dd287 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
607} 607}
608 608
609/* 609/*
610 * Express the locking dependencies for lockdep:
611 */
612static inline void
613double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
614{
615 if (hb1 <= hb2) {
616 spin_lock(&hb1->lock);
617 if (hb1 < hb2)
618 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
619 } else { /* hb1 > hb2 */
620 spin_lock(&hb2->lock);
621 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
622 }
623}
624
625/*
610 * Wake up all waiters hashed on the physical page that is mapped 626 * Wake up all waiters hashed on the physical page that is mapped
611 * to this virtual address: 627 * to this virtual address:
612 */ 628 */
@@ -674,11 +690,7 @@ retryfull:
674 hb2 = hash_futex(&key2); 690 hb2 = hash_futex(&key2);
675 691
676retry: 692retry:
677 if (hb1 < hb2) 693 double_lock_hb(hb1, hb2);
678 spin_lock(&hb1->lock);
679 spin_lock(&hb2->lock);
680 if (hb1 > hb2)
681 spin_lock(&hb1->lock);
682 694
683 op_ret = futex_atomic_op_inuser(op, uaddr2); 695 op_ret = futex_atomic_op_inuser(op, uaddr2);
684 if (unlikely(op_ret < 0)) { 696 if (unlikely(op_ret < 0)) {
@@ -787,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
787 hb1 = hash_futex(&key1); 799 hb1 = hash_futex(&key1);
788 hb2 = hash_futex(&key2); 800 hb2 = hash_futex(&key2);
789 801
790 if (hb1 < hb2) 802 double_lock_hb(hb1, hb2);
791 spin_lock(&hb1->lock);
792 spin_lock(&hb2->lock);
793 if (hb1 > hb2)
794 spin_lock(&hb1->lock);
795 803
796 if (likely(cmpval != NULL)) { 804 if (likely(cmpval != NULL)) {
797 u32 curval; 805 u32 curval;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8d3dc29ef41a..d17766d40dab 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)
669 return HRTIMER_NORESTART; 669 return HRTIMER_NORESTART;
670} 670}
671 671
672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) 672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
673{ 673{
674 sl->timer.function = hrtimer_wakeup; 674 sl->timer.function = hrtimer_wakeup;
675 sl->task = task; 675 sl->task = task;
@@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)
782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); 782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
783 int i; 783 int i;
784 784
785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) 785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
786 spin_lock_init(&base->lock); 786 spin_lock_init(&base->lock);
787 lockdep_set_class(&base->lock, &base->lock_key);
788 }
787} 789}
788 790
789#ifdef CONFIG_HOTPLUG_CPU 791#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 54105bdfe20d..9336f2e89e40 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -261,10 +261,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
261 * keep it masked and get out of here 261 * keep it masked and get out of here
262 */ 262 */
263 action = desc->action; 263 action = desc->action;
264 if (unlikely(!action || (desc->status & IRQ_DISABLED))) 264 if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
265 desc->status |= IRQ_PENDING;
265 goto out; 266 goto out;
267 }
266 268
267 desc->status |= IRQ_INPROGRESS; 269 desc->status |= IRQ_INPROGRESS;
270 desc->status &= ~IRQ_PENDING;
268 spin_unlock(&desc->lock); 271 spin_unlock(&desc->lock);
269 272
270 action_ret = handle_IRQ_event(irq, regs, action); 273 action_ret = handle_IRQ_event(irq, regs, action);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index aeb6e391276c..fc4e906aedbd 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -132,7 +132,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
132 handle_dynamic_tick(action); 132 handle_dynamic_tick(action);
133 133
134 if (!(action->flags & IRQF_DISABLED)) 134 if (!(action->flags & IRQF_DISABLED))
135 local_irq_enable(); 135 local_irq_enable_in_hardirq();
136 136
137 do { 137 do {
138 ret = action->handler(irq, action->dev_id, regs); 138 ret = action->handler(irq, action->dev_id, regs);
@@ -249,3 +249,19 @@ out:
249 return 1; 249 return 1;
250} 250}
251 251
252#ifdef CONFIG_TRACE_IRQFLAGS
253
254/*
255 * lockdep: we want to handle all irq_desc locks as a single lock-class:
256 */
257static struct lock_class_key irq_desc_lock_class;
258
259void early_init_irq_lock_class(void)
260{
261 int i;
262
263 for (i = 0; i < NR_IRQS; i++)
264 lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
265}
266
267#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c911c6ec4dd6..4e461438e48b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -410,6 +410,12 @@ int request_irq(unsigned int irq,
410 struct irqaction *action; 410 struct irqaction *action;
411 int retval; 411 int retval;
412 412
413#ifdef CONFIG_LOCKDEP
414 /*
415 * Lockdep wants atomic interrupt handlers:
416 */
417 irqflags |= SA_INTERRUPT;
418#endif
413 /* 419 /*
414 * Sanity-check: shared interrupts must pass in a real dev-ID, 420 * Sanity-check: shared interrupts must pass in a real dev-ID,
415 * otherwise we'll have trouble later trying to figure out 421 * otherwise we'll have trouble later trying to figure out
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1b7157af051c..1d32defa38ab 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -233,7 +233,7 @@ static void __call_usermodehelper(void *data)
233int call_usermodehelper_keys(char *path, char **argv, char **envp, 233int call_usermodehelper_keys(char *path, char **argv, char **envp,
234 struct key *session_keyring, int wait) 234 struct key *session_keyring, int wait)
235{ 235{
236 DECLARE_COMPLETION(done); 236 DECLARE_COMPLETION_ONSTACK(done);
237 struct subprocess_info sub_info = { 237 struct subprocess_info sub_info = {
238 .complete = &done, 238 .complete = &done,
239 .path = path, 239 .path = path,
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
new file mode 100644
index 000000000000..f32ca78c198d
--- /dev/null
+++ b/kernel/lockdep.c
@@ -0,0 +1,2702 @@
1/*
2 * kernel/lockdep.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * this code maps all the lock dependencies as they occur in a live kernel
11 * and will warn about the following classes of locking bugs:
12 *
13 * - lock inversion scenarios
14 * - circular lock dependencies
15 * - hardirq/softirq safe/unsafe locking bugs
16 *
17 * Bugs are reported even if the current locking scenario does not cause
18 * any deadlock at this point.
19 *
20 * I.e. if anytime in the past two locks were taken in a different order,
21 * even if it happened for another task, even if those were different
22 * locks (but of the same class as this lock), this code will detect it.
23 *
24 * Thanks to Arjan van de Ven for coming up with the initial idea of
25 * mapping lock dependencies runtime.
26 */
27#include <linux/mutex.h>
28#include <linux/sched.h>
29#include <linux/delay.h>
30#include <linux/module.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/spinlock.h>
34#include <linux/kallsyms.h>
35#include <linux/interrupt.h>
36#include <linux/stacktrace.h>
37#include <linux/debug_locks.h>
38#include <linux/irqflags.h>
39
40#include <asm/sections.h>
41
42#include "lockdep_internals.h"
43
44/*
45 * hash_lock: protects the lockdep hashes and class/list/hash allocators.
46 *
47 * This is one of the rare exceptions where it's justified
48 * to use a raw spinlock - we really don't want the spinlock
49 * code to recurse back into the lockdep code.
50 */
51static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
52
53static int lockdep_initialized;
54
55unsigned long nr_list_entries;
56static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
57
58/*
59 * Allocate a lockdep entry. (assumes hash_lock held, returns
60 * with NULL on failure)
61 */
62static struct lock_list *alloc_list_entry(void)
63{
64 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
65 __raw_spin_unlock(&hash_lock);
66 debug_locks_off();
67 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
68 printk("turning off the locking correctness validator.\n");
69 return NULL;
70 }
71 return list_entries + nr_list_entries++;
72}
73
74/*
75 * All data structures here are protected by the global debug_lock.
76 *
77 * Mutex key structs get allocated only once, during bootup, and are never
78 * freed - this significantly simplifies the debugging code.
79 */
80unsigned long nr_lock_classes;
81static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
82
83/*
84 * We keep a global list of all lock classes. The list only grows,
85 * never shrinks. The list is only accessed with the lockdep
86 * spinlock held.
87 */
88LIST_HEAD(all_lock_classes);
89
90/*
91 * The lockdep classes are in a hash-table as well, for fast lookup:
92 */
#define CLASSHASH_BITS		(MAX_LOCKDEP_KEYS_BITS - 1)
#define CLASSHASH_SIZE		(1UL << CLASSHASH_BITS)
#define CLASSHASH_MASK		(CLASSHASH_SIZE - 1)
/*
 * Hash a class key down to a bucket index: the bits above CLASSHASH_BITS
 * are added onto the key itself and the sum is masked to the table size.
 *
 * The argument is parenthesized at every use so that any expression
 * (e.g. "a | b") hashes as a whole; without the parentheses the cast and
 * the shift would bind to only part of such an expression.
 */
#define __classhashfn(key)	\
	((((unsigned long)(key) >> CLASSHASH_BITS) + (unsigned long)(key)) & CLASSHASH_MASK)
/* Bucket of classhash_table that 'key' hashes to. */
#define classhashentry(key)	(classhash_table + __classhashfn((key)))
98
99static struct list_head classhash_table[CLASSHASH_SIZE];
100
101unsigned long nr_lock_chains;
102static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
103
104/*
105 * We put the lock dependency chains into a hash-table as well, to cache
106 * their existence:
107 */
#define CHAINHASH_BITS		(MAX_LOCKDEP_CHAINS_BITS-1)
#define CHAINHASH_SIZE		(1UL << CHAINHASH_BITS)
#define CHAINHASH_MASK		(CHAINHASH_SIZE - 1)
/*
 * Hash a chain key down to a bucket index: the bits above CHAINHASH_BITS
 * are added onto the key itself and the sum is masked to the table size.
 *
 * The argument is parenthesized at every use so that any expression
 * (e.g. "a | b") hashes as a whole rather than being split up by
 * operator precedence.
 */
#define __chainhashfn(chain) \
		((((chain) >> CHAINHASH_BITS) + (chain)) & CHAINHASH_MASK)
/* Bucket of chainhash_table that 'chain' hashes to. */
#define chainhashentry(chain)	(chainhash_table + __chainhashfn((chain)))
114
115static struct list_head chainhash_table[CHAINHASH_SIZE];
116
117/*
118 * The hash key of the lock dependency chains is a hash itself too:
119 * it's a hash of all locks taken up to that lock, including that lock.
120 * It's a 64-bit hash, because it's important for the keys to be
121 * unique.
122 */
123#define iterate_chain_key(key1, key2) \
124 (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \
125 ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \
126 (key2))
127
128void lockdep_off(void)
129{
130 current->lockdep_recursion++;
131}
132
133EXPORT_SYMBOL(lockdep_off);
134
135void lockdep_on(void)
136{
137 current->lockdep_recursion--;
138}
139
140EXPORT_SYMBOL(lockdep_on);
141
142int lockdep_internal(void)
143{
144 return current->lockdep_recursion != 0;
145}
146
147EXPORT_SYMBOL(lockdep_internal);
148
149/*
150 * Debugging switches:
151 */
152
153#define VERBOSE 0
154#ifdef VERBOSE
155# define VERY_VERBOSE 0
156#endif
157
158#if VERBOSE
159# define HARDIRQ_VERBOSE 1
160# define SOFTIRQ_VERBOSE 1
161#else
162# define HARDIRQ_VERBOSE 0
163# define SOFTIRQ_VERBOSE 0
164#endif
165
166#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
167/*
168 * Quick filtering for interesting events:
169 */
170static int class_filter(struct lock_class *class)
171{
172 if (class->name_version == 1 &&
173 !strcmp(class->name, "&rl->lock"))
174 return 1;
175 if (class->name_version == 1 &&
176 !strcmp(class->name, "&ni->mrec_lock"))
177 return 1;
178 if (class->name_version == 1 &&
179 !strcmp(class->name, "mft_ni_runlist_lock"))
180 return 1;
181 if (class->name_version == 1 &&
182 !strcmp(class->name, "mft_ni_mrec_lock"))
183 return 1;
184 if (class->name_version == 1 &&
185 !strcmp(class->name, "&vol->lcnbmp_lock"))
186 return 1;
187 return 0;
188}
189#endif
190
/*
 * Should events on this class be traced verbosely? Defers to
 * class_filter() when VERBOSE is enabled, otherwise always 0.
 */
static int verbose(struct lock_class *class)
{
#if VERBOSE
	return class_filter(class);
#else
	return 0;
#endif
}
198
199#ifdef CONFIG_TRACE_IRQFLAGS
200
/*
 * Hardirq counterpart of verbose(): defers to class_filter() when
 * HARDIRQ_VERBOSE is enabled, otherwise always 0.
 */
static int hardirq_verbose(struct lock_class *class)
{
#if HARDIRQ_VERBOSE
	return class_filter(class);
#else
	return 0;
#endif
}
208
/*
 * Softirq counterpart of verbose(): defers to class_filter() when
 * SOFTIRQ_VERBOSE is enabled, otherwise always 0.
 */
static int softirq_verbose(struct lock_class *class)
{
#if SOFTIRQ_VERBOSE
	return class_filter(class);
#else
	return 0;
#endif
}
216
217#endif
218
219/*
220 * Stack-trace: tightly packed array of stack backtrace
221 * addresses. Protected by the hash_lock.
222 */
223unsigned long nr_stack_trace_entries;
224static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
225
/*
 * Capture a stack trace into the unused tail of the shared stack_trace[]
 * pool and describe the stored slice in *trace.
 *
 * Returns 1 on success. Returns 0 when the pool counter has run past
 * MAX_STACK_TRACE_ENTRIES, or when this capture filled the pool exactly;
 * in the exactly-full case hash_lock is released and debug_locks_off()
 * is called before returning.
 */
226static int save_trace(struct stack_trace *trace)
227{
 /* Point the trace at whatever room is left in the pool. */
228 trace->nr_entries = 0;
229 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
230 trace->entries = stack_trace + nr_stack_trace_entries;
231
 /* NOTE(review): the trailing 3 is presumably a frame-skip count - confirm against save_stack_trace(). */
232 save_stack_trace(trace, NULL, 0, 3);
233
 /* Shrink the slice to the number of entries actually recorded. */
234 trace->max_entries = trace->nr_entries;
235
 /* Account for the entries just written. */
236 nr_stack_trace_entries += trace->nr_entries;
237 if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
238 return 0;
239
 /* Pool is now completely full: drop hash_lock, then report once. */
240 if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
241 __raw_spin_unlock(&hash_lock);
242 if (debug_locks_off()) {
243 printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
244 printk("turning off the locking correctness validator.\n");
245 dump_stack();
246 }
247 return 0;
248 }
249
250 return 1;
251}
252
253unsigned int nr_hardirq_chains;
254unsigned int nr_softirq_chains;
255unsigned int nr_process_chains;
256unsigned int max_lockdep_depth;
257unsigned int max_recursion_depth;
258
259#ifdef CONFIG_DEBUG_LOCKDEP
260/*
261 * We cannot printk in early bootup code. Not even early_printk()
262 * might work. So we mark any initialization errors and printk
263 * about it later on, in lockdep_info().
264 */
265static int lockdep_init_error;
266
267/*
268 * Various lockdep statistics:
269 */
270atomic_t chain_lookup_hits;
271atomic_t chain_lookup_misses;
272atomic_t hardirqs_on_events;
273atomic_t hardirqs_off_events;
274atomic_t redundant_hardirqs_on;
275atomic_t redundant_hardirqs_off;
276atomic_t softirqs_on_events;
277atomic_t softirqs_off_events;
278atomic_t redundant_softirqs_on;
279atomic_t redundant_softirqs_off;
280atomic_t nr_unused_locks;
281atomic_t nr_cyclic_checks;
282atomic_t nr_cyclic_check_recursions;
283atomic_t nr_find_usage_forwards_checks;
284atomic_t nr_find_usage_forwards_recursions;
285atomic_t nr_find_usage_backwards_checks;
286atomic_t nr_find_usage_backwards_recursions;
287# define debug_atomic_inc(ptr) atomic_inc(ptr)
288# define debug_atomic_dec(ptr) atomic_dec(ptr)
289# define debug_atomic_read(ptr) atomic_read(ptr)
290#else
291# define debug_atomic_inc(ptr) do { } while (0)
292# define debug_atomic_dec(ptr) do { } while (0)
293# define debug_atomic_read(ptr) 0
294#endif
295
296/*
297 * Locking printouts:
298 */
299
/*
 * Human-readable names of the lock usage states, indexed by
 * enum lock_usage_bit. The "-R" entries belong to the *_READ bits,
 * the "-W" entries to their non-READ counterparts.
 */
static const char *usage_str[] =
{
	[LOCK_USED] = "initial-use ",
	[LOCK_USED_IN_HARDIRQ] = "in-hardirq-W",
	[LOCK_USED_IN_SOFTIRQ] = "in-softirq-W",
	[LOCK_ENABLED_SOFTIRQS] = "softirq-on-W",
	[LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
	[LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
	[LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
	[LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
	[LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
};
312
/*
 * Translate the address of a class key into a symbol name by asking
 * kallsyms_lookup(). <str> is passed on as the name buffer and the
 * result of kallsyms_lookup() is returned unchanged; the size, offset
 * and module name it reports are discarded.
 */
const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
{
	unsigned long sym_size, sym_offset;
	unsigned long addr = (unsigned long)key;
	char *module_name;

	return kallsyms_lookup(addr, &sym_size, &sym_offset, &module_name, str);
}
320
321void
322get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
323{
324 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
325
326 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
327 *c1 = '+';
328 else
329 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
330 *c1 = '-';
331
332 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
333 *c2 = '+';
334 else
335 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
336 *c2 = '-';
337
338 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
339 *c3 = '-';
340 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) {
341 *c3 = '+';
342 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
343 *c3 = '?';
344 }
345
346 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
347 *c4 = '-';
348 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {
349 *c4 = '+';
350 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
351 *c4 = '?';
352 }
353}
354
355static void print_lock_name(struct lock_class *class)
356{
357 char str[128], c1, c2, c3, c4;
358 const char *name;
359
360 get_usage_chars(class, &c1, &c2, &c3, &c4);
361
362 name = class->name;
363 if (!name) {
364 name = __get_key_name(class->key, str);
365 printk(" (%s", name);
366 } else {
367 printk(" (%s", name);
368 if (class->name_version > 1)
369 printk("#%d", class->name_version);
370 if (class->subclass)
371 printk("/%d", class->subclass);
372 }
373 printk("){%c%c%c%c}", c1, c2, c3, c4);
374}
375
376static void print_lockdep_cache(struct lockdep_map *lock)
377{
378 const char *name;
379 char str[128];
380
381 name = lock->name;
382 if (!name)
383 name = __get_key_name(lock->key->subkeys, str);
384
385 printk("%s", name);
386}
387
388static void print_lock(struct held_lock *hlock)
389{
390 print_lock_name(hlock->class);
391 printk(", at: ");
392 print_ip_sym(hlock->acquire_ip);
393}
394
395static void lockdep_print_held_locks(struct task_struct *curr)
396{
397 int i, depth = curr->lockdep_depth;
398
399 if (!depth) {
400 printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
401 return;
402 }
403 printk("%d lock%s held by %s/%d:\n",
404 depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
405
406 for (i = 0; i < depth; i++) {
407 printk(" #%d: ", i);
408 print_lock(curr->held_locks + i);
409 }
410}
/*
 * Indentation helper for the hierarchical dependency printouts:
 * emits <nr> indentation units (nothing if <nr> is zero or negative).
 */
static void print_spaces(int nr)
{
	int remaining = nr;

	while (remaining > 0) {
		printk(" ");
		remaining--;
	}
}
421
422static void print_lock_class_header(struct lock_class *class, int depth)
423{
424 int bit;
425
426 print_spaces(depth);
427 printk("->");
428 print_lock_name(class);
429 printk(" ops: %lu", class->ops);
430 printk(" {\n");
431
432 for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
433 if (class->usage_mask & (1 << bit)) {
434 int len = depth;
435
436 print_spaces(depth);
437 len += printk(" %s", usage_str[bit]);
438 len += printk(" at:\n");
439 print_stack_trace(class->usage_traces + bit, len);
440 }
441 }
442 print_spaces(depth);
443 printk(" }\n");
444
445 print_spaces(depth);
446 printk(" ... key at: ");
447 print_ip_sym((unsigned long)class->key);
448}
449
450/*
451 * printk all lock dependencies starting at <entry>:
452 */
453static void print_lock_dependencies(struct lock_class *class, int depth)
454{
455 struct lock_list *entry;
456
457 if (DEBUG_LOCKS_WARN_ON(depth >= 20))
458 return;
459
460 print_lock_class_header(class, depth);
461
462 list_for_each_entry(entry, &class->locks_after, entry) {
463 DEBUG_LOCKS_WARN_ON(!entry->class);
464 print_lock_dependencies(entry->class, depth + 1);
465
466 print_spaces(depth);
467 printk(" ... acquired at:\n");
468 print_stack_trace(&entry->trace, 2);
469 printk("\n");
470 }
471}
472
473/*
474 * Add a new dependency to the head of the list:
475 */
476static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
477 struct list_head *head, unsigned long ip)
478{
479 struct lock_list *entry;
480 /*
481 * Lock not present yet - get a new dependency struct and
482 * add it to the list:
483 */
484 entry = alloc_list_entry();
485 if (!entry)
486 return 0;
487
488 entry->class = this;
489 save_trace(&entry->trace);
490
491 /*
492 * Since we never remove from the dependency list, the list can
493 * be walked lockless by other CPUs, it's only allocation
494 * that must be protected by the spinlock. But this also means
495 * we must make new entries visible only once writes to the
496 * entry become visible - hence the RCU op:
497 */
498 list_add_tail_rcu(&entry->entry, head);
499
500 return 1;
501}
502
/*
 * Recursive, forwards-direction lock-dependency checking, used for
 * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
 * checking.
 *
 * (to keep the stackframe of the recursive functions small we
 * use these global variables, and we also mark various helper
 * functions as noinline.)
 *
 * check_prev_add() sets check_source to the lock being acquired
 * (<next>) and check_target to the already-held lock (<prev>) before
 * starting the circularity check.
 */
static struct held_lock *check_source, *check_target;
513
514/*
515 * Print a dependency chain entry (this is only done when a deadlock
516 * has been detected):
517 */
518static noinline int
519print_circular_bug_entry(struct lock_list *target, unsigned int depth)
520{
521 if (debug_locks_silent)
522 return 0;
523 printk("\n-> #%u", depth);
524 print_lock_name(target->class);
525 printk(":\n");
526 print_stack_trace(&target->trace, 6);
527
528 return 0;
529}
530
531/*
532 * When a circular dependency is detected, print the
533 * header first:
534 */
535static noinline int
536print_circular_bug_header(struct lock_list *entry, unsigned int depth)
537{
538 struct task_struct *curr = current;
539
540 __raw_spin_unlock(&hash_lock);
541 debug_locks_off();
542 if (debug_locks_silent)
543 return 0;
544
545 printk("\n=======================================================\n");
546 printk( "[ INFO: possible circular locking dependency detected ]\n");
547 printk( "-------------------------------------------------------\n");
548 printk("%s/%d is trying to acquire lock:\n",
549 curr->comm, curr->pid);
550 print_lock(check_source);
551 printk("\nbut task is already holding lock:\n");
552 print_lock(check_target);
553 printk("\nwhich lock already depends on the new lock.\n\n");
554 printk("\nthe existing dependency chain (in reverse order) is:\n");
555
556 print_circular_bug_entry(entry, depth);
557
558 return 0;
559}
560
561static noinline int print_circular_bug_tail(void)
562{
563 struct task_struct *curr = current;
564 struct lock_list this;
565
566 if (debug_locks_silent)
567 return 0;
568
569 this.class = check_source->class;
570 save_trace(&this.trace);
571 print_circular_bug_entry(&this, 0);
572
573 printk("\nother info that might help us debug this:\n\n");
574 lockdep_print_held_locks(curr);
575
576 printk("\nstack backtrace:\n");
577 dump_stack();
578
579 return 0;
580}
581
582static int noinline print_infinite_recursion_bug(void)
583{
584 __raw_spin_unlock(&hash_lock);
585 DEBUG_LOCKS_WARN_ON(1);
586
587 return 0;
588}
589
590/*
591 * Prove that the dependency graph starting at <entry> can not
592 * lead to <target>. Print an error and return 0 if it does.
593 */
594static noinline int
595check_noncircular(struct lock_class *source, unsigned int depth)
596{
597 struct lock_list *entry;
598
599 debug_atomic_inc(&nr_cyclic_check_recursions);
600 if (depth > max_recursion_depth)
601 max_recursion_depth = depth;
602 if (depth >= 20)
603 return print_infinite_recursion_bug();
604 /*
605 * Check this lock's dependency list:
606 */
607 list_for_each_entry(entry, &source->locks_after, entry) {
608 if (entry->class == check_target->class)
609 return print_circular_bug_header(entry, depth+1);
610 debug_atomic_inc(&nr_cyclic_checks);
611 if (!check_noncircular(entry->class, depth+1))
612 return print_circular_bug_entry(entry, depth+1);
613 }
614 return 1;
615}
616
/*
 * Should very verbose debugging output be produced for <class>?
 * Evaluates class_filter() when VERY_VERBOSE is enabled at build
 * time, otherwise always 0.
 */
static int very_verbose(struct lock_class *class)
{
#if VERY_VERBOSE
	return class_filter(class);
#else
	return 0;
#endif
}
624#ifdef CONFIG_TRACE_IRQFLAGS
625
/*
 * Forwards and backwards subgraph searching, for the purposes of
 * proving that two subgraphs can be connected by a new dependency
 * without creating any illegal irq-safe -> irq-unsafe lock dependency.
 *
 * find_usage_bit is the usage bit the searches look for; it is set by
 * the callers before each search. forwards_match / backwards_match
 * receive the class found by find_usage_forwards() /
 * find_usage_backwards().
 */
static enum lock_usage_bit find_usage_bit;
static struct lock_class *forwards_match, *backwards_match;
633
634/*
635 * Find a node in the forwards-direction dependency sub-graph starting
636 * at <source> that matches <find_usage_bit>.
637 *
638 * Return 2 if such a node exists in the subgraph, and put that node
639 * into <forwards_match>.
640 *
641 * Return 1 otherwise and keep <forwards_match> unchanged.
642 * Return 0 on error.
643 */
644static noinline int
645find_usage_forwards(struct lock_class *source, unsigned int depth)
646{
647 struct lock_list *entry;
648 int ret;
649
650 if (depth > max_recursion_depth)
651 max_recursion_depth = depth;
652 if (depth >= 20)
653 return print_infinite_recursion_bug();
654
655 debug_atomic_inc(&nr_find_usage_forwards_checks);
656 if (source->usage_mask & (1 << find_usage_bit)) {
657 forwards_match = source;
658 return 2;
659 }
660
661 /*
662 * Check this lock's dependency list:
663 */
664 list_for_each_entry(entry, &source->locks_after, entry) {
665 debug_atomic_inc(&nr_find_usage_forwards_recursions);
666 ret = find_usage_forwards(entry->class, depth+1);
667 if (ret == 2 || ret == 0)
668 return ret;
669 }
670 return 1;
671}
672
673/*
674 * Find a node in the backwards-direction dependency sub-graph starting
675 * at <source> that matches <find_usage_bit>.
676 *
677 * Return 2 if such a node exists in the subgraph, and put that node
678 * into <backwards_match>.
679 *
680 * Return 1 otherwise and keep <backwards_match> unchanged.
681 * Return 0 on error.
682 */
683static noinline int
684find_usage_backwards(struct lock_class *source, unsigned int depth)
685{
686 struct lock_list *entry;
687 int ret;
688
689 if (depth > max_recursion_depth)
690 max_recursion_depth = depth;
691 if (depth >= 20)
692 return print_infinite_recursion_bug();
693
694 debug_atomic_inc(&nr_find_usage_backwards_checks);
695 if (source->usage_mask & (1 << find_usage_bit)) {
696 backwards_match = source;
697 return 2;
698 }
699
700 /*
701 * Check this lock's dependency list:
702 */
703 list_for_each_entry(entry, &source->locks_before, entry) {
704 debug_atomic_inc(&nr_find_usage_backwards_recursions);
705 ret = find_usage_backwards(entry->class, depth+1);
706 if (ret == 2 || ret == 0)
707 return ret;
708 }
709 return 1;
710}
711
712static int
713print_bad_irq_dependency(struct task_struct *curr,
714 struct held_lock *prev,
715 struct held_lock *next,
716 enum lock_usage_bit bit1,
717 enum lock_usage_bit bit2,
718 const char *irqclass)
719{
720 __raw_spin_unlock(&hash_lock);
721 debug_locks_off();
722 if (debug_locks_silent)
723 return 0;
724
725 printk("\n======================================================\n");
726 printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
727 irqclass, irqclass);
728 printk( "------------------------------------------------------\n");
729 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
730 curr->comm, curr->pid,
731 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
732 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
733 curr->hardirqs_enabled,
734 curr->softirqs_enabled);
735 print_lock(next);
736
737 printk("\nand this task is already holding:\n");
738 print_lock(prev);
739 printk("which would create a new lock dependency:\n");
740 print_lock_name(prev->class);
741 printk(" ->");
742 print_lock_name(next->class);
743 printk("\n");
744
745 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
746 irqclass);
747 print_lock_name(backwards_match);
748 printk("\n... which became %s-irq-safe at:\n", irqclass);
749
750 print_stack_trace(backwards_match->usage_traces + bit1, 1);
751
752 printk("\nto a %s-irq-unsafe lock:\n", irqclass);
753 print_lock_name(forwards_match);
754 printk("\n... which became %s-irq-unsafe at:\n", irqclass);
755 printk("...");
756
757 print_stack_trace(forwards_match->usage_traces + bit2, 1);
758
759 printk("\nother info that might help us debug this:\n\n");
760 lockdep_print_held_locks(curr);
761
762 printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass);
763 print_lock_dependencies(backwards_match, 0);
764
765 printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass);
766 print_lock_dependencies(forwards_match, 0);
767
768 printk("\nstack backtrace:\n");
769 dump_stack();
770
771 return 0;
772}
773
774static int
775check_usage(struct task_struct *curr, struct held_lock *prev,
776 struct held_lock *next, enum lock_usage_bit bit_backwards,
777 enum lock_usage_bit bit_forwards, const char *irqclass)
778{
779 int ret;
780
781 find_usage_bit = bit_backwards;
782 /* fills in <backwards_match> */
783 ret = find_usage_backwards(prev->class, 0);
784 if (!ret || ret == 1)
785 return ret;
786
787 find_usage_bit = bit_forwards;
788 ret = find_usage_forwards(next->class, 0);
789 if (!ret || ret == 1)
790 return ret;
791 /* ret == 2 */
792 return print_bad_irq_dependency(curr, prev, next,
793 bit_backwards, bit_forwards, irqclass);
794}
795
796#endif
797
798static int
799print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
800 struct held_lock *next)
801{
802 debug_locks_off();
803 __raw_spin_unlock(&hash_lock);
804 if (debug_locks_silent)
805 return 0;
806
807 printk("\n=============================================\n");
808 printk( "[ INFO: possible recursive locking detected ]\n");
809 printk( "---------------------------------------------\n");
810 printk("%s/%d is trying to acquire lock:\n",
811 curr->comm, curr->pid);
812 print_lock(next);
813 printk("\nbut task is already holding lock:\n");
814 print_lock(prev);
815
816 printk("\nother info that might help us debug this:\n");
817 lockdep_print_held_locks(curr);
818
819 printk("\nstack backtrace:\n");
820 dump_stack();
821
822 return 0;
823}
824
825/*
826 * Check whether we are holding such a class already.
827 *
828 * (Note that this has to be done separately, because the graph cannot
829 * detect such classes of deadlocks.)
830 *
831 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
832 */
833static int
834check_deadlock(struct task_struct *curr, struct held_lock *next,
835 struct lockdep_map *next_instance, int read)
836{
837 struct held_lock *prev;
838 int i;
839
840 for (i = 0; i < curr->lockdep_depth; i++) {
841 prev = curr->held_locks + i;
842 if (prev->class != next->class)
843 continue;
844 /*
845 * Allow read-after-read recursion of the same
846 * lock class (i.e. read_lock(lock)+read_lock(lock)):
847 */
848 if ((read == 2) && prev->read)
849 return 2;
850 return print_deadlock_bug(curr, prev, next);
851 }
852 return 1;
853}
854
855/*
856 * There was a chain-cache miss, and we are about to add a new dependency
857 * to a previous lock. We recursively validate the following rules:
858 *
859 * - would the adding of the <prev> -> <next> dependency create a
860 * circular dependency in the graph? [== circular deadlock]
861 *
862 * - does the new prev->next dependency connect any hardirq-safe lock
863 * (in the full backwards-subgraph starting at <prev>) with any
864 * hardirq-unsafe lock (in the full forwards-subgraph starting at
865 * <next>)? [== illegal lock inversion with hardirq contexts]
866 *
867 * - does the new prev->next dependency connect any softirq-safe lock
868 * (in the full backwards-subgraph starting at <prev>) with any
869 * softirq-unsafe lock (in the full forwards-subgraph starting at
870 * <next>)? [== illegal lock inversion with softirq contexts]
871 *
872 * any of these scenarios could lead to a deadlock.
873 *
874 * Then if all the validations pass, we add the forwards and backwards
875 * dependency.
876 */
877static int
878check_prev_add(struct task_struct *curr, struct held_lock *prev,
879 struct held_lock *next)
880{
881 struct lock_list *entry;
882 int ret;
883
884 /*
885 * Prove that the new <prev> -> <next> dependency would not
886 * create a circular dependency in the graph. (We do this by
887 * forward-recursing into the graph starting at <next>, and
888 * checking whether we can reach <prev>.)
889 *
890 * We are using global variables to control the recursion, to
891 * keep the stackframe size of the recursive functions low:
892 */
893 check_source = next;
894 check_target = prev;
895 if (!(check_noncircular(next->class, 0)))
896 return print_circular_bug_tail();
897
898#ifdef CONFIG_TRACE_IRQFLAGS
899 /*
900 * Prove that the new dependency does not connect a hardirq-safe
901 * lock with a hardirq-unsafe lock - to achieve this we search
902 * the backwards-subgraph starting at <prev>, and the
903 * forwards-subgraph starting at <next>:
904 */
905 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
906 LOCK_ENABLED_HARDIRQS, "hard"))
907 return 0;
908
909 /*
910 * Prove that the new dependency does not connect a hardirq-safe-read
911 * lock with a hardirq-unsafe lock - to achieve this we search
912 * the backwards-subgraph starting at <prev>, and the
913 * forwards-subgraph starting at <next>:
914 */
915 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
916 LOCK_ENABLED_HARDIRQS, "hard-read"))
917 return 0;
918
919 /*
920 * Prove that the new dependency does not connect a softirq-safe
921 * lock with a softirq-unsafe lock - to achieve this we search
922 * the backwards-subgraph starting at <prev>, and the
923 * forwards-subgraph starting at <next>:
924 */
925 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
926 LOCK_ENABLED_SOFTIRQS, "soft"))
927 return 0;
928 /*
929 * Prove that the new dependency does not connect a softirq-safe-read
930 * lock with a softirq-unsafe lock - to achieve this we search
931 * the backwards-subgraph starting at <prev>, and the
932 * forwards-subgraph starting at <next>:
933 */
934 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
935 LOCK_ENABLED_SOFTIRQS, "soft"))
936 return 0;
937#endif
938 /*
939 * For recursive read-locks we do all the dependency checks,
940 * but we dont store read-triggered dependencies (only
941 * write-triggered dependencies). This ensures that only the
942 * write-side dependencies matter, and that if for example a
943 * write-lock never takes any other locks, then the reads are
944 * equivalent to a NOP.
945 */
946 if (next->read == 2 || prev->read == 2)
947 return 1;
948 /*
949 * Is the <prev> -> <next> dependency already present?
950 *
951 * (this may occur even though this is a new chain: consider
952 * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
953 * chains - the second one will be new, but L1 already has
954 * L2 added to its dependency list, due to the first chain.)
955 */
956 list_for_each_entry(entry, &prev->class->locks_after, entry) {
957 if (entry->class == next->class)
958 return 2;
959 }
960
961 /*
962 * Ok, all validations passed, add the new lock
963 * to the previous lock's dependency list:
964 */
965 ret = add_lock_to_list(prev->class, next->class,
966 &prev->class->locks_after, next->acquire_ip);
967 if (!ret)
968 return 0;
969 /*
970 * Return value of 2 signals 'dependency already added',
971 * in that case we dont have to add the backlink either.
972 */
973 if (ret == 2)
974 return 2;
975 ret = add_lock_to_list(next->class, prev->class,
976 &next->class->locks_before, next->acquire_ip);
977
978 /*
979 * Debugging printouts:
980 */
981 if (verbose(prev->class) || verbose(next->class)) {
982 __raw_spin_unlock(&hash_lock);
983 printk("\n new dependency: ");
984 print_lock_name(prev->class);
985 printk(" => ");
986 print_lock_name(next->class);
987 printk("\n");
988 dump_stack();
989 __raw_spin_lock(&hash_lock);
990 }
991 return 1;
992}
993
994/*
995 * Add the dependency to all directly-previous locks that are 'relevant'.
996 * The ones that are relevant are (in increasing distance from curr):
997 * all consecutive trylock entries and the final non-trylock entry - or
998 * the end of this context's lock-chain - whichever comes first.
999 */
1000static int
1001check_prevs_add(struct task_struct *curr, struct held_lock *next)
1002{
1003 int depth = curr->lockdep_depth;
1004 struct held_lock *hlock;
1005
1006 /*
1007 * Debugging checks.
1008 *
1009 * Depth must not be zero for a non-head lock:
1010 */
1011 if (!depth)
1012 goto out_bug;
1013 /*
1014 * At least two relevant locks must exist for this
1015 * to be a head:
1016 */
1017 if (curr->held_locks[depth].irq_context !=
1018 curr->held_locks[depth-1].irq_context)
1019 goto out_bug;
1020
1021 for (;;) {
1022 hlock = curr->held_locks + depth-1;
1023 /*
1024 * Only non-recursive-read entries get new dependencies
1025 * added:
1026 */
1027 if (hlock->read != 2) {
1028 check_prev_add(curr, hlock, next);
1029 /*
1030 * Stop after the first non-trylock entry,
1031 * as non-trylock entries have added their
1032 * own direct dependencies already, so this
1033 * lock is connected to them indirectly:
1034 */
1035 if (!hlock->trylock)
1036 break;
1037 }
1038 depth--;
1039 /*
1040 * End of lock-stack?
1041 */
1042 if (!depth)
1043 break;
1044 /*
1045 * Stop the search if we cross into another context:
1046 */
1047 if (curr->held_locks[depth].irq_context !=
1048 curr->held_locks[depth-1].irq_context)
1049 break;
1050 }
1051 return 1;
1052out_bug:
1053 __raw_spin_unlock(&hash_lock);
1054 DEBUG_LOCKS_WARN_ON(1);
1055
1056 return 0;
1057}
1058
1059
1060/*
1061 * Is this the address of a static object:
1062 */
1063static int static_obj(void *obj)
1064{
1065 unsigned long start = (unsigned long) &_stext,
1066 end = (unsigned long) &_end,
1067 addr = (unsigned long) obj;
1068#ifdef CONFIG_SMP
1069 int i;
1070#endif
1071
1072 /*
1073 * static variable?
1074 */
1075 if ((addr >= start) && (addr < end))
1076 return 1;
1077
1078#ifdef CONFIG_SMP
1079 /*
1080 * percpu var?
1081 */
1082 for_each_possible_cpu(i) {
1083 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
1084 end = (unsigned long) &__per_cpu_end + per_cpu_offset(i);
1085
1086 if ((addr >= start) && (addr < end))
1087 return 1;
1088 }
1089#endif
1090
1091 /*
1092 * module var?
1093 */
1094 return is_module_address(addr);
1095}
1096
1097/*
1098 * To make lock name printouts unique, we calculate a unique
1099 * class->name_version generation counter:
1100 */
1101static int count_matching_names(struct lock_class *new_class)
1102{
1103 struct lock_class *class;
1104 int count = 0;
1105
1106 if (!new_class->name)
1107 return 0;
1108
1109 list_for_each_entry(class, &all_lock_classes, lock_entry) {
1110 if (new_class->key - new_class->subclass == class->key)
1111 return class->name_version;
1112 if (class->name && !strcmp(class->name, new_class->name))
1113 count = max(count, class->name_version);
1114 }
1115
1116 return count + 1;
1117}
1118
1119extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
1120
1121/*
1122 * Register a lock's class in the hash-table, if the class is not present
1123 * yet. Otherwise we look it up. We cache the result in the lock object
1124 * itself, so actual lookup of the hash should be once per lock object.
1125 */
1126static inline struct lock_class *
1127register_lock_class(struct lockdep_map *lock, unsigned int subclass)
1128{
1129 struct lockdep_subclass_key *key;
1130 struct list_head *hash_head;
1131 struct lock_class *class;
1132
1133#ifdef CONFIG_DEBUG_LOCKDEP
1134 /*
1135 * If the architecture calls into lockdep before initializing
1136 * the hashes then we'll warn about it later. (we cannot printk
1137 * right now)
1138 */
1139 if (unlikely(!lockdep_initialized)) {
1140 lockdep_init();
1141 lockdep_init_error = 1;
1142 }
1143#endif
1144
1145 /*
1146 * Static locks do not have their class-keys yet - for them the key
1147 * is the lock object itself:
1148 */
1149 if (unlikely(!lock->key))
1150 lock->key = (void *)lock;
1151
1152 /*
1153 * NOTE: the class-key must be unique. For dynamic locks, a static
1154 * lock_class_key variable is passed in through the mutex_init()
1155 * (or spin_lock_init()) call - which acts as the key. For static
1156 * locks we use the lock object itself as the key.
1157 */
1158 if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
1159 __error_too_big_MAX_LOCKDEP_SUBCLASSES();
1160
1161 key = lock->key->subkeys + subclass;
1162
1163 hash_head = classhashentry(key);
1164
1165 /*
1166 * We can walk the hash lockfree, because the hash only
1167 * grows, and we are careful when adding entries to the end:
1168 */
1169 list_for_each_entry(class, hash_head, hash_entry)
1170 if (class->key == key)
1171 goto out_set;
1172
1173 /*
1174 * Debug-check: all keys must be persistent!
1175 */
1176 if (!static_obj(lock->key)) {
1177 debug_locks_off();
1178 printk("INFO: trying to register non-static key.\n");
1179 printk("the code is fine but needs lockdep annotation.\n");
1180 printk("turning off the locking correctness validator.\n");
1181 dump_stack();
1182
1183 return NULL;
1184 }
1185
1186 __raw_spin_lock(&hash_lock);
1187 /*
1188 * We have to do the hash-walk again, to avoid races
1189 * with another CPU:
1190 */
1191 list_for_each_entry(class, hash_head, hash_entry)
1192 if (class->key == key)
1193 goto out_unlock_set;
1194 /*
1195 * Allocate a new key from the static array, and add it to
1196 * the hash:
1197 */
1198 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
1199 __raw_spin_unlock(&hash_lock);
1200 debug_locks_off();
1201 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
1202 printk("turning off the locking correctness validator.\n");
1203 return NULL;
1204 }
1205 class = lock_classes + nr_lock_classes++;
1206 debug_atomic_inc(&nr_unused_locks);
1207 class->key = key;
1208 class->name = lock->name;
1209 class->subclass = subclass;
1210 INIT_LIST_HEAD(&class->lock_entry);
1211 INIT_LIST_HEAD(&class->locks_before);
1212 INIT_LIST_HEAD(&class->locks_after);
1213 class->name_version = count_matching_names(class);
1214 /*
1215 * We use RCU's safe list-add method to make
1216 * parallel walking of the hash-list safe:
1217 */
1218 list_add_tail_rcu(&class->hash_entry, hash_head);
1219
1220 if (verbose(class)) {
1221 __raw_spin_unlock(&hash_lock);
1222 printk("\nnew class %p: %s", class->key, class->name);
1223 if (class->name_version > 1)
1224 printk("#%d", class->name_version);
1225 printk("\n");
1226 dump_stack();
1227 __raw_spin_lock(&hash_lock);
1228 }
1229out_unlock_set:
1230 __raw_spin_unlock(&hash_lock);
1231
1232out_set:
1233 lock->class[subclass] = class;
1234
1235 DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
1236
1237 return class;
1238}
1239
1240/*
1241 * Look up a dependency chain. If the key is not present yet then
1242 * add it and return 0 - in this case the new dependency chain is
1243 * validated. If the key is already hashed, return 1.
1244 */
1245static inline int lookup_chain_cache(u64 chain_key)
1246{
1247 struct list_head *hash_head = chainhashentry(chain_key);
1248 struct lock_chain *chain;
1249
1250 DEBUG_LOCKS_WARN_ON(!irqs_disabled());
1251 /*
1252 * We can walk it lock-free, because entries only get added
1253 * to the hash:
1254 */
1255 list_for_each_entry(chain, hash_head, entry) {
1256 if (chain->chain_key == chain_key) {
1257cache_hit:
1258 debug_atomic_inc(&chain_lookup_hits);
1259 /*
1260 * In the debugging case, force redundant checking
1261 * by returning 1:
1262 */
1263#ifdef CONFIG_DEBUG_LOCKDEP
1264 __raw_spin_lock(&hash_lock);
1265 return 1;
1266#endif
1267 return 0;
1268 }
1269 }
1270 /*
1271 * Allocate a new chain entry from the static array, and add
1272 * it to the hash:
1273 */
1274 __raw_spin_lock(&hash_lock);
1275 /*
1276 * We have to walk the chain again locked - to avoid duplicates:
1277 */
1278 list_for_each_entry(chain, hash_head, entry) {
1279 if (chain->chain_key == chain_key) {
1280 __raw_spin_unlock(&hash_lock);
1281 goto cache_hit;
1282 }
1283 }
1284 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
1285 __raw_spin_unlock(&hash_lock);
1286 debug_locks_off();
1287 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
1288 printk("turning off the locking correctness validator.\n");
1289 return 0;
1290 }
1291 chain = lock_chains + nr_lock_chains++;
1292 chain->chain_key = chain_key;
1293 list_add_tail_rcu(&chain->entry, hash_head);
1294 debug_atomic_inc(&chain_lookup_misses);
1295#ifdef CONFIG_TRACE_IRQFLAGS
1296 if (current->hardirq_context)
1297 nr_hardirq_chains++;
1298 else {
1299 if (current->softirq_context)
1300 nr_softirq_chains++;
1301 else
1302 nr_process_chains++;
1303 }
1304#else
1305 nr_process_chains++;
1306#endif
1307
1308 return 1;
1309}
1310
/*
 * We are building curr_chain_key incrementally, so double-check
 * it from scratch, to make sure that it's done correctly.
 *
 * On the first mismatch lock debugging is turned off and a warning is
 * printed. Compiles to nothing without CONFIG_DEBUG_LOCKDEP.
 */
static void check_chain_key(struct task_struct *curr)
{
#ifdef CONFIG_DEBUG_LOCKDEP
	struct held_lock *hlock, *prev_hlock = NULL;
	unsigned int i, id;
	u64 chain_key = 0;

	for (i = 0; i < curr->lockdep_depth; i++) {
		hlock = &curr->held_locks[i];

		/* the key computed so far must match the stored one: */
		if (chain_key != hlock->prev_chain_key) {
			debug_locks_off();
			printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
				curr->lockdep_depth, i,
				(unsigned long long)chain_key,
				(unsigned long long)hlock->prev_chain_key);
			WARN_ON(1);
			return;
		}

		id = hlock->class - lock_classes;
		DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);

		/* the key restarts whenever the irq context changes: */
		if (prev_hlock &&
		    (prev_hlock->irq_context != hlock->irq_context))
			chain_key = 0;

		chain_key = iterate_chain_key(chain_key, id);
		prev_hlock = hlock;
	}

	if (chain_key == curr->curr_chain_key)
		return;

	debug_locks_off();
	printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
		curr->lockdep_depth, i,
		(unsigned long long)chain_key,
		(unsigned long long)curr->curr_chain_key);
	WARN_ON(1);
#endif
}
1351
1352#ifdef CONFIG_TRACE_IRQFLAGS
1353
1354/*
1355 * print irq inversion bug:
1356 */
1357static int
1358print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1359 struct held_lock *this, int forwards,
1360 const char *irqclass)
1361{
1362 __raw_spin_unlock(&hash_lock);
1363 debug_locks_off();
1364 if (debug_locks_silent)
1365 return 0;
1366
1367 printk("\n=========================================================\n");
1368 printk( "[ INFO: possible irq lock inversion dependency detected ]\n");
1369 printk( "---------------------------------------------------------\n");
1370 printk("%s/%d just changed the state of lock:\n",
1371 curr->comm, curr->pid);
1372 print_lock(this);
1373 if (forwards)
1374 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
1375 else
1376 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
1377 print_lock_name(other);
1378 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1379
1380 printk("\nother info that might help us debug this:\n");
1381 lockdep_print_held_locks(curr);
1382
1383 printk("\nthe first lock's dependencies:\n");
1384 print_lock_dependencies(this->class, 0);
1385
1386 printk("\nthe second lock's dependencies:\n");
1387 print_lock_dependencies(other, 0);
1388
1389 printk("\nstack backtrace:\n");
1390 dump_stack();
1391
1392 return 0;
1393}
1394
1395/*
1396 * Prove that in the forwards-direction subgraph starting at <this>
1397 * there is no lock matching <mask>:
1398 */
1399static int
1400check_usage_forwards(struct task_struct *curr, struct held_lock *this,
1401 enum lock_usage_bit bit, const char *irqclass)
1402{
1403 int ret;
1404
1405 find_usage_bit = bit;
1406 /* fills in <forwards_match> */
1407 ret = find_usage_forwards(this->class, 0);
1408 if (!ret || ret == 1)
1409 return ret;
1410
1411 return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass);
1412}
1413
1414/*
1415 * Prove that in the backwards-direction subgraph starting at <this>
1416 * there is no lock matching <mask>:
1417 */
1418static int
1419check_usage_backwards(struct task_struct *curr, struct held_lock *this,
1420 enum lock_usage_bit bit, const char *irqclass)
1421{
1422 int ret;
1423
1424 find_usage_bit = bit;
1425 /* fills in <backwards_match> */
1426 ret = find_usage_backwards(this->class, 0);
1427 if (!ret || ret == 1)
1428 return ret;
1429
1430 return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
1431}
1432
1433static inline void print_irqtrace_events(struct task_struct *curr)
1434{
1435 printk("irq event stamp: %u\n", curr->irq_events);
1436 printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event);
1437 print_ip_sym(curr->hardirq_enable_ip);
1438 printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
1439 print_ip_sym(curr->hardirq_disable_ip);
1440 printk("softirqs last enabled at (%u): ", curr->softirq_enable_event);
1441 print_ip_sym(curr->softirq_enable_ip);
1442 printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
1443 print_ip_sym(curr->softirq_disable_ip);
1444}
1445
1446#else
/* Without CONFIG_TRACE_IRQFLAGS there is no irq event state to print. */
static inline void print_irqtrace_events(struct task_struct *curr)
{
	/* intentionally empty */
}
1450#endif
1451
1452static int
1453print_usage_bug(struct task_struct *curr, struct held_lock *this,
1454 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
1455{
1456 __raw_spin_unlock(&hash_lock);
1457 debug_locks_off();
1458 if (debug_locks_silent)
1459 return 0;
1460
1461 printk("\n=================================\n");
1462 printk( "[ INFO: inconsistent lock state ]\n");
1463 printk( "---------------------------------\n");
1464
1465 printk("inconsistent {%s} -> {%s} usage.\n",
1466 usage_str[prev_bit], usage_str[new_bit]);
1467
1468 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1469 curr->comm, curr->pid,
1470 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1471 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1472 trace_hardirqs_enabled(curr),
1473 trace_softirqs_enabled(curr));
1474 print_lock(this);
1475
1476 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1477 print_stack_trace(this->class->usage_traces + prev_bit, 1);
1478
1479 print_irqtrace_events(curr);
1480 printk("\nother info that might help us debug this:\n");
1481 lockdep_print_held_locks(curr);
1482
1483 printk("\nstack backtrace:\n");
1484 dump_stack();
1485
1486 return 0;
1487}
1488
1489/*
1490 * Print out an error if an invalid bit is set:
1491 */
1492static inline int
1493valid_state(struct task_struct *curr, struct held_lock *this,
1494 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1495{
1496 if (unlikely(this->class->usage_mask & (1 << bad_bit)))
1497 return print_usage_bug(curr, this, bad_bit, new_bit);
1498 return 1;
1499}
1500
1501#define STRICT_READ_CHECKS 1
1502
1503/*
1504 * Mark a lock with a usage bit, and validate the state transition:
1505 */
1506static int mark_lock(struct task_struct *curr, struct held_lock *this,
1507 enum lock_usage_bit new_bit, unsigned long ip)
1508{
1509 unsigned int new_mask = 1 << new_bit, ret = 1;
1510
1511 /*
1512 * If already set then do not dirty the cacheline,
1513 * nor do any checks:
1514 */
1515 if (likely(this->class->usage_mask & new_mask))
1516 return 1;
1517
1518 __raw_spin_lock(&hash_lock);
1519 /*
1520 * Make sure we didnt race:
1521 */
1522 if (unlikely(this->class->usage_mask & new_mask)) {
1523 __raw_spin_unlock(&hash_lock);
1524 return 1;
1525 }
1526
1527 this->class->usage_mask |= new_mask;
1528
1529#ifdef CONFIG_TRACE_IRQFLAGS
1530 if (new_bit == LOCK_ENABLED_HARDIRQS ||
1531 new_bit == LOCK_ENABLED_HARDIRQS_READ)
1532 ip = curr->hardirq_enable_ip;
1533 else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
1534 new_bit == LOCK_ENABLED_SOFTIRQS_READ)
1535 ip = curr->softirq_enable_ip;
1536#endif
1537 if (!save_trace(this->class->usage_traces + new_bit))
1538 return 0;
1539
1540 switch (new_bit) {
1541#ifdef CONFIG_TRACE_IRQFLAGS
1542 case LOCK_USED_IN_HARDIRQ:
1543 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1544 return 0;
1545 if (!valid_state(curr, this, new_bit,
1546 LOCK_ENABLED_HARDIRQS_READ))
1547 return 0;
1548 /*
1549 * just marked it hardirq-safe, check that this lock
1550 * took no hardirq-unsafe lock in the past:
1551 */
1552 if (!check_usage_forwards(curr, this,
1553 LOCK_ENABLED_HARDIRQS, "hard"))
1554 return 0;
1555#if STRICT_READ_CHECKS
1556 /*
1557 * just marked it hardirq-safe, check that this lock
1558 * took no hardirq-unsafe-read lock in the past:
1559 */
1560 if (!check_usage_forwards(curr, this,
1561 LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
1562 return 0;
1563#endif
1564 if (hardirq_verbose(this->class))
1565 ret = 2;
1566 break;
1567 case LOCK_USED_IN_SOFTIRQ:
1568 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1569 return 0;
1570 if (!valid_state(curr, this, new_bit,
1571 LOCK_ENABLED_SOFTIRQS_READ))
1572 return 0;
1573 /*
1574 * just marked it softirq-safe, check that this lock
1575 * took no softirq-unsafe lock in the past:
1576 */
1577 if (!check_usage_forwards(curr, this,
1578 LOCK_ENABLED_SOFTIRQS, "soft"))
1579 return 0;
1580#if STRICT_READ_CHECKS
1581 /*
1582 * just marked it softirq-safe, check that this lock
1583 * took no softirq-unsafe-read lock in the past:
1584 */
1585 if (!check_usage_forwards(curr, this,
1586 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
1587 return 0;
1588#endif
1589 if (softirq_verbose(this->class))
1590 ret = 2;
1591 break;
1592 case LOCK_USED_IN_HARDIRQ_READ:
1593 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1594 return 0;
1595 /*
1596 * just marked it hardirq-read-safe, check that this lock
1597 * took no hardirq-unsafe lock in the past:
1598 */
1599 if (!check_usage_forwards(curr, this,
1600 LOCK_ENABLED_HARDIRQS, "hard"))
1601 return 0;
1602 if (hardirq_verbose(this->class))
1603 ret = 2;
1604 break;
1605 case LOCK_USED_IN_SOFTIRQ_READ:
1606 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1607 return 0;
1608 /*
1609 * just marked it softirq-read-safe, check that this lock
1610 * took no softirq-unsafe lock in the past:
1611 */
1612 if (!check_usage_forwards(curr, this,
1613 LOCK_ENABLED_SOFTIRQS, "soft"))
1614 return 0;
1615 if (softirq_verbose(this->class))
1616 ret = 2;
1617 break;
1618 case LOCK_ENABLED_HARDIRQS:
1619 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1620 return 0;
1621 if (!valid_state(curr, this, new_bit,
1622 LOCK_USED_IN_HARDIRQ_READ))
1623 return 0;
1624 /*
1625 * just marked it hardirq-unsafe, check that no hardirq-safe
1626 * lock in the system ever took it in the past:
1627 */
1628 if (!check_usage_backwards(curr, this,
1629 LOCK_USED_IN_HARDIRQ, "hard"))
1630 return 0;
1631#if STRICT_READ_CHECKS
1632 /*
1633 * just marked it hardirq-unsafe, check that no
1634 * hardirq-safe-read lock in the system ever took
1635 * it in the past:
1636 */
1637 if (!check_usage_backwards(curr, this,
1638 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
1639 return 0;
1640#endif
1641 if (hardirq_verbose(this->class))
1642 ret = 2;
1643 break;
1644 case LOCK_ENABLED_SOFTIRQS:
1645 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1646 return 0;
1647 if (!valid_state(curr, this, new_bit,
1648 LOCK_USED_IN_SOFTIRQ_READ))
1649 return 0;
1650 /*
1651 * just marked it softirq-unsafe, check that no softirq-safe
1652 * lock in the system ever took it in the past:
1653 */
1654 if (!check_usage_backwards(curr, this,
1655 LOCK_USED_IN_SOFTIRQ, "soft"))
1656 return 0;
1657#if STRICT_READ_CHECKS
1658 /*
1659 * just marked it softirq-unsafe, check that no
1660 * softirq-safe-read lock in the system ever took
1661 * it in the past:
1662 */
1663 if (!check_usage_backwards(curr, this,
1664 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
1665 return 0;
1666#endif
1667 if (softirq_verbose(this->class))
1668 ret = 2;
1669 break;
1670 case LOCK_ENABLED_HARDIRQS_READ:
1671 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1672 return 0;
1673#if STRICT_READ_CHECKS
1674 /*
1675 * just marked it hardirq-read-unsafe, check that no
1676 * hardirq-safe lock in the system ever took it in the past:
1677 */
1678 if (!check_usage_backwards(curr, this,
1679 LOCK_USED_IN_HARDIRQ, "hard"))
1680 return 0;
1681#endif
1682 if (hardirq_verbose(this->class))
1683 ret = 2;
1684 break;
1685 case LOCK_ENABLED_SOFTIRQS_READ:
1686 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1687 return 0;
1688#if STRICT_READ_CHECKS
1689 /*
1690 * just marked it softirq-read-unsafe, check that no
1691 * softirq-safe lock in the system ever took it in the past:
1692 */
1693 if (!check_usage_backwards(curr, this,
1694 LOCK_USED_IN_SOFTIRQ, "soft"))
1695 return 0;
1696#endif
1697 if (softirq_verbose(this->class))
1698 ret = 2;
1699 break;
1700#endif
1701 case LOCK_USED:
1702 /*
1703 * Add it to the global list of classes:
1704 */
1705 list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
1706 debug_atomic_dec(&nr_unused_locks);
1707 break;
1708 default:
1709 debug_locks_off();
1710 WARN_ON(1);
1711 return 0;
1712 }
1713
1714 __raw_spin_unlock(&hash_lock);
1715
1716 /*
1717 * We must printk outside of the hash_lock:
1718 */
1719 if (ret == 2) {
1720 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
1721 print_lock(this);
1722 print_irqtrace_events(curr);
1723 dump_stack();
1724 }
1725
1726 return ret;
1727}
1728
1729#ifdef CONFIG_TRACE_IRQFLAGS
1730/*
1731 * Mark all held locks with a usage bit:
1732 */
1733static int
1734mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
1735{
1736 enum lock_usage_bit usage_bit;
1737 struct held_lock *hlock;
1738 int i;
1739
1740 for (i = 0; i < curr->lockdep_depth; i++) {
1741 hlock = curr->held_locks + i;
1742
1743 if (hardirq) {
1744 if (hlock->read)
1745 usage_bit = LOCK_ENABLED_HARDIRQS_READ;
1746 else
1747 usage_bit = LOCK_ENABLED_HARDIRQS;
1748 } else {
1749 if (hlock->read)
1750 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
1751 else
1752 usage_bit = LOCK_ENABLED_SOFTIRQS;
1753 }
1754 if (!mark_lock(curr, hlock, usage_bit, ip))
1755 return 0;
1756 }
1757
1758 return 1;
1759}
1760
/*
 * Debugging helper for early bootup code: trace_hardirqs_on() warns
 * (and bails out) while this flag is still zero.
 */
static int early_boot_irqs_enabled;

/* Clear the early-boot "irqs may be enabled" flag. */
void early_boot_irqs_off(void)
{
	early_boot_irqs_enabled = 0;
}

/* Set the early-boot "irqs may be enabled" flag. */
void early_boot_irqs_on(void)
{
	early_boot_irqs_enabled = 1;
}
1776
1777/*
1778 * Hardirqs will be enabled:
1779 */
1780void trace_hardirqs_on(void)
1781{
1782 struct task_struct *curr = current;
1783 unsigned long ip;
1784
1785 if (unlikely(!debug_locks || current->lockdep_recursion))
1786 return;
1787
1788 if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
1789 return;
1790
1791 if (unlikely(curr->hardirqs_enabled)) {
1792 debug_atomic_inc(&redundant_hardirqs_on);
1793 return;
1794 }
1795 /* we'll do an OFF -> ON transition: */
1796 curr->hardirqs_enabled = 1;
1797 ip = (unsigned long) __builtin_return_address(0);
1798
1799 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1800 return;
1801 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
1802 return;
1803 /*
1804 * We are going to turn hardirqs on, so set the
1805 * usage bit for all held locks:
1806 */
1807 if (!mark_held_locks(curr, 1, ip))
1808 return;
1809 /*
1810 * If we have softirqs enabled, then set the usage
1811 * bit for all held locks. (disabled hardirqs prevented
1812 * this bit from being set before)
1813 */
1814 if (curr->softirqs_enabled)
1815 if (!mark_held_locks(curr, 0, ip))
1816 return;
1817
1818 curr->hardirq_enable_ip = ip;
1819 curr->hardirq_enable_event = ++curr->irq_events;
1820 debug_atomic_inc(&hardirqs_on_events);
1821}
1822
1823EXPORT_SYMBOL(trace_hardirqs_on);
1824
1825/*
1826 * Hardirqs were disabled:
1827 */
1828void trace_hardirqs_off(void)
1829{
1830 struct task_struct *curr = current;
1831
1832 if (unlikely(!debug_locks || current->lockdep_recursion))
1833 return;
1834
1835 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1836 return;
1837
1838 if (curr->hardirqs_enabled) {
1839 /*
1840 * We have done an ON -> OFF transition:
1841 */
1842 curr->hardirqs_enabled = 0;
1843 curr->hardirq_disable_ip = _RET_IP_;
1844 curr->hardirq_disable_event = ++curr->irq_events;
1845 debug_atomic_inc(&hardirqs_off_events);
1846 } else
1847 debug_atomic_inc(&redundant_hardirqs_off);
1848}
1849
1850EXPORT_SYMBOL(trace_hardirqs_off);
1851
1852/*
1853 * Softirqs will be enabled:
1854 */
1855void trace_softirqs_on(unsigned long ip)
1856{
1857 struct task_struct *curr = current;
1858
1859 if (unlikely(!debug_locks))
1860 return;
1861
1862 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1863 return;
1864
1865 if (curr->softirqs_enabled) {
1866 debug_atomic_inc(&redundant_softirqs_on);
1867 return;
1868 }
1869
1870 /*
1871 * We'll do an OFF -> ON transition:
1872 */
1873 curr->softirqs_enabled = 1;
1874 curr->softirq_enable_ip = ip;
1875 curr->softirq_enable_event = ++curr->irq_events;
1876 debug_atomic_inc(&softirqs_on_events);
1877 /*
1878 * We are going to turn softirqs on, so set the
1879 * usage bit for all held locks, if hardirqs are
1880 * enabled too:
1881 */
1882 if (curr->hardirqs_enabled)
1883 mark_held_locks(curr, 0, ip);
1884}
1885
1886/*
1887 * Softirqs were disabled:
1888 */
1889void trace_softirqs_off(unsigned long ip)
1890{
1891 struct task_struct *curr = current;
1892
1893 if (unlikely(!debug_locks))
1894 return;
1895
1896 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1897 return;
1898
1899 if (curr->softirqs_enabled) {
1900 /*
1901 * We have done an ON -> OFF transition:
1902 */
1903 curr->softirqs_enabled = 0;
1904 curr->softirq_disable_ip = ip;
1905 curr->softirq_disable_event = ++curr->irq_events;
1906 debug_atomic_inc(&softirqs_off_events);
1907 DEBUG_LOCKS_WARN_ON(!softirq_count());
1908 } else
1909 debug_atomic_inc(&redundant_softirqs_off);
1910}
1911
1912#endif
1913
1914/*
1915 * Initialize a lock instance's lock-class mapping info:
1916 */
1917void lockdep_init_map(struct lockdep_map *lock, const char *name,
1918 struct lock_class_key *key)
1919{
1920 if (unlikely(!debug_locks))
1921 return;
1922
1923 if (DEBUG_LOCKS_WARN_ON(!key))
1924 return;
1925 if (DEBUG_LOCKS_WARN_ON(!name))
1926 return;
1927 /*
1928 * Sanity check, the lock-class key must be persistent:
1929 */
1930 if (!static_obj(key)) {
1931 printk("BUG: key %p not in .data!\n", key);
1932 DEBUG_LOCKS_WARN_ON(1);
1933 return;
1934 }
1935 lock->name = name;
1936 lock->key = key;
1937 memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
1938}
1939
1940EXPORT_SYMBOL_GPL(lockdep_init_map);
1941
1942/*
1943 * This gets called for every mutex_lock*()/spin_lock*() operation.
1944 * We maintain the dependency maps and validate the locking attempt:
1945 */
1946static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
1947 int trylock, int read, int check, int hardirqs_off,
1948 unsigned long ip)
1949{
1950 struct task_struct *curr = current;
1951 struct held_lock *hlock;
1952 struct lock_class *class;
1953 unsigned int depth, id;
1954 int chain_head = 0;
1955 u64 chain_key;
1956
1957 if (unlikely(!debug_locks))
1958 return 0;
1959
1960 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1961 return 0;
1962
1963 if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
1964 debug_locks_off();
1965 printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
1966 printk("turning off the locking correctness validator.\n");
1967 return 0;
1968 }
1969
1970 class = lock->class[subclass];
1971 /* not cached yet? */
1972 if (unlikely(!class)) {
1973 class = register_lock_class(lock, subclass);
1974 if (!class)
1975 return 0;
1976 }
1977 debug_atomic_inc((atomic_t *)&class->ops);
1978 if (very_verbose(class)) {
1979 printk("\nacquire class [%p] %s", class->key, class->name);
1980 if (class->name_version > 1)
1981 printk("#%d", class->name_version);
1982 printk("\n");
1983 dump_stack();
1984 }
1985
1986 /*
1987 * Add the lock to the list of currently held locks.
1988 * (we dont increase the depth just yet, up until the
1989 * dependency checks are done)
1990 */
1991 depth = curr->lockdep_depth;
1992 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
1993 return 0;
1994
1995 hlock = curr->held_locks + depth;
1996
1997 hlock->class = class;
1998 hlock->acquire_ip = ip;
1999 hlock->instance = lock;
2000 hlock->trylock = trylock;
2001 hlock->read = read;
2002 hlock->check = check;
2003 hlock->hardirqs_off = hardirqs_off;
2004
2005 if (check != 2)
2006 goto out_calc_hash;
2007#ifdef CONFIG_TRACE_IRQFLAGS
2008 /*
2009 * If non-trylock use in a hardirq or softirq context, then
2010 * mark the lock as used in these contexts:
2011 */
2012 if (!trylock) {
2013 if (read) {
2014 if (curr->hardirq_context)
2015 if (!mark_lock(curr, hlock,
2016 LOCK_USED_IN_HARDIRQ_READ, ip))
2017 return 0;
2018 if (curr->softirq_context)
2019 if (!mark_lock(curr, hlock,
2020 LOCK_USED_IN_SOFTIRQ_READ, ip))
2021 return 0;
2022 } else {
2023 if (curr->hardirq_context)
2024 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip))
2025 return 0;
2026 if (curr->softirq_context)
2027 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip))
2028 return 0;
2029 }
2030 }
2031 if (!hardirqs_off) {
2032 if (read) {
2033 if (!mark_lock(curr, hlock,
2034 LOCK_ENABLED_HARDIRQS_READ, ip))
2035 return 0;
2036 if (curr->softirqs_enabled)
2037 if (!mark_lock(curr, hlock,
2038 LOCK_ENABLED_SOFTIRQS_READ, ip))
2039 return 0;
2040 } else {
2041 if (!mark_lock(curr, hlock,
2042 LOCK_ENABLED_HARDIRQS, ip))
2043 return 0;
2044 if (curr->softirqs_enabled)
2045 if (!mark_lock(curr, hlock,
2046 LOCK_ENABLED_SOFTIRQS, ip))
2047 return 0;
2048 }
2049 }
2050#endif
2051 /* mark it as used: */
2052 if (!mark_lock(curr, hlock, LOCK_USED, ip))
2053 return 0;
2054out_calc_hash:
2055 /*
2056 * Calculate the chain hash: it's the combined has of all the
2057 * lock keys along the dependency chain. We save the hash value
2058 * at every step so that we can get the current hash easily
2059 * after unlock. The chain hash is then used to cache dependency
2060 * results.
2061 *
2062 * The 'key ID' is what is the most compact key value to drive
2063 * the hash, not class->key.
2064 */
2065 id = class - lock_classes;
2066 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
2067 return 0;
2068
2069 chain_key = curr->curr_chain_key;
2070 if (!depth) {
2071 if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
2072 return 0;
2073 chain_head = 1;
2074 }
2075
2076 hlock->prev_chain_key = chain_key;
2077
2078#ifdef CONFIG_TRACE_IRQFLAGS
2079 /*
2080 * Keep track of points where we cross into an interrupt context:
2081 */
2082 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2083 curr->softirq_context;
2084 if (depth) {
2085 struct held_lock *prev_hlock;
2086
2087 prev_hlock = curr->held_locks + depth-1;
2088 /*
2089 * If we cross into another context, reset the
2090 * hash key (this also prevents the checking and the
2091 * adding of the dependency to 'prev'):
2092 */
2093 if (prev_hlock->irq_context != hlock->irq_context) {
2094 chain_key = 0;
2095 chain_head = 1;
2096 }
2097 }
2098#endif
2099 chain_key = iterate_chain_key(chain_key, id);
2100 curr->curr_chain_key = chain_key;
2101
2102 /*
2103 * Trylock needs to maintain the stack of held locks, but it
2104 * does not add new dependencies, because trylock can be done
2105 * in any order.
2106 *
2107 * We look up the chain_key and do the O(N^2) check and update of
2108 * the dependencies only if this is a new dependency chain.
2109 * (If lookup_chain_cache() returns with 1 it acquires
2110 * hash_lock for us)
2111 */
2112 if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
2113 /*
2114 * Check whether last held lock:
2115 *
2116 * - is irq-safe, if this lock is irq-unsafe
2117 * - is softirq-safe, if this lock is hardirq-unsafe
2118 *
2119 * And check whether the new lock's dependency graph
2120 * could lead back to the previous lock.
2121 *
2122 * any of these scenarios could lead to a deadlock. If
2123 * All validations
2124 */
2125 int ret = check_deadlock(curr, hlock, lock, read);
2126
2127 if (!ret)
2128 return 0;
2129 /*
2130 * Mark recursive read, as we jump over it when
2131 * building dependencies (just like we jump over
2132 * trylock entries):
2133 */
2134 if (ret == 2)
2135 hlock->read = 2;
2136 /*
2137 * Add dependency only if this lock is not the head
2138 * of the chain, and if it's not a secondary read-lock:
2139 */
2140 if (!chain_head && ret != 2)
2141 if (!check_prevs_add(curr, hlock))
2142 return 0;
2143 __raw_spin_unlock(&hash_lock);
2144 }
2145 curr->lockdep_depth++;
2146 check_chain_key(curr);
2147 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
2148 debug_locks_off();
2149 printk("BUG: MAX_LOCK_DEPTH too low!\n");
2150 printk("turning off the locking correctness validator.\n");
2151 return 0;
2152 }
2153 if (unlikely(curr->lockdep_depth > max_lockdep_depth))
2154 max_lockdep_depth = curr->lockdep_depth;
2155
2156 return 1;
2157}
2158
2159static int
2160print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
2161 unsigned long ip)
2162{
2163 if (!debug_locks_off())
2164 return 0;
2165 if (debug_locks_silent)
2166 return 0;
2167
2168 printk("\n=====================================\n");
2169 printk( "[ BUG: bad unlock balance detected! ]\n");
2170 printk( "-------------------------------------\n");
2171 printk("%s/%d is trying to release lock (",
2172 curr->comm, curr->pid);
2173 print_lockdep_cache(lock);
2174 printk(") at:\n");
2175 print_ip_sym(ip);
2176 printk("but there are no more locks to release!\n");
2177 printk("\nother info that might help us debug this:\n");
2178 lockdep_print_held_locks(curr);
2179
2180 printk("\nstack backtrace:\n");
2181 dump_stack();
2182
2183 return 0;
2184}
2185
2186/*
2187 * Common debugging checks for both nested and non-nested unlock:
2188 */
2189static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2190 unsigned long ip)
2191{
2192 if (unlikely(!debug_locks))
2193 return 0;
2194 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2195 return 0;
2196
2197 if (curr->lockdep_depth <= 0)
2198 return print_unlock_inbalance_bug(curr, lock, ip);
2199
2200 return 1;
2201}
2202
2203/*
2204 * Remove the lock to the list of currently held locks in a
2205 * potentially non-nested (out of order) manner. This is a
2206 * relatively rare operation, as all the unlock APIs default
2207 * to nested mode (which uses lock_release()):
2208 */
2209static int
2210lock_release_non_nested(struct task_struct *curr,
2211 struct lockdep_map *lock, unsigned long ip)
2212{
2213 struct held_lock *hlock, *prev_hlock;
2214 unsigned int depth;
2215 int i;
2216
2217 /*
2218 * Check whether the lock exists in the current stack
2219 * of held locks:
2220 */
2221 depth = curr->lockdep_depth;
2222 if (DEBUG_LOCKS_WARN_ON(!depth))
2223 return 0;
2224
2225 prev_hlock = NULL;
2226 for (i = depth-1; i >= 0; i--) {
2227 hlock = curr->held_locks + i;
2228 /*
2229 * We must not cross into another context:
2230 */
2231 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2232 break;
2233 if (hlock->instance == lock)
2234 goto found_it;
2235 prev_hlock = hlock;
2236 }
2237 return print_unlock_inbalance_bug(curr, lock, ip);
2238
2239found_it:
2240 /*
2241 * We have the right lock to unlock, 'hlock' points to it.
2242 * Now we remove it from the stack, and add back the other
2243 * entries (if any), recalculating the hash along the way:
2244 */
2245 curr->lockdep_depth = i;
2246 curr->curr_chain_key = hlock->prev_chain_key;
2247
2248 for (i++; i < depth; i++) {
2249 hlock = curr->held_locks + i;
2250 if (!__lock_acquire(hlock->instance,
2251 hlock->class->subclass, hlock->trylock,
2252 hlock->read, hlock->check, hlock->hardirqs_off,
2253 hlock->acquire_ip))
2254 return 0;
2255 }
2256
2257 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
2258 return 0;
2259 return 1;
2260}
2261
2262/*
2263 * Remove the lock to the list of currently held locks - this gets
2264 * called on mutex_unlock()/spin_unlock*() (or on a failed
2265 * mutex_lock_interruptible()). This is done for unlocks that nest
2266 * perfectly. (i.e. the current top of the lock-stack is unlocked)
2267 */
2268static int lock_release_nested(struct task_struct *curr,
2269 struct lockdep_map *lock, unsigned long ip)
2270{
2271 struct held_lock *hlock;
2272 unsigned int depth;
2273
2274 /*
2275 * Pop off the top of the lock stack:
2276 */
2277 depth = curr->lockdep_depth - 1;
2278 hlock = curr->held_locks + depth;
2279
2280 /*
2281 * Is the unlock non-nested:
2282 */
2283 if (hlock->instance != lock)
2284 return lock_release_non_nested(curr, lock, ip);
2285 curr->lockdep_depth--;
2286
2287 if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
2288 return 0;
2289
2290 curr->curr_chain_key = hlock->prev_chain_key;
2291
2292#ifdef CONFIG_DEBUG_LOCKDEP
2293 hlock->prev_chain_key = 0;
2294 hlock->class = NULL;
2295 hlock->acquire_ip = 0;
2296 hlock->irq_context = 0;
2297#endif
2298 return 1;
2299}
2300
2301/*
2302 * Remove the lock to the list of currently held locks - this gets
2303 * called on mutex_unlock()/spin_unlock*() (or on a failed
2304 * mutex_lock_interruptible()). This is done for unlocks that nest
2305 * perfectly. (i.e. the current top of the lock-stack is unlocked)
2306 */
2307static void
2308__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2309{
2310 struct task_struct *curr = current;
2311
2312 if (!check_unlock(curr, lock, ip))
2313 return;
2314
2315 if (nested) {
2316 if (!lock_release_nested(curr, lock, ip))
2317 return;
2318 } else {
2319 if (!lock_release_non_nested(curr, lock, ip))
2320 return;
2321 }
2322
2323 check_chain_key(curr);
2324}
2325
/*
 * Check whether we follow the irq-flags state precisely: the tracked
 * hardirqs_enabled/softirqs_enabled state of the current task must
 * agree with <flags> and with softirq_count().
 *
 * Compiles to nothing unless both CONFIG_DEBUG_LOCKDEP and
 * CONFIG_TRACE_IRQFLAGS are set.
 */
static void check_flags(unsigned long flags)
{
#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS)
	if (!debug_locks)
		return;

	if (!irqs_disabled_flags(flags))
		DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
	else
		DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);

	/*
	 * We dont accurately track softirq state in e.g.
	 * hardirq contexts (such as on 4KSTACKS), so only
	 * check if not in hardirq contexts:
	 */
	if (!hardirq_count()) {
		if (!softirq_count())
			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
		else
			DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
	}

	/* one of the checks above tripped: show the irq event history */
	if (!debug_locks)
		print_irqtrace_events(current);
#endif
}
2356
2357/*
2358 * We are not always called with irqs disabled - do that here,
2359 * and also avoid lockdep recursion:
2360 */
2361void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2362 int trylock, int read, int check, unsigned long ip)
2363{
2364 unsigned long flags;
2365
2366 if (unlikely(current->lockdep_recursion))
2367 return;
2368
2369 raw_local_irq_save(flags);
2370 check_flags(flags);
2371
2372 current->lockdep_recursion = 1;
2373 __lock_acquire(lock, subclass, trylock, read, check,
2374 irqs_disabled_flags(flags), ip);
2375 current->lockdep_recursion = 0;
2376 raw_local_irq_restore(flags);
2377}
2378
2379EXPORT_SYMBOL_GPL(lock_acquire);
2380
2381void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2382{
2383 unsigned long flags;
2384
2385 if (unlikely(current->lockdep_recursion))
2386 return;
2387
2388 raw_local_irq_save(flags);
2389 check_flags(flags);
2390 current->lockdep_recursion = 1;
2391 __lock_release(lock, nested, ip);
2392 current->lockdep_recursion = 0;
2393 raw_local_irq_restore(flags);
2394}
2395
2396EXPORT_SYMBOL_GPL(lock_release);
2397
2398/*
2399 * Used by the testsuite, sanitize the validator state
2400 * after a simulated failure:
2401 */
2402
2403void lockdep_reset(void)
2404{
2405 unsigned long flags;
2406
2407 raw_local_irq_save(flags);
2408 current->curr_chain_key = 0;
2409 current->lockdep_depth = 0;
2410 current->lockdep_recursion = 0;
2411 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
2412 nr_hardirq_chains = 0;
2413 nr_softirq_chains = 0;
2414 nr_process_chains = 0;
2415 debug_locks = 1;
2416 raw_local_irq_restore(flags);
2417}
2418
2419static void zap_class(struct lock_class *class)
2420{
2421 int i;
2422
2423 /*
2424 * Remove all dependencies this lock is
2425 * involved in:
2426 */
2427 for (i = 0; i < nr_list_entries; i++) {
2428 if (list_entries[i].class == class)
2429 list_del_rcu(&list_entries[i].entry);
2430 }
2431 /*
2432 * Unhash the class and remove it from the all_lock_classes list:
2433 */
2434 list_del_rcu(&class->hash_entry);
2435 list_del_rcu(&class->lock_entry);
2436
2437}
2438
/*
 * Return non-zero if <addr> lies in the half-open range
 * [<start>, <start> + <size>).
 */
static inline int within(void *addr, void *start, unsigned long size)
{
	if (addr < start)
		return 0;

	return addr < start + size;
}
2443
2444void lockdep_free_key_range(void *start, unsigned long size)
2445{
2446 struct lock_class *class, *next;
2447 struct list_head *head;
2448 unsigned long flags;
2449 int i;
2450
2451 raw_local_irq_save(flags);
2452 __raw_spin_lock(&hash_lock);
2453
2454 /*
2455 * Unhash all classes that were created by this module:
2456 */
2457 for (i = 0; i < CLASSHASH_SIZE; i++) {
2458 head = classhash_table + i;
2459 if (list_empty(head))
2460 continue;
2461 list_for_each_entry_safe(class, next, head, hash_entry)
2462 if (within(class->key, start, size))
2463 zap_class(class);
2464 }
2465
2466 __raw_spin_unlock(&hash_lock);
2467 raw_local_irq_restore(flags);
2468}
2469
2470void lockdep_reset_lock(struct lockdep_map *lock)
2471{
2472 struct lock_class *class, *next, *entry;
2473 struct list_head *head;
2474 unsigned long flags;
2475 int i, j;
2476
2477 raw_local_irq_save(flags);
2478 __raw_spin_lock(&hash_lock);
2479
2480 /*
2481 * Remove all classes this lock has:
2482 */
2483 for (i = 0; i < CLASSHASH_SIZE; i++) {
2484 head = classhash_table + i;
2485 if (list_empty(head))
2486 continue;
2487 list_for_each_entry_safe(class, next, head, hash_entry) {
2488 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2489 entry = lock->class[j];
2490 if (class == entry) {
2491 zap_class(class);
2492 lock->class[j] = NULL;
2493 break;
2494 }
2495 }
2496 }
2497 }
2498
2499 /*
2500 * Debug check: in the end all mapped classes should
2501 * be gone.
2502 */
2503 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2504 entry = lock->class[j];
2505 if (!entry)
2506 continue;
2507 __raw_spin_unlock(&hash_lock);
2508 DEBUG_LOCKS_WARN_ON(1);
2509 raw_local_irq_restore(flags);
2510 return;
2511 }
2512
2513 __raw_spin_unlock(&hash_lock);
2514 raw_local_irq_restore(flags);
2515}
2516
2517void __init lockdep_init(void)
2518{
2519 int i;
2520
2521 /*
2522 * Some architectures have their own start_kernel()
2523 * code which calls lockdep_init(), while we also
2524 * call lockdep_init() from the start_kernel() itself,
2525 * and we want to initialize the hashes only once:
2526 */
2527 if (lockdep_initialized)
2528 return;
2529
2530 for (i = 0; i < CLASSHASH_SIZE; i++)
2531 INIT_LIST_HEAD(classhash_table + i);
2532
2533 for (i = 0; i < CHAINHASH_SIZE; i++)
2534 INIT_LIST_HEAD(chainhash_table + i);
2535
2536 lockdep_initialized = 1;
2537}
2538
2539void __init lockdep_info(void)
2540{
2541 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
2542
2543 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
2544 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
2545 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
2546 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
2547 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
2548 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
2549 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
2550
2551 printk(" memory used by lock dependency info: %lu kB\n",
2552 (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
2553 sizeof(struct list_head) * CLASSHASH_SIZE +
2554 sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
2555 sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
2556 sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
2557
2558 printk(" per task-struct memory footprint: %lu bytes\n",
2559 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
2560
2561#ifdef CONFIG_DEBUG_LOCKDEP
2562 if (lockdep_init_error)
2563 printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n");
2564#endif
2565}
2566
/*
 * Return non-zero if addr lies in the closed interval [start, end],
 * i.e. both end points count as being inside the range.
 */
static inline int in_range(const void *start, const void *addr, const void *end)
{
	if (addr < start)
		return 0;

	return addr <= end;
}
2571
2572static void
2573print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
2574 const void *mem_to)
2575{
2576 if (!debug_locks_off())
2577 return;
2578 if (debug_locks_silent)
2579 return;
2580
2581 printk("\n=========================\n");
2582 printk( "[ BUG: held lock freed! ]\n");
2583 printk( "-------------------------\n");
2584 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
2585 curr->comm, curr->pid, mem_from, mem_to-1);
2586 lockdep_print_held_locks(curr);
2587
2588 printk("\nstack backtrace:\n");
2589 dump_stack();
2590}
2591
2592/*
2593 * Called when kernel memory is freed (or unmapped), or if a lock
2594 * is destroyed or reinitialized - this code checks whether there is
2595 * any held lock in the memory range of <from> to <to>:
2596 */
2597void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
2598{
2599 const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
2600 struct task_struct *curr = current;
2601 struct held_lock *hlock;
2602 unsigned long flags;
2603 int i;
2604
2605 if (unlikely(!debug_locks))
2606 return;
2607
2608 local_irq_save(flags);
2609 for (i = 0; i < curr->lockdep_depth; i++) {
2610 hlock = curr->held_locks + i;
2611
2612 lock_from = (void *)hlock->instance;
2613 lock_to = (void *)(hlock->instance + 1);
2614
2615 if (!in_range(mem_from, lock_from, mem_to) &&
2616 !in_range(mem_from, lock_to, mem_to))
2617 continue;
2618
2619 print_freed_lock_bug(curr, mem_from, mem_to);
2620 break;
2621 }
2622 local_irq_restore(flags);
2623}
2624
2625static void print_held_locks_bug(struct task_struct *curr)
2626{
2627 if (!debug_locks_off())
2628 return;
2629 if (debug_locks_silent)
2630 return;
2631
2632 printk("\n=====================================\n");
2633 printk( "[ BUG: lock held at task exit time! ]\n");
2634 printk( "-------------------------------------\n");
2635 printk("%s/%d is exiting with locks still held!\n",
2636 curr->comm, curr->pid);
2637 lockdep_print_held_locks(curr);
2638
2639 printk("\nstack backtrace:\n");
2640 dump_stack();
2641}
2642
2643void debug_check_no_locks_held(struct task_struct *task)
2644{
2645 if (unlikely(task->lockdep_depth > 0))
2646 print_held_locks_bug(task);
2647}
2648
2649void debug_show_all_locks(void)
2650{
2651 struct task_struct *g, *p;
2652 int count = 10;
2653 int unlock = 1;
2654
2655 printk("\nShowing all locks held in the system:\n");
2656
2657 /*
2658 * Here we try to get the tasklist_lock as hard as possible,
2659 * if not successful after 2 seconds we ignore it (but keep
2660 * trying). This is to enable a debug printout even if a
2661 * tasklist_lock-holding task deadlocks or crashes.
2662 */
2663retry:
2664 if (!read_trylock(&tasklist_lock)) {
2665 if (count == 10)
2666 printk("hm, tasklist_lock locked, retrying... ");
2667 if (count) {
2668 count--;
2669 printk(" #%d", 10-count);
2670 mdelay(200);
2671 goto retry;
2672 }
2673 printk(" ignoring it.\n");
2674 unlock = 0;
2675 }
2676 if (count != 10)
2677 printk(" locked it.\n");
2678
2679 do_each_thread(g, p) {
2680 if (p->lockdep_depth)
2681 lockdep_print_held_locks(p);
2682 if (!unlock)
2683 if (read_trylock(&tasklist_lock))
2684 unlock = 1;
2685 } while_each_thread(g, p);
2686
2687 printk("\n");
2688 printk("=============================================\n\n");
2689
2690 if (unlock)
2691 read_unlock(&tasklist_lock);
2692}
2693
2694EXPORT_SYMBOL_GPL(debug_show_all_locks);
2695
/*
 * Exported wrapper: print the locks held by the given task by
 * delegating to lockdep_print_held_locks().
 */
void debug_show_held_locks(struct task_struct *task)
{
	lockdep_print_held_locks(task);
}
2700
2701EXPORT_SYMBOL_GPL(debug_show_held_locks);
2702
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
new file mode 100644
index 000000000000..0d355f24fe04
--- /dev/null
+++ b/kernel/lockdep_internals.h
@@ -0,0 +1,78 @@
1/*
2 * kernel/lockdep_internals.h
3 *
4 * Runtime locking correctness validator
5 *
6 * lockdep subsystem internal functions and variables.
7 */
8
9/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track.
12 *
13 * We use the per-lock dependency maps in two ways: we grow it by adding
14 * every to-be-taken lock to each currently held lock's own dependency
15 * table (if it's not there yet), and we check it for lock order
16 * conflicts and deadlocks.
17 */
18#define MAX_LOCKDEP_ENTRIES 8192UL
19
20#define MAX_LOCKDEP_KEYS_BITS 11
21#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
22
23#define MAX_LOCKDEP_CHAINS_BITS 13
24#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
25
26/*
27 * Stack-trace: tightly packed array of stack backtrace
28 * addresses. Protected by the hash_lock.
29 */
30#define MAX_STACK_TRACE_ENTRIES 131072UL
31
32extern struct list_head all_lock_classes;
33
34extern void
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
36
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38
39extern unsigned long nr_lock_classes;
40extern unsigned long nr_list_entries;
41extern unsigned long nr_lock_chains;
42extern unsigned long nr_stack_trace_entries;
43
44extern unsigned int nr_hardirq_chains;
45extern unsigned int nr_softirq_chains;
46extern unsigned int nr_process_chains;
47extern unsigned int max_lockdep_depth;
48extern unsigned int max_recursion_depth;
49
50#ifdef CONFIG_DEBUG_LOCKDEP
51/*
52 * Various lockdep statistics:
53 */
54extern atomic_t chain_lookup_hits;
55extern atomic_t chain_lookup_misses;
56extern atomic_t hardirqs_on_events;
57extern atomic_t hardirqs_off_events;
58extern atomic_t redundant_hardirqs_on;
59extern atomic_t redundant_hardirqs_off;
60extern atomic_t softirqs_on_events;
61extern atomic_t softirqs_off_events;
62extern atomic_t redundant_softirqs_on;
63extern atomic_t redundant_softirqs_off;
64extern atomic_t nr_unused_locks;
65extern atomic_t nr_cyclic_checks;
66extern atomic_t nr_cyclic_check_recursions;
67extern atomic_t nr_find_usage_forwards_checks;
68extern atomic_t nr_find_usage_forwards_recursions;
69extern atomic_t nr_find_usage_backwards_checks;
70extern atomic_t nr_find_usage_backwards_recursions;
71# define debug_atomic_inc(ptr) atomic_inc(ptr)
72# define debug_atomic_dec(ptr) atomic_dec(ptr)
73# define debug_atomic_read(ptr) atomic_read(ptr)
74#else
75# define debug_atomic_inc(ptr) do { } while (0)
76# define debug_atomic_dec(ptr) do { } while (0)
77# define debug_atomic_read(ptr) 0
78#endif
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
new file mode 100644
index 000000000000..f6e72eaab3fa
--- /dev/null
+++ b/kernel/lockdep_proc.c
@@ -0,0 +1,345 @@
1/*
2 * kernel/lockdep_proc.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * Code for /proc/lockdep and /proc/lockdep_stats:
11 *
12 */
13#include <linux/sched.h>
14#include <linux/module.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/kallsyms.h>
18#include <linux/debug_locks.h>
19
20#include "lockdep_internals.h"
21
22static void *l_next(struct seq_file *m, void *v, loff_t *pos)
23{
24 struct lock_class *class = v;
25
26 (*pos)++;
27
28 if (class->lock_entry.next != &all_lock_classes)
29 class = list_entry(class->lock_entry.next, struct lock_class,
30 lock_entry);
31 else
32 class = NULL;
33 m->private = class;
34
35 return class;
36}
37
38static void *l_start(struct seq_file *m, loff_t *pos)
39{
40 struct lock_class *class = m->private;
41
42 if (&class->lock_entry == all_lock_classes.next)
43 seq_printf(m, "all lock classes:\n");
44
45 return class;
46}
47
/*
 * seq_file ->stop: intentionally empty, this function performs no
 * cleanup of its own.
 */
static void l_stop(struct seq_file *m, void *v)
{
}
51
52static unsigned long count_forward_deps(struct lock_class *class)
53{
54 struct lock_list *entry;
55 unsigned long ret = 1;
56
57 /*
58 * Recurse this class's dependency list:
59 */
60 list_for_each_entry(entry, &class->locks_after, entry)
61 ret += count_forward_deps(entry->class);
62
63 return ret;
64}
65
66static unsigned long count_backward_deps(struct lock_class *class)
67{
68 struct lock_list *entry;
69 unsigned long ret = 1;
70
71 /*
72 * Recurse this class's dependency list:
73 */
74 list_for_each_entry(entry, &class->locks_before, entry)
75 ret += count_backward_deps(entry->class);
76
77 return ret;
78}
79
80static int l_show(struct seq_file *m, void *v)
81{
82 unsigned long nr_forward_deps, nr_backward_deps;
83 struct lock_class *class = m->private;
84 char str[128], c1, c2, c3, c4;
85 const char *name;
86
87 seq_printf(m, "%p", class->key);
88#ifdef CONFIG_DEBUG_LOCKDEP
89 seq_printf(m, " OPS:%8ld", class->ops);
90#endif
91 nr_forward_deps = count_forward_deps(class);
92 seq_printf(m, " FD:%5ld", nr_forward_deps);
93
94 nr_backward_deps = count_backward_deps(class);
95 seq_printf(m, " BD:%5ld", nr_backward_deps);
96
97 get_usage_chars(class, &c1, &c2, &c3, &c4);
98 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
99
100 name = class->name;
101 if (!name) {
102 name = __get_key_name(class->key, str);
103 seq_printf(m, ": %s", name);
104 } else{
105 seq_printf(m, ": %s", name);
106 if (class->name_version > 1)
107 seq_printf(m, "#%d", class->name_version);
108 if (class->subclass)
109 seq_printf(m, "/%d", class->subclass);
110 }
111 seq_puts(m, "\n");
112
113 return 0;
114}
115
116static struct seq_operations lockdep_ops = {
117 .start = l_start,
118 .next = l_next,
119 .stop = l_stop,
120 .show = l_show,
121};
122
123static int lockdep_open(struct inode *inode, struct file *file)
124{
125 int res = seq_open(file, &lockdep_ops);
126 if (!res) {
127 struct seq_file *m = file->private_data;
128
129 if (!list_empty(&all_lock_classes))
130 m->private = list_entry(all_lock_classes.next,
131 struct lock_class, lock_entry);
132 else
133 m->private = NULL;
134 }
135 return res;
136}
137
138static struct file_operations proc_lockdep_operations = {
139 .open = lockdep_open,
140 .read = seq_read,
141 .llseek = seq_lseek,
142 .release = seq_release,
143};
144
/*
 * Append the CONFIG_DEBUG_LOCKDEP-only event counters to the stats
 * output. Without that option this function prints nothing.
 */
static void lockdep_stats_debug_show(struct seq_file *m)
{
#ifdef CONFIG_DEBUG_LOCKDEP
	/* Snapshot the irq-state counters up front: */
	unsigned int hardirq_on = debug_atomic_read(&hardirqs_on_events);
	unsigned int hardirq_off = debug_atomic_read(&hardirqs_off_events);
	unsigned int hardirq_on_redundant = debug_atomic_read(&redundant_hardirqs_on);
	unsigned int hardirq_off_redundant = debug_atomic_read(&redundant_hardirqs_off);
	unsigned int softirq_on = debug_atomic_read(&softirqs_on_events);
	unsigned int softirq_off = debug_atomic_read(&softirqs_off_events);
	unsigned int softirq_on_redundant = debug_atomic_read(&redundant_softirqs_on);
	unsigned int softirq_off_redundant = debug_atomic_read(&redundant_softirqs_off);

	seq_printf(m, " chain lookup misses: %11u\n",
		debug_atomic_read(&chain_lookup_misses));
	seq_printf(m, " chain lookup hits: %11u\n",
		debug_atomic_read(&chain_lookup_hits));
	seq_printf(m, " cyclic checks: %11u\n",
		debug_atomic_read(&nr_cyclic_checks));
	seq_printf(m, " cyclic-check recursions: %11u\n",
		debug_atomic_read(&nr_cyclic_check_recursions));
	seq_printf(m, " find-mask forwards checks: %11u\n",
		debug_atomic_read(&nr_find_usage_forwards_checks));
	seq_printf(m, " find-mask forwards recursions: %11u\n",
		debug_atomic_read(&nr_find_usage_forwards_recursions));
	seq_printf(m, " find-mask backwards checks: %11u\n",
		debug_atomic_read(&nr_find_usage_backwards_checks));
	seq_printf(m, " find-mask backwards recursions:%11u\n",
		debug_atomic_read(&nr_find_usage_backwards_recursions));

	seq_printf(m, " hardirq on events: %11u\n", hardirq_on);
	seq_printf(m, " hardirq off events: %11u\n", hardirq_off);
	seq_printf(m, " redundant hardirq ons: %11u\n", hardirq_on_redundant);
	seq_printf(m, " redundant hardirq offs: %11u\n", hardirq_off_redundant);
	seq_printf(m, " softirq on events: %11u\n", softirq_on);
	seq_printf(m, " softirq off events: %11u\n", softirq_off);
	seq_printf(m, " redundant softirq ons: %11u\n", softirq_on_redundant);
	seq_printf(m, " redundant softirq offs: %11u\n", softirq_off_redundant);
#endif
}
184
185static int lockdep_stats_show(struct seq_file *m, void *v)
186{
187 struct lock_class *class;
188 unsigned long nr_unused = 0, nr_uncategorized = 0,
189 nr_irq_safe = 0, nr_irq_unsafe = 0,
190 nr_softirq_safe = 0, nr_softirq_unsafe = 0,
191 nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
192 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
193 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
194 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
195 sum_forward_deps = 0, factor = 0;
196
197 list_for_each_entry(class, &all_lock_classes, lock_entry) {
198
199 if (class->usage_mask == 0)
200 nr_unused++;
201 if (class->usage_mask == LOCKF_USED)
202 nr_uncategorized++;
203 if (class->usage_mask & LOCKF_USED_IN_IRQ)
204 nr_irq_safe++;
205 if (class->usage_mask & LOCKF_ENABLED_IRQS)
206 nr_irq_unsafe++;
207 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
208 nr_softirq_safe++;
209 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
210 nr_softirq_unsafe++;
211 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
212 nr_hardirq_safe++;
213 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
214 nr_hardirq_unsafe++;
215 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
216 nr_irq_read_safe++;
217 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
218 nr_irq_read_unsafe++;
219 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
220 nr_softirq_read_safe++;
221 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
222 nr_softirq_read_unsafe++;
223 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
224 nr_hardirq_read_safe++;
225 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
226 nr_hardirq_read_unsafe++;
227
228 sum_forward_deps += count_forward_deps(class);
229 }
230#ifdef CONFIG_LOCKDEP_DEBUG
231 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
232#endif
233 seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
234 nr_lock_classes, MAX_LOCKDEP_KEYS);
235 seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
236 nr_list_entries, MAX_LOCKDEP_ENTRIES);
237 seq_printf(m, " indirect dependencies: %11lu\n",
238 sum_forward_deps);
239
240 /*
241 * Total number of dependencies:
242 *
243 * All irq-safe locks may nest inside irq-unsafe locks,
244 * plus all the other known dependencies:
245 */
246 seq_printf(m, " all direct dependencies: %11lu\n",
247 nr_irq_unsafe * nr_irq_safe +
248 nr_hardirq_unsafe * nr_hardirq_safe +
249 nr_list_entries);
250
251 /*
252 * Estimated factor between direct and indirect
253 * dependencies:
254 */
255 if (nr_list_entries)
256 factor = sum_forward_deps / nr_list_entries;
257
258 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
259 nr_lock_chains, MAX_LOCKDEP_CHAINS);
260
261#ifdef CONFIG_TRACE_IRQFLAGS
262 seq_printf(m, " in-hardirq chains: %11u\n",
263 nr_hardirq_chains);
264 seq_printf(m, " in-softirq chains: %11u\n",
265 nr_softirq_chains);
266#endif
267 seq_printf(m, " in-process chains: %11u\n",
268 nr_process_chains);
269 seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
270 nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
271 seq_printf(m, " combined max dependencies: %11u\n",
272 (nr_hardirq_chains + 1) *
273 (nr_softirq_chains + 1) *
274 (nr_process_chains + 1)
275 );
276 seq_printf(m, " hardirq-safe locks: %11lu\n",
277 nr_hardirq_safe);
278 seq_printf(m, " hardirq-unsafe locks: %11lu\n",
279 nr_hardirq_unsafe);
280 seq_printf(m, " softirq-safe locks: %11lu\n",
281 nr_softirq_safe);
282 seq_printf(m, " softirq-unsafe locks: %11lu\n",
283 nr_softirq_unsafe);
284 seq_printf(m, " irq-safe locks: %11lu\n",
285 nr_irq_safe);
286 seq_printf(m, " irq-unsafe locks: %11lu\n",
287 nr_irq_unsafe);
288
289 seq_printf(m, " hardirq-read-safe locks: %11lu\n",
290 nr_hardirq_read_safe);
291 seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
292 nr_hardirq_read_unsafe);
293 seq_printf(m, " softirq-read-safe locks: %11lu\n",
294 nr_softirq_read_safe);
295 seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
296 nr_softirq_read_unsafe);
297 seq_printf(m, " irq-read-safe locks: %11lu\n",
298 nr_irq_read_safe);
299 seq_printf(m, " irq-read-unsafe locks: %11lu\n",
300 nr_irq_read_unsafe);
301
302 seq_printf(m, " uncategorized locks: %11lu\n",
303 nr_uncategorized);
304 seq_printf(m, " unused locks: %11lu\n",
305 nr_unused);
306 seq_printf(m, " max locking depth: %11u\n",
307 max_lockdep_depth);
308 seq_printf(m, " max recursion depth: %11u\n",
309 max_recursion_depth);
310 lockdep_stats_debug_show(m);
311 seq_printf(m, " debug_locks: %11u\n",
312 debug_locks);
313
314 return 0;
315}
316
317static int lockdep_stats_open(struct inode *inode, struct file *file)
318{
319 return single_open(file, lockdep_stats_show, NULL);
320}
321
322static struct file_operations proc_lockdep_stats_operations = {
323 .open = lockdep_stats_open,
324 .read = seq_read,
325 .llseek = seq_lseek,
326 .release = seq_release,
327};
328
329static int __init lockdep_proc_init(void)
330{
331 struct proc_dir_entry *entry;
332
333 entry = create_proc_entry("lockdep", S_IRUSR, NULL);
334 if (entry)
335 entry->proc_fops = &proc_lockdep_operations;
336
337 entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
338 if (entry)
339 entry->proc_fops = &proc_lockdep_stats_operations;
340
341 return 0;
342}
343
344__initcall(lockdep_proc_init);
345
diff --git a/kernel/module.c b/kernel/module.c
index 281172f01e9a..35e1b1f859d7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1121,6 +1121,9 @@ static void free_module(struct module *mod)
1121 if (mod->percpu) 1121 if (mod->percpu)
1122 percpu_modfree(mod->percpu); 1122 percpu_modfree(mod->percpu);
1123 1123
1124 /* Free lock-classes: */
1125 lockdep_free_key_range(mod->module_core, mod->core_size);
1126
1124 /* Finally, free the core (containing the module structure) */ 1127 /* Finally, free the core (containing the module structure) */
1125 module_free(mod, mod->module_core); 1128 module_free(mod, mod->module_core);
1126} 1129}
@@ -2159,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2159 return e; 2162 return e;
2160} 2163}
2161 2164
2165/*
2166 * Is this a valid module address?
2167 */
2168int is_module_address(unsigned long addr)
2169{
2170 unsigned long flags;
2171 struct module *mod;
2172
2173 spin_lock_irqsave(&modlist_lock, flags);
2174
2175 list_for_each_entry(mod, &modules, list) {
2176 if (within(addr, mod->module_core, mod->core_size)) {
2177 spin_unlock_irqrestore(&modlist_lock, flags);
2178 return 1;
2179 }
2180 }
2181
2182 spin_unlock_irqrestore(&modlist_lock, flags);
2183
2184 return 0;
2185}
2186
2187
2162/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ 2188/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
2163struct module *__module_text_address(unsigned long addr) 2189struct module *__module_text_address(unsigned long addr)
2164{ 2190{
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e38e4bac97ca..e3203c654dda 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -20,367 +20,19 @@
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/kallsyms.h> 21#include <linux/kallsyms.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/debug_locks.h>
23 24
24#include "mutex-debug.h" 25#include "mutex-debug.h"
25 26
26/* 27/*
27 * We need a global lock when we walk through the multi-process
28 * lock tree. Only used in the deadlock-debugging case.
29 */
30DEFINE_SPINLOCK(debug_mutex_lock);
31
32/*
33 * All locks held by all tasks, in a single global list:
34 */
35LIST_HEAD(debug_mutex_held_locks);
36
37/*
38 * In the debug case we carry the caller's instruction pointer into
39 * other functions, but we dont want the function argument overhead
40 * in the nondebug case - hence these macros:
41 */
42#define __IP_DECL__ , unsigned long ip
43#define __IP__ , ip
44#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
45
46/*
47 * "mutex debugging enabled" flag. We turn it off when we detect
48 * the first problem because we dont want to recurse back
49 * into the tracing code when doing error printk or
50 * executing a BUG():
51 */
52int debug_mutex_on = 1;
53
54static void printk_task(struct task_struct *p)
55{
56 if (p)
57 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
58 else
59 printk("<none>");
60}
61
62static void printk_ti(struct thread_info *ti)
63{
64 if (ti)
65 printk_task(ti->task);
66 else
67 printk("<none>");
68}
69
70static void printk_task_short(struct task_struct *p)
71{
72 if (p)
73 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
74 else
75 printk("<none>");
76}
77
78static void printk_lock(struct mutex *lock, int print_owner)
79{
80 printk(" [%p] {%s}\n", lock, lock->name);
81
82 if (print_owner && lock->owner) {
83 printk(".. held by: ");
84 printk_ti(lock->owner);
85 printk("\n");
86 }
87 if (lock->owner) {
88 printk("... acquired at: ");
89 print_symbol("%s\n", lock->acquire_ip);
90 }
91}
92
93/*
94 * printk locks held by a task:
95 */
96static void show_task_locks(struct task_struct *p)
97{
98 switch (p->state) {
99 case TASK_RUNNING: printk("R"); break;
100 case TASK_INTERRUPTIBLE: printk("S"); break;
101 case TASK_UNINTERRUPTIBLE: printk("D"); break;
102 case TASK_STOPPED: printk("T"); break;
103 case EXIT_ZOMBIE: printk("Z"); break;
104 case EXIT_DEAD: printk("X"); break;
105 default: printk("?"); break;
106 }
107 printk_task(p);
108 if (p->blocked_on) {
109 struct mutex *lock = p->blocked_on->lock;
110
111 printk(" blocked on mutex:");
112 printk_lock(lock, 1);
113 } else
114 printk(" (not blocked on mutex)\n");
115}
116
117/*
118 * printk all locks held in the system (if filter == NULL),
119 * or all locks belonging to a single task (if filter != NULL):
120 */
121void show_held_locks(struct task_struct *filter)
122{
123 struct list_head *curr, *cursor = NULL;
124 struct mutex *lock;
125 struct thread_info *t;
126 unsigned long flags;
127 int count = 0;
128
129 if (filter) {
130 printk("------------------------------\n");
131 printk("| showing all locks held by: | (");
132 printk_task_short(filter);
133 printk("):\n");
134 printk("------------------------------\n");
135 } else {
136 printk("---------------------------\n");
137 printk("| showing all locks held: |\n");
138 printk("---------------------------\n");
139 }
140
141 /*
142 * Play safe and acquire the global trace lock. We
143 * cannot printk with that lock held so we iterate
144 * very carefully:
145 */
146next:
147 debug_spin_lock_save(&debug_mutex_lock, flags);
148 list_for_each(curr, &debug_mutex_held_locks) {
149 if (cursor && curr != cursor)
150 continue;
151 lock = list_entry(curr, struct mutex, held_list);
152 t = lock->owner;
153 if (filter && (t != filter->thread_info))
154 continue;
155 count++;
156 cursor = curr->next;
157 debug_spin_unlock_restore(&debug_mutex_lock, flags);
158
159 printk("\n#%03d: ", count);
160 printk_lock(lock, filter ? 0 : 1);
161 goto next;
162 }
163 debug_spin_unlock_restore(&debug_mutex_lock, flags);
164 printk("\n");
165}
166
167void mutex_debug_show_all_locks(void)
168{
169 struct task_struct *g, *p;
170 int count = 10;
171 int unlock = 1;
172
173 printk("\nShowing all blocking locks in the system:\n");
174
175 /*
176 * Here we try to get the tasklist_lock as hard as possible,
177 * if not successful after 2 seconds we ignore it (but keep
178 * trying). This is to enable a debug printout even if a
179 * tasklist_lock-holding task deadlocks or crashes.
180 */
181retry:
182 if (!read_trylock(&tasklist_lock)) {
183 if (count == 10)
184 printk("hm, tasklist_lock locked, retrying... ");
185 if (count) {
186 count--;
187 printk(" #%d", 10-count);
188 mdelay(200);
189 goto retry;
190 }
191 printk(" ignoring it.\n");
192 unlock = 0;
193 }
194 if (count != 10)
195 printk(" locked it.\n");
196
197 do_each_thread(g, p) {
198 show_task_locks(p);
199 if (!unlock)
200 if (read_trylock(&tasklist_lock))
201 unlock = 1;
202 } while_each_thread(g, p);
203
204 printk("\n");
205 show_held_locks(NULL);
206 printk("=============================================\n\n");
207
208 if (unlock)
209 read_unlock(&tasklist_lock);
210}
211
212static void report_deadlock(struct task_struct *task, struct mutex *lock,
213 struct mutex *lockblk, unsigned long ip)
214{
215 printk("\n%s/%d is trying to acquire this lock:\n",
216 current->comm, current->pid);
217 printk_lock(lock, 1);
218 printk("... trying at: ");
219 print_symbol("%s\n", ip);
220 show_held_locks(current);
221
222 if (lockblk) {
223 printk("but %s/%d is deadlocking current task %s/%d!\n\n",
224 task->comm, task->pid, current->comm, current->pid);
225 printk("\n%s/%d is blocked on this lock:\n",
226 task->comm, task->pid);
227 printk_lock(lockblk, 1);
228
229 show_held_locks(task);
230
231 printk("\n%s/%d's [blocked] stackdump:\n\n",
232 task->comm, task->pid);
233 show_stack(task, NULL);
234 }
235
236 printk("\n%s/%d's [current] stackdump:\n\n",
237 current->comm, current->pid);
238 dump_stack();
239 mutex_debug_show_all_locks();
240 printk("[ turning off deadlock detection. Please report this. ]\n\n");
241 local_irq_disable();
242}
243
244/*
245 * Recursively check for mutex deadlocks:
246 */
247static int check_deadlock(struct mutex *lock, int depth,
248 struct thread_info *ti, unsigned long ip)
249{
250 struct mutex *lockblk;
251 struct task_struct *task;
252
253 if (!debug_mutex_on)
254 return 0;
255
256 ti = lock->owner;
257 if (!ti)
258 return 0;
259
260 task = ti->task;
261 lockblk = NULL;
262 if (task->blocked_on)
263 lockblk = task->blocked_on->lock;
264
265 /* Self-deadlock: */
266 if (current == task) {
267 DEBUG_OFF();
268 if (depth)
269 return 1;
270 printk("\n==========================================\n");
271 printk( "[ BUG: lock recursion deadlock detected! |\n");
272 printk( "------------------------------------------\n");
273 report_deadlock(task, lock, NULL, ip);
274 return 0;
275 }
276
277 /* Ugh, something corrupted the lock data structure? */
278 if (depth > 20) {
279 DEBUG_OFF();
280 printk("\n===========================================\n");
281 printk( "[ BUG: infinite lock dependency detected!? |\n");
282 printk( "-------------------------------------------\n");
283 report_deadlock(task, lock, lockblk, ip);
284 return 0;
285 }
286
287 /* Recursively check for dependencies: */
288 if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
289 printk("\n============================================\n");
290 printk( "[ BUG: circular locking deadlock detected! ]\n");
291 printk( "--------------------------------------------\n");
292 report_deadlock(task, lock, lockblk, ip);
293 return 0;
294 }
295 return 0;
296}
297
298/*
299 * Called when a task exits, this function checks whether the
300 * task is holding any locks, and reports the first one if so:
301 */
302void mutex_debug_check_no_locks_held(struct task_struct *task)
303{
304 struct list_head *curr, *next;
305 struct thread_info *t;
306 unsigned long flags;
307 struct mutex *lock;
308
309 if (!debug_mutex_on)
310 return;
311
312 debug_spin_lock_save(&debug_mutex_lock, flags);
313 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
314 lock = list_entry(curr, struct mutex, held_list);
315 t = lock->owner;
316 if (t != task->thread_info)
317 continue;
318 list_del_init(curr);
319 DEBUG_OFF();
320 debug_spin_unlock_restore(&debug_mutex_lock, flags);
321
322 printk("BUG: %s/%d, lock held at task exit time!\n",
323 task->comm, task->pid);
324 printk_lock(lock, 1);
325 if (lock->owner != task->thread_info)
326 printk("exiting task is not even the owner??\n");
327 return;
328 }
329 debug_spin_unlock_restore(&debug_mutex_lock, flags);
330}
331
332/*
333 * Called when kernel memory is freed (or unmapped), or if a mutex
334 * is destroyed or reinitialized - this code checks whether there is
335 * any held lock in the memory range of <from> to <to>:
336 */
337void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
338{
339 struct list_head *curr, *next;
340 const void *to = from + len;
341 unsigned long flags;
342 struct mutex *lock;
343 void *lock_addr;
344
345 if (!debug_mutex_on)
346 return;
347
348 debug_spin_lock_save(&debug_mutex_lock, flags);
349 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
350 lock = list_entry(curr, struct mutex, held_list);
351 lock_addr = lock;
352 if (lock_addr < from || lock_addr >= to)
353 continue;
354 list_del_init(curr);
355 DEBUG_OFF();
356 debug_spin_unlock_restore(&debug_mutex_lock, flags);
357
358 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
359 current->comm, current->pid, lock, from, to);
360 dump_stack();
361 printk_lock(lock, 1);
362 if (lock->owner != current_thread_info())
363 printk("freeing task is not even the owner??\n");
364 return;
365 }
366 debug_spin_unlock_restore(&debug_mutex_lock, flags);
367}
368
369/*
370 * Must be called with lock->wait_lock held. 28 * Must be called with lock->wait_lock held.
371 */ 29 */
372void debug_mutex_set_owner(struct mutex *lock, 30void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
373 struct thread_info *new_owner __IP_DECL__)
374{ 31{
375 lock->owner = new_owner; 32 lock->owner = new_owner;
376 DEBUG_WARN_ON(!list_empty(&lock->held_list));
377 if (debug_mutex_on) {
378 list_add_tail(&lock->held_list, &debug_mutex_held_locks);
379 lock->acquire_ip = ip;
380 }
381} 33}
382 34
383void debug_mutex_init_waiter(struct mutex_waiter *waiter) 35void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
384{ 36{
385 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 37 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
386 waiter->magic = waiter; 38 waiter->magic = waiter;
@@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)
389 41
390void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) 42void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
391{ 43{
392 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 44 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
393 DEBUG_WARN_ON(list_empty(&lock->wait_list)); 45 DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
394 DEBUG_WARN_ON(waiter->magic != waiter); 46 DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
395 DEBUG_WARN_ON(list_empty(&waiter->list)); 47 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
396} 48}
397 49
398void debug_mutex_free_waiter(struct mutex_waiter *waiter) 50void debug_mutex_free_waiter(struct mutex_waiter *waiter)
399{ 51{
400 DEBUG_WARN_ON(!list_empty(&waiter->list)); 52 DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
401 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); 53 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
402} 54}
403 55
404void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, 56void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
405 struct thread_info *ti __IP_DECL__) 57 struct thread_info *ti)
406{ 58{
407 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 59 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
408 check_deadlock(lock, 0, ti, ip); 60
409 /* Mark the current thread as blocked on the lock: */ 61 /* Mark the current thread as blocked on the lock: */
410 ti->task->blocked_on = waiter; 62 ti->task->blocked_on = waiter;
411 waiter->lock = lock; 63 waiter->lock = lock;
@@ -414,9 +66,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
414void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 66void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
415 struct thread_info *ti) 67 struct thread_info *ti)
416{ 68{
417 DEBUG_WARN_ON(list_empty(&waiter->list)); 69 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
418 DEBUG_WARN_ON(waiter->task != ti->task); 70 DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
419 DEBUG_WARN_ON(ti->task->blocked_on != waiter); 71 DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
420 ti->task->blocked_on = NULL; 72 ti->task->blocked_on = NULL;
421 73
422 list_del_init(&waiter->list); 74 list_del_init(&waiter->list);
@@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
425 77
426void debug_mutex_unlock(struct mutex *lock) 78void debug_mutex_unlock(struct mutex *lock)
427{ 79{
428 DEBUG_WARN_ON(lock->magic != lock); 80 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
429 DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 81 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
430 DEBUG_WARN_ON(lock->owner != current_thread_info()); 82 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
431 if (debug_mutex_on) { 83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
432 DEBUG_WARN_ON(list_empty(&lock->held_list));
433 list_del_init(&lock->held_list);
434 }
435} 84}
436 85
437void debug_mutex_init(struct mutex *lock, const char *name) 86void debug_mutex_init(struct mutex *lock, const char *name,
87 struct lock_class_key *key)
438{ 88{
89#ifdef CONFIG_DEBUG_LOCK_ALLOC
439 /* 90 /*
440 * Make sure we are not reinitializing a held lock: 91 * Make sure we are not reinitializing a held lock:
441 */ 92 */
442 mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 93 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
94 lockdep_init_map(&lock->dep_map, name, key);
95#endif
443 lock->owner = NULL; 96 lock->owner = NULL;
444 INIT_LIST_HEAD(&lock->held_list);
445 lock->name = name;
446 lock->magic = lock; 97 lock->magic = lock;
447} 98}
448 99
@@ -456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)
456 */ 107 */
457void fastcall mutex_destroy(struct mutex *lock) 108void fastcall mutex_destroy(struct mutex *lock)
458{ 109{
459 DEBUG_WARN_ON(mutex_is_locked(lock)); 110 DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
460 lock->magic = NULL; 111 lock->magic = NULL;
461} 112}
462 113
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index a5196c36a5fd..babfbdfc534b 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -10,110 +10,44 @@
10 * More details are in kernel/mutex-debug.c. 10 * More details are in kernel/mutex-debug.c.
11 */ 11 */
12 12
13extern spinlock_t debug_mutex_lock;
14extern struct list_head debug_mutex_held_locks;
15extern int debug_mutex_on;
16
17/*
18 * In the debug case we carry the caller's instruction pointer into
19 * other functions, but we dont want the function argument overhead
20 * in the nondebug case - hence these macros:
21 */
22#define __IP_DECL__ , unsigned long ip
23#define __IP__ , ip
24#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
25
26/* 13/*
27 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
28 */ 15 */
29extern void debug_mutex_set_owner(struct mutex *lock, 16extern void
30 struct thread_info *new_owner __IP_DECL__); 17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
31 18
32static inline void debug_mutex_clear_owner(struct mutex *lock) 19static inline void debug_mutex_clear_owner(struct mutex *lock)
33{ 20{
34 lock->owner = NULL; 21 lock->owner = NULL;
35} 22}
36 23
37extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); 24extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter);
38extern void debug_mutex_wake_waiter(struct mutex *lock, 26extern void debug_mutex_wake_waiter(struct mutex *lock,
39 struct mutex_waiter *waiter); 27 struct mutex_waiter *waiter);
40extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); 28extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
41extern void debug_mutex_add_waiter(struct mutex *lock, 29extern void debug_mutex_add_waiter(struct mutex *lock,
42 struct mutex_waiter *waiter, 30 struct mutex_waiter *waiter,
43 struct thread_info *ti __IP_DECL__); 31 struct thread_info *ti);
44extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 32extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
45 struct thread_info *ti); 33 struct thread_info *ti);
46extern void debug_mutex_unlock(struct mutex *lock); 34extern void debug_mutex_unlock(struct mutex *lock);
47extern void debug_mutex_init(struct mutex *lock, const char *name); 35extern void debug_mutex_init(struct mutex *lock, const char *name,
48 36 struct lock_class_key *key);
49#define debug_spin_lock_save(lock, flags) \
50 do { \
51 local_irq_save(flags); \
52 if (debug_mutex_on) \
53 spin_lock(lock); \
54 } while (0)
55
56#define debug_spin_unlock_restore(lock, flags) \
57 do { \
58 if (debug_mutex_on) \
59 spin_unlock(lock); \
60 local_irq_restore(flags); \
61 preempt_check_resched(); \
62 } while (0)
63 37
64#define spin_lock_mutex(lock, flags) \ 38#define spin_lock_mutex(lock, flags) \
65 do { \ 39 do { \
66 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
67 \ 41 \
68 DEBUG_WARN_ON(in_interrupt()); \ 42 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
69 debug_spin_lock_save(&debug_mutex_lock, flags); \ 43 local_irq_save(flags); \
70 spin_lock(lock); \ 44 __raw_spin_lock(&(lock)->raw_lock); \
71 DEBUG_WARN_ON(l->magic != l); \ 45 DEBUG_LOCKS_WARN_ON(l->magic != l); \
72 } while (0) 46 } while (0)
73 47
74#define spin_unlock_mutex(lock, flags) \ 48#define spin_unlock_mutex(lock, flags) \
75 do { \ 49 do { \
76 spin_unlock(lock); \ 50 __raw_spin_unlock(&(lock)->raw_lock); \
77 debug_spin_unlock_restore(&debug_mutex_lock, flags); \ 51 local_irq_restore(flags); \
52 preempt_check_resched(); \
78 } while (0) 53 } while (0)
79
80#define DEBUG_OFF() \
81do { \
82 if (debug_mutex_on) { \
83 debug_mutex_on = 0; \
84 console_verbose(); \
85 if (spin_is_locked(&debug_mutex_lock)) \
86 spin_unlock(&debug_mutex_lock); \
87 } \
88} while (0)
89
90#define DEBUG_BUG() \
91do { \
92 if (debug_mutex_on) { \
93 DEBUG_OFF(); \
94 BUG(); \
95 } \
96} while (0)
97
98#define DEBUG_WARN_ON(c) \
99do { \
100 if (unlikely(c && debug_mutex_on)) { \
101 DEBUG_OFF(); \
102 WARN_ON(1); \
103 } \
104} while (0)
105
106# define DEBUG_BUG_ON(c) \
107do { \
108 if (unlikely(c)) \
109 DEBUG_BUG(); \
110} while (0)
111
112#ifdef CONFIG_SMP
113# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c)
114# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c)
115#else
116# define SMP_DEBUG_WARN_ON(c) do { } while (0)
117# define SMP_DEBUG_BUG_ON(c) do { } while (0)
118#endif
119
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 7043db21bbce..8c71cf72a497 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/debug_locks.h>
20 21
21/* 22/*
22 * In the DEBUG case we are using the "NULL fastpath" for mutexes, 23 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -38,13 +39,14 @@
38 * 39 *
39 * It is not allowed to initialize an already locked mutex. 40 * It is not allowed to initialize an already locked mutex.
40 */ 41 */
41void fastcall __mutex_init(struct mutex *lock, const char *name) 42void
43__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
42{ 44{
43 atomic_set(&lock->count, 1); 45 atomic_set(&lock->count, 1);
44 spin_lock_init(&lock->wait_lock); 46 spin_lock_init(&lock->wait_lock);
45 INIT_LIST_HEAD(&lock->wait_list); 47 INIT_LIST_HEAD(&lock->wait_list);
46 48
47 debug_mutex_init(lock, name); 49 debug_mutex_init(lock, name, key);
48} 50}
49 51
50EXPORT_SYMBOL(__mutex_init); 52EXPORT_SYMBOL(__mutex_init);
@@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init);
56 * branch is predicted by the CPU as default-untaken. 58 * branch is predicted by the CPU as default-untaken.
57 */ 59 */
58static void fastcall noinline __sched 60static void fastcall noinline __sched
59__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); 61__mutex_lock_slowpath(atomic_t *lock_count);
60 62
61/*** 63/***
62 * mutex_lock - acquire the mutex 64 * mutex_lock - acquire the mutex
@@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
79 * 81 *
80 * This function is similar to (but not equivalent to) down(). 82 * This function is similar to (but not equivalent to) down().
81 */ 83 */
82void fastcall __sched mutex_lock(struct mutex *lock) 84void inline fastcall __sched mutex_lock(struct mutex *lock)
83{ 85{
84 might_sleep(); 86 might_sleep();
85 /* 87 /*
@@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)
92EXPORT_SYMBOL(mutex_lock); 94EXPORT_SYMBOL(mutex_lock);
93 95
94static void fastcall noinline __sched 96static void fastcall noinline __sched
95__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); 97__mutex_unlock_slowpath(atomic_t *lock_count);
96 98
97/*** 99/***
98 * mutex_unlock - release the mutex 100 * mutex_unlock - release the mutex
@@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock);
120 * Lock a mutex (possibly interruptible), slowpath: 122 * Lock a mutex (possibly interruptible), slowpath:
121 */ 123 */
122static inline int __sched 124static inline int __sched
123__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) 125__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
124{ 126{
125 struct task_struct *task = current; 127 struct task_struct *task = current;
126 struct mutex_waiter waiter; 128 struct mutex_waiter waiter;
127 unsigned int old_val; 129 unsigned int old_val;
128 unsigned long flags; 130 unsigned long flags;
129 131
130 debug_mutex_init_waiter(&waiter);
131
132 spin_lock_mutex(&lock->wait_lock, flags); 132 spin_lock_mutex(&lock->wait_lock, flags);
133 133
134 debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); 134 debug_mutex_lock_common(lock, &waiter);
135 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
136 debug_mutex_add_waiter(lock, &waiter, task->thread_info);
135 137
136 /* add waiting tasks to the end of the waitqueue (FIFO): */ 138 /* add waiting tasks to the end of the waitqueue (FIFO): */
137 list_add_tail(&waiter.list, &lock->wait_list); 139 list_add_tail(&waiter.list, &lock->wait_list);
@@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
158 if (unlikely(state == TASK_INTERRUPTIBLE && 160 if (unlikely(state == TASK_INTERRUPTIBLE &&
159 signal_pending(task))) { 161 signal_pending(task))) {
160 mutex_remove_waiter(lock, &waiter, task->thread_info); 162 mutex_remove_waiter(lock, &waiter, task->thread_info);
163 mutex_release(&lock->dep_map, 1, _RET_IP_);
161 spin_unlock_mutex(&lock->wait_lock, flags); 164 spin_unlock_mutex(&lock->wait_lock, flags);
162 165
163 debug_mutex_free_waiter(&waiter); 166 debug_mutex_free_waiter(&waiter);
@@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
173 176
174 /* got the lock - rejoice! */ 177 /* got the lock - rejoice! */
175 mutex_remove_waiter(lock, &waiter, task->thread_info); 178 mutex_remove_waiter(lock, &waiter, task->thread_info);
176 debug_mutex_set_owner(lock, task->thread_info __IP__); 179 debug_mutex_set_owner(lock, task->thread_info);
177 180
178 /* set it to 0 if there are no waiters left: */ 181 /* set it to 0 if there are no waiters left: */
179 if (likely(list_empty(&lock->wait_list))) 182 if (likely(list_empty(&lock->wait_list)))
@@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
183 186
184 debug_mutex_free_waiter(&waiter); 187 debug_mutex_free_waiter(&waiter);
185 188
186 DEBUG_WARN_ON(list_empty(&lock->held_list));
187 DEBUG_WARN_ON(lock->owner != task->thread_info);
188
189 return 0; 189 return 0;
190} 190}
191 191
192static void fastcall noinline __sched 192static void fastcall noinline __sched
193__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) 193__mutex_lock_slowpath(atomic_t *lock_count)
194{ 194{
195 struct mutex *lock = container_of(lock_count, struct mutex, count); 195 struct mutex *lock = container_of(lock_count, struct mutex, count);
196 196
197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); 197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
198}
199
200#ifdef CONFIG_DEBUG_LOCK_ALLOC
201void __sched
202mutex_lock_nested(struct mutex *lock, unsigned int subclass)
203{
204 might_sleep();
205 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
198} 206}
199 207
208EXPORT_SYMBOL_GPL(mutex_lock_nested);
209#endif
210
200/* 211/*
201 * Release the lock, slowpath: 212 * Release the lock, slowpath:
202 */ 213 */
203static fastcall noinline void 214static fastcall inline void
204__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) 215__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
205{ 216{
206 struct mutex *lock = container_of(lock_count, struct mutex, count); 217 struct mutex *lock = container_of(lock_count, struct mutex, count);
207 unsigned long flags; 218 unsigned long flags;
208 219
209 DEBUG_WARN_ON(lock->owner != current_thread_info());
210
211 spin_lock_mutex(&lock->wait_lock, flags); 220 spin_lock_mutex(&lock->wait_lock, flags);
221 mutex_release(&lock->dep_map, nested, _RET_IP_);
222 debug_mutex_unlock(lock);
212 223
213 /* 224 /*
214 * some architectures leave the lock unlocked in the fastpath failure 225 * some architectures leave the lock unlocked in the fastpath failure
@@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
218 if (__mutex_slowpath_needs_to_unlock()) 229 if (__mutex_slowpath_needs_to_unlock())
219 atomic_set(&lock->count, 1); 230 atomic_set(&lock->count, 1);
220 231
221 debug_mutex_unlock(lock);
222
223 if (!list_empty(&lock->wait_list)) { 232 if (!list_empty(&lock->wait_list)) {
224 /* get the first entry from the wait-list: */ 233 /* get the first entry from the wait-list: */
225 struct mutex_waiter *waiter = 234 struct mutex_waiter *waiter =
@@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
237} 246}
238 247
239/* 248/*
249 * Release the lock, slowpath:
250 */
251static fastcall noinline void
252__mutex_unlock_slowpath(atomic_t *lock_count)
253{
254 __mutex_unlock_common_slowpath(lock_count, 1);
255}
256
257/*
240 * Here come the less common (and hence less performance-critical) APIs: 258 * Here come the less common (and hence less performance-critical) APIs:
241 * mutex_lock_interruptible() and mutex_trylock(). 259 * mutex_lock_interruptible() and mutex_trylock().
242 */ 260 */
243static int fastcall noinline __sched 261static int fastcall noinline __sched
244__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); 262__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
245 263
246/*** 264/***
247 * mutex_lock_interruptible - acquire the mutex, interruptable 265 * mutex_lock_interruptible - acquire the mutex, interruptable
@@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
264EXPORT_SYMBOL(mutex_lock_interruptible); 282EXPORT_SYMBOL(mutex_lock_interruptible);
265 283
266static int fastcall noinline __sched 284static int fastcall noinline __sched
267__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) 285__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
268{ 286{
269 struct mutex *lock = container_of(lock_count, struct mutex, count); 287 struct mutex *lock = container_of(lock_count, struct mutex, count);
270 288
271 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); 289 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
272} 290}
273 291
274/* 292/*
@@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
284 spin_lock_mutex(&lock->wait_lock, flags); 302 spin_lock_mutex(&lock->wait_lock, flags);
285 303
286 prev = atomic_xchg(&lock->count, -1); 304 prev = atomic_xchg(&lock->count, -1);
287 if (likely(prev == 1)) 305 if (likely(prev == 1)) {
288 debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); 306 debug_mutex_set_owner(lock, current_thread_info());
307 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
308 }
289 /* Set it back to 0 if there are no waiters: */ 309 /* Set it back to 0 if there are no waiters: */
290 if (likely(list_empty(&lock->wait_list))) 310 if (likely(list_empty(&lock->wait_list)))
291 atomic_set(&lock->count, 0); 311 atomic_set(&lock->count, 0);
@@ -309,7 +329,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
309 * This function must not be used in interrupt context. The 329 * This function must not be used in interrupt context. The
310 * mutex must be released by the same task that acquired it. 330 * mutex must be released by the same task that acquired it.
311 */ 331 */
312int fastcall mutex_trylock(struct mutex *lock) 332int fastcall __sched mutex_trylock(struct mutex *lock)
313{ 333{
314 return __mutex_fastpath_trylock(&lock->count, 334 return __mutex_fastpath_trylock(&lock->count,
315 __mutex_trylock_slowpath); 335 __mutex_trylock_slowpath);
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 069189947257..a075dafbb290 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,22 +16,15 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define DEBUG_WARN_ON(c) do { } while (0)
20#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#define debug_mutex_set_owner(lock, new_owner) do { } while (0)
21#define debug_mutex_clear_owner(lock) do { } while (0) 20#define debug_mutex_clear_owner(lock) do { } while (0)
22#define debug_mutex_init_waiter(waiter) do { } while (0)
23#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
24#define debug_mutex_free_waiter(waiter) do { } while (0) 22#define debug_mutex_free_waiter(waiter) do { } while (0)
25#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0) 23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
26#define debug_mutex_unlock(lock) do { } while (0) 24#define debug_mutex_unlock(lock) do { } while (0)
27#define debug_mutex_init(lock, name) do { } while (0) 25#define debug_mutex_init(lock, name, key) do { } while (0)
28
29/*
30 * Return-address parameters/declarations. They are very useful for
31 * debugging, but add overhead in the !DEBUG case - so we go the
32 * trouble of using this not too elegant but zero-cost solution:
33 */
34#define __IP_DECL__
35#define __IP__
36#define __RET_IP__
37 26
27static inline void
28debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
29{
30}
diff --git a/kernel/pid.c b/kernel/pid.c
index eeb836b65ca4..93e212f20671 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)
218 return NULL; 218 return NULL;
219} 219}
220 220
221int fastcall attach_pid(task_t *task, enum pid_type type, int nr) 221int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
222{ 222{
223 struct pid_link *link; 223 struct pid_link *link;
224 struct pid *pid; 224 struct pid *pid;
@@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
233 return 0; 233 return 0;
234} 234}
235 235
236void fastcall detach_pid(task_t *task, enum pid_type type) 236void fastcall detach_pid(struct task_struct *task, enum pid_type type)
237{ 237{
238 struct pid_link *link; 238 struct pid_link *link;
239 struct pid *pid; 239 struct pid *pid;
@@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
267/* 267/*
268 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 268 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
269 */ 269 */
270task_t *find_task_by_pid_type(int type, int nr) 270struct task_struct *find_task_by_pid_type(int type, int nr)
271{ 271{
272 return pid_task(find_pid(nr), type); 272 return pid_task(find_pid(nr), type);
273} 273}
diff --git a/kernel/printk.c b/kernel/printk.c
index 39ae24d2a415..bdba5d80496c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -518,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
518 zap_locks(); 518 zap_locks();
519 519
520 /* This stops the holder of console_sem just where we want him */ 520 /* This stops the holder of console_sem just where we want him */
521 spin_lock_irqsave(&logbuf_lock, flags); 521 local_irq_save(flags);
522 lockdep_off();
523 spin_lock(&logbuf_lock);
522 printk_cpu = smp_processor_id(); 524 printk_cpu = smp_processor_id();
523 525
524 /* Emit the output into the temporary buffer */ 526 /* Emit the output into the temporary buffer */
@@ -588,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
588 */ 590 */
589 console_locked = 1; 591 console_locked = 1;
590 printk_cpu = UINT_MAX; 592 printk_cpu = UINT_MAX;
591 spin_unlock_irqrestore(&logbuf_lock, flags); 593 spin_unlock(&logbuf_lock);
592 594
593 /* 595 /*
594 * Console drivers may assume that per-cpu resources have 596 * Console drivers may assume that per-cpu resources have
@@ -604,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
604 console_locked = 0; 606 console_locked = 0;
605 up(&console_sem); 607 up(&console_sem);
606 } 608 }
609 lockdep_on();
610 local_irq_restore(flags);
607 } else { 611 } else {
608 /* 612 /*
609 * Someone else owns the drivers. We drop the spinlock, which 613 * Someone else owns the drivers. We drop the spinlock, which
@@ -611,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
611 * console drivers with the output which we just produced. 615 * console drivers with the output which we just produced.
612 */ 616 */
613 printk_cpu = UINT_MAX; 617 printk_cpu = UINT_MAX;
614 spin_unlock_irqrestore(&logbuf_lock, flags); 618 spin_unlock(&logbuf_lock);
619 lockdep_on();
620 local_irq_restore(flags);
615 } 621 }
616 622
617 preempt_enable(); 623 preempt_enable();
@@ -809,8 +815,15 @@ void release_console_sem(void)
809 console_may_schedule = 0; 815 console_may_schedule = 0;
810 up(&console_sem); 816 up(&console_sem);
811 spin_unlock_irqrestore(&logbuf_lock, flags); 817 spin_unlock_irqrestore(&logbuf_lock, flags);
812 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) 818 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
813 wake_up_interruptible(&log_wait); 819 /*
820 * If we printk from within the lock dependency code,
821 * from within the scheduler code, then do not lock
822 * up due to self-recursion:
823 */
824 if (!lockdep_internal())
825 wake_up_interruptible(&log_wait);
826 }
814} 827}
815EXPORT_SYMBOL(release_console_sem); 828EXPORT_SYMBOL(release_console_sem);
816 829
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 335c5b932e14..9a111f70145c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,7 +28,7 @@
28 * 28 *
29 * Must be called with the tasklist lock write-held. 29 * Must be called with the tasklist lock write-held.
30 */ 30 */
31void __ptrace_link(task_t *child, task_t *new_parent) 31void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
32{ 32{
33 BUG_ON(!list_empty(&child->ptrace_list)); 33 BUG_ON(!list_empty(&child->ptrace_list));
34 if (child->parent == new_parent) 34 if (child->parent == new_parent)
@@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)
46 * TASK_TRACED, resume it now. 46 * TASK_TRACED, resume it now.
47 * Requires that irqs be disabled. 47 * Requires that irqs be disabled.
48 */ 48 */
49void ptrace_untrace(task_t *child) 49void ptrace_untrace(struct task_struct *child)
50{ 50{
51 spin_lock(&child->sighand->siglock); 51 spin_lock(&child->sighand->siglock);
52 if (child->state == TASK_TRACED) { 52 if (child->state == TASK_TRACED) {
@@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)
65 * 65 *
66 * Must be called with the tasklist lock write-held. 66 * Must be called with the tasklist lock write-held.
67 */ 67 */
68void __ptrace_unlink(task_t *child) 68void __ptrace_unlink(struct task_struct *child)
69{ 69{
70 BUG_ON(!child->ptrace); 70 BUG_ON(!child->ptrace);
71 71
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f464f5ae3f11..759805c9859a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,13 +53,13 @@
53static struct rcu_ctrlblk rcu_ctrlblk = { 53static struct rcu_ctrlblk rcu_ctrlblk = {
54 .cur = -300, 54 .cur = -300,
55 .completed = -300, 55 .completed = -300,
56 .lock = SPIN_LOCK_UNLOCKED, 56 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
57 .cpumask = CPU_MASK_NONE, 57 .cpumask = CPU_MASK_NONE,
58}; 58};
59static struct rcu_ctrlblk rcu_bh_ctrlblk = { 59static struct rcu_ctrlblk rcu_bh_ctrlblk = {
60 .cur = -300, 60 .cur = -300,
61 .completed = -300, 61 .completed = -300,
62 .lock = SPIN_LOCK_UNLOCKED, 62 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
63 .cpumask = CPU_MASK_NONE, 63 .cpumask = CPU_MASK_NONE,
64}; 64};
65 65
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 4aa8a2c9f453..0c1faa950af7 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -26,6 +26,7 @@
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/plist.h> 27#include <linux/plist.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/debug_locks.h>
29 30
30#include "rtmutex_common.h" 31#include "rtmutex_common.h"
31 32
@@ -45,8 +46,6 @@ do { \
45 console_verbose(); \ 46 console_verbose(); \
46 if (spin_is_locked(&current->pi_lock)) \ 47 if (spin_is_locked(&current->pi_lock)) \
47 spin_unlock(&current->pi_lock); \ 48 spin_unlock(&current->pi_lock); \
48 if (spin_is_locked(&current->held_list_lock)) \
49 spin_unlock(&current->held_list_lock); \
50 } \ 49 } \
51} while (0) 50} while (0)
52 51
@@ -97,7 +96,7 @@ void deadlock_trace_off(void)
97 rt_trace_on = 0; 96 rt_trace_on = 0;
98} 97}
99 98
100static void printk_task(task_t *p) 99static void printk_task(struct task_struct *p)
101{ 100{
102 if (p) 101 if (p)
103 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); 102 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
@@ -105,14 +104,6 @@ static void printk_task(task_t *p)
105 printk("<none>"); 104 printk("<none>");
106} 105}
107 106
108static void printk_task_short(task_t *p)
109{
110 if (p)
111 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
112 else
113 printk("<none>");
114}
115
116static void printk_lock(struct rt_mutex *lock, int print_owner) 107static void printk_lock(struct rt_mutex *lock, int print_owner)
117{ 108{
118 if (lock->name) 109 if (lock->name)
@@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
128 printk_task(rt_mutex_owner(lock)); 119 printk_task(rt_mutex_owner(lock));
129 printk("\n"); 120 printk("\n");
130 } 121 }
131 if (rt_mutex_owner(lock)) {
132 printk("... acquired at: ");
133 print_symbol("%s\n", lock->acquire_ip);
134 }
135}
136
137static void printk_waiter(struct rt_mutex_waiter *w)
138{
139 printk("-------------------------\n");
140 printk("| waiter struct %p:\n", w);
141 printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
142 w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next,
143 w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next,
144 w->list_entry.prio);
145 printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
146 w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next,
147 w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next,
148 w->pi_list_entry.prio);
149 printk("\n| lock:\n");
150 printk_lock(w->lock, 1);
151 printk("| w->ti->task:\n");
152 printk_task(w->task);
153 printk("| blocked at: ");
154 print_symbol("%s\n", w->ip);
155 printk("-------------------------\n");
156}
157
158static void show_task_locks(task_t *p)
159{
160 switch (p->state) {
161 case TASK_RUNNING: printk("R"); break;
162 case TASK_INTERRUPTIBLE: printk("S"); break;
163 case TASK_UNINTERRUPTIBLE: printk("D"); break;
164 case TASK_STOPPED: printk("T"); break;
165 case EXIT_ZOMBIE: printk("Z"); break;
166 case EXIT_DEAD: printk("X"); break;
167 default: printk("?"); break;
168 }
169 printk_task(p);
170 if (p->pi_blocked_on) {
171 struct rt_mutex *lock = p->pi_blocked_on->lock;
172
173 printk(" blocked on:");
174 printk_lock(lock, 1);
175 } else
176 printk(" (not blocked)\n");
177}
178
179void rt_mutex_show_held_locks(task_t *task, int verbose)
180{
181 struct list_head *curr, *cursor = NULL;
182 struct rt_mutex *lock;
183 task_t *t;
184 unsigned long flags;
185 int count = 0;
186
187 if (!rt_trace_on)
188 return;
189
190 if (verbose) {
191 printk("------------------------------\n");
192 printk("| showing all locks held by: | (");
193 printk_task_short(task);
194 printk("):\n");
195 printk("------------------------------\n");
196 }
197
198next:
199 spin_lock_irqsave(&task->held_list_lock, flags);
200 list_for_each(curr, &task->held_list_head) {
201 if (cursor && curr != cursor)
202 continue;
203 lock = list_entry(curr, struct rt_mutex, held_list_entry);
204 t = rt_mutex_owner(lock);
205 WARN_ON(t != task);
206 count++;
207 cursor = curr->next;
208 spin_unlock_irqrestore(&task->held_list_lock, flags);
209
210 printk("\n#%03d: ", count);
211 printk_lock(lock, 0);
212 goto next;
213 }
214 spin_unlock_irqrestore(&task->held_list_lock, flags);
215
216 printk("\n");
217}
218
219void rt_mutex_show_all_locks(void)
220{
221 task_t *g, *p;
222 int count = 10;
223 int unlock = 1;
224
225 printk("\n");
226 printk("----------------------\n");
227 printk("| showing all tasks: |\n");
228 printk("----------------------\n");
229
230 /*
231 * Here we try to get the tasklist_lock as hard as possible,
232 * if not successful after 2 seconds we ignore it (but keep
233 * trying). This is to enable a debug printout even if a
234 * tasklist_lock-holding task deadlocks or crashes.
235 */
236retry:
237 if (!read_trylock(&tasklist_lock)) {
238 if (count == 10)
239 printk("hm, tasklist_lock locked, retrying... ");
240 if (count) {
241 count--;
242 printk(" #%d", 10-count);
243 mdelay(200);
244 goto retry;
245 }
246 printk(" ignoring it.\n");
247 unlock = 0;
248 }
249 if (count != 10)
250 printk(" locked it.\n");
251
252 do_each_thread(g, p) {
253 show_task_locks(p);
254 if (!unlock)
255 if (read_trylock(&tasklist_lock))
256 unlock = 1;
257 } while_each_thread(g, p);
258
259 printk("\n");
260
261 printk("-----------------------------------------\n");
262 printk("| showing all locks held in the system: |\n");
263 printk("-----------------------------------------\n");
264
265 do_each_thread(g, p) {
266 rt_mutex_show_held_locks(p, 0);
267 if (!unlock)
268 if (read_trylock(&tasklist_lock))
269 unlock = 1;
270 } while_each_thread(g, p);
271
272
273 printk("=============================================\n\n");
274
275 if (unlock)
276 read_unlock(&tasklist_lock);
277}
278
279void rt_mutex_debug_check_no_locks_held(task_t *task)
280{
281 struct rt_mutex_waiter *w;
282 struct list_head *curr;
283 struct rt_mutex *lock;
284
285 if (!rt_trace_on)
286 return;
287 if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) {
288 printk("BUG: PI priority boost leaked!\n");
289 printk_task(task);
290 printk("\n");
291 }
292 if (list_empty(&task->held_list_head))
293 return;
294
295 spin_lock(&task->pi_lock);
296 plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) {
297 TRACE_OFF();
298
299 printk("hm, PI interest held at exit time? Task:\n");
300 printk_task(task);
301 printk_waiter(w);
302 return;
303 }
304 spin_unlock(&task->pi_lock);
305
306 list_for_each(curr, &task->held_list_head) {
307 lock = list_entry(curr, struct rt_mutex, held_list_entry);
308
309 printk("BUG: %s/%d, lock held at task exit time!\n",
310 task->comm, task->pid);
311 printk_lock(lock, 1);
312 if (rt_mutex_owner(lock) != task)
313 printk("exiting task is not even the owner??\n");
314 }
315}
316
317int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
318{
319 const void *to = from + len;
320 struct list_head *curr;
321 struct rt_mutex *lock;
322 unsigned long flags;
323 void *lock_addr;
324
325 if (!rt_trace_on)
326 return 0;
327
328 spin_lock_irqsave(&current->held_list_lock, flags);
329 list_for_each(curr, &current->held_list_head) {
330 lock = list_entry(curr, struct rt_mutex, held_list_entry);
331 lock_addr = lock;
332 if (lock_addr < from || lock_addr >= to)
333 continue;
334 TRACE_OFF();
335
336 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
337 current->comm, current->pid, lock, from, to);
338 dump_stack();
339 printk_lock(lock, 1);
340 if (rt_mutex_owner(lock) != current)
341 printk("freeing task is not even the owner??\n");
342 return 1;
343 }
344 spin_unlock_irqrestore(&current->held_list_lock, flags);
345
346 return 0;
347} 122}
348 123
349void rt_mutex_debug_task_free(struct task_struct *task) 124void rt_mutex_debug_task_free(struct task_struct *task)
@@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
395 current->comm, current->pid); 170 current->comm, current->pid);
396 printk_lock(waiter->lock, 1); 171 printk_lock(waiter->lock, 1);
397 172
398 printk("... trying at: ");
399 print_symbol("%s\n", waiter->ip);
400
401 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid); 173 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
402 printk_lock(waiter->deadlock_lock, 1); 174 printk_lock(waiter->deadlock_lock, 1);
403 175
404 rt_mutex_show_held_locks(current, 1); 176 debug_show_held_locks(current);
405 rt_mutex_show_held_locks(task, 1); 177 debug_show_held_locks(task);
406 178
407 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid); 179 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
408 show_stack(task, NULL); 180 show_stack(task, NULL);
409 printk("\n%s/%d's [current] stackdump:\n\n", 181 printk("\n%s/%d's [current] stackdump:\n\n",
410 current->comm, current->pid); 182 current->comm, current->pid);
411 dump_stack(); 183 dump_stack();
412 rt_mutex_show_all_locks(); 184 debug_show_all_locks();
185
413 printk("[ turning off deadlock detection." 186 printk("[ turning off deadlock detection."
414 "Please report this trace. ]\n\n"); 187 "Please report this trace. ]\n\n");
415 local_irq_disable(); 188 local_irq_disable();
416} 189}
417 190
418void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__) 191void debug_rt_mutex_lock(struct rt_mutex *lock)
419{ 192{
420 unsigned long flags;
421
422 if (rt_trace_on) {
423 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
424
425 spin_lock_irqsave(&current->held_list_lock, flags);
426 list_add_tail(&lock->held_list_entry, &current->held_list_head);
427 spin_unlock_irqrestore(&current->held_list_lock, flags);
428
429 lock->acquire_ip = ip;
430 }
431} 193}
432 194
433void debug_rt_mutex_unlock(struct rt_mutex *lock) 195void debug_rt_mutex_unlock(struct rt_mutex *lock)
434{ 196{
435 unsigned long flags; 197 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
436
437 if (rt_trace_on) {
438 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
439 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
440
441 spin_lock_irqsave(&current->held_list_lock, flags);
442 list_del_init(&lock->held_list_entry);
443 spin_unlock_irqrestore(&current->held_list_lock, flags);
444 }
445} 198}
446 199
447void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 200void
448 struct task_struct *powner __IP_DECL__) 201debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
449{ 202{
450 unsigned long flags;
451
452 if (rt_trace_on) {
453 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
454
455 spin_lock_irqsave(&powner->held_list_lock, flags);
456 list_add_tail(&lock->held_list_entry, &powner->held_list_head);
457 spin_unlock_irqrestore(&powner->held_list_lock, flags);
458
459 lock->acquire_ip = ip;
460 }
461} 203}
462 204
463void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) 205void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
464{ 206{
465 unsigned long flags; 207 TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
466
467 if (rt_trace_on) {
468 struct task_struct *owner = rt_mutex_owner(lock);
469
470 TRACE_WARN_ON_LOCKED(!owner);
471 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
472
473 spin_lock_irqsave(&owner->held_list_lock, flags);
474 list_del_init(&lock->held_list_entry);
475 spin_unlock_irqrestore(&owner->held_list_lock, flags);
476 }
477} 208}
478 209
479void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 210void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
493 224
494void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) 225void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
495{ 226{
496 void *addr = lock; 227 /*
497 228 * Make sure we are not reinitializing a held lock:
498 if (rt_trace_on) { 229 */
499 rt_mutex_debug_check_no_locks_freed(addr, 230 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
500 sizeof(struct rt_mutex)); 231 lock->name = name;
501 INIT_LIST_HEAD(&lock->held_list_entry);
502 lock->name = name;
503 }
504} 232}
505 233
506void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) 234void
235rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
507{ 236{
508} 237}
509 238
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 7612fbc62d70..14193d596d78 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -9,20 +9,16 @@
9 * This file contains macros used solely by rtmutex.c. Debug version. 9 * This file contains macros used solely by rtmutex.c. Debug version.
10 */ 10 */
11 11
12#define __IP_DECL__ , unsigned long ip
13#define __IP__ , ip
14#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
15
16extern void 12extern void
17rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); 13rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
18extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); 14extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
19extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); 15extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
20extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); 16extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
21extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); 17extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
22extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__); 18extern void debug_rt_mutex_lock(struct rt_mutex *lock);
23extern void debug_rt_mutex_unlock(struct rt_mutex *lock); 19extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
24extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 20extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
25 struct task_struct *powner __IP_DECL__); 21 struct task_struct *powner);
26extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); 22extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
27extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, 23extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
28 struct rt_mutex *lock); 24 struct rt_mutex *lock);
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index e82c2f848249..494dac872a13 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -33,7 +33,7 @@ struct test_thread_data {
33}; 33};
34 34
35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; 35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
36static task_t *threads[MAX_RT_TEST_THREADS]; 36static struct task_struct *threads[MAX_RT_TEST_THREADS];
37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; 37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
38 38
39enum test_opcodes { 39enum test_opcodes {
@@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
362{ 362{
363 struct test_thread_data *td; 363 struct test_thread_data *td;
364 struct task_struct *tsk;
364 char *curr = buf; 365 char *curr = buf;
365 task_t *tsk;
366 int i; 366 int i;
367 367
368 td = container_of(dev, struct test_thread_data, sysdev); 368 td = container_of(dev, struct test_thread_data, sysdev);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 45d61016da57..d2ef13b485e7 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -157,12 +157,11 @@ int max_lock_depth = 1024;
157 * Decreases task's usage by one - may thus free the task. 157 * Decreases task's usage by one - may thus free the task.
158 * Returns 0 or -EDEADLK. 158 * Returns 0 or -EDEADLK.
159 */ 159 */
160static int rt_mutex_adjust_prio_chain(task_t *task, 160static int rt_mutex_adjust_prio_chain(struct task_struct *task,
161 int deadlock_detect, 161 int deadlock_detect,
162 struct rt_mutex *orig_lock, 162 struct rt_mutex *orig_lock,
163 struct rt_mutex_waiter *orig_waiter, 163 struct rt_mutex_waiter *orig_waiter,
164 struct task_struct *top_task 164 struct task_struct *top_task)
165 __IP_DECL__)
166{ 165{
167 struct rt_mutex *lock; 166 struct rt_mutex *lock;
168 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 167 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
283 spin_unlock_irqrestore(&task->pi_lock, flags); 282 spin_unlock_irqrestore(&task->pi_lock, flags);
284 out_put_task: 283 out_put_task:
285 put_task_struct(task); 284 put_task_struct(task);
285
286 return ret; 286 return ret;
287} 287}
288 288
@@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
357 * 357 *
358 * Must be called with lock->wait_lock held. 358 * Must be called with lock->wait_lock held.
359 */ 359 */
360static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) 360static int try_to_take_rt_mutex(struct rt_mutex *lock)
361{ 361{
362 /* 362 /*
363 * We have to be careful here if the atomic speedups are 363 * We have to be careful here if the atomic speedups are
@@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
384 return 0; 384 return 0;
385 385
386 /* We got the lock. */ 386 /* We got the lock. */
387 debug_rt_mutex_lock(lock __IP__); 387 debug_rt_mutex_lock(lock);
388 388
389 rt_mutex_set_owner(lock, current, 0); 389 rt_mutex_set_owner(lock, current, 0);
390 390
@@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
402 */ 402 */
403static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 403static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
404 struct rt_mutex_waiter *waiter, 404 struct rt_mutex_waiter *waiter,
405 int detect_deadlock 405 int detect_deadlock)
406 __IP_DECL__)
407{ 406{
407 struct task_struct *owner = rt_mutex_owner(lock);
408 struct rt_mutex_waiter *top_waiter = waiter; 408 struct rt_mutex_waiter *top_waiter = waiter;
409 task_t *owner = rt_mutex_owner(lock);
410 int boost = 0, res;
411 unsigned long flags; 409 unsigned long flags;
410 int boost = 0, res;
412 411
413 spin_lock_irqsave(&current->pi_lock, flags); 412 spin_lock_irqsave(&current->pi_lock, flags);
414 __rt_mutex_adjust_prio(current); 413 __rt_mutex_adjust_prio(current);
@@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
454 spin_unlock(&lock->wait_lock); 453 spin_unlock(&lock->wait_lock);
455 454
456 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 455 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
457 current __IP__); 456 current);
458 457
459 spin_lock(&lock->wait_lock); 458 spin_lock(&lock->wait_lock);
460 459
@@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
526 * Must be called with lock->wait_lock held 525 * Must be called with lock->wait_lock held
527 */ 526 */
528static void remove_waiter(struct rt_mutex *lock, 527static void remove_waiter(struct rt_mutex *lock,
529 struct rt_mutex_waiter *waiter __IP_DECL__) 528 struct rt_mutex_waiter *waiter)
530{ 529{
531 int first = (waiter == rt_mutex_top_waiter(lock)); 530 int first = (waiter == rt_mutex_top_waiter(lock));
532 int boost = 0; 531 struct task_struct *owner = rt_mutex_owner(lock);
533 task_t *owner = rt_mutex_owner(lock);
534 unsigned long flags; 532 unsigned long flags;
533 int boost = 0;
535 534
536 spin_lock_irqsave(&current->pi_lock, flags); 535 spin_lock_irqsave(&current->pi_lock, flags);
537 plist_del(&waiter->list_entry, &lock->wait_list); 536 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock,
568 567
569 spin_unlock(&lock->wait_lock); 568 spin_unlock(&lock->wait_lock);
570 569
571 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__); 570 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
572 571
573 spin_lock(&lock->wait_lock); 572 spin_lock(&lock->wait_lock);
574} 573}
@@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
595 get_task_struct(task); 594 get_task_struct(task);
596 spin_unlock_irqrestore(&task->pi_lock, flags); 595 spin_unlock_irqrestore(&task->pi_lock, flags);
597 596
598 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__); 597 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
599} 598}
600 599
601/* 600/*
@@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
604static int __sched 603static int __sched
605rt_mutex_slowlock(struct rt_mutex *lock, int state, 604rt_mutex_slowlock(struct rt_mutex *lock, int state,
606 struct hrtimer_sleeper *timeout, 605 struct hrtimer_sleeper *timeout,
607 int detect_deadlock __IP_DECL__) 606 int detect_deadlock)
608{ 607{
609 struct rt_mutex_waiter waiter; 608 struct rt_mutex_waiter waiter;
610 int ret = 0; 609 int ret = 0;
@@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
615 spin_lock(&lock->wait_lock); 614 spin_lock(&lock->wait_lock);
616 615
617 /* Try to acquire the lock again: */ 616 /* Try to acquire the lock again: */
618 if (try_to_take_rt_mutex(lock __IP__)) { 617 if (try_to_take_rt_mutex(lock)) {
619 spin_unlock(&lock->wait_lock); 618 spin_unlock(&lock->wait_lock);
620 return 0; 619 return 0;
621 } 620 }
@@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
629 628
630 for (;;) { 629 for (;;) {
631 /* Try to acquire the lock: */ 630 /* Try to acquire the lock: */
632 if (try_to_take_rt_mutex(lock __IP__)) 631 if (try_to_take_rt_mutex(lock))
633 break; 632 break;
634 633
635 /* 634 /*
@@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
653 */ 652 */
654 if (!waiter.task) { 653 if (!waiter.task) {
655 ret = task_blocks_on_rt_mutex(lock, &waiter, 654 ret = task_blocks_on_rt_mutex(lock, &waiter,
656 detect_deadlock __IP__); 655 detect_deadlock);
657 /* 656 /*
658 * If we got woken up by the owner then start loop 657 * If we got woken up by the owner then start loop
659 * all over without going into schedule to try 658 * all over without going into schedule to try
@@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
680 set_current_state(TASK_RUNNING); 679 set_current_state(TASK_RUNNING);
681 680
682 if (unlikely(waiter.task)) 681 if (unlikely(waiter.task))
683 remove_waiter(lock, &waiter __IP__); 682 remove_waiter(lock, &waiter);
684 683
685 /* 684 /*
686 * try_to_take_rt_mutex() sets the waiter bit 685 * try_to_take_rt_mutex() sets the waiter bit
@@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
711 * Slow path try-lock function: 710 * Slow path try-lock function:
712 */ 711 */
713static inline int 712static inline int
714rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) 713rt_mutex_slowtrylock(struct rt_mutex *lock)
715{ 714{
716 int ret = 0; 715 int ret = 0;
717 716
@@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
719 718
720 if (likely(rt_mutex_owner(lock) != current)) { 719 if (likely(rt_mutex_owner(lock) != current)) {
721 720
722 ret = try_to_take_rt_mutex(lock __IP__); 721 ret = try_to_take_rt_mutex(lock);
723 /* 722 /*
724 * try_to_take_rt_mutex() sets the lock waiters 723 * try_to_take_rt_mutex() sets the lock waiters
725 * bit unconditionally. Clean this up. 724 * bit unconditionally. Clean this up.
@@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
769 int detect_deadlock, 768 int detect_deadlock,
770 int (*slowfn)(struct rt_mutex *lock, int state, 769 int (*slowfn)(struct rt_mutex *lock, int state,
771 struct hrtimer_sleeper *timeout, 770 struct hrtimer_sleeper *timeout,
772 int detect_deadlock __IP_DECL__)) 771 int detect_deadlock))
773{ 772{
774 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 773 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
775 rt_mutex_deadlock_account_lock(lock, current); 774 rt_mutex_deadlock_account_lock(lock, current);
776 return 0; 775 return 0;
777 } else 776 } else
778 return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); 777 return slowfn(lock, state, NULL, detect_deadlock);
779} 778}
780 779
781static inline int 780static inline int
@@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
783 struct hrtimer_sleeper *timeout, int detect_deadlock, 782 struct hrtimer_sleeper *timeout, int detect_deadlock,
784 int (*slowfn)(struct rt_mutex *lock, int state, 783 int (*slowfn)(struct rt_mutex *lock, int state,
785 struct hrtimer_sleeper *timeout, 784 struct hrtimer_sleeper *timeout,
786 int detect_deadlock __IP_DECL__)) 785 int detect_deadlock))
787{ 786{
788 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 787 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
789 rt_mutex_deadlock_account_lock(lock, current); 788 rt_mutex_deadlock_account_lock(lock, current);
790 return 0; 789 return 0;
791 } else 790 } else
792 return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); 791 return slowfn(lock, state, timeout, detect_deadlock);
793} 792}
794 793
795static inline int 794static inline int
796rt_mutex_fasttrylock(struct rt_mutex *lock, 795rt_mutex_fasttrylock(struct rt_mutex *lock,
797 int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) 796 int (*slowfn)(struct rt_mutex *lock))
798{ 797{
799 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { 798 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
800 rt_mutex_deadlock_account_lock(lock, current); 799 rt_mutex_deadlock_account_lock(lock, current);
801 return 1; 800 return 1;
802 } 801 }
803 return slowfn(lock __RET_IP__); 802 return slowfn(lock);
804} 803}
805 804
806static inline void 805static inline void
@@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
948 struct task_struct *proxy_owner) 947 struct task_struct *proxy_owner)
949{ 948{
950 __rt_mutex_init(lock, NULL); 949 __rt_mutex_init(lock, NULL);
951 debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__); 950 debug_rt_mutex_proxy_lock(lock, proxy_owner);
952 rt_mutex_set_owner(lock, proxy_owner, 0); 951 rt_mutex_set_owner(lock, proxy_owner, 0);
953 rt_mutex_deadlock_account_lock(lock, proxy_owner); 952 rt_mutex_deadlock_account_lock(lock, proxy_owner);
954} 953}
diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h
index 1e0fca13ff72..a1a1dd06421d 100644
--- a/kernel/rtmutex.h
+++ b/kernel/rtmutex.h
@@ -10,9 +10,6 @@
10 * Non-debug version. 10 * Non-debug version.
11 */ 11 */
12 12
13#define __IP_DECL__
14#define __IP__
15#define __RET_IP__
16#define rt_mutex_deadlock_check(l) (0) 13#define rt_mutex_deadlock_check(l) (0)
17#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) 14#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
18#define rt_mutex_deadlock_account_unlock(l) do { } while (0) 15#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
new file mode 100644
index 000000000000..291ded556aa0
--- /dev/null
+++ b/kernel/rwsem.c
@@ -0,0 +1,147 @@
1/* kernel/rwsem.c: R/W semaphores, public implementation
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from asm-i386/semaphore.h
5 */
6
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/rwsem.h>
11
12#include <asm/system.h>
13#include <asm/atomic.h>
14
15/*
16 * lock for reading
17 */
18void down_read(struct rw_semaphore *sem)
19{
20 might_sleep();
21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
22
23 __down_read(sem);
24}
25
26EXPORT_SYMBOL(down_read);
27
28/*
29 * trylock for reading -- returns 1 if successful, 0 if contention
30 */
31int down_read_trylock(struct rw_semaphore *sem)
32{
33 int ret = __down_read_trylock(sem);
34
35 if (ret == 1)
36 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
37 return ret;
38}
39
40EXPORT_SYMBOL(down_read_trylock);
41
42/*
43 * lock for writing
44 */
45void down_write(struct rw_semaphore *sem)
46{
47 might_sleep();
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49
50 __down_write(sem);
51}
52
53EXPORT_SYMBOL(down_write);
54
55/*
56 * trylock for writing -- returns 1 if successful, 0 if contention
57 */
58int down_write_trylock(struct rw_semaphore *sem)
59{
60 int ret = __down_write_trylock(sem);
61
62 if (ret == 1)
63 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
64 return ret;
65}
66
67EXPORT_SYMBOL(down_write_trylock);
68
69/*
70 * release a read lock
71 */
72void up_read(struct rw_semaphore *sem)
73{
74 rwsem_release(&sem->dep_map, 1, _RET_IP_);
75
76 __up_read(sem);
77}
78
79EXPORT_SYMBOL(up_read);
80
81/*
82 * release a write lock
83 */
84void up_write(struct rw_semaphore *sem)
85{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87
88 __up_write(sem);
89}
90
91EXPORT_SYMBOL(up_write);
92
93/*
94 * downgrade write lock to read lock
95 */
96void downgrade_write(struct rw_semaphore *sem)
97{
98 /*
99 * lockdep: a downgraded write will live on as a write
100 * dependency.
101 */
102 __downgrade_write(sem);
103}
104
105EXPORT_SYMBOL(downgrade_write);
106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108
109void down_read_nested(struct rw_semaphore *sem, int subclass)
110{
111 might_sleep();
112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
113
114 __down_read(sem);
115}
116
117EXPORT_SYMBOL(down_read_nested);
118
119void down_read_non_owner(struct rw_semaphore *sem)
120{
121 might_sleep();
122
123 __down_read(sem);
124}
125
126EXPORT_SYMBOL(down_read_non_owner);
127
128void down_write_nested(struct rw_semaphore *sem, int subclass)
129{
130 might_sleep();
131 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
132
133 __down_write_nested(sem, subclass);
134}
135
136EXPORT_SYMBOL(down_write_nested);
137
138void up_read_non_owner(struct rw_semaphore *sem)
139{
140 __up_read(sem);
141}
142
143EXPORT_SYMBOL(up_read_non_owner);
144
145#endif
146
147
diff --git a/kernel/sched.c b/kernel/sched.c
index d5e37072ea54..4ee400f9d56b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/completion.h> 31#include <linux/completion.h>
32#include <linux/kernel_stat.h> 32#include <linux/kernel_stat.h>
33#include <linux/debug_locks.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/notifier.h> 35#include <linux/notifier.h>
35#include <linux/profile.h> 36#include <linux/profile.h>
@@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)
178 return SCALE_PRIO(DEF_TIMESLICE, static_prio); 179 return SCALE_PRIO(DEF_TIMESLICE, static_prio);
179} 180}
180 181
181static inline unsigned int task_timeslice(task_t *p) 182static inline unsigned int task_timeslice(struct task_struct *p)
182{ 183{
183 return static_prio_timeslice(p->static_prio); 184 return static_prio_timeslice(p->static_prio);
184} 185}
185 186
186#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
187 < (long long) (sd)->cache_hot_time)
188
189/* 187/*
190 * These are the runqueue data structures: 188 * These are the runqueue data structures:
191 */ 189 */
192 190
193typedef struct runqueue runqueue_t;
194
195struct prio_array { 191struct prio_array {
196 unsigned int nr_active; 192 unsigned int nr_active;
197 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ 193 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -205,7 +201,7 @@ struct prio_array {
205 * (such as the load balancing or the thread migration code), lock 201 * (such as the load balancing or the thread migration code), lock
206 * acquire operations must be ordered by ascending &runqueue. 202 * acquire operations must be ordered by ascending &runqueue.
207 */ 203 */
208struct runqueue { 204struct rq {
209 spinlock_t lock; 205 spinlock_t lock;
210 206
211 /* 207 /*
@@ -229,9 +225,9 @@ struct runqueue {
229 225
230 unsigned long expired_timestamp; 226 unsigned long expired_timestamp;
231 unsigned long long timestamp_last_tick; 227 unsigned long long timestamp_last_tick;
232 task_t *curr, *idle; 228 struct task_struct *curr, *idle;
233 struct mm_struct *prev_mm; 229 struct mm_struct *prev_mm;
234 prio_array_t *active, *expired, arrays[2]; 230 struct prio_array *active, *expired, arrays[2];
235 int best_expired_prio; 231 int best_expired_prio;
236 atomic_t nr_iowait; 232 atomic_t nr_iowait;
237 233
@@ -242,7 +238,7 @@ struct runqueue {
242 int active_balance; 238 int active_balance;
243 int push_cpu; 239 int push_cpu;
244 240
245 task_t *migration_thread; 241 struct task_struct *migration_thread;
246 struct list_head migration_queue; 242 struct list_head migration_queue;
247#endif 243#endif
248 244
@@ -265,9 +261,10 @@ struct runqueue {
265 unsigned long ttwu_cnt; 261 unsigned long ttwu_cnt;
266 unsigned long ttwu_local; 262 unsigned long ttwu_local;
267#endif 263#endif
264 struct lock_class_key rq_lock_key;
268}; 265};
269 266
270static DEFINE_PER_CPU(struct runqueue, runqueues); 267static DEFINE_PER_CPU(struct rq, runqueues);
271 268
272/* 269/*
273 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 270 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
276 * The domain tree of any CPU may only be accessed from within 273 * The domain tree of any CPU may only be accessed from within
277 * preempt-disabled sections. 274 * preempt-disabled sections.
278 */ 275 */
279#define for_each_domain(cpu, domain) \ 276#define for_each_domain(cpu, __sd) \
280for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) 277 for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
281 278
282#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 279#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
283#define this_rq() (&__get_cpu_var(runqueues)) 280#define this_rq() (&__get_cpu_var(runqueues))
@@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
292#endif 289#endif
293 290
294#ifndef __ARCH_WANT_UNLOCKED_CTXSW 291#ifndef __ARCH_WANT_UNLOCKED_CTXSW
295static inline int task_running(runqueue_t *rq, task_t *p) 292static inline int task_running(struct rq *rq, struct task_struct *p)
296{ 293{
297 return rq->curr == p; 294 return rq->curr == p;
298} 295}
299 296
300static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 297static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
301{ 298{
302} 299}
303 300
304static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 301static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
305{ 302{
306#ifdef CONFIG_DEBUG_SPINLOCK 303#ifdef CONFIG_DEBUG_SPINLOCK
307 /* this is a valid case when another task releases the spinlock */ 304 /* this is a valid case when another task releases the spinlock */
308 rq->lock.owner = current; 305 rq->lock.owner = current;
309#endif 306#endif
307 /*
308 * If we are tracking spinlock dependencies then we have to
309 * fix up the runqueue lock - which gets 'carried over' from
310 * prev into current:
311 */
312 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
313
310 spin_unlock_irq(&rq->lock); 314 spin_unlock_irq(&rq->lock);
311} 315}
312 316
313#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 317#else /* __ARCH_WANT_UNLOCKED_CTXSW */
314static inline int task_running(runqueue_t *rq, task_t *p) 318static inline int task_running(struct rq *rq, struct task_struct *p)
315{ 319{
316#ifdef CONFIG_SMP 320#ifdef CONFIG_SMP
317 return p->oncpu; 321 return p->oncpu;
@@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
320#endif 324#endif
321} 325}
322 326
323static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 327static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
324{ 328{
325#ifdef CONFIG_SMP 329#ifdef CONFIG_SMP
326 /* 330 /*
@@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
337#endif 341#endif
338} 342}
339 343
340static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 344static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
341{ 345{
342#ifdef CONFIG_SMP 346#ifdef CONFIG_SMP
343 /* 347 /*
@@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
358 * __task_rq_lock - lock the runqueue a given task resides on. 362 * __task_rq_lock - lock the runqueue a given task resides on.
359 * Must be called interrupts disabled. 363 * Must be called interrupts disabled.
360 */ 364 */
361static inline runqueue_t *__task_rq_lock(task_t *p) 365static inline struct rq *__task_rq_lock(struct task_struct *p)
362 __acquires(rq->lock) 366 __acquires(rq->lock)
363{ 367{
364 struct runqueue *rq; 368 struct rq *rq;
365 369
366repeat_lock_task: 370repeat_lock_task:
367 rq = task_rq(p); 371 rq = task_rq(p);
@@ -378,10 +382,10 @@ repeat_lock_task:
378 * interrupts. Note the ordering: we can safely lookup the task_rq without 382 * interrupts. Note the ordering: we can safely lookup the task_rq without
379 * explicitly disabling preemption. 383 * explicitly disabling preemption.
380 */ 384 */
381static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) 385static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
382 __acquires(rq->lock) 386 __acquires(rq->lock)
383{ 387{
384 struct runqueue *rq; 388 struct rq *rq;
385 389
386repeat_lock_task: 390repeat_lock_task:
387 local_irq_save(*flags); 391 local_irq_save(*flags);
@@ -394,13 +398,13 @@ repeat_lock_task:
394 return rq; 398 return rq;
395} 399}
396 400
397static inline void __task_rq_unlock(runqueue_t *rq) 401static inline void __task_rq_unlock(struct rq *rq)
398 __releases(rq->lock) 402 __releases(rq->lock)
399{ 403{
400 spin_unlock(&rq->lock); 404 spin_unlock(&rq->lock);
401} 405}
402 406
403static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) 407static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
404 __releases(rq->lock) 408 __releases(rq->lock)
405{ 409{
406 spin_unlock_irqrestore(&rq->lock, *flags); 410 spin_unlock_irqrestore(&rq->lock, *flags);
@@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
420 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); 424 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
421 seq_printf(seq, "timestamp %lu\n", jiffies); 425 seq_printf(seq, "timestamp %lu\n", jiffies);
422 for_each_online_cpu(cpu) { 426 for_each_online_cpu(cpu) {
423 runqueue_t *rq = cpu_rq(cpu); 427 struct rq *rq = cpu_rq(cpu);
424#ifdef CONFIG_SMP 428#ifdef CONFIG_SMP
425 struct sched_domain *sd; 429 struct sched_domain *sd;
426 int dcnt = 0; 430 int dcnt = 0;
@@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
507/* 511/*
508 * rq_lock - lock a given runqueue and disable interrupts. 512 * rq_lock - lock a given runqueue and disable interrupts.
509 */ 513 */
510static inline runqueue_t *this_rq_lock(void) 514static inline struct rq *this_rq_lock(void)
511 __acquires(rq->lock) 515 __acquires(rq->lock)
512{ 516{
513 runqueue_t *rq; 517 struct rq *rq;
514 518
515 local_irq_disable(); 519 local_irq_disable();
516 rq = this_rq(); 520 rq = this_rq();
@@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)
535 * long it was from the *first* time it was queued to the time that it 539 * long it was from the *first* time it was queued to the time that it
536 * finally hit a cpu. 540 * finally hit a cpu.
537 */ 541 */
538static inline void sched_info_dequeued(task_t *t) 542static inline void sched_info_dequeued(struct task_struct *t)
539{ 543{
540 t->sched_info.last_queued = 0; 544 t->sched_info.last_queued = 0;
541} 545}
@@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)
545 * long it was waiting to run. We also note when it began so that we 549 * long it was waiting to run. We also note when it began so that we
546 * can keep stats on how long its timeslice is. 550 * can keep stats on how long its timeslice is.
547 */ 551 */
548static void sched_info_arrive(task_t *t) 552static void sched_info_arrive(struct task_struct *t)
549{ 553{
550 unsigned long now = jiffies, diff = 0; 554 unsigned long now = jiffies, diff = 0;
551 struct runqueue *rq = task_rq(t); 555 struct rq *rq = task_rq(t);
552 556
553 if (t->sched_info.last_queued) 557 if (t->sched_info.last_queued)
554 diff = now - t->sched_info.last_queued; 558 diff = now - t->sched_info.last_queued;
@@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)
579 * the timestamp if it is already not set. It's assumed that 583 * the timestamp if it is already not set. It's assumed that
580 * sched_info_dequeued() will clear that stamp when appropriate. 584 * sched_info_dequeued() will clear that stamp when appropriate.
581 */ 585 */
582static inline void sched_info_queued(task_t *t) 586static inline void sched_info_queued(struct task_struct *t)
583{ 587{
584 if (!t->sched_info.last_queued) 588 if (!t->sched_info.last_queued)
585 t->sched_info.last_queued = jiffies; 589 t->sched_info.last_queued = jiffies;
@@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)
589 * Called when a process ceases being the active-running process, either 593 * Called when a process ceases being the active-running process, either
590 * voluntarily or involuntarily. Now we can calculate how long we ran. 594 * voluntarily or involuntarily. Now we can calculate how long we ran.
591 */ 595 */
592static inline void sched_info_depart(task_t *t) 596static inline void sched_info_depart(struct task_struct *t)
593{ 597{
594 struct runqueue *rq = task_rq(t); 598 struct rq *rq = task_rq(t);
595 unsigned long diff = jiffies - t->sched_info.last_arrival; 599 unsigned long diff = jiffies - t->sched_info.last_arrival;
596 600
597 t->sched_info.cpu_time += diff; 601 t->sched_info.cpu_time += diff;
@@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)
605 * their time slice. (This may also be called when switching to or from 609 * their time slice. (This may also be called when switching to or from
606 * the idle task.) We are only called when prev != next. 610 * the idle task.) We are only called when prev != next.
607 */ 611 */
608static inline void sched_info_switch(task_t *prev, task_t *next) 612static inline void
613sched_info_switch(struct task_struct *prev, struct task_struct *next)
609{ 614{
610 struct runqueue *rq = task_rq(prev); 615 struct rq *rq = task_rq(prev);
611 616
612 /* 617 /*
613 * prev now departs the cpu. It's not interesting to record 618 * prev now departs the cpu. It's not interesting to record
@@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)
628/* 633/*
629 * Adding/removing a task to/from a priority array: 634 * Adding/removing a task to/from a priority array:
630 */ 635 */
631static void dequeue_task(struct task_struct *p, prio_array_t *array) 636static void dequeue_task(struct task_struct *p, struct prio_array *array)
632{ 637{
633 array->nr_active--; 638 array->nr_active--;
634 list_del(&p->run_list); 639 list_del(&p->run_list);
@@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
636 __clear_bit(p->prio, array->bitmap); 641 __clear_bit(p->prio, array->bitmap);
637} 642}
638 643
639static void enqueue_task(struct task_struct *p, prio_array_t *array) 644static void enqueue_task(struct task_struct *p, struct prio_array *array)
640{ 645{
641 sched_info_queued(p); 646 sched_info_queued(p);
642 list_add_tail(&p->run_list, array->queue + p->prio); 647 list_add_tail(&p->run_list, array->queue + p->prio);
@@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
649 * Put task to the end of the run list without the overhead of dequeue 654 * Put task to the end of the run list without the overhead of dequeue
650 * followed by enqueue. 655 * followed by enqueue.
651 */ 656 */
652static void requeue_task(struct task_struct *p, prio_array_t *array) 657static void requeue_task(struct task_struct *p, struct prio_array *array)
653{ 658{
654 list_move_tail(&p->run_list, array->queue + p->prio); 659 list_move_tail(&p->run_list, array->queue + p->prio);
655} 660}
656 661
657static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) 662static inline void
663enqueue_task_head(struct task_struct *p, struct prio_array *array)
658{ 664{
659 list_add(&p->run_list, array->queue + p->prio); 665 list_add(&p->run_list, array->queue + p->prio);
660 __set_bit(p->prio, array->bitmap); 666 __set_bit(p->prio, array->bitmap);
@@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
677 * Both properties are important to certain workloads. 683 * Both properties are important to certain workloads.
678 */ 684 */
679 685
680static inline int __normal_prio(task_t *p) 686static inline int __normal_prio(struct task_struct *p)
681{ 687{
682 int bonus, prio; 688 int bonus, prio;
683 689
@@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)
713#define RTPRIO_TO_LOAD_WEIGHT(rp) \ 719#define RTPRIO_TO_LOAD_WEIGHT(rp) \
714 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) 720 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
715 721
716static void set_load_weight(task_t *p) 722static void set_load_weight(struct task_struct *p)
717{ 723{
718 if (has_rt_policy(p)) { 724 if (has_rt_policy(p)) {
719#ifdef CONFIG_SMP 725#ifdef CONFIG_SMP
@@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)
731 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); 737 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
732} 738}
733 739
734static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) 740static inline void
741inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
735{ 742{
736 rq->raw_weighted_load += p->load_weight; 743 rq->raw_weighted_load += p->load_weight;
737} 744}
738 745
739static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) 746static inline void
747dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
740{ 748{
741 rq->raw_weighted_load -= p->load_weight; 749 rq->raw_weighted_load -= p->load_weight;
742} 750}
743 751
744static inline void inc_nr_running(task_t *p, runqueue_t *rq) 752static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
745{ 753{
746 rq->nr_running++; 754 rq->nr_running++;
747 inc_raw_weighted_load(rq, p); 755 inc_raw_weighted_load(rq, p);
748} 756}
749 757
750static inline void dec_nr_running(task_t *p, runqueue_t *rq) 758static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
751{ 759{
752 rq->nr_running--; 760 rq->nr_running--;
753 dec_raw_weighted_load(rq, p); 761 dec_raw_weighted_load(rq, p);
@@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
760 * setprio syscalls, and whenever the interactivity 768 * setprio syscalls, and whenever the interactivity
761 * estimator recalculates. 769 * estimator recalculates.
762 */ 770 */
763static inline int normal_prio(task_t *p) 771static inline int normal_prio(struct task_struct *p)
764{ 772{
765 int prio; 773 int prio;
766 774
@@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)
778 * interactivity modifiers. Will be RT if the task got 786 * interactivity modifiers. Will be RT if the task got
779 * RT-boosted. If not then it returns p->normal_prio. 787 * RT-boosted. If not then it returns p->normal_prio.
780 */ 788 */
781static int effective_prio(task_t *p) 789static int effective_prio(struct task_struct *p)
782{ 790{
783 p->normal_prio = normal_prio(p); 791 p->normal_prio = normal_prio(p);
784 /* 792 /*
@@ -794,9 +802,9 @@ static int effective_prio(task_t *p)
794/* 802/*
795 * __activate_task - move a task to the runqueue. 803 * __activate_task - move a task to the runqueue.
796 */ 804 */
797static void __activate_task(task_t *p, runqueue_t *rq) 805static void __activate_task(struct task_struct *p, struct rq *rq)
798{ 806{
799 prio_array_t *target = rq->active; 807 struct prio_array *target = rq->active;
800 808
801 if (batch_task(p)) 809 if (batch_task(p))
802 target = rq->expired; 810 target = rq->expired;
@@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
807/* 815/*
808 * __activate_idle_task - move idle task to the _front_ of runqueue. 816 * __activate_idle_task - move idle task to the _front_ of runqueue.
809 */ 817 */
810static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 818static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
811{ 819{
812 enqueue_task_head(p, rq->active); 820 enqueue_task_head(p, rq->active);
813 inc_nr_running(p, rq); 821 inc_nr_running(p, rq);
@@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
817 * Recalculate p->normal_prio and p->prio after having slept, 825 * Recalculate p->normal_prio and p->prio after having slept,
818 * updating the sleep-average too: 826 * updating the sleep-average too:
819 */ 827 */
820static int recalc_task_prio(task_t *p, unsigned long long now) 828static int recalc_task_prio(struct task_struct *p, unsigned long long now)
821{ 829{
822 /* Caller must always ensure 'now >= p->timestamp' */ 830 /* Caller must always ensure 'now >= p->timestamp' */
823 unsigned long sleep_time = now - p->timestamp; 831 unsigned long sleep_time = now - p->timestamp;
@@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
889 * Update all the scheduling statistics stuff. (sleep average 897 * Update all the scheduling statistics stuff. (sleep average
890 * calculation, priority modifiers, etc.) 898 * calculation, priority modifiers, etc.)
891 */ 899 */
892static void activate_task(task_t *p, runqueue_t *rq, int local) 900static void activate_task(struct task_struct *p, struct rq *rq, int local)
893{ 901{
894 unsigned long long now; 902 unsigned long long now;
895 903
@@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
897#ifdef CONFIG_SMP 905#ifdef CONFIG_SMP
898 if (!local) { 906 if (!local) {
899 /* Compensate for drifting sched_clock */ 907 /* Compensate for drifting sched_clock */
900 runqueue_t *this_rq = this_rq(); 908 struct rq *this_rq = this_rq();
901 now = (now - this_rq->timestamp_last_tick) 909 now = (now - this_rq->timestamp_last_tick)
902 + rq->timestamp_last_tick; 910 + rq->timestamp_last_tick;
903 } 911 }
@@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
936/* 944/*
937 * deactivate_task - remove a task from the runqueue. 945 * deactivate_task - remove a task from the runqueue.
938 */ 946 */
939static void deactivate_task(struct task_struct *p, runqueue_t *rq) 947static void deactivate_task(struct task_struct *p, struct rq *rq)
940{ 948{
941 dec_nr_running(p, rq); 949 dec_nr_running(p, rq);
942 dequeue_task(p, p->array); 950 dequeue_task(p, p->array);
@@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
956#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 964#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
957#endif 965#endif
958 966
959static void resched_task(task_t *p) 967static void resched_task(struct task_struct *p)
960{ 968{
961 int cpu; 969 int cpu;
962 970
@@ -977,7 +985,7 @@ static void resched_task(task_t *p)
977 smp_send_reschedule(cpu); 985 smp_send_reschedule(cpu);
978} 986}
979#else 987#else
980static inline void resched_task(task_t *p) 988static inline void resched_task(struct task_struct *p)
981{ 989{
982 assert_spin_locked(&task_rq(p)->lock); 990 assert_spin_locked(&task_rq(p)->lock);
983 set_tsk_need_resched(p); 991 set_tsk_need_resched(p);
@@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)
988 * task_curr - is this task currently executing on a CPU? 996 * task_curr - is this task currently executing on a CPU?
989 * @p: the task in question. 997 * @p: the task in question.
990 */ 998 */
991inline int task_curr(const task_t *p) 999inline int task_curr(const struct task_struct *p)
992{ 1000{
993 return cpu_curr(task_cpu(p)) == p; 1001 return cpu_curr(task_cpu(p)) == p;
994} 1002}
@@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
1000} 1008}
1001 1009
1002#ifdef CONFIG_SMP 1010#ifdef CONFIG_SMP
1003typedef struct { 1011struct migration_req {
1004 struct list_head list; 1012 struct list_head list;
1005 1013
1006 task_t *task; 1014 struct task_struct *task;
1007 int dest_cpu; 1015 int dest_cpu;
1008 1016
1009 struct completion done; 1017 struct completion done;
1010} migration_req_t; 1018};
1011 1019
1012/* 1020/*
1013 * The task's runqueue lock must be held. 1021 * The task's runqueue lock must be held.
1014 * Returns true if you have to wait for migration thread. 1022 * Returns true if you have to wait for migration thread.
1015 */ 1023 */
1016static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) 1024static int
1025migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
1017{ 1026{
1018 runqueue_t *rq = task_rq(p); 1027 struct rq *rq = task_rq(p);
1019 1028
1020 /* 1029 /*
1021 * If the task is not on a runqueue (and not running), then 1030 * If the task is not on a runqueue (and not running), then
@@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1030 req->task = p; 1039 req->task = p;
1031 req->dest_cpu = dest_cpu; 1040 req->dest_cpu = dest_cpu;
1032 list_add(&req->list, &rq->migration_queue); 1041 list_add(&req->list, &rq->migration_queue);
1042
1033 return 1; 1043 return 1;
1034} 1044}
1035 1045
@@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1042 * smp_call_function() if an IPI is sent by the same process we are 1052 * smp_call_function() if an IPI is sent by the same process we are
1043 * waiting to become inactive. 1053 * waiting to become inactive.
1044 */ 1054 */
1045void wait_task_inactive(task_t *p) 1055void wait_task_inactive(struct task_struct *p)
1046{ 1056{
1047 unsigned long flags; 1057 unsigned long flags;
1048 runqueue_t *rq; 1058 struct rq *rq;
1049 int preempted; 1059 int preempted;
1050 1060
1051repeat: 1061repeat:
@@ -1076,7 +1086,7 @@ repeat:
1076 * to another CPU then no harm is done and the purpose has been 1086 * to another CPU then no harm is done and the purpose has been
1077 * achieved as well. 1087 * achieved as well.
1078 */ 1088 */
1079void kick_process(task_t *p) 1089void kick_process(struct task_struct *p)
1080{ 1090{
1081 int cpu; 1091 int cpu;
1082 1092
@@ -1096,7 +1106,7 @@ void kick_process(task_t *p)
1096 */ 1106 */
1097static inline unsigned long source_load(int cpu, int type) 1107static inline unsigned long source_load(int cpu, int type)
1098{ 1108{
1099 runqueue_t *rq = cpu_rq(cpu); 1109 struct rq *rq = cpu_rq(cpu);
1100 1110
1101 if (type == 0) 1111 if (type == 0)
1102 return rq->raw_weighted_load; 1112 return rq->raw_weighted_load;
@@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
1110 */ 1120 */
1111static inline unsigned long target_load(int cpu, int type) 1121static inline unsigned long target_load(int cpu, int type)
1112{ 1122{
1113 runqueue_t *rq = cpu_rq(cpu); 1123 struct rq *rq = cpu_rq(cpu);
1114 1124
1115 if (type == 0) 1125 if (type == 0)
1116 return rq->raw_weighted_load; 1126 return rq->raw_weighted_load;
@@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)
1123 */ 1133 */
1124static inline unsigned long cpu_avg_load_per_task(int cpu) 1134static inline unsigned long cpu_avg_load_per_task(int cpu)
1125{ 1135{
1126 runqueue_t *rq = cpu_rq(cpu); 1136 struct rq *rq = cpu_rq(cpu);
1127 unsigned long n = rq->nr_running; 1137 unsigned long n = rq->nr_running;
1128 1138
1129 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1139 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
1130} 1140}
1131 1141
1132/* 1142/*
@@ -1279,7 +1289,7 @@ nextlevel:
1279 * Returns the CPU we should wake onto. 1289 * Returns the CPU we should wake onto.
1280 */ 1290 */
1281#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1291#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1282static int wake_idle(int cpu, task_t *p) 1292static int wake_idle(int cpu, struct task_struct *p)
1283{ 1293{
1284 cpumask_t tmp; 1294 cpumask_t tmp;
1285 struct sched_domain *sd; 1295 struct sched_domain *sd;
@@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)
1302 return cpu; 1312 return cpu;
1303} 1313}
1304#else 1314#else
1305static inline int wake_idle(int cpu, task_t *p) 1315static inline int wake_idle(int cpu, struct task_struct *p)
1306{ 1316{
1307 return cpu; 1317 return cpu;
1308} 1318}
@@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)
1322 * 1332 *
1323 * returns failure only if the task is already active. 1333 * returns failure only if the task is already active.
1324 */ 1334 */
1325static int try_to_wake_up(task_t *p, unsigned int state, int sync) 1335static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1326{ 1336{
1327 int cpu, this_cpu, success = 0; 1337 int cpu, this_cpu, success = 0;
1328 unsigned long flags; 1338 unsigned long flags;
1329 long old_state; 1339 long old_state;
1330 runqueue_t *rq; 1340 struct rq *rq;
1331#ifdef CONFIG_SMP 1341#ifdef CONFIG_SMP
1332 unsigned long load, this_load;
1333 struct sched_domain *sd, *this_sd = NULL; 1342 struct sched_domain *sd, *this_sd = NULL;
1343 unsigned long load, this_load;
1334 int new_cpu; 1344 int new_cpu;
1335#endif 1345#endif
1336 1346
@@ -1480,15 +1490,14 @@ out:
1480 return success; 1490 return success;
1481} 1491}
1482 1492
1483int fastcall wake_up_process(task_t *p) 1493int fastcall wake_up_process(struct task_struct *p)
1484{ 1494{
1485 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1495 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1486 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1496 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1487} 1497}
1488
1489EXPORT_SYMBOL(wake_up_process); 1498EXPORT_SYMBOL(wake_up_process);
1490 1499
1491int fastcall wake_up_state(task_t *p, unsigned int state) 1500int fastcall wake_up_state(struct task_struct *p, unsigned int state)
1492{ 1501{
1493 return try_to_wake_up(p, state, 0); 1502 return try_to_wake_up(p, state, 0);
1494} 1503}
@@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
1497 * Perform scheduler related setup for a newly forked process p. 1506 * Perform scheduler related setup for a newly forked process p.
1498 * p is forked by current. 1507 * p is forked by current.
1499 */ 1508 */
1500void fastcall sched_fork(task_t *p, int clone_flags) 1509void fastcall sched_fork(struct task_struct *p, int clone_flags)
1501{ 1510{
1502 int cpu = get_cpu(); 1511 int cpu = get_cpu();
1503 1512
@@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1565 * that must be done for every newly created context, then puts the task 1574 * that must be done for every newly created context, then puts the task
1566 * on the runqueue and wakes it. 1575 * on the runqueue and wakes it.
1567 */ 1576 */
1568void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) 1577void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1569{ 1578{
1579 struct rq *rq, *this_rq;
1570 unsigned long flags; 1580 unsigned long flags;
1571 int this_cpu, cpu; 1581 int this_cpu, cpu;
1572 runqueue_t *rq, *this_rq;
1573 1582
1574 rq = task_rq_lock(p, &flags); 1583 rq = task_rq_lock(p, &flags);
1575 BUG_ON(p->state != TASK_RUNNING); 1584 BUG_ON(p->state != TASK_RUNNING);
@@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1649 * artificially, because any timeslice recovered here 1658 * artificially, because any timeslice recovered here
1650 * was given away by the parent in the first place.) 1659 * was given away by the parent in the first place.)
1651 */ 1660 */
1652void fastcall sched_exit(task_t *p) 1661void fastcall sched_exit(struct task_struct *p)
1653{ 1662{
1654 unsigned long flags; 1663 unsigned long flags;
1655 runqueue_t *rq; 1664 struct rq *rq;
1656 1665
1657 /* 1666 /*
1658 * If the child was a (relative-) CPU hog then decrease 1667 * If the child was a (relative-) CPU hog then decrease
@@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)
1683 * prepare_task_switch sets up locking and calls architecture specific 1692 * prepare_task_switch sets up locking and calls architecture specific
1684 * hooks. 1693 * hooks.
1685 */ 1694 */
1686static inline void prepare_task_switch(runqueue_t *rq, task_t *next) 1695static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
1687{ 1696{
1688 prepare_lock_switch(rq, next); 1697 prepare_lock_switch(rq, next);
1689 prepare_arch_switch(next); 1698 prepare_arch_switch(next);
@@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
1704 * with the lock held can cause deadlocks; see schedule() for 1713 * with the lock held can cause deadlocks; see schedule() for
1705 * details.) 1714 * details.)
1706 */ 1715 */
1707static inline void finish_task_switch(runqueue_t *rq, task_t *prev) 1716static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1708 __releases(rq->lock) 1717 __releases(rq->lock)
1709{ 1718{
1710 struct mm_struct *mm = rq->prev_mm; 1719 struct mm_struct *mm = rq->prev_mm;
@@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1742 * schedule_tail - first thing a freshly forked thread must call. 1751 * schedule_tail - first thing a freshly forked thread must call.
1743 * @prev: the thread we just switched away from. 1752 * @prev: the thread we just switched away from.
1744 */ 1753 */
1745asmlinkage void schedule_tail(task_t *prev) 1754asmlinkage void schedule_tail(struct task_struct *prev)
1746 __releases(rq->lock) 1755 __releases(rq->lock)
1747{ 1756{
1748 runqueue_t *rq = this_rq(); 1757 struct rq *rq = this_rq();
1758
1749 finish_task_switch(rq, prev); 1759 finish_task_switch(rq, prev);
1750#ifdef __ARCH_WANT_UNLOCKED_CTXSW 1760#ifdef __ARCH_WANT_UNLOCKED_CTXSW
1751 /* In this case, finish_task_switch does not reenable preemption */ 1761 /* In this case, finish_task_switch does not reenable preemption */
@@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
1759 * context_switch - switch to the new MM and the new 1769 * context_switch - switch to the new MM and the new
1760 * thread's register state. 1770 * thread's register state.
1761 */ 1771 */
1762static inline 1772static inline struct task_struct *
1763task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) 1773context_switch(struct rq *rq, struct task_struct *prev,
1774 struct task_struct *next)
1764{ 1775{
1765 struct mm_struct *mm = next->mm; 1776 struct mm_struct *mm = next->mm;
1766 struct mm_struct *oldmm = prev->active_mm; 1777 struct mm_struct *oldmm = prev->active_mm;
@@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
1777 WARN_ON(rq->prev_mm); 1788 WARN_ON(rq->prev_mm);
1778 rq->prev_mm = oldmm; 1789 rq->prev_mm = oldmm;
1779 } 1790 }
1791 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
1780 1792
1781 /* Here we just switch the register state and the stack. */ 1793 /* Here we just switch the register state and the stack. */
1782 switch_to(prev, next, prev); 1794 switch_to(prev, next, prev);
@@ -1857,12 +1869,21 @@ unsigned long nr_active(void)
1857#ifdef CONFIG_SMP 1869#ifdef CONFIG_SMP
1858 1870
1859/* 1871/*
1872 * Is this task likely cache-hot:
1873 */
1874static inline int
1875task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
1876{
1877 return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
1878}
1879
1880/*
1860 * double_rq_lock - safely lock two runqueues 1881 * double_rq_lock - safely lock two runqueues
1861 * 1882 *
1862 * Note this does not disable interrupts like task_rq_lock, 1883 * Note this does not disable interrupts like task_rq_lock,
1863 * you need to do so manually before calling. 1884 * you need to do so manually before calling.
1864 */ 1885 */
1865static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) 1886static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1866 __acquires(rq1->lock) 1887 __acquires(rq1->lock)
1867 __acquires(rq2->lock) 1888 __acquires(rq2->lock)
1868{ 1889{
@@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1886 * Note this does not restore interrupts like task_rq_unlock, 1907 * Note this does not restore interrupts like task_rq_unlock,
1887 * you need to do so manually after calling. 1908 * you need to do so manually after calling.
1888 */ 1909 */
1889static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) 1910static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1890 __releases(rq1->lock) 1911 __releases(rq1->lock)
1891 __releases(rq2->lock) 1912 __releases(rq2->lock)
1892{ 1913{
@@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1900/* 1921/*
1901 * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 1922 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
1902 */ 1923 */
1903static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) 1924static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
1904 __releases(this_rq->lock) 1925 __releases(this_rq->lock)
1905 __acquires(busiest->lock) 1926 __acquires(busiest->lock)
1906 __acquires(this_rq->lock) 1927 __acquires(this_rq->lock)
@@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
1921 * allow dest_cpu, which will force the cpu onto dest_cpu. Then 1942 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
1922 * the cpu_allowed mask is restored. 1943 * the cpu_allowed mask is restored.
1923 */ 1944 */
1924static void sched_migrate_task(task_t *p, int dest_cpu) 1945static void sched_migrate_task(struct task_struct *p, int dest_cpu)
1925{ 1946{
1926 migration_req_t req; 1947 struct migration_req req;
1927 runqueue_t *rq;
1928 unsigned long flags; 1948 unsigned long flags;
1949 struct rq *rq;
1929 1950
1930 rq = task_rq_lock(p, &flags); 1951 rq = task_rq_lock(p, &flags);
1931 if (!cpu_isset(dest_cpu, p->cpus_allowed) 1952 if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
1936 if (migrate_task(p, dest_cpu, &req)) { 1957 if (migrate_task(p, dest_cpu, &req)) {
1937 /* Need to wait for migration thread (might exit: take ref). */ 1958 /* Need to wait for migration thread (might exit: take ref). */
1938 struct task_struct *mt = rq->migration_thread; 1959 struct task_struct *mt = rq->migration_thread;
1960
1939 get_task_struct(mt); 1961 get_task_struct(mt);
1940 task_rq_unlock(rq, &flags); 1962 task_rq_unlock(rq, &flags);
1941 wake_up_process(mt); 1963 wake_up_process(mt);
1942 put_task_struct(mt); 1964 put_task_struct(mt);
1943 wait_for_completion(&req.done); 1965 wait_for_completion(&req.done);
1966
1944 return; 1967 return;
1945 } 1968 }
1946out: 1969out:
@@ -1964,9 +1987,9 @@ void sched_exec(void)
1964 * pull_task - move a task from a remote runqueue to the local runqueue. 1987 * pull_task - move a task from a remote runqueue to the local runqueue.
1965 * Both runqueues must be locked. 1988 * Both runqueues must be locked.
1966 */ 1989 */
1967static 1990static void pull_task(struct rq *src_rq, struct prio_array *src_array,
1968void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, 1991 struct task_struct *p, struct rq *this_rq,
1969 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1992 struct prio_array *this_array, int this_cpu)
1970{ 1993{
1971 dequeue_task(p, src_array); 1994 dequeue_task(p, src_array);
1972 dec_nr_running(p, src_rq); 1995 dec_nr_running(p, src_rq);
@@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1987 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 2010 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
1988 */ 2011 */
1989static 2012static
1990int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, 2013int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
1991 struct sched_domain *sd, enum idle_type idle, 2014 struct sched_domain *sd, enum idle_type idle,
1992 int *all_pinned) 2015 int *all_pinned)
1993{ 2016{
@@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2019} 2042}
2020 2043
2021#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2044#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
2045
2022/* 2046/*
2023 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2047 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
2024 * load from busiest to this_rq, as part of a balancing operation within 2048 * load from busiest to this_rq, as part of a balancing operation within
@@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2026 * 2050 *
2027 * Called with both runqueues locked. 2051 * Called with both runqueues locked.
2028 */ 2052 */
2029static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, 2053static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2030 unsigned long max_nr_move, unsigned long max_load_move, 2054 unsigned long max_nr_move, unsigned long max_load_move,
2031 struct sched_domain *sd, enum idle_type idle, 2055 struct sched_domain *sd, enum idle_type idle,
2032 int *all_pinned) 2056 int *all_pinned)
2033{ 2057{
2034 prio_array_t *array, *dst_array; 2058 int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
2059 best_prio_seen, skip_for_load;
2060 struct prio_array *array, *dst_array;
2035 struct list_head *head, *curr; 2061 struct list_head *head, *curr;
2036 int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; 2062 struct task_struct *tmp;
2037 int busiest_best_prio_seen;
2038 int skip_for_load; /* skip the task based on weighted load issues */
2039 long rem_load_move; 2063 long rem_load_move;
2040 task_t *tmp;
2041 2064
2042 if (max_nr_move == 0 || max_load_move == 0) 2065 if (max_nr_move == 0 || max_load_move == 0)
2043 goto out; 2066 goto out;
@@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
2045 rem_load_move = max_load_move; 2068 rem_load_move = max_load_move;
2046 pinned = 1; 2069 pinned = 1;
2047 this_best_prio = rq_best_prio(this_rq); 2070 this_best_prio = rq_best_prio(this_rq);
2048 busiest_best_prio = rq_best_prio(busiest); 2071 best_prio = rq_best_prio(busiest);
2049 /* 2072 /*
2050 * Enable handling of the case where there is more than one task 2073 * Enable handling of the case where there is more than one task
2051 * with the best priority. If the current running task is one 2074 * with the best priority. If the current running task is one
2052 * of those with prio==busiest_best_prio we know it won't be moved 2075 * of those with prio==best_prio we know it won't be moved
2053 * and therefore it's safe to override the skip (based on load) of 2076 * and therefore it's safe to override the skip (based on load) of
2054 * any task we find with that prio. 2077 * any task we find with that prio.
2055 */ 2078 */
2056 busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; 2079 best_prio_seen = best_prio == busiest->curr->prio;
2057 2080
2058 /* 2081 /*
2059 * We first consider expired tasks. Those will likely not be 2082 * We first consider expired tasks. Those will likely not be
@@ -2089,7 +2112,7 @@ skip_bitmap:
2089 head = array->queue + idx; 2112 head = array->queue + idx;
2090 curr = head->prev; 2113 curr = head->prev;
2091skip_queue: 2114skip_queue:
2092 tmp = list_entry(curr, task_t, run_list); 2115 tmp = list_entry(curr, struct task_struct, run_list);
2093 2116
2094 curr = curr->prev; 2117 curr = curr->prev;
2095 2118
@@ -2100,10 +2123,11 @@ skip_queue:
2100 */ 2123 */
2101 skip_for_load = tmp->load_weight > rem_load_move; 2124 skip_for_load = tmp->load_weight > rem_load_move;
2102 if (skip_for_load && idx < this_best_prio) 2125 if (skip_for_load && idx < this_best_prio)
2103 skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; 2126 skip_for_load = !best_prio_seen && idx == best_prio;
2104 if (skip_for_load || 2127 if (skip_for_load ||
2105 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2128 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
2106 busiest_best_prio_seen |= idx == busiest_best_prio; 2129
2130 best_prio_seen |= idx == best_prio;
2107 if (curr != head) 2131 if (curr != head)
2108 goto skip_queue; 2132 goto skip_queue;
2109 idx++; 2133 idx++;
@@ -2146,8 +2170,8 @@ out:
2146 2170
2147/* 2171/*
2148 * find_busiest_group finds and returns the busiest CPU group within the 2172 * find_busiest_group finds and returns the busiest CPU group within the
2149 * domain. It calculates and returns the amount of weighted load which should be 2173 * domain. It calculates and returns the amount of weighted load which
2150 * moved to restore balance via the imbalance parameter. 2174 * should be moved to restore balance via the imbalance parameter.
2151 */ 2175 */
2152static struct sched_group * 2176static struct sched_group *
2153find_busiest_group(struct sched_domain *sd, int this_cpu, 2177find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2188 sum_weighted_load = sum_nr_running = avg_load = 0; 2212 sum_weighted_load = sum_nr_running = avg_load = 0;
2189 2213
2190 for_each_cpu_mask(i, group->cpumask) { 2214 for_each_cpu_mask(i, group->cpumask) {
2191 runqueue_t *rq = cpu_rq(i); 2215 struct rq *rq = cpu_rq(i);
2192 2216
2193 if (*sd_idle && !idle_cpu(i)) 2217 if (*sd_idle && !idle_cpu(i))
2194 *sd_idle = 0; 2218 *sd_idle = 0;
@@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2269 * capacity but still has some space to pick up some load 2293 * capacity but still has some space to pick up some load
2270 * from other group and save more power 2294 * from other group and save more power
2271 */ 2295 */
2272 if (sum_nr_running <= group_capacity - 1) 2296 if (sum_nr_running <= group_capacity - 1) {
2273 if (sum_nr_running > leader_nr_running || 2297 if (sum_nr_running > leader_nr_running ||
2274 (sum_nr_running == leader_nr_running && 2298 (sum_nr_running == leader_nr_running &&
2275 first_cpu(group->cpumask) > 2299 first_cpu(group->cpumask) >
@@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2277 group_leader = group; 2301 group_leader = group;
2278 leader_nr_running = sum_nr_running; 2302 leader_nr_running = sum_nr_running;
2279 } 2303 }
2280 2304 }
2281group_next: 2305group_next:
2282#endif 2306#endif
2283 group = group->next; 2307 group = group->next;
@@ -2332,8 +2356,7 @@ group_next:
2332 * moved 2356 * moved
2333 */ 2357 */
2334 if (*imbalance < busiest_load_per_task) { 2358 if (*imbalance < busiest_load_per_task) {
2335 unsigned long pwr_now, pwr_move; 2359 unsigned long tmp, pwr_now, pwr_move;
2336 unsigned long tmp;
2337 unsigned int imbn; 2360 unsigned int imbn;
2338 2361
2339small_imbalance: 2362small_imbalance:
@@ -2405,22 +2428,23 @@ ret:
2405/* 2428/*
2406 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2429 * find_busiest_queue - find the busiest runqueue among the cpus in group.
2407 */ 2430 */
2408static runqueue_t *find_busiest_queue(struct sched_group *group, 2431static struct rq *
2409 enum idle_type idle, unsigned long imbalance) 2432find_busiest_queue(struct sched_group *group, enum idle_type idle,
2433 unsigned long imbalance)
2410{ 2434{
2435 struct rq *busiest = NULL, *rq;
2411 unsigned long max_load = 0; 2436 unsigned long max_load = 0;
2412 runqueue_t *busiest = NULL, *rqi;
2413 int i; 2437 int i;
2414 2438
2415 for_each_cpu_mask(i, group->cpumask) { 2439 for_each_cpu_mask(i, group->cpumask) {
2416 rqi = cpu_rq(i); 2440 rq = cpu_rq(i);
2417 2441
2418 if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) 2442 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
2419 continue; 2443 continue;
2420 2444
2421 if (rqi->raw_weighted_load > max_load) { 2445 if (rq->raw_weighted_load > max_load) {
2422 max_load = rqi->raw_weighted_load; 2446 max_load = rq->raw_weighted_load;
2423 busiest = rqi; 2447 busiest = rq;
2424 } 2448 }
2425 } 2449 }
2426 2450
@@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2433 */ 2457 */
2434#define MAX_PINNED_INTERVAL 512 2458#define MAX_PINNED_INTERVAL 512
2435 2459
2436#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) 2460static inline unsigned long minus_1_or_zero(unsigned long n)
2461{
2462 return n > 0 ? n - 1 : 0;
2463}
2464
2437/* 2465/*
2438 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2466 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2439 * tasks if there is an imbalance. 2467 * tasks if there is an imbalance.
2440 * 2468 *
2441 * Called with this_rq unlocked. 2469 * Called with this_rq unlocked.
2442 */ 2470 */
2443static int load_balance(int this_cpu, runqueue_t *this_rq, 2471static int load_balance(int this_cpu, struct rq *this_rq,
2444 struct sched_domain *sd, enum idle_type idle) 2472 struct sched_domain *sd, enum idle_type idle)
2445{ 2473{
2474 int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2446 struct sched_group *group; 2475 struct sched_group *group;
2447 runqueue_t *busiest;
2448 unsigned long imbalance; 2476 unsigned long imbalance;
2449 int nr_moved, all_pinned = 0; 2477 struct rq *busiest;
2450 int active_balance = 0;
2451 int sd_idle = 0;
2452 2478
2453 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2479 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2454 !sched_smt_power_savings) 2480 !sched_smt_power_savings)
@@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2482 */ 2508 */
2483 double_rq_lock(this_rq, busiest); 2509 double_rq_lock(this_rq, busiest);
2484 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2510 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2485 minus_1_or_zero(busiest->nr_running), 2511 minus_1_or_zero(busiest->nr_running),
2486 imbalance, sd, idle, &all_pinned); 2512 imbalance, sd, idle, &all_pinned);
2487 double_rq_unlock(this_rq, busiest); 2513 double_rq_unlock(this_rq, busiest);
2488 2514
2489 /* All tasks on this runqueue were pinned by CPU affinity */ 2515 /* All tasks on this runqueue were pinned by CPU affinity */
@@ -2556,7 +2582,8 @@ out_one_pinned:
2556 (sd->balance_interval < sd->max_interval)) 2582 (sd->balance_interval < sd->max_interval))
2557 sd->balance_interval *= 2; 2583 sd->balance_interval *= 2;
2558 2584
2559 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2585 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2586 !sched_smt_power_savings)
2560 return -1; 2587 return -1;
2561 return 0; 2588 return 0;
2562} 2589}
@@ -2568,11 +2595,11 @@ out_one_pinned:
2568 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2595 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
2569 * this_rq is locked. 2596 * this_rq is locked.
2570 */ 2597 */
2571static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, 2598static int
2572 struct sched_domain *sd) 2599load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2573{ 2600{
2574 struct sched_group *group; 2601 struct sched_group *group;
2575 runqueue_t *busiest = NULL; 2602 struct rq *busiest = NULL;
2576 unsigned long imbalance; 2603 unsigned long imbalance;
2577 int nr_moved = 0; 2604 int nr_moved = 0;
2578 int sd_idle = 0; 2605 int sd_idle = 0;
@@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2618 2645
2619out_balanced: 2646out_balanced:
2620 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2647 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2621 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2648 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2649 !sched_smt_power_savings)
2622 return -1; 2650 return -1;
2623 sd->nr_balance_failed = 0; 2651 sd->nr_balance_failed = 0;
2652
2624 return 0; 2653 return 0;
2625} 2654}
2626 2655
@@ -2628,16 +2657,15 @@ out_balanced:
2628 * idle_balance is called by schedule() if this_cpu is about to become 2657 * idle_balance is called by schedule() if this_cpu is about to become
2629 * idle. Attempts to pull tasks from other CPUs. 2658 * idle. Attempts to pull tasks from other CPUs.
2630 */ 2659 */
2631static void idle_balance(int this_cpu, runqueue_t *this_rq) 2660static void idle_balance(int this_cpu, struct rq *this_rq)
2632{ 2661{
2633 struct sched_domain *sd; 2662 struct sched_domain *sd;
2634 2663
2635 for_each_domain(this_cpu, sd) { 2664 for_each_domain(this_cpu, sd) {
2636 if (sd->flags & SD_BALANCE_NEWIDLE) { 2665 if (sd->flags & SD_BALANCE_NEWIDLE) {
2637 if (load_balance_newidle(this_cpu, this_rq, sd)) { 2666 /* If we've pulled tasks over stop searching: */
2638 /* We've pulled tasks over so stop searching */ 2667 if (load_balance_newidle(this_cpu, this_rq, sd))
2639 break; 2668 break;
2640 }
2641 } 2669 }
2642 } 2670 }
2643} 2671}
@@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
2650 * 2678 *
2651 * Called with busiest_rq locked. 2679 * Called with busiest_rq locked.
2652 */ 2680 */
2653static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) 2681static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2654{ 2682{
2655 struct sched_domain *sd;
2656 runqueue_t *target_rq;
2657 int target_cpu = busiest_rq->push_cpu; 2683 int target_cpu = busiest_rq->push_cpu;
2684 struct sched_domain *sd;
2685 struct rq *target_rq;
2658 2686
2687 /* Is there any task to move? */
2659 if (busiest_rq->nr_running <= 1) 2688 if (busiest_rq->nr_running <= 1)
2660 /* no task to move */
2661 return; 2689 return;
2662 2690
2663 target_rq = cpu_rq(target_cpu); 2691 target_rq = cpu_rq(target_cpu);
@@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
2675 /* Search for an sd spanning us and the target CPU. */ 2703 /* Search for an sd spanning us and the target CPU. */
2676 for_each_domain(target_cpu, sd) { 2704 for_each_domain(target_cpu, sd) {
2677 if ((sd->flags & SD_LOAD_BALANCE) && 2705 if ((sd->flags & SD_LOAD_BALANCE) &&
2678 cpu_isset(busiest_cpu, sd->span)) 2706 cpu_isset(busiest_cpu, sd->span))
2679 break; 2707 break;
2680 } 2708 }
2681 2709
2682 if (unlikely(sd == NULL)) 2710 if (likely(sd)) {
2683 goto out; 2711 schedstat_inc(sd, alb_cnt);
2684
2685 schedstat_inc(sd, alb_cnt);
2686 2712
2687 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2713 if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
2688 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) 2714 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
2689 schedstat_inc(sd, alb_pushed); 2715 NULL))
2690 else 2716 schedstat_inc(sd, alb_pushed);
2691 schedstat_inc(sd, alb_failed); 2717 else
2692out: 2718 schedstat_inc(sd, alb_failed);
2719 }
2693 spin_unlock(&target_rq->lock); 2720 spin_unlock(&target_rq->lock);
2694} 2721}
2695 2722
@@ -2702,23 +2729,27 @@ out:
2702 * Balancing parameters are set up in arch_init_sched_domains. 2729 * Balancing parameters are set up in arch_init_sched_domains.
2703 */ 2730 */
2704 2731
2705/* Don't have all balancing operations going off at once */ 2732/* Don't have all balancing operations going off at once: */
2706#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) 2733static inline unsigned long cpu_offset(int cpu)
2734{
2735 return jiffies + cpu * HZ / NR_CPUS;
2736}
2707 2737
2708static void rebalance_tick(int this_cpu, runqueue_t *this_rq, 2738static void
2709 enum idle_type idle) 2739rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
2710{ 2740{
2711 unsigned long old_load, this_load; 2741 unsigned long this_load, interval, j = cpu_offset(this_cpu);
2712 unsigned long j = jiffies + CPU_OFFSET(this_cpu);
2713 struct sched_domain *sd; 2742 struct sched_domain *sd;
2714 int i; 2743 int i, scale;
2715 2744
2716 this_load = this_rq->raw_weighted_load; 2745 this_load = this_rq->raw_weighted_load;
2717 /* Update our load */ 2746
2718 for (i = 0; i < 3; i++) { 2747 /* Update our load: */
2719 unsigned long new_load = this_load; 2748 for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
2720 int scale = 1 << i; 2749 unsigned long old_load, new_load;
2750
2721 old_load = this_rq->cpu_load[i]; 2751 old_load = this_rq->cpu_load[i];
2752 new_load = this_load;
2722 /* 2753 /*
2723 * Round up the averaging division if load is increasing. This 2754 * Round up the averaging division if load is increasing. This
2724 * prevents us from getting stuck on 9 if the load is 10, for 2755 * prevents us from getting stuck on 9 if the load is 10, for
@@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2730 } 2761 }
2731 2762
2732 for_each_domain(this_cpu, sd) { 2763 for_each_domain(this_cpu, sd) {
2733 unsigned long interval;
2734
2735 if (!(sd->flags & SD_LOAD_BALANCE)) 2764 if (!(sd->flags & SD_LOAD_BALANCE))
2736 continue; 2765 continue;
2737 2766
@@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2761/* 2790/*
2762 * on UP we do not need to balance between CPUs: 2791 * on UP we do not need to balance between CPUs:
2763 */ 2792 */
2764static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) 2793static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
2765{ 2794{
2766} 2795}
2767static inline void idle_balance(int cpu, runqueue_t *rq) 2796static inline void idle_balance(int cpu, struct rq *rq)
2768{ 2797{
2769} 2798}
2770#endif 2799#endif
2771 2800
2772static inline int wake_priority_sleeper(runqueue_t *rq) 2801static inline int wake_priority_sleeper(struct rq *rq)
2773{ 2802{
2774 int ret = 0; 2803 int ret = 0;
2804
2775#ifdef CONFIG_SCHED_SMT 2805#ifdef CONFIG_SCHED_SMT
2776 spin_lock(&rq->lock); 2806 spin_lock(&rq->lock);
2777 /* 2807 /*
@@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2795 * This is called on clock ticks and on context switches. 2825 * This is called on clock ticks and on context switches.
2796 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2826 * Bank in p->sched_time the ns elapsed since the last tick or switch.
2797 */ 2827 */
2798static inline void update_cpu_clock(task_t *p, runqueue_t *rq, 2828static inline void
2799 unsigned long long now) 2829update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
2800{ 2830{
2801 unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); 2831 p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
2802 p->sched_time += now - last;
2803} 2832}
2804 2833
2805/* 2834/*
2806 * Return current->sched_time plus any more ns on the sched_clock 2835 * Return current->sched_time plus any more ns on the sched_clock
2807 * that have not yet been banked. 2836 * that have not yet been banked.
2808 */ 2837 */
2809unsigned long long current_sched_time(const task_t *tsk) 2838unsigned long long current_sched_time(const struct task_struct *p)
2810{ 2839{
2811 unsigned long long ns; 2840 unsigned long long ns;
2812 unsigned long flags; 2841 unsigned long flags;
2842
2813 local_irq_save(flags); 2843 local_irq_save(flags);
2814 ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); 2844 ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
2815 ns = tsk->sched_time + (sched_clock() - ns); 2845 ns = p->sched_time + sched_clock() - ns;
2816 local_irq_restore(flags); 2846 local_irq_restore(flags);
2847
2817 return ns; 2848 return ns;
2818} 2849}
2819 2850
@@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)
2827 * increasing number of running tasks. We also ignore the interactivity 2858 * increasing number of running tasks. We also ignore the interactivity
2828 * if a better static_prio task has expired: 2859 * if a better static_prio task has expired:
2829 */ 2860 */
2830#define EXPIRED_STARVING(rq) \ 2861static inline int expired_starving(struct rq *rq)
2831 ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ 2862{
2832 (jiffies - (rq)->expired_timestamp >= \ 2863 if (rq->curr->static_prio > rq->best_expired_prio)
2833 STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ 2864 return 1;
2834 ((rq)->curr->static_prio > (rq)->best_expired_prio)) 2865 if (!STARVATION_LIMIT || !rq->expired_timestamp)
2866 return 0;
2867 if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
2868 return 1;
2869 return 0;
2870}
2835 2871
2836/* 2872/*
2837 * Account user cpu time to a process. 2873 * Account user cpu time to a process.
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
2864 cputime_t cputime) 2900 cputime_t cputime)
2865{ 2901{
2866 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2902 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2867 runqueue_t *rq = this_rq(); 2903 struct rq *rq = this_rq();
2868 cputime64_t tmp; 2904 cputime64_t tmp;
2869 2905
2870 p->stime = cputime_add(p->stime, cputime); 2906 p->stime = cputime_add(p->stime, cputime);
@@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2894{ 2930{
2895 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2931 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2896 cputime64_t tmp = cputime_to_cputime64(steal); 2932 cputime64_t tmp = cputime_to_cputime64(steal);
2897 runqueue_t *rq = this_rq(); 2933 struct rq *rq = this_rq();
2898 2934
2899 if (p == rq->idle) { 2935 if (p == rq->idle) {
2900 p->stime = cputime_add(p->stime, steal); 2936 p->stime = cputime_add(p->stime, steal);
@@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2915 */ 2951 */
2916void scheduler_tick(void) 2952void scheduler_tick(void)
2917{ 2953{
2918 int cpu = smp_processor_id();
2919 runqueue_t *rq = this_rq();
2920 task_t *p = current;
2921 unsigned long long now = sched_clock(); 2954 unsigned long long now = sched_clock();
2955 struct task_struct *p = current;
2956 int cpu = smp_processor_id();
2957 struct rq *rq = cpu_rq(cpu);
2922 2958
2923 update_cpu_clock(p, rq, now); 2959 update_cpu_clock(p, rq, now);
2924 2960
@@ -2968,7 +3004,7 @@ void scheduler_tick(void)
2968 3004
2969 if (!rq->expired_timestamp) 3005 if (!rq->expired_timestamp)
2970 rq->expired_timestamp = jiffies; 3006 rq->expired_timestamp = jiffies;
2971 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 3007 if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
2972 enqueue_task(p, rq->expired); 3008 enqueue_task(p, rq->expired);
2973 if (p->static_prio < rq->best_expired_prio) 3009 if (p->static_prio < rq->best_expired_prio)
2974 rq->best_expired_prio = p->static_prio; 3010 rq->best_expired_prio = p->static_prio;
@@ -3007,7 +3043,7 @@ out:
3007} 3043}
3008 3044
3009#ifdef CONFIG_SCHED_SMT 3045#ifdef CONFIG_SCHED_SMT
3010static inline void wakeup_busy_runqueue(runqueue_t *rq) 3046static inline void wakeup_busy_runqueue(struct rq *rq)
3011{ 3047{
3012 /* If an SMT runqueue is sleeping due to priority reasons wake it up */ 3048 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
3013 if (rq->curr == rq->idle && rq->nr_running) 3049 if (rq->curr == rq->idle && rq->nr_running)
@@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
3033 return; 3069 return;
3034 3070
3035 for_each_cpu_mask(i, sd->span) { 3071 for_each_cpu_mask(i, sd->span) {
3036 runqueue_t *smt_rq = cpu_rq(i); 3072 struct rq *smt_rq = cpu_rq(i);
3037 3073
3038 if (i == this_cpu) 3074 if (i == this_cpu)
3039 continue; 3075 continue;
@@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
3050 * utilize, if another task runs on a sibling. This models the 3086 * utilize, if another task runs on a sibling. This models the
3051 * slowdown effect of other tasks running on siblings: 3087 * slowdown effect of other tasks running on siblings:
3052 */ 3088 */
3053static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) 3089static inline unsigned long
3090smt_slice(struct task_struct *p, struct sched_domain *sd)
3054{ 3091{
3055 return p->time_slice * (100 - sd->per_cpu_gain) / 100; 3092 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
3056} 3093}
@@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
3061 * acquire their lock. As we only trylock the normal locking order does not 3098 * acquire their lock. As we only trylock the normal locking order does not
3062 * need to be obeyed. 3099 * need to be obeyed.
3063 */ 3100 */
3064static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) 3101static int
3102dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3065{ 3103{
3066 struct sched_domain *tmp, *sd = NULL; 3104 struct sched_domain *tmp, *sd = NULL;
3067 int ret = 0, i; 3105 int ret = 0, i;
@@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
3081 return 0; 3119 return 0;
3082 3120
3083 for_each_cpu_mask(i, sd->span) { 3121 for_each_cpu_mask(i, sd->span) {
3084 runqueue_t *smt_rq; 3122 struct task_struct *smt_curr;
3085 task_t *smt_curr; 3123 struct rq *smt_rq;
3086 3124
3087 if (i == this_cpu) 3125 if (i == this_cpu)
3088 continue; 3126 continue;
@@ -3127,9 +3165,8 @@ unlock:
3127static inline void wake_sleeping_dependent(int this_cpu) 3165static inline void wake_sleeping_dependent(int this_cpu)
3128{ 3166{
3129} 3167}
3130 3168static inline int
3131static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, 3169dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3132 task_t *p)
3133{ 3170{
3134 return 0; 3171 return 0;
3135} 3172}
@@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)
3142 /* 3179 /*
3143 * Underflow? 3180 * Underflow?
3144 */ 3181 */
3145 BUG_ON((preempt_count() < 0)); 3182 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
3183 return;
3146 preempt_count() += val; 3184 preempt_count() += val;
3147 /* 3185 /*
3148 * Spinlock count overflowing soon? 3186 * Spinlock count overflowing soon?
3149 */ 3187 */
3150 BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); 3188 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
3151} 3189}
3152EXPORT_SYMBOL(add_preempt_count); 3190EXPORT_SYMBOL(add_preempt_count);
3153 3191
@@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)
3156 /* 3194 /*
3157 * Underflow? 3195 * Underflow?
3158 */ 3196 */
3159 BUG_ON(val > preempt_count()); 3197 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
3198 return;
3160 /* 3199 /*
3161 * Is the spinlock portion underflowing? 3200 * Is the spinlock portion underflowing?
3162 */ 3201 */
3163 BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); 3202 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
3203 !(preempt_count() & PREEMPT_MASK)))
3204 return;
3205
3164 preempt_count() -= val; 3206 preempt_count() -= val;
3165} 3207}
3166EXPORT_SYMBOL(sub_preempt_count); 3208EXPORT_SYMBOL(sub_preempt_count);
@@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
3178 */ 3220 */
3179asmlinkage void __sched schedule(void) 3221asmlinkage void __sched schedule(void)
3180{ 3222{
3181 long *switch_count; 3223 struct task_struct *prev, *next;
3182 task_t *prev, *next; 3224 struct prio_array *array;
3183 runqueue_t *rq;
3184 prio_array_t *array;
3185 struct list_head *queue; 3225 struct list_head *queue;
3186 unsigned long long now; 3226 unsigned long long now;
3187 unsigned long run_time; 3227 unsigned long run_time;
3188 int cpu, idx, new_prio; 3228 int cpu, idx, new_prio;
3229 long *switch_count;
3230 struct rq *rq;
3189 3231
3190 /* 3232 /*
3191 * Test if we are atomic. Since do_exit() needs to call into 3233 * Test if we are atomic. Since do_exit() needs to call into
@@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:
3275 3317
3276 idx = sched_find_first_bit(array->bitmap); 3318 idx = sched_find_first_bit(array->bitmap);
3277 queue = array->queue + idx; 3319 queue = array->queue + idx;
3278 next = list_entry(queue->next, task_t, run_list); 3320 next = list_entry(queue->next, struct task_struct, run_list);
3279 3321
3280 if (!rt_task(next) && interactive_sleep(next->sleep_type)) { 3322 if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
3281 unsigned long long delta = now - next->timestamp; 3323 unsigned long long delta = now - next->timestamp;
@@ -3338,7 +3380,6 @@ switch_tasks:
3338 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3380 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3339 goto need_resched; 3381 goto need_resched;
3340} 3382}
3341
3342EXPORT_SYMBOL(schedule); 3383EXPORT_SYMBOL(schedule);
3343 3384
3344#ifdef CONFIG_PREEMPT 3385#ifdef CONFIG_PREEMPT
@@ -3383,7 +3424,6 @@ need_resched:
3383 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3424 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3384 goto need_resched; 3425 goto need_resched;
3385} 3426}
3386
3387EXPORT_SYMBOL(preempt_schedule); 3427EXPORT_SYMBOL(preempt_schedule);
3388 3428
3389/* 3429/*
@@ -3432,10 +3472,8 @@ need_resched:
3432int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3472int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3433 void *key) 3473 void *key)
3434{ 3474{
3435 task_t *p = curr->private; 3475 return try_to_wake_up(curr->private, mode, sync);
3436 return try_to_wake_up(p, mode, sync);
3437} 3476}
3438
3439EXPORT_SYMBOL(default_wake_function); 3477EXPORT_SYMBOL(default_wake_function);
3440 3478
3441/* 3479/*
@@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3453 struct list_head *tmp, *next; 3491 struct list_head *tmp, *next;
3454 3492
3455 list_for_each_safe(tmp, next, &q->task_list) { 3493 list_for_each_safe(tmp, next, &q->task_list) {
3456 wait_queue_t *curr; 3494 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
3457 unsigned flags; 3495 unsigned flags = curr->flags;
3458 curr = list_entry(tmp, wait_queue_t, task_list); 3496
3459 flags = curr->flags;
3460 if (curr->func(curr, mode, sync, key) && 3497 if (curr->func(curr, mode, sync, key) &&
3461 (flags & WQ_FLAG_EXCLUSIVE) && 3498 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3462 !--nr_exclusive)
3463 break; 3499 break;
3464 } 3500 }
3465} 3501}
@@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3480 __wake_up_common(q, mode, nr_exclusive, 0, key); 3516 __wake_up_common(q, mode, nr_exclusive, 0, key);
3481 spin_unlock_irqrestore(&q->lock, flags); 3517 spin_unlock_irqrestore(&q->lock, flags);
3482} 3518}
3483
3484EXPORT_SYMBOL(__wake_up); 3519EXPORT_SYMBOL(__wake_up);
3485 3520
3486/* 3521/*
@@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);
3549void fastcall __sched wait_for_completion(struct completion *x) 3584void fastcall __sched wait_for_completion(struct completion *x)
3550{ 3585{
3551 might_sleep(); 3586 might_sleep();
3587
3552 spin_lock_irq(&x->wait.lock); 3588 spin_lock_irq(&x->wait.lock);
3553 if (!x->done) { 3589 if (!x->done) {
3554 DECLARE_WAITQUEUE(wait, current); 3590 DECLARE_WAITQUEUE(wait, current);
@@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
3693 schedule(); 3729 schedule();
3694 SLEEP_ON_TAIL 3730 SLEEP_ON_TAIL
3695} 3731}
3696
3697EXPORT_SYMBOL(interruptible_sleep_on); 3732EXPORT_SYMBOL(interruptible_sleep_on);
3698 3733
3699long fastcall __sched 3734long fastcall __sched
@@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
3709 3744
3710 return timeout; 3745 return timeout;
3711} 3746}
3712
3713EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3747EXPORT_SYMBOL(interruptible_sleep_on_timeout);
3714 3748
3715void fastcall __sched sleep_on(wait_queue_head_t *q) 3749void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
3722 schedule(); 3756 schedule();
3723 SLEEP_ON_TAIL 3757 SLEEP_ON_TAIL
3724} 3758}
3725
3726EXPORT_SYMBOL(sleep_on); 3759EXPORT_SYMBOL(sleep_on);
3727 3760
3728long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) 3761long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);
3752 * 3785 *
3753 * Used by the rt_mutex code to implement priority inheritance logic. 3786 * Used by the rt_mutex code to implement priority inheritance logic.
3754 */ 3787 */
3755void rt_mutex_setprio(task_t *p, int prio) 3788void rt_mutex_setprio(struct task_struct *p, int prio)
3756{ 3789{
3790 struct prio_array *array;
3757 unsigned long flags; 3791 unsigned long flags;
3758 prio_array_t *array; 3792 struct rq *rq;
3759 runqueue_t *rq;
3760 int oldprio; 3793 int oldprio;
3761 3794
3762 BUG_ON(prio < 0 || prio > MAX_PRIO); 3795 BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)
3793 3826
3794#endif 3827#endif
3795 3828
3796void set_user_nice(task_t *p, long nice) 3829void set_user_nice(struct task_struct *p, long nice)
3797{ 3830{
3798 unsigned long flags; 3831 struct prio_array *array;
3799 prio_array_t *array;
3800 runqueue_t *rq;
3801 int old_prio, delta; 3832 int old_prio, delta;
3833 unsigned long flags;
3834 struct rq *rq;
3802 3835
3803 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3836 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3804 return; 3837 return;
@@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);
3849 * @p: task 3882 * @p: task
3850 * @nice: nice value 3883 * @nice: nice value
3851 */ 3884 */
3852int can_nice(const task_t *p, const int nice) 3885int can_nice(const struct task_struct *p, const int nice)
3853{ 3886{
3854 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3887 /* convert nice value [19,-20] to rlimit style value [1,40] */
3855 int nice_rlim = 20 - nice; 3888 int nice_rlim = 20 - nice;
3889
3856 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3890 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
3857 capable(CAP_SYS_NICE)); 3891 capable(CAP_SYS_NICE));
3858} 3892}
@@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)
3868 */ 3902 */
3869asmlinkage long sys_nice(int increment) 3903asmlinkage long sys_nice(int increment)
3870{ 3904{
3871 int retval; 3905 long nice, retval;
3872 long nice;
3873 3906
3874 /* 3907 /*
3875 * Setpriority might change our priority at the same moment. 3908 * Setpriority might change our priority at the same moment.
@@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
3908 * RT tasks are offset by -200. Normal tasks are centered 3941 * RT tasks are offset by -200. Normal tasks are centered
3909 * around 0, value goes from -16 to +15. 3942 * around 0, value goes from -16 to +15.
3910 */ 3943 */
3911int task_prio(const task_t *p) 3944int task_prio(const struct task_struct *p)
3912{ 3945{
3913 return p->prio - MAX_RT_PRIO; 3946 return p->prio - MAX_RT_PRIO;
3914} 3947}
@@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)
3917 * task_nice - return the nice value of a given task. 3950 * task_nice - return the nice value of a given task.
3918 * @p: the task in question. 3951 * @p: the task in question.
3919 */ 3952 */
3920int task_nice(const task_t *p) 3953int task_nice(const struct task_struct *p)
3921{ 3954{
3922 return TASK_NICE(p); 3955 return TASK_NICE(p);
3923} 3956}
@@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)
3936 * idle_task - return the idle task for a given cpu. 3969 * idle_task - return the idle task for a given cpu.
3937 * @cpu: the processor in question. 3970 * @cpu: the processor in question.
3938 */ 3971 */
3939task_t *idle_task(int cpu) 3972struct task_struct *idle_task(int cpu)
3940{ 3973{
3941 return cpu_rq(cpu)->idle; 3974 return cpu_rq(cpu)->idle;
3942} 3975}
@@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)
3945 * find_process_by_pid - find a process with a matching PID value. 3978 * find_process_by_pid - find a process with a matching PID value.
3946 * @pid: the pid in question. 3979 * @pid: the pid in question.
3947 */ 3980 */
3948static inline task_t *find_process_by_pid(pid_t pid) 3981static inline struct task_struct *find_process_by_pid(pid_t pid)
3949{ 3982{
3950 return pid ? find_task_by_pid(pid) : current; 3983 return pid ? find_task_by_pid(pid) : current;
3951} 3984}
@@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
3954static void __setscheduler(struct task_struct *p, int policy, int prio) 3987static void __setscheduler(struct task_struct *p, int policy, int prio)
3955{ 3988{
3956 BUG_ON(p->array); 3989 BUG_ON(p->array);
3990
3957 p->policy = policy; 3991 p->policy = policy;
3958 p->rt_priority = prio; 3992 p->rt_priority = prio;
3959 p->normal_prio = normal_prio(p); 3993 p->normal_prio = normal_prio(p);
@@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3977int sched_setscheduler(struct task_struct *p, int policy, 4011int sched_setscheduler(struct task_struct *p, int policy,
3978 struct sched_param *param) 4012 struct sched_param *param)
3979{ 4013{
3980 int retval; 4014 int retval, oldprio, oldpolicy = -1;
3981 int oldprio, oldpolicy = -1; 4015 struct prio_array *array;
3982 prio_array_t *array;
3983 unsigned long flags; 4016 unsigned long flags;
3984 runqueue_t *rq; 4017 struct rq *rq;
3985 4018
3986 /* may grab non-irq protected spin_locks */ 4019 /* may grab non-irq protected spin_locks */
3987 BUG_ON(in_interrupt()); 4020 BUG_ON(in_interrupt());
@@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
4079static int 4112static int
4080do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 4113do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4081{ 4114{
4082 int retval;
4083 struct sched_param lparam; 4115 struct sched_param lparam;
4084 struct task_struct *p; 4116 struct task_struct *p;
4117 int retval;
4085 4118
4086 if (!param || pid < 0) 4119 if (!param || pid < 0)
4087 return -EINVAL; 4120 return -EINVAL;
@@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4097 read_unlock_irq(&tasklist_lock); 4130 read_unlock_irq(&tasklist_lock);
4098 retval = sched_setscheduler(p, policy, &lparam); 4131 retval = sched_setscheduler(p, policy, &lparam);
4099 put_task_struct(p); 4132 put_task_struct(p);
4133
4100 return retval; 4134 return retval;
4101} 4135}
4102 4136
@@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
4132 */ 4166 */
4133asmlinkage long sys_sched_getscheduler(pid_t pid) 4167asmlinkage long sys_sched_getscheduler(pid_t pid)
4134{ 4168{
4169 struct task_struct *p;
4135 int retval = -EINVAL; 4170 int retval = -EINVAL;
4136 task_t *p;
4137 4171
4138 if (pid < 0) 4172 if (pid < 0)
4139 goto out_nounlock; 4173 goto out_nounlock;
@@ -4160,8 +4194,8 @@ out_nounlock:
4160asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 4194asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
4161{ 4195{
4162 struct sched_param lp; 4196 struct sched_param lp;
4197 struct task_struct *p;
4163 int retval = -EINVAL; 4198 int retval = -EINVAL;
4164 task_t *p;
4165 4199
4166 if (!param || pid < 0) 4200 if (!param || pid < 0)
4167 goto out_nounlock; 4201 goto out_nounlock;
@@ -4194,9 +4228,9 @@ out_unlock:
4194 4228
4195long sched_setaffinity(pid_t pid, cpumask_t new_mask) 4229long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4196{ 4230{
4197 task_t *p;
4198 int retval;
4199 cpumask_t cpus_allowed; 4231 cpumask_t cpus_allowed;
4232 struct task_struct *p;
4233 int retval;
4200 4234
4201 lock_cpu_hotplug(); 4235 lock_cpu_hotplug();
4202 read_lock(&tasklist_lock); 4236 read_lock(&tasklist_lock);
@@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
4282 4316
4283long sched_getaffinity(pid_t pid, cpumask_t *mask) 4317long sched_getaffinity(pid_t pid, cpumask_t *mask)
4284{ 4318{
4319 struct task_struct *p;
4285 int retval; 4320 int retval;
4286 task_t *p;
4287 4321
4288 lock_cpu_hotplug(); 4322 lock_cpu_hotplug();
4289 read_lock(&tasklist_lock); 4323 read_lock(&tasklist_lock);
@@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
4342 */ 4376 */
4343asmlinkage long sys_sched_yield(void) 4377asmlinkage long sys_sched_yield(void)
4344{ 4378{
4345 runqueue_t *rq = this_rq_lock(); 4379 struct rq *rq = this_rq_lock();
4346 prio_array_t *array = current->array; 4380 struct prio_array *array = current->array, *target = rq->expired;
4347 prio_array_t *target = rq->expired;
4348 4381
4349 schedstat_inc(rq, yld_cnt); 4382 schedstat_inc(rq, yld_cnt);
4350 /* 4383 /*
@@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)
4378 * no need to preempt or enable interrupts: 4411 * no need to preempt or enable interrupts:
4379 */ 4412 */
4380 __release(rq->lock); 4413 __release(rq->lock);
4414 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
4381 _raw_spin_unlock(&rq->lock); 4415 _raw_spin_unlock(&rq->lock);
4382 preempt_enable_no_resched(); 4416 preempt_enable_no_resched();
4383 4417
@@ -4441,6 +4475,7 @@ int cond_resched_lock(spinlock_t *lock)
4441 spin_lock(lock); 4475 spin_lock(lock);
4442 } 4476 }
4443 if (need_resched() && __resched_legal()) { 4477 if (need_resched() && __resched_legal()) {
4478 spin_release(&lock->dep_map, 1, _THIS_IP_);
4444 _raw_spin_unlock(lock); 4479 _raw_spin_unlock(lock);
4445 preempt_enable_no_resched(); 4480 preempt_enable_no_resched();
4446 __cond_resched(); 4481 __cond_resched();
@@ -4456,7 +4491,9 @@ int __sched cond_resched_softirq(void)
4456 BUG_ON(!in_softirq()); 4491 BUG_ON(!in_softirq());
4457 4492
4458 if (need_resched() && __resched_legal()) { 4493 if (need_resched() && __resched_legal()) {
4459 __local_bh_enable(); 4494 raw_local_irq_disable();
4495 _local_bh_enable();
4496 raw_local_irq_enable();
4460 __cond_resched(); 4497 __cond_resched();
4461 local_bh_disable(); 4498 local_bh_disable();
4462 return 1; 4499 return 1;
@@ -4476,7 +4513,6 @@ void __sched yield(void)
4476 set_current_state(TASK_RUNNING); 4513 set_current_state(TASK_RUNNING);
4477 sys_sched_yield(); 4514 sys_sched_yield();
4478} 4515}
4479
4480EXPORT_SYMBOL(yield); 4516EXPORT_SYMBOL(yield);
4481 4517
4482/* 4518/*
@@ -4488,18 +4524,17 @@ EXPORT_SYMBOL(yield);
4488 */ 4524 */
4489void __sched io_schedule(void) 4525void __sched io_schedule(void)
4490{ 4526{
4491 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4527 struct rq *rq = &__raw_get_cpu_var(runqueues);
4492 4528
4493 atomic_inc(&rq->nr_iowait); 4529 atomic_inc(&rq->nr_iowait);
4494 schedule(); 4530 schedule();
4495 atomic_dec(&rq->nr_iowait); 4531 atomic_dec(&rq->nr_iowait);
4496} 4532}
4497
4498EXPORT_SYMBOL(io_schedule); 4533EXPORT_SYMBOL(io_schedule);
4499 4534
4500long __sched io_schedule_timeout(long timeout) 4535long __sched io_schedule_timeout(long timeout)
4501{ 4536{
4502 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4537 struct rq *rq = &__raw_get_cpu_var(runqueues);
4503 long ret; 4538 long ret;
4504 4539
4505 atomic_inc(&rq->nr_iowait); 4540 atomic_inc(&rq->nr_iowait);
@@ -4566,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4566asmlinkage 4601asmlinkage
4567long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 4602long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4568{ 4603{
4604 struct task_struct *p;
4569 int retval = -EINVAL; 4605 int retval = -EINVAL;
4570 struct timespec t; 4606 struct timespec t;
4571 task_t *p;
4572 4607
4573 if (pid < 0) 4608 if (pid < 0)
4574 goto out_nounlock; 4609 goto out_nounlock;
@@ -4596,28 +4631,32 @@ out_unlock:
4596 4631
4597static inline struct task_struct *eldest_child(struct task_struct *p) 4632static inline struct task_struct *eldest_child(struct task_struct *p)
4598{ 4633{
4599 if (list_empty(&p->children)) return NULL; 4634 if (list_empty(&p->children))
4635 return NULL;
4600 return list_entry(p->children.next,struct task_struct,sibling); 4636 return list_entry(p->children.next,struct task_struct,sibling);
4601} 4637}
4602 4638
4603static inline struct task_struct *older_sibling(struct task_struct *p) 4639static inline struct task_struct *older_sibling(struct task_struct *p)
4604{ 4640{
4605 if (p->sibling.prev==&p->parent->children) return NULL; 4641 if (p->sibling.prev==&p->parent->children)
4642 return NULL;
4606 return list_entry(p->sibling.prev,struct task_struct,sibling); 4643 return list_entry(p->sibling.prev,struct task_struct,sibling);
4607} 4644}
4608 4645
4609static inline struct task_struct *younger_sibling(struct task_struct *p) 4646static inline struct task_struct *younger_sibling(struct task_struct *p)
4610{ 4647{
4611 if (p->sibling.next==&p->parent->children) return NULL; 4648 if (p->sibling.next==&p->parent->children)
4649 return NULL;
4612 return list_entry(p->sibling.next,struct task_struct,sibling); 4650 return list_entry(p->sibling.next,struct task_struct,sibling);
4613} 4651}
4614 4652
4615static void show_task(task_t *p) 4653static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
4654
4655static void show_task(struct task_struct *p)
4616{ 4656{
4617 task_t *relative; 4657 struct task_struct *relative;
4618 unsigned state;
4619 unsigned long free = 0; 4658 unsigned long free = 0;
4620 static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; 4659 unsigned state;
4621 4660
4622 printk("%-13.13s ", p->comm); 4661 printk("%-13.13s ", p->comm);
4623 state = p->state ? __ffs(p->state) + 1 : 0; 4662 state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4668,7 +4707,7 @@ static void show_task(task_t *p)
4668 4707
4669void show_state(void) 4708void show_state(void)
4670{ 4709{
4671 task_t *g, *p; 4710 struct task_struct *g, *p;
4672 4711
4673#if (BITS_PER_LONG == 32) 4712#if (BITS_PER_LONG == 32)
4674 printk("\n" 4713 printk("\n"
@@ -4690,7 +4729,7 @@ void show_state(void)
4690 } while_each_thread(g, p); 4729 } while_each_thread(g, p);
4691 4730
4692 read_unlock(&tasklist_lock); 4731 read_unlock(&tasklist_lock);
4693 mutex_debug_show_all_locks(); 4732 debug_show_all_locks();
4694} 4733}
4695 4734
4696/** 4735/**
@@ -4701,9 +4740,9 @@ void show_state(void)
4701 * NOTE: this function does not set the idle thread's NEED_RESCHED 4740 * NOTE: this function does not set the idle thread's NEED_RESCHED
4702 * flag, to make booting more robust. 4741 * flag, to make booting more robust.
4703 */ 4742 */
4704void __devinit init_idle(task_t *idle, int cpu) 4743void __devinit init_idle(struct task_struct *idle, int cpu)
4705{ 4744{
4706 runqueue_t *rq = cpu_rq(cpu); 4745 struct rq *rq = cpu_rq(cpu);
4707 unsigned long flags; 4746 unsigned long flags;
4708 4747
4709 idle->timestamp = sched_clock(); 4748 idle->timestamp = sched_clock();
@@ -4742,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4742/* 4781/*
4743 * This is how migration works: 4782 * This is how migration works:
4744 * 4783 *
4745 * 1) we queue a migration_req_t structure in the source CPU's 4784 * 1) we queue a struct migration_req structure in the source CPU's
4746 * runqueue and wake up that CPU's migration thread. 4785 * runqueue and wake up that CPU's migration thread.
4747 * 2) we down() the locked semaphore => thread blocks. 4786 * 2) we down() the locked semaphore => thread blocks.
4748 * 3) migration thread wakes up (implicitly it forces the migrated 4787 * 3) migration thread wakes up (implicitly it forces the migrated
@@ -4764,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4764 * task must not exit() & deallocate itself prematurely. The 4803 * task must not exit() & deallocate itself prematurely. The
4765 * call is not atomic; no spinlocks may be held. 4804 * call is not atomic; no spinlocks may be held.
4766 */ 4805 */
4767int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4806int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
4768{ 4807{
4808 struct migration_req req;
4769 unsigned long flags; 4809 unsigned long flags;
4810 struct rq *rq;
4770 int ret = 0; 4811 int ret = 0;
4771 migration_req_t req;
4772 runqueue_t *rq;
4773 4812
4774 rq = task_rq_lock(p, &flags); 4813 rq = task_rq_lock(p, &flags);
4775 if (!cpus_intersects(new_mask, cpu_online_map)) { 4814 if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4792,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
4792 } 4831 }
4793out: 4832out:
4794 task_rq_unlock(rq, &flags); 4833 task_rq_unlock(rq, &flags);
4834
4795 return ret; 4835 return ret;
4796} 4836}
4797
4798EXPORT_SYMBOL_GPL(set_cpus_allowed); 4837EXPORT_SYMBOL_GPL(set_cpus_allowed);
4799 4838
4800/* 4839/*
@@ -4810,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
4810 */ 4849 */
4811static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 4850static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4812{ 4851{
4813 runqueue_t *rq_dest, *rq_src; 4852 struct rq *rq_dest, *rq_src;
4814 int ret = 0; 4853 int ret = 0;
4815 4854
4816 if (unlikely(cpu_is_offline(dest_cpu))) 4855 if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4855,16 +4894,16 @@ out:
4855 */ 4894 */
4856static int migration_thread(void *data) 4895static int migration_thread(void *data)
4857{ 4896{
4858 runqueue_t *rq;
4859 int cpu = (long)data; 4897 int cpu = (long)data;
4898 struct rq *rq;
4860 4899
4861 rq = cpu_rq(cpu); 4900 rq = cpu_rq(cpu);
4862 BUG_ON(rq->migration_thread != current); 4901 BUG_ON(rq->migration_thread != current);
4863 4902
4864 set_current_state(TASK_INTERRUPTIBLE); 4903 set_current_state(TASK_INTERRUPTIBLE);
4865 while (!kthread_should_stop()) { 4904 while (!kthread_should_stop()) {
4905 struct migration_req *req;
4866 struct list_head *head; 4906 struct list_head *head;
4867 migration_req_t *req;
4868 4907
4869 try_to_freeze(); 4908 try_to_freeze();
4870 4909
@@ -4888,7 +4927,7 @@ static int migration_thread(void *data)
4888 set_current_state(TASK_INTERRUPTIBLE); 4927 set_current_state(TASK_INTERRUPTIBLE);
4889 continue; 4928 continue;
4890 } 4929 }
4891 req = list_entry(head->next, migration_req_t, list); 4930 req = list_entry(head->next, struct migration_req, list);
4892 list_del_init(head->next); 4931 list_del_init(head->next);
4893 4932
4894 spin_unlock(&rq->lock); 4933 spin_unlock(&rq->lock);
@@ -4913,28 +4952,28 @@ wait_to_die:
4913 4952
4914#ifdef CONFIG_HOTPLUG_CPU 4953#ifdef CONFIG_HOTPLUG_CPU
4915/* Figure out where task on dead CPU should go, use force if neccessary. */ 4954/* Figure out where task on dead CPU should go, use force if neccessary. */
4916static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) 4955static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
4917{ 4956{
4918 runqueue_t *rq;
4919 unsigned long flags; 4957 unsigned long flags;
4920 int dest_cpu;
4921 cpumask_t mask; 4958 cpumask_t mask;
4959 struct rq *rq;
4960 int dest_cpu;
4922 4961
4923restart: 4962restart:
4924 /* On same node? */ 4963 /* On same node? */
4925 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4964 mask = node_to_cpumask(cpu_to_node(dead_cpu));
4926 cpus_and(mask, mask, tsk->cpus_allowed); 4965 cpus_and(mask, mask, p->cpus_allowed);
4927 dest_cpu = any_online_cpu(mask); 4966 dest_cpu = any_online_cpu(mask);
4928 4967
4929 /* On any allowed CPU? */ 4968 /* On any allowed CPU? */
4930 if (dest_cpu == NR_CPUS) 4969 if (dest_cpu == NR_CPUS)
4931 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4970 dest_cpu = any_online_cpu(p->cpus_allowed);
4932 4971
4933 /* No more Mr. Nice Guy. */ 4972 /* No more Mr. Nice Guy. */
4934 if (dest_cpu == NR_CPUS) { 4973 if (dest_cpu == NR_CPUS) {
4935 rq = task_rq_lock(tsk, &flags); 4974 rq = task_rq_lock(p, &flags);
4936 cpus_setall(tsk->cpus_allowed); 4975 cpus_setall(p->cpus_allowed);
4937 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4976 dest_cpu = any_online_cpu(p->cpus_allowed);
4938 task_rq_unlock(rq, &flags); 4977 task_rq_unlock(rq, &flags);
4939 4978
4940 /* 4979 /*
@@ -4942,12 +4981,12 @@ restart:
4942 * kernel threads (both mm NULL), since they never 4981 * kernel threads (both mm NULL), since they never
4943 * leave kernel. 4982 * leave kernel.
4944 */ 4983 */
4945 if (tsk->mm && printk_ratelimit()) 4984 if (p->mm && printk_ratelimit())
4946 printk(KERN_INFO "process %d (%s) no " 4985 printk(KERN_INFO "process %d (%s) no "
4947 "longer affine to cpu%d\n", 4986 "longer affine to cpu%d\n",
4948 tsk->pid, tsk->comm, dead_cpu); 4987 p->pid, p->comm, dead_cpu);
4949 } 4988 }
4950 if (!__migrate_task(tsk, dead_cpu, dest_cpu)) 4989 if (!__migrate_task(p, dead_cpu, dest_cpu))
4951 goto restart; 4990 goto restart;
4952} 4991}
4953 4992
@@ -4958,9 +4997,9 @@ restart:
4958 * their home CPUs. So we just add the counter to another CPU's counter, 4997 * their home CPUs. So we just add the counter to another CPU's counter,
4959 * to keep the global sum constant after CPU-down: 4998 * to keep the global sum constant after CPU-down:
4960 */ 4999 */
4961static void migrate_nr_uninterruptible(runqueue_t *rq_src) 5000static void migrate_nr_uninterruptible(struct rq *rq_src)
4962{ 5001{
4963 runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); 5002 struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
4964 unsigned long flags; 5003 unsigned long flags;
4965 5004
4966 local_irq_save(flags); 5005 local_irq_save(flags);
@@ -4974,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
4974/* Run through task list and migrate tasks from the dead cpu. */ 5013/* Run through task list and migrate tasks from the dead cpu. */
4975static void migrate_live_tasks(int src_cpu) 5014static void migrate_live_tasks(int src_cpu)
4976{ 5015{
4977 struct task_struct *tsk, *t; 5016 struct task_struct *p, *t;
4978 5017
4979 write_lock_irq(&tasklist_lock); 5018 write_lock_irq(&tasklist_lock);
4980 5019
4981 do_each_thread(t, tsk) { 5020 do_each_thread(t, p) {
4982 if (tsk == current) 5021 if (p == current)
4983 continue; 5022 continue;
4984 5023
4985 if (task_cpu(tsk) == src_cpu) 5024 if (task_cpu(p) == src_cpu)
4986 move_task_off_dead_cpu(src_cpu, tsk); 5025 move_task_off_dead_cpu(src_cpu, p);
4987 } while_each_thread(t, tsk); 5026 } while_each_thread(t, p);
4988 5027
4989 write_unlock_irq(&tasklist_lock); 5028 write_unlock_irq(&tasklist_lock);
4990} 5029}
4991 5030
4992/* Schedules idle task to be the next runnable task on current CPU. 5031/* Schedules idle task to be the next runnable task on current CPU.
4993 * It does so by boosting its priority to highest possible and adding it to 5032 * It does so by boosting its priority to highest possible and adding it to
4994 * the _front_ of runqueue. Used by CPU offline code. 5033 * the _front_ of the runqueue. Used by CPU offline code.
4995 */ 5034 */
4996void sched_idle_next(void) 5035void sched_idle_next(void)
4997{ 5036{
4998 int cpu = smp_processor_id(); 5037 int this_cpu = smp_processor_id();
4999 runqueue_t *rq = this_rq(); 5038 struct rq *rq = cpu_rq(this_cpu);
5000 struct task_struct *p = rq->idle; 5039 struct task_struct *p = rq->idle;
5001 unsigned long flags; 5040 unsigned long flags;
5002 5041
5003 /* cpu has to be offline */ 5042 /* cpu has to be offline */
5004 BUG_ON(cpu_online(cpu)); 5043 BUG_ON(cpu_online(this_cpu));
5005 5044
5006 /* Strictly not necessary since rest of the CPUs are stopped by now 5045 /*
5007 * and interrupts disabled on current cpu. 5046 * Strictly not necessary since rest of the CPUs are stopped by now
5047 * and interrupts disabled on the current cpu.
5008 */ 5048 */
5009 spin_lock_irqsave(&rq->lock, flags); 5049 spin_lock_irqsave(&rq->lock, flags);
5010 5050
5011 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5051 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
5012 /* Add idle task to _front_ of it's priority queue */ 5052
5053 /* Add idle task to the _front_ of its priority queue: */
5013 __activate_idle_task(p, rq); 5054 __activate_idle_task(p, rq);
5014 5055
5015 spin_unlock_irqrestore(&rq->lock, flags); 5056 spin_unlock_irqrestore(&rq->lock, flags);
5016} 5057}
5017 5058
5018/* Ensures that the idle task is using init_mm right before its cpu goes 5059/*
5060 * Ensures that the idle task is using init_mm right before its cpu goes
5019 * offline. 5061 * offline.
5020 */ 5062 */
5021void idle_task_exit(void) 5063void idle_task_exit(void)
@@ -5029,17 +5071,17 @@ void idle_task_exit(void)
5029 mmdrop(mm); 5071 mmdrop(mm);
5030} 5072}
5031 5073
5032static void migrate_dead(unsigned int dead_cpu, task_t *tsk) 5074static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5033{ 5075{
5034 struct runqueue *rq = cpu_rq(dead_cpu); 5076 struct rq *rq = cpu_rq(dead_cpu);
5035 5077
5036 /* Must be exiting, otherwise would be on tasklist. */ 5078 /* Must be exiting, otherwise would be on tasklist. */
5037 BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); 5079 BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
5038 5080
5039 /* Cannot have done final schedule yet: would have vanished. */ 5081 /* Cannot have done final schedule yet: would have vanished. */
5040 BUG_ON(tsk->flags & PF_DEAD); 5082 BUG_ON(p->flags & PF_DEAD);
5041 5083
5042 get_task_struct(tsk); 5084 get_task_struct(p);
5043 5085
5044 /* 5086 /*
5045 * Drop lock around migration; if someone else moves it, 5087 * Drop lock around migration; if someone else moves it,
@@ -5047,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
5047 * fine. 5089 * fine.
5048 */ 5090 */
5049 spin_unlock_irq(&rq->lock); 5091 spin_unlock_irq(&rq->lock);
5050 move_task_off_dead_cpu(dead_cpu, tsk); 5092 move_task_off_dead_cpu(dead_cpu, p);
5051 spin_lock_irq(&rq->lock); 5093 spin_lock_irq(&rq->lock);
5052 5094
5053 put_task_struct(tsk); 5095 put_task_struct(p);
5054} 5096}
5055 5097
5056/* release_task() removes task from tasklist, so we won't find dead tasks. */ 5098/* release_task() removes task from tasklist, so we won't find dead tasks. */
5057static void migrate_dead_tasks(unsigned int dead_cpu) 5099static void migrate_dead_tasks(unsigned int dead_cpu)
5058{ 5100{
5059 unsigned arr, i; 5101 struct rq *rq = cpu_rq(dead_cpu);
5060 struct runqueue *rq = cpu_rq(dead_cpu); 5102 unsigned int arr, i;
5061 5103
5062 for (arr = 0; arr < 2; arr++) { 5104 for (arr = 0; arr < 2; arr++) {
5063 for (i = 0; i < MAX_PRIO; i++) { 5105 for (i = 0; i < MAX_PRIO; i++) {
5064 struct list_head *list = &rq->arrays[arr].queue[i]; 5106 struct list_head *list = &rq->arrays[arr].queue[i];
5107
5065 while (!list_empty(list)) 5108 while (!list_empty(list))
5066 migrate_dead(dead_cpu, 5109 migrate_dead(dead_cpu, list_entry(list->next,
5067 list_entry(list->next, task_t, 5110 struct task_struct, run_list));
5068 run_list));
5069 } 5111 }
5070 } 5112 }
5071} 5113}
@@ -5075,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5075 * migration_call - callback that gets triggered when a CPU is added. 5117 * migration_call - callback that gets triggered when a CPU is added.
5076 * Here we can start up the necessary migration thread for the new CPU. 5118 * Here we can start up the necessary migration thread for the new CPU.
5077 */ 5119 */
5078static int __cpuinit migration_call(struct notifier_block *nfb, 5120static int __cpuinit
5079 unsigned long action, 5121migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5080 void *hcpu)
5081{ 5122{
5082 int cpu = (long)hcpu;
5083 struct task_struct *p; 5123 struct task_struct *p;
5084 struct runqueue *rq; 5124 int cpu = (long)hcpu;
5085 unsigned long flags; 5125 unsigned long flags;
5126 struct rq *rq;
5086 5127
5087 switch (action) { 5128 switch (action) {
5088 case CPU_UP_PREPARE: 5129 case CPU_UP_PREPARE:
@@ -5097,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5097 task_rq_unlock(rq, &flags); 5138 task_rq_unlock(rq, &flags);
5098 cpu_rq(cpu)->migration_thread = p; 5139 cpu_rq(cpu)->migration_thread = p;
5099 break; 5140 break;
5141
5100 case CPU_ONLINE: 5142 case CPU_ONLINE:
5101 /* Strictly unneccessary, as first user will wake it. */ 5143 /* Strictly unneccessary, as first user will wake it. */
5102 wake_up_process(cpu_rq(cpu)->migration_thread); 5144 wake_up_process(cpu_rq(cpu)->migration_thread);
5103 break; 5145 break;
5146
5104#ifdef CONFIG_HOTPLUG_CPU 5147#ifdef CONFIG_HOTPLUG_CPU
5105 case CPU_UP_CANCELED: 5148 case CPU_UP_CANCELED:
5106 if (!cpu_rq(cpu)->migration_thread) 5149 if (!cpu_rq(cpu)->migration_thread)
@@ -5111,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5111 kthread_stop(cpu_rq(cpu)->migration_thread); 5154 kthread_stop(cpu_rq(cpu)->migration_thread);
5112 cpu_rq(cpu)->migration_thread = NULL; 5155 cpu_rq(cpu)->migration_thread = NULL;
5113 break; 5156 break;
5157
5114 case CPU_DEAD: 5158 case CPU_DEAD:
5115 migrate_live_tasks(cpu); 5159 migrate_live_tasks(cpu);
5116 rq = cpu_rq(cpu); 5160 rq = cpu_rq(cpu);
@@ -5131,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5131 * the requestors. */ 5175 * the requestors. */
5132 spin_lock_irq(&rq->lock); 5176 spin_lock_irq(&rq->lock);
5133 while (!list_empty(&rq->migration_queue)) { 5177 while (!list_empty(&rq->migration_queue)) {
5134 migration_req_t *req; 5178 struct migration_req *req;
5179
5135 req = list_entry(rq->migration_queue.next, 5180 req = list_entry(rq->migration_queue.next,
5136 migration_req_t, list); 5181 struct migration_req, list);
5137 list_del_init(&req->list); 5182 list_del_init(&req->list);
5138 complete(&req->done); 5183 complete(&req->done);
5139 } 5184 }
@@ -5155,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
5155int __init migration_init(void) 5200int __init migration_init(void)
5156{ 5201{
5157 void *cpu = (void *)(long)smp_processor_id(); 5202 void *cpu = (void *)(long)smp_processor_id();
5158 /* Start one for boot CPU. */ 5203
5204 /* Start one for the boot CPU: */
5159 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5205 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5160 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5206 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5161 register_cpu_notifier(&migration_notifier); 5207 register_cpu_notifier(&migration_notifier);
5208
5162 return 0; 5209 return 0;
5163} 5210}
5164#endif 5211#endif
@@ -5254,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5254 } while (sd); 5301 } while (sd);
5255} 5302}
5256#else 5303#else
5257#define sched_domain_debug(sd, cpu) {} 5304# define sched_domain_debug(sd, cpu) do { } while (0)
5258#endif 5305#endif
5259 5306
5260static int sd_degenerate(struct sched_domain *sd) 5307static int sd_degenerate(struct sched_domain *sd)
@@ -5280,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)
5280 return 1; 5327 return 1;
5281} 5328}
5282 5329
5283static int sd_parent_degenerate(struct sched_domain *sd, 5330static int
5284 struct sched_domain *parent) 5331sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5285{ 5332{
5286 unsigned long cflags = sd->flags, pflags = parent->flags; 5333 unsigned long cflags = sd->flags, pflags = parent->flags;
5287 5334
@@ -5314,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
5314 */ 5361 */
5315static void cpu_attach_domain(struct sched_domain *sd, int cpu) 5362static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5316{ 5363{
5317 runqueue_t *rq = cpu_rq(cpu); 5364 struct rq *rq = cpu_rq(cpu);
5318 struct sched_domain *tmp; 5365 struct sched_domain *tmp;
5319 5366
5320 /* Remove the sched domains which do not contribute to scheduling. */ 5367 /* Remove the sched domains which do not contribute to scheduling. */
@@ -5576,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)
5576/* 5623/*
5577 * Measure the cache-cost of one task migration. Returns in units of nsec. 5624 * Measure the cache-cost of one task migration. Returns in units of nsec.
5578 */ 5625 */
5579static unsigned long long measure_one(void *cache, unsigned long size, 5626static unsigned long long
5580 int source, int target) 5627measure_one(void *cache, unsigned long size, int source, int target)
5581{ 5628{
5582 cpumask_t mask, saved_mask; 5629 cpumask_t mask, saved_mask;
5583 unsigned long long t0, t1, t2, t3, cost; 5630 unsigned long long t0, t1, t2, t3, cost;
@@ -5927,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
5927 */ 5974 */
5928static cpumask_t sched_domain_node_span(int node) 5975static cpumask_t sched_domain_node_span(int node)
5929{ 5976{
5930 int i;
5931 cpumask_t span, nodemask;
5932 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5977 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
5978 cpumask_t span, nodemask;
5979 int i;
5933 5980
5934 cpus_clear(span); 5981 cpus_clear(span);
5935 bitmap_zero(used_nodes, MAX_NUMNODES); 5982 bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5940,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)
5940 5987
5941 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5988 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
5942 int next_node = find_next_best_node(node, used_nodes); 5989 int next_node = find_next_best_node(node, used_nodes);
5990
5943 nodemask = node_to_cpumask(next_node); 5991 nodemask = node_to_cpumask(next_node);
5944 cpus_or(span, span, nodemask); 5992 cpus_or(span, span, nodemask);
5945 } 5993 }
@@ -5949,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)
5949#endif 5997#endif
5950 5998
5951int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5999int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6000
5952/* 6001/*
5953 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we 6002 * SMT sched-domains:
5954 * can switch it on easily if needed.
5955 */ 6003 */
5956#ifdef CONFIG_SCHED_SMT 6004#ifdef CONFIG_SCHED_SMT
5957static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6005static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
5958static struct sched_group sched_group_cpus[NR_CPUS]; 6006static struct sched_group sched_group_cpus[NR_CPUS];
6007
5959static int cpu_to_cpu_group(int cpu) 6008static int cpu_to_cpu_group(int cpu)
5960{ 6009{
5961 return cpu; 6010 return cpu;
5962} 6011}
5963#endif 6012#endif
5964 6013
6014/*
6015 * multi-core sched-domains:
6016 */
5965#ifdef CONFIG_SCHED_MC 6017#ifdef CONFIG_SCHED_MC
5966static DEFINE_PER_CPU(struct sched_domain, core_domains); 6018static DEFINE_PER_CPU(struct sched_domain, core_domains);
5967static struct sched_group *sched_group_core_bycpu[NR_CPUS]; 6019static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -5981,9 +6033,10 @@ static int cpu_to_core_group(int cpu)
5981 6033
5982static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6034static DEFINE_PER_CPU(struct sched_domain, phys_domains);
5983static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6035static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
6036
5984static int cpu_to_phys_group(int cpu) 6037static int cpu_to_phys_group(int cpu)
5985{ 6038{
5986#if defined(CONFIG_SCHED_MC) 6039#ifdef CONFIG_SCHED_MC
5987 cpumask_t mask = cpu_coregroup_map(cpu); 6040 cpumask_t mask = cpu_coregroup_map(cpu);
5988 return first_cpu(mask); 6041 return first_cpu(mask);
5989#elif defined(CONFIG_SCHED_SMT) 6042#elif defined(CONFIG_SCHED_SMT)
@@ -6529,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
6529int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6582int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
6530{ 6583{
6531 int err = 0; 6584 int err = 0;
6585
6532#ifdef CONFIG_SCHED_SMT 6586#ifdef CONFIG_SCHED_SMT
6533 if (smt_capable()) 6587 if (smt_capable())
6534 err = sysfs_create_file(&cls->kset.kobj, 6588 err = sysfs_create_file(&cls->kset.kobj,
@@ -6548,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
6548{ 6602{
6549 return sprintf(page, "%u\n", sched_mc_power_savings); 6603 return sprintf(page, "%u\n", sched_mc_power_savings);
6550} 6604}
6551static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6605static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
6606 const char *buf, size_t count)
6552{ 6607{
6553 return sched_power_savings_store(buf, count, 0); 6608 return sched_power_savings_store(buf, count, 0);
6554} 6609}
@@ -6561,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
6561{ 6616{
6562 return sprintf(page, "%u\n", sched_smt_power_savings); 6617 return sprintf(page, "%u\n", sched_smt_power_savings);
6563} 6618}
6564static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6619static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
6620 const char *buf, size_t count)
6565{ 6621{
6566 return sched_power_savings_store(buf, count, 1); 6622 return sched_power_savings_store(buf, count, 1);
6567} 6623}
@@ -6623,6 +6679,7 @@ int in_sched_functions(unsigned long addr)
6623{ 6679{
6624 /* Linker adds these: start and end of __sched functions */ 6680 /* Linker adds these: start and end of __sched functions */
6625 extern char __sched_text_start[], __sched_text_end[]; 6681 extern char __sched_text_start[], __sched_text_end[];
6682
6626 return in_lock_functions(addr) || 6683 return in_lock_functions(addr) ||
6627 (addr >= (unsigned long)__sched_text_start 6684 (addr >= (unsigned long)__sched_text_start
6628 && addr < (unsigned long)__sched_text_end); 6685 && addr < (unsigned long)__sched_text_end);
@@ -6630,14 +6687,15 @@ int in_sched_functions(unsigned long addr)
6630 6687
6631void __init sched_init(void) 6688void __init sched_init(void)
6632{ 6689{
6633 runqueue_t *rq;
6634 int i, j, k; 6690 int i, j, k;
6635 6691
6636 for_each_possible_cpu(i) { 6692 for_each_possible_cpu(i) {
6637 prio_array_t *array; 6693 struct prio_array *array;
6694 struct rq *rq;
6638 6695
6639 rq = cpu_rq(i); 6696 rq = cpu_rq(i);
6640 spin_lock_init(&rq->lock); 6697 spin_lock_init(&rq->lock);
6698 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
6641 rq->nr_running = 0; 6699 rq->nr_running = 0;
6642 rq->active = rq->arrays; 6700 rq->active = rq->arrays;
6643 rq->expired = rq->arrays + 1; 6701 rq->expired = rq->arrays + 1;
@@ -6684,7 +6742,7 @@ void __init sched_init(void)
6684#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6742#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
6685void __might_sleep(char *file, int line) 6743void __might_sleep(char *file, int line)
6686{ 6744{
6687#if defined(in_atomic) 6745#ifdef in_atomic
6688 static unsigned long prev_jiffy; /* ratelimiting */ 6746 static unsigned long prev_jiffy; /* ratelimiting */
6689 6747
6690 if ((in_atomic() || irqs_disabled()) && 6748 if ((in_atomic() || irqs_disabled()) &&
@@ -6706,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
6706#ifdef CONFIG_MAGIC_SYSRQ 6764#ifdef CONFIG_MAGIC_SYSRQ
6707void normalize_rt_tasks(void) 6765void normalize_rt_tasks(void)
6708{ 6766{
6767 struct prio_array *array;
6709 struct task_struct *p; 6768 struct task_struct *p;
6710 prio_array_t *array;
6711 unsigned long flags; 6769 unsigned long flags;
6712 runqueue_t *rq; 6770 struct rq *rq;
6713 6771
6714 read_lock_irq(&tasklist_lock); 6772 read_lock_irq(&tasklist_lock);
6715 for_each_process(p) { 6773 for_each_process(p) {
@@ -6753,7 +6811,7 @@ void normalize_rt_tasks(void)
6753 * 6811 *
6754 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6812 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6755 */ 6813 */
6756task_t *curr_task(int cpu) 6814struct task_struct *curr_task(int cpu)
6757{ 6815{
6758 return cpu_curr(cpu); 6816 return cpu_curr(cpu);
6759} 6817}
@@ -6773,7 +6831,7 @@ task_t *curr_task(int cpu)
6773 * 6831 *
6774 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6832 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6775 */ 6833 */
6776void set_curr_task(int cpu, task_t *p) 6834void set_curr_task(int cpu, struct task_struct *p)
6777{ 6835{
6778 cpu_curr(cpu) = p; 6836 cpu_curr(cpu) = p;
6779} 6837}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b89b55..215541e26c1a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)
62} 62}
63 63
64/* 64/*
65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
68static void __local_bh_disable(unsigned long ip)
69{
70 unsigned long flags;
71
72 WARN_ON_ONCE(in_irq());
73
74 raw_local_irq_save(flags);
75 add_preempt_count(SOFTIRQ_OFFSET);
76 /*
77 * Were softirqs turned off above:
78 */
79 if (softirq_count() == SOFTIRQ_OFFSET)
80 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags);
82}
83
84void local_bh_disable(void)
85{
86 __local_bh_disable((unsigned long)__builtin_return_address(0));
87}
88
89EXPORT_SYMBOL(local_bh_disable);
90
91void __local_bh_enable(void)
92{
93 WARN_ON_ONCE(in_irq());
94
95 /*
96 * softirqs should never be enabled by __local_bh_enable(),
97 * it always nests inside local_bh_enable() sections:
98 */
99 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101 sub_preempt_count(SOFTIRQ_OFFSET);
102}
103EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105/*
106 * Special-case - softirqs can safely be enabled in
107 * cond_resched_softirq(), or by __do_softirq(),
108 * without processing still-pending softirqs:
109 */
110void _local_bh_enable(void)
111{
112 WARN_ON_ONCE(in_irq());
113 WARN_ON_ONCE(!irqs_disabled());
114
115 if (softirq_count() == SOFTIRQ_OFFSET)
116 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 sub_preempt_count(SOFTIRQ_OFFSET);
118}
119
120EXPORT_SYMBOL(_local_bh_enable);
121
122void local_bh_enable(void)
123{
124 unsigned long flags;
125
126 WARN_ON_ONCE(in_irq());
127 WARN_ON_ONCE(irqs_disabled());
128
129 local_irq_save(flags);
130 /*
131 * Are softirqs going to be turned on now:
132 */
133 if (softirq_count() == SOFTIRQ_OFFSET)
134 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135 /*
136 * Keep preemption disabled until we are done with
137 * softirq processing:
138 */
139 sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141 if (unlikely(!in_interrupt() && local_softirq_pending()))
142 do_softirq();
143
144 dec_preempt_count();
145 local_irq_restore(flags);
146 preempt_check_resched();
147}
148EXPORT_SYMBOL(local_bh_enable);
149
150void local_bh_enable_ip(unsigned long ip)
151{
152 unsigned long flags;
153
154 WARN_ON_ONCE(in_irq());
155
156 local_irq_save(flags);
157 /*
158 * Are softirqs going to be turned on now:
159 */
160 if (softirq_count() == SOFTIRQ_OFFSET)
161 trace_softirqs_on(ip);
162 /*
163 * Keep preemption disabled until we are done with
164 * softirq processing:
165 */
166 sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168 if (unlikely(!in_interrupt() && local_softirq_pending()))
169 do_softirq();
170
171 dec_preempt_count();
172 local_irq_restore(flags);
173 preempt_check_resched();
174}
175EXPORT_SYMBOL(local_bh_enable_ip);
176
177/*
65 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 178 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
66 * and we fall back to softirqd after that. 179 * and we fall back to softirqd after that.
67 * 180 *
@@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void)
80 int cpu; 193 int cpu;
81 194
82 pending = local_softirq_pending(); 195 pending = local_softirq_pending();
196 account_system_vtime(current);
197
198 __local_bh_disable((unsigned long)__builtin_return_address(0));
199 trace_softirq_enter();
83 200
84 local_bh_disable();
85 cpu = smp_processor_id(); 201 cpu = smp_processor_id();
86restart: 202restart:
87 /* Reset the pending bitmask before enabling irqs */ 203 /* Reset the pending bitmask before enabling irqs */
@@ -109,7 +225,10 @@ restart:
109 if (pending) 225 if (pending)
110 wakeup_softirqd(); 226 wakeup_softirqd();
111 227
112 __local_bh_enable(); 228 trace_softirq_exit();
229
230 account_system_vtime(current);
231 _local_bh_enable();
113} 232}
114 233
115#ifndef __ARCH_HAS_DO_SOFTIRQ 234#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq);
136 255
137#endif 256#endif
138 257
139void local_bh_enable(void)
140{
141 WARN_ON(irqs_disabled());
142 /*
143 * Keep preemption disabled until we are done with
144 * softirq processing:
145 */
146 sub_preempt_count(SOFTIRQ_OFFSET - 1);
147
148 if (unlikely(!in_interrupt() && local_softirq_pending()))
149 do_softirq();
150
151 dec_preempt_count();
152 preempt_check_resched();
153}
154EXPORT_SYMBOL(local_bh_enable);
155
156#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 258#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
157# define invoke_softirq() __do_softirq() 259# define invoke_softirq() __do_softirq()
158#else 260#else
@@ -165,6 +267,7 @@ EXPORT_SYMBOL(local_bh_enable);
165void irq_exit(void) 267void irq_exit(void)
166{ 268{
167 account_system_vtime(current); 269 account_system_vtime(current);
270 trace_hardirq_exit();
168 sub_preempt_count(IRQ_EXIT_OFFSET); 271 sub_preempt_count(IRQ_EXIT_OFFSET);
169 if (!in_interrupt() && local_softirq_pending()) 272 if (!in_interrupt() && local_softirq_pending())
170 invoke_softirq(); 273 invoke_softirq();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index b31e54eadf56..bfd6ad9c0330 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -13,6 +13,7 @@
13#include <linux/preempt.h> 13#include <linux/preempt.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/debug_locks.h>
16#include <linux/module.h> 17#include <linux/module.h>
17 18
18/* 19/*
@@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
29int __lockfunc _spin_trylock(spinlock_t *lock) 30int __lockfunc _spin_trylock(spinlock_t *lock)
30{ 31{
31 preempt_disable(); 32 preempt_disable();
32 if (_raw_spin_trylock(lock)) 33 if (_raw_spin_trylock(lock)) {
34 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
33 return 1; 35 return 1;
36 }
34 37
35 preempt_enable(); 38 preempt_enable();
36 return 0; 39 return 0;
@@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
40int __lockfunc _read_trylock(rwlock_t *lock) 43int __lockfunc _read_trylock(rwlock_t *lock)
41{ 44{
42 preempt_disable(); 45 preempt_disable();
43 if (_raw_read_trylock(lock)) 46 if (_raw_read_trylock(lock)) {
47 rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
44 return 1; 48 return 1;
49 }
45 50
46 preempt_enable(); 51 preempt_enable();
47 return 0; 52 return 0;
@@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
51int __lockfunc _write_trylock(rwlock_t *lock) 56int __lockfunc _write_trylock(rwlock_t *lock)
52{ 57{
53 preempt_disable(); 58 preempt_disable();
54 if (_raw_write_trylock(lock)) 59 if (_raw_write_trylock(lock)) {
60 rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
55 return 1; 61 return 1;
62 }
56 63
57 preempt_enable(); 64 preempt_enable();
58 return 0; 65 return 0;
59} 66}
60EXPORT_SYMBOL(_write_trylock); 67EXPORT_SYMBOL(_write_trylock);
61 68
62#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) 69/*
70 * If lockdep is enabled then we use the non-preemption spin-ops
71 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
72 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
73 */
74#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
75 defined(CONFIG_PROVE_LOCKING)
63 76
64void __lockfunc _read_lock(rwlock_t *lock) 77void __lockfunc _read_lock(rwlock_t *lock)
65{ 78{
66 preempt_disable(); 79 preempt_disable();
80 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
67 _raw_read_lock(lock); 81 _raw_read_lock(lock);
68} 82}
69EXPORT_SYMBOL(_read_lock); 83EXPORT_SYMBOL(_read_lock);
@@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
74 88
75 local_irq_save(flags); 89 local_irq_save(flags);
76 preempt_disable(); 90 preempt_disable();
91 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
92 /*
93 * On lockdep we dont want the hand-coded irq-enable of
94 * _raw_spin_lock_flags() code, because lockdep assumes
95 * that interrupts are not re-enabled during lock-acquire:
96 */
97#ifdef CONFIG_PROVE_LOCKING
98 _raw_spin_lock(lock);
99#else
77 _raw_spin_lock_flags(lock, &flags); 100 _raw_spin_lock_flags(lock, &flags);
101#endif
78 return flags; 102 return flags;
79} 103}
80EXPORT_SYMBOL(_spin_lock_irqsave); 104EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
83{ 107{
84 local_irq_disable(); 108 local_irq_disable();
85 preempt_disable(); 109 preempt_disable();
110 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
86 _raw_spin_lock(lock); 111 _raw_spin_lock(lock);
87} 112}
88EXPORT_SYMBOL(_spin_lock_irq); 113EXPORT_SYMBOL(_spin_lock_irq);
@@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
91{ 116{
92 local_bh_disable(); 117 local_bh_disable();
93 preempt_disable(); 118 preempt_disable();
119 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
94 _raw_spin_lock(lock); 120 _raw_spin_lock(lock);
95} 121}
96EXPORT_SYMBOL(_spin_lock_bh); 122EXPORT_SYMBOL(_spin_lock_bh);
@@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
101 127
102 local_irq_save(flags); 128 local_irq_save(flags);
103 preempt_disable(); 129 preempt_disable();
130 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
104 _raw_read_lock(lock); 131 _raw_read_lock(lock);
105 return flags; 132 return flags;
106} 133}
@@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
110{ 137{
111 local_irq_disable(); 138 local_irq_disable();
112 preempt_disable(); 139 preempt_disable();
140 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
113 _raw_read_lock(lock); 141 _raw_read_lock(lock);
114} 142}
115EXPORT_SYMBOL(_read_lock_irq); 143EXPORT_SYMBOL(_read_lock_irq);
@@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
118{ 146{
119 local_bh_disable(); 147 local_bh_disable();
120 preempt_disable(); 148 preempt_disable();
149 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
121 _raw_read_lock(lock); 150 _raw_read_lock(lock);
122} 151}
123EXPORT_SYMBOL(_read_lock_bh); 152EXPORT_SYMBOL(_read_lock_bh);
@@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
128 157
129 local_irq_save(flags); 158 local_irq_save(flags);
130 preempt_disable(); 159 preempt_disable();
160 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
131 _raw_write_lock(lock); 161 _raw_write_lock(lock);
132 return flags; 162 return flags;
133} 163}
@@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
137{ 167{
138 local_irq_disable(); 168 local_irq_disable();
139 preempt_disable(); 169 preempt_disable();
170 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
140 _raw_write_lock(lock); 171 _raw_write_lock(lock);
141} 172}
142EXPORT_SYMBOL(_write_lock_irq); 173EXPORT_SYMBOL(_write_lock_irq);
@@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
145{ 176{
146 local_bh_disable(); 177 local_bh_disable();
147 preempt_disable(); 178 preempt_disable();
179 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
148 _raw_write_lock(lock); 180 _raw_write_lock(lock);
149} 181}
150EXPORT_SYMBOL(_write_lock_bh); 182EXPORT_SYMBOL(_write_lock_bh);
@@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
152void __lockfunc _spin_lock(spinlock_t *lock) 184void __lockfunc _spin_lock(spinlock_t *lock)
153{ 185{
154 preempt_disable(); 186 preempt_disable();
187 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
155 _raw_spin_lock(lock); 188 _raw_spin_lock(lock);
156} 189}
157 190
@@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
160void __lockfunc _write_lock(rwlock_t *lock) 193void __lockfunc _write_lock(rwlock_t *lock)
161{ 194{
162 preempt_disable(); 195 preempt_disable();
196 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
163 _raw_write_lock(lock); 197 _raw_write_lock(lock);
164} 198}
165 199
@@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
255 289
256#endif /* CONFIG_PREEMPT */ 290#endif /* CONFIG_PREEMPT */
257 291
292#ifdef CONFIG_DEBUG_LOCK_ALLOC
293
294void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
295{
296 preempt_disable();
297 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
298 _raw_spin_lock(lock);
299}
300
301EXPORT_SYMBOL(_spin_lock_nested);
302
303#endif
304
258void __lockfunc _spin_unlock(spinlock_t *lock) 305void __lockfunc _spin_unlock(spinlock_t *lock)
259{ 306{
307 spin_release(&lock->dep_map, 1, _RET_IP_);
260 _raw_spin_unlock(lock); 308 _raw_spin_unlock(lock);
261 preempt_enable(); 309 preempt_enable();
262} 310}
@@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
264 312
265void __lockfunc _write_unlock(rwlock_t *lock) 313void __lockfunc _write_unlock(rwlock_t *lock)
266{ 314{
315 rwlock_release(&lock->dep_map, 1, _RET_IP_);
267 _raw_write_unlock(lock); 316 _raw_write_unlock(lock);
268 preempt_enable(); 317 preempt_enable();
269} 318}
@@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
271 320
272void __lockfunc _read_unlock(rwlock_t *lock) 321void __lockfunc _read_unlock(rwlock_t *lock)
273{ 322{
323 rwlock_release(&lock->dep_map, 1, _RET_IP_);
274 _raw_read_unlock(lock); 324 _raw_read_unlock(lock);
275 preempt_enable(); 325 preempt_enable();
276} 326}
@@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
278 328
279void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 329void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
280{ 330{
331 spin_release(&lock->dep_map, 1, _RET_IP_);
281 _raw_spin_unlock(lock); 332 _raw_spin_unlock(lock);
282 local_irq_restore(flags); 333 local_irq_restore(flags);
283 preempt_enable(); 334 preempt_enable();
@@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
286 337
287void __lockfunc _spin_unlock_irq(spinlock_t *lock) 338void __lockfunc _spin_unlock_irq(spinlock_t *lock)
288{ 339{
340 spin_release(&lock->dep_map, 1, _RET_IP_);
289 _raw_spin_unlock(lock); 341 _raw_spin_unlock(lock);
290 local_irq_enable(); 342 local_irq_enable();
291 preempt_enable(); 343 preempt_enable();
@@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
294 346
295void __lockfunc _spin_unlock_bh(spinlock_t *lock) 347void __lockfunc _spin_unlock_bh(spinlock_t *lock)
296{ 348{
349 spin_release(&lock->dep_map, 1, _RET_IP_);
297 _raw_spin_unlock(lock); 350 _raw_spin_unlock(lock);
298 preempt_enable_no_resched(); 351 preempt_enable_no_resched();
299 local_bh_enable(); 352 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
300} 353}
301EXPORT_SYMBOL(_spin_unlock_bh); 354EXPORT_SYMBOL(_spin_unlock_bh);
302 355
303void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 356void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
304{ 357{
358 rwlock_release(&lock->dep_map, 1, _RET_IP_);
305 _raw_read_unlock(lock); 359 _raw_read_unlock(lock);
306 local_irq_restore(flags); 360 local_irq_restore(flags);
307 preempt_enable(); 361 preempt_enable();
@@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
310 364
311void __lockfunc _read_unlock_irq(rwlock_t *lock) 365void __lockfunc _read_unlock_irq(rwlock_t *lock)
312{ 366{
367 rwlock_release(&lock->dep_map, 1, _RET_IP_);
313 _raw_read_unlock(lock); 368 _raw_read_unlock(lock);
314 local_irq_enable(); 369 local_irq_enable();
315 preempt_enable(); 370 preempt_enable();
@@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
318 373
319void __lockfunc _read_unlock_bh(rwlock_t *lock) 374void __lockfunc _read_unlock_bh(rwlock_t *lock)
320{ 375{
376 rwlock_release(&lock->dep_map, 1, _RET_IP_);
321 _raw_read_unlock(lock); 377 _raw_read_unlock(lock);
322 preempt_enable_no_resched(); 378 preempt_enable_no_resched();
323 local_bh_enable(); 379 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
324} 380}
325EXPORT_SYMBOL(_read_unlock_bh); 381EXPORT_SYMBOL(_read_unlock_bh);
326 382
327void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 383void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
328{ 384{
385 rwlock_release(&lock->dep_map, 1, _RET_IP_);
329 _raw_write_unlock(lock); 386 _raw_write_unlock(lock);
330 local_irq_restore(flags); 387 local_irq_restore(flags);
331 preempt_enable(); 388 preempt_enable();
@@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
334 391
335void __lockfunc _write_unlock_irq(rwlock_t *lock) 392void __lockfunc _write_unlock_irq(rwlock_t *lock)
336{ 393{
394 rwlock_release(&lock->dep_map, 1, _RET_IP_);
337 _raw_write_unlock(lock); 395 _raw_write_unlock(lock);
338 local_irq_enable(); 396 local_irq_enable();
339 preempt_enable(); 397 preempt_enable();
@@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
342 400
343void __lockfunc _write_unlock_bh(rwlock_t *lock) 401void __lockfunc _write_unlock_bh(rwlock_t *lock)
344{ 402{
403 rwlock_release(&lock->dep_map, 1, _RET_IP_);
345 _raw_write_unlock(lock); 404 _raw_write_unlock(lock);
346 preempt_enable_no_resched(); 405 preempt_enable_no_resched();
347 local_bh_enable(); 406 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
348} 407}
349EXPORT_SYMBOL(_write_unlock_bh); 408EXPORT_SYMBOL(_write_unlock_bh);
350 409
@@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
352{ 411{
353 local_bh_disable(); 412 local_bh_disable();
354 preempt_disable(); 413 preempt_disable();
355 if (_raw_spin_trylock(lock)) 414 if (_raw_spin_trylock(lock)) {
415 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
356 return 1; 416 return 1;
417 }
357 418
358 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
359 local_bh_enable(); 420 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
360 return 0; 421 return 0;
361} 422}
362EXPORT_SYMBOL(_spin_trylock_bh); 423EXPORT_SYMBOL(_spin_trylock_bh);
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 000000000000..b71816e47a30
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
1/*
2 * kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/kallsyms.h>
10#include <linux/stacktrace.h>
11
12void print_stack_trace(struct stack_trace *trace, int spaces)
13{
14 int i, j;
15
16 for (i = 0; i < trace->nr_entries; i++) {
17 unsigned long ip = trace->entries[i];
18
19 for (j = 0; j < spaces + 1; j++)
20 printk(" ");
21 print_ip_sym(ip);
22 }
23}
24
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2c0aacc37c55..dcfb5d731466 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,7 +4,6 @@
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/syscalls.h> 6#include <linux/syscalls.h>
7#include <linux/kthread.h>
8#include <asm/atomic.h> 7#include <asm/atomic.h>
9#include <asm/semaphore.h> 8#include <asm/semaphore.h>
10#include <asm/uaccess.h> 9#include <asm/uaccess.h>
@@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads;
26static atomic_t stopmachine_thread_ack; 25static atomic_t stopmachine_thread_ack;
27static DECLARE_MUTEX(stopmachine_mutex); 26static DECLARE_MUTEX(stopmachine_mutex);
28 27
29static int stopmachine(void *unused) 28static int stopmachine(void *cpu)
30{ 29{
31 int irqs_disabled = 0; 30 int irqs_disabled = 0;
32 int prepared = 0; 31 int prepared = 0;
33 32
33 set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
34
34 /* Ack: we are alive */ 35 /* Ack: we are alive */
35 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ 36 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
36 atomic_inc(&stopmachine_thread_ack); 37 atomic_inc(&stopmachine_thread_ack);
@@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state)
84 85
85static int stop_machine(void) 86static int stop_machine(void)
86{ 87{
87 int ret = 0; 88 int i, ret = 0;
88 unsigned int i;
89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
90 90
91 /* One high-prio thread per cpu. We'll do this one. */ 91 /* One high-prio thread per cpu. We'll do this one. */
@@ -96,16 +96,11 @@ static int stop_machine(void)
96 stopmachine_state = STOPMACHINE_WAIT; 96 stopmachine_state = STOPMACHINE_WAIT;
97 97
98 for_each_online_cpu(i) { 98 for_each_online_cpu(i) {
99 struct task_struct *tsk;
100 if (i == raw_smp_processor_id()) 99 if (i == raw_smp_processor_id())
101 continue; 100 continue;
102 tsk = kthread_create(stopmachine, NULL, "stopmachine"); 101 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
103 if (IS_ERR(tsk)) { 102 if (ret < 0)
104 ret = PTR_ERR(tsk);
105 break; 103 break;
106 }
107 kthread_bind(tsk, i);
108 wake_up_process(tsk);
109 stopmachine_num_threads++; 104 stopmachine_num_threads++;
110 } 105 }
111 106
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99a58f279077..362a0cc37138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,6 +932,17 @@ static ctl_table vm_table[] = {
932 .strategy = &sysctl_intvec, 932 .strategy = &sysctl_intvec,
933 .extra1 = &zero, 933 .extra1 = &zero,
934 }, 934 },
935 {
936 .ctl_name = VM_MIN_UNMAPPED,
937 .procname = "min_unmapped_ratio",
938 .data = &sysctl_min_unmapped_ratio,
939 .maxlen = sizeof(sysctl_min_unmapped_ratio),
940 .mode = 0644,
941 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
942 .strategy = &sysctl_intvec,
943 .extra1 = &zero,
944 .extra2 = &one_hundred,
945 },
935#endif 946#endif
936#ifdef CONFIG_X86_32 947#ifdef CONFIG_X86_32
937 { 948 {
diff --git a/kernel/timer.c b/kernel/timer.c
index 5a8960253063..396a3c024c2c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;
1208 * playing with xtime and avenrun. 1208 * playing with xtime and avenrun.
1209 */ 1209 */
1210#ifndef ARCH_HAVE_XTIME_LOCK 1210#ifndef ARCH_HAVE_XTIME_LOCK
1211seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; 1211__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
1212 1212
1213EXPORT_SYMBOL(xtime_lock); 1213EXPORT_SYMBOL(xtime_lock);
1214#endif 1214#endif
@@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)
1368 1368
1369static void process_timeout(unsigned long __data) 1369static void process_timeout(unsigned long __data)
1370{ 1370{
1371 wake_up_process((task_t *)__data); 1371 wake_up_process((struct task_struct *)__data);
1372} 1372}
1373 1373
1374/** 1374/**
@@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1559 return 0; 1559 return 0;
1560} 1560}
1561 1561
1562/*
1563 * lockdep: we want to track each per-CPU base as a separate lock-class,
1564 * but timer-bases are kmalloc()-ed, so we need to attach separate
1565 * keys to them:
1566 */
1567static struct lock_class_key base_lock_keys[NR_CPUS];
1568
1562static int __devinit init_timers_cpu(int cpu) 1569static int __devinit init_timers_cpu(int cpu)
1563{ 1570{
1564 int j; 1571 int j;
@@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)
1594 } 1601 }
1595 1602
1596 spin_lock_init(&base->lock); 1603 spin_lock_init(&base->lock);
1604 lockdep_set_class(&base->lock, base_lock_keys + cpu);
1605
1597 for (j = 0; j < TVN_SIZE; j++) { 1606 for (j = 0; j < TVN_SIZE; j++) {
1598 INIT_LIST_HEAD(base->tv5.vec + j); 1607 INIT_LIST_HEAD(base->tv5.vec + j);
1599 INIT_LIST_HEAD(base->tv4.vec + j); 1608 INIT_LIST_HEAD(base->tv4.vec + j);
diff --git a/kernel/wait.c b/kernel/wait.c
index 5985d866531f..a1d57aeb7f75 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,6 +10,10 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
13struct lock_class_key waitqueue_lock_key;
14
15EXPORT_SYMBOL(waitqueue_lock_key);
16
13void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 17void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
14{ 18{
15 unsigned long flags; 19 unsigned long flags;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 59f0b42bd89e..eebb1d839235 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,7 +51,7 @@ struct cpu_workqueue_struct {
51 wait_queue_head_t work_done; 51 wait_queue_head_t work_done;
52 52
53 struct workqueue_struct *wq; 53 struct workqueue_struct *wq;
54 task_t *thread; 54 struct task_struct *thread;
55 55
56 int run_depth; /* Detect run_workqueue() recursion depth */ 56 int run_depth; /* Detect run_workqueue() recursion depth */
57} ____cacheline_aligned; 57} ____cacheline_aligned;
@@ -114,6 +114,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
114 put_cpu(); 114 put_cpu();
115 return ret; 115 return ret;
116} 116}
117EXPORT_SYMBOL_GPL(queue_work);
117 118
118static void delayed_work_timer_fn(unsigned long __data) 119static void delayed_work_timer_fn(unsigned long __data)
119{ 120{
@@ -147,6 +148,29 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
147 } 148 }
148 return ret; 149 return ret;
149} 150}
151EXPORT_SYMBOL_GPL(queue_delayed_work);
152
153int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
154 struct work_struct *work, unsigned long delay)
155{
156 int ret = 0;
157 struct timer_list *timer = &work->timer;
158
159 if (!test_and_set_bit(0, &work->pending)) {
160 BUG_ON(timer_pending(timer));
161 BUG_ON(!list_empty(&work->entry));
162
163 /* This stores wq for the moment, for the timer_fn */
164 work->wq_data = wq;
165 timer->expires = jiffies + delay;
166 timer->data = (unsigned long)work;
167 timer->function = delayed_work_timer_fn;
168 add_timer_on(timer, cpu);
169 ret = 1;
170 }
171 return ret;
172}
173EXPORT_SYMBOL_GPL(queue_delayed_work_on);
150 174
151static void run_workqueue(struct cpu_workqueue_struct *cwq) 175static void run_workqueue(struct cpu_workqueue_struct *cwq)
152{ 176{
@@ -281,6 +305,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
281 unlock_cpu_hotplug(); 305 unlock_cpu_hotplug();
282 } 306 }
283} 307}
308EXPORT_SYMBOL_GPL(flush_workqueue);
284 309
285static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, 310static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
286 int cpu) 311 int cpu)
@@ -358,6 +383,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
358 } 383 }
359 return wq; 384 return wq;
360} 385}
386EXPORT_SYMBOL_GPL(__create_workqueue);
361 387
362static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu) 388static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
363{ 389{
@@ -395,6 +421,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
395 free_percpu(wq->cpu_wq); 421 free_percpu(wq->cpu_wq);
396 kfree(wq); 422 kfree(wq);
397} 423}
424EXPORT_SYMBOL_GPL(destroy_workqueue);
398 425
399static struct workqueue_struct *keventd_wq; 426static struct workqueue_struct *keventd_wq;
400 427
@@ -402,31 +429,20 @@ int fastcall schedule_work(struct work_struct *work)
402{ 429{
403 return queue_work(keventd_wq, work); 430 return queue_work(keventd_wq, work);
404} 431}
432EXPORT_SYMBOL(schedule_work);
405 433
406int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay) 434int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
407{ 435{
408 return queue_delayed_work(keventd_wq, work, delay); 436 return queue_delayed_work(keventd_wq, work, delay);
409} 437}
438EXPORT_SYMBOL(schedule_delayed_work);
410 439
411int schedule_delayed_work_on(int cpu, 440int schedule_delayed_work_on(int cpu,
412 struct work_struct *work, unsigned long delay) 441 struct work_struct *work, unsigned long delay)
413{ 442{
414 int ret = 0; 443 return queue_delayed_work_on(cpu, keventd_wq, work, delay);
415 struct timer_list *timer = &work->timer;
416
417 if (!test_and_set_bit(0, &work->pending)) {
418 BUG_ON(timer_pending(timer));
419 BUG_ON(!list_empty(&work->entry));
420 /* This stores keventd_wq for the moment, for the timer_fn */
421 work->wq_data = keventd_wq;
422 timer->expires = jiffies + delay;
423 timer->data = (unsigned long)work;
424 timer->function = delayed_work_timer_fn;
425 add_timer_on(timer, cpu);
426 ret = 1;
427 }
428 return ret;
429} 444}
445EXPORT_SYMBOL(schedule_delayed_work_on);
430 446
431/** 447/**
432 * schedule_on_each_cpu - call a function on each online CPU from keventd 448 * schedule_on_each_cpu - call a function on each online CPU from keventd
@@ -463,6 +479,7 @@ void flush_scheduled_work(void)
463{ 479{
464 flush_workqueue(keventd_wq); 480 flush_workqueue(keventd_wq);
465} 481}
482EXPORT_SYMBOL(flush_scheduled_work);
466 483
467/** 484/**
468 * cancel_rearming_delayed_workqueue - reliably kill off a delayed 485 * cancel_rearming_delayed_workqueue - reliably kill off a delayed
@@ -619,13 +636,3 @@ void init_workqueues(void)
619 BUG_ON(!keventd_wq); 636 BUG_ON(!keventd_wq);
620} 637}
621 638
622EXPORT_SYMBOL_GPL(__create_workqueue);
623EXPORT_SYMBOL_GPL(queue_work);
624EXPORT_SYMBOL_GPL(queue_delayed_work);
625EXPORT_SYMBOL_GPL(flush_workqueue);
626EXPORT_SYMBOL_GPL(destroy_workqueue);
627
628EXPORT_SYMBOL(schedule_work);
629EXPORT_SYMBOL(schedule_delayed_work);
630EXPORT_SYMBOL(schedule_delayed_work_on);
631EXPORT_SYMBOL(flush_scheduled_work);