Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              |    8
-rw-r--r--  kernel/acct.c                |    1
-rw-r--r--  kernel/audit.h               |    1
-rw-r--r--  kernel/auditfilter.c         |  209
-rw-r--r--  kernel/auditsc.c             |   65
-rw-r--r--  kernel/capability.c          |    8
-rw-r--r--  kernel/configs.c             |    1
-rw-r--r--  kernel/cpuset.c              |    3
-rw-r--r--  kernel/exec_domain.c         |    1
-rw-r--r--  kernel/exit.c                |   41
-rw-r--r--  kernel/fork.c                |   52
-rw-r--r--  kernel/futex.c               |   36
-rw-r--r--  kernel/hrtimer.c             |    6
-rw-r--r--  kernel/irq/chip.c            |   20
-rw-r--r--  kernel/irq/handle.c          |   40
-rw-r--r--  kernel/irq/manage.c          |   55
-rw-r--r--  kernel/irq/spurious.c        |    4
-rw-r--r--  kernel/kmod.c                |    3
-rw-r--r--  kernel/ksysfs.c              |    1
-rw-r--r--  kernel/lockdep.c             | 2702
-rw-r--r--  kernel/lockdep_internals.h   |   78
-rw-r--r--  kernel/lockdep_proc.c        |  345
-rw-r--r--  kernel/module.c              |   27
-rw-r--r--  kernel/mutex-debug.c         |  399
-rw-r--r--  kernel/mutex-debug.h         |   94
-rw-r--r--  kernel/mutex.c               |   74
-rw-r--r--  kernel/mutex.h               |   19
-rw-r--r--  kernel/panic.c               |    1
-rw-r--r--  kernel/params.c              |    1
-rw-r--r--  kernel/pid.c                 |    6
-rw-r--r--  kernel/power/Kconfig         |   12
-rw-r--r--  kernel/printk.c              |   24
-rw-r--r--  kernel/profile.c             |    1
-rw-r--r--  kernel/ptrace.c              |    6
-rw-r--r--  kernel/rcupdate.c            |    4
-rw-r--r--  kernel/resource.c            |    1
-rw-r--r--  kernel/rtmutex-debug.c       |  307
-rw-r--r--  kernel/rtmutex-debug.h       |    8
-rw-r--r--  kernel/rtmutex-tester.c      |    4
-rw-r--r--  kernel/rtmutex.c             |   57
-rw-r--r--  kernel/rtmutex.h             |    3
-rw-r--r--  kernel/rwsem.c               |  147
-rw-r--r--  kernel/sched.c               |  773
-rw-r--r--  kernel/signal.c              |    8
-rw-r--r--  kernel/softirq.c             |  141
-rw-r--r--  kernel/spinlock.c            |   80
-rw-r--r--  kernel/stacktrace.c          |   24
-rw-r--r--  kernel/stop_machine.c        |   17
-rw-r--r--  kernel/sys.c                 |    1
-rw-r--r--  kernel/sysctl.c              |   15
-rw-r--r--  kernel/timer.c               |   13
-rw-r--r--  kernel/wait.c                |    5
-rw-r--r--  kernel/workqueue.c           |    2
53 files changed, 4526 insertions, 1428 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182f6f61..47dbcd570cd8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,10 +8,15 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o 11 hrtimer.o rwsem.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-y += time/ 14obj-y += time/
14obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 15obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
16obj-$(CONFIG_LOCKDEP) += lockdep.o
17ifeq ($(CONFIG_PROC_FS),y)
18obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
19endif
15obj-$(CONFIG_FUTEX) += futex.o 20obj-$(CONFIG_FUTEX) += futex.o
16ifeq ($(CONFIG_COMPAT),y) 21ifeq ($(CONFIG_COMPAT),y)
17obj-$(CONFIG_FUTEX) += futex_compat.o 22obj-$(CONFIG_FUTEX) += futex_compat.o
@@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
22obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 27obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
23obj-$(CONFIG_SMP) += cpu.o spinlock.o 28obj-$(CONFIG_SMP) += cpu.o spinlock.o
24obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 29obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
30obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
25obj-$(CONFIG_UID16) += uid16.o 31obj-$(CONFIG_UID16) += uid16.o
26obj-$(CONFIG_MODULES) += module.o 32obj-$(CONFIG_MODULES) += module.o
27obj-$(CONFIG_KALLSYMS) += kallsyms.o 33obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 126ca43d5d2b..f18e0b8df3e1 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -43,7 +43,6 @@
43 * a struct file opened for write. Fixed. 2/6/2000, AV. 43 * a struct file opened for write. Fixed. 2/6/2000, AV.
44 */ 44 */
45 45
46#include <linux/config.h>
47#include <linux/mm.h> 46#include <linux/mm.h>
48#include <linux/slab.h> 47#include <linux/slab.h>
49#include <linux/acct.h> 48#include <linux/acct.h>
diff --git a/kernel/audit.h b/kernel/audit.h
index 8323e4132a33..6aa33b848cf2 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -81,6 +81,7 @@ struct audit_krule {
81 u32 mask[AUDIT_BITMASK_SIZE]; 81 u32 mask[AUDIT_BITMASK_SIZE];
82 u32 buflen; /* for data alloc on list rules */ 82 u32 buflen; /* for data alloc on list rules */
83 u32 field_count; 83 u32 field_count;
84 char *filterkey; /* ties events to rules */
84 struct audit_field *fields; 85 struct audit_field *fields;
85 struct audit_field *inode_f; /* quick access to an inode field */ 86 struct audit_field *inode_f; /* quick access to an inode field */
86 struct audit_watch *watch; /* associated watch */ 87 struct audit_watch *watch; /* associated watch */
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 4c99d2c586ed..5b4e16276ca0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -141,6 +141,7 @@ static inline void audit_free_rule(struct audit_entry *e)
141 selinux_audit_rule_free(f->se_rule); 141 selinux_audit_rule_free(f->se_rule);
142 } 142 }
143 kfree(e->rule.fields); 143 kfree(e->rule.fields);
144 kfree(e->rule.filterkey);
144 kfree(e); 145 kfree(e);
145} 146}
146 147
@@ -278,6 +279,29 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
278 return 0; 279 return 0;
279} 280}
280 281
282static __u32 *classes[AUDIT_SYSCALL_CLASSES];
283
284int __init audit_register_class(int class, unsigned *list)
285{
286 __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
287 if (!p)
288 return -ENOMEM;
289 while (*list != ~0U) {
290 unsigned n = *list++;
291 if (n >= AUDIT_BITMASK_SIZE * 32 - AUDIT_SYSCALL_CLASSES) {
292 kfree(p);
293 return -EINVAL;
294 }
295 p[AUDIT_WORD(n)] |= AUDIT_BIT(n);
296 }
297 if (class >= AUDIT_SYSCALL_CLASSES || classes[class]) {
298 kfree(p);
299 return -EINVAL;
300 }
301 classes[class] = p;
302 return 0;
303}
304
281/* Common user-space to kernel rule translation. */ 305/* Common user-space to kernel rule translation. */
282static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) 306static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
283{ 307{
@@ -321,6 +345,22 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
321 for (i = 0; i < AUDIT_BITMASK_SIZE; i++) 345 for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
322 entry->rule.mask[i] = rule->mask[i]; 346 entry->rule.mask[i] = rule->mask[i];
323 347
348 for (i = 0; i < AUDIT_SYSCALL_CLASSES; i++) {
349 int bit = AUDIT_BITMASK_SIZE * 32 - i - 1;
350 __u32 *p = &entry->rule.mask[AUDIT_WORD(bit)];
351 __u32 *class;
352
353 if (!(*p & AUDIT_BIT(bit)))
354 continue;
355 *p &= ~AUDIT_BIT(bit);
356 class = classes[i];
357 if (class) {
358 int j;
359 for (j = 0; j < AUDIT_BITMASK_SIZE; j++)
360 entry->rule.mask[j] |= class[j];
361 }
362 }
363
324 return entry; 364 return entry;
325 365
326exit_err: 366exit_err:
@@ -469,11 +509,16 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
469 case AUDIT_ARG2: 509 case AUDIT_ARG2:
470 case AUDIT_ARG3: 510 case AUDIT_ARG3:
471 break; 511 break;
472 case AUDIT_SE_USER: 512 case AUDIT_SUBJ_USER:
473 case AUDIT_SE_ROLE: 513 case AUDIT_SUBJ_ROLE:
474 case AUDIT_SE_TYPE: 514 case AUDIT_SUBJ_TYPE:
475 case AUDIT_SE_SEN: 515 case AUDIT_SUBJ_SEN:
476 case AUDIT_SE_CLR: 516 case AUDIT_SUBJ_CLR:
517 case AUDIT_OBJ_USER:
518 case AUDIT_OBJ_ROLE:
519 case AUDIT_OBJ_TYPE:
520 case AUDIT_OBJ_LEV_LOW:
521 case AUDIT_OBJ_LEV_HIGH:
477 str = audit_unpack_string(&bufp, &remain, f->val); 522 str = audit_unpack_string(&bufp, &remain, f->val);
478 if (IS_ERR(str)) 523 if (IS_ERR(str))
479 goto exit_free; 524 goto exit_free;
@@ -511,6 +556,16 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
511 if (err) 556 if (err)
512 goto exit_free; 557 goto exit_free;
513 break; 558 break;
559 case AUDIT_FILTERKEY:
560 err = -EINVAL;
561 if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
562 goto exit_free;
563 str = audit_unpack_string(&bufp, &remain, f->val);
564 if (IS_ERR(str))
565 goto exit_free;
566 entry->rule.buflen += f->val;
567 entry->rule.filterkey = str;
568 break;
514 default: 569 default:
515 goto exit_free; 570 goto exit_free;
516 } 571 }
@@ -600,11 +655,16 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
600 data->fields[i] = f->type; 655 data->fields[i] = f->type;
601 data->fieldflags[i] = f->op; 656 data->fieldflags[i] = f->op;
602 switch(f->type) { 657 switch(f->type) {
603 case AUDIT_SE_USER: 658 case AUDIT_SUBJ_USER:
604 case AUDIT_SE_ROLE: 659 case AUDIT_SUBJ_ROLE:
605 case AUDIT_SE_TYPE: 660 case AUDIT_SUBJ_TYPE:
606 case AUDIT_SE_SEN: 661 case AUDIT_SUBJ_SEN:
607 case AUDIT_SE_CLR: 662 case AUDIT_SUBJ_CLR:
663 case AUDIT_OBJ_USER:
664 case AUDIT_OBJ_ROLE:
665 case AUDIT_OBJ_TYPE:
666 case AUDIT_OBJ_LEV_LOW:
667 case AUDIT_OBJ_LEV_HIGH:
608 data->buflen += data->values[i] = 668 data->buflen += data->values[i] =
609 audit_pack_string(&bufp, f->se_str); 669 audit_pack_string(&bufp, f->se_str);
610 break; 670 break;
@@ -612,6 +672,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
612 data->buflen += data->values[i] = 672 data->buflen += data->values[i] =
613 audit_pack_string(&bufp, krule->watch->path); 673 audit_pack_string(&bufp, krule->watch->path);
614 break; 674 break;
675 case AUDIT_FILTERKEY:
676 data->buflen += data->values[i] =
677 audit_pack_string(&bufp, krule->filterkey);
678 break;
615 default: 679 default:
616 data->values[i] = f->val; 680 data->values[i] = f->val;
617 } 681 }
@@ -639,11 +703,16 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
639 return 1; 703 return 1;
640 704
641 switch(a->fields[i].type) { 705 switch(a->fields[i].type) {
642 case AUDIT_SE_USER: 706 case AUDIT_SUBJ_USER:
643 case AUDIT_SE_ROLE: 707 case AUDIT_SUBJ_ROLE:
644 case AUDIT_SE_TYPE: 708 case AUDIT_SUBJ_TYPE:
645 case AUDIT_SE_SEN: 709 case AUDIT_SUBJ_SEN:
646 case AUDIT_SE_CLR: 710 case AUDIT_SUBJ_CLR:
711 case AUDIT_OBJ_USER:
712 case AUDIT_OBJ_ROLE:
713 case AUDIT_OBJ_TYPE:
714 case AUDIT_OBJ_LEV_LOW:
715 case AUDIT_OBJ_LEV_HIGH:
647 if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) 716 if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
648 return 1; 717 return 1;
649 break; 718 break;
@@ -651,6 +720,11 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
651 if (strcmp(a->watch->path, b->watch->path)) 720 if (strcmp(a->watch->path, b->watch->path))
652 return 1; 721 return 1;
653 break; 722 break;
723 case AUDIT_FILTERKEY:
724 /* both filterkeys exist based on above type compare */
725 if (strcmp(a->filterkey, b->filterkey))
726 return 1;
727 break;
654 default: 728 default:
655 if (a->fields[i].val != b->fields[i].val) 729 if (a->fields[i].val != b->fields[i].val)
656 return 1; 730 return 1;
@@ -730,6 +804,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
730 u32 fcount = old->field_count; 804 u32 fcount = old->field_count;
731 struct audit_entry *entry; 805 struct audit_entry *entry;
732 struct audit_krule *new; 806 struct audit_krule *new;
807 char *fk;
733 int i, err = 0; 808 int i, err = 0;
734 809
735 entry = audit_init_entry(fcount); 810 entry = audit_init_entry(fcount);
@@ -753,13 +828,25 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
753 * the originals will all be freed when the old rule is freed. */ 828 * the originals will all be freed when the old rule is freed. */
754 for (i = 0; i < fcount; i++) { 829 for (i = 0; i < fcount; i++) {
755 switch (new->fields[i].type) { 830 switch (new->fields[i].type) {
756 case AUDIT_SE_USER: 831 case AUDIT_SUBJ_USER:
757 case AUDIT_SE_ROLE: 832 case AUDIT_SUBJ_ROLE:
758 case AUDIT_SE_TYPE: 833 case AUDIT_SUBJ_TYPE:
759 case AUDIT_SE_SEN: 834 case AUDIT_SUBJ_SEN:
760 case AUDIT_SE_CLR: 835 case AUDIT_SUBJ_CLR:
836 case AUDIT_OBJ_USER:
837 case AUDIT_OBJ_ROLE:
838 case AUDIT_OBJ_TYPE:
839 case AUDIT_OBJ_LEV_LOW:
840 case AUDIT_OBJ_LEV_HIGH:
761 err = audit_dupe_selinux_field(&new->fields[i], 841 err = audit_dupe_selinux_field(&new->fields[i],
762 &old->fields[i]); 842 &old->fields[i]);
843 break;
844 case AUDIT_FILTERKEY:
845 fk = kstrdup(old->filterkey, GFP_KERNEL);
846 if (unlikely(!fk))
847 err = -ENOMEM;
848 else
849 new->filterkey = fk;
763 } 850 }
764 if (err) { 851 if (err) {
765 audit_free_rule(entry); 852 audit_free_rule(entry);
@@ -1245,6 +1332,34 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
1245 skb_queue_tail(q, skb); 1332 skb_queue_tail(q, skb);
1246} 1333}
1247 1334
1335/* Log rule additions and removals */
1336static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
1337 struct audit_krule *rule, int res)
1338{
1339 struct audit_buffer *ab;
1340
1341 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
1342 if (!ab)
1343 return;
1344 audit_log_format(ab, "auid=%u", loginuid);
1345 if (sid) {
1346 char *ctx = NULL;
1347 u32 len;
1348 if (selinux_ctxid_to_string(sid, &ctx, &len))
1349 audit_log_format(ab, " ssid=%u", sid);
1350 else
1351 audit_log_format(ab, " subj=%s", ctx);
1352 kfree(ctx);
1353 }
1354 audit_log_format(ab, " %s rule key=", action);
1355 if (rule->filterkey)
1356 audit_log_untrustedstring(ab, rule->filterkey);
1357 else
1358 audit_log_format(ab, "(null)");
1359 audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
1360 audit_log_end(ab);
1361}
1362
1248/** 1363/**
1249 * audit_receive_filter - apply all rules to the specified message type 1364 * audit_receive_filter - apply all rules to the specified message type
1250 * @type: audit message type 1365 * @type: audit message type
@@ -1304,24 +1419,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
1304 1419
1305 err = audit_add_rule(entry, 1420 err = audit_add_rule(entry,
1306 &audit_filter_list[entry->rule.listnr]); 1421 &audit_filter_list[entry->rule.listnr]);
1307 1422 audit_log_rule_change(loginuid, sid, "add", &entry->rule, !err);
1308 if (sid) {
1309 char *ctx = NULL;
1310 u32 len;
1311 if (selinux_ctxid_to_string(sid, &ctx, &len)) {
1312 /* Maybe call audit_panic? */
1313 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
1314 "auid=%u ssid=%u add rule to list=%d res=%d",
1315 loginuid, sid, entry->rule.listnr, !err);
1316 } else
1317 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
1318 "auid=%u subj=%s add rule to list=%d res=%d",
1319 loginuid, ctx, entry->rule.listnr, !err);
1320 kfree(ctx);
1321 } else
1322 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
1323 "auid=%u add rule to list=%d res=%d",
1324 loginuid, entry->rule.listnr, !err);
1325 1423
1326 if (err) 1424 if (err)
1327 audit_free_rule(entry); 1425 audit_free_rule(entry);
@@ -1337,24 +1435,8 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
1337 1435
1338 err = audit_del_rule(entry, 1436 err = audit_del_rule(entry,
1339 &audit_filter_list[entry->rule.listnr]); 1437 &audit_filter_list[entry->rule.listnr]);
1340 1438 audit_log_rule_change(loginuid, sid, "remove", &entry->rule,
1341 if (sid) { 1439 !err);
1342 char *ctx = NULL;
1343 u32 len;
1344 if (selinux_ctxid_to_string(sid, &ctx, &len)) {
1345 /* Maybe call audit_panic? */
1346 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
1347 "auid=%u ssid=%u remove rule from list=%d res=%d",
1348 loginuid, sid, entry->rule.listnr, !err);
1349 } else
1350 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
1351 "auid=%u subj=%s remove rule from list=%d res=%d",
1352 loginuid, ctx, entry->rule.listnr, !err);
1353 kfree(ctx);
1354 } else
1355 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
1356 "auid=%u remove rule from list=%d res=%d",
1357 loginuid, entry->rule.listnr, !err);
1358 1440
1359 audit_free_rule(entry); 1441 audit_free_rule(entry);
1360 break; 1442 break;
@@ -1514,11 +1596,16 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule)
1514 for (i = 0; i < rule->field_count; i++) { 1596 for (i = 0; i < rule->field_count; i++) {
1515 struct audit_field *f = &rule->fields[i]; 1597 struct audit_field *f = &rule->fields[i];
1516 switch (f->type) { 1598 switch (f->type) {
1517 case AUDIT_SE_USER: 1599 case AUDIT_SUBJ_USER:
1518 case AUDIT_SE_ROLE: 1600 case AUDIT_SUBJ_ROLE:
1519 case AUDIT_SE_TYPE: 1601 case AUDIT_SUBJ_TYPE:
1520 case AUDIT_SE_SEN: 1602 case AUDIT_SUBJ_SEN:
1521 case AUDIT_SE_CLR: 1603 case AUDIT_SUBJ_CLR:
1604 case AUDIT_OBJ_USER:
1605 case AUDIT_OBJ_ROLE:
1606 case AUDIT_OBJ_TYPE:
1607 case AUDIT_OBJ_LEV_LOW:
1608 case AUDIT_OBJ_LEV_HIGH:
1522 return 1; 1609 return 1;
1523 } 1610 }
1524 } 1611 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index dc5e3f01efe7..ae40ac8c39e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -186,6 +186,7 @@ struct audit_context {
186 int auditable; /* 1 if record should be written */ 186 int auditable; /* 1 if record should be written */
187 int name_count; 187 int name_count;
188 struct audit_names names[AUDIT_NAMES]; 188 struct audit_names names[AUDIT_NAMES];
189 char * filterkey; /* key for rule that triggered record */
189 struct dentry * pwd; 190 struct dentry * pwd;
190 struct vfsmount * pwdmnt; 191 struct vfsmount * pwdmnt;
191 struct audit_context *previous; /* For nested syscalls */ 192 struct audit_context *previous; /* For nested syscalls */
@@ -320,11 +321,11 @@ static int audit_filter_rules(struct task_struct *tsk,
320 if (ctx) 321 if (ctx)
321 result = audit_comparator(ctx->loginuid, f->op, f->val); 322 result = audit_comparator(ctx->loginuid, f->op, f->val);
322 break; 323 break;
323 case AUDIT_SE_USER: 324 case AUDIT_SUBJ_USER:
324 case AUDIT_SE_ROLE: 325 case AUDIT_SUBJ_ROLE:
325 case AUDIT_SE_TYPE: 326 case AUDIT_SUBJ_TYPE:
326 case AUDIT_SE_SEN: 327 case AUDIT_SUBJ_SEN:
327 case AUDIT_SE_CLR: 328 case AUDIT_SUBJ_CLR:
328 /* NOTE: this may return negative values indicating 329 /* NOTE: this may return negative values indicating
329 a temporary error. We simply treat this as a 330 a temporary error. We simply treat this as a
330 match for now to avoid losing information that 331 match for now to avoid losing information that
@@ -341,6 +342,46 @@ static int audit_filter_rules(struct task_struct *tsk,
341 ctx); 342 ctx);
342 } 343 }
343 break; 344 break;
345 case AUDIT_OBJ_USER:
346 case AUDIT_OBJ_ROLE:
347 case AUDIT_OBJ_TYPE:
348 case AUDIT_OBJ_LEV_LOW:
349 case AUDIT_OBJ_LEV_HIGH:
350 /* The above note for AUDIT_SUBJ_USER...AUDIT_SUBJ_CLR
351 also applies here */
352 if (f->se_rule) {
353 /* Find files that match */
354 if (name) {
355 result = selinux_audit_rule_match(
356 name->osid, f->type, f->op,
357 f->se_rule, ctx);
358 } else if (ctx) {
359 for (j = 0; j < ctx->name_count; j++) {
360 if (selinux_audit_rule_match(
361 ctx->names[j].osid,
362 f->type, f->op,
363 f->se_rule, ctx)) {
364 ++result;
365 break;
366 }
367 }
368 }
369 /* Find ipc objects that match */
370 if (ctx) {
371 struct audit_aux_data *aux;
372 for (aux = ctx->aux; aux;
373 aux = aux->next) {
374 if (aux->type == AUDIT_IPC) {
375 struct audit_aux_data_ipcctl *axi = (void *)aux;
376 if (selinux_audit_rule_match(axi->osid, f->type, f->op, f->se_rule, ctx)) {
377 ++result;
378 break;
379 }
380 }
381 }
382 }
383 }
384 break;
344 case AUDIT_ARG0: 385 case AUDIT_ARG0:
345 case AUDIT_ARG1: 386 case AUDIT_ARG1:
346 case AUDIT_ARG2: 387 case AUDIT_ARG2:
@@ -348,11 +389,17 @@ static int audit_filter_rules(struct task_struct *tsk,
348 if (ctx) 389 if (ctx)
349 result = audit_comparator(ctx->argv[f->type-AUDIT_ARG0], f->op, f->val); 390 result = audit_comparator(ctx->argv[f->type-AUDIT_ARG0], f->op, f->val);
350 break; 391 break;
392 case AUDIT_FILTERKEY:
393 /* ignore this field for filtering */
394 result = 1;
395 break;
351 } 396 }
352 397
353 if (!result) 398 if (!result)
354 return 0; 399 return 0;
355 } 400 }
401 if (rule->filterkey)
402 ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
356 switch (rule->action) { 403 switch (rule->action) {
357 case AUDIT_NEVER: *state = AUDIT_DISABLED; break; 404 case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
358 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; 405 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break;
@@ -627,6 +674,7 @@ static inline void audit_free_context(struct audit_context *context)
627 } 674 }
628 audit_free_names(context); 675 audit_free_names(context);
629 audit_free_aux(context); 676 audit_free_aux(context);
677 kfree(context->filterkey);
630 kfree(context); 678 kfree(context);
631 context = previous; 679 context = previous;
632 } while (context); 680 } while (context);
@@ -735,6 +783,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
735 context->euid, context->suid, context->fsuid, 783 context->euid, context->suid, context->fsuid,
736 context->egid, context->sgid, context->fsgid, tty); 784 context->egid, context->sgid, context->fsgid, tty);
737 audit_log_task_info(ab, tsk); 785 audit_log_task_info(ab, tsk);
786 if (context->filterkey) {
787 audit_log_format(ab, " key=");
788 audit_log_untrustedstring(ab, context->filterkey);
789 } else
790 audit_log_format(ab, " key=(null)");
738 audit_log_end(ab); 791 audit_log_end(ab);
739 792
740 for (aux = context->aux; aux; aux = aux->next) { 793 for (aux = context->aux; aux; aux = aux->next) {
@@ -1060,6 +1113,8 @@ void audit_syscall_exit(int valid, long return_code)
1060 } else { 1113 } else {
1061 audit_free_names(context); 1114 audit_free_names(context);
1062 audit_free_aux(context); 1115 audit_free_aux(context);
1116 kfree(context->filterkey);
1117 context->filterkey = NULL;
1063 tsk->audit_context = context; 1118 tsk->audit_context = context;
1064 } 1119 }
1065} 1120}
diff --git a/kernel/capability.c b/kernel/capability.c
index 1a4d8a40d3f9..c7685ad00a97 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
46 int ret = 0; 46 int ret = 0;
47 pid_t pid; 47 pid_t pid;
48 __u32 version; 48 __u32 version;
49 task_t *target; 49 struct task_struct *target;
50 struct __user_cap_data_struct data; 50 struct __user_cap_data_struct data;
51 51
52 if (get_user(version, &header->version)) 52 if (get_user(version, &header->version))
@@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
96 kernel_cap_t *inheritable, 96 kernel_cap_t *inheritable,
97 kernel_cap_t *permitted) 97 kernel_cap_t *permitted)
98{ 98{
99 task_t *g, *target; 99 struct task_struct *g, *target;
100 int ret = -EPERM; 100 int ret = -EPERM;
101 int found = 0; 101 int found = 0;
102 102
@@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
128 kernel_cap_t *inheritable, 128 kernel_cap_t *inheritable,
129 kernel_cap_t *permitted) 129 kernel_cap_t *permitted)
130{ 130{
131 task_t *g, *target; 131 struct task_struct *g, *target;
132 int ret = -EPERM; 132 int ret = -EPERM;
133 int found = 0; 133 int found = 0;
134 134
@@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
172{ 172{
173 kernel_cap_t inheritable, permitted, effective; 173 kernel_cap_t inheritable, permitted, effective;
174 __u32 version; 174 __u32 version;
175 task_t *target; 175 struct task_struct *target;
176 int ret; 176 int ret;
177 pid_t pid; 177 pid_t pid;
178 178
diff --git a/kernel/configs.c b/kernel/configs.c
index 009e1ebdcb88..f9e31974f4ad 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -23,7 +23,6 @@
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 */ 24 */
25 25
26#include <linux/config.h>
27#include <linux/kernel.h> 26#include <linux/kernel.h>
28#include <linux/module.h> 27#include <linux/module.h>
29#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1535af3a912d..c232dc077438 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -18,7 +18,6 @@
18 * distribution for more details. 18 * distribution for more details.
19 */ 19 */
20 20
21#include <linux/config.h>
22#include <linux/cpu.h> 21#include <linux/cpu.h>
23#include <linux/cpumask.h> 22#include <linux/cpumask.h>
24#include <linux/cpuset.h> 23#include <linux/cpuset.h>
@@ -1064,7 +1063,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1064} 1063}
1065 1064
1066/* 1065/*
1067 * Frequency meter - How fast is some event occuring? 1066 * Frequency meter - How fast is some event occurring?
1068 * 1067 *
1069 * These routines manage a digitally filtered, constant time based, 1068 * These routines manage a digitally filtered, constant time based,
1070 * event frequency meter. There are four routines: 1069 * event frequency meter. There are four routines:
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c01cead2cfd6..3c2eaea66b1e 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -7,7 +7,6 @@
7 * 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org) 7 * 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org)
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/init.h> 10#include <linux/init.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/kmod.h> 12#include <linux/kmod.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index ab06b9f88f64..6664c084783d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/config.h>
8#include <linux/mm.h> 7#include <linux/mm.h>
9#include <linux/slab.h> 8#include <linux/slab.h>
10#include <linux/interrupt.h> 9#include <linux/interrupt.h>
@@ -135,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
135 134
136void release_task(struct task_struct * p) 135void release_task(struct task_struct * p)
137{ 136{
137 struct task_struct *leader;
138 int zap_leader; 138 int zap_leader;
139 task_t *leader;
140repeat: 139repeat:
141 atomic_dec(&p->user->processes); 140 atomic_dec(&p->user->processes);
142 write_lock_irq(&tasklist_lock); 141 write_lock_irq(&tasklist_lock);
@@ -210,7 +209,7 @@ out:
210 * 209 *
211 * "I ask you, have you ever known what it is to be an orphan?" 210 * "I ask you, have you ever known what it is to be an orphan?"
212 */ 211 */
213static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) 212static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
214{ 213{
215 struct task_struct *p; 214 struct task_struct *p;
216 int ret = 1; 215 int ret = 1;
@@ -583,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)
583 mmput(mm); 582 mmput(mm);
584} 583}
585 584
586static inline void choose_new_parent(task_t *p, task_t *reaper) 585static inline void
586choose_new_parent(struct task_struct *p, struct task_struct *reaper)
587{ 587{
588 /* 588 /*
589 * Make sure we're not reparenting to ourselves and that 589 * Make sure we're not reparenting to ourselves and that
@@ -593,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)
593 p->real_parent = reaper; 593 p->real_parent = reaper;
594} 594}
595 595
596static void reparent_thread(task_t *p, task_t *father, int traced) 596static void
597reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
597{ 598{
598 /* We don't want people slaying init. */ 599 /* We don't want people slaying init. */
599 if (p->exit_signal != -1) 600 if (p->exit_signal != -1)
@@ -657,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
657 * group, and if no such member exists, give it to 658 * group, and if no such member exists, give it to
658 * the global child reaper process (ie "init") 659 * the global child reaper process (ie "init")
659 */ 660 */
660static void forget_original_parent(struct task_struct * father, 661static void
661 struct list_head *to_release) 662forget_original_parent(struct task_struct *father, struct list_head *to_release)
662{ 663{
663 struct task_struct *p, *reaper = father; 664 struct task_struct *p, *reaper = father;
664 struct list_head *_p, *_n; 665 struct list_head *_p, *_n;
@@ -681,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,
681 */ 682 */
682 list_for_each_safe(_p, _n, &father->children) { 683 list_for_each_safe(_p, _n, &father->children) {
683 int ptrace; 684 int ptrace;
684 p = list_entry(_p,struct task_struct,sibling); 685 p = list_entry(_p, struct task_struct, sibling);
685 686
686 ptrace = p->ptrace; 687 ptrace = p->ptrace;
687 688
@@ -710,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,
710 list_add(&p->ptrace_list, to_release); 711 list_add(&p->ptrace_list, to_release);
711 } 712 }
712 list_for_each_safe(_p, _n, &father->ptrace_children) { 713 list_for_each_safe(_p, _n, &father->ptrace_children) {
713 p = list_entry(_p,struct task_struct,ptrace_list); 714 p = list_entry(_p, struct task_struct, ptrace_list);
714 choose_new_parent(p, reaper); 715 choose_new_parent(p, reaper);
715 reparent_thread(p, father, 1); 716 reparent_thread(p, father, 1);
716 } 717 }
@@ -830,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)
830 831
831 list_for_each_safe(_p, _n, &ptrace_dead) { 832 list_for_each_safe(_p, _n, &ptrace_dead) {
832 list_del_init(_p); 833 list_del_init(_p);
833 t = list_entry(_p,struct task_struct,ptrace_list); 834 t = list_entry(_p, struct task_struct, ptrace_list);
834 release_task(t); 835 release_task(t);
835 } 836 }
836 837
@@ -934,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code)
934 if (unlikely(current->pi_state_cache)) 935 if (unlikely(current->pi_state_cache))
935 kfree(current->pi_state_cache); 936 kfree(current->pi_state_cache);
936 /* 937 /*
937 * If DEBUG_MUTEXES is on, make sure we are holding no locks: 938 * Make sure we are holding no locks:
938 */ 939 */
939 mutex_debug_check_no_locks_held(tsk); 940 debug_check_no_locks_held(tsk);
940 rt_mutex_debug_check_no_locks_held(tsk);
941 941
942 if (tsk->io_context) 942 if (tsk->io_context)
943 exit_io_context(); 943 exit_io_context();
@@ -1012,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)
1012 do_group_exit((error_code & 0xff) << 8); 1012 do_group_exit((error_code & 0xff) << 8);
1013} 1013}
1014 1014
1015static int eligible_child(pid_t pid, int options, task_t *p) 1015static int eligible_child(pid_t pid, int options, struct task_struct *p)
1016{ 1016{
1017 if (pid > 0) { 1017 if (pid > 0) {
1018 if (p->pid != pid) 1018 if (p->pid != pid)
@@ -1053,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)
1053 return 1; 1053 return 1;
1054} 1054}
1055 1055
1056static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, 1056static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1057 int why, int status, 1057 int why, int status,
1058 struct siginfo __user *infop, 1058 struct siginfo __user *infop,
1059 struct rusage __user *rusagep) 1059 struct rusage __user *rusagep)
1060{ 1060{
1061 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1061 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
1062
1062 put_task_struct(p); 1063 put_task_struct(p);
1063 if (!retval) 1064 if (!retval)
1064 retval = put_user(SIGCHLD, &infop->si_signo); 1065 retval = put_user(SIGCHLD, &infop->si_signo);
@@ -1083,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
1083 * the lock and this task is uninteresting. If we return nonzero, we have 1084 * the lock and this task is uninteresting. If we return nonzero, we have
1084 * released the lock and the system call should return. 1085 * released the lock and the system call should return.
1085 */ 1086 */
1086static int wait_task_zombie(task_t *p, int noreap, 1087static int wait_task_zombie(struct task_struct *p, int noreap,
1087 struct siginfo __user *infop, 1088 struct siginfo __user *infop,
1088 int __user *stat_addr, struct rusage __user *ru) 1089 int __user *stat_addr, struct rusage __user *ru)
1089{ 1090{
@@ -1245,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,
1245 * the lock and this task is uninteresting. If we return nonzero, we have 1246 * the lock and this task is uninteresting. If we return nonzero, we have
1246 * released the lock and the system call should return. 1247 * released the lock and the system call should return.
1247 */ 1248 */
1248static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, 1249static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1249 struct siginfo __user *infop, 1250 int noreap, struct siginfo __user *infop,
1250 int __user *stat_addr, struct rusage __user *ru) 1251 int __user *stat_addr, struct rusage __user *ru)
1251{ 1252{
1252 int retval, exit_code; 1253 int retval, exit_code;
@@ -1360,7 +1361,7 @@ bail_ref:
1360 * the lock and this task is uninteresting. If we return nonzero, we have 1361 * the lock and this task is uninteresting. If we return nonzero, we have
1361 * released the lock and the system call should return. 1362 * released the lock and the system call should return.
1362 */ 1363 */
1363static int wait_task_continued(task_t *p, int noreap, 1364static int wait_task_continued(struct task_struct *p, int noreap,
1364 struct siginfo __user *infop, 1365 struct siginfo __user *infop,
1365 int __user *stat_addr, struct rusage __user *ru) 1366 int __user *stat_addr, struct rusage __user *ru)
1366{ 1367{
@@ -1446,7 +1447,7 @@ repeat:
1446 int ret; 1447 int ret;
1447 1448
1448 list_for_each(_p,&tsk->children) { 1449 list_for_each(_p,&tsk->children) {
1449 p = list_entry(_p,struct task_struct,sibling); 1450 p = list_entry(_p, struct task_struct, sibling);
1450 1451
1451 ret = eligible_child(pid, options, p); 1452 ret = eligible_child(pid, options, p);
1452 if (!ret) 1453 if (!ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 628198a4f28a..56e4e07e45f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -11,7 +11,6 @@
11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' 11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/slab.h> 14#include <linux/slab.h>
16#include <linux/init.h> 15#include <linux/init.h>
17#include <linux/unistd.h> 16#include <linux/unistd.h>
@@ -194,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
194 193
195 down_write(&oldmm->mmap_sem); 194 down_write(&oldmm->mmap_sem);
196 flush_cache_mm(oldmm); 195 flush_cache_mm(oldmm);
197 down_write(&mm->mmap_sem); 196 /*
197 * Not linked in yet - no deadlock potential:
198 */
199 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
198 200
199 mm->locked_vm = 0; 201 mm->locked_vm = 0;
200 mm->mmap = NULL; 202 mm->mmap = NULL;
@@ -920,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)
920 spin_lock_init(&p->pi_lock); 922 spin_lock_init(&p->pi_lock);
921 plist_head_init(&p->pi_waiters, &p->pi_lock); 923 plist_head_init(&p->pi_waiters, &p->pi_lock);
922 p->pi_blocked_on = NULL; 924 p->pi_blocked_on = NULL;
923# ifdef CONFIG_DEBUG_RT_MUTEXES
924 spin_lock_init(&p->held_list_lock);
925 INIT_LIST_HEAD(&p->held_list_head);
926# endif
927#endif 925#endif
928} 926}
929 927
@@ -935,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)
935 * parts of the process environment (as per the clone 933 * parts of the process environment (as per the clone
936 * flags). The actual kick-off is left to the caller. 934 * flags). The actual kick-off is left to the caller.
937 */ 935 */
938static task_t *copy_process(unsigned long clone_flags, 936static struct task_struct *copy_process(unsigned long clone_flags,
939 unsigned long stack_start, 937 unsigned long stack_start,
940 struct pt_regs *regs, 938 struct pt_regs *regs,
941 unsigned long stack_size, 939 unsigned long stack_size,
942 int __user *parent_tidptr, 940 int __user *parent_tidptr,
943 int __user *child_tidptr, 941 int __user *child_tidptr,
944 int pid) 942 int pid)
945{ 943{
946 int retval; 944 int retval;
947 struct task_struct *p = NULL; 945 struct task_struct *p = NULL;
@@ -973,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags,
973 if (!p) 971 if (!p)
974 goto fork_out; 972 goto fork_out;
975 973
974#ifdef CONFIG_TRACE_IRQFLAGS
975 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
976 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
977#endif
976 retval = -EAGAIN; 978 retval = -EAGAIN;
977 if (atomic_read(&p->user->processes) >= 979 if (atomic_read(&p->user->processes) >=
978 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 980 p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1047,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags,
1047 } 1049 }
1048 mpol_fix_fork_child_flag(p); 1050 mpol_fix_fork_child_flag(p);
1049#endif 1051#endif
1052#ifdef CONFIG_TRACE_IRQFLAGS
1053 p->irq_events = 0;
1054 p->hardirqs_enabled = 0;
1055 p->hardirq_enable_ip = 0;
1056 p->hardirq_enable_event = 0;
1057 p->hardirq_disable_ip = _THIS_IP_;
1058 p->hardirq_disable_event = 0;
1059 p->softirqs_enabled = 1;
1060 p->softirq_enable_ip = _THIS_IP_;
1061 p->softirq_enable_event = 0;
1062 p->softirq_disable_ip = 0;
1063 p->softirq_disable_event = 0;
1064 p->hardirq_context = 0;
1065 p->softirq_context = 0;
1066#endif
1067#ifdef CONFIG_LOCKDEP
1068 p->lockdep_depth = 0; /* no locks held yet */
1069 p->curr_chain_key = 0;
1070 p->lockdep_recursion = 0;
1071#endif
1050 1072
1051 rt_mutex_init_task(p); 1073 rt_mutex_init_task(p);
1052 1074
@@ -1272,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1272 return regs; 1294 return regs;
1273} 1295}
1274 1296
1275task_t * __devinit fork_idle(int cpu) 1297struct task_struct * __devinit fork_idle(int cpu)
1276{ 1298{
1277 task_t *task; 1299 struct task_struct *task;
1278 struct pt_regs regs; 1300 struct pt_regs regs;
1279 1301
1280 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0); 1302 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
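
The dup_mmap() hunk above is one of the new lockdep annotations in this series: the child's mmap_sem is taken with down_write_nested(..., SINGLE_DEPTH_NESTING) because holding two semaphores of the same lock class would otherwise be flagged as a possible deadlock. A minimal sketch of the same idiom, not taken from the patch and built around a hypothetical structure:

#include <linux/rwsem.h>
#include <linux/lockdep.h>

struct foo {
	struct rw_semaphore sem;
	/* ... state to copy ... */
};

static void copy_foo(struct foo *dst, struct foo *src)
{
	down_write(&src->sem);
	/*
	 * dst is brand new and not yet visible to any other task, so
	 * taking its semaphore while src->sem is held cannot deadlock;
	 * the _nested variant tells lockdep to treat this acquisition
	 * as a distinct subclass instead of reporting recursive locking
	 * of a single class.
	 */
	down_write_nested(&dst->sem, SINGLE_DEPTH_NESTING);

	/* ... copy state from src to dst ... */

	up_write(&dst->sem);
	up_write(&src->sem);
}
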
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c91f938005d..1dc98e4dd287 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
607} 607}
608 608
609/* 609/*
610 * Express the locking dependencies for lockdep:
611 */
612static inline void
613double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
614{
615 if (hb1 <= hb2) {
616 spin_lock(&hb1->lock);
617 if (hb1 < hb2)
618 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
619 } else { /* hb1 > hb2 */
620 spin_lock(&hb2->lock);
621 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
622 }
623}
624
625/*
610 * Wake up all waiters hashed on the physical page that is mapped 626 * Wake up all waiters hashed on the physical page that is mapped
611 * to this virtual address: 627 * to this virtual address:
612 */ 628 */
@@ -630,8 +646,10 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
630 646
631 list_for_each_entry_safe(this, next, head, list) { 647 list_for_each_entry_safe(this, next, head, list) {
632 if (match_futex (&this->key, &key)) { 648 if (match_futex (&this->key, &key)) {
633 if (this->pi_state) 649 if (this->pi_state) {
634 return -EINVAL; 650 ret = -EINVAL;
651 break;
652 }
635 wake_futex(this); 653 wake_futex(this);
636 if (++ret >= nr_wake) 654 if (++ret >= nr_wake)
637 break; 655 break;
@@ -672,11 +690,7 @@ retryfull:
672 hb2 = hash_futex(&key2); 690 hb2 = hash_futex(&key2);
673 691
674retry: 692retry:
675 if (hb1 < hb2) 693 double_lock_hb(hb1, hb2);
676 spin_lock(&hb1->lock);
677 spin_lock(&hb2->lock);
678 if (hb1 > hb2)
679 spin_lock(&hb1->lock);
680 694
681 op_ret = futex_atomic_op_inuser(op, uaddr2); 695 op_ret = futex_atomic_op_inuser(op, uaddr2);
682 if (unlikely(op_ret < 0)) { 696 if (unlikely(op_ret < 0)) {
@@ -785,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
785 hb1 = hash_futex(&key1); 799 hb1 = hash_futex(&key1);
786 hb2 = hash_futex(&key2); 800 hb2 = hash_futex(&key2);
787 801
788 if (hb1 < hb2) 802 double_lock_hb(hb1, hb2);
789 spin_lock(&hb1->lock);
790 spin_lock(&hb2->lock);
791 if (hb1 > hb2)
792 spin_lock(&hb1->lock);
793 803
794 if (likely(cmpval != NULL)) { 804 if (likely(cmpval != NULL)) {
795 u32 curval; 805 u32 curval;
@@ -1208,7 +1218,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
1208 } 1218 }
1209 1219
1210 down_read(&curr->mm->mmap_sem); 1220 down_read(&curr->mm->mmap_sem);
1211 hb = queue_lock(&q, -1, NULL); 1221 spin_lock(q.lock_ptr);
1212 1222
1213 /* 1223 /*
1214 * Got the lock. We might not be the anticipated owner if we 1224 * Got the lock. We might not be the anticipated owner if we
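
double_lock_hb() above folds the open-coded bucket locking into one helper so the ordering rule is stated once: the lower-addressed bucket lock is always taken first, the second one via spin_lock_nested() so lockdep knows the nesting of two same-class locks is intentional, and a bucket that both keys hash to is locked only once. A hedged sketch of the same address-ordering idiom for two spinlocks that may alias (the helper name is hypothetical):

#include <linux/spinlock.h>
#include <linux/lockdep.h>

static void lock_pair(spinlock_t *a, spinlock_t *b)
{
	if (a == b) {
		/* both users hashed to the same lock: take it once */
		spin_lock(a);
	} else if (a < b) {
		/* fixed order: lower address first, avoiding ABBA deadlocks */
		spin_lock(a);
		spin_lock_nested(b, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(b);
		spin_lock_nested(a, SINGLE_DEPTH_NESTING);
	}
}

The unlock path has to mirror the aliasing check and drop the shared lock only once when a == b.
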
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8d3dc29ef41a..d17766d40dab 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)
669 return HRTIMER_NORESTART; 669 return HRTIMER_NORESTART;
670} 670}
671 671
672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) 672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
673{ 673{
674 sl->timer.function = hrtimer_wakeup; 674 sl->timer.function = hrtimer_wakeup;
675 sl->task = task; 675 sl->task = task;
@@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)
782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); 782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
783 int i; 783 int i;
784 784
785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) 785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
786 spin_lock_init(&base->lock); 786 spin_lock_init(&base->lock);
787 lockdep_set_class(&base->lock, &base->lock_key);
788 }
787} 789}
788 790
789#ifdef CONFIG_HOTPLUG_CPU 791#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4a0952d9458b..9336f2e89e40 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -261,10 +261,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
261 * keep it masked and get out of here 261 * keep it masked and get out of here
262 */ 262 */
263 action = desc->action; 263 action = desc->action;
264 if (unlikely(!action || (desc->status & IRQ_DISABLED))) 264 if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
265 desc->status |= IRQ_PENDING;
265 goto out; 266 goto out;
267 }
266 268
267 desc->status |= IRQ_INPROGRESS; 269 desc->status |= IRQ_INPROGRESS;
270 desc->status &= ~IRQ_PENDING;
268 spin_unlock(&desc->lock); 271 spin_unlock(&desc->lock);
269 272
270 action_ret = handle_IRQ_event(irq, regs, action); 273 action_ret = handle_IRQ_event(irq, regs, action);
@@ -462,9 +465,18 @@ __set_irq_handler(unsigned int irq,
462 if (!handle) 465 if (!handle)
463 handle = handle_bad_irq; 466 handle = handle_bad_irq;
464 467
465 if (is_chained && desc->chip == &no_irq_chip) 468 if (desc->chip == &no_irq_chip) {
466 printk(KERN_WARNING "Trying to install " 469 printk(KERN_WARNING "Trying to install %sinterrupt handler "
467 "chained interrupt type for IRQ%d\n", irq); 470 "for IRQ%d\n", is_chained ? "chained " : " ", irq);
471 /*
472 * Some ARM implementations install a handler for really dumb
473 * interrupt hardware without setting an irq_chip. This worked
474 * with the ARM no_irq_chip but the check in setup_irq would
475 * prevent us to setup the interrupt at all. Switch it to
476 * dummy_irq_chip for easy transition.
477 */
478 desc->chip = &dummy_irq_chip;
479 }
468 480
469 spin_lock_irqsave(&desc->lock, flags); 481 spin_lock_irqsave(&desc->lock, flags);
470 482
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 5a360dd4331b..fc4e906aedbd 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -92,6 +92,22 @@ struct irq_chip no_irq_chip = {
92}; 92};
93 93
94/* 94/*
95 * Generic dummy implementation which can be used for
96 * real dumb interrupt sources
97 */
98struct irq_chip dummy_irq_chip = {
99 .name = "dummy",
100 .startup = noop_ret,
101 .shutdown = noop,
102 .enable = noop,
103 .disable = noop,
104 .ack = noop,
105 .mask = noop,
106 .unmask = noop,
107 .end = noop,
108};
109
110/*
95 * Special, empty irq handler: 111 * Special, empty irq handler:
96 */ 112 */
97irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) 113irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
@@ -113,8 +129,10 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
113 irqreturn_t ret, retval = IRQ_NONE; 129 irqreturn_t ret, retval = IRQ_NONE;
114 unsigned int status = 0; 130 unsigned int status = 0;
115 131
116 if (!(action->flags & SA_INTERRUPT)) 132 handle_dynamic_tick(action);
117 local_irq_enable(); 133
134 if (!(action->flags & IRQF_DISABLED))
135 local_irq_enable_in_hardirq();
118 136
119 do { 137 do {
120 ret = action->handler(irq, action->dev_id, regs); 138 ret = action->handler(irq, action->dev_id, regs);
@@ -124,7 +142,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
124 action = action->next; 142 action = action->next;
125 } while (action); 143 } while (action);
126 144
127 if (status & SA_SAMPLE_RANDOM) 145 if (status & IRQF_SAMPLE_RANDOM)
128 add_interrupt_randomness(irq); 146 add_interrupt_randomness(irq);
129 local_irq_disable(); 147 local_irq_disable();
130 148
@@ -231,3 +249,19 @@ out:
231 return 1; 249 return 1;
232} 250}
233 251
252#ifdef CONFIG_TRACE_IRQFLAGS
253
254/*
255 * lockdep: we want to handle all irq_desc locks as a single lock-class:
256 */
257static struct lock_class_key irq_desc_lock_class;
258
259void early_init_irq_lock_class(void)
260{
261 int i;
262
263 for (i = 0; i < NR_IRQS; i++)
264 lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
265}
266
267#endif
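
early_init_irq_lock_class() above shows the other recurring lockdep annotation in this diff: when many locks of the same type are initialised from one place and should be reported as a single class, one static struct lock_class_key is assigned to all of them with lockdep_set_class() (the hrtimer base locks earlier in this diff get the same treatment). A small sketch under the assumption of a hypothetical driver table:

#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/lockdep.h>

#define MY_TABLE_SIZE 16		/* hypothetical */

static struct {
	spinlock_t lock;
	/* ... per-entry data ... */
} my_table[MY_TABLE_SIZE];

/* one key: every table lock belongs to the same lockdep class */
static struct lock_class_key my_table_lock_key;

static void __init my_table_init(void)
{
	int i;

	for (i = 0; i < MY_TABLE_SIZE; i++) {
		spin_lock_init(&my_table[i].lock);
		lockdep_set_class(&my_table[i].lock, &my_table_lock_key);
	}
}
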
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9eb1d518ee1c..4e461438e48b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -7,7 +7,6 @@
7 * This file contains driver APIs to the irq subsystem. 7 * This file contains driver APIs to the irq subsystem.
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/irq.h> 10#include <linux/irq.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/random.h> 12#include <linux/random.h>
@@ -115,7 +114,7 @@ void enable_irq(unsigned int irq)
115 spin_lock_irqsave(&desc->lock, flags); 114 spin_lock_irqsave(&desc->lock, flags);
116 switch (desc->depth) { 115 switch (desc->depth) {
117 case 0: 116 case 0:
118 printk(KERN_WARNING "Unablanced enable_irq(%d)\n", irq); 117 printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
119 WARN_ON(1); 118 WARN_ON(1);
120 break; 119 break;
121 case 1: { 120 case 1: {
@@ -168,7 +167,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
168 167
169 action = irq_desc[irq].action; 168 action = irq_desc[irq].action;
170 if (action) 169 if (action)
171 if (irqflags & action->flags & SA_SHIRQ) 170 if (irqflags & action->flags & IRQF_SHARED)
172 action = NULL; 171 action = NULL;
173 172
174 return !action; 173 return !action;
@@ -206,7 +205,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
206 * so we have to be careful not to interfere with a 205 * so we have to be careful not to interfere with a
207 * running system. 206 * running system.
208 */ 207 */
209 if (new->flags & SA_SAMPLE_RANDOM) { 208 if (new->flags & IRQF_SAMPLE_RANDOM) {
210 /* 209 /*
211 * This function might sleep, we want to call it first, 210 * This function might sleep, we want to call it first,
212 * outside of the atomic block. 211 * outside of the atomic block.
@@ -228,16 +227,17 @@ int setup_irq(unsigned int irq, struct irqaction *new)
228 /* 227 /*
229 * Can't share interrupts unless both agree to and are 228 * Can't share interrupts unless both agree to and are
230 * the same type (level, edge, polarity). So both flag 229 * the same type (level, edge, polarity). So both flag
231 * fields must have SA_SHIRQ set and the bits which 230 * fields must have IRQF_SHARED set and the bits which
232 * set the trigger type must match. 231 * set the trigger type must match.
233 */ 232 */
234 if (!((old->flags & new->flags) & SA_SHIRQ) || 233 if (!((old->flags & new->flags) & IRQF_SHARED) ||
235 ((old->flags ^ new->flags) & SA_TRIGGER_MASK)) 234 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK))
236 goto mismatch; 235 goto mismatch;
237 236
238#if defined(CONFIG_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ) 237#if defined(CONFIG_IRQ_PER_CPU)
239 /* All handlers must agree on per-cpuness */ 238 /* All handlers must agree on per-cpuness */
240 if ((old->flags & IRQ_PER_CPU) != (new->flags & IRQ_PER_CPU)) 239 if ((old->flags & IRQF_PERCPU) !=
240 (new->flags & IRQF_PERCPU))
241 goto mismatch; 241 goto mismatch;
242#endif 242#endif
243 243
@@ -250,26 +250,27 @@ int setup_irq(unsigned int irq, struct irqaction *new)
250 } 250 }
251 251
252 *p = new; 252 *p = new;
253#if defined(CONFIG_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ) 253#if defined(CONFIG_IRQ_PER_CPU)
254 if (new->flags & SA_PERCPU_IRQ) 254 if (new->flags & IRQF_PERCPU)
255 desc->status |= IRQ_PER_CPU; 255 desc->status |= IRQ_PER_CPU;
256#endif 256#endif
257 if (!shared) { 257 if (!shared) {
258 irq_chip_set_defaults(desc->chip); 258 irq_chip_set_defaults(desc->chip);
259 259
260 /* Setup the type (level, edge polarity) if configured: */ 260 /* Setup the type (level, edge polarity) if configured: */
261 if (new->flags & SA_TRIGGER_MASK) { 261 if (new->flags & IRQF_TRIGGER_MASK) {
262 if (desc->chip && desc->chip->set_type) 262 if (desc->chip && desc->chip->set_type)
263 desc->chip->set_type(irq, 263 desc->chip->set_type(irq,
264 new->flags & SA_TRIGGER_MASK); 264 new->flags & IRQF_TRIGGER_MASK);
265 else 265 else
266 /* 266 /*
267 * SA_TRIGGER_* but the PIC does not support 267 * IRQF_TRIGGER_* but the PIC does not support
268 * multiple flow-types? 268 * multiple flow-types?
269 */ 269 */
270 printk(KERN_WARNING "setup_irq(%d) SA_TRIGGER" 270 printk(KERN_WARNING "No IRQF_TRIGGER set_type "
271 "set. No set_type function available\n", 271 "function for IRQ %d (%s)\n", irq,
272 irq); 272 desc->chip ? desc->chip->name :
273 "unknown");
273 } else 274 } else
274 compat_irq_chip_set_default_handler(desc); 275 compat_irq_chip_set_default_handler(desc);
275 276
@@ -298,8 +299,8 @@ int setup_irq(unsigned int irq, struct irqaction *new)
298 299
299mismatch: 300mismatch:
300 spin_unlock_irqrestore(&desc->lock, flags); 301 spin_unlock_irqrestore(&desc->lock, flags);
301 if (!(new->flags & SA_PROBEIRQ)) { 302 if (!(new->flags & IRQF_PROBE_SHARED)) {
302 printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); 303 printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq);
303 dump_stack(); 304 dump_stack();
304 } 305 }
305 return -EBUSY; 306 return -EBUSY;
@@ -366,7 +367,7 @@ void free_irq(unsigned int irq, void *dev_id)
366 kfree(action); 367 kfree(action);
367 return; 368 return;
368 } 369 }
369 printk(KERN_ERR "Trying to free free IRQ%d\n", irq); 370 printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
370 spin_unlock_irqrestore(&desc->lock, flags); 371 spin_unlock_irqrestore(&desc->lock, flags);
371 return; 372 return;
372 } 373 }
@@ -397,9 +398,9 @@ EXPORT_SYMBOL(free_irq);
397 * 398 *
398 * Flags: 399 * Flags:
399 * 400 *
400 * SA_SHIRQ Interrupt is shared 401 * IRQF_SHARED Interrupt is shared
401 * SA_INTERRUPT Disable local interrupts while processing 402 * IRQF_DISABLED Disable local interrupts while processing
402 * SA_SAMPLE_RANDOM The interrupt can be used for entropy 403 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
403 * 404 *
404 */ 405 */
405int request_irq(unsigned int irq, 406int request_irq(unsigned int irq,
@@ -409,13 +410,19 @@ int request_irq(unsigned int irq,
409 struct irqaction *action; 410 struct irqaction *action;
410 int retval; 411 int retval;
411 412
413#ifdef CONFIG_LOCKDEP
414 /*
415 * Lockdep wants atomic interrupt handlers:
416 */
417 irqflags |= SA_INTERRUPT;
418#endif
412 /* 419 /*
413 * Sanity-check: shared interrupts must pass in a real dev-ID, 420 * Sanity-check: shared interrupts must pass in a real dev-ID,
414 * otherwise we'll have trouble later trying to figure out 421 * otherwise we'll have trouble later trying to figure out
415 * which interrupt is which (messes up the interrupt freeing 422 * which interrupt is which (messes up the interrupt freeing
416 * logic etc). 423 * logic etc).
417 */ 424 */
418 if ((irqflags & SA_SHIRQ) && !dev_id) 425 if ((irqflags & IRQF_SHARED) && !dev_id)
419 return -EINVAL; 426 return -EINVAL;
420 if (irq >= NR_IRQS) 427 if (irq >= NR_IRQS)
421 return -EINVAL; 428 return -EINVAL;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b483deed311c..417e98092cf2 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -36,7 +36,7 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
36 * Already running: If it is shared get the other 36 * Already running: If it is shared get the other
37 * CPU to go looking for our mystery interrupt too 37 * CPU to go looking for our mystery interrupt too
38 */ 38 */
39 if (desc->action && (desc->action->flags & SA_SHIRQ)) 39 if (desc->action && (desc->action->flags & IRQF_SHARED))
40 desc->status |= IRQ_PENDING; 40 desc->status |= IRQ_PENDING;
41 spin_unlock(&desc->lock); 41 spin_unlock(&desc->lock);
42 continue; 42 continue;
@@ -48,7 +48,7 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
48 48
49 while (action) { 49 while (action) {
50 /* Only shared IRQ handlers are safe to call */ 50 /* Only shared IRQ handlers are safe to call */
51 if (action->flags & SA_SHIRQ) { 51 if (action->flags & IRQF_SHARED) {
52 if (action->handler(i, action->dev_id, regs) == 52 if (action->handler(i, action->dev_id, regs) ==
53 IRQ_HANDLED) 53 IRQ_HANDLED)
54 ok = 1; 54 ok = 1;
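
The manage.c and spurious.c hunks above rename the old SA_* interrupt flags to the IRQF_* namespace (IRQF_SHARED, IRQF_DISABLED, IRQF_SAMPLE_RANDOM, IRQF_TRIGGER_*) and keep the rule that shared handlers must pass a real dev_id. A hedged sketch of a driver using the renamed flags with the handler signature of this kernel generation; every MYDEV_* name and the register layout are hypothetical:

#include <linux/interrupt.h>
#include <asm/io.h>

#define MYDEV_IRQ	  10		/* hypothetical interrupt line */
#define MYDEV_STATUS	  0x00		/* hypothetical status register */
#define MYDEV_IRQ_PENDING 0x01		/* hypothetical "we raised it" bit */

struct mydev {
	void __iomem *regs;
};

static irqreturn_t mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	struct mydev *dev = dev_id;

	/* a shared handler must check whether its own device really fired */
	if (!(readl(dev->regs + MYDEV_STATUS) & MYDEV_IRQ_PENDING))
		return IRQ_NONE;

	/* ... acknowledge and service the device ... */
	return IRQ_HANDLED;
}

static int mydev_setup_irq(struct mydev *dev)
{
	/* IRQF_SHARED requires a non-NULL dev_id, used in the check above */
	return request_irq(MYDEV_IRQ, mydev_interrupt,
			   IRQF_SHARED | IRQF_SAMPLE_RANDOM, "mydev", dev);
}
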
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 20a997c73c3d..1d32defa38ab 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -20,7 +20,6 @@
20*/ 20*/
21#define __KERNEL_SYSCALLS__ 21#define __KERNEL_SYSCALLS__
22 22
23#include <linux/config.h>
24#include <linux/module.h> 23#include <linux/module.h>
25#include <linux/sched.h> 24#include <linux/sched.h>
26#include <linux/syscalls.h> 25#include <linux/syscalls.h>
@@ -234,7 +233,7 @@ static void __call_usermodehelper(void *data)
234int call_usermodehelper_keys(char *path, char **argv, char **envp, 233int call_usermodehelper_keys(char *path, char **argv, char **envp,
235 struct key *session_keyring, int wait) 234 struct key *session_keyring, int wait)
236{ 235{
237 DECLARE_COMPLETION(done); 236 DECLARE_COMPLETION_ONSTACK(done);
238 struct subprocess_info sub_info = { 237 struct subprocess_info sub_info = {
239 .complete = &done, 238 .complete = &done,
240 .path = path, 239 .path = path,
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 9e28478a17a5..e0ffe4ab0917 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -8,7 +8,6 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/config.h>
12#include <linux/kobject.h> 11#include <linux/kobject.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/sysfs.h> 13#include <linux/sysfs.h>
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
new file mode 100644
index 000000000000..f32ca78c198d
--- /dev/null
+++ b/kernel/lockdep.c
@@ -0,0 +1,2702 @@
1/*
2 * kernel/lockdep.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * this code maps all the lock dependencies as they occur in a live kernel
11 * and will warn about the following classes of locking bugs:
12 *
13 * - lock inversion scenarios
14 * - circular lock dependencies
15 * - hardirq/softirq safe/unsafe locking bugs
16 *
17 * Bugs are reported even if the current locking scenario does not cause
18 * any deadlock at this point.
19 *
20 * I.e. if anytime in the past two locks were taken in a different order,
21 * even if it happened for another task, even if those were different
22 * locks (but of the same class as this lock), this code will detect it.
23 *
24 * Thanks to Arjan van de Ven for coming up with the initial idea of
25 * mapping lock dependencies runtime.
26 */
27#include <linux/mutex.h>
28#include <linux/sched.h>
29#include <linux/delay.h>
30#include <linux/module.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/spinlock.h>
34#include <linux/kallsyms.h>
35#include <linux/interrupt.h>
36#include <linux/stacktrace.h>
37#include <linux/debug_locks.h>
38#include <linux/irqflags.h>
39
40#include <asm/sections.h>
41
42#include "lockdep_internals.h"
43
44/*
45 * hash_lock: protects the lockdep hashes and class/list/hash allocators.
46 *
47 * This is one of the rare exceptions where it's justified
48 * to use a raw spinlock - we really dont want the spinlock
49 * code to recurse back into the lockdep code.
50 */
51static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
52
53static int lockdep_initialized;
54
55unsigned long nr_list_entries;
56static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
57
58/*
 59 * Allocate a lockdep entry. (assumes hash_lock is held; returns
 60 * NULL on failure)
61 */
62static struct lock_list *alloc_list_entry(void)
63{
64 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
65 __raw_spin_unlock(&hash_lock);
66 debug_locks_off();
67 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
68 printk("turning off the locking correctness validator.\n");
69 return NULL;
70 }
71 return list_entries + nr_list_entries++;
72}
73
74/*
75 * All data structures here are protected by the global debug_lock.
76 *
 77 * Mutex key structs only get allocated once, during bootup, and never
78 * get freed - this significantly simplifies the debugging code.
79 */
80unsigned long nr_lock_classes;
81static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
82
83/*
84 * We keep a global list of all lock classes. The list only grows,
85 * never shrinks. The list is only accessed with the lockdep
86 * spinlock lock held.
87 */
88LIST_HEAD(all_lock_classes);
89
90/*
91 * The lockdep classes are in a hash-table as well, for fast lookup:
92 */
93#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
94#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
95#define CLASSHASH_MASK (CLASSHASH_SIZE - 1)
96#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK)
97#define classhashentry(key) (classhash_table + __classhashfn((key)))
98
99static struct list_head classhash_table[CLASSHASH_SIZE];
100
101unsigned long nr_lock_chains;
102static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
103
104/*
105 * We put the lock dependency chains into a hash-table as well, to cache
106 * their existence:
107 */
108#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
109#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
110#define CHAINHASH_MASK (CHAINHASH_SIZE - 1)
111#define __chainhashfn(chain) \
112 (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK)
113#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
114
115static struct list_head chainhash_table[CHAINHASH_SIZE];
116
117/*
118 * The hash key of the lock dependency chains is a hash itself too:
119 * it's a hash of all locks taken up to that lock, including that lock.
120 * It's a 64-bit hash, because it's important for the keys to be
121 * unique.
122 */
123#define iterate_chain_key(key1, key2) \
124 (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \
125 ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \
126 (key2))
127
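As a rough worked example of the iteration above, here is a minimal user-space sketch (not part of the patch) that folds a sequence of hypothetical class ids into a 64-bit chain key; the 13-bit key width is assumed purely for illustration, the real width comes from MAX_LOCKDEP_KEYS_BITS in lockdep_internals.h:

#include <stdint.h>
#include <stdio.h>

#define KEYS_BITS 13	/* assumed width, for illustration only */

static uint64_t fold_chain_key(uint64_t key, uint64_t class_id)
{
	return (key << (KEYS_BITS / 2)) ^
	       (key >> (64 - KEYS_BITS / 2)) ^
	       class_id;
}

int main(void)
{
	uint64_t ids[] = { 3, 17, 42 };	/* hypothetical class ids */
	uint64_t chain_key = 0;
	int i;

	for (i = 0; i < 3; i++)
		chain_key = fold_chain_key(chain_key, ids[i]);

	printf("chain key: %016llx\n", (unsigned long long)chain_key);
	return 0;
}

Each held lock snapshots the running value in prev_chain_key before its own id is folded in, which is what the unlock path uses to get back to the current key cheaply.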
128void lockdep_off(void)
129{
130 current->lockdep_recursion++;
131}
132
133EXPORT_SYMBOL(lockdep_off);
134
135void lockdep_on(void)
136{
137 current->lockdep_recursion--;
138}
139
140EXPORT_SYMBOL(lockdep_on);
141
142int lockdep_internal(void)
143{
144 return current->lockdep_recursion != 0;
145}
146
147EXPORT_SYMBOL(lockdep_internal);
148
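A hypothetical usage sketch (not from this patch): code that lockdep itself can end up calling, such as a console driver's output path, may bracket its own locking so that it is not fed back into the validator:

	lockdep_off();
	/* locking here is invisible to the validator */
	lockdep_on();

Because this is a recursion counter rather than a plain flag, such sections may nest.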
149/*
150 * Debugging switches:
151 */
152
153#define VERBOSE 0
154#ifdef VERBOSE
155# define VERY_VERBOSE 0
156#endif
157
158#if VERBOSE
159# define HARDIRQ_VERBOSE 1
160# define SOFTIRQ_VERBOSE 1
161#else
162# define HARDIRQ_VERBOSE 0
163# define SOFTIRQ_VERBOSE 0
164#endif
165
166#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
167/*
168 * Quick filtering for interesting events:
169 */
170static int class_filter(struct lock_class *class)
171{
172 if (class->name_version == 1 &&
173 !strcmp(class->name, "&rl->lock"))
174 return 1;
175 if (class->name_version == 1 &&
176 !strcmp(class->name, "&ni->mrec_lock"))
177 return 1;
178 if (class->name_version == 1 &&
179 !strcmp(class->name, "mft_ni_runlist_lock"))
180 return 1;
181 if (class->name_version == 1 &&
182 !strcmp(class->name, "mft_ni_mrec_lock"))
183 return 1;
184 if (class->name_version == 1 &&
185 !strcmp(class->name, "&vol->lcnbmp_lock"))
186 return 1;
187 return 0;
188}
189#endif
190
191static int verbose(struct lock_class *class)
192{
193#if VERBOSE
194 return class_filter(class);
195#endif
196 return 0;
197}
198
199#ifdef CONFIG_TRACE_IRQFLAGS
200
201static int hardirq_verbose(struct lock_class *class)
202{
203#if HARDIRQ_VERBOSE
204 return class_filter(class);
205#endif
206 return 0;
207}
208
209static int softirq_verbose(struct lock_class *class)
210{
211#if SOFTIRQ_VERBOSE
212 return class_filter(class);
213#endif
214 return 0;
215}
216
217#endif
218
219/*
220 * Stack-trace: tightly packed array of stack backtrace
221 * addresses. Protected by the hash_lock.
222 */
223unsigned long nr_stack_trace_entries;
224static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
225
226static int save_trace(struct stack_trace *trace)
227{
228 trace->nr_entries = 0;
229 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
230 trace->entries = stack_trace + nr_stack_trace_entries;
231
232 save_stack_trace(trace, NULL, 0, 3);
233
234 trace->max_entries = trace->nr_entries;
235
236 nr_stack_trace_entries += trace->nr_entries;
237 if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
238 return 0;
239
240 if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
241 __raw_spin_unlock(&hash_lock);
242 if (debug_locks_off()) {
243 printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
244 printk("turning off the locking correctness validator.\n");
245 dump_stack();
246 }
247 return 0;
248 }
249
250 return 1;
251}
252
253unsigned int nr_hardirq_chains;
254unsigned int nr_softirq_chains;
255unsigned int nr_process_chains;
256unsigned int max_lockdep_depth;
257unsigned int max_recursion_depth;
258
259#ifdef CONFIG_DEBUG_LOCKDEP
260/*
 261 * We cannot printk in early bootup code - even early_printk() might
 262 * not work yet. So we mark any initialization errors and printk
263 * about it later on, in lockdep_info().
264 */
265static int lockdep_init_error;
266
267/*
268 * Various lockdep statistics:
269 */
270atomic_t chain_lookup_hits;
271atomic_t chain_lookup_misses;
272atomic_t hardirqs_on_events;
273atomic_t hardirqs_off_events;
274atomic_t redundant_hardirqs_on;
275atomic_t redundant_hardirqs_off;
276atomic_t softirqs_on_events;
277atomic_t softirqs_off_events;
278atomic_t redundant_softirqs_on;
279atomic_t redundant_softirqs_off;
280atomic_t nr_unused_locks;
281atomic_t nr_cyclic_checks;
282atomic_t nr_cyclic_check_recursions;
283atomic_t nr_find_usage_forwards_checks;
284atomic_t nr_find_usage_forwards_recursions;
285atomic_t nr_find_usage_backwards_checks;
286atomic_t nr_find_usage_backwards_recursions;
287# define debug_atomic_inc(ptr) atomic_inc(ptr)
288# define debug_atomic_dec(ptr) atomic_dec(ptr)
289# define debug_atomic_read(ptr) atomic_read(ptr)
290#else
291# define debug_atomic_inc(ptr) do { } while (0)
292# define debug_atomic_dec(ptr) do { } while (0)
293# define debug_atomic_read(ptr) 0
294#endif
295
296/*
297 * Locking printouts:
298 */
299
300static const char *usage_str[] =
301{
302 [LOCK_USED] = "initial-use ",
303 [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W",
304 [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W",
305 [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W",
306 [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
307 [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
308 [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
309 [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
310 [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
311};
312
313const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
314{
315 unsigned long offs, size;
316 char *modname;
317
318 return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str);
319}
320
321void
322get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
323{
324 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
325
326 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
327 *c1 = '+';
328 else
329 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
330 *c1 = '-';
331
332 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
333 *c2 = '+';
334 else
335 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
336 *c2 = '-';
337
338 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
339 *c3 = '-';
340 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) {
341 *c3 = '+';
342 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
343 *c3 = '?';
344 }
345
346 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
347 *c4 = '-';
348 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {
349 *c4 = '+';
350 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
351 *c4 = '?';
352 }
353}
354
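To make the encoding concrete, an invented report header such as

	(&inode->i_mutex){--..}, at: [<ffffffff80123456>] some_function+0x42/0x100

would read: the class has been held with hardirqs enabled ('-' in the first position) and with softirqs enabled ('-' in the second), has never been acquired from hardirq or softirq context (no '+'), and has no recorded read-side usage yet ('.' in the last two positions). The address and symbol above are placeholders, not output from a real run.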
355static void print_lock_name(struct lock_class *class)
356{
357 char str[128], c1, c2, c3, c4;
358 const char *name;
359
360 get_usage_chars(class, &c1, &c2, &c3, &c4);
361
362 name = class->name;
363 if (!name) {
364 name = __get_key_name(class->key, str);
365 printk(" (%s", name);
366 } else {
367 printk(" (%s", name);
368 if (class->name_version > 1)
369 printk("#%d", class->name_version);
370 if (class->subclass)
371 printk("/%d", class->subclass);
372 }
373 printk("){%c%c%c%c}", c1, c2, c3, c4);
374}
375
376static void print_lockdep_cache(struct lockdep_map *lock)
377{
378 const char *name;
379 char str[128];
380
381 name = lock->name;
382 if (!name)
383 name = __get_key_name(lock->key->subkeys, str);
384
385 printk("%s", name);
386}
387
388static void print_lock(struct held_lock *hlock)
389{
390 print_lock_name(hlock->class);
391 printk(", at: ");
392 print_ip_sym(hlock->acquire_ip);
393}
394
395static void lockdep_print_held_locks(struct task_struct *curr)
396{
397 int i, depth = curr->lockdep_depth;
398
399 if (!depth) {
400 printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
401 return;
402 }
403 printk("%d lock%s held by %s/%d:\n",
404 depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
405
406 for (i = 0; i < depth; i++) {
407 printk(" #%d: ", i);
408 print_lock(curr->held_locks + i);
409 }
410}
411/*
412 * Helper to print a nice hierarchy of lock dependencies:
413 */
414static void print_spaces(int nr)
415{
416 int i;
417
418 for (i = 0; i < nr; i++)
419 printk(" ");
420}
421
422static void print_lock_class_header(struct lock_class *class, int depth)
423{
424 int bit;
425
426 print_spaces(depth);
427 printk("->");
428 print_lock_name(class);
429 printk(" ops: %lu", class->ops);
430 printk(" {\n");
431
432 for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
433 if (class->usage_mask & (1 << bit)) {
434 int len = depth;
435
436 print_spaces(depth);
437 len += printk(" %s", usage_str[bit]);
438 len += printk(" at:\n");
439 print_stack_trace(class->usage_traces + bit, len);
440 }
441 }
442 print_spaces(depth);
443 printk(" }\n");
444
445 print_spaces(depth);
446 printk(" ... key at: ");
447 print_ip_sym((unsigned long)class->key);
448}
449
450/*
451 * printk all lock dependencies starting at <entry>:
452 */
453static void print_lock_dependencies(struct lock_class *class, int depth)
454{
455 struct lock_list *entry;
456
457 if (DEBUG_LOCKS_WARN_ON(depth >= 20))
458 return;
459
460 print_lock_class_header(class, depth);
461
462 list_for_each_entry(entry, &class->locks_after, entry) {
463 DEBUG_LOCKS_WARN_ON(!entry->class);
464 print_lock_dependencies(entry->class, depth + 1);
465
466 print_spaces(depth);
467 printk(" ... acquired at:\n");
468 print_stack_trace(&entry->trace, 2);
469 printk("\n");
470 }
471}
472
473/*
474 * Add a new dependency to the head of the list:
475 */
476static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
477 struct list_head *head, unsigned long ip)
478{
479 struct lock_list *entry;
480 /*
481 * Lock not present yet - get a new dependency struct and
482 * add it to the list:
483 */
484 entry = alloc_list_entry();
485 if (!entry)
486 return 0;
487
488 entry->class = this;
489 save_trace(&entry->trace);
490
491 /*
492 * Since we never remove from the dependency list, the list can
493 * be walked lockless by other CPUs, it's only allocation
494 * that must be protected by the spinlock. But this also means
495 * we must make new entries visible only once writes to the
496 * entry become visible - hence the RCU op:
497 */
498 list_add_tail_rcu(&entry->entry, head);
499
500 return 1;
501}
502
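Readers rely on exactly this publication guarantee: because entries are only ever appended with list_add_tail_rcu() and never removed, a dependency list can be walked without taking hash_lock, roughly (illustrative fragment):

	struct lock_list *entry;

	list_for_each_entry(entry, &class->locks_after, entry) {
		/*
		 * Safe without hash_lock: entry->class and entry->trace
		 * were fully initialized before the entry was published.
		 */
	}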
503/*
504 * Recursive, forwards-direction lock-dependency checking, used for
505 * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
506 * checking.
507 *
508 * (to keep the stackframe of the recursive functions small we
509 * use these global variables, and we also mark various helper
510 * functions as noinline.)
511 */
512static struct held_lock *check_source, *check_target;
513
514/*
515 * Print a dependency chain entry (this is only done when a deadlock
516 * has been detected):
517 */
518static noinline int
519print_circular_bug_entry(struct lock_list *target, unsigned int depth)
520{
521 if (debug_locks_silent)
522 return 0;
523 printk("\n-> #%u", depth);
524 print_lock_name(target->class);
525 printk(":\n");
526 print_stack_trace(&target->trace, 6);
527
528 return 0;
529}
530
531/*
532 * When a circular dependency is detected, print the
533 * header first:
534 */
535static noinline int
536print_circular_bug_header(struct lock_list *entry, unsigned int depth)
537{
538 struct task_struct *curr = current;
539
540 __raw_spin_unlock(&hash_lock);
541 debug_locks_off();
542 if (debug_locks_silent)
543 return 0;
544
545 printk("\n=======================================================\n");
546 printk( "[ INFO: possible circular locking dependency detected ]\n");
547 printk( "-------------------------------------------------------\n");
548 printk("%s/%d is trying to acquire lock:\n",
549 curr->comm, curr->pid);
550 print_lock(check_source);
551 printk("\nbut task is already holding lock:\n");
552 print_lock(check_target);
553 printk("\nwhich lock already depends on the new lock.\n\n");
554 printk("\nthe existing dependency chain (in reverse order) is:\n");
555
556 print_circular_bug_entry(entry, depth);
557
558 return 0;
559}
560
561static noinline int print_circular_bug_tail(void)
562{
563 struct task_struct *curr = current;
564 struct lock_list this;
565
566 if (debug_locks_silent)
567 return 0;
568
569 this.class = check_source->class;
570 save_trace(&this.trace);
571 print_circular_bug_entry(&this, 0);
572
573 printk("\nother info that might help us debug this:\n\n");
574 lockdep_print_held_locks(curr);
575
576 printk("\nstack backtrace:\n");
577 dump_stack();
578
579 return 0;
580}
581
582static int noinline print_infinite_recursion_bug(void)
583{
584 __raw_spin_unlock(&hash_lock);
585 DEBUG_LOCKS_WARN_ON(1);
586
587 return 0;
588}
589
590/*
591 * Prove that the dependency graph starting at <entry> can not
592 * lead to <target>. Print an error and return 0 if it does.
593 */
594static noinline int
595check_noncircular(struct lock_class *source, unsigned int depth)
596{
597 struct lock_list *entry;
598
599 debug_atomic_inc(&nr_cyclic_check_recursions);
600 if (depth > max_recursion_depth)
601 max_recursion_depth = depth;
602 if (depth >= 20)
603 return print_infinite_recursion_bug();
604 /*
605 * Check this lock's dependency list:
606 */
607 list_for_each_entry(entry, &source->locks_after, entry) {
608 if (entry->class == check_target->class)
609 return print_circular_bug_header(entry, depth+1);
610 debug_atomic_inc(&nr_cyclic_checks);
611 if (!check_noncircular(entry->class, depth+1))
612 return print_circular_bug_entry(entry, depth+1);
613 }
614 return 1;
615}
616
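The classic shape this catches is the AB-BA pattern (hypothetical code; the two paths never have to actually race for the report to fire):

	/* path 1 */			/* path 2, possibly much later */
	spin_lock(&lock_a);		spin_lock(&lock_b);
	spin_lock(&lock_b);		spin_lock(&lock_a);

Path 1 records the dependency A -> B. When path 2 then tries to record B -> A, the recursion above starts at A, finds B (recorded by path 1) on A's locks_after list, and the "possible circular locking dependency" report is printed.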
617static int very_verbose(struct lock_class *class)
618{
619#if VERY_VERBOSE
620 return class_filter(class);
621#endif
622 return 0;
623}
624#ifdef CONFIG_TRACE_IRQFLAGS
625
626/*
627 * Forwards and backwards subgraph searching, for the purposes of
628 * proving that two subgraphs can be connected by a new dependency
629 * without creating any illegal irq-safe -> irq-unsafe lock dependency.
630 */
631static enum lock_usage_bit find_usage_bit;
632static struct lock_class *forwards_match, *backwards_match;
633
634/*
635 * Find a node in the forwards-direction dependency sub-graph starting
636 * at <source> that matches <find_usage_bit>.
637 *
638 * Return 2 if such a node exists in the subgraph, and put that node
639 * into <forwards_match>.
640 *
641 * Return 1 otherwise and keep <forwards_match> unchanged.
642 * Return 0 on error.
643 */
644static noinline int
645find_usage_forwards(struct lock_class *source, unsigned int depth)
646{
647 struct lock_list *entry;
648 int ret;
649
650 if (depth > max_recursion_depth)
651 max_recursion_depth = depth;
652 if (depth >= 20)
653 return print_infinite_recursion_bug();
654
655 debug_atomic_inc(&nr_find_usage_forwards_checks);
656 if (source->usage_mask & (1 << find_usage_bit)) {
657 forwards_match = source;
658 return 2;
659 }
660
661 /*
662 * Check this lock's dependency list:
663 */
664 list_for_each_entry(entry, &source->locks_after, entry) {
665 debug_atomic_inc(&nr_find_usage_forwards_recursions);
666 ret = find_usage_forwards(entry->class, depth+1);
667 if (ret == 2 || ret == 0)
668 return ret;
669 }
670 return 1;
671}
672
673/*
674 * Find a node in the backwards-direction dependency sub-graph starting
675 * at <source> that matches <find_usage_bit>.
676 *
677 * Return 2 if such a node exists in the subgraph, and put that node
678 * into <backwards_match>.
679 *
680 * Return 1 otherwise and keep <backwards_match> unchanged.
681 * Return 0 on error.
682 */
683static noinline int
684find_usage_backwards(struct lock_class *source, unsigned int depth)
685{
686 struct lock_list *entry;
687 int ret;
688
689 if (depth > max_recursion_depth)
690 max_recursion_depth = depth;
691 if (depth >= 20)
692 return print_infinite_recursion_bug();
693
694 debug_atomic_inc(&nr_find_usage_backwards_checks);
695 if (source->usage_mask & (1 << find_usage_bit)) {
696 backwards_match = source;
697 return 2;
698 }
699
700 /*
701 * Check this lock's dependency list:
702 */
703 list_for_each_entry(entry, &source->locks_before, entry) {
704 debug_atomic_inc(&nr_find_usage_backwards_recursions);
705 ret = find_usage_backwards(entry->class, depth+1);
706 if (ret == 2 || ret == 0)
707 return ret;
708 }
709 return 1;
710}
711
712static int
713print_bad_irq_dependency(struct task_struct *curr,
714 struct held_lock *prev,
715 struct held_lock *next,
716 enum lock_usage_bit bit1,
717 enum lock_usage_bit bit2,
718 const char *irqclass)
719{
720 __raw_spin_unlock(&hash_lock);
721 debug_locks_off();
722 if (debug_locks_silent)
723 return 0;
724
725 printk("\n======================================================\n");
726 printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
727 irqclass, irqclass);
728 printk( "------------------------------------------------------\n");
729 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
730 curr->comm, curr->pid,
731 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
732 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
733 curr->hardirqs_enabled,
734 curr->softirqs_enabled);
735 print_lock(next);
736
737 printk("\nand this task is already holding:\n");
738 print_lock(prev);
739 printk("which would create a new lock dependency:\n");
740 print_lock_name(prev->class);
741 printk(" ->");
742 print_lock_name(next->class);
743 printk("\n");
744
745 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
746 irqclass);
747 print_lock_name(backwards_match);
748 printk("\n... which became %s-irq-safe at:\n", irqclass);
749
750 print_stack_trace(backwards_match->usage_traces + bit1, 1);
751
752 printk("\nto a %s-irq-unsafe lock:\n", irqclass);
753 print_lock_name(forwards_match);
754 printk("\n... which became %s-irq-unsafe at:\n", irqclass);
755 printk("...");
756
757 print_stack_trace(forwards_match->usage_traces + bit2, 1);
758
759 printk("\nother info that might help us debug this:\n\n");
760 lockdep_print_held_locks(curr);
761
762 printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass);
763 print_lock_dependencies(backwards_match, 0);
764
765 printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass);
766 print_lock_dependencies(forwards_match, 0);
767
768 printk("\nstack backtrace:\n");
769 dump_stack();
770
771 return 0;
772}
773
774static int
775check_usage(struct task_struct *curr, struct held_lock *prev,
776 struct held_lock *next, enum lock_usage_bit bit_backwards,
777 enum lock_usage_bit bit_forwards, const char *irqclass)
778{
779 int ret;
780
781 find_usage_bit = bit_backwards;
782 /* fills in <backwards_match> */
783 ret = find_usage_backwards(prev->class, 0);
784 if (!ret || ret == 1)
785 return ret;
786
787 find_usage_bit = bit_forwards;
788 ret = find_usage_forwards(next->class, 0);
789 if (!ret || ret == 1)
790 return ret;
791 /* ret == 2 */
792 return print_bad_irq_dependency(curr, prev, next,
793 bit_backwards, bit_forwards, irqclass);
794}
795
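An illustrative shape of the bug this rejects (hypothetical code): lock a is hardirq-safe because an interrupt handler takes it, lock b is hardirq-unsafe because it is taken with interrupts enabled, and some path then creates an a -> b dependency:

	my_irq_handler():		/* a becomes hardirq-safe */
		spin_lock(&a);

	my_syscall_path():		/* b becomes hardirq-unsafe */
		spin_lock(&b);

	my_other_path():		/* connects them: a -> b */
		spin_lock(&a);
		spin_lock(&b);

If one CPU holds b with interrupts enabled while another CPU runs my_other_path(), holding a and spinning on b, an interrupt on the first CPU that enters my_irq_handler() then spins on a and the machine deadlocks. The dependency is flagged as soon as it is formed, whether or not that timing ever occurs.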
796#endif
797
798static int
799print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
800 struct held_lock *next)
801{
802 debug_locks_off();
803 __raw_spin_unlock(&hash_lock);
804 if (debug_locks_silent)
805 return 0;
806
807 printk("\n=============================================\n");
808 printk( "[ INFO: possible recursive locking detected ]\n");
809 printk( "---------------------------------------------\n");
810 printk("%s/%d is trying to acquire lock:\n",
811 curr->comm, curr->pid);
812 print_lock(next);
813 printk("\nbut task is already holding lock:\n");
814 print_lock(prev);
815
816 printk("\nother info that might help us debug this:\n");
817 lockdep_print_held_locks(curr);
818
819 printk("\nstack backtrace:\n");
820 dump_stack();
821
822 return 0;
823}
824
825/*
826 * Check whether we are holding such a class already.
827 *
828 * (Note that this has to be done separately, because the graph cannot
829 * detect such classes of deadlocks.)
830 *
831 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
832 */
833static int
834check_deadlock(struct task_struct *curr, struct held_lock *next,
835 struct lockdep_map *next_instance, int read)
836{
837 struct held_lock *prev;
838 int i;
839
840 for (i = 0; i < curr->lockdep_depth; i++) {
841 prev = curr->held_locks + i;
842 if (prev->class != next->class)
843 continue;
844 /*
845 * Allow read-after-read recursion of the same
846 * lock class (i.e. read_lock(lock)+read_lock(lock)):
847 */
848 if ((read == 2) && prev->read)
849 return 2;
850 return print_deadlock_bug(curr, prev, next);
851 }
852 return 1;
853}
854
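Concretely (hypothetical code), the first pattern below is what produces the "possible recursive locking detected" report, while the second is the read-after-read recursion that is allowed:

	spin_lock(&dev->lock);
	spin_lock(&dev->lock);		/* same class already held: reported */

	read_lock(&tasklist_lock);
	read_lock(&tasklist_lock);	/* recursive read (read == 2): allowed */

(rwlock read acquisitions are annotated as recursive reads elsewhere in this patch series, which is what makes read == 2 here.)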
855/*
856 * There was a chain-cache miss, and we are about to add a new dependency
857 * to a previous lock. We recursively validate the following rules:
858 *
859 * - would the adding of the <prev> -> <next> dependency create a
860 * circular dependency in the graph? [== circular deadlock]
861 *
862 * - does the new prev->next dependency connect any hardirq-safe lock
863 * (in the full backwards-subgraph starting at <prev>) with any
864 * hardirq-unsafe lock (in the full forwards-subgraph starting at
865 * <next>)? [== illegal lock inversion with hardirq contexts]
866 *
867 * - does the new prev->next dependency connect any softirq-safe lock
868 * (in the full backwards-subgraph starting at <prev>) with any
869 * softirq-unsafe lock (in the full forwards-subgraph starting at
870 * <next>)? [== illegal lock inversion with softirq contexts]
871 *
872 * any of these scenarios could lead to a deadlock.
873 *
874 * Then if all the validations pass, we add the forwards and backwards
875 * dependency.
876 */
877static int
878check_prev_add(struct task_struct *curr, struct held_lock *prev,
879 struct held_lock *next)
880{
881 struct lock_list *entry;
882 int ret;
883
884 /*
885 * Prove that the new <prev> -> <next> dependency would not
886 * create a circular dependency in the graph. (We do this by
887 * forward-recursing into the graph starting at <next>, and
888 * checking whether we can reach <prev>.)
889 *
890 * We are using global variables to control the recursion, to
891 * keep the stackframe size of the recursive functions low:
892 */
893 check_source = next;
894 check_target = prev;
895 if (!(check_noncircular(next->class, 0)))
896 return print_circular_bug_tail();
897
898#ifdef CONFIG_TRACE_IRQFLAGS
899 /*
900 * Prove that the new dependency does not connect a hardirq-safe
901 * lock with a hardirq-unsafe lock - to achieve this we search
902 * the backwards-subgraph starting at <prev>, and the
903 * forwards-subgraph starting at <next>:
904 */
905 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
906 LOCK_ENABLED_HARDIRQS, "hard"))
907 return 0;
908
909 /*
910 * Prove that the new dependency does not connect a hardirq-safe-read
911 * lock with a hardirq-unsafe lock - to achieve this we search
912 * the backwards-subgraph starting at <prev>, and the
913 * forwards-subgraph starting at <next>:
914 */
915 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
916 LOCK_ENABLED_HARDIRQS, "hard-read"))
917 return 0;
918
919 /*
920 * Prove that the new dependency does not connect a softirq-safe
921 * lock with a softirq-unsafe lock - to achieve this we search
922 * the backwards-subgraph starting at <prev>, and the
923 * forwards-subgraph starting at <next>:
924 */
925 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
926 LOCK_ENABLED_SOFTIRQS, "soft"))
927 return 0;
928 /*
929 * Prove that the new dependency does not connect a softirq-safe-read
930 * lock with a softirq-unsafe lock - to achieve this we search
931 * the backwards-subgraph starting at <prev>, and the
932 * forwards-subgraph starting at <next>:
933 */
934 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
935 LOCK_ENABLED_SOFTIRQS, "soft"))
936 return 0;
937#endif
938 /*
939 * For recursive read-locks we do all the dependency checks,
 940 * but we don't store read-triggered dependencies (only
941 * write-triggered dependencies). This ensures that only the
942 * write-side dependencies matter, and that if for example a
943 * write-lock never takes any other locks, then the reads are
944 * equivalent to a NOP.
945 */
946 if (next->read == 2 || prev->read == 2)
947 return 1;
948 /*
949 * Is the <prev> -> <next> dependency already present?
950 *
951 * (this may occur even though this is a new chain: consider
952 * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
953 * chains - the second one will be new, but L1 already has
954 * L2 added to its dependency list, due to the first chain.)
955 */
956 list_for_each_entry(entry, &prev->class->locks_after, entry) {
957 if (entry->class == next->class)
958 return 2;
959 }
960
961 /*
962 * Ok, all validations passed, add the new lock
963 * to the previous lock's dependency list:
964 */
965 ret = add_lock_to_list(prev->class, next->class,
966 &prev->class->locks_after, next->acquire_ip);
967 if (!ret)
968 return 0;
969 /*
970 * Return value of 2 signals 'dependency already added',
 971 * in that case we don't have to add the backlink either.
972 */
973 if (ret == 2)
974 return 2;
975 ret = add_lock_to_list(next->class, prev->class,
976 &next->class->locks_before, next->acquire_ip);
977
978 /*
979 * Debugging printouts:
980 */
981 if (verbose(prev->class) || verbose(next->class)) {
982 __raw_spin_unlock(&hash_lock);
983 printk("\n new dependency: ");
984 print_lock_name(prev->class);
985 printk(" => ");
986 print_lock_name(next->class);
987 printk("\n");
988 dump_stack();
989 __raw_spin_lock(&hash_lock);
990 }
991 return 1;
992}
993
994/*
995 * Add the dependency to all directly-previous locks that are 'relevant'.
996 * The ones that are relevant are (in increasing distance from curr):
997 * all consecutive trylock entries and the final non-trylock entry - or
998 * the end of this context's lock-chain - whichever comes first.
999 */
1000static int
1001check_prevs_add(struct task_struct *curr, struct held_lock *next)
1002{
1003 int depth = curr->lockdep_depth;
1004 struct held_lock *hlock;
1005
1006 /*
1007 * Debugging checks.
1008 *
1009 * Depth must not be zero for a non-head lock:
1010 */
1011 if (!depth)
1012 goto out_bug;
1013 /*
1014 * At least two relevant locks must exist for this
1015 * to be a head:
1016 */
1017 if (curr->held_locks[depth].irq_context !=
1018 curr->held_locks[depth-1].irq_context)
1019 goto out_bug;
1020
1021 for (;;) {
1022 hlock = curr->held_locks + depth-1;
1023 /*
1024 * Only non-recursive-read entries get new dependencies
1025 * added:
1026 */
1027 if (hlock->read != 2) {
1028 check_prev_add(curr, hlock, next);
1029 /*
1030 * Stop after the first non-trylock entry,
1031 * as non-trylock entries have added their
1032 * own direct dependencies already, so this
1033 * lock is connected to them indirectly:
1034 */
1035 if (!hlock->trylock)
1036 break;
1037 }
1038 depth--;
1039 /*
1040 * End of lock-stack?
1041 */
1042 if (!depth)
1043 break;
1044 /*
1045 * Stop the search if we cross into another context:
1046 */
1047 if (curr->held_locks[depth].irq_context !=
1048 curr->held_locks[depth-1].irq_context)
1049 break;
1050 }
1051 return 1;
1052out_bug:
1053 __raw_spin_unlock(&hash_lock);
1054 DEBUG_LOCKS_WARN_ON(1);
1055
1056 return 0;
1057}
1058
1059
1060/*
1061 * Is this the address of a static object:
1062 */
1063static int static_obj(void *obj)
1064{
1065 unsigned long start = (unsigned long) &_stext,
1066 end = (unsigned long) &_end,
1067 addr = (unsigned long) obj;
1068#ifdef CONFIG_SMP
1069 int i;
1070#endif
1071
1072 /*
1073 * static variable?
1074 */
1075 if ((addr >= start) && (addr < end))
1076 return 1;
1077
1078#ifdef CONFIG_SMP
1079 /*
1080 * percpu var?
1081 */
1082 for_each_possible_cpu(i) {
1083 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
1084 end = (unsigned long) &__per_cpu_end + per_cpu_offset(i);
1085
1086 if ((addr >= start) && (addr < end))
1087 return 1;
1088 }
1089#endif
1090
1091 /*
1092 * module var?
1093 */
1094 return is_module_address(addr);
1095}
1096
1097/*
1098 * To make lock name printouts unique, we calculate a unique
1099 * class->name_version generation counter:
1100 */
1101static int count_matching_names(struct lock_class *new_class)
1102{
1103 struct lock_class *class;
1104 int count = 0;
1105
1106 if (!new_class->name)
1107 return 0;
1108
1109 list_for_each_entry(class, &all_lock_classes, lock_entry) {
1110 if (new_class->key - new_class->subclass == class->key)
1111 return class->name_version;
1112 if (class->name && !strcmp(class->name, new_class->name))
1113 count = max(count, class->name_version);
1114 }
1115
1116 return count + 1;
1117}
1118
1119extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
1120
1121/*
1122 * Register a lock's class in the hash-table, if the class is not present
1123 * yet. Otherwise we look it up. We cache the result in the lock object
1124 * itself, so actual lookup of the hash should be once per lock object.
1125 */
1126static inline struct lock_class *
1127register_lock_class(struct lockdep_map *lock, unsigned int subclass)
1128{
1129 struct lockdep_subclass_key *key;
1130 struct list_head *hash_head;
1131 struct lock_class *class;
1132
1133#ifdef CONFIG_DEBUG_LOCKDEP
1134 /*
1135 * If the architecture calls into lockdep before initializing
1136 * the hashes then we'll warn about it later. (we cannot printk
1137 * right now)
1138 */
1139 if (unlikely(!lockdep_initialized)) {
1140 lockdep_init();
1141 lockdep_init_error = 1;
1142 }
1143#endif
1144
1145 /*
1146 * Static locks do not have their class-keys yet - for them the key
1147 * is the lock object itself:
1148 */
1149 if (unlikely(!lock->key))
1150 lock->key = (void *)lock;
1151
1152 /*
1153 * NOTE: the class-key must be unique. For dynamic locks, a static
1154 * lock_class_key variable is passed in through the mutex_init()
1155 * (or spin_lock_init()) call - which acts as the key. For static
1156 * locks we use the lock object itself as the key.
1157 */
1158 if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
1159 __error_too_big_MAX_LOCKDEP_SUBCLASSES();
1160
1161 key = lock->key->subkeys + subclass;
1162
1163 hash_head = classhashentry(key);
1164
1165 /*
1166 * We can walk the hash lockfree, because the hash only
1167 * grows, and we are careful when adding entries to the end:
1168 */
1169 list_for_each_entry(class, hash_head, hash_entry)
1170 if (class->key == key)
1171 goto out_set;
1172
1173 /*
1174 * Debug-check: all keys must be persistent!
1175 */
1176 if (!static_obj(lock->key)) {
1177 debug_locks_off();
1178 printk("INFO: trying to register non-static key.\n");
1179 printk("the code is fine but needs lockdep annotation.\n");
1180 printk("turning off the locking correctness validator.\n");
1181 dump_stack();
1182
1183 return NULL;
1184 }
1185
1186 __raw_spin_lock(&hash_lock);
1187 /*
1188 * We have to do the hash-walk again, to avoid races
1189 * with another CPU:
1190 */
1191 list_for_each_entry(class, hash_head, hash_entry)
1192 if (class->key == key)
1193 goto out_unlock_set;
1194 /*
1195 * Allocate a new key from the static array, and add it to
1196 * the hash:
1197 */
1198 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
1199 __raw_spin_unlock(&hash_lock);
1200 debug_locks_off();
1201 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
1202 printk("turning off the locking correctness validator.\n");
1203 return NULL;
1204 }
1205 class = lock_classes + nr_lock_classes++;
1206 debug_atomic_inc(&nr_unused_locks);
1207 class->key = key;
1208 class->name = lock->name;
1209 class->subclass = subclass;
1210 INIT_LIST_HEAD(&class->lock_entry);
1211 INIT_LIST_HEAD(&class->locks_before);
1212 INIT_LIST_HEAD(&class->locks_after);
1213 class->name_version = count_matching_names(class);
1214 /*
1215 * We use RCU's safe list-add method to make
1216 * parallel walking of the hash-list safe:
1217 */
1218 list_add_tail_rcu(&class->hash_entry, hash_head);
1219
1220 if (verbose(class)) {
1221 __raw_spin_unlock(&hash_lock);
1222 printk("\nnew class %p: %s", class->key, class->name);
1223 if (class->name_version > 1)
1224 printk("#%d", class->name_version);
1225 printk("\n");
1226 dump_stack();
1227 __raw_spin_lock(&hash_lock);
1228 }
1229out_unlock_set:
1230 __raw_spin_unlock(&hash_lock);
1231
1232out_set:
1233 lock->class[subclass] = class;
1234
1235 DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
1236
1237 return class;
1238}
1239
1240/*
1241 * Look up a dependency chain. If the key is not present yet then
1242 * add it and return 0 - in this case the new dependency chain is
1243 * validated. If the key is already hashed, return 1.
1244 */
1245static inline int lookup_chain_cache(u64 chain_key)
1246{
1247 struct list_head *hash_head = chainhashentry(chain_key);
1248 struct lock_chain *chain;
1249
1250 DEBUG_LOCKS_WARN_ON(!irqs_disabled());
1251 /*
1252 * We can walk it lock-free, because entries only get added
1253 * to the hash:
1254 */
1255 list_for_each_entry(chain, hash_head, entry) {
1256 if (chain->chain_key == chain_key) {
1257cache_hit:
1258 debug_atomic_inc(&chain_lookup_hits);
1259 /*
1260 * In the debugging case, force redundant checking
1261 * by returning 1:
1262 */
1263#ifdef CONFIG_DEBUG_LOCKDEP
1264 __raw_spin_lock(&hash_lock);
1265 return 1;
1266#endif
1267 return 0;
1268 }
1269 }
1270 /*
1271 * Allocate a new chain entry from the static array, and add
1272 * it to the hash:
1273 */
1274 __raw_spin_lock(&hash_lock);
1275 /*
1276 * We have to walk the chain again locked - to avoid duplicates:
1277 */
1278 list_for_each_entry(chain, hash_head, entry) {
1279 if (chain->chain_key == chain_key) {
1280 __raw_spin_unlock(&hash_lock);
1281 goto cache_hit;
1282 }
1283 }
1284 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
1285 __raw_spin_unlock(&hash_lock);
1286 debug_locks_off();
1287 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
1288 printk("turning off the locking correctness validator.\n");
1289 return 0;
1290 }
1291 chain = lock_chains + nr_lock_chains++;
1292 chain->chain_key = chain_key;
1293 list_add_tail_rcu(&chain->entry, hash_head);
1294 debug_atomic_inc(&chain_lookup_misses);
1295#ifdef CONFIG_TRACE_IRQFLAGS
1296 if (current->hardirq_context)
1297 nr_hardirq_chains++;
1298 else {
1299 if (current->softirq_context)
1300 nr_softirq_chains++;
1301 else
1302 nr_process_chains++;
1303 }
1304#else
1305 nr_process_chains++;
1306#endif
1307
1308 return 1;
1309}
1310
1311/*
1312 * We are building curr_chain_key incrementally, so double-check
1313 * it from scratch, to make sure that it's done correctly:
1314 */
1315static void check_chain_key(struct task_struct *curr)
1316{
1317#ifdef CONFIG_DEBUG_LOCKDEP
1318 struct held_lock *hlock, *prev_hlock = NULL;
1319 unsigned int i, id;
1320 u64 chain_key = 0;
1321
1322 for (i = 0; i < curr->lockdep_depth; i++) {
1323 hlock = curr->held_locks + i;
1324 if (chain_key != hlock->prev_chain_key) {
1325 debug_locks_off();
1326 printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
1327 curr->lockdep_depth, i,
1328 (unsigned long long)chain_key,
1329 (unsigned long long)hlock->prev_chain_key);
1330 WARN_ON(1);
1331 return;
1332 }
1333 id = hlock->class - lock_classes;
1334 DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);
1335 if (prev_hlock && (prev_hlock->irq_context !=
1336 hlock->irq_context))
1337 chain_key = 0;
1338 chain_key = iterate_chain_key(chain_key, id);
1339 prev_hlock = hlock;
1340 }
1341 if (chain_key != curr->curr_chain_key) {
1342 debug_locks_off();
1343 printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
1344 curr->lockdep_depth, i,
1345 (unsigned long long)chain_key,
1346 (unsigned long long)curr->curr_chain_key);
1347 WARN_ON(1);
1348 }
1349#endif
1350}
1351
1352#ifdef CONFIG_TRACE_IRQFLAGS
1353
1354/*
1355 * print irq inversion bug:
1356 */
1357static int
1358print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1359 struct held_lock *this, int forwards,
1360 const char *irqclass)
1361{
1362 __raw_spin_unlock(&hash_lock);
1363 debug_locks_off();
1364 if (debug_locks_silent)
1365 return 0;
1366
1367 printk("\n=========================================================\n");
1368 printk( "[ INFO: possible irq lock inversion dependency detected ]\n");
1369 printk( "---------------------------------------------------------\n");
1370 printk("%s/%d just changed the state of lock:\n",
1371 curr->comm, curr->pid);
1372 print_lock(this);
1373 if (forwards)
1374 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
1375 else
1376 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
1377 print_lock_name(other);
1378 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1379
1380 printk("\nother info that might help us debug this:\n");
1381 lockdep_print_held_locks(curr);
1382
1383 printk("\nthe first lock's dependencies:\n");
1384 print_lock_dependencies(this->class, 0);
1385
1386 printk("\nthe second lock's dependencies:\n");
1387 print_lock_dependencies(other, 0);
1388
1389 printk("\nstack backtrace:\n");
1390 dump_stack();
1391
1392 return 0;
1393}
1394
1395/*
1396 * Prove that in the forwards-direction subgraph starting at <this>
1397 * there is no lock matching <mask>:
1398 */
1399static int
1400check_usage_forwards(struct task_struct *curr, struct held_lock *this,
1401 enum lock_usage_bit bit, const char *irqclass)
1402{
1403 int ret;
1404
1405 find_usage_bit = bit;
1406 /* fills in <forwards_match> */
1407 ret = find_usage_forwards(this->class, 0);
1408 if (!ret || ret == 1)
1409 return ret;
1410
1411 return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass);
1412}
1413
1414/*
1415 * Prove that in the backwards-direction subgraph starting at <this>
1416 * there is no lock matching <mask>:
1417 */
1418static int
1419check_usage_backwards(struct task_struct *curr, struct held_lock *this,
1420 enum lock_usage_bit bit, const char *irqclass)
1421{
1422 int ret;
1423
1424 find_usage_bit = bit;
1425 /* fills in <backwards_match> */
1426 ret = find_usage_backwards(this->class, 0);
1427 if (!ret || ret == 1)
1428 return ret;
1429
1430 return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
1431}
1432
1433static inline void print_irqtrace_events(struct task_struct *curr)
1434{
1435 printk("irq event stamp: %u\n", curr->irq_events);
1436 printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event);
1437 print_ip_sym(curr->hardirq_enable_ip);
1438 printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
1439 print_ip_sym(curr->hardirq_disable_ip);
1440 printk("softirqs last enabled at (%u): ", curr->softirq_enable_event);
1441 print_ip_sym(curr->softirq_enable_ip);
1442 printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
1443 print_ip_sym(curr->softirq_disable_ip);
1444}
1445
1446#else
1447static inline void print_irqtrace_events(struct task_struct *curr)
1448{
1449}
1450#endif
1451
1452static int
1453print_usage_bug(struct task_struct *curr, struct held_lock *this,
1454 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
1455{
1456 __raw_spin_unlock(&hash_lock);
1457 debug_locks_off();
1458 if (debug_locks_silent)
1459 return 0;
1460
1461 printk("\n=================================\n");
1462 printk( "[ INFO: inconsistent lock state ]\n");
1463 printk( "---------------------------------\n");
1464
1465 printk("inconsistent {%s} -> {%s} usage.\n",
1466 usage_str[prev_bit], usage_str[new_bit]);
1467
1468 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1469 curr->comm, curr->pid,
1470 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1471 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1472 trace_hardirqs_enabled(curr),
1473 trace_softirqs_enabled(curr));
1474 print_lock(this);
1475
1476 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1477 print_stack_trace(this->class->usage_traces + prev_bit, 1);
1478
1479 print_irqtrace_events(curr);
1480 printk("\nother info that might help us debug this:\n");
1481 lockdep_print_held_locks(curr);
1482
1483 printk("\nstack backtrace:\n");
1484 dump_stack();
1485
1486 return 0;
1487}
1488
1489/*
1490 * Print out an error if an invalid bit is set:
1491 */
1492static inline int
1493valid_state(struct task_struct *curr, struct held_lock *this,
1494 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1495{
1496 if (unlikely(this->class->usage_mask & (1 << bad_bit)))
1497 return print_usage_bug(curr, this, bad_bit, new_bit);
1498 return 1;
1499}
1500
1501#define STRICT_READ_CHECKS 1
1502
1503/*
1504 * Mark a lock with a usage bit, and validate the state transition:
1505 */
1506static int mark_lock(struct task_struct *curr, struct held_lock *this,
1507 enum lock_usage_bit new_bit, unsigned long ip)
1508{
1509 unsigned int new_mask = 1 << new_bit, ret = 1;
1510
1511 /*
1512 * If already set then do not dirty the cacheline,
1513 * nor do any checks:
1514 */
1515 if (likely(this->class->usage_mask & new_mask))
1516 return 1;
1517
1518 __raw_spin_lock(&hash_lock);
1519 /*
 1520 * Make sure we didn't race:
1521 */
1522 if (unlikely(this->class->usage_mask & new_mask)) {
1523 __raw_spin_unlock(&hash_lock);
1524 return 1;
1525 }
1526
1527 this->class->usage_mask |= new_mask;
1528
1529#ifdef CONFIG_TRACE_IRQFLAGS
1530 if (new_bit == LOCK_ENABLED_HARDIRQS ||
1531 new_bit == LOCK_ENABLED_HARDIRQS_READ)
1532 ip = curr->hardirq_enable_ip;
1533 else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
1534 new_bit == LOCK_ENABLED_SOFTIRQS_READ)
1535 ip = curr->softirq_enable_ip;
1536#endif
1537 if (!save_trace(this->class->usage_traces + new_bit))
1538 return 0;
1539
1540 switch (new_bit) {
1541#ifdef CONFIG_TRACE_IRQFLAGS
1542 case LOCK_USED_IN_HARDIRQ:
1543 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1544 return 0;
1545 if (!valid_state(curr, this, new_bit,
1546 LOCK_ENABLED_HARDIRQS_READ))
1547 return 0;
1548 /*
1549 * just marked it hardirq-safe, check that this lock
1550 * took no hardirq-unsafe lock in the past:
1551 */
1552 if (!check_usage_forwards(curr, this,
1553 LOCK_ENABLED_HARDIRQS, "hard"))
1554 return 0;
1555#if STRICT_READ_CHECKS
1556 /*
1557 * just marked it hardirq-safe, check that this lock
1558 * took no hardirq-unsafe-read lock in the past:
1559 */
1560 if (!check_usage_forwards(curr, this,
1561 LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
1562 return 0;
1563#endif
1564 if (hardirq_verbose(this->class))
1565 ret = 2;
1566 break;
1567 case LOCK_USED_IN_SOFTIRQ:
1568 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1569 return 0;
1570 if (!valid_state(curr, this, new_bit,
1571 LOCK_ENABLED_SOFTIRQS_READ))
1572 return 0;
1573 /*
1574 * just marked it softirq-safe, check that this lock
1575 * took no softirq-unsafe lock in the past:
1576 */
1577 if (!check_usage_forwards(curr, this,
1578 LOCK_ENABLED_SOFTIRQS, "soft"))
1579 return 0;
1580#if STRICT_READ_CHECKS
1581 /*
1582 * just marked it softirq-safe, check that this lock
1583 * took no softirq-unsafe-read lock in the past:
1584 */
1585 if (!check_usage_forwards(curr, this,
1586 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
1587 return 0;
1588#endif
1589 if (softirq_verbose(this->class))
1590 ret = 2;
1591 break;
1592 case LOCK_USED_IN_HARDIRQ_READ:
1593 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1594 return 0;
1595 /*
1596 * just marked it hardirq-read-safe, check that this lock
1597 * took no hardirq-unsafe lock in the past:
1598 */
1599 if (!check_usage_forwards(curr, this,
1600 LOCK_ENABLED_HARDIRQS, "hard"))
1601 return 0;
1602 if (hardirq_verbose(this->class))
1603 ret = 2;
1604 break;
1605 case LOCK_USED_IN_SOFTIRQ_READ:
1606 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1607 return 0;
1608 /*
1609 * just marked it softirq-read-safe, check that this lock
1610 * took no softirq-unsafe lock in the past:
1611 */
1612 if (!check_usage_forwards(curr, this,
1613 LOCK_ENABLED_SOFTIRQS, "soft"))
1614 return 0;
1615 if (softirq_verbose(this->class))
1616 ret = 2;
1617 break;
1618 case LOCK_ENABLED_HARDIRQS:
1619 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1620 return 0;
1621 if (!valid_state(curr, this, new_bit,
1622 LOCK_USED_IN_HARDIRQ_READ))
1623 return 0;
1624 /*
1625 * just marked it hardirq-unsafe, check that no hardirq-safe
1626 * lock in the system ever took it in the past:
1627 */
1628 if (!check_usage_backwards(curr, this,
1629 LOCK_USED_IN_HARDIRQ, "hard"))
1630 return 0;
1631#if STRICT_READ_CHECKS
1632 /*
1633 * just marked it hardirq-unsafe, check that no
1634 * hardirq-safe-read lock in the system ever took
1635 * it in the past:
1636 */
1637 if (!check_usage_backwards(curr, this,
1638 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
1639 return 0;
1640#endif
1641 if (hardirq_verbose(this->class))
1642 ret = 2;
1643 break;
1644 case LOCK_ENABLED_SOFTIRQS:
1645 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1646 return 0;
1647 if (!valid_state(curr, this, new_bit,
1648 LOCK_USED_IN_SOFTIRQ_READ))
1649 return 0;
1650 /*
1651 * just marked it softirq-unsafe, check that no softirq-safe
1652 * lock in the system ever took it in the past:
1653 */
1654 if (!check_usage_backwards(curr, this,
1655 LOCK_USED_IN_SOFTIRQ, "soft"))
1656 return 0;
1657#if STRICT_READ_CHECKS
1658 /*
1659 * just marked it softirq-unsafe, check that no
1660 * softirq-safe-read lock in the system ever took
1661 * it in the past:
1662 */
1663 if (!check_usage_backwards(curr, this,
1664 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
1665 return 0;
1666#endif
1667 if (softirq_verbose(this->class))
1668 ret = 2;
1669 break;
1670 case LOCK_ENABLED_HARDIRQS_READ:
1671 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1672 return 0;
1673#if STRICT_READ_CHECKS
1674 /*
1675 * just marked it hardirq-read-unsafe, check that no
1676 * hardirq-safe lock in the system ever took it in the past:
1677 */
1678 if (!check_usage_backwards(curr, this,
1679 LOCK_USED_IN_HARDIRQ, "hard"))
1680 return 0;
1681#endif
1682 if (hardirq_verbose(this->class))
1683 ret = 2;
1684 break;
1685 case LOCK_ENABLED_SOFTIRQS_READ:
1686 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1687 return 0;
1688#if STRICT_READ_CHECKS
1689 /*
1690 * just marked it softirq-read-unsafe, check that no
1691 * softirq-safe lock in the system ever took it in the past:
1692 */
1693 if (!check_usage_backwards(curr, this,
1694 LOCK_USED_IN_SOFTIRQ, "soft"))
1695 return 0;
1696#endif
1697 if (softirq_verbose(this->class))
1698 ret = 2;
1699 break;
1700#endif
1701 case LOCK_USED:
1702 /*
1703 * Add it to the global list of classes:
1704 */
1705 list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
1706 debug_atomic_dec(&nr_unused_locks);
1707 break;
1708 default:
1709 debug_locks_off();
1710 WARN_ON(1);
1711 return 0;
1712 }
1713
1714 __raw_spin_unlock(&hash_lock);
1715
1716 /*
1717 * We must printk outside of the hash_lock:
1718 */
1719 if (ret == 2) {
1720 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
1721 print_lock(this);
1722 print_irqtrace_events(curr);
1723 dump_stack();
1724 }
1725
1726 return ret;
1727}
1728
1729#ifdef CONFIG_TRACE_IRQFLAGS
1730/*
1731 * Mark all held locks with a usage bit:
1732 */
1733static int
1734mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
1735{
1736 enum lock_usage_bit usage_bit;
1737 struct held_lock *hlock;
1738 int i;
1739
1740 for (i = 0; i < curr->lockdep_depth; i++) {
1741 hlock = curr->held_locks + i;
1742
1743 if (hardirq) {
1744 if (hlock->read)
1745 usage_bit = LOCK_ENABLED_HARDIRQS_READ;
1746 else
1747 usage_bit = LOCK_ENABLED_HARDIRQS;
1748 } else {
1749 if (hlock->read)
1750 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
1751 else
1752 usage_bit = LOCK_ENABLED_SOFTIRQS;
1753 }
1754 if (!mark_lock(curr, hlock, usage_bit, ip))
1755 return 0;
1756 }
1757
1758 return 1;
1759}
1760
1761/*
1762 * Debugging helper: via this flag we know that we are in
1763 * 'early bootup code', and will warn about any invalid irqs-on event:
1764 */
1765static int early_boot_irqs_enabled;
1766
1767void early_boot_irqs_off(void)
1768{
1769 early_boot_irqs_enabled = 0;
1770}
1771
1772void early_boot_irqs_on(void)
1773{
1774 early_boot_irqs_enabled = 1;
1775}
1776
1777/*
1778 * Hardirqs will be enabled:
1779 */
1780void trace_hardirqs_on(void)
1781{
1782 struct task_struct *curr = current;
1783 unsigned long ip;
1784
1785 if (unlikely(!debug_locks || current->lockdep_recursion))
1786 return;
1787
1788 if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
1789 return;
1790
1791 if (unlikely(curr->hardirqs_enabled)) {
1792 debug_atomic_inc(&redundant_hardirqs_on);
1793 return;
1794 }
1795 /* we'll do an OFF -> ON transition: */
1796 curr->hardirqs_enabled = 1;
1797 ip = (unsigned long) __builtin_return_address(0);
1798
1799 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1800 return;
1801 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
1802 return;
1803 /*
1804 * We are going to turn hardirqs on, so set the
1805 * usage bit for all held locks:
1806 */
1807 if (!mark_held_locks(curr, 1, ip))
1808 return;
1809 /*
1810 * If we have softirqs enabled, then set the usage
1811 * bit for all held locks. (disabled hardirqs prevented
1812 * this bit from being set before)
1813 */
1814 if (curr->softirqs_enabled)
1815 if (!mark_held_locks(curr, 0, ip))
1816 return;
1817
1818 curr->hardirq_enable_ip = ip;
1819 curr->hardirq_enable_event = ++curr->irq_events;
1820 debug_atomic_inc(&hardirqs_on_events);
1821}
1822
1823EXPORT_SYMBOL(trace_hardirqs_on);
1824
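A sketch of why the held locks are re-marked here (hypothetical code; the irq-flags tracing wrappers that route local_irq_enable() through trace_hardirqs_on() are added elsewhere in this series):

	local_irq_disable();
	spin_lock(&a);
	local_irq_enable();	/* a, still held, is now marked hardirq-on-W */

If an interrupt handler somewhere also takes a, the hardirq-safe vs. hardirq-unsafe conflict is reported even though no interrupt has actually hit this window yet.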
1825/*
1826 * Hardirqs were disabled:
1827 */
1828void trace_hardirqs_off(void)
1829{
1830 struct task_struct *curr = current;
1831
1832 if (unlikely(!debug_locks || current->lockdep_recursion))
1833 return;
1834
1835 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1836 return;
1837
1838 if (curr->hardirqs_enabled) {
1839 /*
1840 * We have done an ON -> OFF transition:
1841 */
1842 curr->hardirqs_enabled = 0;
1843 curr->hardirq_disable_ip = _RET_IP_;
1844 curr->hardirq_disable_event = ++curr->irq_events;
1845 debug_atomic_inc(&hardirqs_off_events);
1846 } else
1847 debug_atomic_inc(&redundant_hardirqs_off);
1848}
1849
1850EXPORT_SYMBOL(trace_hardirqs_off);
1851
1852/*
1853 * Softirqs will be enabled:
1854 */
1855void trace_softirqs_on(unsigned long ip)
1856{
1857 struct task_struct *curr = current;
1858
1859 if (unlikely(!debug_locks))
1860 return;
1861
1862 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1863 return;
1864
1865 if (curr->softirqs_enabled) {
1866 debug_atomic_inc(&redundant_softirqs_on);
1867 return;
1868 }
1869
1870 /*
1871 * We'll do an OFF -> ON transition:
1872 */
1873 curr->softirqs_enabled = 1;
1874 curr->softirq_enable_ip = ip;
1875 curr->softirq_enable_event = ++curr->irq_events;
1876 debug_atomic_inc(&softirqs_on_events);
1877 /*
1878 * We are going to turn softirqs on, so set the
1879 * usage bit for all held locks, if hardirqs are
1880 * enabled too:
1881 */
1882 if (curr->hardirqs_enabled)
1883 mark_held_locks(curr, 0, ip);
1884}
1885
1886/*
1887 * Softirqs were disabled:
1888 */
1889void trace_softirqs_off(unsigned long ip)
1890{
1891 struct task_struct *curr = current;
1892
1893 if (unlikely(!debug_locks))
1894 return;
1895
1896 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1897 return;
1898
1899 if (curr->softirqs_enabled) {
1900 /*
1901 * We have done an ON -> OFF transition:
1902 */
1903 curr->softirqs_enabled = 0;
1904 curr->softirq_disable_ip = ip;
1905 curr->softirq_disable_event = ++curr->irq_events;
1906 debug_atomic_inc(&softirqs_off_events);
1907 DEBUG_LOCKS_WARN_ON(!softirq_count());
1908 } else
1909 debug_atomic_inc(&redundant_softirqs_off);
1910}
1911
1912#endif
1913
1914/*
1915 * Initialize a lock instance's lock-class mapping info:
1916 */
1917void lockdep_init_map(struct lockdep_map *lock, const char *name,
1918 struct lock_class_key *key)
1919{
1920 if (unlikely(!debug_locks))
1921 return;
1922
1923 if (DEBUG_LOCKS_WARN_ON(!key))
1924 return;
1925 if (DEBUG_LOCKS_WARN_ON(!name))
1926 return;
1927 /*
1928 * Sanity check, the lock-class key must be persistent:
1929 */
1930 if (!static_obj(key)) {
1931 printk("BUG: key %p not in .data!\n", key);
1932 DEBUG_LOCKS_WARN_ON(1);
1933 return;
1934 }
1935 lock->name = name;
1936 lock->key = key;
1937 memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
1938}
1939
1940EXPORT_SYMBOL_GPL(lockdep_init_map);
1941
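For reference, the initializer wrappers built on top of this elsewhere in the series take roughly the following shape (assumed form, shown for illustration; the macro and field names here are placeholders). The static lock_class_key per initialization site is what gives each distinct init site its own lock class:

#define my_lock_init(lockp, lockname)				\
do {								\
	static struct lock_class_key __key;			\
								\
	lockdep_init_map(&(lockp)->dep_map, lockname, &__key);	\
} while (0)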
1942/*
1943 * This gets called for every mutex_lock*()/spin_lock*() operation.
1944 * We maintain the dependency maps and validate the locking attempt:
1945 */
1946static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
1947 int trylock, int read, int check, int hardirqs_off,
1948 unsigned long ip)
1949{
1950 struct task_struct *curr = current;
1951 struct held_lock *hlock;
1952 struct lock_class *class;
1953 unsigned int depth, id;
1954 int chain_head = 0;
1955 u64 chain_key;
1956
1957 if (unlikely(!debug_locks))
1958 return 0;
1959
1960 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1961 return 0;
1962
1963 if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
1964 debug_locks_off();
1965 printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
1966 printk("turning off the locking correctness validator.\n");
1967 return 0;
1968 }
1969
1970 class = lock->class[subclass];
1971 /* not cached yet? */
1972 if (unlikely(!class)) {
1973 class = register_lock_class(lock, subclass);
1974 if (!class)
1975 return 0;
1976 }
1977 debug_atomic_inc((atomic_t *)&class->ops);
1978 if (very_verbose(class)) {
1979 printk("\nacquire class [%p] %s", class->key, class->name);
1980 if (class->name_version > 1)
1981 printk("#%d", class->name_version);
1982 printk("\n");
1983 dump_stack();
1984 }
1985
1986 /*
1987 * Add the lock to the list of currently held locks.
 1988 * (we don't increase the depth just yet, up until the
1989 * dependency checks are done)
1990 */
1991 depth = curr->lockdep_depth;
1992 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
1993 return 0;
1994
1995 hlock = curr->held_locks + depth;
1996
1997 hlock->class = class;
1998 hlock->acquire_ip = ip;
1999 hlock->instance = lock;
2000 hlock->trylock = trylock;
2001 hlock->read = read;
2002 hlock->check = check;
2003 hlock->hardirqs_off = hardirqs_off;
2004
2005 if (check != 2)
2006 goto out_calc_hash;
2007#ifdef CONFIG_TRACE_IRQFLAGS
2008 /*
2009 * If non-trylock use in a hardirq or softirq context, then
2010 * mark the lock as used in these contexts:
2011 */
2012 if (!trylock) {
2013 if (read) {
2014 if (curr->hardirq_context)
2015 if (!mark_lock(curr, hlock,
2016 LOCK_USED_IN_HARDIRQ_READ, ip))
2017 return 0;
2018 if (curr->softirq_context)
2019 if (!mark_lock(curr, hlock,
2020 LOCK_USED_IN_SOFTIRQ_READ, ip))
2021 return 0;
2022 } else {
2023 if (curr->hardirq_context)
2024 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip))
2025 return 0;
2026 if (curr->softirq_context)
2027 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip))
2028 return 0;
2029 }
2030 }
2031 if (!hardirqs_off) {
2032 if (read) {
2033 if (!mark_lock(curr, hlock,
2034 LOCK_ENABLED_HARDIRQS_READ, ip))
2035 return 0;
2036 if (curr->softirqs_enabled)
2037 if (!mark_lock(curr, hlock,
2038 LOCK_ENABLED_SOFTIRQS_READ, ip))
2039 return 0;
2040 } else {
2041 if (!mark_lock(curr, hlock,
2042 LOCK_ENABLED_HARDIRQS, ip))
2043 return 0;
2044 if (curr->softirqs_enabled)
2045 if (!mark_lock(curr, hlock,
2046 LOCK_ENABLED_SOFTIRQS, ip))
2047 return 0;
2048 }
2049 }
2050#endif
2051 /* mark it as used: */
2052 if (!mark_lock(curr, hlock, LOCK_USED, ip))
2053 return 0;
2054out_calc_hash:
2055 /*
 2056	 * Calculate the chain hash: it's the combined hash of all the
2057 * lock keys along the dependency chain. We save the hash value
2058 * at every step so that we can get the current hash easily
2059 * after unlock. The chain hash is then used to cache dependency
2060 * results.
2061 *
 2062	 * The 'key ID' (the class's index) is the most compact value we
 2063	 * can use to drive the hash, rather than class->key.
2064 */
2065 id = class - lock_classes;
2066 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
2067 return 0;
2068
2069 chain_key = curr->curr_chain_key;
2070 if (!depth) {
2071 if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
2072 return 0;
2073 chain_head = 1;
2074 }
2075
2076 hlock->prev_chain_key = chain_key;
2077
2078#ifdef CONFIG_TRACE_IRQFLAGS
2079 /*
2080 * Keep track of points where we cross into an interrupt context:
2081 */
2082 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2083 curr->softirq_context;
2084 if (depth) {
2085 struct held_lock *prev_hlock;
2086
2087 prev_hlock = curr->held_locks + depth-1;
2088 /*
2089 * If we cross into another context, reset the
2090 * hash key (this also prevents the checking and the
2091 * adding of the dependency to 'prev'):
2092 */
2093 if (prev_hlock->irq_context != hlock->irq_context) {
2094 chain_key = 0;
2095 chain_head = 1;
2096 }
2097 }
2098#endif
2099 chain_key = iterate_chain_key(chain_key, id);
2100 curr->curr_chain_key = chain_key;
2101
2102 /*
2103 * Trylock needs to maintain the stack of held locks, but it
2104 * does not add new dependencies, because trylock can be done
2105 * in any order.
2106 *
2107 * We look up the chain_key and do the O(N^2) check and update of
2108 * the dependencies only if this is a new dependency chain.
2109 * (If lookup_chain_cache() returns with 1 it acquires
2110 * hash_lock for us)
2111 */
2112 if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
2113 /*
2114 * Check whether last held lock:
2115 *
2116 * - is irq-safe, if this lock is irq-unsafe
2117 * - is softirq-safe, if this lock is hardirq-unsafe
2118 *
2119 * And check whether the new lock's dependency graph
2120 * could lead back to the previous lock.
2121 *
 2122	 * Any of these scenarios could lead to a deadlock. If all
 2123	 * validations pass, the new dependency is added and the chain cached.
2124 */
2125 int ret = check_deadlock(curr, hlock, lock, read);
2126
2127 if (!ret)
2128 return 0;
2129 /*
2130 * Mark recursive read, as we jump over it when
2131 * building dependencies (just like we jump over
2132 * trylock entries):
2133 */
2134 if (ret == 2)
2135 hlock->read = 2;
2136 /*
2137 * Add dependency only if this lock is not the head
2138 * of the chain, and if it's not a secondary read-lock:
2139 */
2140 if (!chain_head && ret != 2)
2141 if (!check_prevs_add(curr, hlock))
2142 return 0;
2143 __raw_spin_unlock(&hash_lock);
2144 }
2145 curr->lockdep_depth++;
2146 check_chain_key(curr);
2147 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
2148 debug_locks_off();
2149 printk("BUG: MAX_LOCK_DEPTH too low!\n");
2150 printk("turning off the locking correctness validator.\n");
2151 return 0;
2152 }
2153 if (unlikely(curr->lockdep_depth > max_lockdep_depth))
2154 max_lockdep_depth = curr->lockdep_depth;
2155
2156 return 1;
2157}
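The chain hash built up above can be pictured with a small userspace sketch. The mixing function below is only a stand-in for the real iterate_chain_key() (an assumption made for illustration), but it shows the two properties the code relies on: each acquired lock folds its class id into the running key, and the previous value is saved per held lock so an unlock can restore it.

#include <stdio.h>
#include <stdint.h>

#define MAX_DEPTH 8

/* Stand-in for iterate_chain_key(): fold one class id into the hash. */
static uint64_t fold_key(uint64_t key, unsigned int class_id)
{
	key ^= class_id;
	key *= 0x9E3779B97F4A7C15ULL;	/* any decent mixer will do here */
	return key;
}

int main(void)
{
	uint64_t chain_key = 0, prev_key[MAX_DEPTH];
	unsigned int class_id[3] = { 7, 42, 123 };	/* three nested locks */
	int depth;

	for (depth = 0; depth < 3; depth++) {
		prev_key[depth] = chain_key;	/* like hlock->prev_chain_key */
		chain_key = fold_key(chain_key, class_id[depth]);
		printf("acquire #%d: chain_key=%016llx\n",
		       depth, (unsigned long long)chain_key);
	}

	chain_key = prev_key[2];		/* releasing the top lock */
	printf("after release: chain_key=%016llx\n",
	       (unsigned long long)chain_key);
	return 0;
}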
2158
2159static int
2160print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
2161 unsigned long ip)
2162{
2163 if (!debug_locks_off())
2164 return 0;
2165 if (debug_locks_silent)
2166 return 0;
2167
2168 printk("\n=====================================\n");
2169 printk( "[ BUG: bad unlock balance detected! ]\n");
2170 printk( "-------------------------------------\n");
2171 printk("%s/%d is trying to release lock (",
2172 curr->comm, curr->pid);
2173 print_lockdep_cache(lock);
2174 printk(") at:\n");
2175 print_ip_sym(ip);
2176 printk("but there are no more locks to release!\n");
2177 printk("\nother info that might help us debug this:\n");
2178 lockdep_print_held_locks(curr);
2179
2180 printk("\nstack backtrace:\n");
2181 dump_stack();
2182
2183 return 0;
2184}
2185
2186/*
2187 * Common debugging checks for both nested and non-nested unlock:
2188 */
2189static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2190 unsigned long ip)
2191{
2192 if (unlikely(!debug_locks))
2193 return 0;
2194 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2195 return 0;
2196
2197 if (curr->lockdep_depth <= 0)
2198 return print_unlock_inbalance_bug(curr, lock, ip);
2199
2200 return 1;
2201}
2202
2203/*
 2204 * Remove the lock from the list of currently held locks in a
2205 * potentially non-nested (out of order) manner. This is a
2206 * relatively rare operation, as all the unlock APIs default
2207 * to nested mode (which uses lock_release()):
2208 */
2209static int
2210lock_release_non_nested(struct task_struct *curr,
2211 struct lockdep_map *lock, unsigned long ip)
2212{
2213 struct held_lock *hlock, *prev_hlock;
2214 unsigned int depth;
2215 int i;
2216
2217 /*
2218 * Check whether the lock exists in the current stack
2219 * of held locks:
2220 */
2221 depth = curr->lockdep_depth;
2222 if (DEBUG_LOCKS_WARN_ON(!depth))
2223 return 0;
2224
2225 prev_hlock = NULL;
2226 for (i = depth-1; i >= 0; i--) {
2227 hlock = curr->held_locks + i;
2228 /*
2229 * We must not cross into another context:
2230 */
2231 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2232 break;
2233 if (hlock->instance == lock)
2234 goto found_it;
2235 prev_hlock = hlock;
2236 }
2237 return print_unlock_inbalance_bug(curr, lock, ip);
2238
2239found_it:
2240 /*
2241 * We have the right lock to unlock, 'hlock' points to it.
2242 * Now we remove it from the stack, and add back the other
2243 * entries (if any), recalculating the hash along the way:
2244 */
2245 curr->lockdep_depth = i;
2246 curr->curr_chain_key = hlock->prev_chain_key;
2247
2248 for (i++; i < depth; i++) {
2249 hlock = curr->held_locks + i;
2250 if (!__lock_acquire(hlock->instance,
2251 hlock->class->subclass, hlock->trylock,
2252 hlock->read, hlock->check, hlock->hardirqs_off,
2253 hlock->acquire_ip))
2254 return 0;
2255 }
2256
2257 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
2258 return 0;
2259 return 1;
2260}
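A minimal userspace model of that out-of-order release, under the assumption that the held-lock stack is just an array: with [A, B, C] held, dropping B truncates the stack at B's position and re-pushes C, leaving the depth exactly one lower (the names and helpers are invented for the example).

#include <stdio.h>
#include <string.h>

#define MAX_DEPTH 8

static char held[MAX_DEPTH][8];
static int depth;

static void acquire(const char *name)
{
	strcpy(held[depth++], name);
}

static void release_non_nested(const char *name)
{
	char tail[MAX_DEPTH][8];
	int i, n = 0, pos = -1;

	/* Find the lock, scanning from the top of the stack: */
	for (i = depth - 1; i >= 0; i--) {
		if (!strcmp(held[i], name)) {
			pos = i;
			break;
		}
	}
	if (pos < 0)
		return;			/* would be an unlock-balance bug */

	/* Save the entries above it, truncate, then re-acquire them: */
	for (i = pos + 1; i < depth; i++)
		strcpy(tail[n++], held[i]);
	depth = pos;
	for (i = 0; i < n; i++)
		acquire(tail[i]);
}

int main(void)
{
	acquire("A"); acquire("B"); acquire("C");
	release_non_nested("B");
	printf("depth=%d top=%s\n", depth, held[depth - 1]); /* depth=2 top=C */
	return 0;
}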
2261
2262/*
 2263 * Remove the lock from the list of currently held locks - this gets
2264 * called on mutex_unlock()/spin_unlock*() (or on a failed
2265 * mutex_lock_interruptible()). This is done for unlocks that nest
2266 * perfectly. (i.e. the current top of the lock-stack is unlocked)
2267 */
2268static int lock_release_nested(struct task_struct *curr,
2269 struct lockdep_map *lock, unsigned long ip)
2270{
2271 struct held_lock *hlock;
2272 unsigned int depth;
2273
2274 /*
2275 * Pop off the top of the lock stack:
2276 */
2277 depth = curr->lockdep_depth - 1;
2278 hlock = curr->held_locks + depth;
2279
2280 /*
2281 * Is the unlock non-nested:
2282 */
2283 if (hlock->instance != lock)
2284 return lock_release_non_nested(curr, lock, ip);
2285 curr->lockdep_depth--;
2286
2287 if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
2288 return 0;
2289
2290 curr->curr_chain_key = hlock->prev_chain_key;
2291
2292#ifdef CONFIG_DEBUG_LOCKDEP
2293 hlock->prev_chain_key = 0;
2294 hlock->class = NULL;
2295 hlock->acquire_ip = 0;
2296 hlock->irq_context = 0;
2297#endif
2298 return 1;
2299}
2300
2301/*
 2302 * Remove the lock from the list of currently held locks - this gets
 2303 * called on mutex_unlock()/spin_unlock*() (or on a failed
 2304 * mutex_lock_interruptible()). It handles both the common nested
 2305 * case and the rarer out-of-order (non-nested) release.
2306 */
2307static void
2308__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2309{
2310 struct task_struct *curr = current;
2311
2312 if (!check_unlock(curr, lock, ip))
2313 return;
2314
2315 if (nested) {
2316 if (!lock_release_nested(curr, lock, ip))
2317 return;
2318 } else {
2319 if (!lock_release_non_nested(curr, lock, ip))
2320 return;
2321 }
2322
2323 check_chain_key(curr);
2324}
2325
2326/*
2327 * Check whether we follow the irq-flags state precisely:
2328 */
2329static void check_flags(unsigned long flags)
2330{
2331#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS)
2332 if (!debug_locks)
2333 return;
2334
2335 if (irqs_disabled_flags(flags))
2336 DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);
2337 else
2338 DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
2339
2340 /*
 2341	 * We don't accurately track softirq state in e.g.
2342 * hardirq contexts (such as on 4KSTACKS), so only
2343 * check if not in hardirq contexts:
2344 */
2345 if (!hardirq_count()) {
2346 if (softirq_count())
2347 DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
2348 else
2349 DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
2350 }
2351
2352 if (!debug_locks)
2353 print_irqtrace_events(current);
2354#endif
2355}
2356
2357/*
2358 * We are not always called with irqs disabled - do that here,
2359 * and also avoid lockdep recursion:
2360 */
2361void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2362 int trylock, int read, int check, unsigned long ip)
2363{
2364 unsigned long flags;
2365
2366 if (unlikely(current->lockdep_recursion))
2367 return;
2368
2369 raw_local_irq_save(flags);
2370 check_flags(flags);
2371
2372 current->lockdep_recursion = 1;
2373 __lock_acquire(lock, subclass, trylock, read, check,
2374 irqs_disabled_flags(flags), ip);
2375 current->lockdep_recursion = 0;
2376 raw_local_irq_restore(flags);
2377}
2378
2379EXPORT_SYMBOL_GPL(lock_acquire);
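The lockdep_recursion flag is a per-task re-entrance guard: the validator itself takes a spinlock and calls printk(), and any lock taken from inside it must not recurse back into validation. A generic sketch of that pattern follows; the thread-local flag is an illustration, not the kernel's actual field layout.

#include <stdio.h>

static __thread int in_validator;	/* models current->lockdep_recursion */

static void validate_lock_op(const char *what)
{
	if (in_validator)		/* already inside: skip, don't recurse */
		return;

	in_validator = 1;
	/* ... locks taken from here on bypass validation ... */
	printf("validating %s\n", what);
	in_validator = 0;
}

int main(void)
{
	validate_lock_op("lock A");
	return 0;
}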
2380
2381void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2382{
2383 unsigned long flags;
2384
2385 if (unlikely(current->lockdep_recursion))
2386 return;
2387
2388 raw_local_irq_save(flags);
2389 check_flags(flags);
2390 current->lockdep_recursion = 1;
2391 __lock_release(lock, nested, ip);
2392 current->lockdep_recursion = 0;
2393 raw_local_irq_restore(flags);
2394}
2395
2396EXPORT_SYMBOL_GPL(lock_release);
2397
2398/*
2399 * Used by the testsuite, sanitize the validator state
2400 * after a simulated failure:
2401 */
2402
2403void lockdep_reset(void)
2404{
2405 unsigned long flags;
2406
2407 raw_local_irq_save(flags);
2408 current->curr_chain_key = 0;
2409 current->lockdep_depth = 0;
2410 current->lockdep_recursion = 0;
2411 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
2412 nr_hardirq_chains = 0;
2413 nr_softirq_chains = 0;
2414 nr_process_chains = 0;
2415 debug_locks = 1;
2416 raw_local_irq_restore(flags);
2417}
2418
2419static void zap_class(struct lock_class *class)
2420{
2421 int i;
2422
2423 /*
2424 * Remove all dependencies this lock is
2425 * involved in:
2426 */
2427 for (i = 0; i < nr_list_entries; i++) {
2428 if (list_entries[i].class == class)
2429 list_del_rcu(&list_entries[i].entry);
2430 }
2431 /*
2432 * Unhash the class and remove it from the all_lock_classes list:
2433 */
2434 list_del_rcu(&class->hash_entry);
2435 list_del_rcu(&class->lock_entry);
2436
2437}
2438
2439static inline int within(void *addr, void *start, unsigned long size)
2440{
2441 return addr >= start && addr < start + size;
2442}
2443
2444void lockdep_free_key_range(void *start, unsigned long size)
2445{
2446 struct lock_class *class, *next;
2447 struct list_head *head;
2448 unsigned long flags;
2449 int i;
2450
2451 raw_local_irq_save(flags);
2452 __raw_spin_lock(&hash_lock);
2453
2454 /*
2455 * Unhash all classes that were created by this module:
2456 */
2457 for (i = 0; i < CLASSHASH_SIZE; i++) {
2458 head = classhash_table + i;
2459 if (list_empty(head))
2460 continue;
2461 list_for_each_entry_safe(class, next, head, hash_entry)
2462 if (within(class->key, start, size))
2463 zap_class(class);
2464 }
2465
2466 __raw_spin_unlock(&hash_lock);
2467 raw_local_irq_restore(flags);
2468}
2469
2470void lockdep_reset_lock(struct lockdep_map *lock)
2471{
2472 struct lock_class *class, *next, *entry;
2473 struct list_head *head;
2474 unsigned long flags;
2475 int i, j;
2476
2477 raw_local_irq_save(flags);
2478 __raw_spin_lock(&hash_lock);
2479
2480 /*
2481 * Remove all classes this lock has:
2482 */
2483 for (i = 0; i < CLASSHASH_SIZE; i++) {
2484 head = classhash_table + i;
2485 if (list_empty(head))
2486 continue;
2487 list_for_each_entry_safe(class, next, head, hash_entry) {
2488 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2489 entry = lock->class[j];
2490 if (class == entry) {
2491 zap_class(class);
2492 lock->class[j] = NULL;
2493 break;
2494 }
2495 }
2496 }
2497 }
2498
2499 /*
2500 * Debug check: in the end all mapped classes should
2501 * be gone.
2502 */
2503 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2504 entry = lock->class[j];
2505 if (!entry)
2506 continue;
2507 __raw_spin_unlock(&hash_lock);
2508 DEBUG_LOCKS_WARN_ON(1);
2509 raw_local_irq_restore(flags);
2510 return;
2511 }
2512
2513 __raw_spin_unlock(&hash_lock);
2514 raw_local_irq_restore(flags);
2515}
2516
2517void __init lockdep_init(void)
2518{
2519 int i;
2520
2521 /*
2522 * Some architectures have their own start_kernel()
2523 * code which calls lockdep_init(), while we also
2524 * call lockdep_init() from the start_kernel() itself,
2525 * and we want to initialize the hashes only once:
2526 */
2527 if (lockdep_initialized)
2528 return;
2529
2530 for (i = 0; i < CLASSHASH_SIZE; i++)
2531 INIT_LIST_HEAD(classhash_table + i);
2532
2533 for (i = 0; i < CHAINHASH_SIZE; i++)
2534 INIT_LIST_HEAD(chainhash_table + i);
2535
2536 lockdep_initialized = 1;
2537}
2538
2539void __init lockdep_info(void)
2540{
2541 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
2542
2543 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
2544 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
2545 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
2546 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
2547 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
2548 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
2549 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
2550
2551 printk(" memory used by lock dependency info: %lu kB\n",
2552 (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
2553 sizeof(struct list_head) * CLASSHASH_SIZE +
2554 sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
2555 sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
2556 sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
2557
2558 printk(" per task-struct memory footprint: %lu bytes\n",
2559 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
2560
2561#ifdef CONFIG_DEBUG_LOCKDEP
2562 if (lockdep_init_error)
2563 printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n");
2564#endif
2565}
2566
2567static inline int in_range(const void *start, const void *addr, const void *end)
2568{
2569 return addr >= start && addr <= end;
2570}
2571
2572static void
2573print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
2574 const void *mem_to)
2575{
2576 if (!debug_locks_off())
2577 return;
2578 if (debug_locks_silent)
2579 return;
2580
2581 printk("\n=========================\n");
2582 printk( "[ BUG: held lock freed! ]\n");
2583 printk( "-------------------------\n");
2584 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
2585 curr->comm, curr->pid, mem_from, mem_to-1);
2586 lockdep_print_held_locks(curr);
2587
2588 printk("\nstack backtrace:\n");
2589 dump_stack();
2590}
2591
2592/*
2593 * Called when kernel memory is freed (or unmapped), or if a lock
2594 * is destroyed or reinitialized - this code checks whether there is
2595 * any held lock in the memory range of <from> to <to>:
2596 */
2597void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
2598{
2599 const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
2600 struct task_struct *curr = current;
2601 struct held_lock *hlock;
2602 unsigned long flags;
2603 int i;
2604
2605 if (unlikely(!debug_locks))
2606 return;
2607
2608 local_irq_save(flags);
2609 for (i = 0; i < curr->lockdep_depth; i++) {
2610 hlock = curr->held_locks + i;
2611
2612 lock_from = (void *)hlock->instance;
2613 lock_to = (void *)(hlock->instance + 1);
2614
2615 if (!in_range(mem_from, lock_from, mem_to) &&
2616 !in_range(mem_from, lock_to, mem_to))
2617 continue;
2618
2619 print_freed_lock_bug(curr, mem_from, mem_to);
2620 break;
2621 }
2622 local_irq_restore(flags);
2623}
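The overlap test above flags a free if either boundary of a held lock falls inside the range being released. A small userspace sketch of the same check, with a made-up lock object placed near the end of the buffer being freed:

#include <stdio.h>

struct fake_lock { char pad[32]; };	/* stand-in for a lock object */

static int in_range(const void *start, const void *addr, const void *end)
{
	return addr >= start && addr <= end;
}

int main(void)
{
	char buffer[256];		/* memory about to be freed */
	struct fake_lock *lock = (struct fake_lock *)(buffer + 240);
	const void *mem_from = buffer, *mem_to = buffer + sizeof(buffer);
	const void *lock_from = lock, *lock_to = lock + 1;

	/* Complain if either end of the lock lies inside the freed range: */
	if (in_range(mem_from, lock_from, mem_to) ||
	    in_range(mem_from, lock_to, mem_to))
		printf("BUG: freeing memory that contains a held lock\n");
	return 0;
}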
2624
2625static void print_held_locks_bug(struct task_struct *curr)
2626{
2627 if (!debug_locks_off())
2628 return;
2629 if (debug_locks_silent)
2630 return;
2631
2632 printk("\n=====================================\n");
2633 printk( "[ BUG: lock held at task exit time! ]\n");
2634 printk( "-------------------------------------\n");
2635 printk("%s/%d is exiting with locks still held!\n",
2636 curr->comm, curr->pid);
2637 lockdep_print_held_locks(curr);
2638
2639 printk("\nstack backtrace:\n");
2640 dump_stack();
2641}
2642
2643void debug_check_no_locks_held(struct task_struct *task)
2644{
2645 if (unlikely(task->lockdep_depth > 0))
2646 print_held_locks_bug(task);
2647}
2648
2649void debug_show_all_locks(void)
2650{
2651 struct task_struct *g, *p;
2652 int count = 10;
2653 int unlock = 1;
2654
2655 printk("\nShowing all locks held in the system:\n");
2656
2657 /*
 2658	 * Here we try to get the tasklist_lock as hard as possible;
 2659	 * if we are not successful within 2 seconds we ignore it (but keep
2660 * trying). This is to enable a debug printout even if a
2661 * tasklist_lock-holding task deadlocks or crashes.
2662 */
2663retry:
2664 if (!read_trylock(&tasklist_lock)) {
2665 if (count == 10)
2666 printk("hm, tasklist_lock locked, retrying... ");
2667 if (count) {
2668 count--;
2669 printk(" #%d", 10-count);
2670 mdelay(200);
2671 goto retry;
2672 }
2673 printk(" ignoring it.\n");
2674 unlock = 0;
2675 }
2676 if (count != 10)
2677 printk(" locked it.\n");
2678
2679 do_each_thread(g, p) {
2680 if (p->lockdep_depth)
2681 lockdep_print_held_locks(p);
2682 if (!unlock)
2683 if (read_trylock(&tasklist_lock))
2684 unlock = 1;
2685 } while_each_thread(g, p);
2686
2687 printk("\n");
2688 printk("=============================================\n\n");
2689
2690 if (unlock)
2691 read_unlock(&tasklist_lock);
2692}
2693
2694EXPORT_SYMBOL_GPL(debug_show_all_locks);
2695
2696void debug_show_held_locks(struct task_struct *task)
2697{
2698 lockdep_print_held_locks(task);
2699}
2700
2701EXPORT_SYMBOL_GPL(debug_show_held_locks);
2702
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
new file mode 100644
index 000000000000..0d355f24fe04
--- /dev/null
+++ b/kernel/lockdep_internals.h
@@ -0,0 +1,78 @@
1/*
2 * kernel/lockdep_internals.h
3 *
4 * Runtime locking correctness validator
5 *
6 * lockdep subsystem internal functions and variables.
7 */
8
9/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track.
12 *
 13 * We use the per-lock dependency maps in two ways: we grow them by adding
 14 * every to-be-taken lock to each currently held lock's own dependency
 15 * table (if it's not there yet), and we check them for lock order
16 * conflicts and deadlocks.
17 */
18#define MAX_LOCKDEP_ENTRIES 8192UL
19
20#define MAX_LOCKDEP_KEYS_BITS 11
21#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
22
23#define MAX_LOCKDEP_CHAINS_BITS 13
24#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
25
26/*
27 * Stack-trace: tightly packed array of stack backtrace
28 * addresses. Protected by the hash_lock.
29 */
30#define MAX_STACK_TRACE_ENTRIES 131072UL
31
32extern struct list_head all_lock_classes;
33
34extern void
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
36
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38
39extern unsigned long nr_lock_classes;
40extern unsigned long nr_list_entries;
41extern unsigned long nr_lock_chains;
42extern unsigned long nr_stack_trace_entries;
43
44extern unsigned int nr_hardirq_chains;
45extern unsigned int nr_softirq_chains;
46extern unsigned int nr_process_chains;
47extern unsigned int max_lockdep_depth;
48extern unsigned int max_recursion_depth;
49
50#ifdef CONFIG_DEBUG_LOCKDEP
51/*
52 * Various lockdep statistics:
53 */
54extern atomic_t chain_lookup_hits;
55extern atomic_t chain_lookup_misses;
56extern atomic_t hardirqs_on_events;
57extern atomic_t hardirqs_off_events;
58extern atomic_t redundant_hardirqs_on;
59extern atomic_t redundant_hardirqs_off;
60extern atomic_t softirqs_on_events;
61extern atomic_t softirqs_off_events;
62extern atomic_t redundant_softirqs_on;
63extern atomic_t redundant_softirqs_off;
64extern atomic_t nr_unused_locks;
65extern atomic_t nr_cyclic_checks;
66extern atomic_t nr_cyclic_check_recursions;
67extern atomic_t nr_find_usage_forwards_checks;
68extern atomic_t nr_find_usage_forwards_recursions;
69extern atomic_t nr_find_usage_backwards_checks;
70extern atomic_t nr_find_usage_backwards_recursions;
71# define debug_atomic_inc(ptr) atomic_inc(ptr)
72# define debug_atomic_dec(ptr) atomic_dec(ptr)
73# define debug_atomic_read(ptr) atomic_read(ptr)
74#else
75# define debug_atomic_inc(ptr) do { } while (0)
76# define debug_atomic_dec(ptr) do { } while (0)
77# define debug_atomic_read(ptr) 0
78#endif
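The debug_atomic_*() wrappers make every statistics update disappear when CONFIG_DEBUG_LOCKDEP is off; only the debug build carries the atomic_t counters and their atomic_inc()/atomic_read() calls. A hedged userspace sketch of the same compile-time switch:

#include <stdio.h>

#define DEBUG_STATS 1		/* flip to 0 to mimic !CONFIG_DEBUG_LOCKDEP */

#if DEBUG_STATS
static int chain_lookup_hits;
# define debug_stat_inc(var)	((var)++)
# define debug_stat_read(var)	(var)
#else
# define debug_stat_inc(var)	do { } while (0)
# define debug_stat_read(var)	0
#endif

int main(void)
{
	debug_stat_inc(chain_lookup_hits);	/* compiles away when off */
	printf("hits: %d\n", debug_stat_read(chain_lookup_hits));
	return 0;
}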
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
new file mode 100644
index 000000000000..f6e72eaab3fa
--- /dev/null
+++ b/kernel/lockdep_proc.c
@@ -0,0 +1,345 @@
1/*
2 * kernel/lockdep_proc.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * Code for /proc/lockdep and /proc/lockdep_stats:
11 *
12 */
13#include <linux/sched.h>
14#include <linux/module.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/kallsyms.h>
18#include <linux/debug_locks.h>
19
20#include "lockdep_internals.h"
21
22static void *l_next(struct seq_file *m, void *v, loff_t *pos)
23{
24 struct lock_class *class = v;
25
26 (*pos)++;
27
28 if (class->lock_entry.next != &all_lock_classes)
29 class = list_entry(class->lock_entry.next, struct lock_class,
30 lock_entry);
31 else
32 class = NULL;
33 m->private = class;
34
35 return class;
36}
37
38static void *l_start(struct seq_file *m, loff_t *pos)
39{
40 struct lock_class *class = m->private;
41
42 if (&class->lock_entry == all_lock_classes.next)
43 seq_printf(m, "all lock classes:\n");
44
45 return class;
46}
47
48static void l_stop(struct seq_file *m, void *v)
49{
50}
51
52static unsigned long count_forward_deps(struct lock_class *class)
53{
54 struct lock_list *entry;
55 unsigned long ret = 1;
56
57 /*
58 * Recurse this class's dependency list:
59 */
60 list_for_each_entry(entry, &class->locks_after, entry)
61 ret += count_forward_deps(entry->class);
62
63 return ret;
64}
65
66static unsigned long count_backward_deps(struct lock_class *class)
67{
68 struct lock_list *entry;
69 unsigned long ret = 1;
70
71 /*
72 * Recurse this class's dependency list:
73 */
74 list_for_each_entry(entry, &class->locks_before, entry)
75 ret += count_backward_deps(entry->class);
76
77 return ret;
78}
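Because ret starts at 1, each class counts itself plus everything reachable from it: for a simple chain A → B → C the forward-dependency counts come out as FD(C) = 1, FD(B) = 2, FD(A) = 3. A tiny userspace version of the same recursion; the three-node graph is made up for the example.

#include <stdio.h>

struct node {
	struct node *after[2];		/* forward dependencies */
	int nr_after;
};

static unsigned long count_forward(const struct node *n)
{
	unsigned long ret = 1;		/* the class itself */
	int i;

	for (i = 0; i < n->nr_after; i++)
		ret += count_forward(n->after[i]);
	return ret;
}

int main(void)
{
	struct node c = { .nr_after = 0 };
	struct node b = { .after = { &c }, .nr_after = 1 };
	struct node a = { .after = { &b }, .nr_after = 1 };

	printf("FD(a)=%lu FD(b)=%lu FD(c)=%lu\n",
	       count_forward(&a), count_forward(&b), count_forward(&c));
	/* prints: FD(a)=3 FD(b)=2 FD(c)=1 */
	return 0;
}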
79
80static int l_show(struct seq_file *m, void *v)
81{
82 unsigned long nr_forward_deps, nr_backward_deps;
83 struct lock_class *class = m->private;
84 char str[128], c1, c2, c3, c4;
85 const char *name;
86
87 seq_printf(m, "%p", class->key);
88#ifdef CONFIG_DEBUG_LOCKDEP
89 seq_printf(m, " OPS:%8ld", class->ops);
90#endif
91 nr_forward_deps = count_forward_deps(class);
92 seq_printf(m, " FD:%5ld", nr_forward_deps);
93
94 nr_backward_deps = count_backward_deps(class);
95 seq_printf(m, " BD:%5ld", nr_backward_deps);
96
97 get_usage_chars(class, &c1, &c2, &c3, &c4);
98 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
99
100 name = class->name;
101 if (!name) {
102 name = __get_key_name(class->key, str);
103 seq_printf(m, ": %s", name);
104 } else{
105 seq_printf(m, ": %s", name);
106 if (class->name_version > 1)
107 seq_printf(m, "#%d", class->name_version);
108 if (class->subclass)
109 seq_printf(m, "/%d", class->subclass);
110 }
111 seq_puts(m, "\n");
112
113 return 0;
114}
115
116static struct seq_operations lockdep_ops = {
117 .start = l_start,
118 .next = l_next,
119 .stop = l_stop,
120 .show = l_show,
121};
122
123static int lockdep_open(struct inode *inode, struct file *file)
124{
125 int res = seq_open(file, &lockdep_ops);
126 if (!res) {
127 struct seq_file *m = file->private_data;
128
129 if (!list_empty(&all_lock_classes))
130 m->private = list_entry(all_lock_classes.next,
131 struct lock_class, lock_entry);
132 else
133 m->private = NULL;
134 }
135 return res;
136}
137
138static struct file_operations proc_lockdep_operations = {
139 .open = lockdep_open,
140 .read = seq_read,
141 .llseek = seq_lseek,
142 .release = seq_release,
143};
144
145static void lockdep_stats_debug_show(struct seq_file *m)
146{
147#ifdef CONFIG_DEBUG_LOCKDEP
148 unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
149 hi2 = debug_atomic_read(&hardirqs_off_events),
150 hr1 = debug_atomic_read(&redundant_hardirqs_on),
151 hr2 = debug_atomic_read(&redundant_hardirqs_off),
152 si1 = debug_atomic_read(&softirqs_on_events),
153 si2 = debug_atomic_read(&softirqs_off_events),
154 sr1 = debug_atomic_read(&redundant_softirqs_on),
155 sr2 = debug_atomic_read(&redundant_softirqs_off);
156
157 seq_printf(m, " chain lookup misses: %11u\n",
158 debug_atomic_read(&chain_lookup_misses));
159 seq_printf(m, " chain lookup hits: %11u\n",
160 debug_atomic_read(&chain_lookup_hits));
161 seq_printf(m, " cyclic checks: %11u\n",
162 debug_atomic_read(&nr_cyclic_checks));
163 seq_printf(m, " cyclic-check recursions: %11u\n",
164 debug_atomic_read(&nr_cyclic_check_recursions));
165 seq_printf(m, " find-mask forwards checks: %11u\n",
166 debug_atomic_read(&nr_find_usage_forwards_checks));
167 seq_printf(m, " find-mask forwards recursions: %11u\n",
168 debug_atomic_read(&nr_find_usage_forwards_recursions));
169 seq_printf(m, " find-mask backwards checks: %11u\n",
170 debug_atomic_read(&nr_find_usage_backwards_checks));
171 seq_printf(m, " find-mask backwards recursions:%11u\n",
172 debug_atomic_read(&nr_find_usage_backwards_recursions));
173
174 seq_printf(m, " hardirq on events: %11u\n", hi1);
175 seq_printf(m, " hardirq off events: %11u\n", hi2);
176 seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
177 seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
178 seq_printf(m, " softirq on events: %11u\n", si1);
179 seq_printf(m, " softirq off events: %11u\n", si2);
180 seq_printf(m, " redundant softirq ons: %11u\n", sr1);
181 seq_printf(m, " redundant softirq offs: %11u\n", sr2);
182#endif
183}
184
185static int lockdep_stats_show(struct seq_file *m, void *v)
186{
187 struct lock_class *class;
188 unsigned long nr_unused = 0, nr_uncategorized = 0,
189 nr_irq_safe = 0, nr_irq_unsafe = 0,
190 nr_softirq_safe = 0, nr_softirq_unsafe = 0,
191 nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
192 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
193 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
194 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
195 sum_forward_deps = 0, factor = 0;
196
197 list_for_each_entry(class, &all_lock_classes, lock_entry) {
198
199 if (class->usage_mask == 0)
200 nr_unused++;
201 if (class->usage_mask == LOCKF_USED)
202 nr_uncategorized++;
203 if (class->usage_mask & LOCKF_USED_IN_IRQ)
204 nr_irq_safe++;
205 if (class->usage_mask & LOCKF_ENABLED_IRQS)
206 nr_irq_unsafe++;
207 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
208 nr_softirq_safe++;
209 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
210 nr_softirq_unsafe++;
211 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
212 nr_hardirq_safe++;
213 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
214 nr_hardirq_unsafe++;
215 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
216 nr_irq_read_safe++;
217 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
218 nr_irq_read_unsafe++;
219 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
220 nr_softirq_read_safe++;
221 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
222 nr_softirq_read_unsafe++;
223 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
224 nr_hardirq_read_safe++;
225 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
226 nr_hardirq_read_unsafe++;
227
228 sum_forward_deps += count_forward_deps(class);
229 }
230#ifdef CONFIG_LOCKDEP_DEBUG
231 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
232#endif
233 seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
234 nr_lock_classes, MAX_LOCKDEP_KEYS);
235 seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
236 nr_list_entries, MAX_LOCKDEP_ENTRIES);
237 seq_printf(m, " indirect dependencies: %11lu\n",
238 sum_forward_deps);
239
240 /*
241 * Total number of dependencies:
242 *
 243	 * every irq-safe lock may nest inside every irq-unsafe lock,
 244	 * plus all the other known dependencies:
245 */
246 seq_printf(m, " all direct dependencies: %11lu\n",
247 nr_irq_unsafe * nr_irq_safe +
248 nr_hardirq_unsafe * nr_hardirq_safe +
249 nr_list_entries);
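As a worked example with purely hypothetical counts: 10 irq-safe and 50 irq-unsafe classes, 8 hardirq-safe and 40 hardirq-unsafe classes, and 600 recorded list entries would make this line report 50*10 + 40*8 + 600 = 1420 "all direct dependencies".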
250
251 /*
252 * Estimated factor between direct and indirect
253 * dependencies:
254 */
255 if (nr_list_entries)
256 factor = sum_forward_deps / nr_list_entries;
257
258 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
259 nr_lock_chains, MAX_LOCKDEP_CHAINS);
260
261#ifdef CONFIG_TRACE_IRQFLAGS
262 seq_printf(m, " in-hardirq chains: %11u\n",
263 nr_hardirq_chains);
264 seq_printf(m, " in-softirq chains: %11u\n",
265 nr_softirq_chains);
266#endif
267 seq_printf(m, " in-process chains: %11u\n",
268 nr_process_chains);
269 seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
270 nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
271 seq_printf(m, " combined max dependencies: %11u\n",
272 (nr_hardirq_chains + 1) *
273 (nr_softirq_chains + 1) *
274 (nr_process_chains + 1)
275 );
276 seq_printf(m, " hardirq-safe locks: %11lu\n",
277 nr_hardirq_safe);
278 seq_printf(m, " hardirq-unsafe locks: %11lu\n",
279 nr_hardirq_unsafe);
280 seq_printf(m, " softirq-safe locks: %11lu\n",
281 nr_softirq_safe);
282 seq_printf(m, " softirq-unsafe locks: %11lu\n",
283 nr_softirq_unsafe);
284 seq_printf(m, " irq-safe locks: %11lu\n",
285 nr_irq_safe);
286 seq_printf(m, " irq-unsafe locks: %11lu\n",
287 nr_irq_unsafe);
288
289 seq_printf(m, " hardirq-read-safe locks: %11lu\n",
290 nr_hardirq_read_safe);
291 seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
292 nr_hardirq_read_unsafe);
293 seq_printf(m, " softirq-read-safe locks: %11lu\n",
294 nr_softirq_read_safe);
295 seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
296 nr_softirq_read_unsafe);
297 seq_printf(m, " irq-read-safe locks: %11lu\n",
298 nr_irq_read_safe);
299 seq_printf(m, " irq-read-unsafe locks: %11lu\n",
300 nr_irq_read_unsafe);
301
302 seq_printf(m, " uncategorized locks: %11lu\n",
303 nr_uncategorized);
304 seq_printf(m, " unused locks: %11lu\n",
305 nr_unused);
306 seq_printf(m, " max locking depth: %11u\n",
307 max_lockdep_depth);
308 seq_printf(m, " max recursion depth: %11u\n",
309 max_recursion_depth);
310 lockdep_stats_debug_show(m);
311 seq_printf(m, " debug_locks: %11u\n",
312 debug_locks);
313
314 return 0;
315}
316
317static int lockdep_stats_open(struct inode *inode, struct file *file)
318{
319 return single_open(file, lockdep_stats_show, NULL);
320}
321
322static struct file_operations proc_lockdep_stats_operations = {
323 .open = lockdep_stats_open,
324 .read = seq_read,
325 .llseek = seq_lseek,
326 .release = seq_release,
327};
328
329static int __init lockdep_proc_init(void)
330{
331 struct proc_dir_entry *entry;
332
333 entry = create_proc_entry("lockdep", S_IRUSR, NULL);
334 if (entry)
335 entry->proc_fops = &proc_lockdep_operations;
336
337 entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
338 if (entry)
339 entry->proc_fops = &proc_lockdep_stats_operations;
340
341 return 0;
342}
343
344__initcall(lockdep_proc_init);
345
diff --git a/kernel/module.c b/kernel/module.c
index 99c022ac3d21..35e1b1f859d7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -16,7 +16,6 @@
16 along with this program; if not, write to the Free Software 16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18*/ 18*/
19#include <linux/config.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
22#include <linux/init.h> 21#include <linux/init.h>
@@ -1122,6 +1121,9 @@ static void free_module(struct module *mod)
1122 if (mod->percpu) 1121 if (mod->percpu)
1123 percpu_modfree(mod->percpu); 1122 percpu_modfree(mod->percpu);
1124 1123
1124 /* Free lock-classes: */
1125 lockdep_free_key_range(mod->module_core, mod->core_size);
1126
1125 /* Finally, free the core (containing the module structure) */ 1127 /* Finally, free the core (containing the module structure) */
1126 module_free(mod, mod->module_core); 1128 module_free(mod, mod->module_core);
1127} 1129}
@@ -2160,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2160 return e; 2162 return e;
2161} 2163}
2162 2164
2165/*
2166 * Is this a valid module address?
2167 */
2168int is_module_address(unsigned long addr)
2169{
2170 unsigned long flags;
2171 struct module *mod;
2172
2173 spin_lock_irqsave(&modlist_lock, flags);
2174
2175 list_for_each_entry(mod, &modules, list) {
2176 if (within(addr, mod->module_core, mod->core_size)) {
2177 spin_unlock_irqrestore(&modlist_lock, flags);
2178 return 1;
2179 }
2180 }
2181
2182 spin_unlock_irqrestore(&modlist_lock, flags);
2183
2184 return 0;
2185}
2186
2187
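This helper is what lets lockdep's persistence check (the "BUG: key %p not in .data!" test earlier in this patch) accept lock-class keys that live inside module images as well as in the core kernel image. A hedged userspace sketch of that decision, with invented section bounds standing in for the kernel's symbols and module list:

#include <stdio.h>
#include <stdint.h>

/* Invented bounds standing in for the kernel image and one module core: */
static const uintptr_t kernel_start = 0x1000,  kernel_end  = 0x9000;
static const uintptr_t module_core  = 0x20000, module_size = 0x4000;

static int is_module_address(uintptr_t addr)
{
	return addr >= module_core && addr < module_core + module_size;
}

static int static_obj(uintptr_t addr)
{
	/* Static if it sits inside the kernel image ... */
	if (addr >= kernel_start && addr < kernel_end)
		return 1;
	/* ... or inside some loaded module's core area: */
	return is_module_address(addr);
}

int main(void)
{
	printf("%d %d %d\n",
	       static_obj(0x2000),	/* 1: kernel data        */
	       static_obj(0x21000),	/* 1: module core        */
	       static_obj(0x7ff000));	/* 0: e.g. a heap object */
	return 0;
}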
2163/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ 2188/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
2164struct module *__module_text_address(unsigned long addr) 2189struct module *__module_text_address(unsigned long addr)
2165{ 2190{
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e38e4bac97ca..e3203c654dda 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -20,367 +20,19 @@
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/kallsyms.h> 21#include <linux/kallsyms.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/debug_locks.h>
23 24
24#include "mutex-debug.h" 25#include "mutex-debug.h"
25 26
26/* 27/*
27 * We need a global lock when we walk through the multi-process
28 * lock tree. Only used in the deadlock-debugging case.
29 */
30DEFINE_SPINLOCK(debug_mutex_lock);
31
32/*
33 * All locks held by all tasks, in a single global list:
34 */
35LIST_HEAD(debug_mutex_held_locks);
36
37/*
38 * In the debug case we carry the caller's instruction pointer into
39 * other functions, but we dont want the function argument overhead
40 * in the nondebug case - hence these macros:
41 */
42#define __IP_DECL__ , unsigned long ip
43#define __IP__ , ip
44#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
45
46/*
47 * "mutex debugging enabled" flag. We turn it off when we detect
48 * the first problem because we dont want to recurse back
49 * into the tracing code when doing error printk or
50 * executing a BUG():
51 */
52int debug_mutex_on = 1;
53
54static void printk_task(struct task_struct *p)
55{
56 if (p)
57 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
58 else
59 printk("<none>");
60}
61
62static void printk_ti(struct thread_info *ti)
63{
64 if (ti)
65 printk_task(ti->task);
66 else
67 printk("<none>");
68}
69
70static void printk_task_short(struct task_struct *p)
71{
72 if (p)
73 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
74 else
75 printk("<none>");
76}
77
78static void printk_lock(struct mutex *lock, int print_owner)
79{
80 printk(" [%p] {%s}\n", lock, lock->name);
81
82 if (print_owner && lock->owner) {
83 printk(".. held by: ");
84 printk_ti(lock->owner);
85 printk("\n");
86 }
87 if (lock->owner) {
88 printk("... acquired at: ");
89 print_symbol("%s\n", lock->acquire_ip);
90 }
91}
92
93/*
94 * printk locks held by a task:
95 */
96static void show_task_locks(struct task_struct *p)
97{
98 switch (p->state) {
99 case TASK_RUNNING: printk("R"); break;
100 case TASK_INTERRUPTIBLE: printk("S"); break;
101 case TASK_UNINTERRUPTIBLE: printk("D"); break;
102 case TASK_STOPPED: printk("T"); break;
103 case EXIT_ZOMBIE: printk("Z"); break;
104 case EXIT_DEAD: printk("X"); break;
105 default: printk("?"); break;
106 }
107 printk_task(p);
108 if (p->blocked_on) {
109 struct mutex *lock = p->blocked_on->lock;
110
111 printk(" blocked on mutex:");
112 printk_lock(lock, 1);
113 } else
114 printk(" (not blocked on mutex)\n");
115}
116
117/*
118 * printk all locks held in the system (if filter == NULL),
119 * or all locks belonging to a single task (if filter != NULL):
120 */
121void show_held_locks(struct task_struct *filter)
122{
123 struct list_head *curr, *cursor = NULL;
124 struct mutex *lock;
125 struct thread_info *t;
126 unsigned long flags;
127 int count = 0;
128
129 if (filter) {
130 printk("------------------------------\n");
131 printk("| showing all locks held by: | (");
132 printk_task_short(filter);
133 printk("):\n");
134 printk("------------------------------\n");
135 } else {
136 printk("---------------------------\n");
137 printk("| showing all locks held: |\n");
138 printk("---------------------------\n");
139 }
140
141 /*
142 * Play safe and acquire the global trace lock. We
143 * cannot printk with that lock held so we iterate
144 * very carefully:
145 */
146next:
147 debug_spin_lock_save(&debug_mutex_lock, flags);
148 list_for_each(curr, &debug_mutex_held_locks) {
149 if (cursor && curr != cursor)
150 continue;
151 lock = list_entry(curr, struct mutex, held_list);
152 t = lock->owner;
153 if (filter && (t != filter->thread_info))
154 continue;
155 count++;
156 cursor = curr->next;
157 debug_spin_unlock_restore(&debug_mutex_lock, flags);
158
159 printk("\n#%03d: ", count);
160 printk_lock(lock, filter ? 0 : 1);
161 goto next;
162 }
163 debug_spin_unlock_restore(&debug_mutex_lock, flags);
164 printk("\n");
165}
166
167void mutex_debug_show_all_locks(void)
168{
169 struct task_struct *g, *p;
170 int count = 10;
171 int unlock = 1;
172
173 printk("\nShowing all blocking locks in the system:\n");
174
175 /*
176 * Here we try to get the tasklist_lock as hard as possible,
177 * if not successful after 2 seconds we ignore it (but keep
178 * trying). This is to enable a debug printout even if a
179 * tasklist_lock-holding task deadlocks or crashes.
180 */
181retry:
182 if (!read_trylock(&tasklist_lock)) {
183 if (count == 10)
184 printk("hm, tasklist_lock locked, retrying... ");
185 if (count) {
186 count--;
187 printk(" #%d", 10-count);
188 mdelay(200);
189 goto retry;
190 }
191 printk(" ignoring it.\n");
192 unlock = 0;
193 }
194 if (count != 10)
195 printk(" locked it.\n");
196
197 do_each_thread(g, p) {
198 show_task_locks(p);
199 if (!unlock)
200 if (read_trylock(&tasklist_lock))
201 unlock = 1;
202 } while_each_thread(g, p);
203
204 printk("\n");
205 show_held_locks(NULL);
206 printk("=============================================\n\n");
207
208 if (unlock)
209 read_unlock(&tasklist_lock);
210}
211
212static void report_deadlock(struct task_struct *task, struct mutex *lock,
213 struct mutex *lockblk, unsigned long ip)
214{
215 printk("\n%s/%d is trying to acquire this lock:\n",
216 current->comm, current->pid);
217 printk_lock(lock, 1);
218 printk("... trying at: ");
219 print_symbol("%s\n", ip);
220 show_held_locks(current);
221
222 if (lockblk) {
223 printk("but %s/%d is deadlocking current task %s/%d!\n\n",
224 task->comm, task->pid, current->comm, current->pid);
225 printk("\n%s/%d is blocked on this lock:\n",
226 task->comm, task->pid);
227 printk_lock(lockblk, 1);
228
229 show_held_locks(task);
230
231 printk("\n%s/%d's [blocked] stackdump:\n\n",
232 task->comm, task->pid);
233 show_stack(task, NULL);
234 }
235
236 printk("\n%s/%d's [current] stackdump:\n\n",
237 current->comm, current->pid);
238 dump_stack();
239 mutex_debug_show_all_locks();
240 printk("[ turning off deadlock detection. Please report this. ]\n\n");
241 local_irq_disable();
242}
243
244/*
245 * Recursively check for mutex deadlocks:
246 */
247static int check_deadlock(struct mutex *lock, int depth,
248 struct thread_info *ti, unsigned long ip)
249{
250 struct mutex *lockblk;
251 struct task_struct *task;
252
253 if (!debug_mutex_on)
254 return 0;
255
256 ti = lock->owner;
257 if (!ti)
258 return 0;
259
260 task = ti->task;
261 lockblk = NULL;
262 if (task->blocked_on)
263 lockblk = task->blocked_on->lock;
264
265 /* Self-deadlock: */
266 if (current == task) {
267 DEBUG_OFF();
268 if (depth)
269 return 1;
270 printk("\n==========================================\n");
271 printk( "[ BUG: lock recursion deadlock detected! |\n");
272 printk( "------------------------------------------\n");
273 report_deadlock(task, lock, NULL, ip);
274 return 0;
275 }
276
277 /* Ugh, something corrupted the lock data structure? */
278 if (depth > 20) {
279 DEBUG_OFF();
280 printk("\n===========================================\n");
281 printk( "[ BUG: infinite lock dependency detected!? |\n");
282 printk( "-------------------------------------------\n");
283 report_deadlock(task, lock, lockblk, ip);
284 return 0;
285 }
286
287 /* Recursively check for dependencies: */
288 if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
289 printk("\n============================================\n");
290 printk( "[ BUG: circular locking deadlock detected! ]\n");
291 printk( "--------------------------------------------\n");
292 report_deadlock(task, lock, lockblk, ip);
293 return 0;
294 }
295 return 0;
296}
297
298/*
299 * Called when a task exits, this function checks whether the
300 * task is holding any locks, and reports the first one if so:
301 */
302void mutex_debug_check_no_locks_held(struct task_struct *task)
303{
304 struct list_head *curr, *next;
305 struct thread_info *t;
306 unsigned long flags;
307 struct mutex *lock;
308
309 if (!debug_mutex_on)
310 return;
311
312 debug_spin_lock_save(&debug_mutex_lock, flags);
313 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
314 lock = list_entry(curr, struct mutex, held_list);
315 t = lock->owner;
316 if (t != task->thread_info)
317 continue;
318 list_del_init(curr);
319 DEBUG_OFF();
320 debug_spin_unlock_restore(&debug_mutex_lock, flags);
321
322 printk("BUG: %s/%d, lock held at task exit time!\n",
323 task->comm, task->pid);
324 printk_lock(lock, 1);
325 if (lock->owner != task->thread_info)
326 printk("exiting task is not even the owner??\n");
327 return;
328 }
329 debug_spin_unlock_restore(&debug_mutex_lock, flags);
330}
331
332/*
333 * Called when kernel memory is freed (or unmapped), or if a mutex
334 * is destroyed or reinitialized - this code checks whether there is
335 * any held lock in the memory range of <from> to <to>:
336 */
337void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
338{
339 struct list_head *curr, *next;
340 const void *to = from + len;
341 unsigned long flags;
342 struct mutex *lock;
343 void *lock_addr;
344
345 if (!debug_mutex_on)
346 return;
347
348 debug_spin_lock_save(&debug_mutex_lock, flags);
349 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
350 lock = list_entry(curr, struct mutex, held_list);
351 lock_addr = lock;
352 if (lock_addr < from || lock_addr >= to)
353 continue;
354 list_del_init(curr);
355 DEBUG_OFF();
356 debug_spin_unlock_restore(&debug_mutex_lock, flags);
357
358 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
359 current->comm, current->pid, lock, from, to);
360 dump_stack();
361 printk_lock(lock, 1);
362 if (lock->owner != current_thread_info())
363 printk("freeing task is not even the owner??\n");
364 return;
365 }
366 debug_spin_unlock_restore(&debug_mutex_lock, flags);
367}
368
369/*
370 * Must be called with lock->wait_lock held. 28 * Must be called with lock->wait_lock held.
371 */ 29 */
372void debug_mutex_set_owner(struct mutex *lock, 30void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
373 struct thread_info *new_owner __IP_DECL__)
374{ 31{
375 lock->owner = new_owner; 32 lock->owner = new_owner;
376 DEBUG_WARN_ON(!list_empty(&lock->held_list));
377 if (debug_mutex_on) {
378 list_add_tail(&lock->held_list, &debug_mutex_held_locks);
379 lock->acquire_ip = ip;
380 }
381} 33}
382 34
383void debug_mutex_init_waiter(struct mutex_waiter *waiter) 35void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
384{ 36{
385 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 37 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
386 waiter->magic = waiter; 38 waiter->magic = waiter;
@@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)
389 41
390void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) 42void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
391{ 43{
392 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 44 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
393 DEBUG_WARN_ON(list_empty(&lock->wait_list)); 45 DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
394 DEBUG_WARN_ON(waiter->magic != waiter); 46 DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
395 DEBUG_WARN_ON(list_empty(&waiter->list)); 47 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
396} 48}
397 49
398void debug_mutex_free_waiter(struct mutex_waiter *waiter) 50void debug_mutex_free_waiter(struct mutex_waiter *waiter)
399{ 51{
400 DEBUG_WARN_ON(!list_empty(&waiter->list)); 52 DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
401 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); 53 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
402} 54}
403 55
404void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, 56void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
405 struct thread_info *ti __IP_DECL__) 57 struct thread_info *ti)
406{ 58{
407 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 59 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
408 check_deadlock(lock, 0, ti, ip); 60
409 /* Mark the current thread as blocked on the lock: */ 61 /* Mark the current thread as blocked on the lock: */
410 ti->task->blocked_on = waiter; 62 ti->task->blocked_on = waiter;
411 waiter->lock = lock; 63 waiter->lock = lock;
@@ -414,9 +66,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
414void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 66void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
415 struct thread_info *ti) 67 struct thread_info *ti)
416{ 68{
417 DEBUG_WARN_ON(list_empty(&waiter->list)); 69 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
418 DEBUG_WARN_ON(waiter->task != ti->task); 70 DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
419 DEBUG_WARN_ON(ti->task->blocked_on != waiter); 71 DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
420 ti->task->blocked_on = NULL; 72 ti->task->blocked_on = NULL;
421 73
422 list_del_init(&waiter->list); 74 list_del_init(&waiter->list);
@@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
425 77
426void debug_mutex_unlock(struct mutex *lock) 78void debug_mutex_unlock(struct mutex *lock)
427{ 79{
428 DEBUG_WARN_ON(lock->magic != lock); 80 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
429 DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 81 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
430 DEBUG_WARN_ON(lock->owner != current_thread_info()); 82 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
431 if (debug_mutex_on) { 83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
432 DEBUG_WARN_ON(list_empty(&lock->held_list));
433 list_del_init(&lock->held_list);
434 }
435} 84}
436 85
437void debug_mutex_init(struct mutex *lock, const char *name) 86void debug_mutex_init(struct mutex *lock, const char *name,
87 struct lock_class_key *key)
438{ 88{
89#ifdef CONFIG_DEBUG_LOCK_ALLOC
439 /* 90 /*
440 * Make sure we are not reinitializing a held lock: 91 * Make sure we are not reinitializing a held lock:
441 */ 92 */
442 mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 93 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
94 lockdep_init_map(&lock->dep_map, name, key);
95#endif
443 lock->owner = NULL; 96 lock->owner = NULL;
444 INIT_LIST_HEAD(&lock->held_list);
445 lock->name = name;
446 lock->magic = lock; 97 lock->magic = lock;
447} 98}
448 99
@@ -456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)
456 */ 107 */
457void fastcall mutex_destroy(struct mutex *lock) 108void fastcall mutex_destroy(struct mutex *lock)
458{ 109{
459 DEBUG_WARN_ON(mutex_is_locked(lock)); 110 DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
460 lock->magic = NULL; 111 lock->magic = NULL;
461} 112}
462 113
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index a5196c36a5fd..babfbdfc534b 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -10,110 +10,44 @@
10 * More details are in kernel/mutex-debug.c. 10 * More details are in kernel/mutex-debug.c.
11 */ 11 */
12 12
13extern spinlock_t debug_mutex_lock;
14extern struct list_head debug_mutex_held_locks;
15extern int debug_mutex_on;
16
17/*
18 * In the debug case we carry the caller's instruction pointer into
19 * other functions, but we dont want the function argument overhead
20 * in the nondebug case - hence these macros:
21 */
22#define __IP_DECL__ , unsigned long ip
23#define __IP__ , ip
24#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
25
26/* 13/*
27 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
28 */ 15 */
29extern void debug_mutex_set_owner(struct mutex *lock, 16extern void
30 struct thread_info *new_owner __IP_DECL__); 17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
31 18
32static inline void debug_mutex_clear_owner(struct mutex *lock) 19static inline void debug_mutex_clear_owner(struct mutex *lock)
33{ 20{
34 lock->owner = NULL; 21 lock->owner = NULL;
35} 22}
36 23
37extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); 24extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter);
38extern void debug_mutex_wake_waiter(struct mutex *lock, 26extern void debug_mutex_wake_waiter(struct mutex *lock,
39 struct mutex_waiter *waiter); 27 struct mutex_waiter *waiter);
40extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); 28extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
41extern void debug_mutex_add_waiter(struct mutex *lock, 29extern void debug_mutex_add_waiter(struct mutex *lock,
42 struct mutex_waiter *waiter, 30 struct mutex_waiter *waiter,
43 struct thread_info *ti __IP_DECL__); 31 struct thread_info *ti);
44extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 32extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
45 struct thread_info *ti); 33 struct thread_info *ti);
46extern void debug_mutex_unlock(struct mutex *lock); 34extern void debug_mutex_unlock(struct mutex *lock);
47extern void debug_mutex_init(struct mutex *lock, const char *name); 35extern void debug_mutex_init(struct mutex *lock, const char *name,
48 36 struct lock_class_key *key);
49#define debug_spin_lock_save(lock, flags) \
50 do { \
51 local_irq_save(flags); \
52 if (debug_mutex_on) \
53 spin_lock(lock); \
54 } while (0)
55
56#define debug_spin_unlock_restore(lock, flags) \
57 do { \
58 if (debug_mutex_on) \
59 spin_unlock(lock); \
60 local_irq_restore(flags); \
61 preempt_check_resched(); \
62 } while (0)
63 37
64#define spin_lock_mutex(lock, flags) \ 38#define spin_lock_mutex(lock, flags) \
65 do { \ 39 do { \
66 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
67 \ 41 \
68 DEBUG_WARN_ON(in_interrupt()); \ 42 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
69 debug_spin_lock_save(&debug_mutex_lock, flags); \ 43 local_irq_save(flags); \
70 spin_lock(lock); \ 44 __raw_spin_lock(&(lock)->raw_lock); \
71 DEBUG_WARN_ON(l->magic != l); \ 45 DEBUG_LOCKS_WARN_ON(l->magic != l); \
72 } while (0) 46 } while (0)
73 47
74#define spin_unlock_mutex(lock, flags) \ 48#define spin_unlock_mutex(lock, flags) \
75 do { \ 49 do { \
76 spin_unlock(lock); \ 50 __raw_spin_unlock(&(lock)->raw_lock); \
77 debug_spin_unlock_restore(&debug_mutex_lock, flags); \ 51 local_irq_restore(flags); \
52 preempt_check_resched(); \
78 } while (0) 53 } while (0)
79
80#define DEBUG_OFF() \
81do { \
82 if (debug_mutex_on) { \
83 debug_mutex_on = 0; \
84 console_verbose(); \
85 if (spin_is_locked(&debug_mutex_lock)) \
86 spin_unlock(&debug_mutex_lock); \
87 } \
88} while (0)
89
90#define DEBUG_BUG() \
91do { \
92 if (debug_mutex_on) { \
93 DEBUG_OFF(); \
94 BUG(); \
95 } \
96} while (0)
97
98#define DEBUG_WARN_ON(c) \
99do { \
100 if (unlikely(c && debug_mutex_on)) { \
101 DEBUG_OFF(); \
102 WARN_ON(1); \
103 } \
104} while (0)
105
106# define DEBUG_BUG_ON(c) \
107do { \
108 if (unlikely(c)) \
109 DEBUG_BUG(); \
110} while (0)
111
112#ifdef CONFIG_SMP
113# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c)
114# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c)
115#else
116# define SMP_DEBUG_WARN_ON(c) do { } while (0)
117# define SMP_DEBUG_BUG_ON(c) do { } while (0)
118#endif
119
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 7043db21bbce..8c71cf72a497 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/debug_locks.h>
20 21
21/* 22/*
22 * In the DEBUG case we are using the "NULL fastpath" for mutexes, 23 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -38,13 +39,14 @@
38 * 39 *
39 * It is not allowed to initialize an already locked mutex. 40 * It is not allowed to initialize an already locked mutex.
40 */ 41 */
41void fastcall __mutex_init(struct mutex *lock, const char *name) 42void
43__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
42{ 44{
43 atomic_set(&lock->count, 1); 45 atomic_set(&lock->count, 1);
44 spin_lock_init(&lock->wait_lock); 46 spin_lock_init(&lock->wait_lock);
45 INIT_LIST_HEAD(&lock->wait_list); 47 INIT_LIST_HEAD(&lock->wait_list);
46 48
47 debug_mutex_init(lock, name); 49 debug_mutex_init(lock, name, key);
48} 50}
49 51
50EXPORT_SYMBOL(__mutex_init); 52EXPORT_SYMBOL(__mutex_init);
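With the new key argument, every initialization site has to supply a struct lock_class_key whose address identifies the lock class; in practice this is hidden behind the mutex_init() macro, which declares one static key per call site. A hedged illustration of what such a wrapper amounts to (the wrapper name is made up; the real macro lives in include/linux/mutex.h):

#include <linux/mutex.h>

/* Illustrative only -- roughly what the mutex_init() macro expands to: */
#define my_mutex_init(mutex)				\
do {							\
	static struct lock_class_key __key;		\
							\
	__mutex_init((mutex), #mutex, &__key);		\
} while (0)

static struct mutex demo_lock;

static void demo_setup(void)
{
	my_mutex_init(&demo_lock);	/* one lock class per init call site */
}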
@@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init);
56 * branch is predicted by the CPU as default-untaken. 58 * branch is predicted by the CPU as default-untaken.
57 */ 59 */
58static void fastcall noinline __sched 60static void fastcall noinline __sched
59__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); 61__mutex_lock_slowpath(atomic_t *lock_count);
60 62
61/*** 63/***
62 * mutex_lock - acquire the mutex 64 * mutex_lock - acquire the mutex
@@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
79 * 81 *
80 * This function is similar to (but not equivalent to) down(). 82 * This function is similar to (but not equivalent to) down().
81 */ 83 */
82void fastcall __sched mutex_lock(struct mutex *lock) 84void inline fastcall __sched mutex_lock(struct mutex *lock)
83{ 85{
84 might_sleep(); 86 might_sleep();
85 /* 87 /*
@@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)
92EXPORT_SYMBOL(mutex_lock); 94EXPORT_SYMBOL(mutex_lock);
93 95
94static void fastcall noinline __sched 96static void fastcall noinline __sched
95__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); 97__mutex_unlock_slowpath(atomic_t *lock_count);
96 98
97/*** 99/***
98 * mutex_unlock - release the mutex 100 * mutex_unlock - release the mutex
@@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock);
120 * Lock a mutex (possibly interruptible), slowpath: 122 * Lock a mutex (possibly interruptible), slowpath:
121 */ 123 */
122static inline int __sched 124static inline int __sched
123__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) 125__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
124{ 126{
125 struct task_struct *task = current; 127 struct task_struct *task = current;
126 struct mutex_waiter waiter; 128 struct mutex_waiter waiter;
127 unsigned int old_val; 129 unsigned int old_val;
128 unsigned long flags; 130 unsigned long flags;
129 131
130 debug_mutex_init_waiter(&waiter);
131
132 spin_lock_mutex(&lock->wait_lock, flags); 132 spin_lock_mutex(&lock->wait_lock, flags);
133 133
134 debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); 134 debug_mutex_lock_common(lock, &waiter);
135 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
136 debug_mutex_add_waiter(lock, &waiter, task->thread_info);
135 137
136 /* add waiting tasks to the end of the waitqueue (FIFO): */ 138 /* add waiting tasks to the end of the waitqueue (FIFO): */
137 list_add_tail(&waiter.list, &lock->wait_list); 139 list_add_tail(&waiter.list, &lock->wait_list);
@@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
158 if (unlikely(state == TASK_INTERRUPTIBLE && 160 if (unlikely(state == TASK_INTERRUPTIBLE &&
159 signal_pending(task))) { 161 signal_pending(task))) {
160 mutex_remove_waiter(lock, &waiter, task->thread_info); 162 mutex_remove_waiter(lock, &waiter, task->thread_info);
163 mutex_release(&lock->dep_map, 1, _RET_IP_);
161 spin_unlock_mutex(&lock->wait_lock, flags); 164 spin_unlock_mutex(&lock->wait_lock, flags);
162 165
163 debug_mutex_free_waiter(&waiter); 166 debug_mutex_free_waiter(&waiter);
@@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
173 176
174 /* got the lock - rejoice! */ 177 /* got the lock - rejoice! */
175 mutex_remove_waiter(lock, &waiter, task->thread_info); 178 mutex_remove_waiter(lock, &waiter, task->thread_info);
176 debug_mutex_set_owner(lock, task->thread_info __IP__); 179 debug_mutex_set_owner(lock, task->thread_info);
177 180
178 /* set it to 0 if there are no waiters left: */ 181 /* set it to 0 if there are no waiters left: */
179 if (likely(list_empty(&lock->wait_list))) 182 if (likely(list_empty(&lock->wait_list)))
@@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
183 186
184 debug_mutex_free_waiter(&waiter); 187 debug_mutex_free_waiter(&waiter);
185 188
186 DEBUG_WARN_ON(list_empty(&lock->held_list));
187 DEBUG_WARN_ON(lock->owner != task->thread_info);
188
189 return 0; 189 return 0;
190} 190}
191 191
192static void fastcall noinline __sched 192static void fastcall noinline __sched
193__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) 193__mutex_lock_slowpath(atomic_t *lock_count)
194{ 194{
195 struct mutex *lock = container_of(lock_count, struct mutex, count); 195 struct mutex *lock = container_of(lock_count, struct mutex, count);
196 196
197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); 197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
198}
199
200#ifdef CONFIG_DEBUG_LOCK_ALLOC
201void __sched
202mutex_lock_nested(struct mutex *lock, unsigned int subclass)
203{
204 might_sleep();
205 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
198} 206}
199 207
208EXPORT_SYMBOL_GPL(mutex_lock_nested);
209#endif
210
200/* 211/*
201 * Release the lock, slowpath: 212 * Release the lock, slowpath:
202 */ 213 */
203static fastcall noinline void 214static fastcall inline void
204__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) 215__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
205{ 216{
206 struct mutex *lock = container_of(lock_count, struct mutex, count); 217 struct mutex *lock = container_of(lock_count, struct mutex, count);
207 unsigned long flags; 218 unsigned long flags;
208 219
209 DEBUG_WARN_ON(lock->owner != current_thread_info());
210
211 spin_lock_mutex(&lock->wait_lock, flags); 220 spin_lock_mutex(&lock->wait_lock, flags);
221 mutex_release(&lock->dep_map, nested, _RET_IP_);
222 debug_mutex_unlock(lock);
212 223
213 /* 224 /*
214 * some architectures leave the lock unlocked in the fastpath failure 225 * some architectures leave the lock unlocked in the fastpath failure
@@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
218 if (__mutex_slowpath_needs_to_unlock()) 229 if (__mutex_slowpath_needs_to_unlock())
219 atomic_set(&lock->count, 1); 230 atomic_set(&lock->count, 1);
220 231
221 debug_mutex_unlock(lock);
222
223 if (!list_empty(&lock->wait_list)) { 232 if (!list_empty(&lock->wait_list)) {
224 /* get the first entry from the wait-list: */ 233 /* get the first entry from the wait-list: */
225 struct mutex_waiter *waiter = 234 struct mutex_waiter *waiter =
@@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
237} 246}
238 247
239/* 248/*
249 * Release the lock, slowpath:
250 */
251static fastcall noinline void
252__mutex_unlock_slowpath(atomic_t *lock_count)
253{
254 __mutex_unlock_common_slowpath(lock_count, 1);
255}
256
257/*
240 * Here come the less common (and hence less performance-critical) APIs: 258 * Here come the less common (and hence less performance-critical) APIs:
241 * mutex_lock_interruptible() and mutex_trylock(). 259 * mutex_lock_interruptible() and mutex_trylock().
242 */ 260 */
243static int fastcall noinline __sched 261static int fastcall noinline __sched
244__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); 262__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
245 263
246/*** 264/***
247 * mutex_lock_interruptible - acquire the mutex, interruptable 265 * mutex_lock_interruptible - acquire the mutex, interruptable
@@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
264EXPORT_SYMBOL(mutex_lock_interruptible); 282EXPORT_SYMBOL(mutex_lock_interruptible);
265 283
266static int fastcall noinline __sched 284static int fastcall noinline __sched
267__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) 285__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
268{ 286{
269 struct mutex *lock = container_of(lock_count, struct mutex, count); 287 struct mutex *lock = container_of(lock_count, struct mutex, count);
270 288
271 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); 289 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
272} 290}
273 291
274/* 292/*
@@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
284 spin_lock_mutex(&lock->wait_lock, flags); 302 spin_lock_mutex(&lock->wait_lock, flags);
285 303
286 prev = atomic_xchg(&lock->count, -1); 304 prev = atomic_xchg(&lock->count, -1);
287 if (likely(prev == 1)) 305 if (likely(prev == 1)) {
288 debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); 306 debug_mutex_set_owner(lock, current_thread_info());
307 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
308 }
289 /* Set it back to 0 if there are no waiters: */ 309 /* Set it back to 0 if there are no waiters: */
290 if (likely(list_empty(&lock->wait_list))) 310 if (likely(list_empty(&lock->wait_list)))
291 atomic_set(&lock->count, 0); 311 atomic_set(&lock->count, 0);
@@ -309,7 +329,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
309 * This function must not be used in interrupt context. The 329 * This function must not be used in interrupt context. The
310 * mutex must be released by the same task that acquired it. 330 * mutex must be released by the same task that acquired it.
311 */ 331 */
312int fastcall mutex_trylock(struct mutex *lock) 332int fastcall __sched mutex_trylock(struct mutex *lock)
313{ 333{
314 return __mutex_fastpath_trylock(&lock->count, 334 return __mutex_fastpath_trylock(&lock->count,
315 __mutex_trylock_slowpath); 335 __mutex_trylock_slowpath);
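
The mutex.c changes above do three things: the per-call-site __IP_DECL__ plumbing is dropped (lockdep records acquisition addresses itself via _RET_IP_), mutex_acquire()/mutex_release() hooks are added to the slow paths, and a new mutex_lock_nested() entry point (under CONFIG_DEBUG_LOCK_ALLOC) lets a caller pass a lockdep subclass. The subclass matters when two mutexes of the same lock class are legitimately nested, for example a parent object locked before a child object of the same type. The following is a minimal illustrative sketch, not part of the patch; the struct node type, lock_pair() and the subclass value 1 are invented for the example.

#include <linux/mutex.h>

/* Hypothetical objects whose mutexes share one lock class because they
 * are initialised at the same mutex_init() call site: */
struct node {
        struct mutex lock;
};

static void lock_pair(struct node *parent, struct node *child)
{
        mutex_lock(&parent->lock);              /* default subclass 0 */
        mutex_lock_nested(&child->lock, 1);     /* subclass 1: tell lockdep this
                                                 * parent->child nesting is intended */

        /* ... operate on both objects ... */

        mutex_unlock(&child->lock);
        mutex_unlock(&parent->lock);
}

When CONFIG_DEBUG_LOCK_ALLOC is off, mutex_lock_nested() is expected to fall back to a plain mutex_lock(), so the annotation costs nothing in production builds.
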
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 069189947257..a075dafbb290 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,22 +16,15 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define DEBUG_WARN_ON(c) do { } while (0)
20#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#define debug_mutex_set_owner(lock, new_owner) do { } while (0)
21#define debug_mutex_clear_owner(lock) do { } while (0) 20#define debug_mutex_clear_owner(lock) do { } while (0)
22#define debug_mutex_init_waiter(waiter) do { } while (0)
23#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
24#define debug_mutex_free_waiter(waiter) do { } while (0) 22#define debug_mutex_free_waiter(waiter) do { } while (0)
25#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0) 23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
26#define debug_mutex_unlock(lock) do { } while (0) 24#define debug_mutex_unlock(lock) do { } while (0)
27#define debug_mutex_init(lock, name) do { } while (0) 25#define debug_mutex_init(lock, name, key) do { } while (0)
28
29/*
30 * Return-address parameters/declarations. They are very useful for
31 * debugging, but add overhead in the !DEBUG case - so we go the
32 * trouble of using this not too elegant but zero-cost solution:
33 */
34#define __IP_DECL__
35#define __IP__
36#define __RET_IP__
37 26
27static inline void
28debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
29{
30}
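
kernel/mutex.h is the !DEBUG counterpart of the hooks above: each debug helper becomes either an empty do { } while (0) macro or an empty static inline, so the hooks vanish from the fast path when CONFIG_DEBUG_MUTEXES is off. A generic sketch of the pattern (debug_hook() and example_unlock() are made-up names, shown only to illustrate the idiom):

#include <linux/mutex.h>

#ifdef CONFIG_DEBUG_MUTEXES
extern void debug_hook(struct mutex *lock);
#else
/* Expands to a single empty statement: safe after an unbraced if/else,
 * and compiled away entirely in the !DEBUG case. */
# define debug_hook(lock)               do { } while (0)
#endif

static inline void example_unlock(struct mutex *lock)
{
        debug_hook(lock);               /* nothing at all without debugging */
        mutex_unlock(lock);
}
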
diff --git a/kernel/panic.c b/kernel/panic.c
index cc2a4c9c36ac..ab13f0f668b5 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -8,7 +8,6 @@
8 * This function is used through-out the kernel (including mm and fs) 8 * This function is used through-out the kernel (including mm and fs)
9 * to indicate a major problem. 9 * to indicate a major problem.
10 */ 10 */
11#include <linux/config.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/sched.h> 12#include <linux/sched.h>
14#include <linux/delay.h> 13#include <linux/delay.h>
diff --git a/kernel/params.c b/kernel/params.c
index af43ecdc8d9b..91aea7aa532e 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -15,7 +15,6 @@
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/ 17*/
18#include <linux/config.h>
19#include <linux/moduleparam.h> 18#include <linux/moduleparam.h>
20#include <linux/kernel.h> 19#include <linux/kernel.h>
21#include <linux/string.h> 20#include <linux/string.h>
diff --git a/kernel/pid.c b/kernel/pid.c
index eeb836b65ca4..93e212f20671 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)
218 return NULL; 218 return NULL;
219} 219}
220 220
221int fastcall attach_pid(task_t *task, enum pid_type type, int nr) 221int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
222{ 222{
223 struct pid_link *link; 223 struct pid_link *link;
224 struct pid *pid; 224 struct pid *pid;
@@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
233 return 0; 233 return 0;
234} 234}
235 235
236void fastcall detach_pid(task_t *task, enum pid_type type) 236void fastcall detach_pid(struct task_struct *task, enum pid_type type)
237{ 237{
238 struct pid_link *link; 238 struct pid_link *link;
239 struct pid *pid; 239 struct pid *pid;
@@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
267/* 267/*
268 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 268 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
269 */ 269 */
270task_t *find_task_by_pid_type(int type, int nr) 270struct task_struct *find_task_by_pid_type(int type, int nr)
271{ 271{
272 return pid_task(find_pid(nr), type); 272 return pid_task(find_pid(nr), type);
273} 273}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 857b4fa09124..ae44a70aae8a 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,18 +100,6 @@ config PM_STD_PARTITION
100 suspended image to. It will simply pick the first available swap 100 suspended image to. It will simply pick the first available swap
101 device. 101 device.
102 102
103config SWSUSP_ENCRYPT
104 bool "Encrypt suspend image"
105 depends on SOFTWARE_SUSPEND && CRYPTO=y && (CRYPTO_AES=y || CRYPTO_AES_586=y || CRYPTO_AES_X86_64=y)
106 default ""
107 ---help---
108 To prevent data gathering from swap after resume you can encrypt
109 the suspend image with a temporary key that is deleted on
110 resume.
111
112 Note that the temporary key is stored unencrypted on disk while the
113 system is suspended.
114
115config SUSPEND_SMP 103config SUSPEND_SMP
116 bool 104 bool
117 depends on HOTPLUG_CPU && X86 && PM 105 depends on HOTPLUG_CPU && X86 && PM
diff --git a/kernel/printk.c b/kernel/printk.c
index 95b7fe17f124..bdba5d80496c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -26,7 +26,6 @@
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
28#include <linux/interrupt.h> /* For in_interrupt() */ 28#include <linux/interrupt.h> /* For in_interrupt() */
29#include <linux/config.h>
30#include <linux/delay.h> 29#include <linux/delay.h>
31#include <linux/smp.h> 30#include <linux/smp.h>
32#include <linux/security.h> 31#include <linux/security.h>
@@ -519,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
519 zap_locks(); 518 zap_locks();
520 519
521 /* This stops the holder of console_sem just where we want him */ 520 /* This stops the holder of console_sem just where we want him */
522 spin_lock_irqsave(&logbuf_lock, flags); 521 local_irq_save(flags);
522 lockdep_off();
523 spin_lock(&logbuf_lock);
523 printk_cpu = smp_processor_id(); 524 printk_cpu = smp_processor_id();
524 525
525 /* Emit the output into the temporary buffer */ 526 /* Emit the output into the temporary buffer */
@@ -589,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
589 */ 590 */
590 console_locked = 1; 591 console_locked = 1;
591 printk_cpu = UINT_MAX; 592 printk_cpu = UINT_MAX;
592 spin_unlock_irqrestore(&logbuf_lock, flags); 593 spin_unlock(&logbuf_lock);
593 594
594 /* 595 /*
595 * Console drivers may assume that per-cpu resources have 596 * Console drivers may assume that per-cpu resources have
@@ -605,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
605 console_locked = 0; 606 console_locked = 0;
606 up(&console_sem); 607 up(&console_sem);
607 } 608 }
609 lockdep_on();
610 local_irq_restore(flags);
608 } else { 611 } else {
609 /* 612 /*
610 * Someone else owns the drivers. We drop the spinlock, which 613 * Someone else owns the drivers. We drop the spinlock, which
@@ -612,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
612 * console drivers with the output which we just produced. 615 * console drivers with the output which we just produced.
613 */ 616 */
614 printk_cpu = UINT_MAX; 617 printk_cpu = UINT_MAX;
615 spin_unlock_irqrestore(&logbuf_lock, flags); 618 spin_unlock(&logbuf_lock);
619 lockdep_on();
620 local_irq_restore(flags);
616 } 621 }
617 622
618 preempt_enable(); 623 preempt_enable();
@@ -810,8 +815,15 @@ void release_console_sem(void)
810 console_may_schedule = 0; 815 console_may_schedule = 0;
811 up(&console_sem); 816 up(&console_sem);
812 spin_unlock_irqrestore(&logbuf_lock, flags); 817 spin_unlock_irqrestore(&logbuf_lock, flags);
813 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) 818 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
814 wake_up_interruptible(&log_wait); 819 /*
820 * If we printk from within the lock dependency code,
821 * from within the scheduler code, then do not lock
822 * up due to self-recursion:
823 */
824 if (!lockdep_internal())
825 wake_up_interruptible(&log_wait);
826 }
815} 827}
816EXPORT_SYMBOL(release_console_sem); 828EXPORT_SYMBOL(release_console_sem);
817 829
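
The printk.c hunks split the old spin_lock_irqsave(&logbuf_lock, ...) into an explicit local_irq_save() + lockdep_off() + spin_lock() sequence, because printk() can be called from inside the lock-dependency engine and from the scheduler; letting lockdep trace logbuf_lock there would recurse into lockdep itself. The klogd wake-up is likewise skipped while lockdep is running internally. A generic sketch of the off/on pattern follows; my_lock and the function name are invented for illustration:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

static DEFINE_SPINLOCK(my_lock);        /* hypothetical lock */

static void emit_without_lockdep_recursion(void)
{
        unsigned long flags;

        local_irq_save(flags);
        lockdep_off();                  /* lockdep ignores the operations below */
        spin_lock(&my_lock);

        /* ... work that may itself be reached from inside lockdep ... */

        spin_unlock(&my_lock);
        lockdep_on();
        local_irq_restore(flags);
}
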
diff --git a/kernel/profile.c b/kernel/profile.c
index 5a730fdb1a2c..d5bd75e7501c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -13,7 +13,6 @@
13 * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004 13 * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
14 */ 14 */
15 15
16#include <linux/config.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/profile.h> 17#include <linux/profile.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 335c5b932e14..9a111f70145c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,7 +28,7 @@
28 * 28 *
29 * Must be called with the tasklist lock write-held. 29 * Must be called with the tasklist lock write-held.
30 */ 30 */
31void __ptrace_link(task_t *child, task_t *new_parent) 31void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
32{ 32{
33 BUG_ON(!list_empty(&child->ptrace_list)); 33 BUG_ON(!list_empty(&child->ptrace_list));
34 if (child->parent == new_parent) 34 if (child->parent == new_parent)
@@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)
46 * TASK_TRACED, resume it now. 46 * TASK_TRACED, resume it now.
47 * Requires that irqs be disabled. 47 * Requires that irqs be disabled.
48 */ 48 */
49void ptrace_untrace(task_t *child) 49void ptrace_untrace(struct task_struct *child)
50{ 50{
51 spin_lock(&child->sighand->siglock); 51 spin_lock(&child->sighand->siglock);
52 if (child->state == TASK_TRACED) { 52 if (child->state == TASK_TRACED) {
@@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)
65 * 65 *
66 * Must be called with the tasklist lock write-held. 66 * Must be called with the tasklist lock write-held.
67 */ 67 */
68void __ptrace_unlink(task_t *child) 68void __ptrace_unlink(struct task_struct *child)
69{ 69{
70 BUG_ON(!child->ptrace); 70 BUG_ON(!child->ptrace);
71 71
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f464f5ae3f11..759805c9859a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,13 +53,13 @@
53static struct rcu_ctrlblk rcu_ctrlblk = { 53static struct rcu_ctrlblk rcu_ctrlblk = {
54 .cur = -300, 54 .cur = -300,
55 .completed = -300, 55 .completed = -300,
56 .lock = SPIN_LOCK_UNLOCKED, 56 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
57 .cpumask = CPU_MASK_NONE, 57 .cpumask = CPU_MASK_NONE,
58}; 58};
59static struct rcu_ctrlblk rcu_bh_ctrlblk = { 59static struct rcu_ctrlblk rcu_bh_ctrlblk = {
60 .cur = -300, 60 .cur = -300,
61 .completed = -300, 61 .completed = -300,
62 .lock = SPIN_LOCK_UNLOCKED, 62 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
63 .cpumask = CPU_MASK_NONE, 63 .cpumask = CPU_MASK_NONE,
64}; 64};
65 65
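
In rcupdate.c the two static control blocks switch from the shared SPIN_LOCK_UNLOCKED initializer to __SPIN_LOCK_UNLOCKED(name). Lockdep wants each statically initialised lock to carry its own name, so that unrelated locks do not look identical in reports. A sketch of the idiom for some other static structure (my_ctrl is a made-up example):

#include <linux/spinlock.h>

struct my_ctrl {                        /* hypothetical structure */
        int counter;
        spinlock_t lock;
};

static struct my_ctrl my_ctrl = {
        .counter = 0,
        /* name the lock after its storage so lockdep reports stay readable: */
        .lock    = __SPIN_LOCK_UNLOCKED(my_ctrl.lock),
};

A standalone static spinlock gets the same treatment via DEFINE_SPINLOCK(name).
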
diff --git a/kernel/resource.c b/kernel/resource.c
index bf1130d81b7f..129cf046e561 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -7,7 +7,6 @@
7 * Arbitrary resource management. 7 * Arbitrary resource management.
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/module.h> 10#include <linux/module.h>
12#include <linux/sched.h> 11#include <linux/sched.h>
13#include <linux/errno.h> 12#include <linux/errno.h>
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 4aa8a2c9f453..0c1faa950af7 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -26,6 +26,7 @@
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/plist.h> 27#include <linux/plist.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/debug_locks.h>
29 30
30#include "rtmutex_common.h" 31#include "rtmutex_common.h"
31 32
@@ -45,8 +46,6 @@ do { \
45 console_verbose(); \ 46 console_verbose(); \
46 if (spin_is_locked(&current->pi_lock)) \ 47 if (spin_is_locked(&current->pi_lock)) \
47 spin_unlock(&current->pi_lock); \ 48 spin_unlock(&current->pi_lock); \
48 if (spin_is_locked(&current->held_list_lock)) \
49 spin_unlock(&current->held_list_lock); \
50 } \ 49 } \
51} while (0) 50} while (0)
52 51
@@ -97,7 +96,7 @@ void deadlock_trace_off(void)
97 rt_trace_on = 0; 96 rt_trace_on = 0;
98} 97}
99 98
100static void printk_task(task_t *p) 99static void printk_task(struct task_struct *p)
101{ 100{
102 if (p) 101 if (p)
103 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); 102 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
@@ -105,14 +104,6 @@ static void printk_task(task_t *p)
105 printk("<none>"); 104 printk("<none>");
106} 105}
107 106
108static void printk_task_short(task_t *p)
109{
110 if (p)
111 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
112 else
113 printk("<none>");
114}
115
116static void printk_lock(struct rt_mutex *lock, int print_owner) 107static void printk_lock(struct rt_mutex *lock, int print_owner)
117{ 108{
118 if (lock->name) 109 if (lock->name)
@@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
128 printk_task(rt_mutex_owner(lock)); 119 printk_task(rt_mutex_owner(lock));
129 printk("\n"); 120 printk("\n");
130 } 121 }
131 if (rt_mutex_owner(lock)) {
132 printk("... acquired at: ");
133 print_symbol("%s\n", lock->acquire_ip);
134 }
135}
136
137static void printk_waiter(struct rt_mutex_waiter *w)
138{
139 printk("-------------------------\n");
140 printk("| waiter struct %p:\n", w);
141 printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
142 w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next,
143 w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next,
144 w->list_entry.prio);
145 printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
146 w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next,
147 w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next,
148 w->pi_list_entry.prio);
149 printk("\n| lock:\n");
150 printk_lock(w->lock, 1);
151 printk("| w->ti->task:\n");
152 printk_task(w->task);
153 printk("| blocked at: ");
154 print_symbol("%s\n", w->ip);
155 printk("-------------------------\n");
156}
157
158static void show_task_locks(task_t *p)
159{
160 switch (p->state) {
161 case TASK_RUNNING: printk("R"); break;
162 case TASK_INTERRUPTIBLE: printk("S"); break;
163 case TASK_UNINTERRUPTIBLE: printk("D"); break;
164 case TASK_STOPPED: printk("T"); break;
165 case EXIT_ZOMBIE: printk("Z"); break;
166 case EXIT_DEAD: printk("X"); break;
167 default: printk("?"); break;
168 }
169 printk_task(p);
170 if (p->pi_blocked_on) {
171 struct rt_mutex *lock = p->pi_blocked_on->lock;
172
173 printk(" blocked on:");
174 printk_lock(lock, 1);
175 } else
176 printk(" (not blocked)\n");
177}
178
179void rt_mutex_show_held_locks(task_t *task, int verbose)
180{
181 struct list_head *curr, *cursor = NULL;
182 struct rt_mutex *lock;
183 task_t *t;
184 unsigned long flags;
185 int count = 0;
186
187 if (!rt_trace_on)
188 return;
189
190 if (verbose) {
191 printk("------------------------------\n");
192 printk("| showing all locks held by: | (");
193 printk_task_short(task);
194 printk("):\n");
195 printk("------------------------------\n");
196 }
197
198next:
199 spin_lock_irqsave(&task->held_list_lock, flags);
200 list_for_each(curr, &task->held_list_head) {
201 if (cursor && curr != cursor)
202 continue;
203 lock = list_entry(curr, struct rt_mutex, held_list_entry);
204 t = rt_mutex_owner(lock);
205 WARN_ON(t != task);
206 count++;
207 cursor = curr->next;
208 spin_unlock_irqrestore(&task->held_list_lock, flags);
209
210 printk("\n#%03d: ", count);
211 printk_lock(lock, 0);
212 goto next;
213 }
214 spin_unlock_irqrestore(&task->held_list_lock, flags);
215
216 printk("\n");
217}
218
219void rt_mutex_show_all_locks(void)
220{
221 task_t *g, *p;
222 int count = 10;
223 int unlock = 1;
224
225 printk("\n");
226 printk("----------------------\n");
227 printk("| showing all tasks: |\n");
228 printk("----------------------\n");
229
230 /*
231 * Here we try to get the tasklist_lock as hard as possible,
232 * if not successful after 2 seconds we ignore it (but keep
233 * trying). This is to enable a debug printout even if a
234 * tasklist_lock-holding task deadlocks or crashes.
235 */
236retry:
237 if (!read_trylock(&tasklist_lock)) {
238 if (count == 10)
239 printk("hm, tasklist_lock locked, retrying... ");
240 if (count) {
241 count--;
242 printk(" #%d", 10-count);
243 mdelay(200);
244 goto retry;
245 }
246 printk(" ignoring it.\n");
247 unlock = 0;
248 }
249 if (count != 10)
250 printk(" locked it.\n");
251
252 do_each_thread(g, p) {
253 show_task_locks(p);
254 if (!unlock)
255 if (read_trylock(&tasklist_lock))
256 unlock = 1;
257 } while_each_thread(g, p);
258
259 printk("\n");
260
261 printk("-----------------------------------------\n");
262 printk("| showing all locks held in the system: |\n");
263 printk("-----------------------------------------\n");
264
265 do_each_thread(g, p) {
266 rt_mutex_show_held_locks(p, 0);
267 if (!unlock)
268 if (read_trylock(&tasklist_lock))
269 unlock = 1;
270 } while_each_thread(g, p);
271
272
273 printk("=============================================\n\n");
274
275 if (unlock)
276 read_unlock(&tasklist_lock);
277}
278
279void rt_mutex_debug_check_no_locks_held(task_t *task)
280{
281 struct rt_mutex_waiter *w;
282 struct list_head *curr;
283 struct rt_mutex *lock;
284
285 if (!rt_trace_on)
286 return;
287 if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) {
288 printk("BUG: PI priority boost leaked!\n");
289 printk_task(task);
290 printk("\n");
291 }
292 if (list_empty(&task->held_list_head))
293 return;
294
295 spin_lock(&task->pi_lock);
296 plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) {
297 TRACE_OFF();
298
299 printk("hm, PI interest held at exit time? Task:\n");
300 printk_task(task);
301 printk_waiter(w);
302 return;
303 }
304 spin_unlock(&task->pi_lock);
305
306 list_for_each(curr, &task->held_list_head) {
307 lock = list_entry(curr, struct rt_mutex, held_list_entry);
308
309 printk("BUG: %s/%d, lock held at task exit time!\n",
310 task->comm, task->pid);
311 printk_lock(lock, 1);
312 if (rt_mutex_owner(lock) != task)
313 printk("exiting task is not even the owner??\n");
314 }
315}
316
317int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
318{
319 const void *to = from + len;
320 struct list_head *curr;
321 struct rt_mutex *lock;
322 unsigned long flags;
323 void *lock_addr;
324
325 if (!rt_trace_on)
326 return 0;
327
328 spin_lock_irqsave(&current->held_list_lock, flags);
329 list_for_each(curr, &current->held_list_head) {
330 lock = list_entry(curr, struct rt_mutex, held_list_entry);
331 lock_addr = lock;
332 if (lock_addr < from || lock_addr >= to)
333 continue;
334 TRACE_OFF();
335
336 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
337 current->comm, current->pid, lock, from, to);
338 dump_stack();
339 printk_lock(lock, 1);
340 if (rt_mutex_owner(lock) != current)
341 printk("freeing task is not even the owner??\n");
342 return 1;
343 }
344 spin_unlock_irqrestore(&current->held_list_lock, flags);
345
346 return 0;
347} 122}
348 123
349void rt_mutex_debug_task_free(struct task_struct *task) 124void rt_mutex_debug_task_free(struct task_struct *task)
@@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
395 current->comm, current->pid); 170 current->comm, current->pid);
396 printk_lock(waiter->lock, 1); 171 printk_lock(waiter->lock, 1);
397 172
398 printk("... trying at: ");
399 print_symbol("%s\n", waiter->ip);
400
401 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid); 173 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
402 printk_lock(waiter->deadlock_lock, 1); 174 printk_lock(waiter->deadlock_lock, 1);
403 175
404 rt_mutex_show_held_locks(current, 1); 176 debug_show_held_locks(current);
405 rt_mutex_show_held_locks(task, 1); 177 debug_show_held_locks(task);
406 178
407 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid); 179 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
408 show_stack(task, NULL); 180 show_stack(task, NULL);
409 printk("\n%s/%d's [current] stackdump:\n\n", 181 printk("\n%s/%d's [current] stackdump:\n\n",
410 current->comm, current->pid); 182 current->comm, current->pid);
411 dump_stack(); 183 dump_stack();
412 rt_mutex_show_all_locks(); 184 debug_show_all_locks();
185
413 printk("[ turning off deadlock detection." 186 printk("[ turning off deadlock detection."
414 "Please report this trace. ]\n\n"); 187 "Please report this trace. ]\n\n");
415 local_irq_disable(); 188 local_irq_disable();
416} 189}
417 190
418void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__) 191void debug_rt_mutex_lock(struct rt_mutex *lock)
419{ 192{
420 unsigned long flags;
421
422 if (rt_trace_on) {
423 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
424
425 spin_lock_irqsave(&current->held_list_lock, flags);
426 list_add_tail(&lock->held_list_entry, &current->held_list_head);
427 spin_unlock_irqrestore(&current->held_list_lock, flags);
428
429 lock->acquire_ip = ip;
430 }
431} 193}
432 194
433void debug_rt_mutex_unlock(struct rt_mutex *lock) 195void debug_rt_mutex_unlock(struct rt_mutex *lock)
434{ 196{
435 unsigned long flags; 197 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
436
437 if (rt_trace_on) {
438 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
439 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
440
441 spin_lock_irqsave(&current->held_list_lock, flags);
442 list_del_init(&lock->held_list_entry);
443 spin_unlock_irqrestore(&current->held_list_lock, flags);
444 }
445} 198}
446 199
447void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 200void
448 struct task_struct *powner __IP_DECL__) 201debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
449{ 202{
450 unsigned long flags;
451
452 if (rt_trace_on) {
453 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
454
455 spin_lock_irqsave(&powner->held_list_lock, flags);
456 list_add_tail(&lock->held_list_entry, &powner->held_list_head);
457 spin_unlock_irqrestore(&powner->held_list_lock, flags);
458
459 lock->acquire_ip = ip;
460 }
461} 203}
462 204
463void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) 205void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
464{ 206{
465 unsigned long flags; 207 TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
466
467 if (rt_trace_on) {
468 struct task_struct *owner = rt_mutex_owner(lock);
469
470 TRACE_WARN_ON_LOCKED(!owner);
471 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
472
473 spin_lock_irqsave(&owner->held_list_lock, flags);
474 list_del_init(&lock->held_list_entry);
475 spin_unlock_irqrestore(&owner->held_list_lock, flags);
476 }
477} 208}
478 209
479void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 210void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
493 224
494void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) 225void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
495{ 226{
496 void *addr = lock; 227 /*
497 228 * Make sure we are not reinitializing a held lock:
498 if (rt_trace_on) { 229 */
499 rt_mutex_debug_check_no_locks_freed(addr, 230 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
500 sizeof(struct rt_mutex)); 231 lock->name = name;
501 INIT_LIST_HEAD(&lock->held_list_entry);
502 lock->name = name;
503 }
504} 232}
505 233
506void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) 234void
235rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
507{ 236{
508} 237}
509 238
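
The large deletion in rtmutex-debug.c removes the rt-mutex-private held-lock bookkeeping (held_list_head, printk_waiter(), rt_mutex_show_all_locks(), the lock-freed scan) because lockdep now provides generic replacements: debug_show_held_locks(), debug_show_all_locks() and debug_check_no_locks_freed(), used above in debug_rt_mutex_print_deadlock() and debug_rt_mutex_init(). Other code can use the same helpers when freeing memory that embeds locks; a sketch with a made-up object type:

#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/debug_locks.h>

struct my_object {                      /* hypothetical object embedding a lock */
        struct mutex lock;
        int data;
};

static void my_object_free(struct my_object *obj)
{
        /* warn (via the lock debugging code) if any lock inside obj
         * is still live when the memory is returned: */
        debug_check_no_locks_freed(obj, sizeof(*obj));
        kfree(obj);
}
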
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 7612fbc62d70..14193d596d78 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -9,20 +9,16 @@
9 * This file contains macros used solely by rtmutex.c. Debug version. 9 * This file contains macros used solely by rtmutex.c. Debug version.
10 */ 10 */
11 11
12#define __IP_DECL__ , unsigned long ip
13#define __IP__ , ip
14#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
15
16extern void 12extern void
17rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); 13rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
18extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); 14extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
19extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); 15extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
20extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); 16extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
21extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); 17extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
22extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__); 18extern void debug_rt_mutex_lock(struct rt_mutex *lock);
23extern void debug_rt_mutex_unlock(struct rt_mutex *lock); 19extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
24extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 20extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
25 struct task_struct *powner __IP_DECL__); 21 struct task_struct *powner);
26extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); 22extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
27extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, 23extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
28 struct rt_mutex *lock); 24 struct rt_mutex *lock);
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index e82c2f848249..494dac872a13 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -33,7 +33,7 @@ struct test_thread_data {
33}; 33};
34 34
35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; 35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
36static task_t *threads[MAX_RT_TEST_THREADS]; 36static struct task_struct *threads[MAX_RT_TEST_THREADS];
37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; 37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
38 38
39enum test_opcodes { 39enum test_opcodes {
@@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
362{ 362{
363 struct test_thread_data *td; 363 struct test_thread_data *td;
364 struct task_struct *tsk;
364 char *curr = buf; 365 char *curr = buf;
365 task_t *tsk;
366 int i; 366 int i;
367 367
368 td = container_of(dev, struct test_thread_data, sysdev); 368 td = container_of(dev, struct test_thread_data, sysdev);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 45d61016da57..d2ef13b485e7 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -157,12 +157,11 @@ int max_lock_depth = 1024;
157 * Decreases task's usage by one - may thus free the task. 157 * Decreases task's usage by one - may thus free the task.
158 * Returns 0 or -EDEADLK. 158 * Returns 0 or -EDEADLK.
159 */ 159 */
160static int rt_mutex_adjust_prio_chain(task_t *task, 160static int rt_mutex_adjust_prio_chain(struct task_struct *task,
161 int deadlock_detect, 161 int deadlock_detect,
162 struct rt_mutex *orig_lock, 162 struct rt_mutex *orig_lock,
163 struct rt_mutex_waiter *orig_waiter, 163 struct rt_mutex_waiter *orig_waiter,
164 struct task_struct *top_task 164 struct task_struct *top_task)
165 __IP_DECL__)
166{ 165{
167 struct rt_mutex *lock; 166 struct rt_mutex *lock;
168 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 167 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
283 spin_unlock_irqrestore(&task->pi_lock, flags); 282 spin_unlock_irqrestore(&task->pi_lock, flags);
284 out_put_task: 283 out_put_task:
285 put_task_struct(task); 284 put_task_struct(task);
285
286 return ret; 286 return ret;
287} 287}
288 288
@@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
357 * 357 *
358 * Must be called with lock->wait_lock held. 358 * Must be called with lock->wait_lock held.
359 */ 359 */
360static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) 360static int try_to_take_rt_mutex(struct rt_mutex *lock)
361{ 361{
362 /* 362 /*
363 * We have to be careful here if the atomic speedups are 363 * We have to be careful here if the atomic speedups are
@@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
384 return 0; 384 return 0;
385 385
386 /* We got the lock. */ 386 /* We got the lock. */
387 debug_rt_mutex_lock(lock __IP__); 387 debug_rt_mutex_lock(lock);
388 388
389 rt_mutex_set_owner(lock, current, 0); 389 rt_mutex_set_owner(lock, current, 0);
390 390
@@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
402 */ 402 */
403static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 403static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
404 struct rt_mutex_waiter *waiter, 404 struct rt_mutex_waiter *waiter,
405 int detect_deadlock 405 int detect_deadlock)
406 __IP_DECL__)
407{ 406{
407 struct task_struct *owner = rt_mutex_owner(lock);
408 struct rt_mutex_waiter *top_waiter = waiter; 408 struct rt_mutex_waiter *top_waiter = waiter;
409 task_t *owner = rt_mutex_owner(lock);
410 int boost = 0, res;
411 unsigned long flags; 409 unsigned long flags;
410 int boost = 0, res;
412 411
413 spin_lock_irqsave(&current->pi_lock, flags); 412 spin_lock_irqsave(&current->pi_lock, flags);
414 __rt_mutex_adjust_prio(current); 413 __rt_mutex_adjust_prio(current);
@@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
454 spin_unlock(&lock->wait_lock); 453 spin_unlock(&lock->wait_lock);
455 454
456 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 455 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
457 current __IP__); 456 current);
458 457
459 spin_lock(&lock->wait_lock); 458 spin_lock(&lock->wait_lock);
460 459
@@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
526 * Must be called with lock->wait_lock held 525 * Must be called with lock->wait_lock held
527 */ 526 */
528static void remove_waiter(struct rt_mutex *lock, 527static void remove_waiter(struct rt_mutex *lock,
529 struct rt_mutex_waiter *waiter __IP_DECL__) 528 struct rt_mutex_waiter *waiter)
530{ 529{
531 int first = (waiter == rt_mutex_top_waiter(lock)); 530 int first = (waiter == rt_mutex_top_waiter(lock));
532 int boost = 0; 531 struct task_struct *owner = rt_mutex_owner(lock);
533 task_t *owner = rt_mutex_owner(lock);
534 unsigned long flags; 532 unsigned long flags;
533 int boost = 0;
535 534
536 spin_lock_irqsave(&current->pi_lock, flags); 535 spin_lock_irqsave(&current->pi_lock, flags);
537 plist_del(&waiter->list_entry, &lock->wait_list); 536 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock,
568 567
569 spin_unlock(&lock->wait_lock); 568 spin_unlock(&lock->wait_lock);
570 569
571 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__); 570 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
572 571
573 spin_lock(&lock->wait_lock); 572 spin_lock(&lock->wait_lock);
574} 573}
@@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
595 get_task_struct(task); 594 get_task_struct(task);
596 spin_unlock_irqrestore(&task->pi_lock, flags); 595 spin_unlock_irqrestore(&task->pi_lock, flags);
597 596
598 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__); 597 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
599} 598}
600 599
601/* 600/*
@@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
604static int __sched 603static int __sched
605rt_mutex_slowlock(struct rt_mutex *lock, int state, 604rt_mutex_slowlock(struct rt_mutex *lock, int state,
606 struct hrtimer_sleeper *timeout, 605 struct hrtimer_sleeper *timeout,
607 int detect_deadlock __IP_DECL__) 606 int detect_deadlock)
608{ 607{
609 struct rt_mutex_waiter waiter; 608 struct rt_mutex_waiter waiter;
610 int ret = 0; 609 int ret = 0;
@@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
615 spin_lock(&lock->wait_lock); 614 spin_lock(&lock->wait_lock);
616 615
617 /* Try to acquire the lock again: */ 616 /* Try to acquire the lock again: */
618 if (try_to_take_rt_mutex(lock __IP__)) { 617 if (try_to_take_rt_mutex(lock)) {
619 spin_unlock(&lock->wait_lock); 618 spin_unlock(&lock->wait_lock);
620 return 0; 619 return 0;
621 } 620 }
@@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
629 628
630 for (;;) { 629 for (;;) {
631 /* Try to acquire the lock: */ 630 /* Try to acquire the lock: */
632 if (try_to_take_rt_mutex(lock __IP__)) 631 if (try_to_take_rt_mutex(lock))
633 break; 632 break;
634 633
635 /* 634 /*
@@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
653 */ 652 */
654 if (!waiter.task) { 653 if (!waiter.task) {
655 ret = task_blocks_on_rt_mutex(lock, &waiter, 654 ret = task_blocks_on_rt_mutex(lock, &waiter,
656 detect_deadlock __IP__); 655 detect_deadlock);
657 /* 656 /*
658 * If we got woken up by the owner then start loop 657 * If we got woken up by the owner then start loop
659 * all over without going into schedule to try 658 * all over without going into schedule to try
@@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
680 set_current_state(TASK_RUNNING); 679 set_current_state(TASK_RUNNING);
681 680
682 if (unlikely(waiter.task)) 681 if (unlikely(waiter.task))
683 remove_waiter(lock, &waiter __IP__); 682 remove_waiter(lock, &waiter);
684 683
685 /* 684 /*
686 * try_to_take_rt_mutex() sets the waiter bit 685 * try_to_take_rt_mutex() sets the waiter bit
@@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
711 * Slow path try-lock function: 710 * Slow path try-lock function:
712 */ 711 */
713static inline int 712static inline int
714rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) 713rt_mutex_slowtrylock(struct rt_mutex *lock)
715{ 714{
716 int ret = 0; 715 int ret = 0;
717 716
@@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
719 718
720 if (likely(rt_mutex_owner(lock) != current)) { 719 if (likely(rt_mutex_owner(lock) != current)) {
721 720
722 ret = try_to_take_rt_mutex(lock __IP__); 721 ret = try_to_take_rt_mutex(lock);
723 /* 722 /*
724 * try_to_take_rt_mutex() sets the lock waiters 723 * try_to_take_rt_mutex() sets the lock waiters
725 * bit unconditionally. Clean this up. 724 * bit unconditionally. Clean this up.
@@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
769 int detect_deadlock, 768 int detect_deadlock,
770 int (*slowfn)(struct rt_mutex *lock, int state, 769 int (*slowfn)(struct rt_mutex *lock, int state,
771 struct hrtimer_sleeper *timeout, 770 struct hrtimer_sleeper *timeout,
772 int detect_deadlock __IP_DECL__)) 771 int detect_deadlock))
773{ 772{
774 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 773 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
775 rt_mutex_deadlock_account_lock(lock, current); 774 rt_mutex_deadlock_account_lock(lock, current);
776 return 0; 775 return 0;
777 } else 776 } else
778 return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); 777 return slowfn(lock, state, NULL, detect_deadlock);
779} 778}
780 779
781static inline int 780static inline int
@@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
783 struct hrtimer_sleeper *timeout, int detect_deadlock, 782 struct hrtimer_sleeper *timeout, int detect_deadlock,
784 int (*slowfn)(struct rt_mutex *lock, int state, 783 int (*slowfn)(struct rt_mutex *lock, int state,
785 struct hrtimer_sleeper *timeout, 784 struct hrtimer_sleeper *timeout,
786 int detect_deadlock __IP_DECL__)) 785 int detect_deadlock))
787{ 786{
788 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 787 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
789 rt_mutex_deadlock_account_lock(lock, current); 788 rt_mutex_deadlock_account_lock(lock, current);
790 return 0; 789 return 0;
791 } else 790 } else
792 return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); 791 return slowfn(lock, state, timeout, detect_deadlock);
793} 792}
794 793
795static inline int 794static inline int
796rt_mutex_fasttrylock(struct rt_mutex *lock, 795rt_mutex_fasttrylock(struct rt_mutex *lock,
797 int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) 796 int (*slowfn)(struct rt_mutex *lock))
798{ 797{
799 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { 798 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
800 rt_mutex_deadlock_account_lock(lock, current); 799 rt_mutex_deadlock_account_lock(lock, current);
801 return 1; 800 return 1;
802 } 801 }
803 return slowfn(lock __RET_IP__); 802 return slowfn(lock);
804} 803}
805 804
806static inline void 805static inline void
@@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
948 struct task_struct *proxy_owner) 947 struct task_struct *proxy_owner)
949{ 948{
950 __rt_mutex_init(lock, NULL); 949 __rt_mutex_init(lock, NULL);
951 debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__); 950 debug_rt_mutex_proxy_lock(lock, proxy_owner);
952 rt_mutex_set_owner(lock, proxy_owner, 0); 951 rt_mutex_set_owner(lock, proxy_owner, 0);
953 rt_mutex_deadlock_account_lock(lock, proxy_owner); 952 rt_mutex_deadlock_account_lock(lock, proxy_owner);
954} 953}
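
rtmutex.c loses the same __IP_DECL__/__IP__/__RET_IP__ threading as mutex.c: instead of passing the caller's instruction pointer through every internal function, the remaining users take it from _RET_IP_, which is simply (unsigned long)__builtin_return_address(0) evaluated at the public entry point. A generic sketch of capturing the caller address once at the API boundary (all names invented):

#include <linux/kernel.h>               /* printk(), _RET_IP_ */

/* Hypothetical internal slow path that wants to know which call site
 * entered the public API: */
static noinline void my_lock_slowpath(unsigned long caller_ip)
{
        printk(KERN_DEBUG "slow path entered from %p\n", (void *)caller_ip);
}

/* Capture the address once, at the outermost entry point: */
void my_lock_api(void)
{
        my_lock_slowpath(_RET_IP_);     /* address in my_lock_api()'s caller */
}
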
diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h
index 1e0fca13ff72..a1a1dd06421d 100644
--- a/kernel/rtmutex.h
+++ b/kernel/rtmutex.h
@@ -10,9 +10,6 @@
10 * Non-debug version. 10 * Non-debug version.
11 */ 11 */
12 12
13#define __IP_DECL__
14#define __IP__
15#define __RET_IP__
16#define rt_mutex_deadlock_check(l) (0) 13#define rt_mutex_deadlock_check(l) (0)
17#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) 14#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
18#define rt_mutex_deadlock_account_unlock(l) do { } while (0) 15#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
new file mode 100644
index 000000000000..291ded556aa0
--- /dev/null
+++ b/kernel/rwsem.c
@@ -0,0 +1,147 @@
1/* kernel/rwsem.c: R/W semaphores, public implementation
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from asm-i386/semaphore.h
5 */
6
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/rwsem.h>
11
12#include <asm/system.h>
13#include <asm/atomic.h>
14
15/*
16 * lock for reading
17 */
18void down_read(struct rw_semaphore *sem)
19{
20 might_sleep();
21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
22
23 __down_read(sem);
24}
25
26EXPORT_SYMBOL(down_read);
27
28/*
29 * trylock for reading -- returns 1 if successful, 0 if contention
30 */
31int down_read_trylock(struct rw_semaphore *sem)
32{
33 int ret = __down_read_trylock(sem);
34
35 if (ret == 1)
36 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
37 return ret;
38}
39
40EXPORT_SYMBOL(down_read_trylock);
41
42/*
43 * lock for writing
44 */
45void down_write(struct rw_semaphore *sem)
46{
47 might_sleep();
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49
50 __down_write(sem);
51}
52
53EXPORT_SYMBOL(down_write);
54
55/*
56 * trylock for writing -- returns 1 if successful, 0 if contention
57 */
58int down_write_trylock(struct rw_semaphore *sem)
59{
60 int ret = __down_write_trylock(sem);
61
62 if (ret == 1)
63 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
64 return ret;
65}
66
67EXPORT_SYMBOL(down_write_trylock);
68
69/*
70 * release a read lock
71 */
72void up_read(struct rw_semaphore *sem)
73{
74 rwsem_release(&sem->dep_map, 1, _RET_IP_);
75
76 __up_read(sem);
77}
78
79EXPORT_SYMBOL(up_read);
80
81/*
82 * release a write lock
83 */
84void up_write(struct rw_semaphore *sem)
85{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87
88 __up_write(sem);
89}
90
91EXPORT_SYMBOL(up_write);
92
93/*
94 * downgrade write lock to read lock
95 */
96void downgrade_write(struct rw_semaphore *sem)
97{
98 /*
99 * lockdep: a downgraded write will live on as a write
100 * dependency.
101 */
102 __downgrade_write(sem);
103}
104
105EXPORT_SYMBOL(downgrade_write);
106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108
109void down_read_nested(struct rw_semaphore *sem, int subclass)
110{
111 might_sleep();
112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
113
114 __down_read(sem);
115}
116
117EXPORT_SYMBOL(down_read_nested);
118
119void down_read_non_owner(struct rw_semaphore *sem)
120{
121 might_sleep();
122
123 __down_read(sem);
124}
125
126EXPORT_SYMBOL(down_read_non_owner);
127
128void down_write_nested(struct rw_semaphore *sem, int subclass)
129{
130 might_sleep();
131 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
132
133 __down_write_nested(sem, subclass);
134}
135
136EXPORT_SYMBOL(down_write_nested);
137
138void up_read_non_owner(struct rw_semaphore *sem)
139{
140 __up_read(sem);
141}
142
143EXPORT_SYMBOL(up_read_non_owner);
144
145#endif
146
147
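
kernel/rwsem.c is a new file: it provides the public down_read()/down_write()/up_*() entry points as wrappers that add the lockdep annotations (rwsem_acquire_read(), rwsem_acquire(), rwsem_release()) around the low-level __down_read()/__down_write() primitives, plus _nested and _non_owner variants for callers that nest same-class rwsems or release a read lock from a task other than the one that took it. A sketch of using the nested variant (the object type and ordering rule are invented for illustration):

#include <linux/rwsem.h>

/* Hypothetical: two instances of the same object type.  Their rwsems are
 * initialised at the same init_rwsem() call site, so lockdep places them
 * in one class; nesting them therefore needs an explicit subclass. */
struct my_dir {
        struct rw_semaphore sem;
};

static void read_parent_then_child(struct my_dir *parent, struct my_dir *child)
{
        down_read(&parent->sem);                /* default subclass 0 */
        down_read_nested(&child->sem, 1);       /* subclass 1: intended nesting */

        /* ... read state protected by both ... */

        up_read(&child->sem);
        up_read(&parent->sem);
}
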
diff --git a/kernel/sched.c b/kernel/sched.c
index 2629c1711fd6..4ee400f9d56b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/completion.h> 31#include <linux/completion.h>
32#include <linux/kernel_stat.h> 32#include <linux/kernel_stat.h>
33#include <linux/debug_locks.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/notifier.h> 35#include <linux/notifier.h>
35#include <linux/profile.h> 36#include <linux/profile.h>
@@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)
178 return SCALE_PRIO(DEF_TIMESLICE, static_prio); 179 return SCALE_PRIO(DEF_TIMESLICE, static_prio);
179} 180}
180 181
181static inline unsigned int task_timeslice(task_t *p) 182static inline unsigned int task_timeslice(struct task_struct *p)
182{ 183{
183 return static_prio_timeslice(p->static_prio); 184 return static_prio_timeslice(p->static_prio);
184} 185}
185 186
186#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
187 < (long long) (sd)->cache_hot_time)
188
189/* 187/*
190 * These are the runqueue data structures: 188 * These are the runqueue data structures:
191 */ 189 */
192 190
193typedef struct runqueue runqueue_t;
194
195struct prio_array { 191struct prio_array {
196 unsigned int nr_active; 192 unsigned int nr_active;
197 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ 193 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -205,7 +201,7 @@ struct prio_array {
205 * (such as the load balancing or the thread migration code), lock 201 * (such as the load balancing or the thread migration code), lock
206 * acquire operations must be ordered by ascending &runqueue. 202 * acquire operations must be ordered by ascending &runqueue.
207 */ 203 */
208struct runqueue { 204struct rq {
209 spinlock_t lock; 205 spinlock_t lock;
210 206
211 /* 207 /*
@@ -229,9 +225,9 @@ struct runqueue {
229 225
230 unsigned long expired_timestamp; 226 unsigned long expired_timestamp;
231 unsigned long long timestamp_last_tick; 227 unsigned long long timestamp_last_tick;
232 task_t *curr, *idle; 228 struct task_struct *curr, *idle;
233 struct mm_struct *prev_mm; 229 struct mm_struct *prev_mm;
234 prio_array_t *active, *expired, arrays[2]; 230 struct prio_array *active, *expired, arrays[2];
235 int best_expired_prio; 231 int best_expired_prio;
236 atomic_t nr_iowait; 232 atomic_t nr_iowait;
237 233
@@ -242,7 +238,7 @@ struct runqueue {
242 int active_balance; 238 int active_balance;
243 int push_cpu; 239 int push_cpu;
244 240
245 task_t *migration_thread; 241 struct task_struct *migration_thread;
246 struct list_head migration_queue; 242 struct list_head migration_queue;
247#endif 243#endif
248 244
@@ -265,9 +261,10 @@ struct runqueue {
265 unsigned long ttwu_cnt; 261 unsigned long ttwu_cnt;
266 unsigned long ttwu_local; 262 unsigned long ttwu_local;
267#endif 263#endif
264 struct lock_class_key rq_lock_key;
268}; 265};
269 266
270static DEFINE_PER_CPU(struct runqueue, runqueues); 267static DEFINE_PER_CPU(struct rq, runqueues);
271 268
272/* 269/*
273 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 270 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
276 * The domain tree of any CPU may only be accessed from within 273 * The domain tree of any CPU may only be accessed from within
277 * preempt-disabled sections. 274 * preempt-disabled sections.
278 */ 275 */
279#define for_each_domain(cpu, domain) \ 276#define for_each_domain(cpu, __sd) \
280for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) 277 for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
281 278
282#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 279#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
283#define this_rq() (&__get_cpu_var(runqueues)) 280#define this_rq() (&__get_cpu_var(runqueues))
@@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
292#endif 289#endif
293 290
294#ifndef __ARCH_WANT_UNLOCKED_CTXSW 291#ifndef __ARCH_WANT_UNLOCKED_CTXSW
295static inline int task_running(runqueue_t *rq, task_t *p) 292static inline int task_running(struct rq *rq, struct task_struct *p)
296{ 293{
297 return rq->curr == p; 294 return rq->curr == p;
298} 295}
299 296
300static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 297static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
301{ 298{
302} 299}
303 300
304static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 301static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
305{ 302{
306#ifdef CONFIG_DEBUG_SPINLOCK 303#ifdef CONFIG_DEBUG_SPINLOCK
307 /* this is a valid case when another task releases the spinlock */ 304 /* this is a valid case when another task releases the spinlock */
308 rq->lock.owner = current; 305 rq->lock.owner = current;
309#endif 306#endif
307 /*
308 * If we are tracking spinlock dependencies then we have to
309 * fix up the runqueue lock - which gets 'carried over' from
310 * prev into current:
311 */
312 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
313
310 spin_unlock_irq(&rq->lock); 314 spin_unlock_irq(&rq->lock);
311} 315}
312 316
313#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 317#else /* __ARCH_WANT_UNLOCKED_CTXSW */
314static inline int task_running(runqueue_t *rq, task_t *p) 318static inline int task_running(struct rq *rq, struct task_struct *p)
315{ 319{
316#ifdef CONFIG_SMP 320#ifdef CONFIG_SMP
317 return p->oncpu; 321 return p->oncpu;
@@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
320#endif 324#endif
321} 325}
322 326
323static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 327static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
324{ 328{
325#ifdef CONFIG_SMP 329#ifdef CONFIG_SMP
326 /* 330 /*
@@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
337#endif 341#endif
338} 342}
339 343
340static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 344static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
341{ 345{
342#ifdef CONFIG_SMP 346#ifdef CONFIG_SMP
343 /* 347 /*
@@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
358 * __task_rq_lock - lock the runqueue a given task resides on. 362 * __task_rq_lock - lock the runqueue a given task resides on.
359 * Must be called interrupts disabled. 363 * Must be called interrupts disabled.
360 */ 364 */
361static inline runqueue_t *__task_rq_lock(task_t *p) 365static inline struct rq *__task_rq_lock(struct task_struct *p)
362 __acquires(rq->lock) 366 __acquires(rq->lock)
363{ 367{
364 struct runqueue *rq; 368 struct rq *rq;
365 369
366repeat_lock_task: 370repeat_lock_task:
367 rq = task_rq(p); 371 rq = task_rq(p);
@@ -378,10 +382,10 @@ repeat_lock_task:
378 * interrupts. Note the ordering: we can safely lookup the task_rq without 382 * interrupts. Note the ordering: we can safely lookup the task_rq without
379 * explicitly disabling preemption. 383 * explicitly disabling preemption.
380 */ 384 */
381static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) 385static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
382 __acquires(rq->lock) 386 __acquires(rq->lock)
383{ 387{
384 struct runqueue *rq; 388 struct rq *rq;
385 389
386repeat_lock_task: 390repeat_lock_task:
387 local_irq_save(*flags); 391 local_irq_save(*flags);
@@ -394,13 +398,13 @@ repeat_lock_task:
394 return rq; 398 return rq;
395} 399}
396 400
397static inline void __task_rq_unlock(runqueue_t *rq) 401static inline void __task_rq_unlock(struct rq *rq)
398 __releases(rq->lock) 402 __releases(rq->lock)
399{ 403{
400 spin_unlock(&rq->lock); 404 spin_unlock(&rq->lock);
401} 405}
402 406
403static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) 407static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
404 __releases(rq->lock) 408 __releases(rq->lock)
405{ 409{
406 spin_unlock_irqrestore(&rq->lock, *flags); 410 spin_unlock_irqrestore(&rq->lock, *flags);
@@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
420 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); 424 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
421 seq_printf(seq, "timestamp %lu\n", jiffies); 425 seq_printf(seq, "timestamp %lu\n", jiffies);
422 for_each_online_cpu(cpu) { 426 for_each_online_cpu(cpu) {
423 runqueue_t *rq = cpu_rq(cpu); 427 struct rq *rq = cpu_rq(cpu);
424#ifdef CONFIG_SMP 428#ifdef CONFIG_SMP
425 struct sched_domain *sd; 429 struct sched_domain *sd;
426 int dcnt = 0; 430 int dcnt = 0;
@@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
507/* 511/*
508 * rq_lock - lock a given runqueue and disable interrupts. 512 * rq_lock - lock a given runqueue and disable interrupts.
509 */ 513 */
510static inline runqueue_t *this_rq_lock(void) 514static inline struct rq *this_rq_lock(void)
511 __acquires(rq->lock) 515 __acquires(rq->lock)
512{ 516{
513 runqueue_t *rq; 517 struct rq *rq;
514 518
515 local_irq_disable(); 519 local_irq_disable();
516 rq = this_rq(); 520 rq = this_rq();
@@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)
535 * long it was from the *first* time it was queued to the time that it 539 * long it was from the *first* time it was queued to the time that it
536 * finally hit a cpu. 540 * finally hit a cpu.
537 */ 541 */
538static inline void sched_info_dequeued(task_t *t) 542static inline void sched_info_dequeued(struct task_struct *t)
539{ 543{
540 t->sched_info.last_queued = 0; 544 t->sched_info.last_queued = 0;
541} 545}
@@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)
545 * long it was waiting to run. We also note when it began so that we 549 * long it was waiting to run. We also note when it began so that we
546 * can keep stats on how long its timeslice is. 550 * can keep stats on how long its timeslice is.
547 */ 551 */
548static void sched_info_arrive(task_t *t) 552static void sched_info_arrive(struct task_struct *t)
549{ 553{
550 unsigned long now = jiffies, diff = 0; 554 unsigned long now = jiffies, diff = 0;
551 struct runqueue *rq = task_rq(t); 555 struct rq *rq = task_rq(t);
552 556
553 if (t->sched_info.last_queued) 557 if (t->sched_info.last_queued)
554 diff = now - t->sched_info.last_queued; 558 diff = now - t->sched_info.last_queued;
@@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)
 579 * the timestamp if it is not already set. It's assumed that 583 * the timestamp if it is not already set. It's assumed that
580 * sched_info_dequeued() will clear that stamp when appropriate. 584 * sched_info_dequeued() will clear that stamp when appropriate.
581 */ 585 */
582static inline void sched_info_queued(task_t *t) 586static inline void sched_info_queued(struct task_struct *t)
583{ 587{
584 if (!t->sched_info.last_queued) 588 if (!t->sched_info.last_queued)
585 t->sched_info.last_queued = jiffies; 589 t->sched_info.last_queued = jiffies;
@@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)
589 * Called when a process ceases being the active-running process, either 593 * Called when a process ceases being the active-running process, either
590 * voluntarily or involuntarily. Now we can calculate how long we ran. 594 * voluntarily or involuntarily. Now we can calculate how long we ran.
591 */ 595 */
592static inline void sched_info_depart(task_t *t) 596static inline void sched_info_depart(struct task_struct *t)
593{ 597{
594 struct runqueue *rq = task_rq(t); 598 struct rq *rq = task_rq(t);
595 unsigned long diff = jiffies - t->sched_info.last_arrival; 599 unsigned long diff = jiffies - t->sched_info.last_arrival;
596 600
597 t->sched_info.cpu_time += diff; 601 t->sched_info.cpu_time += diff;
@@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)
605 * their time slice. (This may also be called when switching to or from 609 * their time slice. (This may also be called when switching to or from
606 * the idle task.) We are only called when prev != next. 610 * the idle task.) We are only called when prev != next.
607 */ 611 */
608static inline void sched_info_switch(task_t *prev, task_t *next) 612static inline void
613sched_info_switch(struct task_struct *prev, struct task_struct *next)
609{ 614{
610 struct runqueue *rq = task_rq(prev); 615 struct rq *rq = task_rq(prev);
611 616
612 /* 617 /*
613 * prev now departs the cpu. It's not interesting to record 618 * prev now departs the cpu. It's not interesting to record
@@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)
628/* 633/*
629 * Adding/removing a task to/from a priority array: 634 * Adding/removing a task to/from a priority array:
630 */ 635 */
631static void dequeue_task(struct task_struct *p, prio_array_t *array) 636static void dequeue_task(struct task_struct *p, struct prio_array *array)
632{ 637{
633 array->nr_active--; 638 array->nr_active--;
634 list_del(&p->run_list); 639 list_del(&p->run_list);
@@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
636 __clear_bit(p->prio, array->bitmap); 641 __clear_bit(p->prio, array->bitmap);
637} 642}
638 643
639static void enqueue_task(struct task_struct *p, prio_array_t *array) 644static void enqueue_task(struct task_struct *p, struct prio_array *array)
640{ 645{
641 sched_info_queued(p); 646 sched_info_queued(p);
642 list_add_tail(&p->run_list, array->queue + p->prio); 647 list_add_tail(&p->run_list, array->queue + p->prio);
@@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
649 * Put task to the end of the run list without the overhead of dequeue 654 * Put task to the end of the run list without the overhead of dequeue
650 * followed by enqueue. 655 * followed by enqueue.
651 */ 656 */
652static void requeue_task(struct task_struct *p, prio_array_t *array) 657static void requeue_task(struct task_struct *p, struct prio_array *array)
653{ 658{
654 list_move_tail(&p->run_list, array->queue + p->prio); 659 list_move_tail(&p->run_list, array->queue + p->prio);
655} 660}
656 661
657static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) 662static inline void
663enqueue_task_head(struct task_struct *p, struct prio_array *array)
658{ 664{
659 list_add(&p->run_list, array->queue + p->prio); 665 list_add(&p->run_list, array->queue + p->prio);
660 __set_bit(p->prio, array->bitmap); 666 __set_bit(p->prio, array->bitmap);
@@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
677 * Both properties are important to certain workloads. 683 * Both properties are important to certain workloads.
678 */ 684 */
679 685
680static inline int __normal_prio(task_t *p) 686static inline int __normal_prio(struct task_struct *p)
681{ 687{
682 int bonus, prio; 688 int bonus, prio;
683 689
@@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)
713#define RTPRIO_TO_LOAD_WEIGHT(rp) \ 719#define RTPRIO_TO_LOAD_WEIGHT(rp) \
714 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) 720 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
715 721
716static void set_load_weight(task_t *p) 722static void set_load_weight(struct task_struct *p)
717{ 723{
718 if (has_rt_policy(p)) { 724 if (has_rt_policy(p)) {
719#ifdef CONFIG_SMP 725#ifdef CONFIG_SMP
@@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)
731 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); 737 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
732} 738}
733 739
734static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) 740static inline void
741inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
735{ 742{
736 rq->raw_weighted_load += p->load_weight; 743 rq->raw_weighted_load += p->load_weight;
737} 744}
738 745
739static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) 746static inline void
747dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
740{ 748{
741 rq->raw_weighted_load -= p->load_weight; 749 rq->raw_weighted_load -= p->load_weight;
742} 750}
743 751
744static inline void inc_nr_running(task_t *p, runqueue_t *rq) 752static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
745{ 753{
746 rq->nr_running++; 754 rq->nr_running++;
747 inc_raw_weighted_load(rq, p); 755 inc_raw_weighted_load(rq, p);
748} 756}
749 757
750static inline void dec_nr_running(task_t *p, runqueue_t *rq) 758static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
751{ 759{
752 rq->nr_running--; 760 rq->nr_running--;
753 dec_raw_weighted_load(rq, p); 761 dec_raw_weighted_load(rq, p);
@@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
760 * setprio syscalls, and whenever the interactivity 768 * setprio syscalls, and whenever the interactivity
761 * estimator recalculates. 769 * estimator recalculates.
762 */ 770 */
763static inline int normal_prio(task_t *p) 771static inline int normal_prio(struct task_struct *p)
764{ 772{
765 int prio; 773 int prio;
766 774
@@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)
778 * interactivity modifiers. Will be RT if the task got 786 * interactivity modifiers. Will be RT if the task got
779 * RT-boosted. If not then it returns p->normal_prio. 787 * RT-boosted. If not then it returns p->normal_prio.
780 */ 788 */
781static int effective_prio(task_t *p) 789static int effective_prio(struct task_struct *p)
782{ 790{
783 p->normal_prio = normal_prio(p); 791 p->normal_prio = normal_prio(p);
784 /* 792 /*
@@ -794,9 +802,9 @@ static int effective_prio(task_t *p)
794/* 802/*
795 * __activate_task - move a task to the runqueue. 803 * __activate_task - move a task to the runqueue.
796 */ 804 */
797static void __activate_task(task_t *p, runqueue_t *rq) 805static void __activate_task(struct task_struct *p, struct rq *rq)
798{ 806{
799 prio_array_t *target = rq->active; 807 struct prio_array *target = rq->active;
800 808
801 if (batch_task(p)) 809 if (batch_task(p))
802 target = rq->expired; 810 target = rq->expired;
@@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
807/* 815/*
808 * __activate_idle_task - move idle task to the _front_ of runqueue. 816 * __activate_idle_task - move idle task to the _front_ of runqueue.
809 */ 817 */
810static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 818static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
811{ 819{
812 enqueue_task_head(p, rq->active); 820 enqueue_task_head(p, rq->active);
813 inc_nr_running(p, rq); 821 inc_nr_running(p, rq);
@@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
817 * Recalculate p->normal_prio and p->prio after having slept, 825 * Recalculate p->normal_prio and p->prio after having slept,
818 * updating the sleep-average too: 826 * updating the sleep-average too:
819 */ 827 */
820static int recalc_task_prio(task_t *p, unsigned long long now) 828static int recalc_task_prio(struct task_struct *p, unsigned long long now)
821{ 829{
822 /* Caller must always ensure 'now >= p->timestamp' */ 830 /* Caller must always ensure 'now >= p->timestamp' */
823 unsigned long sleep_time = now - p->timestamp; 831 unsigned long sleep_time = now - p->timestamp;
@@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
889 * Update all the scheduling statistics stuff. (sleep average 897 * Update all the scheduling statistics stuff. (sleep average
890 * calculation, priority modifiers, etc.) 898 * calculation, priority modifiers, etc.)
891 */ 899 */
892static void activate_task(task_t *p, runqueue_t *rq, int local) 900static void activate_task(struct task_struct *p, struct rq *rq, int local)
893{ 901{
894 unsigned long long now; 902 unsigned long long now;
895 903
@@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
897#ifdef CONFIG_SMP 905#ifdef CONFIG_SMP
898 if (!local) { 906 if (!local) {
899 /* Compensate for drifting sched_clock */ 907 /* Compensate for drifting sched_clock */
900 runqueue_t *this_rq = this_rq(); 908 struct rq *this_rq = this_rq();
901 now = (now - this_rq->timestamp_last_tick) 909 now = (now - this_rq->timestamp_last_tick)
902 + rq->timestamp_last_tick; 910 + rq->timestamp_last_tick;
903 } 911 }
@@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
936/* 944/*
937 * deactivate_task - remove a task from the runqueue. 945 * deactivate_task - remove a task from the runqueue.
938 */ 946 */
939static void deactivate_task(struct task_struct *p, runqueue_t *rq) 947static void deactivate_task(struct task_struct *p, struct rq *rq)
940{ 948{
941 dec_nr_running(p, rq); 949 dec_nr_running(p, rq);
942 dequeue_task(p, p->array); 950 dequeue_task(p, p->array);
@@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
956#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 964#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
957#endif 965#endif
958 966
959static void resched_task(task_t *p) 967static void resched_task(struct task_struct *p)
960{ 968{
961 int cpu; 969 int cpu;
962 970
@@ -977,7 +985,7 @@ static void resched_task(task_t *p)
977 smp_send_reschedule(cpu); 985 smp_send_reschedule(cpu);
978} 986}
979#else 987#else
980static inline void resched_task(task_t *p) 988static inline void resched_task(struct task_struct *p)
981{ 989{
982 assert_spin_locked(&task_rq(p)->lock); 990 assert_spin_locked(&task_rq(p)->lock);
983 set_tsk_need_resched(p); 991 set_tsk_need_resched(p);
@@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)
988 * task_curr - is this task currently executing on a CPU? 996 * task_curr - is this task currently executing on a CPU?
989 * @p: the task in question. 997 * @p: the task in question.
990 */ 998 */
991inline int task_curr(const task_t *p) 999inline int task_curr(const struct task_struct *p)
992{ 1000{
993 return cpu_curr(task_cpu(p)) == p; 1001 return cpu_curr(task_cpu(p)) == p;
994} 1002}
@@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
1000} 1008}
1001 1009
1002#ifdef CONFIG_SMP 1010#ifdef CONFIG_SMP
1003typedef struct { 1011struct migration_req {
1004 struct list_head list; 1012 struct list_head list;
1005 1013
1006 task_t *task; 1014 struct task_struct *task;
1007 int dest_cpu; 1015 int dest_cpu;
1008 1016
1009 struct completion done; 1017 struct completion done;
1010} migration_req_t; 1018};
1011 1019
1012/* 1020/*
1013 * The task's runqueue lock must be held. 1021 * The task's runqueue lock must be held.
1014 * Returns true if you have to wait for migration thread. 1022 * Returns true if you have to wait for migration thread.
1015 */ 1023 */
1016static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) 1024static int
1025migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
1017{ 1026{
1018 runqueue_t *rq = task_rq(p); 1027 struct rq *rq = task_rq(p);
1019 1028
1020 /* 1029 /*
1021 * If the task is not on a runqueue (and not running), then 1030 * If the task is not on a runqueue (and not running), then
@@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1030 req->task = p; 1039 req->task = p;
1031 req->dest_cpu = dest_cpu; 1040 req->dest_cpu = dest_cpu;
1032 list_add(&req->list, &rq->migration_queue); 1041 list_add(&req->list, &rq->migration_queue);
1042
1033 return 1; 1043 return 1;
1034} 1044}
1035 1045
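The comment above documents the calling convention for migrate_task(): the task's runqueue lock must already be held, and a non-zero return means the request was queued and the caller has to kick the migration thread and wait for it. A condensed caller, simplified from the sched_migrate_task() hunk later in this patch (the reference counting on the migration thread is omitted here):

    struct migration_req req;
    unsigned long flags;
    struct rq *rq = task_rq_lock(p, &flags);        /* rq->lock now held */

    if (migrate_task(p, dest_cpu, &req)) {
        /* queued: let the per-CPU migration thread do the move */
        task_rq_unlock(rq, &flags);
        wake_up_process(rq->migration_thread);
        wait_for_completion(&req.done);
    } else {
        task_rq_unlock(rq, &flags);                 /* task was moved directly */
    }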
@@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1042 * smp_call_function() if an IPI is sent by the same process we are 1052 * smp_call_function() if an IPI is sent by the same process we are
1043 * waiting to become inactive. 1053 * waiting to become inactive.
1044 */ 1054 */
1045void wait_task_inactive(task_t *p) 1055void wait_task_inactive(struct task_struct *p)
1046{ 1056{
1047 unsigned long flags; 1057 unsigned long flags;
1048 runqueue_t *rq; 1058 struct rq *rq;
1049 int preempted; 1059 int preempted;
1050 1060
1051repeat: 1061repeat:
@@ -1076,7 +1086,7 @@ repeat:
1076 * to another CPU then no harm is done and the purpose has been 1086 * to another CPU then no harm is done and the purpose has been
1077 * achieved as well. 1087 * achieved as well.
1078 */ 1088 */
1079void kick_process(task_t *p) 1089void kick_process(struct task_struct *p)
1080{ 1090{
1081 int cpu; 1091 int cpu;
1082 1092
@@ -1096,7 +1106,7 @@ void kick_process(task_t *p)
1096 */ 1106 */
1097static inline unsigned long source_load(int cpu, int type) 1107static inline unsigned long source_load(int cpu, int type)
1098{ 1108{
1099 runqueue_t *rq = cpu_rq(cpu); 1109 struct rq *rq = cpu_rq(cpu);
1100 1110
1101 if (type == 0) 1111 if (type == 0)
1102 return rq->raw_weighted_load; 1112 return rq->raw_weighted_load;
@@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
1110 */ 1120 */
1111static inline unsigned long target_load(int cpu, int type) 1121static inline unsigned long target_load(int cpu, int type)
1112{ 1122{
1113 runqueue_t *rq = cpu_rq(cpu); 1123 struct rq *rq = cpu_rq(cpu);
1114 1124
1115 if (type == 0) 1125 if (type == 0)
1116 return rq->raw_weighted_load; 1126 return rq->raw_weighted_load;
@@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)
1123 */ 1133 */
1124static inline unsigned long cpu_avg_load_per_task(int cpu) 1134static inline unsigned long cpu_avg_load_per_task(int cpu)
1125{ 1135{
1126 runqueue_t *rq = cpu_rq(cpu); 1136 struct rq *rq = cpu_rq(cpu);
1127 unsigned long n = rq->nr_running; 1137 unsigned long n = rq->nr_running;
1128 1138
1129 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1139 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
1130} 1140}
1131 1141
1132/* 1142/*
@@ -1279,7 +1289,7 @@ nextlevel:
1279 * Returns the CPU we should wake onto. 1289 * Returns the CPU we should wake onto.
1280 */ 1290 */
1281#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1291#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1282static int wake_idle(int cpu, task_t *p) 1292static int wake_idle(int cpu, struct task_struct *p)
1283{ 1293{
1284 cpumask_t tmp; 1294 cpumask_t tmp;
1285 struct sched_domain *sd; 1295 struct sched_domain *sd;
@@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)
1302 return cpu; 1312 return cpu;
1303} 1313}
1304#else 1314#else
1305static inline int wake_idle(int cpu, task_t *p) 1315static inline int wake_idle(int cpu, struct task_struct *p)
1306{ 1316{
1307 return cpu; 1317 return cpu;
1308} 1318}
@@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)
1322 * 1332 *
1323 * returns failure only if the task is already active. 1333 * returns failure only if the task is already active.
1324 */ 1334 */
1325static int try_to_wake_up(task_t *p, unsigned int state, int sync) 1335static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1326{ 1336{
1327 int cpu, this_cpu, success = 0; 1337 int cpu, this_cpu, success = 0;
1328 unsigned long flags; 1338 unsigned long flags;
1329 long old_state; 1339 long old_state;
1330 runqueue_t *rq; 1340 struct rq *rq;
1331#ifdef CONFIG_SMP 1341#ifdef CONFIG_SMP
1332 unsigned long load, this_load;
1333 struct sched_domain *sd, *this_sd = NULL; 1342 struct sched_domain *sd, *this_sd = NULL;
1343 unsigned long load, this_load;
1334 int new_cpu; 1344 int new_cpu;
1335#endif 1345#endif
1336 1346
@@ -1480,15 +1490,14 @@ out:
1480 return success; 1490 return success;
1481} 1491}
1482 1492
1483int fastcall wake_up_process(task_t *p) 1493int fastcall wake_up_process(struct task_struct *p)
1484{ 1494{
1485 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1495 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1486 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1496 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1487} 1497}
1488
1489EXPORT_SYMBOL(wake_up_process); 1498EXPORT_SYMBOL(wake_up_process);
1490 1499
1491int fastcall wake_up_state(task_t *p, unsigned int state) 1500int fastcall wake_up_state(struct task_struct *p, unsigned int state)
1492{ 1501{
1493 return try_to_wake_up(p, state, 0); 1502 return try_to_wake_up(p, state, 0);
1494} 1503}
@@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
1497 * Perform scheduler related setup for a newly forked process p. 1506 * Perform scheduler related setup for a newly forked process p.
1498 * p is forked by current. 1507 * p is forked by current.
1499 */ 1508 */
1500void fastcall sched_fork(task_t *p, int clone_flags) 1509void fastcall sched_fork(struct task_struct *p, int clone_flags)
1501{ 1510{
1502 int cpu = get_cpu(); 1511 int cpu = get_cpu();
1503 1512
@@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1565 * that must be done for every newly created context, then puts the task 1574 * that must be done for every newly created context, then puts the task
1566 * on the runqueue and wakes it. 1575 * on the runqueue and wakes it.
1567 */ 1576 */
1568void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) 1577void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1569{ 1578{
1579 struct rq *rq, *this_rq;
1570 unsigned long flags; 1580 unsigned long flags;
1571 int this_cpu, cpu; 1581 int this_cpu, cpu;
1572 runqueue_t *rq, *this_rq;
1573 1582
1574 rq = task_rq_lock(p, &flags); 1583 rq = task_rq_lock(p, &flags);
1575 BUG_ON(p->state != TASK_RUNNING); 1584 BUG_ON(p->state != TASK_RUNNING);
@@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1649 * artificially, because any timeslice recovered here 1658 * artificially, because any timeslice recovered here
1650 * was given away by the parent in the first place.) 1659 * was given away by the parent in the first place.)
1651 */ 1660 */
1652void fastcall sched_exit(task_t *p) 1661void fastcall sched_exit(struct task_struct *p)
1653{ 1662{
1654 unsigned long flags; 1663 unsigned long flags;
1655 runqueue_t *rq; 1664 struct rq *rq;
1656 1665
1657 /* 1666 /*
1658 * If the child was a (relative-) CPU hog then decrease 1667 * If the child was a (relative-) CPU hog then decrease
@@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)
1683 * prepare_task_switch sets up locking and calls architecture specific 1692 * prepare_task_switch sets up locking and calls architecture specific
1684 * hooks. 1693 * hooks.
1685 */ 1694 */
1686static inline void prepare_task_switch(runqueue_t *rq, task_t *next) 1695static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
1687{ 1696{
1688 prepare_lock_switch(rq, next); 1697 prepare_lock_switch(rq, next);
1689 prepare_arch_switch(next); 1698 prepare_arch_switch(next);
@@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
1704 * with the lock held can cause deadlocks; see schedule() for 1713 * with the lock held can cause deadlocks; see schedule() for
1705 * details.) 1714 * details.)
1706 */ 1715 */
1707static inline void finish_task_switch(runqueue_t *rq, task_t *prev) 1716static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1708 __releases(rq->lock) 1717 __releases(rq->lock)
1709{ 1718{
1710 struct mm_struct *mm = rq->prev_mm; 1719 struct mm_struct *mm = rq->prev_mm;
@@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1742 * schedule_tail - first thing a freshly forked thread must call. 1751 * schedule_tail - first thing a freshly forked thread must call.
1743 * @prev: the thread we just switched away from. 1752 * @prev: the thread we just switched away from.
1744 */ 1753 */
1745asmlinkage void schedule_tail(task_t *prev) 1754asmlinkage void schedule_tail(struct task_struct *prev)
1746 __releases(rq->lock) 1755 __releases(rq->lock)
1747{ 1756{
1748 runqueue_t *rq = this_rq(); 1757 struct rq *rq = this_rq();
1758
1749 finish_task_switch(rq, prev); 1759 finish_task_switch(rq, prev);
1750#ifdef __ARCH_WANT_UNLOCKED_CTXSW 1760#ifdef __ARCH_WANT_UNLOCKED_CTXSW
1751 /* In this case, finish_task_switch does not reenable preemption */ 1761 /* In this case, finish_task_switch does not reenable preemption */
@@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
1759 * context_switch - switch to the new MM and the new 1769 * context_switch - switch to the new MM and the new
1760 * thread's register state. 1770 * thread's register state.
1761 */ 1771 */
1762static inline 1772static inline struct task_struct *
1763task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) 1773context_switch(struct rq *rq, struct task_struct *prev,
1774 struct task_struct *next)
1764{ 1775{
1765 struct mm_struct *mm = next->mm; 1776 struct mm_struct *mm = next->mm;
1766 struct mm_struct *oldmm = prev->active_mm; 1777 struct mm_struct *oldmm = prev->active_mm;
@@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
1777 WARN_ON(rq->prev_mm); 1788 WARN_ON(rq->prev_mm);
1778 rq->prev_mm = oldmm; 1789 rq->prev_mm = oldmm;
1779 } 1790 }
1791 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
1780 1792
1781 /* Here we just switch the register state and the stack. */ 1793 /* Here we just switch the register state and the stack. */
1782 switch_to(prev, next, prev); 1794 switch_to(prev, next, prev);
@@ -1857,12 +1869,21 @@ unsigned long nr_active(void)
1857#ifdef CONFIG_SMP 1869#ifdef CONFIG_SMP
1858 1870
1859/* 1871/*
1872 * Is this task likely cache-hot:
1873 */
1874static inline int
1875task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
1876{
1877 return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
1878}
1879
1880/*
1860 * double_rq_lock - safely lock two runqueues 1881 * double_rq_lock - safely lock two runqueues
1861 * 1882 *
1862 * Note this does not disable interrupts like task_rq_lock, 1883 * Note this does not disable interrupts like task_rq_lock,
1863 * you need to do so manually before calling. 1884 * you need to do so manually before calling.
1864 */ 1885 */
1865static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) 1886static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1866 __acquires(rq1->lock) 1887 __acquires(rq1->lock)
1867 __acquires(rq2->lock) 1888 __acquires(rq2->lock)
1868{ 1889{
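task_hot(), added above, is the patch's name for the "is this task probably still cache-hot on its current CPU" test: it compares the time since the task last ran against the domain's cache_hot_time. Roughly how the balancer consults it when deciding whether a task may be pulled (an illustrative condensation, not a hunk from this diff):

    /* inside can_migrate_task(), conceptually: */
    if (!cpu_isset(this_cpu, p->cpus_allowed))
        return 0;                       /* affinity forbids the move */
    if (task_running(rq, p))
        return 0;                       /* can't pull the currently running task */
    if (task_hot(p, rq->timestamp_last_tick, sd))
        return 0;                       /* likely still cache-hot, leave it alone */
    return 1;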
@@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1886 * Note this does not restore interrupts like task_rq_unlock, 1907 * Note this does not restore interrupts like task_rq_unlock,
1887 * you need to do so manually after calling. 1908 * you need to do so manually after calling.
1888 */ 1909 */
1889static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) 1910static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1890 __releases(rq1->lock) 1911 __releases(rq1->lock)
1891 __releases(rq2->lock) 1912 __releases(rq2->lock)
1892{ 1913{
@@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1900/* 1921/*
1901 * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 1922 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
1902 */ 1923 */
1903static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) 1924static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
1904 __releases(this_rq->lock) 1925 __releases(this_rq->lock)
1905 __acquires(busiest->lock) 1926 __acquires(busiest->lock)
1906 __acquires(this_rq->lock) 1927 __acquires(this_rq->lock)
@@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
1921 * allow dest_cpu, which will force the cpu onto dest_cpu. Then 1942 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
1922 * the cpu_allowed mask is restored. 1943 * the cpu_allowed mask is restored.
1923 */ 1944 */
1924static void sched_migrate_task(task_t *p, int dest_cpu) 1945static void sched_migrate_task(struct task_struct *p, int dest_cpu)
1925{ 1946{
1926 migration_req_t req; 1947 struct migration_req req;
1927 runqueue_t *rq;
1928 unsigned long flags; 1948 unsigned long flags;
1949 struct rq *rq;
1929 1950
1930 rq = task_rq_lock(p, &flags); 1951 rq = task_rq_lock(p, &flags);
1931 if (!cpu_isset(dest_cpu, p->cpus_allowed) 1952 if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
1936 if (migrate_task(p, dest_cpu, &req)) { 1957 if (migrate_task(p, dest_cpu, &req)) {
1937 /* Need to wait for migration thread (might exit: take ref). */ 1958 /* Need to wait for migration thread (might exit: take ref). */
1938 struct task_struct *mt = rq->migration_thread; 1959 struct task_struct *mt = rq->migration_thread;
1960
1939 get_task_struct(mt); 1961 get_task_struct(mt);
1940 task_rq_unlock(rq, &flags); 1962 task_rq_unlock(rq, &flags);
1941 wake_up_process(mt); 1963 wake_up_process(mt);
1942 put_task_struct(mt); 1964 put_task_struct(mt);
1943 wait_for_completion(&req.done); 1965 wait_for_completion(&req.done);
1966
1944 return; 1967 return;
1945 } 1968 }
1946out: 1969out:
@@ -1964,9 +1987,9 @@ void sched_exec(void)
1964 * pull_task - move a task from a remote runqueue to the local runqueue. 1987 * pull_task - move a task from a remote runqueue to the local runqueue.
1965 * Both runqueues must be locked. 1988 * Both runqueues must be locked.
1966 */ 1989 */
1967static 1990static void pull_task(struct rq *src_rq, struct prio_array *src_array,
1968void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, 1991 struct task_struct *p, struct rq *this_rq,
1969 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1992 struct prio_array *this_array, int this_cpu)
1970{ 1993{
1971 dequeue_task(p, src_array); 1994 dequeue_task(p, src_array);
1972 dec_nr_running(p, src_rq); 1995 dec_nr_running(p, src_rq);
@@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1987 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 2010 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
1988 */ 2011 */
1989static 2012static
1990int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, 2013int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
1991 struct sched_domain *sd, enum idle_type idle, 2014 struct sched_domain *sd, enum idle_type idle,
1992 int *all_pinned) 2015 int *all_pinned)
1993{ 2016{
@@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2019} 2042}
2020 2043
2021#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2044#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
2045
2022/* 2046/*
2023 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2047 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
2024 * load from busiest to this_rq, as part of a balancing operation within 2048 * load from busiest to this_rq, as part of a balancing operation within
@@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2026 * 2050 *
2027 * Called with both runqueues locked. 2051 * Called with both runqueues locked.
2028 */ 2052 */
2029static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, 2053static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2030 unsigned long max_nr_move, unsigned long max_load_move, 2054 unsigned long max_nr_move, unsigned long max_load_move,
2031 struct sched_domain *sd, enum idle_type idle, 2055 struct sched_domain *sd, enum idle_type idle,
2032 int *all_pinned) 2056 int *all_pinned)
2033{ 2057{
2034 prio_array_t *array, *dst_array; 2058 int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
2059 best_prio_seen, skip_for_load;
2060 struct prio_array *array, *dst_array;
2035 struct list_head *head, *curr; 2061 struct list_head *head, *curr;
2036 int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; 2062 struct task_struct *tmp;
2037 int busiest_best_prio_seen;
2038 int skip_for_load; /* skip the task based on weighted load issues */
2039 long rem_load_move; 2063 long rem_load_move;
2040 task_t *tmp;
2041 2064
2042 if (max_nr_move == 0 || max_load_move == 0) 2065 if (max_nr_move == 0 || max_load_move == 0)
2043 goto out; 2066 goto out;
@@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
2045 rem_load_move = max_load_move; 2068 rem_load_move = max_load_move;
2046 pinned = 1; 2069 pinned = 1;
2047 this_best_prio = rq_best_prio(this_rq); 2070 this_best_prio = rq_best_prio(this_rq);
2048 busiest_best_prio = rq_best_prio(busiest); 2071 best_prio = rq_best_prio(busiest);
2049 /* 2072 /*
2050 * Enable handling of the case where there is more than one task 2073 * Enable handling of the case where there is more than one task
2051 * with the best priority. If the current running task is one 2074 * with the best priority. If the current running task is one
2052 * of those with prio==busiest_best_prio we know it won't be moved 2075 * of those with prio==best_prio we know it won't be moved
2053 * and therefore it's safe to override the skip (based on load) of 2076 * and therefore it's safe to override the skip (based on load) of
2054 * any task we find with that prio. 2077 * any task we find with that prio.
2055 */ 2078 */
2056 busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; 2079 best_prio_seen = best_prio == busiest->curr->prio;
2057 2080
2058 /* 2081 /*
2059 * We first consider expired tasks. Those will likely not be 2082 * We first consider expired tasks. Those will likely not be
@@ -2089,7 +2112,7 @@ skip_bitmap:
2089 head = array->queue + idx; 2112 head = array->queue + idx;
2090 curr = head->prev; 2113 curr = head->prev;
2091skip_queue: 2114skip_queue:
2092 tmp = list_entry(curr, task_t, run_list); 2115 tmp = list_entry(curr, struct task_struct, run_list);
2093 2116
2094 curr = curr->prev; 2117 curr = curr->prev;
2095 2118
@@ -2100,10 +2123,11 @@ skip_queue:
2100 */ 2123 */
2101 skip_for_load = tmp->load_weight > rem_load_move; 2124 skip_for_load = tmp->load_weight > rem_load_move;
2102 if (skip_for_load && idx < this_best_prio) 2125 if (skip_for_load && idx < this_best_prio)
2103 skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; 2126 skip_for_load = !best_prio_seen && idx == best_prio;
2104 if (skip_for_load || 2127 if (skip_for_load ||
2105 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2128 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
2106 busiest_best_prio_seen |= idx == busiest_best_prio; 2129
2130 best_prio_seen |= idx == best_prio;
2107 if (curr != head) 2131 if (curr != head)
2108 goto skip_queue; 2132 goto skip_queue;
2109 idx++; 2133 idx++;
@@ -2146,8 +2170,8 @@ out:
2146 2170
2147/* 2171/*
2148 * find_busiest_group finds and returns the busiest CPU group within the 2172 * find_busiest_group finds and returns the busiest CPU group within the
2149 * domain. It calculates and returns the amount of weighted load which should be 2173 * domain. It calculates and returns the amount of weighted load which
2150 * moved to restore balance via the imbalance parameter. 2174 * should be moved to restore balance via the imbalance parameter.
2151 */ 2175 */
2152static struct sched_group * 2176static struct sched_group *
2153find_busiest_group(struct sched_domain *sd, int this_cpu, 2177find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2188 sum_weighted_load = sum_nr_running = avg_load = 0; 2212 sum_weighted_load = sum_nr_running = avg_load = 0;
2189 2213
2190 for_each_cpu_mask(i, group->cpumask) { 2214 for_each_cpu_mask(i, group->cpumask) {
2191 runqueue_t *rq = cpu_rq(i); 2215 struct rq *rq = cpu_rq(i);
2192 2216
2193 if (*sd_idle && !idle_cpu(i)) 2217 if (*sd_idle && !idle_cpu(i))
2194 *sd_idle = 0; 2218 *sd_idle = 0;
@@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2269 * capacity but still has some space to pick up some load 2293 * capacity but still has some space to pick up some load
2270 * from other group and save more power 2294 * from other group and save more power
2271 */ 2295 */
2272 if (sum_nr_running <= group_capacity - 1) 2296 if (sum_nr_running <= group_capacity - 1) {
2273 if (sum_nr_running > leader_nr_running || 2297 if (sum_nr_running > leader_nr_running ||
2274 (sum_nr_running == leader_nr_running && 2298 (sum_nr_running == leader_nr_running &&
2275 first_cpu(group->cpumask) > 2299 first_cpu(group->cpumask) >
@@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2277 group_leader = group; 2301 group_leader = group;
2278 leader_nr_running = sum_nr_running; 2302 leader_nr_running = sum_nr_running;
2279 } 2303 }
2280 2304 }
2281group_next: 2305group_next:
2282#endif 2306#endif
2283 group = group->next; 2307 group = group->next;
@@ -2332,8 +2356,7 @@ group_next:
2332 * moved 2356 * moved
2333 */ 2357 */
2334 if (*imbalance < busiest_load_per_task) { 2358 if (*imbalance < busiest_load_per_task) {
2335 unsigned long pwr_now, pwr_move; 2359 unsigned long tmp, pwr_now, pwr_move;
2336 unsigned long tmp;
2337 unsigned int imbn; 2360 unsigned int imbn;
2338 2361
2339small_imbalance: 2362small_imbalance:
@@ -2405,22 +2428,23 @@ ret:
2405/* 2428/*
2406 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2429 * find_busiest_queue - find the busiest runqueue among the cpus in group.
2407 */ 2430 */
2408static runqueue_t *find_busiest_queue(struct sched_group *group, 2431static struct rq *
2409 enum idle_type idle, unsigned long imbalance) 2432find_busiest_queue(struct sched_group *group, enum idle_type idle,
2433 unsigned long imbalance)
2410{ 2434{
2435 struct rq *busiest = NULL, *rq;
2411 unsigned long max_load = 0; 2436 unsigned long max_load = 0;
2412 runqueue_t *busiest = NULL, *rqi;
2413 int i; 2437 int i;
2414 2438
2415 for_each_cpu_mask(i, group->cpumask) { 2439 for_each_cpu_mask(i, group->cpumask) {
2416 rqi = cpu_rq(i); 2440 rq = cpu_rq(i);
2417 2441
2418 if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) 2442 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
2419 continue; 2443 continue;
2420 2444
2421 if (rqi->raw_weighted_load > max_load) { 2445 if (rq->raw_weighted_load > max_load) {
2422 max_load = rqi->raw_weighted_load; 2446 max_load = rq->raw_weighted_load;
2423 busiest = rqi; 2447 busiest = rq;
2424 } 2448 }
2425 } 2449 }
2426 2450
@@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2433 */ 2457 */
2434#define MAX_PINNED_INTERVAL 512 2458#define MAX_PINNED_INTERVAL 512
2435 2459
2436#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) 2460static inline unsigned long minus_1_or_zero(unsigned long n)
2461{
2462 return n > 0 ? n - 1 : 0;
2463}
2464
2437/* 2465/*
2438 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2466 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2439 * tasks if there is an imbalance. 2467 * tasks if there is an imbalance.
2440 * 2468 *
2441 * Called with this_rq unlocked. 2469 * Called with this_rq unlocked.
2442 */ 2470 */
2443static int load_balance(int this_cpu, runqueue_t *this_rq, 2471static int load_balance(int this_cpu, struct rq *this_rq,
2444 struct sched_domain *sd, enum idle_type idle) 2472 struct sched_domain *sd, enum idle_type idle)
2445{ 2473{
2474 int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2446 struct sched_group *group; 2475 struct sched_group *group;
2447 runqueue_t *busiest;
2448 unsigned long imbalance; 2476 unsigned long imbalance;
2449 int nr_moved, all_pinned = 0; 2477 struct rq *busiest;
2450 int active_balance = 0;
2451 int sd_idle = 0;
2452 2478
2453 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2479 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2454 !sched_smt_power_savings) 2480 !sched_smt_power_savings)
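minus_1_or_zero() above replaces the old #define with a static inline, which gives the argument a real type and evaluates it exactly once; the behaviour is unchanged (0 stays 0, anything else loses 1). Its role is visible in the call shown in the next hunk: load_balance() asks move_tasks() for at most nr_running - 1 tasks, so the busiest runqueue is never drained completely:

    nr_moved = move_tasks(this_rq, this_cpu, busiest,
                          minus_1_or_zero(busiest->nr_running),
                          imbalance, sd, idle, &all_pinned);
    /* busiest->nr_running == 1  ->  max_nr_move == 0, nothing is pulled */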
@@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2482 */ 2508 */
2483 double_rq_lock(this_rq, busiest); 2509 double_rq_lock(this_rq, busiest);
2484 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2510 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2485 minus_1_or_zero(busiest->nr_running), 2511 minus_1_or_zero(busiest->nr_running),
2486 imbalance, sd, idle, &all_pinned); 2512 imbalance, sd, idle, &all_pinned);
2487 double_rq_unlock(this_rq, busiest); 2513 double_rq_unlock(this_rq, busiest);
2488 2514
2489 /* All tasks on this runqueue were pinned by CPU affinity */ 2515 /* All tasks on this runqueue were pinned by CPU affinity */
@@ -2556,7 +2582,8 @@ out_one_pinned:
2556 (sd->balance_interval < sd->max_interval)) 2582 (sd->balance_interval < sd->max_interval))
2557 sd->balance_interval *= 2; 2583 sd->balance_interval *= 2;
2558 2584
2559 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2585 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2586 !sched_smt_power_savings)
2560 return -1; 2587 return -1;
2561 return 0; 2588 return 0;
2562} 2589}
@@ -2568,11 +2595,11 @@ out_one_pinned:
2568 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2595 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
2569 * this_rq is locked. 2596 * this_rq is locked.
2570 */ 2597 */
2571static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, 2598static int
2572 struct sched_domain *sd) 2599load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2573{ 2600{
2574 struct sched_group *group; 2601 struct sched_group *group;
2575 runqueue_t *busiest = NULL; 2602 struct rq *busiest = NULL;
2576 unsigned long imbalance; 2603 unsigned long imbalance;
2577 int nr_moved = 0; 2604 int nr_moved = 0;
2578 int sd_idle = 0; 2605 int sd_idle = 0;
@@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2618 2645
2619out_balanced: 2646out_balanced:
2620 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2647 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2621 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2648 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2649 !sched_smt_power_savings)
2622 return -1; 2650 return -1;
2623 sd->nr_balance_failed = 0; 2651 sd->nr_balance_failed = 0;
2652
2624 return 0; 2653 return 0;
2625} 2654}
2626 2655
@@ -2628,16 +2657,15 @@ out_balanced:
2628 * idle_balance is called by schedule() if this_cpu is about to become 2657 * idle_balance is called by schedule() if this_cpu is about to become
2629 * idle. Attempts to pull tasks from other CPUs. 2658 * idle. Attempts to pull tasks from other CPUs.
2630 */ 2659 */
2631static void idle_balance(int this_cpu, runqueue_t *this_rq) 2660static void idle_balance(int this_cpu, struct rq *this_rq)
2632{ 2661{
2633 struct sched_domain *sd; 2662 struct sched_domain *sd;
2634 2663
2635 for_each_domain(this_cpu, sd) { 2664 for_each_domain(this_cpu, sd) {
2636 if (sd->flags & SD_BALANCE_NEWIDLE) { 2665 if (sd->flags & SD_BALANCE_NEWIDLE) {
2637 if (load_balance_newidle(this_cpu, this_rq, sd)) { 2666 /* If we've pulled tasks over stop searching: */
2638 /* We've pulled tasks over so stop searching */ 2667 if (load_balance_newidle(this_cpu, this_rq, sd))
2639 break; 2668 break;
2640 }
2641 } 2669 }
2642 } 2670 }
2643} 2671}
@@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
2650 * 2678 *
2651 * Called with busiest_rq locked. 2679 * Called with busiest_rq locked.
2652 */ 2680 */
2653static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) 2681static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2654{ 2682{
2655 struct sched_domain *sd;
2656 runqueue_t *target_rq;
2657 int target_cpu = busiest_rq->push_cpu; 2683 int target_cpu = busiest_rq->push_cpu;
2684 struct sched_domain *sd;
2685 struct rq *target_rq;
2658 2686
2687 /* Is there any task to move? */
2659 if (busiest_rq->nr_running <= 1) 2688 if (busiest_rq->nr_running <= 1)
2660 /* no task to move */
2661 return; 2689 return;
2662 2690
2663 target_rq = cpu_rq(target_cpu); 2691 target_rq = cpu_rq(target_cpu);
@@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
2675 /* Search for an sd spanning us and the target CPU. */ 2703 /* Search for an sd spanning us and the target CPU. */
2676 for_each_domain(target_cpu, sd) { 2704 for_each_domain(target_cpu, sd) {
2677 if ((sd->flags & SD_LOAD_BALANCE) && 2705 if ((sd->flags & SD_LOAD_BALANCE) &&
2678 cpu_isset(busiest_cpu, sd->span)) 2706 cpu_isset(busiest_cpu, sd->span))
2679 break; 2707 break;
2680 } 2708 }
2681 2709
2682 if (unlikely(sd == NULL)) 2710 if (likely(sd)) {
2683 goto out; 2711 schedstat_inc(sd, alb_cnt);
2684
2685 schedstat_inc(sd, alb_cnt);
2686 2712
2687 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2713 if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
2688 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) 2714 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
2689 schedstat_inc(sd, alb_pushed); 2715 NULL))
2690 else 2716 schedstat_inc(sd, alb_pushed);
2691 schedstat_inc(sd, alb_failed); 2717 else
2692out: 2718 schedstat_inc(sd, alb_failed);
2719 }
2693 spin_unlock(&target_rq->lock); 2720 spin_unlock(&target_rq->lock);
2694} 2721}
2695 2722
@@ -2702,23 +2729,27 @@ out:
2702 * Balancing parameters are set up in arch_init_sched_domains. 2729 * Balancing parameters are set up in arch_init_sched_domains.
2703 */ 2730 */
2704 2731
2705/* Don't have all balancing operations going off at once */ 2732/* Don't have all balancing operations going off at once: */
2706#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) 2733static inline unsigned long cpu_offset(int cpu)
2734{
2735 return jiffies + cpu * HZ / NR_CPUS;
2736}
2707 2737
2708static void rebalance_tick(int this_cpu, runqueue_t *this_rq, 2738static void
2709 enum idle_type idle) 2739rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
2710{ 2740{
2711 unsigned long old_load, this_load; 2741 unsigned long this_load, interval, j = cpu_offset(this_cpu);
2712 unsigned long j = jiffies + CPU_OFFSET(this_cpu);
2713 struct sched_domain *sd; 2742 struct sched_domain *sd;
2714 int i; 2743 int i, scale;
2715 2744
2716 this_load = this_rq->raw_weighted_load; 2745 this_load = this_rq->raw_weighted_load;
2717 /* Update our load */ 2746
2718 for (i = 0; i < 3; i++) { 2747 /* Update our load: */
2719 unsigned long new_load = this_load; 2748 for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
2720 int scale = 1 << i; 2749 unsigned long old_load, new_load;
2750
2721 old_load = this_rq->cpu_load[i]; 2751 old_load = this_rq->cpu_load[i];
2752 new_load = this_load;
2722 /* 2753 /*
2723 * Round up the averaging division if load is increasing. This 2754 * Round up the averaging division if load is increasing. This
2724 * prevents us from getting stuck on 9 if the load is 10, for 2755 * prevents us from getting stuck on 9 if the load is 10, for
@@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2730 } 2761 }
2731 2762
2732 for_each_domain(this_cpu, sd) { 2763 for_each_domain(this_cpu, sd) {
2733 unsigned long interval;
2734
2735 if (!(sd->flags & SD_LOAD_BALANCE)) 2764 if (!(sd->flags & SD_LOAD_BALANCE))
2736 continue; 2765 continue;
2737 2766
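The "round up the averaging division" comment in the rebalance_tick() hunk above refers to the exponential cpu_load[] average; the averaging step itself falls outside these hunks, but it is of the approximate form below, and the round-up is what lets the average actually reach a higher steady load instead of sticking one below it (the code here is an assumption sketched from the comment, not copied from the patch):

    if (new_load > old_load)
        new_load += scale - 1;          /* round the division up */
    this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;

    /* i = 1, scale = 2, cpu_load[1] = 9, steady raw load of 10:
     *   without the round-up: (9 + 10) >> 1 = 9, stuck at 9 forever
     *   with the round-up:    (9 + 11) >> 1 = 10, converges to 10
     */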
@@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2761/* 2790/*
2762 * on UP we do not need to balance between CPUs: 2791 * on UP we do not need to balance between CPUs:
2763 */ 2792 */
2764static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) 2793static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
2765{ 2794{
2766} 2795}
2767static inline void idle_balance(int cpu, runqueue_t *rq) 2796static inline void idle_balance(int cpu, struct rq *rq)
2768{ 2797{
2769} 2798}
2770#endif 2799#endif
2771 2800
2772static inline int wake_priority_sleeper(runqueue_t *rq) 2801static inline int wake_priority_sleeper(struct rq *rq)
2773{ 2802{
2774 int ret = 0; 2803 int ret = 0;
2804
2775#ifdef CONFIG_SCHED_SMT 2805#ifdef CONFIG_SCHED_SMT
2776 spin_lock(&rq->lock); 2806 spin_lock(&rq->lock);
2777 /* 2807 /*
@@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2795 * This is called on clock ticks and on context switches. 2825 * This is called on clock ticks and on context switches.
2796 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2826 * Bank in p->sched_time the ns elapsed since the last tick or switch.
2797 */ 2827 */
2798static inline void update_cpu_clock(task_t *p, runqueue_t *rq, 2828static inline void
2799 unsigned long long now) 2829update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
2800{ 2830{
2801 unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); 2831 p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
2802 p->sched_time += now - last;
2803} 2832}
2804 2833
2805/* 2834/*
2806 * Return current->sched_time plus any more ns on the sched_clock 2835 * Return current->sched_time plus any more ns on the sched_clock
2807 * that have not yet been banked. 2836 * that have not yet been banked.
2808 */ 2837 */
2809unsigned long long current_sched_time(const task_t *tsk) 2838unsigned long long current_sched_time(const struct task_struct *p)
2810{ 2839{
2811 unsigned long long ns; 2840 unsigned long long ns;
2812 unsigned long flags; 2841 unsigned long flags;
2842
2813 local_irq_save(flags); 2843 local_irq_save(flags);
2814 ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); 2844 ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
2815 ns = tsk->sched_time + (sched_clock() - ns); 2845 ns = p->sched_time + sched_clock() - ns;
2816 local_irq_restore(flags); 2846 local_irq_restore(flags);
2847
2817 return ns; 2848 return ns;
2818} 2849}
2819 2850
@@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)
2827 * increasing number of running tasks. We also ignore the interactivity 2858 * increasing number of running tasks. We also ignore the interactivity
2828 * if a better static_prio task has expired: 2859 * if a better static_prio task has expired:
2829 */ 2860 */
2830#define EXPIRED_STARVING(rq) \ 2861static inline int expired_starving(struct rq *rq)
2831 ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ 2862{
2832 (jiffies - (rq)->expired_timestamp >= \ 2863 if (rq->curr->static_prio > rq->best_expired_prio)
2833 STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ 2864 return 1;
2834 ((rq)->curr->static_prio > (rq)->best_expired_prio)) 2865 if (!STARVATION_LIMIT || !rq->expired_timestamp)
2866 return 0;
2867 if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
2868 return 1;
2869 return 0;
2870}
2835 2871
2836/* 2872/*
2837 * Account user cpu time to a process. 2873 * Account user cpu time to a process.
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
2864 cputime_t cputime) 2900 cputime_t cputime)
2865{ 2901{
2866 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2902 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2867 runqueue_t *rq = this_rq(); 2903 struct rq *rq = this_rq();
2868 cputime64_t tmp; 2904 cputime64_t tmp;
2869 2905
2870 p->stime = cputime_add(p->stime, cputime); 2906 p->stime = cputime_add(p->stime, cputime);
@@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2894{ 2930{
2895 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2931 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2896 cputime64_t tmp = cputime_to_cputime64(steal); 2932 cputime64_t tmp = cputime_to_cputime64(steal);
2897 runqueue_t *rq = this_rq(); 2933 struct rq *rq = this_rq();
2898 2934
2899 if (p == rq->idle) { 2935 if (p == rq->idle) {
2900 p->stime = cputime_add(p->stime, steal); 2936 p->stime = cputime_add(p->stime, steal);
@@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2915 */ 2951 */
2916void scheduler_tick(void) 2952void scheduler_tick(void)
2917{ 2953{
2918 int cpu = smp_processor_id();
2919 runqueue_t *rq = this_rq();
2920 task_t *p = current;
2921 unsigned long long now = sched_clock(); 2954 unsigned long long now = sched_clock();
2955 struct task_struct *p = current;
2956 int cpu = smp_processor_id();
2957 struct rq *rq = cpu_rq(cpu);
2922 2958
2923 update_cpu_clock(p, rq, now); 2959 update_cpu_clock(p, rq, now);
2924 2960
@@ -2968,7 +3004,7 @@ void scheduler_tick(void)
2968 3004
2969 if (!rq->expired_timestamp) 3005 if (!rq->expired_timestamp)
2970 rq->expired_timestamp = jiffies; 3006 rq->expired_timestamp = jiffies;
2971 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 3007 if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
2972 enqueue_task(p, rq->expired); 3008 enqueue_task(p, rq->expired);
2973 if (p->static_prio < rq->best_expired_prio) 3009 if (p->static_prio < rq->best_expired_prio)
2974 rq->best_expired_prio = p->static_prio; 3010 rq->best_expired_prio = p->static_prio;
@@ -3007,7 +3043,7 @@ out:
3007} 3043}
3008 3044
3009#ifdef CONFIG_SCHED_SMT 3045#ifdef CONFIG_SCHED_SMT
3010static inline void wakeup_busy_runqueue(runqueue_t *rq) 3046static inline void wakeup_busy_runqueue(struct rq *rq)
3011{ 3047{
3012 /* If an SMT runqueue is sleeping due to priority reasons wake it up */ 3048 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
3013 if (rq->curr == rq->idle && rq->nr_running) 3049 if (rq->curr == rq->idle && rq->nr_running)
@@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
3033 return; 3069 return;
3034 3070
3035 for_each_cpu_mask(i, sd->span) { 3071 for_each_cpu_mask(i, sd->span) {
3036 runqueue_t *smt_rq = cpu_rq(i); 3072 struct rq *smt_rq = cpu_rq(i);
3037 3073
3038 if (i == this_cpu) 3074 if (i == this_cpu)
3039 continue; 3075 continue;
@@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
3050 * utilize, if another task runs on a sibling. This models the 3086 * utilize, if another task runs on a sibling. This models the
3051 * slowdown effect of other tasks running on siblings: 3087 * slowdown effect of other tasks running on siblings:
3052 */ 3088 */
3053static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) 3089static inline unsigned long
3090smt_slice(struct task_struct *p, struct sched_domain *sd)
3054{ 3091{
3055 return p->time_slice * (100 - sd->per_cpu_gain) / 100; 3092 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
3056} 3093}
@@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
3061 * acquire their lock. As we only trylock the normal locking order does not 3098 * acquire their lock. As we only trylock the normal locking order does not
3062 * need to be obeyed. 3099 * need to be obeyed.
3063 */ 3100 */
3064static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) 3101static int
3102dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3065{ 3103{
3066 struct sched_domain *tmp, *sd = NULL; 3104 struct sched_domain *tmp, *sd = NULL;
3067 int ret = 0, i; 3105 int ret = 0, i;
@@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
3081 return 0; 3119 return 0;
3082 3120
3083 for_each_cpu_mask(i, sd->span) { 3121 for_each_cpu_mask(i, sd->span) {
3084 runqueue_t *smt_rq; 3122 struct task_struct *smt_curr;
3085 task_t *smt_curr; 3123 struct rq *smt_rq;
3086 3124
3087 if (i == this_cpu) 3125 if (i == this_cpu)
3088 continue; 3126 continue;
@@ -3127,9 +3165,8 @@ unlock:
3127static inline void wake_sleeping_dependent(int this_cpu) 3165static inline void wake_sleeping_dependent(int this_cpu)
3128{ 3166{
3129} 3167}
3130 3168static inline int
3131static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, 3169dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3132 task_t *p)
3133{ 3170{
3134 return 0; 3171 return 0;
3135} 3172}
@@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)
3142 /* 3179 /*
3143 * Underflow? 3180 * Underflow?
3144 */ 3181 */
3145 BUG_ON((preempt_count() < 0)); 3182 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
3183 return;
3146 preempt_count() += val; 3184 preempt_count() += val;
3147 /* 3185 /*
3148 * Spinlock count overflowing soon? 3186 * Spinlock count overflowing soon?
3149 */ 3187 */
3150 BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); 3188 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
3151} 3189}
3152EXPORT_SYMBOL(add_preempt_count); 3190EXPORT_SYMBOL(add_preempt_count);
3153 3191
@@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)
3156 /* 3194 /*
3157 * Underflow? 3195 * Underflow?
3158 */ 3196 */
3159 BUG_ON(val > preempt_count()); 3197 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
3198 return;
3160 /* 3199 /*
3161 * Is the spinlock portion underflowing? 3200 * Is the spinlock portion underflowing?
3162 */ 3201 */
3163 BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); 3202 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
3203 !(preempt_count() & PREEMPT_MASK)))
3204 return;
3205
3164 preempt_count() -= val; 3206 preempt_count() -= val;
3165} 3207}
3166EXPORT_SYMBOL(sub_preempt_count); 3208EXPORT_SYMBOL(sub_preempt_count);
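
Both preempt-count hunks above swap the fatal BUG_ON() checks for DEBUG_LOCKS_WARN_ON(), which reports the problem and lets the function return early instead of killing the machine. A minimal userspace model of that control flow, with an assumed PREEMPT_MASK of 0xff and a local warn_on() standing in for DEBUG_LOCKS_WARN_ON():

    #include <stdio.h>

    /*
     * Userspace model of the checks above.  PREEMPT_MASK is assumed to be
     * 0xff, matching the 2.6 preempt_count() layout (preemption depth in
     * the low byte).  warn_on() stands in for DEBUG_LOCKS_WARN_ON(): it
     * reports the condition and hands it back so the caller can bail out.
     */
    #define PREEMPT_MASK 0xffUL

    static unsigned long preempt_count;

    static int warn_on(int cond, const char *what)
    {
            if (cond)
                    fprintf(stderr, "warning: %s\n", what);
            return cond;
    }

    static void sub_preempt_count(unsigned long val)
    {
            if (warn_on(val > preempt_count, "preempt count underflow"))
                    return;
            if (warn_on(val < PREEMPT_MASK && !(preempt_count & PREEMPT_MASK),
                        "spinlock portion underflow"))
                    return;
            preempt_count -= val;
    }

    int main(void)
    {
            sub_preempt_count(1);           /* warned and skipped */
            preempt_count = 1;
            sub_preempt_count(1);           /* legal, drops back to zero */
            printf("%lu\n", preempt_count); /* prints 0 */
            return 0;
    }
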
@@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
3178 */ 3220 */
3179asmlinkage void __sched schedule(void) 3221asmlinkage void __sched schedule(void)
3180{ 3222{
3181 long *switch_count; 3223 struct task_struct *prev, *next;
3182 task_t *prev, *next; 3224 struct prio_array *array;
3183 runqueue_t *rq;
3184 prio_array_t *array;
3185 struct list_head *queue; 3225 struct list_head *queue;
3186 unsigned long long now; 3226 unsigned long long now;
3187 unsigned long run_time; 3227 unsigned long run_time;
3188 int cpu, idx, new_prio; 3228 int cpu, idx, new_prio;
3229 long *switch_count;
3230 struct rq *rq;
3189 3231
3190 /* 3232 /*
3191 * Test if we are atomic. Since do_exit() needs to call into 3233 * Test if we are atomic. Since do_exit() needs to call into
@@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:
3275 3317
3276 idx = sched_find_first_bit(array->bitmap); 3318 idx = sched_find_first_bit(array->bitmap);
3277 queue = array->queue + idx; 3319 queue = array->queue + idx;
3278 next = list_entry(queue->next, task_t, run_list); 3320 next = list_entry(queue->next, struct task_struct, run_list);
3279 3321
3280 if (!rt_task(next) && interactive_sleep(next->sleep_type)) { 3322 if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
3281 unsigned long long delta = now - next->timestamp; 3323 unsigned long long delta = now - next->timestamp;
@@ -3338,7 +3380,6 @@ switch_tasks:
3338 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3380 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3339 goto need_resched; 3381 goto need_resched;
3340} 3382}
3341
3342EXPORT_SYMBOL(schedule); 3383EXPORT_SYMBOL(schedule);
3343 3384
3344#ifdef CONFIG_PREEMPT 3385#ifdef CONFIG_PREEMPT
@@ -3383,7 +3424,6 @@ need_resched:
3383 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3424 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3384 goto need_resched; 3425 goto need_resched;
3385} 3426}
3386
3387EXPORT_SYMBOL(preempt_schedule); 3427EXPORT_SYMBOL(preempt_schedule);
3388 3428
3389/* 3429/*
@@ -3432,10 +3472,8 @@ need_resched:
3432int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3472int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3433 void *key) 3473 void *key)
3434{ 3474{
3435 task_t *p = curr->private; 3475 return try_to_wake_up(curr->private, mode, sync);
3436 return try_to_wake_up(p, mode, sync);
3437} 3476}
3438
3439EXPORT_SYMBOL(default_wake_function); 3477EXPORT_SYMBOL(default_wake_function);
3440 3478
3441/* 3479/*
@@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3453 struct list_head *tmp, *next; 3491 struct list_head *tmp, *next;
3454 3492
3455 list_for_each_safe(tmp, next, &q->task_list) { 3493 list_for_each_safe(tmp, next, &q->task_list) {
3456 wait_queue_t *curr; 3494 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
3457 unsigned flags; 3495 unsigned flags = curr->flags;
3458 curr = list_entry(tmp, wait_queue_t, task_list); 3496
3459 flags = curr->flags;
3460 if (curr->func(curr, mode, sync, key) && 3497 if (curr->func(curr, mode, sync, key) &&
3461 (flags & WQ_FLAG_EXCLUSIVE) && 3498 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3462 !--nr_exclusive)
3463 break; 3499 break;
3464 } 3500 }
3465} 3501}
@@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3480 __wake_up_common(q, mode, nr_exclusive, 0, key); 3516 __wake_up_common(q, mode, nr_exclusive, 0, key);
3481 spin_unlock_irqrestore(&q->lock, flags); 3517 spin_unlock_irqrestore(&q->lock, flags);
3482} 3518}
3483
3484EXPORT_SYMBOL(__wake_up); 3519EXPORT_SYMBOL(__wake_up);
3485 3520
3486/* 3521/*
@@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);
3549void fastcall __sched wait_for_completion(struct completion *x) 3584void fastcall __sched wait_for_completion(struct completion *x)
3550{ 3585{
3551 might_sleep(); 3586 might_sleep();
3587
3552 spin_lock_irq(&x->wait.lock); 3588 spin_lock_irq(&x->wait.lock);
3553 if (!x->done) { 3589 if (!x->done) {
3554 DECLARE_WAITQUEUE(wait, current); 3590 DECLARE_WAITQUEUE(wait, current);
@@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
3693 schedule(); 3729 schedule();
3694 SLEEP_ON_TAIL 3730 SLEEP_ON_TAIL
3695} 3731}
3696
3697EXPORT_SYMBOL(interruptible_sleep_on); 3732EXPORT_SYMBOL(interruptible_sleep_on);
3698 3733
3699long fastcall __sched 3734long fastcall __sched
@@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
3709 3744
3710 return timeout; 3745 return timeout;
3711} 3746}
3712
3713EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3747EXPORT_SYMBOL(interruptible_sleep_on_timeout);
3714 3748
3715void fastcall __sched sleep_on(wait_queue_head_t *q) 3749void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
3722 schedule(); 3756 schedule();
3723 SLEEP_ON_TAIL 3757 SLEEP_ON_TAIL
3724} 3758}
3725
3726EXPORT_SYMBOL(sleep_on); 3759EXPORT_SYMBOL(sleep_on);
3727 3760
3728long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) 3761long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);
3752 * 3785 *
3753 * Used by the rt_mutex code to implement priority inheritance logic. 3786 * Used by the rt_mutex code to implement priority inheritance logic.
3754 */ 3787 */
3755void rt_mutex_setprio(task_t *p, int prio) 3788void rt_mutex_setprio(struct task_struct *p, int prio)
3756{ 3789{
3790 struct prio_array *array;
3757 unsigned long flags; 3791 unsigned long flags;
3758 prio_array_t *array; 3792 struct rq *rq;
3759 runqueue_t *rq;
3760 int oldprio; 3793 int oldprio;
3761 3794
3762 BUG_ON(prio < 0 || prio > MAX_PRIO); 3795 BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)
3793 3826
3794#endif 3827#endif
3795 3828
3796void set_user_nice(task_t *p, long nice) 3829void set_user_nice(struct task_struct *p, long nice)
3797{ 3830{
3798 unsigned long flags; 3831 struct prio_array *array;
3799 prio_array_t *array;
3800 runqueue_t *rq;
3801 int old_prio, delta; 3832 int old_prio, delta;
3833 unsigned long flags;
3834 struct rq *rq;
3802 3835
3803 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3836 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3804 return; 3837 return;
@@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);
3849 * @p: task 3882 * @p: task
3850 * @nice: nice value 3883 * @nice: nice value
3851 */ 3884 */
3852int can_nice(const task_t *p, const int nice) 3885int can_nice(const struct task_struct *p, const int nice)
3853{ 3886{
3854 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3887 /* convert nice value [19,-20] to rlimit style value [1,40] */
3855 int nice_rlim = 20 - nice; 3888 int nice_rlim = 20 - nice;
3889
3856 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3890 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
3857 capable(CAP_SYS_NICE)); 3891 capable(CAP_SYS_NICE));
3858} 3892}
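
can_nice() applies the conversion spelled out in its comment: a nice value in [19, -20] becomes an RLIMIT_NICE-style value in [1, 40], so lowering nice is allowed while 20 - nice stays within the rlimit (or the task has CAP_SYS_NICE). A standalone sketch of just that mapping:

    #include <stdio.h>

    /*
     * The conversion used by can_nice() above: nice 19..-20 maps to an
     * RLIMIT_NICE-style value 1..40, so a limit of N allows nice values
     * down to 20 - N.
     */
    static int nice_rlim(int nice)
    {
            return 20 - nice;
    }

    int main(void)
    {
            printf("nice  19 -> rlim %d\n", nice_rlim(19));  /* 1  */
            printf("nice   0 -> rlim %d\n", nice_rlim(0));   /* 20 */
            printf("nice -20 -> rlim %d\n", nice_rlim(-20)); /* 40 */
            return 0;
    }
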
@@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)
3868 */ 3902 */
3869asmlinkage long sys_nice(int increment) 3903asmlinkage long sys_nice(int increment)
3870{ 3904{
3871 int retval; 3905 long nice, retval;
3872 long nice;
3873 3906
3874 /* 3907 /*
3875 * Setpriority might change our priority at the same moment. 3908 * Setpriority might change our priority at the same moment.
@@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
3908 * RT tasks are offset by -200. Normal tasks are centered 3941 * RT tasks are offset by -200. Normal tasks are centered
3909 * around 0, value goes from -16 to +15. 3942 * around 0, value goes from -16 to +15.
3910 */ 3943 */
3911int task_prio(const task_t *p) 3944int task_prio(const struct task_struct *p)
3912{ 3945{
3913 return p->prio - MAX_RT_PRIO; 3946 return p->prio - MAX_RT_PRIO;
3914} 3947}
@@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)
3917 * task_nice - return the nice value of a given task. 3950 * task_nice - return the nice value of a given task.
3918 * @p: the task in question. 3951 * @p: the task in question.
3919 */ 3952 */
3920int task_nice(const task_t *p) 3953int task_nice(const struct task_struct *p)
3921{ 3954{
3922 return TASK_NICE(p); 3955 return TASK_NICE(p);
3923} 3956}
@@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)
3936 * idle_task - return the idle task for a given cpu. 3969 * idle_task - return the idle task for a given cpu.
3937 * @cpu: the processor in question. 3970 * @cpu: the processor in question.
3938 */ 3971 */
3939task_t *idle_task(int cpu) 3972struct task_struct *idle_task(int cpu)
3940{ 3973{
3941 return cpu_rq(cpu)->idle; 3974 return cpu_rq(cpu)->idle;
3942} 3975}
@@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)
3945 * find_process_by_pid - find a process with a matching PID value. 3978 * find_process_by_pid - find a process with a matching PID value.
3946 * @pid: the pid in question. 3979 * @pid: the pid in question.
3947 */ 3980 */
3948static inline task_t *find_process_by_pid(pid_t pid) 3981static inline struct task_struct *find_process_by_pid(pid_t pid)
3949{ 3982{
3950 return pid ? find_task_by_pid(pid) : current; 3983 return pid ? find_task_by_pid(pid) : current;
3951} 3984}
@@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
3954static void __setscheduler(struct task_struct *p, int policy, int prio) 3987static void __setscheduler(struct task_struct *p, int policy, int prio)
3955{ 3988{
3956 BUG_ON(p->array); 3989 BUG_ON(p->array);
3990
3957 p->policy = policy; 3991 p->policy = policy;
3958 p->rt_priority = prio; 3992 p->rt_priority = prio;
3959 p->normal_prio = normal_prio(p); 3993 p->normal_prio = normal_prio(p);
@@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3977int sched_setscheduler(struct task_struct *p, int policy, 4011int sched_setscheduler(struct task_struct *p, int policy,
3978 struct sched_param *param) 4012 struct sched_param *param)
3979{ 4013{
3980 int retval; 4014 int retval, oldprio, oldpolicy = -1;
3981 int oldprio, oldpolicy = -1; 4015 struct prio_array *array;
3982 prio_array_t *array;
3983 unsigned long flags; 4016 unsigned long flags;
3984 runqueue_t *rq; 4017 struct rq *rq;
3985 4018
3986 /* may grab non-irq protected spin_locks */ 4019 /* may grab non-irq protected spin_locks */
3987 BUG_ON(in_interrupt()); 4020 BUG_ON(in_interrupt());
@@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
4079static int 4112static int
4080do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 4113do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4081{ 4114{
4082 int retval;
4083 struct sched_param lparam; 4115 struct sched_param lparam;
4084 struct task_struct *p; 4116 struct task_struct *p;
4117 int retval;
4085 4118
4086 if (!param || pid < 0) 4119 if (!param || pid < 0)
4087 return -EINVAL; 4120 return -EINVAL;
@@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4097 read_unlock_irq(&tasklist_lock); 4130 read_unlock_irq(&tasklist_lock);
4098 retval = sched_setscheduler(p, policy, &lparam); 4131 retval = sched_setscheduler(p, policy, &lparam);
4099 put_task_struct(p); 4132 put_task_struct(p);
4133
4100 return retval; 4134 return retval;
4101} 4135}
4102 4136
@@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
4132 */ 4166 */
4133asmlinkage long sys_sched_getscheduler(pid_t pid) 4167asmlinkage long sys_sched_getscheduler(pid_t pid)
4134{ 4168{
4169 struct task_struct *p;
4135 int retval = -EINVAL; 4170 int retval = -EINVAL;
4136 task_t *p;
4137 4171
4138 if (pid < 0) 4172 if (pid < 0)
4139 goto out_nounlock; 4173 goto out_nounlock;
@@ -4160,8 +4194,8 @@ out_nounlock:
4160asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 4194asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
4161{ 4195{
4162 struct sched_param lp; 4196 struct sched_param lp;
4197 struct task_struct *p;
4163 int retval = -EINVAL; 4198 int retval = -EINVAL;
4164 task_t *p;
4165 4199
4166 if (!param || pid < 0) 4200 if (!param || pid < 0)
4167 goto out_nounlock; 4201 goto out_nounlock;
@@ -4194,9 +4228,9 @@ out_unlock:
4194 4228
4195long sched_setaffinity(pid_t pid, cpumask_t new_mask) 4229long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4196{ 4230{
4197 task_t *p;
4198 int retval;
4199 cpumask_t cpus_allowed; 4231 cpumask_t cpus_allowed;
4232 struct task_struct *p;
4233 int retval;
4200 4234
4201 lock_cpu_hotplug(); 4235 lock_cpu_hotplug();
4202 read_lock(&tasklist_lock); 4236 read_lock(&tasklist_lock);
@@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
4282 4316
4283long sched_getaffinity(pid_t pid, cpumask_t *mask) 4317long sched_getaffinity(pid_t pid, cpumask_t *mask)
4284{ 4318{
4319 struct task_struct *p;
4285 int retval; 4320 int retval;
4286 task_t *p;
4287 4321
4288 lock_cpu_hotplug(); 4322 lock_cpu_hotplug();
4289 read_lock(&tasklist_lock); 4323 read_lock(&tasklist_lock);
@@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
4342 */ 4376 */
4343asmlinkage long sys_sched_yield(void) 4377asmlinkage long sys_sched_yield(void)
4344{ 4378{
4345 runqueue_t *rq = this_rq_lock(); 4379 struct rq *rq = this_rq_lock();
4346 prio_array_t *array = current->array; 4380 struct prio_array *array = current->array, *target = rq->expired;
4347 prio_array_t *target = rq->expired;
4348 4381
4349 schedstat_inc(rq, yld_cnt); 4382 schedstat_inc(rq, yld_cnt);
4350 /* 4383 /*
@@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)
4378 * no need to preempt or enable interrupts: 4411 * no need to preempt or enable interrupts:
4379 */ 4412 */
4380 __release(rq->lock); 4413 __release(rq->lock);
4414 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
4381 _raw_spin_unlock(&rq->lock); 4415 _raw_spin_unlock(&rq->lock);
4382 preempt_enable_no_resched(); 4416 preempt_enable_no_resched();
4383 4417
@@ -4386,7 +4420,16 @@ asmlinkage long sys_sched_yield(void)
4386 return 0; 4420 return 0;
4387} 4421}
4388 4422
4389static inline void __cond_resched(void) 4423static inline int __resched_legal(void)
4424{
4425 if (unlikely(preempt_count()))
4426 return 0;
4427 if (unlikely(system_state != SYSTEM_RUNNING))
4428 return 0;
4429 return 1;
4430}
4431
4432static void __cond_resched(void)
4390{ 4433{
4391#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 4434#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
4392 __might_sleep(__FILE__, __LINE__); 4435 __might_sleep(__FILE__, __LINE__);
@@ -4396,10 +4439,6 @@ static inline void __cond_resched(void)
4396 * PREEMPT_ACTIVE, which could trigger a second 4439 * PREEMPT_ACTIVE, which could trigger a second
4397 * cond_resched() call. 4440 * cond_resched() call.
4398 */ 4441 */
4399 if (unlikely(preempt_count()))
4400 return;
4401 if (unlikely(system_state != SYSTEM_RUNNING))
4402 return;
4403 do { 4442 do {
4404 add_preempt_count(PREEMPT_ACTIVE); 4443 add_preempt_count(PREEMPT_ACTIVE);
4405 schedule(); 4444 schedule();
@@ -4409,13 +4448,12 @@ static inline void __cond_resched(void)
4409 4448
4410int __sched cond_resched(void) 4449int __sched cond_resched(void)
4411{ 4450{
4412 if (need_resched()) { 4451 if (need_resched() && __resched_legal()) {
4413 __cond_resched(); 4452 __cond_resched();
4414 return 1; 4453 return 1;
4415 } 4454 }
4416 return 0; 4455 return 0;
4417} 4456}
4418
4419EXPORT_SYMBOL(cond_resched); 4457EXPORT_SYMBOL(cond_resched);
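
The new __resched_legal() helper pulls the two bail-out conditions out of __cond_resched(): a voluntary reschedule is attempted only when preemption is not disabled and the system has finished booting. A userspace sketch of that gate, with the kernel state reduced to plain globals (all names below are the sketch's own, not kernel APIs):

    #include <stdio.h>

    /*
     * Userspace model of the __resched_legal() gate added above.
     */
    enum system_states { SYSTEM_BOOTING, SYSTEM_RUNNING };

    static int fake_preempt_count;
    static enum system_states system_state = SYSTEM_BOOTING;
    static int need_resched_flag = 1;

    static int resched_legal(void)
    {
            if (fake_preempt_count)
                    return 0;
            if (system_state != SYSTEM_RUNNING)
                    return 0;
            return 1;
    }

    static int cond_resched(void)
    {
            if (need_resched_flag && resched_legal()) {
                    /* __cond_resched() would call schedule() here */
                    return 1;
            }
            return 0;
    }

    int main(void)
    {
            printf("%d\n", cond_resched()); /* 0: still "booting" */
            system_state = SYSTEM_RUNNING;
            printf("%d\n", cond_resched()); /* 1: reschedule allowed */
            return 0;
    }
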
4420 4458
4421/* 4459/*
@@ -4436,7 +4474,8 @@ int cond_resched_lock(spinlock_t *lock)
4436 ret = 1; 4474 ret = 1;
4437 spin_lock(lock); 4475 spin_lock(lock);
4438 } 4476 }
4439 if (need_resched()) { 4477 if (need_resched() && __resched_legal()) {
4478 spin_release(&lock->dep_map, 1, _THIS_IP_);
4440 _raw_spin_unlock(lock); 4479 _raw_spin_unlock(lock);
4441 preempt_enable_no_resched(); 4480 preempt_enable_no_resched();
4442 __cond_resched(); 4481 __cond_resched();
@@ -4445,25 +4484,24 @@ int cond_resched_lock(spinlock_t *lock)
4445 } 4484 }
4446 return ret; 4485 return ret;
4447} 4486}
4448
4449EXPORT_SYMBOL(cond_resched_lock); 4487EXPORT_SYMBOL(cond_resched_lock);
4450 4488
4451int __sched cond_resched_softirq(void) 4489int __sched cond_resched_softirq(void)
4452{ 4490{
4453 BUG_ON(!in_softirq()); 4491 BUG_ON(!in_softirq());
4454 4492
4455 if (need_resched()) { 4493 if (need_resched() && __resched_legal()) {
4456 __local_bh_enable(); 4494 raw_local_irq_disable();
4495 _local_bh_enable();
4496 raw_local_irq_enable();
4457 __cond_resched(); 4497 __cond_resched();
4458 local_bh_disable(); 4498 local_bh_disable();
4459 return 1; 4499 return 1;
4460 } 4500 }
4461 return 0; 4501 return 0;
4462} 4502}
4463
4464EXPORT_SYMBOL(cond_resched_softirq); 4503EXPORT_SYMBOL(cond_resched_softirq);
4465 4504
4466
4467/** 4505/**
4468 * yield - yield the current processor to other threads. 4506 * yield - yield the current processor to other threads.
4469 * 4507 *
@@ -4475,7 +4513,6 @@ void __sched yield(void)
4475 set_current_state(TASK_RUNNING); 4513 set_current_state(TASK_RUNNING);
4476 sys_sched_yield(); 4514 sys_sched_yield();
4477} 4515}
4478
4479EXPORT_SYMBOL(yield); 4516EXPORT_SYMBOL(yield);
4480 4517
4481/* 4518/*
@@ -4487,18 +4524,17 @@ EXPORT_SYMBOL(yield);
4487 */ 4524 */
4488void __sched io_schedule(void) 4525void __sched io_schedule(void)
4489{ 4526{
4490 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4527 struct rq *rq = &__raw_get_cpu_var(runqueues);
4491 4528
4492 atomic_inc(&rq->nr_iowait); 4529 atomic_inc(&rq->nr_iowait);
4493 schedule(); 4530 schedule();
4494 atomic_dec(&rq->nr_iowait); 4531 atomic_dec(&rq->nr_iowait);
4495} 4532}
4496
4497EXPORT_SYMBOL(io_schedule); 4533EXPORT_SYMBOL(io_schedule);
4498 4534
4499long __sched io_schedule_timeout(long timeout) 4535long __sched io_schedule_timeout(long timeout)
4500{ 4536{
4501 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4537 struct rq *rq = &__raw_get_cpu_var(runqueues);
4502 long ret; 4538 long ret;
4503 4539
4504 atomic_inc(&rq->nr_iowait); 4540 atomic_inc(&rq->nr_iowait);
@@ -4565,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4565asmlinkage 4601asmlinkage
4566long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 4602long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4567{ 4603{
4604 struct task_struct *p;
4568 int retval = -EINVAL; 4605 int retval = -EINVAL;
4569 struct timespec t; 4606 struct timespec t;
4570 task_t *p;
4571 4607
4572 if (pid < 0) 4608 if (pid < 0)
4573 goto out_nounlock; 4609 goto out_nounlock;
@@ -4595,28 +4631,32 @@ out_unlock:
4595 4631
4596static inline struct task_struct *eldest_child(struct task_struct *p) 4632static inline struct task_struct *eldest_child(struct task_struct *p)
4597{ 4633{
4598 if (list_empty(&p->children)) return NULL; 4634 if (list_empty(&p->children))
4635 return NULL;
4599 return list_entry(p->children.next,struct task_struct,sibling); 4636 return list_entry(p->children.next,struct task_struct,sibling);
4600} 4637}
4601 4638
4602static inline struct task_struct *older_sibling(struct task_struct *p) 4639static inline struct task_struct *older_sibling(struct task_struct *p)
4603{ 4640{
4604 if (p->sibling.prev==&p->parent->children) return NULL; 4641 if (p->sibling.prev==&p->parent->children)
4642 return NULL;
4605 return list_entry(p->sibling.prev,struct task_struct,sibling); 4643 return list_entry(p->sibling.prev,struct task_struct,sibling);
4606} 4644}
4607 4645
4608static inline struct task_struct *younger_sibling(struct task_struct *p) 4646static inline struct task_struct *younger_sibling(struct task_struct *p)
4609{ 4647{
4610 if (p->sibling.next==&p->parent->children) return NULL; 4648 if (p->sibling.next==&p->parent->children)
4649 return NULL;
4611 return list_entry(p->sibling.next,struct task_struct,sibling); 4650 return list_entry(p->sibling.next,struct task_struct,sibling);
4612} 4651}
4613 4652
4614static void show_task(task_t *p) 4653static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
4654
4655static void show_task(struct task_struct *p)
4615{ 4656{
4616 task_t *relative; 4657 struct task_struct *relative;
4617 unsigned state;
4618 unsigned long free = 0; 4658 unsigned long free = 0;
4619 static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; 4659 unsigned state;
4620 4660
4621 printk("%-13.13s ", p->comm); 4661 printk("%-13.13s ", p->comm);
4622 state = p->state ? __ffs(p->state) + 1 : 0; 4662 state = p->state ? __ffs(p->state) + 1 : 0;
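
show_task() picks a one-letter state from the stat_nam[] table above: index 0 means running, otherwise the index is the position of the lowest set bit of p->state plus one. The same mapping, reproduced with libc's 1-based ffs() and the usual 2.6 TASK_* bit values assumed (1 interruptible, 2 uninterruptible, 4 stopped, ...):

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    /*
     * The state-letter mapping used by show_task() above.  The kernel
     * uses __ffs(p->state) + 1, which equals libc's 1-based ffs().
     */
    static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };

    static const char *state_name(unsigned int state)
    {
            unsigned int idx = state ? (unsigned int)ffs((int)state) : 0;

            return idx < sizeof(stat_nam) / sizeof(stat_nam[0]) ? stat_nam[idx] : "?";
    }

    int main(void)
    {
            printf("%s %s %s\n", state_name(0), state_name(1), state_name(2));
            /* prints: R S D */
            return 0;
    }
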
@@ -4667,7 +4707,7 @@ static void show_task(task_t *p)
4667 4707
4668void show_state(void) 4708void show_state(void)
4669{ 4709{
4670 task_t *g, *p; 4710 struct task_struct *g, *p;
4671 4711
4672#if (BITS_PER_LONG == 32) 4712#if (BITS_PER_LONG == 32)
4673 printk("\n" 4713 printk("\n"
@@ -4689,7 +4729,7 @@ void show_state(void)
4689 } while_each_thread(g, p); 4729 } while_each_thread(g, p);
4690 4730
4691 read_unlock(&tasklist_lock); 4731 read_unlock(&tasklist_lock);
4692 mutex_debug_show_all_locks(); 4732 debug_show_all_locks();
4693} 4733}
4694 4734
4695/** 4735/**
@@ -4700,9 +4740,9 @@ void show_state(void)
4700 * NOTE: this function does not set the idle thread's NEED_RESCHED 4740 * NOTE: this function does not set the idle thread's NEED_RESCHED
4701 * flag, to make booting more robust. 4741 * flag, to make booting more robust.
4702 */ 4742 */
4703void __devinit init_idle(task_t *idle, int cpu) 4743void __devinit init_idle(struct task_struct *idle, int cpu)
4704{ 4744{
4705 runqueue_t *rq = cpu_rq(cpu); 4745 struct rq *rq = cpu_rq(cpu);
4706 unsigned long flags; 4746 unsigned long flags;
4707 4747
4708 idle->timestamp = sched_clock(); 4748 idle->timestamp = sched_clock();
@@ -4741,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4741/* 4781/*
4742 * This is how migration works: 4782 * This is how migration works:
4743 * 4783 *
4744 * 1) we queue a migration_req_t structure in the source CPU's 4784 * 1) we queue a struct migration_req structure in the source CPU's
4745 * runqueue and wake up that CPU's migration thread. 4785 * runqueue and wake up that CPU's migration thread.
4746 * 2) we down() the locked semaphore => thread blocks. 4786 * 2) we down() the locked semaphore => thread blocks.
4747 * 3) migration thread wakes up (implicitly it forces the migrated 4787 * 3) migration thread wakes up (implicitly it forces the migrated
@@ -4763,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4763 * task must not exit() & deallocate itself prematurely. The 4803 * task must not exit() & deallocate itself prematurely. The
4764 * call is not atomic; no spinlocks may be held. 4804 * call is not atomic; no spinlocks may be held.
4765 */ 4805 */
4766int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4806int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
4767{ 4807{
4808 struct migration_req req;
4768 unsigned long flags; 4809 unsigned long flags;
4810 struct rq *rq;
4769 int ret = 0; 4811 int ret = 0;
4770 migration_req_t req;
4771 runqueue_t *rq;
4772 4812
4773 rq = task_rq_lock(p, &flags); 4813 rq = task_rq_lock(p, &flags);
4774 if (!cpus_intersects(new_mask, cpu_online_map)) { 4814 if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4791,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
4791 } 4831 }
4792out: 4832out:
4793 task_rq_unlock(rq, &flags); 4833 task_rq_unlock(rq, &flags);
4834
4794 return ret; 4835 return ret;
4795} 4836}
4796
4797EXPORT_SYMBOL_GPL(set_cpus_allowed); 4837EXPORT_SYMBOL_GPL(set_cpus_allowed);
4798 4838
4799/* 4839/*
@@ -4809,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
4809 */ 4849 */
4810static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 4850static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4811{ 4851{
4812 runqueue_t *rq_dest, *rq_src; 4852 struct rq *rq_dest, *rq_src;
4813 int ret = 0; 4853 int ret = 0;
4814 4854
4815 if (unlikely(cpu_is_offline(dest_cpu))) 4855 if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4854,16 +4894,16 @@ out:
4854 */ 4894 */
4855static int migration_thread(void *data) 4895static int migration_thread(void *data)
4856{ 4896{
4857 runqueue_t *rq;
4858 int cpu = (long)data; 4897 int cpu = (long)data;
4898 struct rq *rq;
4859 4899
4860 rq = cpu_rq(cpu); 4900 rq = cpu_rq(cpu);
4861 BUG_ON(rq->migration_thread != current); 4901 BUG_ON(rq->migration_thread != current);
4862 4902
4863 set_current_state(TASK_INTERRUPTIBLE); 4903 set_current_state(TASK_INTERRUPTIBLE);
4864 while (!kthread_should_stop()) { 4904 while (!kthread_should_stop()) {
4905 struct migration_req *req;
4865 struct list_head *head; 4906 struct list_head *head;
4866 migration_req_t *req;
4867 4907
4868 try_to_freeze(); 4908 try_to_freeze();
4869 4909
@@ -4887,7 +4927,7 @@ static int migration_thread(void *data)
4887 set_current_state(TASK_INTERRUPTIBLE); 4927 set_current_state(TASK_INTERRUPTIBLE);
4888 continue; 4928 continue;
4889 } 4929 }
4890 req = list_entry(head->next, migration_req_t, list); 4930 req = list_entry(head->next, struct migration_req, list);
4891 list_del_init(head->next); 4931 list_del_init(head->next);
4892 4932
4893 spin_unlock(&rq->lock); 4933 spin_unlock(&rq->lock);
@@ -4912,28 +4952,28 @@ wait_to_die:
4912 4952
4913#ifdef CONFIG_HOTPLUG_CPU 4953#ifdef CONFIG_HOTPLUG_CPU
4914/* Figure out where task on dead CPU should go, use force if necessary. */ 4954/* Figure out where task on dead CPU should go, use force if necessary. */
4915static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) 4955static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
4916{ 4956{
4917 runqueue_t *rq;
4918 unsigned long flags; 4957 unsigned long flags;
4919 int dest_cpu;
4920 cpumask_t mask; 4958 cpumask_t mask;
4959 struct rq *rq;
4960 int dest_cpu;
4921 4961
4922restart: 4962restart:
4923 /* On same node? */ 4963 /* On same node? */
4924 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4964 mask = node_to_cpumask(cpu_to_node(dead_cpu));
4925 cpus_and(mask, mask, tsk->cpus_allowed); 4965 cpus_and(mask, mask, p->cpus_allowed);
4926 dest_cpu = any_online_cpu(mask); 4966 dest_cpu = any_online_cpu(mask);
4927 4967
4928 /* On any allowed CPU? */ 4968 /* On any allowed CPU? */
4929 if (dest_cpu == NR_CPUS) 4969 if (dest_cpu == NR_CPUS)
4930 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4970 dest_cpu = any_online_cpu(p->cpus_allowed);
4931 4971
4932 /* No more Mr. Nice Guy. */ 4972 /* No more Mr. Nice Guy. */
4933 if (dest_cpu == NR_CPUS) { 4973 if (dest_cpu == NR_CPUS) {
4934 rq = task_rq_lock(tsk, &flags); 4974 rq = task_rq_lock(p, &flags);
4935 cpus_setall(tsk->cpus_allowed); 4975 cpus_setall(p->cpus_allowed);
4936 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4976 dest_cpu = any_online_cpu(p->cpus_allowed);
4937 task_rq_unlock(rq, &flags); 4977 task_rq_unlock(rq, &flags);
4938 4978
4939 /* 4979 /*
@@ -4941,12 +4981,12 @@ restart:
4941 * kernel threads (both mm NULL), since they never 4981 * kernel threads (both mm NULL), since they never
4942 * leave kernel. 4982 * leave kernel.
4943 */ 4983 */
4944 if (tsk->mm && printk_ratelimit()) 4984 if (p->mm && printk_ratelimit())
4945 printk(KERN_INFO "process %d (%s) no " 4985 printk(KERN_INFO "process %d (%s) no "
4946 "longer affine to cpu%d\n", 4986 "longer affine to cpu%d\n",
4947 tsk->pid, tsk->comm, dead_cpu); 4987 p->pid, p->comm, dead_cpu);
4948 } 4988 }
4949 if (!__migrate_task(tsk, dead_cpu, dest_cpu)) 4989 if (!__migrate_task(p, dead_cpu, dest_cpu))
4950 goto restart; 4990 goto restart;
4951} 4991}
4952 4992
@@ -4957,9 +4997,9 @@ restart:
4957 * their home CPUs. So we just add the counter to another CPU's counter, 4997 * their home CPUs. So we just add the counter to another CPU's counter,
4958 * to keep the global sum constant after CPU-down: 4998 * to keep the global sum constant after CPU-down:
4959 */ 4999 */
4960static void migrate_nr_uninterruptible(runqueue_t *rq_src) 5000static void migrate_nr_uninterruptible(struct rq *rq_src)
4961{ 5001{
4962 runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); 5002 struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
4963 unsigned long flags; 5003 unsigned long flags;
4964 5004
4965 local_irq_save(flags); 5005 local_irq_save(flags);
@@ -4973,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
4973/* Run through task list and migrate tasks from the dead cpu. */ 5013/* Run through task list and migrate tasks from the dead cpu. */
4974static void migrate_live_tasks(int src_cpu) 5014static void migrate_live_tasks(int src_cpu)
4975{ 5015{
4976 struct task_struct *tsk, *t; 5016 struct task_struct *p, *t;
4977 5017
4978 write_lock_irq(&tasklist_lock); 5018 write_lock_irq(&tasklist_lock);
4979 5019
4980 do_each_thread(t, tsk) { 5020 do_each_thread(t, p) {
4981 if (tsk == current) 5021 if (p == current)
4982 continue; 5022 continue;
4983 5023
4984 if (task_cpu(tsk) == src_cpu) 5024 if (task_cpu(p) == src_cpu)
4985 move_task_off_dead_cpu(src_cpu, tsk); 5025 move_task_off_dead_cpu(src_cpu, p);
4986 } while_each_thread(t, tsk); 5026 } while_each_thread(t, p);
4987 5027
4988 write_unlock_irq(&tasklist_lock); 5028 write_unlock_irq(&tasklist_lock);
4989} 5029}
4990 5030
4991/* Schedules idle task to be the next runnable task on current CPU. 5031/* Schedules idle task to be the next runnable task on current CPU.
4992 * It does so by boosting its priority to highest possible and adding it to 5032 * It does so by boosting its priority to highest possible and adding it to
4993 * the _front_ of runqueue. Used by CPU offline code. 5033 * the _front_ of the runqueue. Used by CPU offline code.
4994 */ 5034 */
4995void sched_idle_next(void) 5035void sched_idle_next(void)
4996{ 5036{
4997 int cpu = smp_processor_id(); 5037 int this_cpu = smp_processor_id();
4998 runqueue_t *rq = this_rq(); 5038 struct rq *rq = cpu_rq(this_cpu);
4999 struct task_struct *p = rq->idle; 5039 struct task_struct *p = rq->idle;
5000 unsigned long flags; 5040 unsigned long flags;
5001 5041
5002 /* cpu has to be offline */ 5042 /* cpu has to be offline */
5003 BUG_ON(cpu_online(cpu)); 5043 BUG_ON(cpu_online(this_cpu));
5004 5044
5005 /* Strictly not necessary since rest of the CPUs are stopped by now 5045 /*
5006 * and interrupts disabled on current cpu. 5046 * Strictly not necessary since rest of the CPUs are stopped by now
5047 * and interrupts disabled on the current cpu.
5007 */ 5048 */
5008 spin_lock_irqsave(&rq->lock, flags); 5049 spin_lock_irqsave(&rq->lock, flags);
5009 5050
5010 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5051 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
5011 /* Add idle task to _front_ of it's priority queue */ 5052
5053 /* Add idle task to the _front_ of its priority queue: */
5012 __activate_idle_task(p, rq); 5054 __activate_idle_task(p, rq);
5013 5055
5014 spin_unlock_irqrestore(&rq->lock, flags); 5056 spin_unlock_irqrestore(&rq->lock, flags);
5015} 5057}
5016 5058
5017/* Ensures that the idle task is using init_mm right before its cpu goes 5059/*
5060 * Ensures that the idle task is using init_mm right before its cpu goes
5018 * offline. 5061 * offline.
5019 */ 5062 */
5020void idle_task_exit(void) 5063void idle_task_exit(void)
@@ -5028,17 +5071,17 @@ void idle_task_exit(void)
5028 mmdrop(mm); 5071 mmdrop(mm);
5029} 5072}
5030 5073
5031static void migrate_dead(unsigned int dead_cpu, task_t *tsk) 5074static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5032{ 5075{
5033 struct runqueue *rq = cpu_rq(dead_cpu); 5076 struct rq *rq = cpu_rq(dead_cpu);
5034 5077
5035 /* Must be exiting, otherwise would be on tasklist. */ 5078 /* Must be exiting, otherwise would be on tasklist. */
5036 BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); 5079 BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
5037 5080
5038 /* Cannot have done final schedule yet: would have vanished. */ 5081 /* Cannot have done final schedule yet: would have vanished. */
5039 BUG_ON(tsk->flags & PF_DEAD); 5082 BUG_ON(p->flags & PF_DEAD);
5040 5083
5041 get_task_struct(tsk); 5084 get_task_struct(p);
5042 5085
5043 /* 5086 /*
5044 * Drop lock around migration; if someone else moves it, 5087 * Drop lock around migration; if someone else moves it,
@@ -5046,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
5046 * fine. 5089 * fine.
5047 */ 5090 */
5048 spin_unlock_irq(&rq->lock); 5091 spin_unlock_irq(&rq->lock);
5049 move_task_off_dead_cpu(dead_cpu, tsk); 5092 move_task_off_dead_cpu(dead_cpu, p);
5050 spin_lock_irq(&rq->lock); 5093 spin_lock_irq(&rq->lock);
5051 5094
5052 put_task_struct(tsk); 5095 put_task_struct(p);
5053} 5096}
5054 5097
5055/* release_task() removes task from tasklist, so we won't find dead tasks. */ 5098/* release_task() removes task from tasklist, so we won't find dead tasks. */
5056static void migrate_dead_tasks(unsigned int dead_cpu) 5099static void migrate_dead_tasks(unsigned int dead_cpu)
5057{ 5100{
5058 unsigned arr, i; 5101 struct rq *rq = cpu_rq(dead_cpu);
5059 struct runqueue *rq = cpu_rq(dead_cpu); 5102 unsigned int arr, i;
5060 5103
5061 for (arr = 0; arr < 2; arr++) { 5104 for (arr = 0; arr < 2; arr++) {
5062 for (i = 0; i < MAX_PRIO; i++) { 5105 for (i = 0; i < MAX_PRIO; i++) {
5063 struct list_head *list = &rq->arrays[arr].queue[i]; 5106 struct list_head *list = &rq->arrays[arr].queue[i];
5107
5064 while (!list_empty(list)) 5108 while (!list_empty(list))
5065 migrate_dead(dead_cpu, 5109 migrate_dead(dead_cpu, list_entry(list->next,
5066 list_entry(list->next, task_t, 5110 struct task_struct, run_list));
5067 run_list));
5068 } 5111 }
5069 } 5112 }
5070} 5113}
@@ -5074,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5074 * migration_call - callback that gets triggered when a CPU is added. 5117 * migration_call - callback that gets triggered when a CPU is added.
5075 * Here we can start up the necessary migration thread for the new CPU. 5118 * Here we can start up the necessary migration thread for the new CPU.
5076 */ 5119 */
5077static int __cpuinit migration_call(struct notifier_block *nfb, 5120static int __cpuinit
5078 unsigned long action, 5121migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5079 void *hcpu)
5080{ 5122{
5081 int cpu = (long)hcpu;
5082 struct task_struct *p; 5123 struct task_struct *p;
5083 struct runqueue *rq; 5124 int cpu = (long)hcpu;
5084 unsigned long flags; 5125 unsigned long flags;
5126 struct rq *rq;
5085 5127
5086 switch (action) { 5128 switch (action) {
5087 case CPU_UP_PREPARE: 5129 case CPU_UP_PREPARE:
@@ -5096,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5096 task_rq_unlock(rq, &flags); 5138 task_rq_unlock(rq, &flags);
5097 cpu_rq(cpu)->migration_thread = p; 5139 cpu_rq(cpu)->migration_thread = p;
5098 break; 5140 break;
5141
5099 case CPU_ONLINE: 5142 case CPU_ONLINE:
5100	/* Strictly unnecessary, as first user will wake it. */ 5143	/* Strictly unnecessary, as first user will wake it. */
5101 wake_up_process(cpu_rq(cpu)->migration_thread); 5144 wake_up_process(cpu_rq(cpu)->migration_thread);
5102 break; 5145 break;
5146
5103#ifdef CONFIG_HOTPLUG_CPU 5147#ifdef CONFIG_HOTPLUG_CPU
5104 case CPU_UP_CANCELED: 5148 case CPU_UP_CANCELED:
5105 if (!cpu_rq(cpu)->migration_thread) 5149 if (!cpu_rq(cpu)->migration_thread)
@@ -5110,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5110 kthread_stop(cpu_rq(cpu)->migration_thread); 5154 kthread_stop(cpu_rq(cpu)->migration_thread);
5111 cpu_rq(cpu)->migration_thread = NULL; 5155 cpu_rq(cpu)->migration_thread = NULL;
5112 break; 5156 break;
5157
5113 case CPU_DEAD: 5158 case CPU_DEAD:
5114 migrate_live_tasks(cpu); 5159 migrate_live_tasks(cpu);
5115 rq = cpu_rq(cpu); 5160 rq = cpu_rq(cpu);
@@ -5130,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5130 * the requestors. */ 5175 * the requestors. */
5131 spin_lock_irq(&rq->lock); 5176 spin_lock_irq(&rq->lock);
5132 while (!list_empty(&rq->migration_queue)) { 5177 while (!list_empty(&rq->migration_queue)) {
5133 migration_req_t *req; 5178 struct migration_req *req;
5179
5134 req = list_entry(rq->migration_queue.next, 5180 req = list_entry(rq->migration_queue.next,
5135 migration_req_t, list); 5181 struct migration_req, list);
5136 list_del_init(&req->list); 5182 list_del_init(&req->list);
5137 complete(&req->done); 5183 complete(&req->done);
5138 } 5184 }
@@ -5154,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
5154int __init migration_init(void) 5200int __init migration_init(void)
5155{ 5201{
5156 void *cpu = (void *)(long)smp_processor_id(); 5202 void *cpu = (void *)(long)smp_processor_id();
5157 /* Start one for boot CPU. */ 5203
5204 /* Start one for the boot CPU: */
5158 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5205 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5159 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5206 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5160 register_cpu_notifier(&migration_notifier); 5207 register_cpu_notifier(&migration_notifier);
5208
5161 return 0; 5209 return 0;
5162} 5210}
5163#endif 5211#endif
@@ -5253,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5253 } while (sd); 5301 } while (sd);
5254} 5302}
5255#else 5303#else
5256#define sched_domain_debug(sd, cpu) {} 5304# define sched_domain_debug(sd, cpu) do { } while (0)
5257#endif 5305#endif
5258 5306
5259static int sd_degenerate(struct sched_domain *sd) 5307static int sd_degenerate(struct sched_domain *sd)
@@ -5279,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)
5279 return 1; 5327 return 1;
5280} 5328}
5281 5329
5282static int sd_parent_degenerate(struct sched_domain *sd, 5330static int
5283 struct sched_domain *parent) 5331sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5284{ 5332{
5285 unsigned long cflags = sd->flags, pflags = parent->flags; 5333 unsigned long cflags = sd->flags, pflags = parent->flags;
5286 5334
@@ -5313,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
5313 */ 5361 */
5314static void cpu_attach_domain(struct sched_domain *sd, int cpu) 5362static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5315{ 5363{
5316 runqueue_t *rq = cpu_rq(cpu); 5364 struct rq *rq = cpu_rq(cpu);
5317 struct sched_domain *tmp; 5365 struct sched_domain *tmp;
5318 5366
5319 /* Remove the sched domains which do not contribute to scheduling. */ 5367 /* Remove the sched domains which do not contribute to scheduling. */
@@ -5575,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)
5575/* 5623/*
5576 * Measure the cache-cost of one task migration. Returns in units of nsec. 5624 * Measure the cache-cost of one task migration. Returns in units of nsec.
5577 */ 5625 */
5578static unsigned long long measure_one(void *cache, unsigned long size, 5626static unsigned long long
5579 int source, int target) 5627measure_one(void *cache, unsigned long size, int source, int target)
5580{ 5628{
5581 cpumask_t mask, saved_mask; 5629 cpumask_t mask, saved_mask;
5582 unsigned long long t0, t1, t2, t3, cost; 5630 unsigned long long t0, t1, t2, t3, cost;
@@ -5926,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
5926 */ 5974 */
5927static cpumask_t sched_domain_node_span(int node) 5975static cpumask_t sched_domain_node_span(int node)
5928{ 5976{
5929 int i;
5930 cpumask_t span, nodemask;
5931 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5977 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
5978 cpumask_t span, nodemask;
5979 int i;
5932 5980
5933 cpus_clear(span); 5981 cpus_clear(span);
5934 bitmap_zero(used_nodes, MAX_NUMNODES); 5982 bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5939,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)
5939 5987
5940 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5988 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
5941 int next_node = find_next_best_node(node, used_nodes); 5989 int next_node = find_next_best_node(node, used_nodes);
5990
5942 nodemask = node_to_cpumask(next_node); 5991 nodemask = node_to_cpumask(next_node);
5943 cpus_or(span, span, nodemask); 5992 cpus_or(span, span, nodemask);
5944 } 5993 }
@@ -5948,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)
5948#endif 5997#endif
5949 5998
5950int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5999int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6000
5951/* 6001/*
5952 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we 6002 * SMT sched-domains:
5953 * can switch it on easily if needed.
5954 */ 6003 */
5955#ifdef CONFIG_SCHED_SMT 6004#ifdef CONFIG_SCHED_SMT
5956static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6005static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
5957static struct sched_group sched_group_cpus[NR_CPUS]; 6006static struct sched_group sched_group_cpus[NR_CPUS];
6007
5958static int cpu_to_cpu_group(int cpu) 6008static int cpu_to_cpu_group(int cpu)
5959{ 6009{
5960 return cpu; 6010 return cpu;
5961} 6011}
5962#endif 6012#endif
5963 6013
6014/*
6015 * multi-core sched-domains:
6016 */
5964#ifdef CONFIG_SCHED_MC 6017#ifdef CONFIG_SCHED_MC
5965static DEFINE_PER_CPU(struct sched_domain, core_domains); 6018static DEFINE_PER_CPU(struct sched_domain, core_domains);
5966static struct sched_group *sched_group_core_bycpu[NR_CPUS]; 6019static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -5980,9 +6033,10 @@ static int cpu_to_core_group(int cpu)
5980 6033
5981static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6034static DEFINE_PER_CPU(struct sched_domain, phys_domains);
5982static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6035static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
6036
5983static int cpu_to_phys_group(int cpu) 6037static int cpu_to_phys_group(int cpu)
5984{ 6038{
5985#if defined(CONFIG_SCHED_MC) 6039#ifdef CONFIG_SCHED_MC
5986 cpumask_t mask = cpu_coregroup_map(cpu); 6040 cpumask_t mask = cpu_coregroup_map(cpu);
5987 return first_cpu(mask); 6041 return first_cpu(mask);
5988#elif defined(CONFIG_SCHED_SMT) 6042#elif defined(CONFIG_SCHED_SMT)
@@ -6528,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
6528int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6582int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
6529{ 6583{
6530 int err = 0; 6584 int err = 0;
6585
6531#ifdef CONFIG_SCHED_SMT 6586#ifdef CONFIG_SCHED_SMT
6532 if (smt_capable()) 6587 if (smt_capable())
6533 err = sysfs_create_file(&cls->kset.kobj, 6588 err = sysfs_create_file(&cls->kset.kobj,
@@ -6547,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
6547{ 6602{
6548 return sprintf(page, "%u\n", sched_mc_power_savings); 6603 return sprintf(page, "%u\n", sched_mc_power_savings);
6549} 6604}
6550static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6605static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
6606 const char *buf, size_t count)
6551{ 6607{
6552 return sched_power_savings_store(buf, count, 0); 6608 return sched_power_savings_store(buf, count, 0);
6553} 6609}
@@ -6560,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
6560{ 6616{
6561 return sprintf(page, "%u\n", sched_smt_power_savings); 6617 return sprintf(page, "%u\n", sched_smt_power_savings);
6562} 6618}
6563static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6619static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
6620 const char *buf, size_t count)
6564{ 6621{
6565 return sched_power_savings_store(buf, count, 1); 6622 return sched_power_savings_store(buf, count, 1);
6566} 6623}
@@ -6622,6 +6679,7 @@ int in_sched_functions(unsigned long addr)
6622{ 6679{
6623 /* Linker adds these: start and end of __sched functions */ 6680 /* Linker adds these: start and end of __sched functions */
6624 extern char __sched_text_start[], __sched_text_end[]; 6681 extern char __sched_text_start[], __sched_text_end[];
6682
6625 return in_lock_functions(addr) || 6683 return in_lock_functions(addr) ||
6626 (addr >= (unsigned long)__sched_text_start 6684 (addr >= (unsigned long)__sched_text_start
6627 && addr < (unsigned long)__sched_text_end); 6685 && addr < (unsigned long)__sched_text_end);
@@ -6629,14 +6687,15 @@ int in_sched_functions(unsigned long addr)
6629 6687
6630void __init sched_init(void) 6688void __init sched_init(void)
6631{ 6689{
6632 runqueue_t *rq;
6633 int i, j, k; 6690 int i, j, k;
6634 6691
6635 for_each_possible_cpu(i) { 6692 for_each_possible_cpu(i) {
6636 prio_array_t *array; 6693 struct prio_array *array;
6694 struct rq *rq;
6637 6695
6638 rq = cpu_rq(i); 6696 rq = cpu_rq(i);
6639 spin_lock_init(&rq->lock); 6697 spin_lock_init(&rq->lock);
6698 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
6640 rq->nr_running = 0; 6699 rq->nr_running = 0;
6641 rq->active = rq->arrays; 6700 rq->active = rq->arrays;
6642 rq->expired = rq->arrays + 1; 6701 rq->expired = rq->arrays + 1;
@@ -6683,7 +6742,7 @@ void __init sched_init(void)
6683#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6742#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
6684void __might_sleep(char *file, int line) 6743void __might_sleep(char *file, int line)
6685{ 6744{
6686#if defined(in_atomic) 6745#ifdef in_atomic
6687 static unsigned long prev_jiffy; /* ratelimiting */ 6746 static unsigned long prev_jiffy; /* ratelimiting */
6688 6747
6689 if ((in_atomic() || irqs_disabled()) && 6748 if ((in_atomic() || irqs_disabled()) &&
@@ -6705,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
6705#ifdef CONFIG_MAGIC_SYSRQ 6764#ifdef CONFIG_MAGIC_SYSRQ
6706void normalize_rt_tasks(void) 6765void normalize_rt_tasks(void)
6707{ 6766{
6767 struct prio_array *array;
6708 struct task_struct *p; 6768 struct task_struct *p;
6709 prio_array_t *array;
6710 unsigned long flags; 6769 unsigned long flags;
6711 runqueue_t *rq; 6770 struct rq *rq;
6712 6771
6713 read_lock_irq(&tasklist_lock); 6772 read_lock_irq(&tasklist_lock);
6714 for_each_process(p) { 6773 for_each_process(p) {
@@ -6752,7 +6811,7 @@ void normalize_rt_tasks(void)
6752 * 6811 *
6753 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6812 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6754 */ 6813 */
6755task_t *curr_task(int cpu) 6814struct task_struct *curr_task(int cpu)
6756{ 6815{
6757 return cpu_curr(cpu); 6816 return cpu_curr(cpu);
6758} 6817}
@@ -6772,7 +6831,7 @@ task_t *curr_task(int cpu)
6772 * 6831 *
6773 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6832 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6774 */ 6833 */
6775void set_curr_task(int cpu, task_t *p) 6834void set_curr_task(int cpu, struct task_struct *p)
6776{ 6835{
6777 cpu_curr(cpu) = p; 6836 cpu_curr(cpu) = p;
6778} 6837}
diff --git a/kernel/signal.c b/kernel/signal.c
index 52adf53929f6..7fe874d12fae 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -10,7 +10,6 @@
10 * to allow signals to be sent reliably. 10 * to allow signals to be sent reliably.
11 */ 11 */
12 12
13#include <linux/config.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/module.h> 14#include <linux/module.h>
16#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
@@ -584,7 +583,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
584 && !capable(CAP_KILL)) 583 && !capable(CAP_KILL))
585 return error; 584 return error;
586 585
587 error = security_task_kill(t, info, sig); 586 error = security_task_kill(t, info, sig, 0);
588 if (!error) 587 if (!error)
589 audit_signal_info(sig, t); /* Let audit system see the signal */ 588 audit_signal_info(sig, t); /* Let audit system see the signal */
590 return error; 589 return error;
@@ -1107,7 +1106,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1107 1106
1108/* like kill_proc_info(), but doesn't use uid/euid of "current" */ 1107/* like kill_proc_info(), but doesn't use uid/euid of "current" */
1109int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, 1108int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
1110 uid_t uid, uid_t euid) 1109 uid_t uid, uid_t euid, u32 secid)
1111{ 1110{
1112 int ret = -EINVAL; 1111 int ret = -EINVAL;
1113 struct task_struct *p; 1112 struct task_struct *p;
@@ -1127,6 +1126,9 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
1127 ret = -EPERM; 1126 ret = -EPERM;
1128 goto out_unlock; 1127 goto out_unlock;
1129 } 1128 }
1129 ret = security_task_kill(p, info, sig, secid);
1130 if (ret)
1131 goto out_unlock;
1130 if (sig && p->sighand) { 1132 if (sig && p->sighand) {
1131 unsigned long flags; 1133 unsigned long flags;
1132 spin_lock_irqsave(&p->sighand->siglock, flags); 1134 spin_lock_irqsave(&p->sighand->siglock, flags);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b89b55..215541e26c1a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)
62} 62}
63 63
64/* 64/*
65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
68static void __local_bh_disable(unsigned long ip)
69{
70 unsigned long flags;
71
72 WARN_ON_ONCE(in_irq());
73
74 raw_local_irq_save(flags);
75 add_preempt_count(SOFTIRQ_OFFSET);
76 /*
77 * Were softirqs turned off above:
78 */
79 if (softirq_count() == SOFTIRQ_OFFSET)
80 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags);
82}
83
84void local_bh_disable(void)
85{
86 __local_bh_disable((unsigned long)__builtin_return_address(0));
87}
88
89EXPORT_SYMBOL(local_bh_disable);
90
91void __local_bh_enable(void)
92{
93 WARN_ON_ONCE(in_irq());
94
95 /*
96 * softirqs should never be enabled by __local_bh_enable(),
97 * it always nests inside local_bh_enable() sections:
98 */
99 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101 sub_preempt_count(SOFTIRQ_OFFSET);
102}
103EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105/*
106 * Special-case - softirqs can safely be enabled in
107 * cond_resched_softirq(), or by __do_softirq(),
108 * without processing still-pending softirqs:
109 */
110void _local_bh_enable(void)
111{
112 WARN_ON_ONCE(in_irq());
113 WARN_ON_ONCE(!irqs_disabled());
114
115 if (softirq_count() == SOFTIRQ_OFFSET)
116 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 sub_preempt_count(SOFTIRQ_OFFSET);
118}
119
120EXPORT_SYMBOL(_local_bh_enable);
121
122void local_bh_enable(void)
123{
124 unsigned long flags;
125
126 WARN_ON_ONCE(in_irq());
127 WARN_ON_ONCE(irqs_disabled());
128
129 local_irq_save(flags);
130 /*
131 * Are softirqs going to be turned on now:
132 */
133 if (softirq_count() == SOFTIRQ_OFFSET)
134 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135 /*
136 * Keep preemption disabled until we are done with
137 * softirq processing:
138 */
139 sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141 if (unlikely(!in_interrupt() && local_softirq_pending()))
142 do_softirq();
143
144 dec_preempt_count();
145 local_irq_restore(flags);
146 preempt_check_resched();
147}
148EXPORT_SYMBOL(local_bh_enable);
149
150void local_bh_enable_ip(unsigned long ip)
151{
152 unsigned long flags;
153
154 WARN_ON_ONCE(in_irq());
155
156 local_irq_save(flags);
157 /*
158 * Are softirqs going to be turned on now:
159 */
160 if (softirq_count() == SOFTIRQ_OFFSET)
161 trace_softirqs_on(ip);
162 /*
163 * Keep preemption disabled until we are done with
164 * softirq processing:
165 */
166 sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168 if (unlikely(!in_interrupt() && local_softirq_pending()))
169 do_softirq();
170
171 dec_preempt_count();
172 local_irq_restore(flags);
173 preempt_check_resched();
174}
175EXPORT_SYMBOL(local_bh_enable_ip);
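
The rewritten local_bh_enable() above keeps softirq processing preempt-disabled by splitting the count update: it drops SOFTIRQ_OFFSET - 1 first, runs any pending softirqs while one preemption reference is still held, and only then performs the final decrement. A standalone model of that counter arithmetic, assuming the usual SOFTIRQ_OFFSET of 0x100 (softirq count in bits 8-15 of preempt_count):

    #include <stdio.h>

    /*
     * Standalone model of the preempt_count bookkeeping in the new
     * local_bh_enable() above.  SOFTIRQ_OFFSET = 0x100 is assumed here.
     */
    #define SOFTIRQ_OFFSET 0x100UL

    static unsigned long preempt_count;

    static void local_bh_disable(void)
    {
            preempt_count += SOFTIRQ_OFFSET;
    }

    static void local_bh_enable(void)
    {
            /*
             * Drop the softirq count but keep one preemption reference,
             * so pending softirqs would still run preempt-disabled:
             */
            preempt_count -= SOFTIRQ_OFFSET - 1;
            /* ... do_softirq() would run here if anything were pending ... */
            preempt_count -= 1;     /* the final dec_preempt_count() */
    }

    int main(void)
    {
            local_bh_disable();
            printf("after disable: %#lx\n", preempt_count); /* 0x100 */
            local_bh_enable();
            printf("after enable:  %#lx\n", preempt_count); /* 0 */
            return 0;
    }
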
176
177/*
65 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 178 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
66 * and we fall back to softirqd after that. 179 * and we fall back to softirqd after that.
67 * 180 *
@@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void)
80 int cpu; 193 int cpu;
81 194
82 pending = local_softirq_pending(); 195 pending = local_softirq_pending();
196 account_system_vtime(current);
197
198 __local_bh_disable((unsigned long)__builtin_return_address(0));
199 trace_softirq_enter();
83 200
84 local_bh_disable();
85 cpu = smp_processor_id(); 201 cpu = smp_processor_id();
86restart: 202restart:
87 /* Reset the pending bitmask before enabling irqs */ 203 /* Reset the pending bitmask before enabling irqs */
@@ -109,7 +225,10 @@ restart:
109 if (pending) 225 if (pending)
110 wakeup_softirqd(); 226 wakeup_softirqd();
111 227
112 __local_bh_enable(); 228 trace_softirq_exit();
229
230 account_system_vtime(current);
231 _local_bh_enable();
113} 232}
114 233
115#ifndef __ARCH_HAS_DO_SOFTIRQ 234#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq);
136 255
137#endif 256#endif
138 257
139void local_bh_enable(void)
140{
141 WARN_ON(irqs_disabled());
142 /*
143 * Keep preemption disabled until we are done with
144 * softirq processing:
145 */
146 sub_preempt_count(SOFTIRQ_OFFSET - 1);
147
148 if (unlikely(!in_interrupt() && local_softirq_pending()))
149 do_softirq();
150
151 dec_preempt_count();
152 preempt_check_resched();
153}
154EXPORT_SYMBOL(local_bh_enable);
155
156#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 258#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
157# define invoke_softirq() __do_softirq() 259# define invoke_softirq() __do_softirq()
158#else 260#else
@@ -165,6 +267,7 @@ EXPORT_SYMBOL(local_bh_enable);
165void irq_exit(void) 267void irq_exit(void)
166{ 268{
167 account_system_vtime(current); 269 account_system_vtime(current);
270 trace_hardirq_exit();
168 sub_preempt_count(IRQ_EXIT_OFFSET); 271 sub_preempt_count(IRQ_EXIT_OFFSET);
169 if (!in_interrupt() && local_softirq_pending()) 272 if (!in_interrupt() && local_softirq_pending())
170 invoke_softirq(); 273 invoke_softirq();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index d1b810782bc4..bfd6ad9c0330 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -9,11 +9,11 @@
9 * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) 9 * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
10 */ 10 */
11 11
12#include <linux/config.h>
13#include <linux/linkage.h> 12#include <linux/linkage.h>
14#include <linux/preempt.h> 13#include <linux/preempt.h>
15#include <linux/spinlock.h> 14#include <linux/spinlock.h>
16#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/debug_locks.h>
17#include <linux/module.h> 17#include <linux/module.h>
18 18
19/* 19/*
@@ -30,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
30int __lockfunc _spin_trylock(spinlock_t *lock) 30int __lockfunc _spin_trylock(spinlock_t *lock)
31{ 31{
32 preempt_disable(); 32 preempt_disable();
33 if (_raw_spin_trylock(lock)) 33 if (_raw_spin_trylock(lock)) {
34 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
34 return 1; 35 return 1;
36 }
35 37
36 preempt_enable(); 38 preempt_enable();
37 return 0; 39 return 0;
@@ -41,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
41int __lockfunc _read_trylock(rwlock_t *lock) 43int __lockfunc _read_trylock(rwlock_t *lock)
42{ 44{
43 preempt_disable(); 45 preempt_disable();
44 if (_raw_read_trylock(lock)) 46 if (_raw_read_trylock(lock)) {
47 rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
45 return 1; 48 return 1;
49 }
46 50
47 preempt_enable(); 51 preempt_enable();
48 return 0; 52 return 0;
@@ -52,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
52int __lockfunc _write_trylock(rwlock_t *lock) 56int __lockfunc _write_trylock(rwlock_t *lock)
53{ 57{
54 preempt_disable(); 58 preempt_disable();
55 if (_raw_write_trylock(lock)) 59 if (_raw_write_trylock(lock)) {
60 rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
56 return 1; 61 return 1;
62 }
57 63
58 preempt_enable(); 64 preempt_enable();
59 return 0; 65 return 0;
60} 66}
61EXPORT_SYMBOL(_write_trylock); 67EXPORT_SYMBOL(_write_trylock);
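
Each trylock wrapper above now tells lockdep about the acquisition only when the underlying _raw_*_trylock() succeeds, using the trylock=1 form of spin_acquire()/rwlock_acquire(). A runnable userspace sketch of that shape, with a plain held-lock counter standing in for the dep_map bookkeeping (my_trylock(), my_unlock() and held_locks are this sketch's names):

    #include <pthread.h>
    #include <stdio.h>

    /*
     * Userspace sketch of the trylock pattern above: the acquire-side
     * bookkeeping is recorded only when the trylock succeeds, and undone
     * before the lock is actually dropped.
     */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int held_locks;

    static int my_trylock(void)
    {
            if (pthread_mutex_trylock(&lock) == 0) {
                    held_locks++;   /* "spin_acquire(..., trylock=1, ...)" */
                    return 1;
            }
            return 0;               /* no bookkeeping on failure */
    }

    static void my_unlock(void)
    {
            held_locks--;           /* "spin_release(...)" */
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            if (my_trylock()) {
                    printf("held_locks=%d\n", held_locks);  /* 1 */
                    my_unlock();
            }
            printf("held_locks=%d\n", held_locks);          /* 0 */
            return 0;
    }

Build with cc -pthread; the point is only the ordering: record the lock after the trylock reports success, and undo the record before the raw unlock.
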
62 68
63#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) 69/*
70 * If lockdep is enabled then we use the non-preemption spin-ops
71 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
72 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
73 */
74#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
75 defined(CONFIG_PROVE_LOCKING)
64 76
65void __lockfunc _read_lock(rwlock_t *lock) 77void __lockfunc _read_lock(rwlock_t *lock)
66{ 78{
67 preempt_disable(); 79 preempt_disable();
80 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
68 _raw_read_lock(lock); 81 _raw_read_lock(lock);
69} 82}
70EXPORT_SYMBOL(_read_lock); 83EXPORT_SYMBOL(_read_lock);
@@ -75,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
75 88
76 local_irq_save(flags); 89 local_irq_save(flags);
77 preempt_disable(); 90 preempt_disable();
91 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
92 /*
 93 * On lockdep we don't want the hand-coded irq-enable of
94 * _raw_spin_lock_flags() code, because lockdep assumes
95 * that interrupts are not re-enabled during lock-acquire:
96 */
97#ifdef CONFIG_PROVE_LOCKING
98 _raw_spin_lock(lock);
99#else
78 _raw_spin_lock_flags(lock, &flags); 100 _raw_spin_lock_flags(lock, &flags);
101#endif
79 return flags; 102 return flags;
80} 103}
81EXPORT_SYMBOL(_spin_lock_irqsave); 104EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -84,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
84{ 107{
85 local_irq_disable(); 108 local_irq_disable();
86 preempt_disable(); 109 preempt_disable();
110 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
87 _raw_spin_lock(lock); 111 _raw_spin_lock(lock);
88} 112}
89EXPORT_SYMBOL(_spin_lock_irq); 113EXPORT_SYMBOL(_spin_lock_irq);
@@ -92,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
92{ 116{
93 local_bh_disable(); 117 local_bh_disable();
94 preempt_disable(); 118 preempt_disable();
119 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
95 _raw_spin_lock(lock); 120 _raw_spin_lock(lock);
96} 121}
97EXPORT_SYMBOL(_spin_lock_bh); 122EXPORT_SYMBOL(_spin_lock_bh);
@@ -102,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
102 127
103 local_irq_save(flags); 128 local_irq_save(flags);
104 preempt_disable(); 129 preempt_disable();
130 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
105 _raw_read_lock(lock); 131 _raw_read_lock(lock);
106 return flags; 132 return flags;
107} 133}
@@ -111,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
111{ 137{
112 local_irq_disable(); 138 local_irq_disable();
113 preempt_disable(); 139 preempt_disable();
140 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
114 _raw_read_lock(lock); 141 _raw_read_lock(lock);
115} 142}
116EXPORT_SYMBOL(_read_lock_irq); 143EXPORT_SYMBOL(_read_lock_irq);
@@ -119,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
119{ 146{
120 local_bh_disable(); 147 local_bh_disable();
121 preempt_disable(); 148 preempt_disable();
149 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
122 _raw_read_lock(lock); 150 _raw_read_lock(lock);
123} 151}
124EXPORT_SYMBOL(_read_lock_bh); 152EXPORT_SYMBOL(_read_lock_bh);
@@ -129,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
129 157
130 local_irq_save(flags); 158 local_irq_save(flags);
131 preempt_disable(); 159 preempt_disable();
160 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
132 _raw_write_lock(lock); 161 _raw_write_lock(lock);
133 return flags; 162 return flags;
134} 163}
@@ -138,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
138{ 167{
139 local_irq_disable(); 168 local_irq_disable();
140 preempt_disable(); 169 preempt_disable();
170 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
141 _raw_write_lock(lock); 171 _raw_write_lock(lock);
142} 172}
143EXPORT_SYMBOL(_write_lock_irq); 173EXPORT_SYMBOL(_write_lock_irq);
@@ -146,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
146{ 176{
147 local_bh_disable(); 177 local_bh_disable();
148 preempt_disable(); 178 preempt_disable();
179 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
149 _raw_write_lock(lock); 180 _raw_write_lock(lock);
150} 181}
151EXPORT_SYMBOL(_write_lock_bh); 182EXPORT_SYMBOL(_write_lock_bh);
@@ -153,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
153void __lockfunc _spin_lock(spinlock_t *lock) 184void __lockfunc _spin_lock(spinlock_t *lock)
154{ 185{
155 preempt_disable(); 186 preempt_disable();
187 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
156 _raw_spin_lock(lock); 188 _raw_spin_lock(lock);
157} 189}
158 190
@@ -161,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
161void __lockfunc _write_lock(rwlock_t *lock) 193void __lockfunc _write_lock(rwlock_t *lock)
162{ 194{
163 preempt_disable(); 195 preempt_disable();
196 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
164 _raw_write_lock(lock); 197 _raw_write_lock(lock);
165} 198}
166 199
@@ -256,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
256 289
257#endif /* CONFIG_PREEMPT */ 290#endif /* CONFIG_PREEMPT */
258 291
292#ifdef CONFIG_DEBUG_LOCK_ALLOC
293
294void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
295{
296 preempt_disable();
297 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
298 _raw_spin_lock(lock);
299}
300
301EXPORT_SYMBOL(_spin_lock_nested);
302
303#endif
304
259void __lockfunc _spin_unlock(spinlock_t *lock) 305void __lockfunc _spin_unlock(spinlock_t *lock)
260{ 306{
307 spin_release(&lock->dep_map, 1, _RET_IP_);
261 _raw_spin_unlock(lock); 308 _raw_spin_unlock(lock);
262 preempt_enable(); 309 preempt_enable();
263} 310}
@@ -265,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
265 312
266void __lockfunc _write_unlock(rwlock_t *lock) 313void __lockfunc _write_unlock(rwlock_t *lock)
267{ 314{
315 rwlock_release(&lock->dep_map, 1, _RET_IP_);
268 _raw_write_unlock(lock); 316 _raw_write_unlock(lock);
269 preempt_enable(); 317 preempt_enable();
270} 318}
@@ -272,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
272 320
273void __lockfunc _read_unlock(rwlock_t *lock) 321void __lockfunc _read_unlock(rwlock_t *lock)
274{ 322{
323 rwlock_release(&lock->dep_map, 1, _RET_IP_);
275 _raw_read_unlock(lock); 324 _raw_read_unlock(lock);
276 preempt_enable(); 325 preempt_enable();
277} 326}
@@ -279,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
279 328
280void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 329void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
281{ 330{
331 spin_release(&lock->dep_map, 1, _RET_IP_);
282 _raw_spin_unlock(lock); 332 _raw_spin_unlock(lock);
283 local_irq_restore(flags); 333 local_irq_restore(flags);
284 preempt_enable(); 334 preempt_enable();
@@ -287,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
287 337
288void __lockfunc _spin_unlock_irq(spinlock_t *lock) 338void __lockfunc _spin_unlock_irq(spinlock_t *lock)
289{ 339{
340 spin_release(&lock->dep_map, 1, _RET_IP_);
290 _raw_spin_unlock(lock); 341 _raw_spin_unlock(lock);
291 local_irq_enable(); 342 local_irq_enable();
292 preempt_enable(); 343 preempt_enable();
@@ -295,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
295 346
296void __lockfunc _spin_unlock_bh(spinlock_t *lock) 347void __lockfunc _spin_unlock_bh(spinlock_t *lock)
297{ 348{
349 spin_release(&lock->dep_map, 1, _RET_IP_);
298 _raw_spin_unlock(lock); 350 _raw_spin_unlock(lock);
299 preempt_enable_no_resched(); 351 preempt_enable_no_resched();
300 local_bh_enable(); 352 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
301} 353}
302EXPORT_SYMBOL(_spin_unlock_bh); 354EXPORT_SYMBOL(_spin_unlock_bh);
303 355
304void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 356void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
305{ 357{
358 rwlock_release(&lock->dep_map, 1, _RET_IP_);
306 _raw_read_unlock(lock); 359 _raw_read_unlock(lock);
307 local_irq_restore(flags); 360 local_irq_restore(flags);
308 preempt_enable(); 361 preempt_enable();
@@ -311,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
311 364
312void __lockfunc _read_unlock_irq(rwlock_t *lock) 365void __lockfunc _read_unlock_irq(rwlock_t *lock)
313{ 366{
367 rwlock_release(&lock->dep_map, 1, _RET_IP_);
314 _raw_read_unlock(lock); 368 _raw_read_unlock(lock);
315 local_irq_enable(); 369 local_irq_enable();
316 preempt_enable(); 370 preempt_enable();
@@ -319,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
319 373
320void __lockfunc _read_unlock_bh(rwlock_t *lock) 374void __lockfunc _read_unlock_bh(rwlock_t *lock)
321{ 375{
376 rwlock_release(&lock->dep_map, 1, _RET_IP_);
322 _raw_read_unlock(lock); 377 _raw_read_unlock(lock);
323 preempt_enable_no_resched(); 378 preempt_enable_no_resched();
324 local_bh_enable(); 379 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
325} 380}
326EXPORT_SYMBOL(_read_unlock_bh); 381EXPORT_SYMBOL(_read_unlock_bh);
327 382
328void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 383void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
329{ 384{
385 rwlock_release(&lock->dep_map, 1, _RET_IP_);
330 _raw_write_unlock(lock); 386 _raw_write_unlock(lock);
331 local_irq_restore(flags); 387 local_irq_restore(flags);
332 preempt_enable(); 388 preempt_enable();
@@ -335,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
335 391
336void __lockfunc _write_unlock_irq(rwlock_t *lock) 392void __lockfunc _write_unlock_irq(rwlock_t *lock)
337{ 393{
394 rwlock_release(&lock->dep_map, 1, _RET_IP_);
338 _raw_write_unlock(lock); 395 _raw_write_unlock(lock);
339 local_irq_enable(); 396 local_irq_enable();
340 preempt_enable(); 397 preempt_enable();
@@ -343,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
343 400
344void __lockfunc _write_unlock_bh(rwlock_t *lock) 401void __lockfunc _write_unlock_bh(rwlock_t *lock)
345{ 402{
403 rwlock_release(&lock->dep_map, 1, _RET_IP_);
346 _raw_write_unlock(lock); 404 _raw_write_unlock(lock);
347 preempt_enable_no_resched(); 405 preempt_enable_no_resched();
348 local_bh_enable(); 406 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
349} 407}
350EXPORT_SYMBOL(_write_unlock_bh); 408EXPORT_SYMBOL(_write_unlock_bh);
351 409
@@ -353,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
353{ 411{
354 local_bh_disable(); 412 local_bh_disable();
355 preempt_disable(); 413 preempt_disable();
356 if (_raw_spin_trylock(lock)) 414 if (_raw_spin_trylock(lock)) {
415 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
357 return 1; 416 return 1;
417 }
358 418
359 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
360 local_bh_enable(); 420 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
361 return 0; 421 return 0;
362} 422}
363EXPORT_SYMBOL(_spin_trylock_bh); 423EXPORT_SYMBOL(_spin_trylock_bh);
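
_spin_lock_nested() above backs the spin_lock_nested() annotation for the case where two locks of the same lock-class are legitimately held at the same time. A hedged caller-side sketch (the struct and function names are invented): take the two locks in a fixed address order and mark the second acquisition as one level of nesting so lockdep does not report it as a recursive deadlock:

#include <linux/spinlock.h>

struct example_node {
	spinlock_t	lock;
	int		value;
};

static void example_move(struct example_node *a, struct example_node *b)
{
	/* a fixed ordering by address avoids ABBA deadlocks between callers */
	struct example_node *first  = a < b ? a : b;
	struct example_node *second = a < b ? b : a;

	spin_lock(&first->lock);
	spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);

	b->value += a->value;
	a->value = 0;

	spin_unlock(&second->lock);
	spin_unlock(&first->lock);
}

Without the annotation both acquisitions map to the same class and lockdep would warn that the task is taking a lock it already holds.
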
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 000000000000..b71816e47a30
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
1/*
2 * kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/kallsyms.h>
10#include <linux/stacktrace.h>
11
12void print_stack_trace(struct stack_trace *trace, int spaces)
13{
14 int i, j;
15
16 for (i = 0; i < trace->nr_entries; i++) {
17 unsigned long ip = trace->entries[i];
18
19 for (j = 0; j < spaces + 1; j++)
20 printk(" ");
21 print_ip_sym(ip);
22 }
23}
24
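
print_stack_trace() prints whatever entries the caller has recorded, one symbolized line per entry, indented by spaces + 1 blanks. A self-contained, hypothetical sketch that fills a trace by hand (in real use an architecture helper such as save_stack_trace(), not shown in this hunk, would populate it; all names below are illustrative):

#include <linux/kernel.h>
#include <linux/stacktrace.h>

#define EXAMPLE_DEPTH 4

static unsigned long example_entries[EXAMPLE_DEPTH];

static void example_dump_caller(void)
{
	struct stack_trace trace = {
		.entries	= example_entries,
		.nr_entries	= 0,
	};

	/* record just the immediate caller by hand */
	example_entries[trace.nr_entries++] =
			(unsigned long)__builtin_return_address(0);

	printk("example_dump_caller() reached from:\n");
	print_stack_trace(&trace, 1);	/* each entry indented by two spaces */
}
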
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2c0aacc37c55..dcfb5d731466 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,7 +4,6 @@
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/syscalls.h> 6#include <linux/syscalls.h>
7#include <linux/kthread.h>
8#include <asm/atomic.h> 7#include <asm/atomic.h>
9#include <asm/semaphore.h> 8#include <asm/semaphore.h>
10#include <asm/uaccess.h> 9#include <asm/uaccess.h>
@@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads;
26static atomic_t stopmachine_thread_ack; 25static atomic_t stopmachine_thread_ack;
27static DECLARE_MUTEX(stopmachine_mutex); 26static DECLARE_MUTEX(stopmachine_mutex);
28 27
29static int stopmachine(void *unused) 28static int stopmachine(void *cpu)
30{ 29{
31 int irqs_disabled = 0; 30 int irqs_disabled = 0;
32 int prepared = 0; 31 int prepared = 0;
33 32
33 set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
34
34 /* Ack: we are alive */ 35 /* Ack: we are alive */
35 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ 36 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
36 atomic_inc(&stopmachine_thread_ack); 37 atomic_inc(&stopmachine_thread_ack);
@@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state)
84 85
85static int stop_machine(void) 86static int stop_machine(void)
86{ 87{
87 int ret = 0; 88 int i, ret = 0;
88 unsigned int i;
89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
90 90
91 /* One high-prio thread per cpu. We'll do this one. */ 91 /* One high-prio thread per cpu. We'll do this one. */
@@ -96,16 +96,11 @@ static int stop_machine(void)
96 stopmachine_state = STOPMACHINE_WAIT; 96 stopmachine_state = STOPMACHINE_WAIT;
97 97
98 for_each_online_cpu(i) { 98 for_each_online_cpu(i) {
99 struct task_struct *tsk;
100 if (i == raw_smp_processor_id()) 99 if (i == raw_smp_processor_id())
101 continue; 100 continue;
102 tsk = kthread_create(stopmachine, NULL, "stopmachine"); 101 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
103 if (IS_ERR(tsk)) { 102 if (ret < 0)
104 ret = PTR_ERR(tsk);
105 break; 103 break;
106 }
107 kthread_bind(tsk, i);
108 wake_up_process(tsk);
109 stopmachine_num_threads++; 104 stopmachine_num_threads++;
110 } 105 }
111 106
diff --git a/kernel/sys.c b/kernel/sys.c
index 2d5179c67cec..dbb3b9c7ea64 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/config.h>
8#include <linux/module.h> 7#include <linux/module.h>
9#include <linux/mm.h> 8#include <linux/mm.h>
10#include <linux/utsname.h> 9#include <linux/utsname.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 93a2c5398648..362a0cc37138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -18,7 +18,6 @@
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling 18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */ 19 */
20 20
21#include <linux/config.h>
22#include <linux/module.h> 21#include <linux/module.h>
23#include <linux/mm.h> 22#include <linux/mm.h>
24#include <linux/swap.h> 23#include <linux/swap.h>
@@ -934,13 +933,15 @@ static ctl_table vm_table[] = {
934 .extra1 = &zero, 933 .extra1 = &zero,
935 }, 934 },
936 { 935 {
937 .ctl_name = VM_ZONE_RECLAIM_INTERVAL, 936 .ctl_name = VM_MIN_UNMAPPED,
938 .procname = "zone_reclaim_interval", 937 .procname = "min_unmapped_ratio",
939 .data = &zone_reclaim_interval, 938 .data = &sysctl_min_unmapped_ratio,
940 .maxlen = sizeof(zone_reclaim_interval), 939 .maxlen = sizeof(sysctl_min_unmapped_ratio),
941 .mode = 0644, 940 .mode = 0644,
942 .proc_handler = &proc_dointvec_jiffies, 941 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
943 .strategy = &sysctl_jiffies, 942 .strategy = &sysctl_intvec,
943 .extra1 = &zero,
944 .extra2 = &one_hundred,
944 }, 945 },
945#endif 946#endif
946#ifdef CONFIG_X86_32 947#ifdef CONFIG_X86_32
diff --git a/kernel/timer.c b/kernel/timer.c
index 5a8960253063..396a3c024c2c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;
1208 * playing with xtime and avenrun. 1208 * playing with xtime and avenrun.
1209 */ 1209 */
1210#ifndef ARCH_HAVE_XTIME_LOCK 1210#ifndef ARCH_HAVE_XTIME_LOCK
1211seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; 1211__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
1212 1212
1213EXPORT_SYMBOL(xtime_lock); 1213EXPORT_SYMBOL(xtime_lock);
1214#endif 1214#endif
@@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)
1368 1368
1369static void process_timeout(unsigned long __data) 1369static void process_timeout(unsigned long __data)
1370{ 1370{
1371 wake_up_process((task_t *)__data); 1371 wake_up_process((struct task_struct *)__data);
1372} 1372}
1373 1373
1374/** 1374/**
@@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1559 return 0; 1559 return 0;
1560} 1560}
1561 1561
1562/*
1563 * lockdep: we want to track each per-CPU base as a separate lock-class,
1564 * but timer-bases are kmalloc()-ed, so we need to attach separate
1565 * keys to them:
1566 */
1567static struct lock_class_key base_lock_keys[NR_CPUS];
1568
1562static int __devinit init_timers_cpu(int cpu) 1569static int __devinit init_timers_cpu(int cpu)
1563{ 1570{
1564 int j; 1571 int j;
@@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)
1594 } 1601 }
1595 1602
1596 spin_lock_init(&base->lock); 1603 spin_lock_init(&base->lock);
1604 lockdep_set_class(&base->lock, base_lock_keys + cpu);
1605
1597 for (j = 0; j < TVN_SIZE; j++) { 1606 for (j = 0; j < TVN_SIZE; j++) {
1598 INIT_LIST_HEAD(base->tv5.vec + j); 1607 INIT_LIST_HEAD(base->tv5.vec + j);
1599 INIT_LIST_HEAD(base->tv4.vec + j); 1608 INIT_LIST_HEAD(base->tv4.vec + j);
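
The base_lock_keys[] comment above shows the general recipe for kmalloc()-ed locks that need their own lock-class: declare a static struct lock_class_key and attach it right after spin_lock_init(). A hedged sketch of the same pattern for an arbitrary allocated object (struct example_dev and its helper are invented names):

#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_dev {
	spinlock_t	lock;
	/* ... */
};

/* one key => one shared lock-class for every example_dev instance */
static struct lock_class_key example_dev_lock_key;

static struct example_dev *example_dev_alloc(void)
{
	struct example_dev *dev = kmalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	spin_lock_init(&dev->lock);
	lockdep_set_class(&dev->lock, &example_dev_lock_key);
	return dev;
}

timer.c above goes one step further and uses one key per CPU (base_lock_keys + cpu), so every per-CPU base->lock ends up in a class of its own.
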
diff --git a/kernel/wait.c b/kernel/wait.c
index 791681cfea98..a1d57aeb7f75 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * (C) 2004 William Irwin, Oracle 4 * (C) 2004 William Irwin, Oracle
5 */ 5 */
6#include <linux/config.h>
7#include <linux/init.h> 6#include <linux/init.h>
8#include <linux/module.h> 7#include <linux/module.h>
9#include <linux/sched.h> 8#include <linux/sched.h>
@@ -11,6 +10,10 @@
11#include <linux/wait.h> 10#include <linux/wait.h>
12#include <linux/hash.h> 11#include <linux/hash.h>
13 12
13struct lock_class_key waitqueue_lock_key;
14
15EXPORT_SYMBOL(waitqueue_lock_key);
16
14void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 17void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
15{ 18{
16 unsigned long flags; 19 unsigned long flags;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7f1c30c7273b..eebb1d839235 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,7 +51,7 @@ struct cpu_workqueue_struct {
51 wait_queue_head_t work_done; 51 wait_queue_head_t work_done;
52 52
53 struct workqueue_struct *wq; 53 struct workqueue_struct *wq;
54 task_t *thread; 54 struct task_struct *thread;
55 55
56 int run_depth; /* Detect run_workqueue() recursion depth */ 56 int run_depth; /* Detect run_workqueue() recursion depth */
57} ____cacheline_aligned; 57} ____cacheline_aligned;