author    Ingo Molnar <mingo@elte.hu>    2008-08-14 08:58:01 -0400
committer Ingo Molnar <mingo@elte.hu>    2008-08-14 08:58:01 -0400
commit    51ca3c679194e7435c25b8e77b0a73c597e41ae9 (patch)
tree      a681dca369607ab0f371d5246b0f75140b860a8a /kernel
parent    b55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 (diff)
parent    b635acec48bcaa9183fcbf4e3955616b0d4119b5 (diff)
Merge branch 'linus' into x86/core
Conflicts:
	arch/x86/kernel/genapic_64.c
	include/asm-x86/kvm_host.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.hz             2
-rw-r--r--  kernel/audit.c               13
-rw-r--r--  kernel/auditfilter.c         10
-rw-r--r--  kernel/auditsc.c             11
-rw-r--r--  kernel/cgroup.c              39
-rw-r--r--  kernel/cpu.c                  7
-rw-r--r--  kernel/cpuset.c              71
-rw-r--r--  kernel/dma-coherent.c        15
-rw-r--r--  kernel/exit.c                 6
-rw-r--r--  kernel/irq/manage.c           3
-rw-r--r--  kernel/irq/proc.c            96
-rw-r--r--  kernel/kgdb.c                94
-rw-r--r--  kernel/lockdep.c            295
-rw-r--r--  kernel/lockdep_internals.h    6
-rw-r--r--  kernel/lockdep_proc.c        37
-rw-r--r--  kernel/marker.c              12
-rw-r--r--  kernel/module.c               2
-rw-r--r--  kernel/mutex.c                1
-rw-r--r--  kernel/pm_qos_params.c       16
-rw-r--r--  kernel/posix-timers.c        19
-rw-r--r--  kernel/printk.c               8
-rw-r--r--  kernel/relay.c               12
-rw-r--r--  kernel/resource.c             2
-rw-r--r--  kernel/sched.c               67
-rw-r--r--  kernel/sched_clock.c        178
-rw-r--r--  kernel/sched_fair.c          21
-rw-r--r--  kernel/sched_rt.c             8
-rw-r--r--  kernel/semaphore.c            4
-rw-r--r--  kernel/signal.c               1
-rw-r--r--  kernel/smp.c                 58
-rw-r--r--  kernel/spinlock.c            11
-rw-r--r--  kernel/stop_machine.c         1
-rw-r--r--  kernel/time/tick-sched.c      2
-rw-r--r--  kernel/workqueue.c           37
34 files changed, 724 insertions, 441 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 382dd5a8b2d..94fabd534b0 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
55 default 1000 if HZ_1000 55 default 1000 if HZ_1000
56 56
57config SCHED_HRTICK 57config SCHED_HRTICK
58 def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS 58 def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)
diff --git a/kernel/audit.c b/kernel/audit.c
index e092f1c0ce3..4414e93d875 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -707,12 +707,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
707 if (status_get->mask & AUDIT_STATUS_ENABLED) { 707 if (status_get->mask & AUDIT_STATUS_ENABLED) {
708 err = audit_set_enabled(status_get->enabled, 708 err = audit_set_enabled(status_get->enabled,
709 loginuid, sessionid, sid); 709 loginuid, sessionid, sid);
710 if (err < 0) return err; 710 if (err < 0)
711 return err;
711 } 712 }
712 if (status_get->mask & AUDIT_STATUS_FAILURE) { 713 if (status_get->mask & AUDIT_STATUS_FAILURE) {
713 err = audit_set_failure(status_get->failure, 714 err = audit_set_failure(status_get->failure,
714 loginuid, sessionid, sid); 715 loginuid, sessionid, sid);
715 if (err < 0) return err; 716 if (err < 0)
717 return err;
716 } 718 }
717 if (status_get->mask & AUDIT_STATUS_PID) { 719 if (status_get->mask & AUDIT_STATUS_PID) {
718 int new_pid = status_get->pid; 720 int new_pid = status_get->pid;
@@ -725,9 +727,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
725 audit_pid = new_pid; 727 audit_pid = new_pid;
726 audit_nlk_pid = NETLINK_CB(skb).pid; 728 audit_nlk_pid = NETLINK_CB(skb).pid;
727 } 729 }
728 if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) 730 if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) {
729 err = audit_set_rate_limit(status_get->rate_limit, 731 err = audit_set_rate_limit(status_get->rate_limit,
730 loginuid, sessionid, sid); 732 loginuid, sessionid, sid);
733 if (err < 0)
734 return err;
735 }
731 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) 736 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
732 err = audit_set_backlog_limit(status_get->backlog_limit, 737 err = audit_set_backlog_limit(status_get->backlog_limit,
733 loginuid, sessionid, sid); 738 loginuid, sessionid, sid);
@@ -1366,7 +1371,7 @@ int audit_string_contains_control(const char *string, size_t len)
1366{ 1371{
1367 const unsigned char *p; 1372 const unsigned char *p;
1368 for (p = string; p < (const unsigned char *)string + len && *p; p++) { 1373 for (p = string; p < (const unsigned char *)string + len && *p; p++) {
1369 if (*p == '"' || *p < 0x21 || *p > 0x7f) 1374 if (*p == '"' || *p < 0x21 || *p > 0x7e)
1370 return 1; 1375 return 1;
1371 } 1376 }
1372 return 0; 1377 return 0;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 98c50cc671b..b7d354e2b0e 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1022,8 +1022,11 @@ static void audit_update_watch(struct audit_parent *parent,
1022 struct audit_buffer *ab; 1022 struct audit_buffer *ab;
1023 ab = audit_log_start(NULL, GFP_KERNEL, 1023 ab = audit_log_start(NULL, GFP_KERNEL,
1024 AUDIT_CONFIG_CHANGE); 1024 AUDIT_CONFIG_CHANGE);
1025 audit_log_format(ab, "auid=%u ses=%u",
1026 audit_get_loginuid(current),
1027 audit_get_sessionid(current));
1025 audit_log_format(ab, 1028 audit_log_format(ab,
1026 "op=updated rules specifying path="); 1029 " op=updated rules specifying path=");
1027 audit_log_untrustedstring(ab, owatch->path); 1030 audit_log_untrustedstring(ab, owatch->path);
1028 audit_log_format(ab, " with dev=%u ino=%lu\n", 1031 audit_log_format(ab, " with dev=%u ino=%lu\n",
1029 dev, ino); 1032 dev, ino);
@@ -1058,7 +1061,10 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
1058 struct audit_buffer *ab; 1061 struct audit_buffer *ab;
1059 ab = audit_log_start(NULL, GFP_KERNEL, 1062 ab = audit_log_start(NULL, GFP_KERNEL,
1060 AUDIT_CONFIG_CHANGE); 1063 AUDIT_CONFIG_CHANGE);
1061 audit_log_format(ab, "op=remove rule path="); 1064 audit_log_format(ab, "auid=%u ses=%u",
1065 audit_get_loginuid(current),
1066 audit_get_sessionid(current));
1067 audit_log_format(ab, " op=remove rule path=");
1062 audit_log_untrustedstring(ab, w->path); 1068 audit_log_untrustedstring(ab, w->path);
1063 if (r->filterkey) { 1069 if (r->filterkey) {
1064 audit_log_format(ab, " key="); 1070 audit_log_format(ab, " key=");
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4699950e65b..972f8e61d36 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -243,6 +243,9 @@ static inline int open_arg(int flags, int mask)
243 243
244static int audit_match_perm(struct audit_context *ctx, int mask) 244static int audit_match_perm(struct audit_context *ctx, int mask)
245{ 245{
246 if (unlikely(!ctx))
247 return 0;
248
246 unsigned n = ctx->major; 249 unsigned n = ctx->major;
247 switch (audit_classify_syscall(ctx->arch, n)) { 250 switch (audit_classify_syscall(ctx->arch, n)) {
248 case 0: /* native */ 251 case 0: /* native */
@@ -284,6 +287,10 @@ static int audit_match_filetype(struct audit_context *ctx, int which)
284{ 287{
285 unsigned index = which & ~S_IFMT; 288 unsigned index = which & ~S_IFMT;
286 mode_t mode = which & S_IFMT; 289 mode_t mode = which & S_IFMT;
290
291 if (unlikely(!ctx))
292 return 0;
293
287 if (index >= ctx->name_count) 294 if (index >= ctx->name_count)
288 return 0; 295 return 0;
289 if (ctx->names[index].ino == -1) 296 if (ctx->names[index].ino == -1)
@@ -610,7 +617,7 @@ static int audit_filter_rules(struct task_struct *tsk,
610 if (!result) 617 if (!result)
611 return 0; 618 return 0;
612 } 619 }
613 if (rule->filterkey) 620 if (rule->filterkey && ctx)
614 ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); 621 ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
615 switch (rule->action) { 622 switch (rule->action) {
616 case AUDIT_NEVER: *state = AUDIT_DISABLED; break; 623 case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
@@ -2375,7 +2382,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
2375 struct audit_context *ctx = tsk->audit_context; 2382 struct audit_context *ctx = tsk->audit_context;
2376 2383
2377 if (audit_pid && t->tgid == audit_pid) { 2384 if (audit_pid && t->tgid == audit_pid) {
2378 if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { 2385 if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
2379 audit_sig_pid = tsk->pid; 2386 audit_sig_pid = tsk->pid;
2380 if (tsk->loginuid != -1) 2387 if (tsk->loginuid != -1)
2381 audit_sig_uid = tsk->loginuid; 2388 audit_sig_uid = tsk->loginuid;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 657f8f8d93a..13932abde15 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -355,6 +355,17 @@ static struct css_set *find_existing_css_set(
355 return NULL; 355 return NULL;
356} 356}
357 357
358static void free_cg_links(struct list_head *tmp)
359{
360 struct cg_cgroup_link *link;
361 struct cg_cgroup_link *saved_link;
362
363 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
364 list_del(&link->cgrp_link_list);
365 kfree(link);
366 }
367}
368
358/* 369/*
359 * allocate_cg_links() allocates "count" cg_cgroup_link structures 370 * allocate_cg_links() allocates "count" cg_cgroup_link structures
360 * and chains them on tmp through their cgrp_link_list fields. Returns 0 on 371 * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
@@ -363,17 +374,12 @@ static struct css_set *find_existing_css_set(
363static int allocate_cg_links(int count, struct list_head *tmp) 374static int allocate_cg_links(int count, struct list_head *tmp)
364{ 375{
365 struct cg_cgroup_link *link; 376 struct cg_cgroup_link *link;
366 struct cg_cgroup_link *saved_link;
367 int i; 377 int i;
368 INIT_LIST_HEAD(tmp); 378 INIT_LIST_HEAD(tmp);
369 for (i = 0; i < count; i++) { 379 for (i = 0; i < count; i++) {
370 link = kmalloc(sizeof(*link), GFP_KERNEL); 380 link = kmalloc(sizeof(*link), GFP_KERNEL);
371 if (!link) { 381 if (!link) {
372 list_for_each_entry_safe(link, saved_link, tmp, 382 free_cg_links(tmp);
373 cgrp_link_list) {
374 list_del(&link->cgrp_link_list);
375 kfree(link);
376 }
377 return -ENOMEM; 383 return -ENOMEM;
378 } 384 }
379 list_add(&link->cgrp_link_list, tmp); 385 list_add(&link->cgrp_link_list, tmp);
@@ -381,17 +387,6 @@ static int allocate_cg_links(int count, struct list_head *tmp)
381 return 0; 387 return 0;
382} 388}
383 389
384static void free_cg_links(struct list_head *tmp)
385{
386 struct cg_cgroup_link *link;
387 struct cg_cgroup_link *saved_link;
388
389 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
390 list_del(&link->cgrp_link_list);
391 kfree(link);
392 }
393}
394
395/* 390/*
396 * find_css_set() takes an existing cgroup group and a 391 * find_css_set() takes an existing cgroup group and a
397 * cgroup object, and returns a css_set object that's 392 * cgroup object, and returns a css_set object that's
@@ -956,7 +951,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
956 struct super_block *sb; 951 struct super_block *sb;
957 struct cgroupfs_root *root; 952 struct cgroupfs_root *root;
958 struct list_head tmp_cg_links; 953 struct list_head tmp_cg_links;
959 INIT_LIST_HEAD(&tmp_cg_links);
960 954
961 /* First find the desired set of subsystems */ 955 /* First find the desired set of subsystems */
962 ret = parse_cgroupfs_options(data, &opts); 956 ret = parse_cgroupfs_options(data, &opts);
@@ -1424,14 +1418,17 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
1424 if (buffer == NULL) 1418 if (buffer == NULL)
1425 return -ENOMEM; 1419 return -ENOMEM;
1426 } 1420 }
1427 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) 1421 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
1428 return -EFAULT; 1422 retval = -EFAULT;
1423 goto out;
1424 }
1429 1425
1430 buffer[nbytes] = 0; /* nul-terminate */ 1426 buffer[nbytes] = 0; /* nul-terminate */
1431 strstrip(buffer); 1427 strstrip(buffer);
1432 retval = cft->write_string(cgrp, cft, buffer); 1428 retval = cft->write_string(cgrp, cft, buffer);
1433 if (!retval) 1429 if (!retval)
1434 retval = nbytes; 1430 retval = nbytes;
1431out:
1435 if (buffer != local_buffer) 1432 if (buffer != local_buffer)
1436 kfree(buffer); 1433 kfree(buffer);
1437 return retval; 1434 return retval;
@@ -2371,7 +2368,7 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2371 return cgroup_create(c_parent, dentry, mode | S_IFDIR); 2368 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
2372} 2369}
2373 2370
2374static inline int cgroup_has_css_refs(struct cgroup *cgrp) 2371static int cgroup_has_css_refs(struct cgroup *cgrp)
2375{ 2372{
2376 /* Check the reference count on each subsystem. Since we 2373 /* Check the reference count on each subsystem. Since we
2377 * already established that there are no tasks in the 2374 * already established that there are no tasks in the
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e202a68d1cc..f17e9854c24 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -349,6 +349,8 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
349 goto out_notify; 349 goto out_notify;
350 BUG_ON(!cpu_online(cpu)); 350 BUG_ON(!cpu_online(cpu));
351 351
352 cpu_set(cpu, cpu_active_map);
353
352 /* Now call notifier in preparation. */ 354 /* Now call notifier in preparation. */
353 raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); 355 raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
354 356
@@ -367,7 +369,7 @@ int __cpuinit cpu_up(unsigned int cpu)
367 if (!cpu_isset(cpu, cpu_possible_map)) { 369 if (!cpu_isset(cpu, cpu_possible_map)) {
368 printk(KERN_ERR "can't online cpu %d because it is not " 370 printk(KERN_ERR "can't online cpu %d because it is not "
369 "configured as may-hotadd at boot time\n", cpu); 371 "configured as may-hotadd at boot time\n", cpu);
370#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390) 372#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
371 printk(KERN_ERR "please check additional_cpus= boot " 373 printk(KERN_ERR "please check additional_cpus= boot "
372 "parameter\n"); 374 "parameter\n");
373#endif 375#endif
@@ -383,9 +385,6 @@ int __cpuinit cpu_up(unsigned int cpu)
383 385
384 err = _cpu_up(cpu, 0); 386 err = _cpu_up(cpu, 0);
385 387
386 if (cpu_online(cpu))
387 cpu_set(cpu, cpu_active_map);
388
389out: 388out:
390 cpu_maps_update_done(); 389 cpu_maps_update_done();
391 return err; 390 return err;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 91cf85b36dd..d5ab79cf516 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -54,7 +54,6 @@
54#include <asm/uaccess.h> 54#include <asm/uaccess.h>
55#include <asm/atomic.h> 55#include <asm/atomic.h>
56#include <linux/mutex.h> 56#include <linux/mutex.h>
57#include <linux/kfifo.h>
58#include <linux/workqueue.h> 57#include <linux/workqueue.h>
59#include <linux/cgroup.h> 58#include <linux/cgroup.h>
60 59
@@ -486,13 +485,38 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
486static void 485static void
487update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) 486update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
488{ 487{
489 if (!dattr)
490 return;
491 if (dattr->relax_domain_level < c->relax_domain_level) 488 if (dattr->relax_domain_level < c->relax_domain_level)
492 dattr->relax_domain_level = c->relax_domain_level; 489 dattr->relax_domain_level = c->relax_domain_level;
493 return; 490 return;
494} 491}
495 492
493static void
494update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
495{
496 LIST_HEAD(q);
497
498 list_add(&c->stack_list, &q);
499 while (!list_empty(&q)) {
500 struct cpuset *cp;
501 struct cgroup *cont;
502 struct cpuset *child;
503
504 cp = list_first_entry(&q, struct cpuset, stack_list);
505 list_del(q.next);
506
507 if (cpus_empty(cp->cpus_allowed))
508 continue;
509
510 if (is_sched_load_balance(cp))
511 update_domain_attr(dattr, cp);
512
513 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
514 child = cgroup_cs(cont);
515 list_add_tail(&child->stack_list, &q);
516 }
517 }
518}
519
496/* 520/*
497 * rebuild_sched_domains() 521 * rebuild_sched_domains()
498 * 522 *
@@ -532,7 +556,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
532 * So the reverse nesting would risk an ABBA deadlock. 556 * So the reverse nesting would risk an ABBA deadlock.
533 * 557 *
534 * The three key local variables below are: 558 * The three key local variables below are:
535 * q - a kfifo queue of cpuset pointers, used to implement a 559 * q - a linked-list queue of cpuset pointers, used to implement a
536 * top-down scan of all cpusets. This scan loads a pointer 560 * top-down scan of all cpusets. This scan loads a pointer
537 * to each cpuset marked is_sched_load_balance into the 561 * to each cpuset marked is_sched_load_balance into the
538 * array 'csa'. For our purposes, rebuilding the schedulers 562 * array 'csa'. For our purposes, rebuilding the schedulers
@@ -567,7 +591,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
567 591
568void rebuild_sched_domains(void) 592void rebuild_sched_domains(void)
569{ 593{
570 struct kfifo *q; /* queue of cpusets to be scanned */ 594 LIST_HEAD(q); /* queue of cpusets to be scanned*/
571 struct cpuset *cp; /* scans q */ 595 struct cpuset *cp; /* scans q */
572 struct cpuset **csa; /* array of all cpuset ptrs */ 596 struct cpuset **csa; /* array of all cpuset ptrs */
573 int csn; /* how many cpuset ptrs in csa so far */ 597 int csn; /* how many cpuset ptrs in csa so far */
@@ -577,7 +601,6 @@ void rebuild_sched_domains(void)
577 int ndoms; /* number of sched domains in result */ 601 int ndoms; /* number of sched domains in result */
578 int nslot; /* next empty doms[] cpumask_t slot */ 602 int nslot; /* next empty doms[] cpumask_t slot */
579 603
580 q = NULL;
581 csa = NULL; 604 csa = NULL;
582 doms = NULL; 605 doms = NULL;
583 dattr = NULL; 606 dattr = NULL;
@@ -591,35 +614,42 @@ void rebuild_sched_domains(void)
591 dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); 614 dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
592 if (dattr) { 615 if (dattr) {
593 *dattr = SD_ATTR_INIT; 616 *dattr = SD_ATTR_INIT;
594 update_domain_attr(dattr, &top_cpuset); 617 update_domain_attr_tree(dattr, &top_cpuset);
595 } 618 }
596 *doms = top_cpuset.cpus_allowed; 619 *doms = top_cpuset.cpus_allowed;
597 goto rebuild; 620 goto rebuild;
598 } 621 }
599 622
600 q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
601 if (IS_ERR(q))
602 goto done;
603 csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); 623 csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
604 if (!csa) 624 if (!csa)
605 goto done; 625 goto done;
606 csn = 0; 626 csn = 0;
607 627
608 cp = &top_cpuset; 628 list_add(&top_cpuset.stack_list, &q);
609 __kfifo_put(q, (void *)&cp, sizeof(cp)); 629 while (!list_empty(&q)) {
610 while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
611 struct cgroup *cont; 630 struct cgroup *cont;
612 struct cpuset *child; /* scans child cpusets of cp */ 631 struct cpuset *child; /* scans child cpusets of cp */
613 632
633 cp = list_first_entry(&q, struct cpuset, stack_list);
634 list_del(q.next);
635
614 if (cpus_empty(cp->cpus_allowed)) 636 if (cpus_empty(cp->cpus_allowed))
615 continue; 637 continue;
616 638
617 if (is_sched_load_balance(cp)) 639 /*
640 * All child cpusets contain a subset of the parent's cpus, so
641 * just skip them, and then we call update_domain_attr_tree()
642 * to calc relax_domain_level of the corresponding sched
643 * domain.
644 */
645 if (is_sched_load_balance(cp)) {
618 csa[csn++] = cp; 646 csa[csn++] = cp;
647 continue;
648 }
619 649
620 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { 650 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
621 child = cgroup_cs(cont); 651 child = cgroup_cs(cont);
622 __kfifo_put(q, (void *)&child, sizeof(cp)); 652 list_add_tail(&child->stack_list, &q);
623 } 653 }
624 } 654 }
625 655
@@ -686,7 +716,7 @@ restart:
686 cpus_or(*dp, *dp, b->cpus_allowed); 716 cpus_or(*dp, *dp, b->cpus_allowed);
687 b->pn = -1; 717 b->pn = -1;
688 if (dattr) 718 if (dattr)
689 update_domain_attr(dattr 719 update_domain_attr_tree(dattr
690 + nslot, b); 720 + nslot, b);
691 } 721 }
692 } 722 }
@@ -702,8 +732,6 @@ rebuild:
702 put_online_cpus(); 732 put_online_cpus();
703 733
704done: 734done:
705 if (q && !IS_ERR(q))
706 kfifo_free(q);
707 kfree(csa); 735 kfree(csa);
708 /* Don't kfree(doms) -- partition_sched_domains() does that. */ 736 /* Don't kfree(doms) -- partition_sched_domains() does that. */
709 /* Don't kfree(dattr) -- partition_sched_domains() does that. */ 737 /* Don't kfree(dattr) -- partition_sched_domains() does that. */
@@ -1833,24 +1861,21 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1833 */ 1861 */
1834static void scan_for_empty_cpusets(const struct cpuset *root) 1862static void scan_for_empty_cpusets(const struct cpuset *root)
1835{ 1863{
1864 LIST_HEAD(queue);
1836 struct cpuset *cp; /* scans cpusets being updated */ 1865 struct cpuset *cp; /* scans cpusets being updated */
1837 struct cpuset *child; /* scans child cpusets of cp */ 1866 struct cpuset *child; /* scans child cpusets of cp */
1838 struct list_head queue;
1839 struct cgroup *cont; 1867 struct cgroup *cont;
1840 nodemask_t oldmems; 1868 nodemask_t oldmems;
1841 1869
1842 INIT_LIST_HEAD(&queue);
1843
1844 list_add_tail((struct list_head *)&root->stack_list, &queue); 1870 list_add_tail((struct list_head *)&root->stack_list, &queue);
1845 1871
1846 while (!list_empty(&queue)) { 1872 while (!list_empty(&queue)) {
1847 cp = container_of(queue.next, struct cpuset, stack_list); 1873 cp = list_first_entry(&queue, struct cpuset, stack_list);
1848 list_del(queue.next); 1874 list_del(queue.next);
1849 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { 1875 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
1850 child = cgroup_cs(cont); 1876 child = cgroup_cs(cont);
1851 list_add_tail(&child->stack_list, &queue); 1877 list_add_tail(&child->stack_list, &queue);
1852 } 1878 }
1853 cont = cp->css.cgroup;
1854 1879
1855 /* Continue past cpusets with all cpus, mems online */ 1880 /* Continue past cpusets with all cpus, mems online */
1856 if (cpus_subset(cp->cpus_allowed, cpu_online_map) && 1881 if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index 7517115a8cc..c1d4d5b4c61 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -77,15 +77,14 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
77{ 77{
78 struct dma_coherent_mem *mem = dev->dma_mem; 78 struct dma_coherent_mem *mem = dev->dma_mem;
79 int pos, err; 79 int pos, err;
80 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
81 80
82 pages >>= PAGE_SHIFT; 81 size += device_addr & ~PAGE_MASK;
83 82
84 if (!mem) 83 if (!mem)
85 return ERR_PTR(-EINVAL); 84 return ERR_PTR(-EINVAL);
86 85
87 pos = (device_addr - mem->device_base) >> PAGE_SHIFT; 86 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
88 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); 87 err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
89 if (err != 0) 88 if (err != 0)
90 return ERR_PTR(err); 89 return ERR_PTR(err);
91 return mem->virt_base + (pos << PAGE_SHIFT); 90 return mem->virt_base + (pos << PAGE_SHIFT);
@@ -93,7 +92,7 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
93EXPORT_SYMBOL(dma_mark_declared_memory_occupied); 92EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
94 93
95/** 94/**
96 * Try to allocate memory from the per-device coherent area. 95 * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area
97 * 96 *
98 * @dev: device from which we allocate memory 97 * @dev: device from which we allocate memory
99 * @size: size of requested memory area 98 * @size: size of requested memory area
@@ -101,11 +100,11 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
101 * @ret: This pointer will be filled with the virtual address 100 * @ret: This pointer will be filled with the virtual address
102 * to allocated area. 101 * to allocated area.
103 * 102 *
104 * This function should be only called from per-arch %dma_alloc_coherent() 103 * This function should be only called from per-arch dma_alloc_coherent()
105 * to support allocation from per-device coherent memory pools. 104 * to support allocation from per-device coherent memory pools.
106 * 105 *
107 * Returns 0 if dma_alloc_coherent should continue with allocating from 106 * Returns 0 if dma_alloc_coherent should continue with allocating from
108 * generic memory areas, or !0 if dma_alloc_coherent should return %ret. 107 * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
109 */ 108 */
110int dma_alloc_from_coherent(struct device *dev, ssize_t size, 109int dma_alloc_from_coherent(struct device *dev, ssize_t size,
111 dma_addr_t *dma_handle, void **ret) 110 dma_addr_t *dma_handle, void **ret)
@@ -127,7 +126,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
127} 126}
128 127
129/** 128/**
130 * Try to free the memory allocated from per-device coherent memory pool. 129 * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
131 * @dev: device from which the memory was allocated 130 * @dev: device from which the memory was allocated
132 * @order: the order of pages allocated 131 * @order: the order of pages allocated
133 * @vaddr: virtual address of allocated pages 132 * @vaddr: virtual address of allocated pages
@@ -136,7 +135,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
136 * coherent memory pool and if so, releases that memory. 135 * coherent memory pool and if so, releases that memory.
137 * 136 *
138 * Returns 1 if we correctly released the memory, or 0 if 137 * Returns 1 if we correctly released the memory, or 0 if
139 * %dma_release_coherent() should proceed with releasing memory from 138 * dma_release_coherent() should proceed with releasing memory from
140 * generic pools. 139 * generic pools.
141 */ 140 */
142int dma_release_from_coherent(struct device *dev, int order, void *vaddr) 141int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
diff --git a/kernel/exit.c b/kernel/exit.c
index eb4d6470d1d..38ec4063014 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -911,10 +911,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
911 tsk->exit_signal = SIGCHLD; 911 tsk->exit_signal = SIGCHLD;
912 912
913 signal = tracehook_notify_death(tsk, &cookie, group_dead); 913 signal = tracehook_notify_death(tsk, &cookie, group_dead);
914 if (signal > 0) 914 if (signal >= 0)
915 signal = do_notify_parent(tsk, signal); 915 signal = do_notify_parent(tsk, signal);
916 916
917 tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; 917 tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
918 918
919 /* mt-exec, de_thread() is waiting for us */ 919 /* mt-exec, de_thread() is waiting for us */
920 if (thread_group_leader(tsk) && 920 if (thread_group_leader(tsk) &&
@@ -927,7 +927,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
927 tracehook_report_death(tsk, signal, cookie, group_dead); 927 tracehook_report_death(tsk, signal, cookie, group_dead);
928 928
929 /* If the process is dead, release it - nobody will wait for it */ 929 /* If the process is dead, release it - nobody will wait for it */
930 if (signal < 0) 930 if (signal == DEATH_REAP)
931 release_task(tsk); 931 release_task(tsk);
932} 932}
933 933
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 22d10d3189f..60c49e32439 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -330,7 +330,8 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
330 ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK); 330 ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
331 331
332 if (ret) 332 if (ret)
333 pr_err("setting flow type for irq %u failed (%pF)\n", 333 pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
334 (int)(flags & IRQF_TRIGGER_MASK),
334 irq, chip->set_type); 335 irq, chip->set_type);
335 336
336 return ret; 337 return ret;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c6d35d68ee..a09dd29c2fd 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/irq.h> 9#include <linux/irq.h>
10#include <linux/proc_fs.h> 10#include <linux/proc_fs.h>
11#include <linux/seq_file.h>
11#include <linux/interrupt.h> 12#include <linux/interrupt.h>
12 13
13#include "internals.h" 14#include "internals.h"
@@ -16,23 +17,18 @@ static struct proc_dir_entry *root_irq_dir;
16 17
17#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
18 19
19static int irq_affinity_read_proc(char *page, char **start, off_t off, 20static int irq_affinity_proc_show(struct seq_file *m, void *v)
20 int count, int *eof, void *data)
21{ 21{
22 struct irq_desc *desc = irq_desc + (long)data; 22 struct irq_desc *desc = irq_desc + (long)m->private;
23 cpumask_t *mask = &desc->affinity; 23 cpumask_t *mask = &desc->affinity;
24 int len;
25 24
26#ifdef CONFIG_GENERIC_PENDING_IRQ 25#ifdef CONFIG_GENERIC_PENDING_IRQ
27 if (desc->status & IRQ_MOVE_PENDING) 26 if (desc->status & IRQ_MOVE_PENDING)
28 mask = &desc->pending_mask; 27 mask = &desc->pending_mask;
29#endif 28#endif
30 len = cpumask_scnprintf(page, count, *mask); 29 seq_cpumask(m, mask);
31 30 seq_putc(m, '\n');
32 if (count - len < 2) 31 return 0;
33 return -EINVAL;
34 len += sprintf(page + len, "\n");
35 return len;
36} 32}
37 33
38#ifndef is_affinity_mask_valid 34#ifndef is_affinity_mask_valid
@@ -40,11 +36,12 @@ static int irq_affinity_read_proc(char *page, char **start, off_t off,
40#endif 36#endif
41 37
42int no_irq_affinity; 38int no_irq_affinity;
43static int irq_affinity_write_proc(struct file *file, const char __user *buffer, 39static ssize_t irq_affinity_proc_write(struct file *file,
44 unsigned long count, void *data) 40 const char __user *buffer, size_t count, loff_t *pos)
45{ 41{
46 unsigned int irq = (int)(long)data, full_count = count, err; 42 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
47 cpumask_t new_value; 43 cpumask_t new_value;
44 int err;
48 45
49 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || 46 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
50 irq_balancing_disabled(irq)) 47 irq_balancing_disabled(irq))
@@ -65,28 +62,38 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
65 if (!cpus_intersects(new_value, cpu_online_map)) 62 if (!cpus_intersects(new_value, cpu_online_map))
66 /* Special case for empty set - allow the architecture 63 /* Special case for empty set - allow the architecture
67 code to set default SMP affinity. */ 64 code to set default SMP affinity. */
68 return irq_select_affinity(irq) ? -EINVAL : full_count; 65 return irq_select_affinity(irq) ? -EINVAL : count;
69 66
70 irq_set_affinity(irq, new_value); 67 irq_set_affinity(irq, new_value);
71 68
72 return full_count; 69 return count;
73} 70}
74 71
75static int default_affinity_read(char *page, char **start, off_t off, 72static int irq_affinity_proc_open(struct inode *inode, struct file *file)
76 int count, int *eof, void *data)
77{ 73{
78 int len = cpumask_scnprintf(page, count, irq_default_affinity); 74 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
79 if (count - len < 2)
80 return -EINVAL;
81 len += sprintf(page + len, "\n");
82 return len;
83} 75}
84 76
85static int default_affinity_write(struct file *file, const char __user *buffer, 77static const struct file_operations irq_affinity_proc_fops = {
86 unsigned long count, void *data) 78 .open = irq_affinity_proc_open,
79 .read = seq_read,
80 .llseek = seq_lseek,
81 .release = single_release,
82 .write = irq_affinity_proc_write,
83};
84
85static int default_affinity_show(struct seq_file *m, void *v)
86{
87 seq_cpumask(m, &irq_default_affinity);
88 seq_putc(m, '\n');
89 return 0;
90}
91
92static ssize_t default_affinity_write(struct file *file,
93 const char __user *buffer, size_t count, loff_t *ppos)
87{ 94{
88 unsigned int full_count = count, err;
89 cpumask_t new_value; 95 cpumask_t new_value;
96 int err;
90 97
91 err = cpumask_parse_user(buffer, count, new_value); 98 err = cpumask_parse_user(buffer, count, new_value);
92 if (err) 99 if (err)
@@ -105,8 +112,21 @@ static int default_affinity_write(struct file *file, const char __user *buffer,
105 112
106 irq_default_affinity = new_value; 113 irq_default_affinity = new_value;
107 114
108 return full_count; 115 return count;
109} 116}
117
118static int default_affinity_open(struct inode *inode, struct file *file)
119{
120 return single_open(file, default_affinity_show, NULL);
121}
122
123static const struct file_operations default_affinity_proc_fops = {
124 .open = default_affinity_open,
125 .read = seq_read,
126 .llseek = seq_lseek,
127 .release = single_release,
128 .write = default_affinity_write,
129};
110#endif 130#endif
111 131
112static int irq_spurious_read(char *page, char **start, off_t off, 132static int irq_spurious_read(char *page, char **start, off_t off,
@@ -178,16 +198,9 @@ void register_irq_proc(unsigned int irq)
178 irq_desc[irq].dir = proc_mkdir(name, root_irq_dir); 198 irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
179 199
180#ifdef CONFIG_SMP 200#ifdef CONFIG_SMP
181 { 201 /* create /proc/irq/<irq>/smp_affinity */
182 /* create /proc/irq/<irq>/smp_affinity */ 202 proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
183 entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir); 203 &irq_affinity_proc_fops, (void *)(long)irq);
184
185 if (entry) {
186 entry->data = (void *)(long)irq;
187 entry->read_proc = irq_affinity_read_proc;
188 entry->write_proc = irq_affinity_write_proc;
189 }
190 }
191#endif 204#endif
192 205
193 entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir); 206 entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
@@ -208,15 +221,8 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
208void register_default_affinity_proc(void) 221void register_default_affinity_proc(void)
209{ 222{
210#ifdef CONFIG_SMP 223#ifdef CONFIG_SMP
211 struct proc_dir_entry *entry; 224 proc_create("irq/default_smp_affinity", 0600, NULL,
212 225 &default_affinity_proc_fops);
213 /* create /proc/irq/default_smp_affinity */
214 entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
215 if (entry) {
216 entry->data = NULL;
217 entry->read_proc = default_affinity_read;
218 entry->write_proc = default_affinity_write;
219 }
220#endif 226#endif
221} 227}
222 228
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 3ec23c3ec97..eaa21fc9ad1 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -56,12 +56,14 @@
56 56
57static int kgdb_break_asap; 57static int kgdb_break_asap;
58 58
59#define KGDB_MAX_THREAD_QUERY 17
59struct kgdb_state { 60struct kgdb_state {
60 int ex_vector; 61 int ex_vector;
61 int signo; 62 int signo;
62 int err_code; 63 int err_code;
63 int cpu; 64 int cpu;
64 int pass_exception; 65 int pass_exception;
66 unsigned long thr_query;
65 unsigned long threadid; 67 unsigned long threadid;
66 long kgdb_usethreadid; 68 long kgdb_usethreadid;
67 struct pt_regs *linux_regs; 69 struct pt_regs *linux_regs;
@@ -166,13 +168,6 @@ early_param("nokgdbroundup", opt_nokgdbroundup);
166 * Weak aliases for breakpoint management, 168 * Weak aliases for breakpoint management,
167 * can be overriden by architectures when needed: 169 * can be overriden by architectures when needed:
168 */ 170 */
169int __weak kgdb_validate_break_address(unsigned long addr)
170{
171 char tmp_variable[BREAK_INSTR_SIZE];
172
173 return probe_kernel_read(tmp_variable, (char *)addr, BREAK_INSTR_SIZE);
174}
175
176int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) 171int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
177{ 172{
178 int err; 173 int err;
@@ -191,6 +186,25 @@ int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
191 (char *)bundle, BREAK_INSTR_SIZE); 186 (char *)bundle, BREAK_INSTR_SIZE);
192} 187}
193 188
189int __weak kgdb_validate_break_address(unsigned long addr)
190{
191 char tmp_variable[BREAK_INSTR_SIZE];
192 int err;
193 /* Validate setting the breakpoint and then removing it. In the
194 * remove fails, the kernel needs to emit a bad message because we
195 * are deep trouble not being able to put things back the way we
196 * found them.
197 */
198 err = kgdb_arch_set_breakpoint(addr, tmp_variable);
199 if (err)
200 return err;
201 err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
202 if (err)
203 printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
204 "memory destroyed at: %lx", addr);
205 return err;
206}
207
194unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs) 208unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
195{ 209{
196 return instruction_pointer(regs); 210 return instruction_pointer(regs);
@@ -433,9 +447,14 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val)
433{ 447{
434 int hex_val; 448 int hex_val;
435 int num = 0; 449 int num = 0;
450 int negate = 0;
436 451
437 *long_val = 0; 452 *long_val = 0;
438 453
454 if (**ptr == '-') {
455 negate = 1;
456 (*ptr)++;
457 }
439 while (**ptr) { 458 while (**ptr) {
440 hex_val = hex(**ptr); 459 hex_val = hex(**ptr);
441 if (hex_val < 0) 460 if (hex_val < 0)
@@ -446,6 +465,9 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val)
446 (*ptr)++; 465 (*ptr)++;
447 } 466 }
448 467
468 if (negate)
469 *long_val = -*long_val;
470
449 return num; 471 return num;
450} 472}
451 473
@@ -515,10 +537,16 @@ static void int_to_threadref(unsigned char *id, int value)
515static struct task_struct *getthread(struct pt_regs *regs, int tid) 537static struct task_struct *getthread(struct pt_regs *regs, int tid)
516{ 538{
517 /* 539 /*
518 * Non-positive TIDs are remapped idle tasks: 540 * Non-positive TIDs are remapped to the cpu shadow information
519 */ 541 */
520 if (tid <= 0) 542 if (tid == 0 || tid == -1)
521 return idle_task(-tid); 543 tid = -atomic_read(&kgdb_active) - 2;
544 if (tid < 0) {
545 if (kgdb_info[-tid - 2].task)
546 return kgdb_info[-tid - 2].task;
547 else
548 return idle_task(-tid - 2);
549 }
522 550
523 /* 551 /*
524 * find_task_by_pid_ns() does not take the tasklist lock anymore 552 * find_task_by_pid_ns() does not take the tasklist lock anymore
@@ -725,14 +753,15 @@ setundefined:
725} 753}
726 754
727/* 755/*
728 * Remap normal tasks to their real PID, idle tasks to -1 ... -NR_CPUs: 756 * Remap normal tasks to their real PID,
757 * CPU shadow threads are mapped to -CPU - 2
729 */ 758 */
730static inline int shadow_pid(int realpid) 759static inline int shadow_pid(int realpid)
731{ 760{
732 if (realpid) 761 if (realpid)
733 return realpid; 762 return realpid;
734 763
735 return -1-raw_smp_processor_id(); 764 return -raw_smp_processor_id() - 2;
736} 765}
737 766
738static char gdbmsgbuf[BUFMAX + 1]; 767static char gdbmsgbuf[BUFMAX + 1];
@@ -826,7 +855,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
826 local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo; 855 local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
827 } else { 856 } else {
828 local_debuggerinfo = NULL; 857 local_debuggerinfo = NULL;
829 for (i = 0; i < NR_CPUS; i++) { 858 for_each_online_cpu(i) {
830 /* 859 /*
831 * Try to find the task on some other 860 * Try to find the task on some other
832 * or possibly this node if we do not 861 * or possibly this node if we do not
@@ -960,10 +989,13 @@ static int gdb_cmd_reboot(struct kgdb_state *ks)
960/* Handle the 'q' query packets */ 989/* Handle the 'q' query packets */
961static void gdb_cmd_query(struct kgdb_state *ks) 990static void gdb_cmd_query(struct kgdb_state *ks)
962{ 991{
963 struct task_struct *thread; 992 struct task_struct *g;
993 struct task_struct *p;
964 unsigned char thref[8]; 994 unsigned char thref[8];
965 char *ptr; 995 char *ptr;
966 int i; 996 int i;
997 int cpu;
998 int finished = 0;
967 999
968 switch (remcom_in_buffer[1]) { 1000 switch (remcom_in_buffer[1]) {
969 case 's': 1001 case 's':
@@ -973,22 +1005,34 @@ static void gdb_cmd_query(struct kgdb_state *ks)
973 break; 1005 break;
974 } 1006 }
975 1007
976 if (remcom_in_buffer[1] == 'f') 1008 i = 0;
977 ks->threadid = 1;
978
979 remcom_out_buffer[0] = 'm'; 1009 remcom_out_buffer[0] = 'm';
980 ptr = remcom_out_buffer + 1; 1010 ptr = remcom_out_buffer + 1;
981 1011 if (remcom_in_buffer[1] == 'f') {
982 for (i = 0; i < 17; ks->threadid++) { 1012 /* Each cpu is a shadow thread */
983 thread = getthread(ks->linux_regs, ks->threadid); 1013 for_each_online_cpu(cpu) {
984 if (thread) { 1014 ks->thr_query = 0;
985 int_to_threadref(thref, ks->threadid); 1015 int_to_threadref(thref, -cpu - 2);
986 pack_threadid(ptr, thref); 1016 pack_threadid(ptr, thref);
987 ptr += BUF_THREAD_ID_SIZE; 1017 ptr += BUF_THREAD_ID_SIZE;
988 *(ptr++) = ','; 1018 *(ptr++) = ',';
989 i++; 1019 i++;
990 } 1020 }
991 } 1021 }
1022
1023 do_each_thread(g, p) {
1024 if (i >= ks->thr_query && !finished) {
1025 int_to_threadref(thref, p->pid);
1026 pack_threadid(ptr, thref);
1027 ptr += BUF_THREAD_ID_SIZE;
1028 *(ptr++) = ',';
1029 ks->thr_query++;
1030 if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
1031 finished = 1;
1032 }
1033 i++;
1034 } while_each_thread(g, p);
1035
992 *(--ptr) = '\0'; 1036 *(--ptr) = '\0';
993 break; 1037 break;
994 1038
@@ -1011,15 +1055,15 @@ static void gdb_cmd_query(struct kgdb_state *ks)
1011 error_packet(remcom_out_buffer, -EINVAL); 1055 error_packet(remcom_out_buffer, -EINVAL);
1012 break; 1056 break;
1013 } 1057 }
1014 if (ks->threadid > 0) { 1058 if ((int)ks->threadid > 0) {
1015 kgdb_mem2hex(getthread(ks->linux_regs, 1059 kgdb_mem2hex(getthread(ks->linux_regs,
1016 ks->threadid)->comm, 1060 ks->threadid)->comm,
1017 remcom_out_buffer, 16); 1061 remcom_out_buffer, 16);
1018 } else { 1062 } else {
1019 static char tmpstr[23 + BUF_THREAD_ID_SIZE]; 1063 static char tmpstr[23 + BUF_THREAD_ID_SIZE];
1020 1064
1021 sprintf(tmpstr, "Shadow task %d for pid 0", 1065 sprintf(tmpstr, "shadowCPU%d",
1022 (int)(-ks->threadid-1)); 1066 (int)(-ks->threadid - 2));
1023 kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr)); 1067 kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
1024 } 1068 }
1025 break; 1069 break;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index d38a6436297..1aa91fd6b06 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -124,6 +124,15 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
124unsigned long nr_lock_classes; 124unsigned long nr_lock_classes;
125static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 125static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
126 126
127static inline struct lock_class *hlock_class(struct held_lock *hlock)
128{
129 if (!hlock->class_idx) {
130 DEBUG_LOCKS_WARN_ON(1);
131 return NULL;
132 }
133 return lock_classes + hlock->class_idx - 1;
134}
135
127#ifdef CONFIG_LOCK_STAT 136#ifdef CONFIG_LOCK_STAT
128static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 137static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
129 138
@@ -222,7 +231,7 @@ static void lock_release_holdtime(struct held_lock *hlock)
222 231
223 holdtime = sched_clock() - hlock->holdtime_stamp; 232 holdtime = sched_clock() - hlock->holdtime_stamp;
224 233
225 stats = get_lock_stats(hlock->class); 234 stats = get_lock_stats(hlock_class(hlock));
226 if (hlock->read) 235 if (hlock->read)
227 lock_time_inc(&stats->read_holdtime, holdtime); 236 lock_time_inc(&stats->read_holdtime, holdtime);
228 else 237 else
@@ -372,6 +381,19 @@ unsigned int nr_process_chains;
372unsigned int max_lockdep_depth; 381unsigned int max_lockdep_depth;
373unsigned int max_recursion_depth; 382unsigned int max_recursion_depth;
374 383
384static unsigned int lockdep_dependency_gen_id;
385
386static bool lockdep_dependency_visit(struct lock_class *source,
387 unsigned int depth)
388{
389 if (!depth)
390 lockdep_dependency_gen_id++;
391 if (source->dep_gen_id == lockdep_dependency_gen_id)
392 return true;
393 source->dep_gen_id = lockdep_dependency_gen_id;
394 return false;
395}
396
375#ifdef CONFIG_DEBUG_LOCKDEP 397#ifdef CONFIG_DEBUG_LOCKDEP
376/* 398/*
377 * We cannot printk in early bootup code. Not even early_printk() 399 * We cannot printk in early bootup code. Not even early_printk()
@@ -505,7 +527,7 @@ static void print_lockdep_cache(struct lockdep_map *lock)
505 527
506static void print_lock(struct held_lock *hlock) 528static void print_lock(struct held_lock *hlock)
507{ 529{
508 print_lock_name(hlock->class); 530 print_lock_name(hlock_class(hlock));
509 printk(", at: "); 531 printk(", at: ");
510 print_ip_sym(hlock->acquire_ip); 532 print_ip_sym(hlock->acquire_ip);
511} 533}
@@ -558,6 +580,9 @@ static void print_lock_dependencies(struct lock_class *class, int depth)
558{ 580{
559 struct lock_list *entry; 581 struct lock_list *entry;
560 582
583 if (lockdep_dependency_visit(class, depth))
584 return;
585
561 if (DEBUG_LOCKS_WARN_ON(depth >= 20)) 586 if (DEBUG_LOCKS_WARN_ON(depth >= 20))
562 return; 587 return;
563 588
@@ -932,7 +957,7 @@ static noinline int print_circular_bug_tail(void)
932 if (debug_locks_silent) 957 if (debug_locks_silent)
933 return 0; 958 return 0;
934 959
935 this.class = check_source->class; 960 this.class = hlock_class(check_source);
936 if (!save_trace(&this.trace)) 961 if (!save_trace(&this.trace))
937 return 0; 962 return 0;
938 963
@@ -959,6 +984,67 @@ static int noinline print_infinite_recursion_bug(void)
959 return 0; 984 return 0;
960} 985}
961 986
987unsigned long __lockdep_count_forward_deps(struct lock_class *class,
988 unsigned int depth)
989{
990 struct lock_list *entry;
991 unsigned long ret = 1;
992
993 if (lockdep_dependency_visit(class, depth))
994 return 0;
995
996 /*
997 * Recurse this class's dependency list:
998 */
999 list_for_each_entry(entry, &class->locks_after, entry)
1000 ret += __lockdep_count_forward_deps(entry->class, depth + 1);
1001
1002 return ret;
1003}
1004
1005unsigned long lockdep_count_forward_deps(struct lock_class *class)
1006{
1007 unsigned long ret, flags;
1008
1009 local_irq_save(flags);
1010 __raw_spin_lock(&lockdep_lock);
1011 ret = __lockdep_count_forward_deps(class, 0);
1012 __raw_spin_unlock(&lockdep_lock);
1013 local_irq_restore(flags);
1014
1015 return ret;
1016}
1017
1018unsigned long __lockdep_count_backward_deps(struct lock_class *class,
1019 unsigned int depth)
1020{
1021 struct lock_list *entry;
1022 unsigned long ret = 1;
1023
1024 if (lockdep_dependency_visit(class, depth))
1025 return 0;
1026 /*
1027 * Recurse this class's dependency list:
1028 */
1029 list_for_each_entry(entry, &class->locks_before, entry)
1030 ret += __lockdep_count_backward_deps(entry->class, depth + 1);
1031
1032 return ret;
1033}
1034
1035unsigned long lockdep_count_backward_deps(struct lock_class *class)
1036{
1037 unsigned long ret, flags;
1038
1039 local_irq_save(flags);
1040 __raw_spin_lock(&lockdep_lock);
1041 ret = __lockdep_count_backward_deps(class, 0);
1042 __raw_spin_unlock(&lockdep_lock);
1043 local_irq_restore(flags);
1044
1045 return ret;
1046}
1047
962/* 1048/*
963 * Prove that the dependency graph starting at <entry> can not 1049 * Prove that the dependency graph starting at <entry> can not
964 * lead to <target>. Print an error and return 0 if it does. 1050 * lead to <target>. Print an error and return 0 if it does.
@@ -968,6 +1054,9 @@ check_noncircular(struct lock_class *source, unsigned int depth)
968{ 1054{
969 struct lock_list *entry; 1055 struct lock_list *entry;
970 1056
1057 if (lockdep_dependency_visit(source, depth))
1058 return 1;
1059
971 debug_atomic_inc(&nr_cyclic_check_recursions); 1060 debug_atomic_inc(&nr_cyclic_check_recursions);
972 if (depth > max_recursion_depth) 1061 if (depth > max_recursion_depth)
973 max_recursion_depth = depth; 1062 max_recursion_depth = depth;
@@ -977,7 +1066,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
977 * Check this lock's dependency list: 1066 * Check this lock's dependency list:
978 */ 1067 */
979 list_for_each_entry(entry, &source->locks_after, entry) { 1068 list_for_each_entry(entry, &source->locks_after, entry) {
980 if (entry->class == check_target->class) 1069 if (entry->class == hlock_class(check_target))
981 return print_circular_bug_header(entry, depth+1); 1070 return print_circular_bug_header(entry, depth+1);
982 debug_atomic_inc(&nr_cyclic_checks); 1071 debug_atomic_inc(&nr_cyclic_checks);
983 if (!check_noncircular(entry->class, depth+1)) 1072 if (!check_noncircular(entry->class, depth+1))
@@ -1011,6 +1100,9 @@ find_usage_forwards(struct lock_class *source, unsigned int depth)
1011 struct lock_list *entry; 1100 struct lock_list *entry;
1012 int ret; 1101 int ret;
1013 1102
1103 if (lockdep_dependency_visit(source, depth))
1104 return 1;
1105
1014 if (depth > max_recursion_depth) 1106 if (depth > max_recursion_depth)
1015 max_recursion_depth = depth; 1107 max_recursion_depth = depth;
1016 if (depth >= RECURSION_LIMIT) 1108 if (depth >= RECURSION_LIMIT)
@@ -1050,6 +1142,9 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
1050 struct lock_list *entry; 1142 struct lock_list *entry;
1051 int ret; 1143 int ret;
1052 1144
1145 if (lockdep_dependency_visit(source, depth))
1146 return 1;
1147
1053 if (!__raw_spin_is_locked(&lockdep_lock)) 1148 if (!__raw_spin_is_locked(&lockdep_lock))
1054 return DEBUG_LOCKS_WARN_ON(1); 1149 return DEBUG_LOCKS_WARN_ON(1);
1055 1150
@@ -1064,6 +1159,11 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
1064 return 2; 1159 return 2;
1065 } 1160 }
1066 1161
1162 if (!source && debug_locks_off_graph_unlock()) {
1163 WARN_ON(1);
1164 return 0;
1165 }
1166
1067 /* 1167 /*
1068 * Check this lock's dependency list: 1168 * Check this lock's dependency list:
1069 */ 1169 */
@@ -1103,9 +1203,9 @@ print_bad_irq_dependency(struct task_struct *curr,
1103 printk("\nand this task is already holding:\n"); 1203 printk("\nand this task is already holding:\n");
1104 print_lock(prev); 1204 print_lock(prev);
1105 printk("which would create a new lock dependency:\n"); 1205 printk("which would create a new lock dependency:\n");
1106 print_lock_name(prev->class); 1206 print_lock_name(hlock_class(prev));
1107 printk(" ->"); 1207 printk(" ->");
1108 print_lock_name(next->class); 1208 print_lock_name(hlock_class(next));
1109 printk("\n"); 1209 printk("\n");
1110 1210
1111 printk("\nbut this new dependency connects a %s-irq-safe lock:\n", 1211 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
@@ -1146,12 +1246,12 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
1146 1246
1147 find_usage_bit = bit_backwards; 1247 find_usage_bit = bit_backwards;
1148 /* fills in <backwards_match> */ 1248 /* fills in <backwards_match> */
1149 ret = find_usage_backwards(prev->class, 0); 1249 ret = find_usage_backwards(hlock_class(prev), 0);
1150 if (!ret || ret == 1) 1250 if (!ret || ret == 1)
1151 return ret; 1251 return ret;
1152 1252
1153 find_usage_bit = bit_forwards; 1253 find_usage_bit = bit_forwards;
1154 ret = find_usage_forwards(next->class, 0); 1254 ret = find_usage_forwards(hlock_class(next), 0);
1155 if (!ret || ret == 1) 1255 if (!ret || ret == 1)
1156 return ret; 1256 return ret;
1157 /* ret == 2 */ 1257 /* ret == 2 */
@@ -1272,18 +1372,32 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
1272 struct lockdep_map *next_instance, int read) 1372 struct lockdep_map *next_instance, int read)
1273{ 1373{
1274 struct held_lock *prev; 1374 struct held_lock *prev;
1375 struct held_lock *nest = NULL;
1275 int i; 1376 int i;
1276 1377
1277 for (i = 0; i < curr->lockdep_depth; i++) { 1378 for (i = 0; i < curr->lockdep_depth; i++) {
1278 prev = curr->held_locks + i; 1379 prev = curr->held_locks + i;
1279 if (prev->class != next->class) 1380
1381 if (prev->instance == next->nest_lock)
1382 nest = prev;
1383
1384 if (hlock_class(prev) != hlock_class(next))
1280 continue; 1385 continue;
1386
1281 /* 1387 /*
1282 * Allow read-after-read recursion of the same 1388 * Allow read-after-read recursion of the same
1283 * lock class (i.e. read_lock(lock)+read_lock(lock)): 1389 * lock class (i.e. read_lock(lock)+read_lock(lock)):
1284 */ 1390 */
1285 if ((read == 2) && prev->read) 1391 if ((read == 2) && prev->read)
1286 return 2; 1392 return 2;
1393
1394 /*
1395 * We're holding the nest_lock, which serializes this lock's
1396 * nesting behaviour.
1397 */
1398 if (nest)
1399 return 2;
1400
1287 return print_deadlock_bug(curr, prev, next); 1401 return print_deadlock_bug(curr, prev, next);
1288 } 1402 }
1289 return 1; 1403 return 1;
@@ -1329,7 +1443,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1329 */ 1443 */
1330 check_source = next; 1444 check_source = next;
1331 check_target = prev; 1445 check_target = prev;
1332 if (!(check_noncircular(next->class, 0))) 1446 if (!(check_noncircular(hlock_class(next), 0)))
1333 return print_circular_bug_tail(); 1447 return print_circular_bug_tail();
1334 1448
1335 if (!check_prev_add_irq(curr, prev, next)) 1449 if (!check_prev_add_irq(curr, prev, next))
@@ -1353,8 +1467,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1353 * chains - the second one will be new, but L1 already has 1467 * chains - the second one will be new, but L1 already has
1354 * L2 added to its dependency list, due to the first chain.) 1468 * L2 added to its dependency list, due to the first chain.)
1355 */ 1469 */
1356 list_for_each_entry(entry, &prev->class->locks_after, entry) { 1470 list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) {
1357 if (entry->class == next->class) { 1471 if (entry->class == hlock_class(next)) {
1358 if (distance == 1) 1472 if (distance == 1)
1359 entry->distance = 1; 1473 entry->distance = 1;
1360 return 2; 1474 return 2;
@@ -1365,26 +1479,28 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1365 * Ok, all validations passed, add the new lock 1479 * Ok, all validations passed, add the new lock
1366 * to the previous lock's dependency list: 1480 * to the previous lock's dependency list:
1367 */ 1481 */
1368 ret = add_lock_to_list(prev->class, next->class, 1482 ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
1369 &prev->class->locks_after, next->acquire_ip, distance); 1483 &hlock_class(prev)->locks_after,
1484 next->acquire_ip, distance);
1370 1485
1371 if (!ret) 1486 if (!ret)
1372 return 0; 1487 return 0;
1373 1488
1374 ret = add_lock_to_list(next->class, prev->class, 1489 ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
1375 &next->class->locks_before, next->acquire_ip, distance); 1490 &hlock_class(next)->locks_before,
1491 next->acquire_ip, distance);
1376 if (!ret) 1492 if (!ret)
1377 return 0; 1493 return 0;
1378 1494
1379 /* 1495 /*
1380 * Debugging printouts: 1496 * Debugging printouts:
1381 */ 1497 */
1382 if (verbose(prev->class) || verbose(next->class)) { 1498 if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
1383 graph_unlock(); 1499 graph_unlock();
1384 printk("\n new dependency: "); 1500 printk("\n new dependency: ");
1385 print_lock_name(prev->class); 1501 print_lock_name(hlock_class(prev));
1386 printk(" => "); 1502 printk(" => ");
1387 print_lock_name(next->class); 1503 print_lock_name(hlock_class(next));
1388 printk("\n"); 1504 printk("\n");
1389 dump_stack(); 1505 dump_stack();
1390 return graph_lock(); 1506 return graph_lock();
@@ -1481,7 +1597,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
1481 struct held_lock *hlock, 1597 struct held_lock *hlock,
1482 u64 chain_key) 1598 u64 chain_key)
1483{ 1599{
1484 struct lock_class *class = hlock->class; 1600 struct lock_class *class = hlock_class(hlock);
1485 struct list_head *hash_head = chainhashentry(chain_key); 1601 struct list_head *hash_head = chainhashentry(chain_key);
1486 struct lock_chain *chain; 1602 struct lock_chain *chain;
1487 struct held_lock *hlock_curr, *hlock_next; 1603 struct held_lock *hlock_curr, *hlock_next;
@@ -1554,7 +1670,7 @@ cache_hit:
1554 if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { 1670 if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
1555 chain->base = cn; 1671 chain->base = cn;
1556 for (j = 0; j < chain->depth - 1; j++, i++) { 1672 for (j = 0; j < chain->depth - 1; j++, i++) {
1557 int lock_id = curr->held_locks[i].class - lock_classes; 1673 int lock_id = curr->held_locks[i].class_idx - 1;
1558 chain_hlocks[chain->base + j] = lock_id; 1674 chain_hlocks[chain->base + j] = lock_id;
1559 } 1675 }
1560 chain_hlocks[chain->base + j] = class - lock_classes; 1676 chain_hlocks[chain->base + j] = class - lock_classes;
@@ -1650,7 +1766,7 @@ static void check_chain_key(struct task_struct *curr)
1650 WARN_ON(1); 1766 WARN_ON(1);
1651 return; 1767 return;
1652 } 1768 }
1653 id = hlock->class - lock_classes; 1769 id = hlock->class_idx - 1;
1654 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) 1770 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
1655 return; 1771 return;
1656 1772
@@ -1695,7 +1811,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
1695 print_lock(this); 1811 print_lock(this);
1696 1812
1697 printk("{%s} state was registered at:\n", usage_str[prev_bit]); 1813 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1698 print_stack_trace(this->class->usage_traces + prev_bit, 1); 1814 print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
1699 1815
1700 print_irqtrace_events(curr); 1816 print_irqtrace_events(curr);
1701 printk("\nother info that might help us debug this:\n"); 1817 printk("\nother info that might help us debug this:\n");
@@ -1714,7 +1830,7 @@ static inline int
1714valid_state(struct task_struct *curr, struct held_lock *this, 1830valid_state(struct task_struct *curr, struct held_lock *this,
1715 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) 1831 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1716{ 1832{
1717 if (unlikely(this->class->usage_mask & (1 << bad_bit))) 1833 if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit)))
1718 return print_usage_bug(curr, this, bad_bit, new_bit); 1834 return print_usage_bug(curr, this, bad_bit, new_bit);
1719 return 1; 1835 return 1;
1720} 1836}
@@ -1753,7 +1869,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1753 lockdep_print_held_locks(curr); 1869 lockdep_print_held_locks(curr);
1754 1870
1755 printk("\nthe first lock's dependencies:\n"); 1871 printk("\nthe first lock's dependencies:\n");
1756 print_lock_dependencies(this->class, 0); 1872 print_lock_dependencies(hlock_class(this), 0);
1757 1873
1758 printk("\nthe second lock's dependencies:\n"); 1874 printk("\nthe second lock's dependencies:\n");
1759 print_lock_dependencies(other, 0); 1875 print_lock_dependencies(other, 0);
@@ -1776,7 +1892,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
1776 1892
1777 find_usage_bit = bit; 1893 find_usage_bit = bit;
1778 /* fills in <forwards_match> */ 1894 /* fills in <forwards_match> */
1779 ret = find_usage_forwards(this->class, 0); 1895 ret = find_usage_forwards(hlock_class(this), 0);
1780 if (!ret || ret == 1) 1896 if (!ret || ret == 1)
1781 return ret; 1897 return ret;
1782 1898
@@ -1795,7 +1911,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
1795 1911
1796 find_usage_bit = bit; 1912 find_usage_bit = bit;
1797 /* fills in <backwards_match> */ 1913 /* fills in <backwards_match> */
1798 ret = find_usage_backwards(this->class, 0); 1914 ret = find_usage_backwards(hlock_class(this), 0);
1799 if (!ret || ret == 1) 1915 if (!ret || ret == 1)
1800 return ret; 1916 return ret;
1801 1917
@@ -1861,7 +1977,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1861 LOCK_ENABLED_HARDIRQS_READ, "hard-read")) 1977 LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
1862 return 0; 1978 return 0;
1863#endif 1979#endif
1864 if (hardirq_verbose(this->class)) 1980 if (hardirq_verbose(hlock_class(this)))
1865 ret = 2; 1981 ret = 2;
1866 break; 1982 break;
1867 case LOCK_USED_IN_SOFTIRQ: 1983 case LOCK_USED_IN_SOFTIRQ:
@@ -1886,7 +2002,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1886 LOCK_ENABLED_SOFTIRQS_READ, "soft-read")) 2002 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
1887 return 0; 2003 return 0;
1888#endif 2004#endif
1889 if (softirq_verbose(this->class)) 2005 if (softirq_verbose(hlock_class(this)))
1890 ret = 2; 2006 ret = 2;
1891 break; 2007 break;
1892 case LOCK_USED_IN_HARDIRQ_READ: 2008 case LOCK_USED_IN_HARDIRQ_READ:
@@ -1899,7 +2015,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1899 if (!check_usage_forwards(curr, this, 2015 if (!check_usage_forwards(curr, this,
1900 LOCK_ENABLED_HARDIRQS, "hard")) 2016 LOCK_ENABLED_HARDIRQS, "hard"))
1901 return 0; 2017 return 0;
1902 if (hardirq_verbose(this->class)) 2018 if (hardirq_verbose(hlock_class(this)))
1903 ret = 2; 2019 ret = 2;
1904 break; 2020 break;
1905 case LOCK_USED_IN_SOFTIRQ_READ: 2021 case LOCK_USED_IN_SOFTIRQ_READ:
@@ -1912,7 +2028,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1912 if (!check_usage_forwards(curr, this, 2028 if (!check_usage_forwards(curr, this,
1913 LOCK_ENABLED_SOFTIRQS, "soft")) 2029 LOCK_ENABLED_SOFTIRQS, "soft"))
1914 return 0; 2030 return 0;
1915 if (softirq_verbose(this->class)) 2031 if (softirq_verbose(hlock_class(this)))
1916 ret = 2; 2032 ret = 2;
1917 break; 2033 break;
1918 case LOCK_ENABLED_HARDIRQS: 2034 case LOCK_ENABLED_HARDIRQS:
@@ -1938,7 +2054,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1938 LOCK_USED_IN_HARDIRQ_READ, "hard-read")) 2054 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
1939 return 0; 2055 return 0;
1940#endif 2056#endif
1941 if (hardirq_verbose(this->class)) 2057 if (hardirq_verbose(hlock_class(this)))
1942 ret = 2; 2058 ret = 2;
1943 break; 2059 break;
1944 case LOCK_ENABLED_SOFTIRQS: 2060 case LOCK_ENABLED_SOFTIRQS:
@@ -1964,7 +2080,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1964 LOCK_USED_IN_SOFTIRQ_READ, "soft-read")) 2080 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
1965 return 0; 2081 return 0;
1966#endif 2082#endif
1967 if (softirq_verbose(this->class)) 2083 if (softirq_verbose(hlock_class(this)))
1968 ret = 2; 2084 ret = 2;
1969 break; 2085 break;
1970 case LOCK_ENABLED_HARDIRQS_READ: 2086 case LOCK_ENABLED_HARDIRQS_READ:
@@ -1979,7 +2095,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1979 LOCK_USED_IN_HARDIRQ, "hard")) 2095 LOCK_USED_IN_HARDIRQ, "hard"))
1980 return 0; 2096 return 0;
1981#endif 2097#endif
1982 if (hardirq_verbose(this->class)) 2098 if (hardirq_verbose(hlock_class(this)))
1983 ret = 2; 2099 ret = 2;
1984 break; 2100 break;
1985 case LOCK_ENABLED_SOFTIRQS_READ: 2101 case LOCK_ENABLED_SOFTIRQS_READ:
@@ -1994,7 +2110,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1994 LOCK_USED_IN_SOFTIRQ, "soft")) 2110 LOCK_USED_IN_SOFTIRQ, "soft"))
1995 return 0; 2111 return 0;
1996#endif 2112#endif
1997 if (softirq_verbose(this->class)) 2113 if (softirq_verbose(hlock_class(this)))
1998 ret = 2; 2114 ret = 2;
1999 break; 2115 break;
2000 default: 2116 default:
@@ -2310,7 +2426,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2310 * If already set then do not dirty the cacheline, 2426 * If already set then do not dirty the cacheline,
2311 * nor do any checks: 2427 * nor do any checks:
2312 */ 2428 */
2313 if (likely(this->class->usage_mask & new_mask)) 2429 if (likely(hlock_class(this)->usage_mask & new_mask))
2314 return 1; 2430 return 1;
2315 2431
2316 if (!graph_lock()) 2432 if (!graph_lock())
@@ -2318,14 +2434,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2318 /* 2434 /*
2319 * Make sure we didnt race: 2435 * Make sure we didnt race:
2320 */ 2436 */
2321 if (unlikely(this->class->usage_mask & new_mask)) { 2437 if (unlikely(hlock_class(this)->usage_mask & new_mask)) {
2322 graph_unlock(); 2438 graph_unlock();
2323 return 1; 2439 return 1;
2324 } 2440 }
2325 2441
2326 this->class->usage_mask |= new_mask; 2442 hlock_class(this)->usage_mask |= new_mask;
2327 2443
2328 if (!save_trace(this->class->usage_traces + new_bit)) 2444 if (!save_trace(hlock_class(this)->usage_traces + new_bit))
2329 return 0; 2445 return 0;
2330 2446
2331 switch (new_bit) { 2447 switch (new_bit) {
@@ -2405,7 +2521,7 @@ EXPORT_SYMBOL_GPL(lockdep_init_map);
2405 */ 2521 */
2406static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2522static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2407 int trylock, int read, int check, int hardirqs_off, 2523 int trylock, int read, int check, int hardirqs_off,
2408 unsigned long ip) 2524 struct lockdep_map *nest_lock, unsigned long ip)
2409{ 2525{
2410 struct task_struct *curr = current; 2526 struct task_struct *curr = current;
2411 struct lock_class *class = NULL; 2527 struct lock_class *class = NULL;
@@ -2459,10 +2575,12 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2459 return 0; 2575 return 0;
2460 2576
2461 hlock = curr->held_locks + depth; 2577 hlock = curr->held_locks + depth;
2462 2578 if (DEBUG_LOCKS_WARN_ON(!class))
2463 hlock->class = class; 2579 return 0;
2580 hlock->class_idx = class - lock_classes + 1;
2464 hlock->acquire_ip = ip; 2581 hlock->acquire_ip = ip;
2465 hlock->instance = lock; 2582 hlock->instance = lock;
2583 hlock->nest_lock = nest_lock;
2466 hlock->trylock = trylock; 2584 hlock->trylock = trylock;
2467 hlock->read = read; 2585 hlock->read = read;
2468 hlock->check = check; 2586 hlock->check = check;
@@ -2574,6 +2692,55 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2574 return 1; 2692 return 1;
2575} 2693}
2576 2694
2695static int
2696__lock_set_subclass(struct lockdep_map *lock,
2697 unsigned int subclass, unsigned long ip)
2698{
2699 struct task_struct *curr = current;
2700 struct held_lock *hlock, *prev_hlock;
2701 struct lock_class *class;
2702 unsigned int depth;
2703 int i;
2704
2705 depth = curr->lockdep_depth;
2706 if (DEBUG_LOCKS_WARN_ON(!depth))
2707 return 0;
2708
2709 prev_hlock = NULL;
2710 for (i = depth-1; i >= 0; i--) {
2711 hlock = curr->held_locks + i;
2712 /*
2713 * We must not cross into another context:
2714 */
2715 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2716 break;
2717 if (hlock->instance == lock)
2718 goto found_it;
2719 prev_hlock = hlock;
2720 }
2721 return print_unlock_inbalance_bug(curr, lock, ip);
2722
2723found_it:
2724 class = register_lock_class(lock, subclass, 0);
2725 hlock->class_idx = class - lock_classes + 1;
2726
2727 curr->lockdep_depth = i;
2728 curr->curr_chain_key = hlock->prev_chain_key;
2729
2730 for (; i < depth; i++) {
2731 hlock = curr->held_locks + i;
2732 if (!__lock_acquire(hlock->instance,
2733 hlock_class(hlock)->subclass, hlock->trylock,
2734 hlock->read, hlock->check, hlock->hardirqs_off,
2735 hlock->nest_lock, hlock->acquire_ip))
2736 return 0;
2737 }
2738
2739 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
2740 return 0;
2741 return 1;
2742}
2743
2577/* 2744/*
2578 * Remove the lock to the list of currently held locks in a 2745 * Remove the lock to the list of currently held locks in a
2579 * potentially non-nested (out of order) manner. This is a 2746 * potentially non-nested (out of order) manner. This is a
@@ -2624,9 +2791,9 @@ found_it:
2624 for (i++; i < depth; i++) { 2791 for (i++; i < depth; i++) {
2625 hlock = curr->held_locks + i; 2792 hlock = curr->held_locks + i;
2626 if (!__lock_acquire(hlock->instance, 2793 if (!__lock_acquire(hlock->instance,
2627 hlock->class->subclass, hlock->trylock, 2794 hlock_class(hlock)->subclass, hlock->trylock,
2628 hlock->read, hlock->check, hlock->hardirqs_off, 2795 hlock->read, hlock->check, hlock->hardirqs_off,
2629 hlock->acquire_ip)) 2796 hlock->nest_lock, hlock->acquire_ip))
2630 return 0; 2797 return 0;
2631 } 2798 }
2632 2799
@@ -2669,7 +2836,7 @@ static int lock_release_nested(struct task_struct *curr,
2669 2836
2670#ifdef CONFIG_DEBUG_LOCKDEP 2837#ifdef CONFIG_DEBUG_LOCKDEP
2671 hlock->prev_chain_key = 0; 2838 hlock->prev_chain_key = 0;
2672 hlock->class = NULL; 2839 hlock->class_idx = 0;
2673 hlock->acquire_ip = 0; 2840 hlock->acquire_ip = 0;
2674 hlock->irq_context = 0; 2841 hlock->irq_context = 0;
2675#endif 2842#endif
@@ -2738,18 +2905,36 @@ static void check_flags(unsigned long flags)
2738#endif 2905#endif
2739} 2906}
2740 2907
2908void
2909lock_set_subclass(struct lockdep_map *lock,
2910 unsigned int subclass, unsigned long ip)
2911{
2912 unsigned long flags;
2913
2914 if (unlikely(current->lockdep_recursion))
2915 return;
2916
2917 raw_local_irq_save(flags);
2918 current->lockdep_recursion = 1;
2919 check_flags(flags);
2920 if (__lock_set_subclass(lock, subclass, ip))
2921 check_chain_key(current);
2922 current->lockdep_recursion = 0;
2923 raw_local_irq_restore(flags);
2924}
2925
2926EXPORT_SYMBOL_GPL(lock_set_subclass);
2927
2741/* 2928/*
2742 * We are not always called with irqs disabled - do that here, 2929 * We are not always called with irqs disabled - do that here,
2743 * and also avoid lockdep recursion: 2930 * and also avoid lockdep recursion:
2744 */ 2931 */
2745void lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2932void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2746 int trylock, int read, int check, unsigned long ip) 2933 int trylock, int read, int check,
2934 struct lockdep_map *nest_lock, unsigned long ip)
2747{ 2935{
2748 unsigned long flags; 2936 unsigned long flags;
2749 2937
2750 if (unlikely(!lock_stat && !prove_locking))
2751 return;
2752
2753 if (unlikely(current->lockdep_recursion)) 2938 if (unlikely(current->lockdep_recursion))
2754 return; 2939 return;
2755 2940
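
lock_set_subclass() is a new annotation API: it re-tags an already-held lock with a different lockdep subclass without dropping it. The __lock_set_subclass() helper above does the work by rewinding the held-lock stack to the target lock, registering the new class, and replaying the later acquisitions through __lock_acquire(); the wrapper then revalidates the chain key. A hedged usage sketch mirroring the scheduler pattern added later in this commit; the structure and function are hypothetical, the lock calls are real:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct thing {
        spinlock_t lock;
};

static void work_on_pair(struct thing *a, struct thing *b)
{
        /* b->lock first, then a->lock as the nested member of the pair */
        spin_lock(&b->lock);
        spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);

        /* ... operate on both objects ... */

        spin_unlock(&b->lock);
        /*
         * a->lock was taken as the nested (subclass 1) half of the pair;
         * with b->lock gone it is an ordinary lock again, so drop it back
         * to subclass 0 for lockdep.
         */
        lock_set_subclass(&a->lock.dep_map, 0, _RET_IP_);

        /* ... continue under a->lock alone ... */
        spin_unlock(&a->lock);
}
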
@@ -2758,7 +2943,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2758 2943
2759 current->lockdep_recursion = 1; 2944 current->lockdep_recursion = 1;
2760 __lock_acquire(lock, subclass, trylock, read, check, 2945 __lock_acquire(lock, subclass, trylock, read, check,
2761 irqs_disabled_flags(flags), ip); 2946 irqs_disabled_flags(flags), nest_lock, ip);
2762 current->lockdep_recursion = 0; 2947 current->lockdep_recursion = 0;
2763 raw_local_irq_restore(flags); 2948 raw_local_irq_restore(flags);
2764} 2949}
@@ -2770,9 +2955,6 @@ void lock_release(struct lockdep_map *lock, int nested,
2770{ 2955{
2771 unsigned long flags; 2956 unsigned long flags;
2772 2957
2773 if (unlikely(!lock_stat && !prove_locking))
2774 return;
2775
2776 if (unlikely(current->lockdep_recursion)) 2958 if (unlikely(current->lockdep_recursion))
2777 return; 2959 return;
2778 2960
@@ -2845,9 +3027,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
2845found_it: 3027found_it:
2846 hlock->waittime_stamp = sched_clock(); 3028 hlock->waittime_stamp = sched_clock();
2847 3029
2848 point = lock_contention_point(hlock->class, ip); 3030 point = lock_contention_point(hlock_class(hlock), ip);
2849 3031
2850 stats = get_lock_stats(hlock->class); 3032 stats = get_lock_stats(hlock_class(hlock));
2851 if (point < ARRAY_SIZE(stats->contention_point)) 3033 if (point < ARRAY_SIZE(stats->contention_point))
2852 stats->contention_point[i]++; 3034 stats->contention_point[i]++;
2853 if (lock->cpu != smp_processor_id()) 3035 if (lock->cpu != smp_processor_id())
@@ -2893,7 +3075,7 @@ found_it:
2893 hlock->holdtime_stamp = now; 3075 hlock->holdtime_stamp = now;
2894 } 3076 }
2895 3077
2896 stats = get_lock_stats(hlock->class); 3078 stats = get_lock_stats(hlock_class(hlock));
2897 if (waittime) { 3079 if (waittime) {
2898 if (hlock->read) 3080 if (hlock->read)
2899 lock_time_inc(&stats->read_waittime, waittime); 3081 lock_time_inc(&stats->read_waittime, waittime);
@@ -2988,6 +3170,7 @@ static void zap_class(struct lock_class *class)
2988 list_del_rcu(&class->hash_entry); 3170 list_del_rcu(&class->hash_entry);
2989 list_del_rcu(&class->lock_entry); 3171 list_del_rcu(&class->lock_entry);
2990 3172
3173 class->key = NULL;
2991} 3174}
2992 3175
2993static inline int within(const void *addr, void *start, unsigned long size) 3176static inline int within(const void *addr, void *start, unsigned long size)
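
zap_class() now also clears class->key after unhashing the class; the key doubles as a liveness marker, and the lockdep_proc.c hunk below starts skipping chain entries whose class has a NULL key. A small sketch of that tombstone idea (field names from this diff, the helper names are hypothetical):

/* producer: tear the class down but leave a recognizable corpse */
static void zap_class_sketch(struct lock_class *class)
{
        list_del_rcu(&class->hash_entry);
        list_del_rcu(&class->lock_entry);
        class->key = NULL;              /* mark the static slot dead */
}

/* consumers walking the static lock_classes[] array check for it */
static int class_is_alive(const struct lock_class *class)
{
        return class->key != NULL;
}
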
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index c3600a091a2..55db193d366 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -17,9 +17,6 @@
17 */ 17 */
18#define MAX_LOCKDEP_ENTRIES 8192UL 18#define MAX_LOCKDEP_ENTRIES 8192UL
19 19
20#define MAX_LOCKDEP_KEYS_BITS 11
21#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
22
23#define MAX_LOCKDEP_CHAINS_BITS 14 20#define MAX_LOCKDEP_CHAINS_BITS 14
24#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) 21#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
25 22
@@ -53,6 +50,9 @@ extern unsigned int nr_process_chains;
53extern unsigned int max_lockdep_depth; 50extern unsigned int max_lockdep_depth;
54extern unsigned int max_recursion_depth; 51extern unsigned int max_recursion_depth;
55 52
53extern unsigned long lockdep_count_forward_deps(struct lock_class *);
54extern unsigned long lockdep_count_backward_deps(struct lock_class *);
55
56#ifdef CONFIG_DEBUG_LOCKDEP 56#ifdef CONFIG_DEBUG_LOCKDEP
57/* 57/*
58 * Various lockdep statistics: 58 * Various lockdep statistics:
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 9b0e940e254..fa19aee604c 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -63,34 +63,6 @@ static void l_stop(struct seq_file *m, void *v)
63{ 63{
64} 64}
65 65
66static unsigned long count_forward_deps(struct lock_class *class)
67{
68 struct lock_list *entry;
69 unsigned long ret = 1;
70
71 /*
72 * Recurse this class's dependency list:
73 */
74 list_for_each_entry(entry, &class->locks_after, entry)
75 ret += count_forward_deps(entry->class);
76
77 return ret;
78}
79
80static unsigned long count_backward_deps(struct lock_class *class)
81{
82 struct lock_list *entry;
83 unsigned long ret = 1;
84
85 /*
86 * Recurse this class's dependency list:
87 */
88 list_for_each_entry(entry, &class->locks_before, entry)
89 ret += count_backward_deps(entry->class);
90
91 return ret;
92}
93
94static void print_name(struct seq_file *m, struct lock_class *class) 66static void print_name(struct seq_file *m, struct lock_class *class)
95{ 67{
96 char str[128]; 68 char str[128];
@@ -124,10 +96,10 @@ static int l_show(struct seq_file *m, void *v)
124#ifdef CONFIG_DEBUG_LOCKDEP 96#ifdef CONFIG_DEBUG_LOCKDEP
125 seq_printf(m, " OPS:%8ld", class->ops); 97 seq_printf(m, " OPS:%8ld", class->ops);
126#endif 98#endif
127 nr_forward_deps = count_forward_deps(class); 99 nr_forward_deps = lockdep_count_forward_deps(class);
128 seq_printf(m, " FD:%5ld", nr_forward_deps); 100 seq_printf(m, " FD:%5ld", nr_forward_deps);
129 101
130 nr_backward_deps = count_backward_deps(class); 102 nr_backward_deps = lockdep_count_backward_deps(class);
131 seq_printf(m, " BD:%5ld", nr_backward_deps); 103 seq_printf(m, " BD:%5ld", nr_backward_deps);
132 104
133 get_usage_chars(class, &c1, &c2, &c3, &c4); 105 get_usage_chars(class, &c1, &c2, &c3, &c4);
@@ -229,6 +201,9 @@ static int lc_show(struct seq_file *m, void *v)
229 201
230 for (i = 0; i < chain->depth; i++) { 202 for (i = 0; i < chain->depth; i++) {
231 class = lock_chain_get_class(chain, i); 203 class = lock_chain_get_class(chain, i);
204 if (!class->key)
205 continue;
206
232 seq_printf(m, "[%p] ", class->key); 207 seq_printf(m, "[%p] ", class->key);
233 print_name(m, class); 208 print_name(m, class);
234 seq_puts(m, "\n"); 209 seq_puts(m, "\n");
@@ -350,7 +325,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
350 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 325 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
351 nr_hardirq_read_unsafe++; 326 nr_hardirq_read_unsafe++;
352 327
353 sum_forward_deps += count_forward_deps(class); 328 sum_forward_deps += lockdep_count_forward_deps(class);
354 } 329 }
355#ifdef CONFIG_DEBUG_LOCKDEP 330#ifdef CONFIG_DEBUG_LOCKDEP
356 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused); 331 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
diff --git a/kernel/marker.c b/kernel/marker.c
index 971da531790..7d1faecd7a5 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -126,6 +126,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
126 struct marker_probe_closure *multi; 126 struct marker_probe_closure *multi;
127 int i; 127 int i;
128 /* 128 /*
129 * Read mdata->ptype before mdata->multi.
130 */
131 smp_rmb();
132 multi = mdata->multi;
133 /*
129 * multi points to an array, therefore accessing the array 134 * multi points to an array, therefore accessing the array
130 * depends on reading multi. However, even in this case, 135 * depends on reading multi. However, even in this case,
131 * we must insure that the pointer is read _before_ the array 136 * we must insure that the pointer is read _before_ the array
@@ -133,7 +138,6 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
133 * in the fast path, so put the explicit barrier here. 138 * in the fast path, so put the explicit barrier here.
134 */ 139 */
135 smp_read_barrier_depends(); 140 smp_read_barrier_depends();
136 multi = mdata->multi;
137 for (i = 0; multi[i].func; i++) { 141 for (i = 0; multi[i].func; i++) {
138 va_start(args, call_private); 142 va_start(args, call_private);
139 multi[i].func(multi[i].probe_private, call_private, 143 multi[i].func(multi[i].probe_private, call_private,
@@ -175,6 +179,11 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
175 struct marker_probe_closure *multi; 179 struct marker_probe_closure *multi;
176 int i; 180 int i;
177 /* 181 /*
182 * Read mdata->ptype before mdata->multi.
183 */
184 smp_rmb();
185 multi = mdata->multi;
186 /*
178 * multi points to an array, therefore accessing the array 187 * multi points to an array, therefore accessing the array
179 * depends on reading multi. However, even in this case, 188 * depends on reading multi. However, even in this case,
180 * we must insure that the pointer is read _before_ the array 189 * we must insure that the pointer is read _before_ the array
@@ -182,7 +191,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
182 * in the fast path, so put the explicit barrier here. 191 * in the fast path, so put the explicit barrier here.
183 */ 192 */
184 smp_read_barrier_depends(); 193 smp_read_barrier_depends();
185 multi = mdata->multi;
186 for (i = 0; multi[i].func; i++) 194 for (i = 0; multi[i].func; i++)
187 multi[i].func(multi[i].probe_private, call_private, 195 multi[i].func(multi[i].probe_private, call_private,
188 mdata->format, &args); 196 mdata->format, &args);
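
Both marker callbacks now load mdata->multi once, before the probe loop, with an explicit smp_rmb() so that the earlier mdata->ptype check is ordered before the mdata->multi load. A generic publish/consume sketch of that barrier pairing; the structure and field names are hypothetical, not the marker code itself:

struct probe_entry;

struct publication {
        int ptype;                      /* 0 = single probe, 1 = multi array   */
        struct probe_entry *multi;      /* array installed by the updater      */
};

static void publish_multi(struct publication *p, struct probe_entry *array)
{
        p->multi = array;
        smp_wmb();                      /* array pointer visible...            */
        p->ptype = 1;                   /* ...before readers can see the flag  */
}

static struct probe_entry *read_multi(struct publication *p)
{
        if (!p->ptype)
                return NULL;
        smp_rmb();                      /* pairs with the smp_wmb() above      */
        return p->multi;                /* cannot observe a stale pointer      */
}
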
diff --git a/kernel/module.c b/kernel/module.c
index 61d212120df..08864d257eb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2288,7 +2288,7 @@ sys_init_module(void __user *umod,
2288 2288
2289 /* Start the module */ 2289 /* Start the module */
2290 if (mod->init != NULL) 2290 if (mod->init != NULL)
2291 ret = mod->init(); 2291 ret = do_one_initcall(mod->init);
2292 if (ret < 0) { 2292 if (ret < 0) {
2293 /* Init routine failed: abort. Try to protect us from 2293 /* Init routine failed: abort. Try to protect us from
2294 buggy refcounters. */ 2294 buggy refcounters. */
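
Calling mod->init() through do_one_initcall() gives module init routines the same handling as built-in initcalls (initcall_debug timing and post-call sanity reporting) instead of a bare indirect call. A rough sketch of that kind of wrapper, with hypothetical names; the real helper lives in init/main.c and does more:

#include <linux/ktime.h>
#include <linux/kernel.h>

static int run_initcall_sketch(int (*fn)(void))
{
        ktime_t t0 = ktime_get();
        int ret = fn();

        pr_debug("initcall %p returned %d after %lld usecs\n",
                 fn, ret, (long long)ktime_us_delta(ktime_get(), t0));
        return ret;
}
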
diff --git a/kernel/mutex.c b/kernel/mutex.c
index bcdc9ac8ef6..12c779dc65d 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -34,6 +34,7 @@
34/*** 34/***
35 * mutex_init - initialize the mutex 35 * mutex_init - initialize the mutex
36 * @lock: the mutex to be initialized 36 * @lock: the mutex to be initialized
37 * @key: the lock_class_key for the class; used by mutex lock debugging
37 * 38 *
38 * Initialize the mutex to unlocked state. 39 * Initialize the mutex to unlocked state.
39 * 40 *
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 8cb75702638..da9c2dda6a4 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -24,7 +24,7 @@
24 * requirement that the application has is cleaned up when closes the file 24 * requirement that the application has is cleaned up when closes the file
25 * pointer or exits the pm_qos_object will get an opportunity to clean up. 25 * pointer or exits the pm_qos_object will get an opportunity to clean up.
26 * 26 *
27 * mark gross mgross@linux.intel.com 27 * Mark Gross <mgross@linux.intel.com>
28 */ 28 */
29 29
30#include <linux/pm_qos_params.h> 30#include <linux/pm_qos_params.h>
@@ -211,8 +211,8 @@ EXPORT_SYMBOL_GPL(pm_qos_requirement);
211 * @value: defines the qos request 211 * @value: defines the qos request
212 * 212 *
213 * This function inserts a new entry in the pm_qos_class list of requested qos 213 * This function inserts a new entry in the pm_qos_class list of requested qos
214 * performance charactoistics. It recomputes the agregate QoS expectations for 214 * performance characteristics. It recomputes the aggregate QoS expectations
215 * the pm_qos_class of parrameters. 215 * for the pm_qos_class of parameters.
216 */ 216 */
217int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) 217int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
218{ 218{
@@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
250 * @name: identifies the request 250 * @name: identifies the request
251 * @value: defines the qos request 251 * @value: defines the qos request
252 * 252 *
253 * Updates an existing qos requierement for the pm_qos_class of parameters along 253 * Updates an existing qos requirement for the pm_qos_class of parameters along
254 * with updating the target pm_qos_class value. 254 * with updating the target pm_qos_class value.
255 * 255 *
256 * If the named request isn't in the lest then no change is made. 256 * If the named request isn't in the list then no change is made.
257 */ 257 */
258int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) 258int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
259{ 259{
@@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
287 * @pm_qos_class: identifies which list of qos request to us 287 * @pm_qos_class: identifies which list of qos request to us
288 * @name: identifies the request 288 * @name: identifies the request
289 * 289 *
290 * Will remove named qos request from pm_qos_class list of parrameters and 290 * Will remove named qos request from pm_qos_class list of parameters and
291 * recompute the current target value for the pm_qos_class. 291 * recompute the current target value for the pm_qos_class.
292 */ 292 */
293void pm_qos_remove_requirement(int pm_qos_class, char *name) 293void pm_qos_remove_requirement(int pm_qos_class, char *name)
@@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
319 * @notifier: notifier block managed by caller. 319 * @notifier: notifier block managed by caller.
320 * 320 *
321 * will register the notifier into a notification chain that gets called 321 * will register the notifier into a notification chain that gets called
322 * uppon changes to the pm_qos_class target value. 322 * upon changes to the pm_qos_class target value.
323 */ 323 */
324 int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) 324 int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
325{ 325{
@@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
338 * @notifier: notifier block to be removed. 338 * @notifier: notifier block to be removed.
339 * 339 *
340 * will remove the notifier from the notification chain that gets called 340 * will remove the notifier from the notification chain that gets called
341 * uppon changes to the pm_qos_class target value. 341 * upon changes to the pm_qos_class target value.
342 */ 342 */
343int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) 343int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
344{ 344{
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9a21681aa80..e36d5798cbf 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -289,21 +289,29 @@ void do_schedule_next_timer(struct siginfo *info)
289 else 289 else
290 schedule_next_timer(timr); 290 schedule_next_timer(timr);
291 291
292 info->si_overrun = timr->it_overrun_last; 292 info->si_overrun += timr->it_overrun_last;
293 } 293 }
294 294
295 if (timr) 295 if (timr)
296 unlock_timer(timr, flags); 296 unlock_timer(timr, flags);
297} 297}
298 298
299int posix_timer_event(struct k_itimer *timr,int si_private) 299int posix_timer_event(struct k_itimer *timr, int si_private)
300{ 300{
301 memset(&timr->sigq->info, 0, sizeof(siginfo_t)); 301 /*
302 * FIXME: if ->sigq is queued we can race with
303 * dequeue_signal()->do_schedule_next_timer().
304 *
305 * If dequeue_signal() sees the "right" value of
306 * si_sys_private it calls do_schedule_next_timer().
307 * We re-queue ->sigq and drop ->it_lock().
308 * do_schedule_next_timer() locks the timer
309 * and re-schedules it while ->sigq is pending.
310 * Not really bad, but not that we want.
311 */
302 timr->sigq->info.si_sys_private = si_private; 312 timr->sigq->info.si_sys_private = si_private;
303 /* Send signal to the process that owns this timer.*/
304 313
305 timr->sigq->info.si_signo = timr->it_sigev_signo; 314 timr->sigq->info.si_signo = timr->it_sigev_signo;
306 timr->sigq->info.si_errno = 0;
307 timr->sigq->info.si_code = SI_TIMER; 315 timr->sigq->info.si_code = SI_TIMER;
308 timr->sigq->info.si_tid = timr->it_id; 316 timr->sigq->info.si_tid = timr->it_id;
309 timr->sigq->info.si_value = timr->it_sigev_value; 317 timr->sigq->info.si_value = timr->it_sigev_value;
@@ -435,6 +443,7 @@ static struct k_itimer * alloc_posix_timer(void)
435 kmem_cache_free(posix_timers_cache, tmr); 443 kmem_cache_free(posix_timers_cache, tmr);
436 tmr = NULL; 444 tmr = NULL;
437 } 445 }
446 memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
438 return tmr; 447 return tmr;
439} 448}
440 449
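
do_schedule_next_timer() now adds it_overrun_last into si_overrun instead of overwriting it, and the siginfo memset moves to allocation time; the other half of the fix is in the kernel/signal.c hunk further down, where send_sigqueue() bumps si_overrun when the sigqueue is already pending and resets it to 0 when it is freshly queued. A simplified sketch of that accumulate-then-reset scheme with hypothetical names, not the kernel code:

struct timer_sig {
        int queued;             /* sigqueue already pending?              */
        int overrun;            /* deliveries merged into the pending one */
};

static void send_timer_signal(struct timer_sig *q)
{
        if (q->queued) {
                q->overrun++;   /* merged with the pending delivery */
                return;
        }
        q->overrun = 0;         /* fresh delivery starts clean      */
        q->queued = 1;
}

/* reader side: fold queue-level and timer-level overruns together */
static int total_overrun(const struct timer_sig *q, int it_overrun_last)
{
        return q->overrun + it_overrun_last;
}
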
diff --git a/kernel/printk.c b/kernel/printk.c
index a7f7559c5f6..b51b1567bb5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1309,14 +1309,14 @@ void tty_write_message(struct tty_struct *tty, char *msg)
1309 1309
1310#if defined CONFIG_PRINTK 1310#if defined CONFIG_PRINTK
1311 1311
1312DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
1313/* 1312/*
1314 * printk rate limiting, lifted from the networking subsystem. 1313 * printk rate limiting, lifted from the networking subsystem.
1315 * 1314 *
1316 * This enforces a rate limit: not more than one kernel message 1315 * This enforces a rate limit: not more than 10 kernel messages
1317 * every printk_ratelimit_jiffies to make a denial-of-service 1316 * every 5s to make a denial-of-service attack impossible.
1318 * attack impossible.
1319 */ 1317 */
1318DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
1319
1320int printk_ratelimit(void) 1320int printk_ratelimit(void)
1321{ 1321{
1322 return __ratelimit(&printk_ratelimit_state); 1322 return __ratelimit(&printk_ratelimit_state);
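
The hunk rewrites the stale comment (the limiter allows bursts of 10 messages per 5 seconds, not one per interval) and moves the DEFINE_RATELIMIT_STATE() definition under it. Typical use of the same generic ratelimit API that printk_ratelimit() is now built on:

#include <linux/kernel.h>
#include <linux/ratelimit.h>

/* a private limiter: at most 10 hits per 5 seconds */
static DEFINE_RATELIMIT_STATE(my_ratelimit, 5 * HZ, 10);

static void report_problem(void)
{
        if (__ratelimit(&my_ratelimit))
                printk(KERN_WARNING "my driver: something noisy happened\n");

        /* or simply reuse the global printk limiter */
        if (printk_ratelimit())
                printk(KERN_WARNING "my driver: also rate limited\n");
}
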
diff --git a/kernel/relay.c b/kernel/relay.c
index 04006ef970b..8d13a7855c0 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -944,6 +944,10 @@ static void relay_file_read_consume(struct rchan_buf *buf,
944 size_t n_subbufs = buf->chan->n_subbufs; 944 size_t n_subbufs = buf->chan->n_subbufs;
945 size_t read_subbuf; 945 size_t read_subbuf;
946 946
947 if (buf->subbufs_produced == buf->subbufs_consumed &&
948 buf->offset == buf->bytes_consumed)
949 return;
950
947 if (buf->bytes_consumed + bytes_consumed > subbuf_size) { 951 if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
948 relay_subbufs_consumed(buf->chan, buf->cpu, 1); 952 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
949 buf->bytes_consumed = 0; 953 buf->bytes_consumed = 0;
@@ -975,6 +979,8 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
975 979
976 relay_file_read_consume(buf, read_pos, 0); 980 relay_file_read_consume(buf, read_pos, 0);
977 981
982 consumed = buf->subbufs_consumed;
983
978 if (unlikely(buf->offset > subbuf_size)) { 984 if (unlikely(buf->offset > subbuf_size)) {
979 if (produced == consumed) 985 if (produced == consumed)
980 return 0; 986 return 0;
@@ -993,8 +999,12 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
993 if (consumed > produced) 999 if (consumed > produced)
994 produced += n_subbufs * subbuf_size; 1000 produced += n_subbufs * subbuf_size;
995 1001
996 if (consumed == produced) 1002 if (consumed == produced) {
1003 if (buf->offset == subbuf_size &&
1004 buf->subbufs_produced > buf->subbufs_consumed)
1005 return 1;
997 return 0; 1006 return 0;
1007 }
998 1008
999 return 1; 1009 return 1;
1000} 1010}
diff --git a/kernel/resource.c b/kernel/resource.c
index 74af2d7cb5a..f5b518eabef 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res)
490{ 490{
491 switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) { 491 switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) {
492 case IORESOURCE_SIZEALIGN: 492 case IORESOURCE_SIZEALIGN:
493 return res->end - res->start + 1; 493 return resource_size(res);
494 case IORESOURCE_STARTALIGN: 494 case IORESOURCE_STARTALIGN:
495 return res->start; 495 return res->start;
496 default: 496 default:
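
resource_size() replaces the open-coded res->end - res->start + 1; struct resource ranges are inclusive at both ends, which is exactly the "+ 1" people tend to forget. The helper in <linux/ioport.h> is essentially:

static inline resource_size_t resource_size(const struct resource *res)
{
        /* start and end are both inclusive, hence the + 1 */
        return res->end - res->start + 1;
}
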
diff --git a/kernel/sched.c b/kernel/sched.c
index 0236958addc..d601fb0406c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -600,7 +600,6 @@ struct rq {
600 /* BKL stats */ 600 /* BKL stats */
601 unsigned int bkl_count; 601 unsigned int bkl_count;
602#endif 602#endif
603 struct lock_class_key rq_lock_key;
604}; 603};
605 604
606static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 605static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -834,7 +833,7 @@ static inline u64 global_rt_period(void)
834 833
835static inline u64 global_rt_runtime(void) 834static inline u64 global_rt_runtime(void)
836{ 835{
837 if (sysctl_sched_rt_period < 0) 836 if (sysctl_sched_rt_runtime < 0)
838 return RUNTIME_INF; 837 return RUNTIME_INF;
839 838
840 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; 839 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
@@ -2759,10 +2758,10 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
2759 } else { 2758 } else {
2760 if (rq1 < rq2) { 2759 if (rq1 < rq2) {
2761 spin_lock(&rq1->lock); 2760 spin_lock(&rq1->lock);
2762 spin_lock(&rq2->lock); 2761 spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
2763 } else { 2762 } else {
2764 spin_lock(&rq2->lock); 2763 spin_lock(&rq2->lock);
2765 spin_lock(&rq1->lock); 2764 spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
2766 } 2765 }
2767 } 2766 }
2768 update_rq_clock(rq1); 2767 update_rq_clock(rq1);
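
double_rq_lock() already orders the two runqueue locks by address to avoid an ABBA deadlock; the change annotates the second acquisition with spin_lock_nested(..., SINGLE_DEPTH_NESTING) so lockdep knows two locks of the same class are held on purpose. The same pattern in generic form (a sketch, hypothetical function names):

#include <linux/spinlock.h>

/* take two locks of one class safely: order by address, annotate the second */
static void lock_pair(spinlock_t *a, spinlock_t *b)
{
        if (a == b) {
                spin_lock(a);
        } else if (a < b) {
                spin_lock(a);
                spin_lock_nested(b, SINGLE_DEPTH_NESTING);
        } else {
                spin_lock(b);
                spin_lock_nested(a, SINGLE_DEPTH_NESTING);
        }
}

static void unlock_pair(spinlock_t *a, spinlock_t *b)
{
        spin_unlock(a);
        if (a != b)
                spin_unlock(b);
}
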
@@ -2805,14 +2804,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2805 if (busiest < this_rq) { 2804 if (busiest < this_rq) {
2806 spin_unlock(&this_rq->lock); 2805 spin_unlock(&this_rq->lock);
2807 spin_lock(&busiest->lock); 2806 spin_lock(&busiest->lock);
2808 spin_lock(&this_rq->lock); 2807 spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
2809 ret = 1; 2808 ret = 1;
2810 } else 2809 } else
2811 spin_lock(&busiest->lock); 2810 spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
2812 } 2811 }
2813 return ret; 2812 return ret;
2814} 2813}
2815 2814
2815static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2816 __releases(busiest->lock)
2817{
2818 spin_unlock(&busiest->lock);
2819 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
2820}
2821
2816/* 2822/*
2817 * If dest_cpu is allowed for this process, migrate the task to it. 2823 * If dest_cpu is allowed for this process, migrate the task to it.
2818 * This is accomplished by forcing the cpu_allowed mask to only 2824 * This is accomplished by forcing the cpu_allowed mask to only
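
double_unlock_balance() is the new counterpart to double_lock_balance(): it releases busiest->lock and then uses lock_set_subclass() to re-annotate this_rq->lock as subclass 0, since that lock is no longer the nested member of a pair (double_lock_balance() may have retaken it with SINGLE_DEPTH_NESTING). The sched.c and sched_rt.c hunks below convert the raw spin_unlock(&busiest->lock) call sites to this helper. A hedged sketch of the calling pattern only:

/* balance-style caller; struct rq and the helpers are as in this commit */
static void balance_against(struct rq *this_rq, struct rq *busiest)
{
        /* caller already holds this_rq->lock */
        if (double_lock_balance(this_rq, busiest)) {
                /*
                 * Return value 1 means this_rq->lock was dropped and
                 * retaken to respect the lock order, so any state read
                 * before the call may be stale and needs rechecking.
                 */
        }

        /* ... migrate tasks between the two runqueues ... */

        double_unlock_balance(this_rq, busiest);  /* this_rq->lock still held */
}
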
@@ -3637,7 +3643,7 @@ redo:
3637 ld_moved = move_tasks(this_rq, this_cpu, busiest, 3643 ld_moved = move_tasks(this_rq, this_cpu, busiest,
3638 imbalance, sd, CPU_NEWLY_IDLE, 3644 imbalance, sd, CPU_NEWLY_IDLE,
3639 &all_pinned); 3645 &all_pinned);
3640 spin_unlock(&busiest->lock); 3646 double_unlock_balance(this_rq, busiest);
3641 3647
3642 if (unlikely(all_pinned)) { 3648 if (unlikely(all_pinned)) {
3643 cpu_clear(cpu_of(busiest), *cpus); 3649 cpu_clear(cpu_of(busiest), *cpus);
@@ -3752,7 +3758,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3752 else 3758 else
3753 schedstat_inc(sd, alb_failed); 3759 schedstat_inc(sd, alb_failed);
3754 } 3760 }
3755 spin_unlock(&target_rq->lock); 3761 double_unlock_balance(busiest_rq, target_rq);
3756} 3762}
3757 3763
3758#ifdef CONFIG_NO_HZ 3764#ifdef CONFIG_NO_HZ
@@ -5004,19 +5010,21 @@ recheck:
5004 return -EPERM; 5010 return -EPERM;
5005 } 5011 }
5006 5012
5013 if (user) {
5007#ifdef CONFIG_RT_GROUP_SCHED 5014#ifdef CONFIG_RT_GROUP_SCHED
5008 /* 5015 /*
5009 * Do not allow realtime tasks into groups that have no runtime 5016 * Do not allow realtime tasks into groups that have no runtime
5010 * assigned. 5017 * assigned.
5011 */ 5018 */
5012 if (user 5019 if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
5013 && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) 5020 return -EPERM;
5014 return -EPERM;
5015#endif 5021#endif
5016 5022
5017 retval = security_task_setscheduler(p, policy, param); 5023 retval = security_task_setscheduler(p, policy, param);
5018 if (retval) 5024 if (retval)
5019 return retval; 5025 return retval;
5026 }
5027
5020 /* 5028 /*
5021 * make sure no PI-waiters arrive (or leave) while we are 5029 * make sure no PI-waiters arrive (or leave) while we are
5022 * changing the priority of the task: 5030 * changing the priority of the task:
@@ -7671,34 +7679,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7671} 7679}
7672 7680
7673#ifdef CONFIG_SCHED_MC 7681#ifdef CONFIG_SCHED_MC
7674static ssize_t sched_mc_power_savings_show(struct sys_device *dev, 7682static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
7675 struct sysdev_attribute *attr, char *page) 7683 char *page)
7676{ 7684{
7677 return sprintf(page, "%u\n", sched_mc_power_savings); 7685 return sprintf(page, "%u\n", sched_mc_power_savings);
7678} 7686}
7679static ssize_t sched_mc_power_savings_store(struct sys_device *dev, 7687static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
7680 struct sysdev_attribute *attr,
7681 const char *buf, size_t count) 7688 const char *buf, size_t count)
7682{ 7689{
7683 return sched_power_savings_store(buf, count, 0); 7690 return sched_power_savings_store(buf, count, 0);
7684} 7691}
7685static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, 7692static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
7686 sched_mc_power_savings_store); 7693 sched_mc_power_savings_show,
7694 sched_mc_power_savings_store);
7687#endif 7695#endif
7688 7696
7689#ifdef CONFIG_SCHED_SMT 7697#ifdef CONFIG_SCHED_SMT
7690static ssize_t sched_smt_power_savings_show(struct sys_device *dev, 7698static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
7691 struct sysdev_attribute *attr, char *page) 7699 char *page)
7692{ 7700{
7693 return sprintf(page, "%u\n", sched_smt_power_savings); 7701 return sprintf(page, "%u\n", sched_smt_power_savings);
7694} 7702}
7695static ssize_t sched_smt_power_savings_store(struct sys_device *dev, 7703static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
7696 struct sysdev_attribute *attr,
7697 const char *buf, size_t count) 7704 const char *buf, size_t count)
7698{ 7705{
7699 return sched_power_savings_store(buf, count, 1); 7706 return sched_power_savings_store(buf, count, 1);
7700} 7707}
7701static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, 7708static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644,
7709 sched_smt_power_savings_show,
7702 sched_smt_power_savings_store); 7710 sched_smt_power_savings_store);
7703#endif 7711#endif
7704 7712
@@ -7998,7 +8006,6 @@ void __init sched_init(void)
7998 8006
7999 rq = cpu_rq(i); 8007 rq = cpu_rq(i);
8000 spin_lock_init(&rq->lock); 8008 spin_lock_init(&rq->lock);
8001 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
8002 rq->nr_running = 0; 8009 rq->nr_running = 0;
8003 init_cfs_rq(&rq->cfs, rq); 8010 init_cfs_rq(&rq->cfs, rq);
8004 init_rt_rq(&rq->rt, rq); 8011 init_rt_rq(&rq->rt, rq);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 22ed55d1167..204991a0bfa 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -32,13 +32,19 @@
32#include <linux/ktime.h> 32#include <linux/ktime.h>
33#include <linux/module.h> 33#include <linux/module.h>
34 34
35/*
36 * Scheduler clock - returns current time in nanosec units.
37 * This is default implementation.
38 * Architectures and sub-architectures can override this.
39 */
40unsigned long long __attribute__((weak)) sched_clock(void)
41{
42 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
43}
35 44
36#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 45static __read_mostly int sched_clock_running;
37 46
38#define MULTI_SHIFT 15 47#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
39/* Max is double, Min is 1/2 */
40#define MAX_MULTI (2LL << MULTI_SHIFT)
41#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
42 48
43struct sched_clock_data { 49struct sched_clock_data {
44 /* 50 /*
@@ -49,14 +55,9 @@ struct sched_clock_data {
49 raw_spinlock_t lock; 55 raw_spinlock_t lock;
50 56
51 unsigned long tick_jiffies; 57 unsigned long tick_jiffies;
52 u64 prev_raw;
53 u64 tick_raw; 58 u64 tick_raw;
54 u64 tick_gtod; 59 u64 tick_gtod;
55 u64 clock; 60 u64 clock;
56 s64 multi;
57#ifdef CONFIG_NO_HZ
58 int check_max;
59#endif
60}; 61};
61 62
62static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); 63static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
@@ -71,8 +72,6 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
71 return &per_cpu(sched_clock_data, cpu); 72 return &per_cpu(sched_clock_data, cpu);
72} 73}
73 74
74static __read_mostly int sched_clock_running;
75
76void sched_clock_init(void) 75void sched_clock_init(void)
77{ 76{
78 u64 ktime_now = ktime_to_ns(ktime_get()); 77 u64 ktime_now = ktime_to_ns(ktime_get());
@@ -84,90 +83,39 @@ void sched_clock_init(void)
84 83
85 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 84 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
86 scd->tick_jiffies = now_jiffies; 85 scd->tick_jiffies = now_jiffies;
87 scd->prev_raw = 0;
88 scd->tick_raw = 0; 86 scd->tick_raw = 0;
89 scd->tick_gtod = ktime_now; 87 scd->tick_gtod = ktime_now;
90 scd->clock = ktime_now; 88 scd->clock = ktime_now;
91 scd->multi = 1 << MULTI_SHIFT;
92#ifdef CONFIG_NO_HZ
93 scd->check_max = 1;
94#endif
95 } 89 }
96 90
97 sched_clock_running = 1; 91 sched_clock_running = 1;
98} 92}
99 93
100#ifdef CONFIG_NO_HZ
101/*
102 * The dynamic ticks makes the delta jiffies inaccurate. This
103 * prevents us from checking the maximum time update.
104 * Disable the maximum check during stopped ticks.
105 */
106void sched_clock_tick_stop(int cpu)
107{
108 struct sched_clock_data *scd = cpu_sdc(cpu);
109
110 scd->check_max = 0;
111}
112
113void sched_clock_tick_start(int cpu)
114{
115 struct sched_clock_data *scd = cpu_sdc(cpu);
116
117 scd->check_max = 1;
118}
119
120static int check_max(struct sched_clock_data *scd)
121{
122 return scd->check_max;
123}
124#else
125static int check_max(struct sched_clock_data *scd)
126{
127 return 1;
128}
129#endif /* CONFIG_NO_HZ */
130
131/* 94/*
132 * update the percpu scd from the raw @now value 95 * update the percpu scd from the raw @now value
133 * 96 *
134 * - filter out backward motion 97 * - filter out backward motion
135 * - use jiffies to generate a min,max window to clip the raw values 98 * - use jiffies to generate a min,max window to clip the raw values
136 */ 99 */
137static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *time) 100static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
138{ 101{
139 unsigned long now_jiffies = jiffies; 102 unsigned long now_jiffies = jiffies;
140 long delta_jiffies = now_jiffies - scd->tick_jiffies; 103 long delta_jiffies = now_jiffies - scd->tick_jiffies;
141 u64 clock = scd->clock; 104 u64 clock = scd->clock;
142 u64 min_clock, max_clock; 105 u64 min_clock, max_clock;
143 s64 delta = now - scd->prev_raw; 106 s64 delta = now - scd->tick_raw;
144 107
145 WARN_ON_ONCE(!irqs_disabled()); 108 WARN_ON_ONCE(!irqs_disabled());
146 109 min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
147 /*
148 * At schedule tick the clock can be just under the gtod. We don't
149 * want to push it too prematurely.
150 */
151 min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
152 if (min_clock > TICK_NSEC)
153 min_clock -= TICK_NSEC / 2;
154 110
155 if (unlikely(delta < 0)) { 111 if (unlikely(delta < 0)) {
156 clock++; 112 clock++;
157 goto out; 113 goto out;
158 } 114 }
159 115
160 /* 116 max_clock = min_clock + TICK_NSEC;
161 * The clock must stay within a jiffie of the gtod.
162 * But since we may be at the start of a jiffy or the end of one
163 * we add another jiffy buffer.
164 */
165 max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
166
167 delta *= scd->multi;
168 delta >>= MULTI_SHIFT;
169 117
170 if (unlikely(clock + delta > max_clock) && check_max(scd)) { 118 if (unlikely(clock + delta > max_clock)) {
171 if (clock < max_clock) 119 if (clock < max_clock)
172 clock = max_clock; 120 clock = max_clock;
173 else 121 else
@@ -180,12 +128,10 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *tim
180 if (unlikely(clock < min_clock)) 128 if (unlikely(clock < min_clock))
181 clock = min_clock; 129 clock = min_clock;
182 130
183 if (time) 131 scd->tick_jiffies = now_jiffies;
184 *time = clock; 132 scd->clock = clock;
185 else { 133
186 scd->prev_raw = now; 134 return clock;
187 scd->clock = clock;
188 }
189} 135}
190 136
191static void lock_double_clock(struct sched_clock_data *data1, 137static void lock_double_clock(struct sched_clock_data *data1,
@@ -203,7 +149,7 @@ static void lock_double_clock(struct sched_clock_data *data1,
203u64 sched_clock_cpu(int cpu) 149u64 sched_clock_cpu(int cpu)
204{ 150{
205 struct sched_clock_data *scd = cpu_sdc(cpu); 151 struct sched_clock_data *scd = cpu_sdc(cpu);
206 u64 now, clock; 152 u64 now, clock, this_clock, remote_clock;
207 153
208 if (unlikely(!sched_clock_running)) 154 if (unlikely(!sched_clock_running))
209 return 0ull; 155 return 0ull;
@@ -212,43 +158,44 @@ u64 sched_clock_cpu(int cpu)
212 now = sched_clock(); 158 now = sched_clock();
213 159
214 if (cpu != raw_smp_processor_id()) { 160 if (cpu != raw_smp_processor_id()) {
215 /*
216 * in order to update a remote cpu's clock based on our
217 * unstable raw time rebase it against:
218 * tick_raw (offset between raw counters)
219 * tick_gotd (tick offset between cpus)
220 */
221 struct sched_clock_data *my_scd = this_scd(); 161 struct sched_clock_data *my_scd = this_scd();
222 162
223 lock_double_clock(scd, my_scd); 163 lock_double_clock(scd, my_scd);
224 164
225 now -= my_scd->tick_raw; 165 this_clock = __update_sched_clock(my_scd, now);
226 now += scd->tick_raw; 166 remote_clock = scd->clock;
227 167
228 now += my_scd->tick_gtod; 168 /*
229 now -= scd->tick_gtod; 169 * Use the opportunity that we have both locks
170 * taken to couple the two clocks: we take the
171 * larger time as the latest time for both
172 * runqueues. (this creates monotonic movement)
173 */
174 if (likely(remote_clock < this_clock)) {
175 clock = this_clock;
176 scd->clock = clock;
177 } else {
178 /*
179 * Should be rare, but possible:
180 */
181 clock = remote_clock;
182 my_scd->clock = remote_clock;
183 }
230 184
231 __raw_spin_unlock(&my_scd->lock); 185 __raw_spin_unlock(&my_scd->lock);
232
233 __update_sched_clock(scd, now, &clock);
234
235 __raw_spin_unlock(&scd->lock);
236
237 } else { 186 } else {
238 __raw_spin_lock(&scd->lock); 187 __raw_spin_lock(&scd->lock);
239 __update_sched_clock(scd, now, NULL); 188 clock = __update_sched_clock(scd, now);
240 clock = scd->clock;
241 __raw_spin_unlock(&scd->lock);
242 } 189 }
243 190
191 __raw_spin_unlock(&scd->lock);
192
244 return clock; 193 return clock;
245} 194}
246 195
247void sched_clock_tick(void) 196void sched_clock_tick(void)
248{ 197{
249 struct sched_clock_data *scd = this_scd(); 198 struct sched_clock_data *scd = this_scd();
250 unsigned long now_jiffies = jiffies;
251 s64 mult, delta_gtod, delta_raw;
252 u64 now, now_gtod; 199 u64 now, now_gtod;
253 200
254 if (unlikely(!sched_clock_running)) 201 if (unlikely(!sched_clock_running))
@@ -260,29 +207,14 @@ void sched_clock_tick(void)
260 now = sched_clock(); 207 now = sched_clock();
261 208
262 __raw_spin_lock(&scd->lock); 209 __raw_spin_lock(&scd->lock);
263 __update_sched_clock(scd, now, NULL); 210 __update_sched_clock(scd, now);
264 /* 211 /*
265 * update tick_gtod after __update_sched_clock() because that will 212 * update tick_gtod after __update_sched_clock() because that will
266 * already observe 1 new jiffy; adding a new tick_gtod to that would 213 * already observe 1 new jiffy; adding a new tick_gtod to that would
267 * increase the clock 2 jiffies. 214 * increase the clock 2 jiffies.
268 */ 215 */
269 delta_gtod = now_gtod - scd->tick_gtod;
270 delta_raw = now - scd->tick_raw;
271
272 if ((long)delta_raw > 0) {
273 mult = delta_gtod << MULTI_SHIFT;
274 do_div(mult, delta_raw);
275 scd->multi = mult;
276 if (scd->multi > MAX_MULTI)
277 scd->multi = MAX_MULTI;
278 else if (scd->multi < MIN_MULTI)
279 scd->multi = MIN_MULTI;
280 } else
281 scd->multi = 1 << MULTI_SHIFT;
282
283 scd->tick_raw = now; 216 scd->tick_raw = now;
284 scd->tick_gtod = now_gtod; 217 scd->tick_gtod = now_gtod;
285 scd->tick_jiffies = now_jiffies;
286 __raw_spin_unlock(&scd->lock); 218 __raw_spin_unlock(&scd->lock);
287} 219}
288 220
@@ -301,7 +233,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
301void sched_clock_idle_wakeup_event(u64 delta_ns) 233void sched_clock_idle_wakeup_event(u64 delta_ns)
302{ 234{
303 struct sched_clock_data *scd = this_scd(); 235 struct sched_clock_data *scd = this_scd();
304 u64 now = sched_clock();
305 236
306 /* 237 /*
307 * Override the previous timestamp and ignore all 238 * Override the previous timestamp and ignore all
@@ -310,27 +241,30 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
310 * rq clock: 241 * rq clock:
311 */ 242 */
312 __raw_spin_lock(&scd->lock); 243 __raw_spin_lock(&scd->lock);
313 scd->prev_raw = now;
314 scd->clock += delta_ns; 244 scd->clock += delta_ns;
315 scd->multi = 1 << MULTI_SHIFT;
316 __raw_spin_unlock(&scd->lock); 245 __raw_spin_unlock(&scd->lock);
317 246
318 touch_softlockup_watchdog(); 247 touch_softlockup_watchdog();
319} 248}
320EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 249EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
321 250
322#endif 251#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
323 252
324/* 253void sched_clock_init(void)
325 * Scheduler clock - returns current time in nanosec units.
326 * This is default implementation.
327 * Architectures and sub-architectures can override this.
328 */
329unsigned long long __attribute__((weak)) sched_clock(void)
330{ 254{
331 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); 255 sched_clock_running = 1;
332} 256}
333 257
258u64 sched_clock_cpu(int cpu)
259{
260 if (unlikely(!sched_clock_running))
261 return 0;
262
263 return sched_clock();
264}
265
266#endif
267
334unsigned long long cpu_clock(int cpu) 268unsigned long long cpu_clock(int cpu)
335{ 269{
336 unsigned long long clock; 270 unsigned long long clock;
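
The sched_clock.c rewrite drops the per-CPU multiplier and the NO_HZ check_max machinery; the interesting new behaviour is in sched_clock_cpu() for remote CPUs, where both per-CPU scd structures are locked and the two clocks are coupled by moving whichever one lags up to the other, so cross-CPU readings stay monotonic. A conceptual sketch of just the coupling step, not the kernel code:

/* called with both per-CPU clocks locked: advance whichever one lags */
static u64 couple_clocks(u64 *this_clock, u64 *remote_clock)
{
        if (*remote_clock < *this_clock)
                *remote_clock = *this_clock;    /* the common case    */
        else
                *this_clock = *remote_clock;    /* rare, but possible */

        return *this_clock;                     /* == max of the two  */
}
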
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cf2cd6ce4cb..fb8994c6d4b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -899,7 +899,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
899 * doesn't make sense. Rely on vruntime for fairness. 899 * doesn't make sense. Rely on vruntime for fairness.
900 */ 900 */
901 if (rq->curr != p) 901 if (rq->curr != p)
902 delta = max(10000LL, delta); 902 delta = max_t(s64, 10000LL, delta);
903 903
904 hrtick_start(rq, delta); 904 hrtick_start(rq, delta);
905 } 905 }
@@ -1442,18 +1442,23 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
1442 struct task_struct *p = NULL; 1442 struct task_struct *p = NULL;
1443 struct sched_entity *se; 1443 struct sched_entity *se;
1444 1444
1445 while (next != &cfs_rq->tasks) { 1445 if (next == &cfs_rq->tasks)
1446 return NULL;
1447
1448 /* Skip over entities that are not tasks */
1449 do {
1446 se = list_entry(next, struct sched_entity, group_node); 1450 se = list_entry(next, struct sched_entity, group_node);
1447 next = next->next; 1451 next = next->next;
1452 } while (next != &cfs_rq->tasks && !entity_is_task(se));
1448 1453
1449 /* Skip over entities that are not tasks */ 1454 if (next == &cfs_rq->tasks)
1450 if (entity_is_task(se)) { 1455 return NULL;
1451 p = task_of(se);
1452 break;
1453 }
1454 }
1455 1456
1456 cfs_rq->balance_iterator = next; 1457 cfs_rq->balance_iterator = next;
1458
1459 if (entity_is_task(se))
1460 p = task_of(se);
1461
1457 return p; 1462 return p;
1458} 1463}
1459 1464
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 908c04f9dad..6163e4cf885 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -861,6 +861,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
861#define RT_MAX_TRIES 3 861#define RT_MAX_TRIES 3
862 862
863static int double_lock_balance(struct rq *this_rq, struct rq *busiest); 863static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
864static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
865
864static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); 866static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
865 867
866static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 868static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
@@ -1022,7 +1024,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1022 break; 1024 break;
1023 1025
1024 /* try again */ 1026 /* try again */
1025 spin_unlock(&lowest_rq->lock); 1027 double_unlock_balance(rq, lowest_rq);
1026 lowest_rq = NULL; 1028 lowest_rq = NULL;
1027 } 1029 }
1028 1030
@@ -1091,7 +1093,7 @@ static int push_rt_task(struct rq *rq)
1091 1093
1092 resched_task(lowest_rq->curr); 1094 resched_task(lowest_rq->curr);
1093 1095
1094 spin_unlock(&lowest_rq->lock); 1096 double_unlock_balance(rq, lowest_rq);
1095 1097
1096 ret = 1; 1098 ret = 1;
1097out: 1099out:
@@ -1197,7 +1199,7 @@ static int pull_rt_task(struct rq *this_rq)
1197 1199
1198 } 1200 }
1199 skip: 1201 skip:
1200 spin_unlock(&src_rq->lock); 1202 double_unlock_balance(this_rq, src_rq);
1201 } 1203 }
1202 1204
1203 return ret; 1205 return ret;
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index aaaeae8244e..94a62c0d4ad 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -212,9 +212,7 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
212 waiter.up = 0; 212 waiter.up = 0;
213 213
214 for (;;) { 214 for (;;) {
215 if (state == TASK_INTERRUPTIBLE && signal_pending(task)) 215 if (signal_pending_state(state, task))
216 goto interrupted;
217 if (state == TASK_KILLABLE && fatal_signal_pending(task))
218 goto interrupted; 216 goto interrupted;
219 if (timeout <= 0) 217 if (timeout <= 0)
220 goto timed_out; 218 goto timed_out;
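
The two state-specific checks collapse into signal_pending_state(), which encodes the same decision for any sleep state in one place. The helper is declared in <linux/sched.h> and behaves roughly like this sketch:

static inline int signal_pending_state(long state, struct task_struct *p)
{
        if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
                return 0;               /* plain uninterruptible sleep */
        if (!signal_pending(p))
                return 0;               /* nothing pending at all      */

        /* interruptible breaks on any signal, killable only on fatal ones */
        return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
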
diff --git a/kernel/signal.c b/kernel/signal.c
index 954f77d7e3b..c539f60c6f4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1304,6 +1304,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1304 q->info.si_overrun++; 1304 q->info.si_overrun++;
1305 goto out; 1305 goto out;
1306 } 1306 }
1307 q->info.si_overrun = 0;
1307 1308
1308 signalfd_notify(t, sig); 1309 signalfd_notify(t, sig);
1309 pending = group ? &t->signal->shared_pending : &t->pending; 1310 pending = group ? &t->signal->shared_pending : &t->pending;
diff --git a/kernel/smp.c b/kernel/smp.c
index 96fc7c0edc5..782e2b93e46 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -135,7 +135,8 @@ void generic_smp_call_function_interrupt(void)
135 */ 135 */
136 smp_wmb(); 136 smp_wmb();
137 data->csd.flags &= ~CSD_FLAG_WAIT; 137 data->csd.flags &= ~CSD_FLAG_WAIT;
138 } else 138 }
139 if (data->csd.flags & CSD_FLAG_ALLOC)
139 call_rcu(&data->rcu_head, rcu_free_call_data); 140 call_rcu(&data->rcu_head, rcu_free_call_data);
140 } 141 }
141 rcu_read_unlock(); 142 rcu_read_unlock();
@@ -260,6 +261,42 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
260 generic_exec_single(cpu, data); 261 generic_exec_single(cpu, data);
261} 262}
262 263
264/* Dummy function */
265static void quiesce_dummy(void *unused)
266{
267}
268
269/*
270 * Ensure stack based data used in call function mask is safe to free.
271 *
272 * This is needed by smp_call_function_mask when using on-stack data, because
273 * a single call function queue is shared by all CPUs, and any CPU may pick up
274 * the data item on the queue at any time before it is deleted. So we need to
275 * ensure that all CPUs have transitioned through a quiescent state after
276 * this call.
277 *
278 * This is a very slow function, implemented by sending synchronous IPIs to
279 * all possible CPUs. For this reason, we have to alloc data rather than use
280 * stack based data even in the case of synchronous calls. The stack based
281 * data is then just used for deadlock/oom fallback which will be very rare.
282 *
283 * If a faster scheme can be made, we could go back to preferring stack based
284 * data -- the data allocation/free is non-zero cost.
285 */
286static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
287{
288 struct call_single_data data;
289 int cpu;
290
291 data.func = quiesce_dummy;
292 data.info = NULL;
293
294 for_each_cpu_mask(cpu, mask) {
295 data.flags = CSD_FLAG_WAIT;
296 generic_exec_single(cpu, &data);
297 }
298}
299
263/** 300/**
264 * smp_call_function_mask(): Run a function on a set of other CPUs. 301 * smp_call_function_mask(): Run a function on a set of other CPUs.
265 * @mask: The set of cpus to run on. 302 * @mask: The set of cpus to run on.
@@ -285,6 +322,7 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
285 cpumask_t allbutself; 322 cpumask_t allbutself;
286 unsigned long flags; 323 unsigned long flags;
287 int cpu, num_cpus; 324 int cpu, num_cpus;
325 int slowpath = 0;
288 326
289 /* Can deadlock when called with interrupts disabled */ 327 /* Can deadlock when called with interrupts disabled */
290 WARN_ON(irqs_disabled()); 328 WARN_ON(irqs_disabled());
@@ -306,15 +344,16 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
306 return smp_call_function_single(cpu, func, info, wait); 344 return smp_call_function_single(cpu, func, info, wait);
307 } 345 }
308 346
309 if (!wait) { 347 data = kmalloc(sizeof(*data), GFP_ATOMIC);
310 data = kmalloc(sizeof(*data), GFP_ATOMIC); 348 if (data) {
311 if (data) 349 data->csd.flags = CSD_FLAG_ALLOC;
312 data->csd.flags = CSD_FLAG_ALLOC; 350 if (wait)
313 } 351 data->csd.flags |= CSD_FLAG_WAIT;
314 if (!data) { 352 } else {
315 data = &d; 353 data = &d;
316 data->csd.flags = CSD_FLAG_WAIT; 354 data->csd.flags = CSD_FLAG_WAIT;
317 wait = 1; 355 wait = 1;
356 slowpath = 1;
318 } 357 }
319 358
320 spin_lock_init(&data->lock); 359 spin_lock_init(&data->lock);
@@ -331,8 +370,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
331 arch_send_call_function_ipi(mask); 370 arch_send_call_function_ipi(mask);
332 371
333 /* optionally wait for the CPUs to complete */ 372 /* optionally wait for the CPUs to complete */
334 if (wait) 373 if (wait) {
335 csd_flag_wait(&data->csd); 374 csd_flag_wait(&data->csd);
375 if (unlikely(slowpath))
376 smp_call_function_mask_quiesce_stack(mask);
377 }
336 378
337 return 0; 379 return 0;
338} 380}
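Two related changes land in smp.c. First, generic_smp_call_function_interrupt() now tests CSD_FLAG_ALLOC explicitly, because CSD_FLAG_WAIT and CSD_FLAG_ALLOC are no longer mutually exclusive: smp_call_function_mask() kmallocs the call data even for wait == 1. Second, the on-stack fallback used when that kmalloc fails gains a dummy-IPI quiesce pass, so the caller's stack frame cannot be touched by other CPUs after the call returns. The caller-visible contract is unchanged; a hypothetical user of that era (all names below are illustrative, not taken from this merge) would look like:

    #include <linux/smp.h>
    #include <linux/cpumask.h>
    #include <linux/preempt.h>
    #include <linux/kernel.h>

    struct flush_args {
        int level;                  /* example payload, read-only for callees */
    };

    static void flush_local_state(void *info)
    {
        struct flush_args *args = info;

        /* per-CPU work keyed on args->level would go here */
        pr_debug("flush level %d on cpu %d\n", args->level, smp_processor_id());
    }

    static void flush_other_cpus(struct flush_args *args)
    {
        cpumask_t mask = cpu_online_map;

        preempt_disable();
        cpu_clear(smp_processor_id(), mask);
        /*
         * wait == 1: on return every CPU in 'mask' has run the handler,
         * and the quiesce pass above guarantees none of them still holds
         * a reference to the call data, even on the kmalloc-failure path.
         */
        smp_call_function_mask(mask, flush_local_state, args, 1);
        preempt_enable();
    }
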
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index a1fb54c93cd..44baeea94ab 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -292,6 +292,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
292} 292}
293 293
294EXPORT_SYMBOL(_spin_lock_nested); 294EXPORT_SYMBOL(_spin_lock_nested);
295
295unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) 296unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
296{ 297{
297 unsigned long flags; 298 unsigned long flags;
@@ -314,6 +315,16 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas
314 315
315EXPORT_SYMBOL(_spin_lock_irqsave_nested); 316EXPORT_SYMBOL(_spin_lock_irqsave_nested);
316 317
318void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
319 struct lockdep_map *nest_lock)
320{
321 preempt_disable();
322 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
323 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
324}
325
326EXPORT_SYMBOL(_spin_lock_nest_lock);
327
317#endif 328#endif
318 329
319void __lockfunc _spin_unlock(spinlock_t *lock) 330void __lockfunc _spin_unlock(spinlock_t *lock)
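_spin_lock_nest_lock() is the out-of-line backend for the new spin_lock_nest_lock(lock, nest_lock) annotation: many locks of the same class may be held at once without tripping lockdep, provided the designated outer lock stays held for the duration. A usage sketch with hypothetical parent/child structures (the in-tree user of the pattern at this point is the mm_take_all_locks() style traversal):

    #include <linux/spinlock.h>
    #include <linux/list.h>

    /* hypothetical types, for illustration only */
    struct child  { spinlock_t lock; struct list_head list; };
    struct parent { spinlock_t lock; struct list_head children; };

    static void lock_whole_tree(struct parent *parent)
    {
        struct child *child;

        spin_lock(&parent->lock);
        list_for_each_entry(child, &parent->children, list) {
            /*
             * All child locks share one lock class; naming parent->lock
             * as the nest lock tells lockdep the walk is serialized by
             * it, so it is not reported as recursive locking.
             */
            spin_lock_nest_lock(&child->lock, &parent->lock);
        }
    }

    static void unlock_whole_tree(struct parent *parent)
    {
        struct child *child;

        list_for_each_entry(child, &parent->children, list)
            spin_unlock(&child->lock);
        spin_unlock(&parent->lock);
    }
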
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e446c7c7d6a..af3c7cea258 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -65,7 +65,6 @@ static void ack_state(void)
65static int stop_cpu(struct stop_machine_data *smdata) 65static int stop_cpu(struct stop_machine_data *smdata)
66{ 66{
67 enum stopmachine_state curstate = STOPMACHINE_NONE; 67 enum stopmachine_state curstate = STOPMACHINE_NONE;
68 int uninitialized_var(ret);
69 68
70 /* Simple state machine */ 69 /* Simple state machine */
71 do { 70 do {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 825b4c00fe4..f5da526424a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -289,7 +289,6 @@ void tick_nohz_stop_sched_tick(int inidle)
289 ts->tick_stopped = 1; 289 ts->tick_stopped = 1;
290 ts->idle_jiffies = last_jiffies; 290 ts->idle_jiffies = last_jiffies;
291 rcu_enter_nohz(); 291 rcu_enter_nohz();
292 sched_clock_tick_stop(cpu);
293 } 292 }
294 293
295 /* 294 /*
@@ -392,7 +391,6 @@ void tick_nohz_restart_sched_tick(void)
392 select_nohz_load_balancer(0); 391 select_nohz_load_balancer(0);
393 now = ktime_get(); 392 now = ktime_get();
394 tick_do_update_jiffies64(now); 393 tick_do_update_jiffies64(now);
395 sched_clock_tick_start(cpu);
396 cpu_clear(cpu, nohz_cpu_mask); 394 cpu_clear(cpu, nohz_cpu_mask);
397 395
398 /* 396 /*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ec7e4f62aaf..4048e92aa04 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -290,11 +290,11 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
290 290
291 BUG_ON(get_wq_data(work) != cwq); 291 BUG_ON(get_wq_data(work) != cwq);
292 work_clear_pending(work); 292 work_clear_pending(work);
293 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 293 lock_map_acquire(&cwq->wq->lockdep_map);
294 lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_); 294 lock_map_acquire(&lockdep_map);
295 f(work); 295 f(work);
296 lock_release(&lockdep_map, 1, _THIS_IP_); 296 lock_map_release(&lockdep_map);
297 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); 297 lock_map_release(&cwq->wq->lockdep_map);
298 298
299 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { 299 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
300 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " 300 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
@@ -413,8 +413,8 @@ void flush_workqueue(struct workqueue_struct *wq)
413 int cpu; 413 int cpu;
414 414
415 might_sleep(); 415 might_sleep();
416 lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 416 lock_map_acquire(&wq->lockdep_map);
417 lock_release(&wq->lockdep_map, 1, _THIS_IP_); 417 lock_map_release(&wq->lockdep_map);
418 for_each_cpu_mask_nr(cpu, *cpu_map) 418 for_each_cpu_mask_nr(cpu, *cpu_map)
419 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); 419 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
420} 420}
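Throughout workqueue.c the open-coded lock_acquire()/lock_release() pairs become lock_map_acquire()/lock_map_release(). These are thin wrappers from include/linux/lockdep.h, roughly as defined in this series (a sketch from that era, not quoted from this diff):

    #ifdef CONFIG_DEBUG_LOCK_ALLOC
    # ifdef CONFIG_PROVE_LOCKING
    #  define lock_map_acquire(l)   lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_)
    # else
    #  define lock_map_acquire(l)   lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_)
    # endif
    # define lock_map_release(l)    lock_release(l, 1, _THIS_IP_)
    #else
    # define lock_map_acquire(l)    do { } while (0)
    # define lock_map_release(l)    do { } while (0)
    #endif

Callers that only want to tell lockdep "this map is notionally taken and dropped here" no longer have to track the raw lock_acquire() signature, which grows a nest_lock argument in this same merge (visible in the spin_acquire_nest() call in the spinlock.c hunk above).
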
@@ -441,8 +441,8 @@ int flush_work(struct work_struct *work)
441 if (!cwq) 441 if (!cwq)
442 return 0; 442 return 0;
443 443
444 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 444 lock_map_acquire(&cwq->wq->lockdep_map);
445 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); 445 lock_map_release(&cwq->wq->lockdep_map);
446 446
447 prev = NULL; 447 prev = NULL;
448 spin_lock_irq(&cwq->lock); 448 spin_lock_irq(&cwq->lock);
@@ -536,8 +536,8 @@ static void wait_on_work(struct work_struct *work)
536 536
537 might_sleep(); 537 might_sleep();
538 538
539 lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 539 lock_map_acquire(&work->lockdep_map);
540 lock_release(&work->lockdep_map, 1, _THIS_IP_); 540 lock_map_release(&work->lockdep_map);
541 541
542 cwq = get_wq_data(work); 542 cwq = get_wq_data(work);
543 if (!cwq) 543 if (!cwq)
@@ -830,10 +830,21 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
830 start_workqueue_thread(cwq, -1); 830 start_workqueue_thread(cwq, -1);
831 } else { 831 } else {
832 cpu_maps_update_begin(); 832 cpu_maps_update_begin();
833 /*
834 * We must place this wq on list even if the code below fails.
835 * cpu_down(cpu) can remove cpu from cpu_populated_map before
836 * destroy_workqueue() takes the lock, in that case we leak
837 * cwq[cpu]->thread.
838 */
833 spin_lock(&workqueue_lock); 839 spin_lock(&workqueue_lock);
834 list_add(&wq->list, &workqueues); 840 list_add(&wq->list, &workqueues);
835 spin_unlock(&workqueue_lock); 841 spin_unlock(&workqueue_lock);
836 842 /*
843 * We must initialize cwqs for each possible cpu even if we
844 * are going to call destroy_workqueue() finally. Otherwise
845 * cpu_up() can hit the uninitialized cwq once we drop the
846 * lock.
847 */
837 for_each_possible_cpu(cpu) { 848 for_each_possible_cpu(cpu) {
838 cwq = init_cpu_workqueue(wq, cpu); 849 cwq = init_cpu_workqueue(wq, cpu);
839 if (err || !cpu_online(cpu)) 850 if (err || !cpu_online(cpu))
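The two comments added to __create_workqueue_key() describe ordering against CPU hotplug. The code that depends on that ordering is the error handling just below this hunk; paraphrased (not quoted from this page), the tail of the function is roughly:

    for_each_possible_cpu(cpu) {
        cwq = init_cpu_workqueue(wq, cpu);  /* every possible cpu, even after an error */
        if (err || !cpu_online(cpu))
            continue;
        err = create_workqueue_thread(cwq, cpu);
        start_workqueue_thread(cwq, cpu);
    }
    cpu_maps_update_done();

    if (err) {
        destroy_workqueue(wq);  /* relies on wq already being on the workqueues list */
        wq = NULL;
    }
    return wq;

So the early list_add() lets destroy_workqueue() find and reap any created threads even if cpu_down() shrinks cpu_populated_map first, and the unconditional init_cpu_workqueue() loop keeps a racing cpu_up() from seeing an uninitialized cwq.
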
@@ -861,8 +872,8 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
861 if (cwq->thread == NULL) 872 if (cwq->thread == NULL)
862 return; 873 return;
863 874
864 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 875 lock_map_acquire(&cwq->wq->lockdep_map);
865 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); 876 lock_map_release(&cwq->wq->lockdep_map);
866 877
867 flush_cpu_workqueue(cwq); 878 flush_cpu_workqueue(cwq);
868 /* 879 /*