diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 71 |
1 files changed, 48 insertions, 23 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 91cf85b36dd5..d5ab79cf516d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -54,7 +54,6 @@ | |||
54 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
55 | #include <asm/atomic.h> | 55 | #include <asm/atomic.h> |
56 | #include <linux/mutex.h> | 56 | #include <linux/mutex.h> |
57 | #include <linux/kfifo.h> | ||
58 | #include <linux/workqueue.h> | 57 | #include <linux/workqueue.h> |
59 | #include <linux/cgroup.h> | 58 | #include <linux/cgroup.h> |
60 | 59 | ||
@@ -486,13 +485,38 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | |||
486 | static void | 485 | static void |
487 | update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | 486 | update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) |
488 | { | 487 | { |
489 | if (!dattr) | ||
490 | return; | ||
491 | if (dattr->relax_domain_level < c->relax_domain_level) | 488 | if (dattr->relax_domain_level < c->relax_domain_level) |
492 | dattr->relax_domain_level = c->relax_domain_level; | 489 | dattr->relax_domain_level = c->relax_domain_level; |
493 | return; | 490 | return; |
494 | } | 491 | } |
495 | 492 | ||
493 | static void | ||
494 | update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | ||
495 | { | ||
496 | LIST_HEAD(q); | ||
497 | |||
498 | list_add(&c->stack_list, &q); | ||
499 | while (!list_empty(&q)) { | ||
500 | struct cpuset *cp; | ||
501 | struct cgroup *cont; | ||
502 | struct cpuset *child; | ||
503 | |||
504 | cp = list_first_entry(&q, struct cpuset, stack_list); | ||
505 | list_del(q.next); | ||
506 | |||
507 | if (cpus_empty(cp->cpus_allowed)) | ||
508 | continue; | ||
509 | |||
510 | if (is_sched_load_balance(cp)) | ||
511 | update_domain_attr(dattr, cp); | ||
512 | |||
513 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | ||
514 | child = cgroup_cs(cont); | ||
515 | list_add_tail(&child->stack_list, &q); | ||
516 | } | ||
517 | } | ||
518 | } | ||
519 | |||
496 | /* | 520 | /* |
497 | * rebuild_sched_domains() | 521 | * rebuild_sched_domains() |
498 | * | 522 | * |
@@ -532,7 +556,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | |||
532 | * So the reverse nesting would risk an ABBA deadlock. | 556 | * So the reverse nesting would risk an ABBA deadlock. |
533 | * | 557 | * |
534 | * The three key local variables below are: | 558 | * The three key local variables below are: |
535 | * q - a kfifo queue of cpuset pointers, used to implement a | 559 | * q - a linked-list queue of cpuset pointers, used to implement a |
536 | * top-down scan of all cpusets. This scan loads a pointer | 560 | * top-down scan of all cpusets. This scan loads a pointer |
537 | * to each cpuset marked is_sched_load_balance into the | 561 | * to each cpuset marked is_sched_load_balance into the |
538 | * array 'csa'. For our purposes, rebuilding the schedulers | 562 | * array 'csa'. For our purposes, rebuilding the schedulers |
@@ -567,7 +591,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | |||
567 | 591 | ||
568 | void rebuild_sched_domains(void) | 592 | void rebuild_sched_domains(void) |
569 | { | 593 | { |
567 | struct kfifo *q; /* queue of cpusets to be scanned */ | 594 | LIST_HEAD(q); /* queue of cpusets to be scanned */ |
571 | struct cpuset *cp; /* scans q */ | 595 | struct cpuset *cp; /* scans q */ |
572 | struct cpuset **csa; /* array of all cpuset ptrs */ | 596 | struct cpuset **csa; /* array of all cpuset ptrs */ |
573 | int csn; /* how many cpuset ptrs in csa so far */ | 597 | int csn; /* how many cpuset ptrs in csa so far */ |
@@ -577,7 +601,6 @@ void rebuild_sched_domains(void) | |||
577 | int ndoms; /* number of sched domains in result */ | 601 | int ndoms; /* number of sched domains in result */ |
578 | int nslot; /* next empty doms[] cpumask_t slot */ | 602 | int nslot; /* next empty doms[] cpumask_t slot */ |
579 | 603 | ||
580 | q = NULL; | ||
581 | csa = NULL; | 604 | csa = NULL; |
582 | doms = NULL; | 605 | doms = NULL; |
583 | dattr = NULL; | 606 | dattr = NULL; |
@@ -591,35 +614,42 @@ void rebuild_sched_domains(void) | |||
591 | dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); | 614 | dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); |
592 | if (dattr) { | 615 | if (dattr) { |
593 | *dattr = SD_ATTR_INIT; | 616 | *dattr = SD_ATTR_INIT; |
594 | update_domain_attr(dattr, &top_cpuset); | 617 | update_domain_attr_tree(dattr, &top_cpuset); |
595 | } | 618 | } |
596 | *doms = top_cpuset.cpus_allowed; | 619 | *doms = top_cpuset.cpus_allowed; |
597 | goto rebuild; | 620 | goto rebuild; |
598 | } | 621 | } |
599 | 622 | ||
600 | q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL); | ||
601 | if (IS_ERR(q)) | ||
602 | goto done; | ||
603 | csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); | 623 | csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); |
604 | if (!csa) | 624 | if (!csa) |
605 | goto done; | 625 | goto done; |
606 | csn = 0; | 626 | csn = 0; |
607 | 627 | ||
608 | cp = &top_cpuset; | 628 | list_add(&top_cpuset.stack_list, &q); |
609 | __kfifo_put(q, (void *)&cp, sizeof(cp)); | 629 | while (!list_empty(&q)) { |
610 | while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { | ||
611 | struct cgroup *cont; | 630 | struct cgroup *cont; |
612 | struct cpuset *child; /* scans child cpusets of cp */ | 631 | struct cpuset *child; /* scans child cpusets of cp */ |
613 | 632 | ||
633 | cp = list_first_entry(&q, struct cpuset, stack_list); | ||
634 | list_del(q.next); | ||
635 | |||
614 | if (cpus_empty(cp->cpus_allowed)) | 636 | if (cpus_empty(cp->cpus_allowed)) |
615 | continue; | 637 | continue; |
616 | 638 | ||
617 | if (is_sched_load_balance(cp)) | 639 | /* |
640 | * The child cpusets of a load-balanced cpuset contain only subsets | ||
641 | * of its cpus, so skip scanning them here; update_domain_attr_tree() | ||
642 | * is called later to compute the relax_domain_level of the | ||
643 | * corresponding sched domain. | ||
644 | */ | ||
645 | if (is_sched_load_balance(cp)) { | ||
618 | csa[csn++] = cp; | 646 | csa[csn++] = cp; |
647 | continue; | ||
648 | } | ||
619 | 649 | ||
620 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | 650 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { |
621 | child = cgroup_cs(cont); | 651 | child = cgroup_cs(cont); |
622 | __kfifo_put(q, (void *)&child, sizeof(cp)); | 652 | list_add_tail(&child->stack_list, &q); |
623 | } | 653 | } |
624 | } | 654 | } |
625 | 655 | ||
@@ -686,7 +716,7 @@ restart: | |||
686 | cpus_or(*dp, *dp, b->cpus_allowed); | 716 | cpus_or(*dp, *dp, b->cpus_allowed); |
687 | b->pn = -1; | 717 | b->pn = -1; |
688 | if (dattr) | 718 | if (dattr) |
689 | update_domain_attr(dattr | 719 | update_domain_attr_tree(dattr |
690 | + nslot, b); | 720 | + nslot, b); |
691 | } | 721 | } |
692 | } | 722 | } |
@@ -702,8 +732,6 @@ rebuild: | |||
702 | put_online_cpus(); | 732 | put_online_cpus(); |
703 | 733 | ||
704 | done: | 734 | done: |
705 | if (q && !IS_ERR(q)) | ||
706 | kfifo_free(q); | ||
707 | kfree(csa); | 735 | kfree(csa); |
708 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ | 736 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ |
709 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ | 737 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ |
@@ -1833,24 +1861,21 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
1833 | */ | 1861 | */ |
1834 | static void scan_for_empty_cpusets(const struct cpuset *root) | 1862 | static void scan_for_empty_cpusets(const struct cpuset *root) |
1835 | { | 1863 | { |
1864 | LIST_HEAD(queue); | ||
1836 | struct cpuset *cp; /* scans cpusets being updated */ | 1865 | struct cpuset *cp; /* scans cpusets being updated */ |
1837 | struct cpuset *child; /* scans child cpusets of cp */ | 1866 | struct cpuset *child; /* scans child cpusets of cp */ |
1838 | struct list_head queue; | ||
1839 | struct cgroup *cont; | 1867 | struct cgroup *cont; |
1840 | nodemask_t oldmems; | 1868 | nodemask_t oldmems; |
1841 | 1869 | ||
1842 | INIT_LIST_HEAD(&queue); | ||
1843 | |||
1844 | list_add_tail((struct list_head *)&root->stack_list, &queue); | 1870 | list_add_tail((struct list_head *)&root->stack_list, &queue); |
1845 | 1871 | ||
1846 | while (!list_empty(&queue)) { | 1872 | while (!list_empty(&queue)) { |
1847 | cp = container_of(queue.next, struct cpuset, stack_list); | 1873 | cp = list_first_entry(&queue, struct cpuset, stack_list); |
1848 | list_del(queue.next); | 1874 | list_del(queue.next); |
1849 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | 1875 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { |
1850 | child = cgroup_cs(cont); | 1876 | child = cgroup_cs(cont); |
1851 | list_add_tail(&child->stack_list, &queue); | 1877 | list_add_tail(&child->stack_list, &queue); |
1852 | } | 1878 | } |
1853 | cont = cp->css.cgroup; | ||
1854 | 1879 | ||
1855 | /* Continue past cpusets with all cpus, mems online */ | 1880 | /* Continue past cpusets with all cpus, mems online */ |
1856 | if (cpus_subset(cp->cpus_allowed, cpu_online_map) && | 1881 | if (cpus_subset(cp->cpus_allowed, cpu_online_map) && |