author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2011-04-07 08:09:50 -0400
committer  Ingo Molnar <mingo@elte.hu>               2011-04-11 06:58:19 -0400
commit     dce840a08702bd13a9a186e07e63d1ef82256b5e (patch)
tree       168bb98aed7f5761ebe31aa92c34959e9d0f238a
parent     a9c9a9b6bff27ac9c746344a9c1a19bf3327002c (diff)
sched: Dynamically allocate sched_domain/sched_group data-structures
Instead of relying on static allocations for the sched_domain and
sched_group trees, dynamically allocate and RCU free them.
Allocating these dynamically also allows for some build_sched_groups()
simplification, since we can now (as with the other simplifications)
rely on the sched_domain tree instead of hard-coded knowledge.
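
The lifetime rule behind the RCU freeing is the ordinary deferral
pattern: readers walk the domain/group pointers under rcu_read_lock(),
and the writer never frees a node in place but schedules the kfree()
through an rcu_head embedded in the object. A minimal sketch of that
pattern follows; the domain_stub type and function names are
illustrative only, the patch's real free_sched_domain() and
destroy_sched_domain() appear in the diff below.

```c
#include <linux/kernel.h>	/* container_of() */
#include <linux/rcupdate.h>	/* struct rcu_head, call_rcu() */
#include <linux/slab.h>		/* kfree() */

/* Hypothetical stand-in for the real struct sched_domain. */
struct domain_stub {
	struct domain_stub *parent;
	struct rcu_head rcu;	/* reused for deferred freeing, like the patch's union */
};

/* RCU callback: runs only after every pre-existing reader has finished. */
static void domain_stub_free_rcu(struct rcu_head *rcu)
{
	struct domain_stub *d = container_of(rcu, struct domain_stub, rcu);

	kfree(d);
}

/* Writer side: unlink the object first, then hand the memory to RCU. */
static void domain_stub_destroy(struct domain_stub *d)
{
	call_rcu(&d->rcu, domain_stub_free_rcu);
}
```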
One tricky thing to note is that detach_destroy_domains() needs to hold
rcu_read_lock() over the entire tear-down; doing it per-cpu is not
sufficient, since that can lead to partial sched_group existence (this
could possibly be solved by doing the tear-down backwards, but holding
the lock across the whole thing is much more robust).
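
In code terms the detach path ends up shaped like the sketch below,
condensed from the detach_destroy_domains() hunk in the diff (the
_sketch name is illustrative, and it assumes the kernel/sched.c helpers
it calls): a single read-side critical section spans every CPU's
detach, so a concurrent domain walker sees either the complete old tree
or the NULL domain, never a half-destroyed group list.

```c
static void detach_destroy_domains_sketch(const struct cpumask *cpu_map)
{
	int i;

	rcu_read_lock();	/* per-cpu lock/unlock here would not be enough */
	for_each_cpu(i, cpu_map)
		cpu_attach_domain(NULL, &def_root_domain, i);
	rcu_read_unlock();
}
```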
A consequence of the above is that we can no longer print the
sched_domain debug output from cpu_attach_domain(), since that might now
run with preemption disabled (due to classic RCU etc.) while
sched_domain_debug() does GFP_KERNEL allocations.
Another thing to note is that we now fully rely on normal RCU and not
RCU-sched; this is because, with the new and exciting RCU flavours we
have grown over the years, BH doesn't necessarily hold off RCU-sched
grace periods (-rt is known to break this). This would in fact already
cause us grief, since we do sched_domain/sched_group iterations from
softirq context.
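
Concretely, every domain walk now takes the shape sketched below,
including the ones driven from the SCHED_SOFTIRQ handler, which is why
plain rcu_read_lock()/rcu_read_unlock() (rather than
rcu_read_lock_sched() or BH protection) is what keeps the tree alive.
The kernel/sched_fair.c hunks at the end of the diff all follow this
shape; the sketch below is illustrative only and assumes the in-file
for_each_domain() iterator.

```c
static void domain_walk_sketch(int cpu)
{
	struct sched_domain *sd;

	rcu_read_lock();
	for_each_domain(cpu, sd) {		/* rcu_dereference()-based walk */
		if (!(sd->flags & SD_LOAD_BALANCE))
			continue;
		/* ... e.g. load_balance(cpu, ..., sd, ...) ... */
	}
	rcu_read_unlock();
}
```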
This patch is somewhat larger than I would like it to be, but I didn't
find any means of shrinking/splitting this.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  include/linux/sched.h   |   5
-rw-r--r--  kernel/sched.c          | 479
-rw-r--r--  kernel/sched_fair.c     |  30
3 files changed, 218 insertions, 296 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ec2c027e92c..020b79d6c486 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -868,6 +868,7 @@ static inline int sd_power_saving_flags(void) | |||
868 | 868 | ||
869 | struct sched_group { | 869 | struct sched_group { |
870 | struct sched_group *next; /* Must be a circular list */ | 870 | struct sched_group *next; /* Must be a circular list */ |
871 | atomic_t ref; | ||
871 | 872 | ||
872 | /* | 873 | /* |
873 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a | 874 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a |
@@ -973,6 +974,10 @@ struct sched_domain { | |||
973 | #ifdef CONFIG_SCHED_DEBUG | 974 | #ifdef CONFIG_SCHED_DEBUG |
974 | char *name; | 975 | char *name; |
975 | #endif | 976 | #endif |
977 | union { | ||
978 | void *private; /* used during construction */ | ||
979 | struct rcu_head rcu; /* used during destruction */ | ||
980 | }; | ||
976 | 981 | ||
977 | unsigned int span_weight; | 982 | unsigned int span_weight; |
978 | /* | 983 | /* |
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cca59ec4a49..65204845063e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -417,6 +417,7 @@ struct rt_rq { | |||
417 | */ | 417 | */ |
418 | struct root_domain { | 418 | struct root_domain { |
419 | atomic_t refcount; | 419 | atomic_t refcount; |
420 | struct rcu_head rcu; | ||
420 | cpumask_var_t span; | 421 | cpumask_var_t span; |
421 | cpumask_var_t online; | 422 | cpumask_var_t online; |
422 | 423 | ||
@@ -571,7 +572,7 @@ static inline int cpu_of(struct rq *rq) | |||
571 | 572 | ||
572 | #define rcu_dereference_check_sched_domain(p) \ | 573 | #define rcu_dereference_check_sched_domain(p) \ |
573 | rcu_dereference_check((p), \ | 574 | rcu_dereference_check((p), \ |
574 | rcu_read_lock_sched_held() || \ | 575 | rcu_read_lock_held() || \ |
575 | lockdep_is_held(&sched_domains_mutex)) | 576 | lockdep_is_held(&sched_domains_mutex)) |
576 | 577 | ||
577 | /* | 578 | /* |
@@ -6572,12 +6573,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
6572 | return 1; | 6573 | return 1; |
6573 | } | 6574 | } |
6574 | 6575 | ||
6575 | static void free_rootdomain(struct root_domain *rd) | 6576 | static void free_rootdomain(struct rcu_head *rcu) |
6576 | { | 6577 | { |
6577 | synchronize_sched(); | 6578 | struct root_domain *rd = container_of(rcu, struct root_domain, rcu); |
6578 | 6579 | ||
6579 | cpupri_cleanup(&rd->cpupri); | 6580 | cpupri_cleanup(&rd->cpupri); |
6580 | |||
6581 | free_cpumask_var(rd->rto_mask); | 6581 | free_cpumask_var(rd->rto_mask); |
6582 | free_cpumask_var(rd->online); | 6582 | free_cpumask_var(rd->online); |
6583 | free_cpumask_var(rd->span); | 6583 | free_cpumask_var(rd->span); |
@@ -6618,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6618 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 6618 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
6619 | 6619 | ||
6620 | if (old_rd) | 6620 | if (old_rd) |
6621 | free_rootdomain(old_rd); | 6621 | call_rcu_sched(&old_rd->rcu, free_rootdomain); |
6622 | } | 6622 | } |
6623 | 6623 | ||
6624 | static int init_rootdomain(struct root_domain *rd) | 6624 | static int init_rootdomain(struct root_domain *rd) |
@@ -6669,6 +6669,25 @@ static struct root_domain *alloc_rootdomain(void) | |||
6669 | return rd; | 6669 | return rd; |
6670 | } | 6670 | } |
6671 | 6671 | ||
6672 | static void free_sched_domain(struct rcu_head *rcu) | ||
6673 | { | ||
6674 | struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); | ||
6675 | if (atomic_dec_and_test(&sd->groups->ref)) | ||
6676 | kfree(sd->groups); | ||
6677 | kfree(sd); | ||
6678 | } | ||
6679 | |||
6680 | static void destroy_sched_domain(struct sched_domain *sd, int cpu) | ||
6681 | { | ||
6682 | call_rcu(&sd->rcu, free_sched_domain); | ||
6683 | } | ||
6684 | |||
6685 | static void destroy_sched_domains(struct sched_domain *sd, int cpu) | ||
6686 | { | ||
6687 | for (; sd; sd = sd->parent) | ||
6688 | destroy_sched_domain(sd, cpu); | ||
6689 | } | ||
6690 | |||
6672 | /* | 6691 | /* |
6673 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must | 6692 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must |
6674 | * hold the hotplug lock. | 6693 | * hold the hotplug lock. |
@@ -6689,20 +6708,25 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) | |||
6689 | tmp->parent = parent->parent; | 6708 | tmp->parent = parent->parent; |
6690 | if (parent->parent) | 6709 | if (parent->parent) |
6691 | parent->parent->child = tmp; | 6710 | parent->parent->child = tmp; |
6711 | destroy_sched_domain(parent, cpu); | ||
6692 | } else | 6712 | } else |
6693 | tmp = tmp->parent; | 6713 | tmp = tmp->parent; |
6694 | } | 6714 | } |
6695 | 6715 | ||
6696 | if (sd && sd_degenerate(sd)) { | 6716 | if (sd && sd_degenerate(sd)) { |
6717 | tmp = sd; | ||
6697 | sd = sd->parent; | 6718 | sd = sd->parent; |
6719 | destroy_sched_domain(tmp, cpu); | ||
6698 | if (sd) | 6720 | if (sd) |
6699 | sd->child = NULL; | 6721 | sd->child = NULL; |
6700 | } | 6722 | } |
6701 | 6723 | ||
6702 | sched_domain_debug(sd, cpu); | 6724 | /* sched_domain_debug(sd, cpu); */ |
6703 | 6725 | ||
6704 | rq_attach_root(rq, rd); | 6726 | rq_attach_root(rq, rd); |
6727 | tmp = rq->sd; | ||
6705 | rcu_assign_pointer(rq->sd, sd); | 6728 | rcu_assign_pointer(rq->sd, sd); |
6729 | destroy_sched_domains(tmp, cpu); | ||
6706 | } | 6730 | } |
6707 | 6731 | ||
6708 | /* cpus with isolated domains */ | 6732 | /* cpus with isolated domains */ |
@@ -6718,56 +6742,6 @@ static int __init isolated_cpu_setup(char *str) | |||
6718 | 6742 | ||
6719 | __setup("isolcpus=", isolated_cpu_setup); | 6743 | __setup("isolcpus=", isolated_cpu_setup); |
6720 | 6744 | ||
6721 | /* | ||
6722 | * init_sched_build_groups takes the cpumask we wish to span, and a pointer | ||
6723 | * to a function which identifies what group(along with sched group) a CPU | ||
6724 | * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids | ||
6725 | * (due to the fact that we keep track of groups covered with a struct cpumask). | ||
6726 | * | ||
6727 | * init_sched_build_groups will build a circular linked list of the groups | ||
6728 | * covered by the given span, and will set each group's ->cpumask correctly, | ||
6729 | * and ->cpu_power to 0. | ||
6730 | */ | ||
6731 | static void | ||
6732 | init_sched_build_groups(const struct cpumask *span, | ||
6733 | const struct cpumask *cpu_map, | ||
6734 | int (*group_fn)(int cpu, const struct cpumask *cpu_map, | ||
6735 | struct sched_group **sg, | ||
6736 | struct cpumask *tmpmask), | ||
6737 | struct cpumask *covered, struct cpumask *tmpmask) | ||
6738 | { | ||
6739 | struct sched_group *first = NULL, *last = NULL; | ||
6740 | int i; | ||
6741 | |||
6742 | cpumask_clear(covered); | ||
6743 | |||
6744 | for_each_cpu(i, span) { | ||
6745 | struct sched_group *sg; | ||
6746 | int group = group_fn(i, cpu_map, &sg, tmpmask); | ||
6747 | int j; | ||
6748 | |||
6749 | if (cpumask_test_cpu(i, covered)) | ||
6750 | continue; | ||
6751 | |||
6752 | cpumask_clear(sched_group_cpus(sg)); | ||
6753 | sg->cpu_power = 0; | ||
6754 | |||
6755 | for_each_cpu(j, span) { | ||
6756 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) | ||
6757 | continue; | ||
6758 | |||
6759 | cpumask_set_cpu(j, covered); | ||
6760 | cpumask_set_cpu(j, sched_group_cpus(sg)); | ||
6761 | } | ||
6762 | if (!first) | ||
6763 | first = sg; | ||
6764 | if (last) | ||
6765 | last->next = sg; | ||
6766 | last = sg; | ||
6767 | } | ||
6768 | last->next = first; | ||
6769 | } | ||
6770 | |||
6771 | #define SD_NODES_PER_DOMAIN 16 | 6745 | #define SD_NODES_PER_DOMAIN 16 |
6772 | 6746 | ||
6773 | #ifdef CONFIG_NUMA | 6747 | #ifdef CONFIG_NUMA |
@@ -6858,154 +6832,96 @@ struct static_sched_domain { | |||
6858 | DECLARE_BITMAP(span, CONFIG_NR_CPUS); | 6832 | DECLARE_BITMAP(span, CONFIG_NR_CPUS); |
6859 | }; | 6833 | }; |
6860 | 6834 | ||
6835 | struct sd_data { | ||
6836 | struct sched_domain **__percpu sd; | ||
6837 | struct sched_group **__percpu sg; | ||
6838 | }; | ||
6839 | |||
6861 | struct s_data { | 6840 | struct s_data { |
6862 | #ifdef CONFIG_NUMA | 6841 | #ifdef CONFIG_NUMA |
6863 | int sd_allnodes; | 6842 | int sd_allnodes; |
6864 | #endif | 6843 | #endif |
6865 | cpumask_var_t nodemask; | 6844 | cpumask_var_t nodemask; |
6866 | cpumask_var_t send_covered; | 6845 | cpumask_var_t send_covered; |
6867 | cpumask_var_t tmpmask; | ||
6868 | struct sched_domain ** __percpu sd; | 6846 | struct sched_domain ** __percpu sd; |
6847 | struct sd_data sdd[SD_LV_MAX]; | ||
6869 | struct root_domain *rd; | 6848 | struct root_domain *rd; |
6870 | }; | 6849 | }; |
6871 | 6850 | ||
6872 | enum s_alloc { | 6851 | enum s_alloc { |
6873 | sa_rootdomain, | 6852 | sa_rootdomain, |
6874 | sa_sd, | 6853 | sa_sd, |
6875 | sa_tmpmask, | 6854 | sa_sd_storage, |
6876 | sa_send_covered, | 6855 | sa_send_covered, |
6877 | sa_nodemask, | 6856 | sa_nodemask, |
6878 | sa_none, | 6857 | sa_none, |
6879 | }; | 6858 | }; |
6880 | 6859 | ||
6881 | /* | 6860 | /* |
6882 | * SMT sched-domains: | 6861 | * Assumes the sched_domain tree is fully constructed |
6883 | */ | 6862 | */ |
6884 | #ifdef CONFIG_SCHED_SMT | 6863 | static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) |
6885 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); | ||
6886 | static DEFINE_PER_CPU(struct static_sched_group, sched_groups); | ||
6887 | |||
6888 | static int | ||
6889 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | ||
6890 | struct sched_group **sg, struct cpumask *unused) | ||
6891 | { | 6864 | { |
6892 | if (sg) | 6865 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); |
6893 | *sg = &per_cpu(sched_groups, cpu).sg; | 6866 | struct sched_domain *child = sd->child; |
6894 | return cpu; | ||
6895 | } | ||
6896 | #endif /* CONFIG_SCHED_SMT */ | ||
6897 | 6867 | ||
6898 | /* | 6868 | if (child) |
6899 | * multi-core sched-domains: | 6869 | cpu = cpumask_first(sched_domain_span(child)); |
6900 | */ | ||
6901 | #ifdef CONFIG_SCHED_MC | ||
6902 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); | ||
6903 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); | ||
6904 | 6870 | ||
6905 | static int | ||
6906 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | ||
6907 | struct sched_group **sg, struct cpumask *mask) | ||
6908 | { | ||
6909 | int group; | ||
6910 | #ifdef CONFIG_SCHED_SMT | ||
6911 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6912 | group = cpumask_first(mask); | ||
6913 | #else | ||
6914 | group = cpu; | ||
6915 | #endif | ||
6916 | if (sg) | 6871 | if (sg) |
6917 | *sg = &per_cpu(sched_group_core, group).sg; | 6872 | *sg = *per_cpu_ptr(sdd->sg, cpu); |
6918 | return group; | 6873 | |
6874 | return cpu; | ||
6919 | } | 6875 | } |
6920 | #endif /* CONFIG_SCHED_MC */ | ||
6921 | 6876 | ||
6922 | /* | 6877 | /* |
6923 | * book sched-domains: | 6878 | * build_sched_groups takes the cpumask we wish to span, and a pointer |
6879 | * to a function which identifies what group(along with sched group) a CPU | ||
6880 | * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids | ||
6881 | * (due to the fact that we keep track of groups covered with a struct cpumask). | ||
6882 | * | ||
6883 | * build_sched_groups will build a circular linked list of the groups | ||
6884 | * covered by the given span, and will set each group's ->cpumask correctly, | ||
6885 | * and ->cpu_power to 0. | ||
6924 | */ | 6886 | */ |
6925 | #ifdef CONFIG_SCHED_BOOK | 6887 | static void |
6926 | static DEFINE_PER_CPU(struct static_sched_domain, book_domains); | 6888 | build_sched_groups(struct sched_domain *sd, struct cpumask *covered) |
6927 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); | ||
6928 | |||
6929 | static int | ||
6930 | cpu_to_book_group(int cpu, const struct cpumask *cpu_map, | ||
6931 | struct sched_group **sg, struct cpumask *mask) | ||
6932 | { | ||
6933 | int group = cpu; | ||
6934 | #ifdef CONFIG_SCHED_MC | ||
6935 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
6936 | group = cpumask_first(mask); | ||
6937 | #elif defined(CONFIG_SCHED_SMT) | ||
6938 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6939 | group = cpumask_first(mask); | ||
6940 | #endif | ||
6941 | if (sg) | ||
6942 | *sg = &per_cpu(sched_group_book, group).sg; | ||
6943 | return group; | ||
6944 | } | ||
6945 | #endif /* CONFIG_SCHED_BOOK */ | ||
6946 | |||
6947 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); | ||
6948 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); | ||
6949 | |||
6950 | static int | ||
6951 | cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, | ||
6952 | struct sched_group **sg, struct cpumask *mask) | ||
6953 | { | 6889 | { |
6954 | int group; | 6890 | struct sched_group *first = NULL, *last = NULL; |
6955 | #ifdef CONFIG_SCHED_BOOK | 6891 | struct sd_data *sdd = sd->private; |
6956 | cpumask_and(mask, cpu_book_mask(cpu), cpu_map); | 6892 | const struct cpumask *span = sched_domain_span(sd); |
6957 | group = cpumask_first(mask); | 6893 | int i; |
6958 | #elif defined(CONFIG_SCHED_MC) | ||
6959 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
6960 | group = cpumask_first(mask); | ||
6961 | #elif defined(CONFIG_SCHED_SMT) | ||
6962 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6963 | group = cpumask_first(mask); | ||
6964 | #else | ||
6965 | group = cpu; | ||
6966 | #endif | ||
6967 | if (sg) | ||
6968 | *sg = &per_cpu(sched_group_phys, group).sg; | ||
6969 | return group; | ||
6970 | } | ||
6971 | |||
6972 | #ifdef CONFIG_NUMA | ||
6973 | static DEFINE_PER_CPU(struct static_sched_domain, node_domains); | ||
6974 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_node); | ||
6975 | 6894 | ||
6976 | static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map, | 6895 | cpumask_clear(covered); |
6977 | struct sched_group **sg, | ||
6978 | struct cpumask *nodemask) | ||
6979 | { | ||
6980 | int group; | ||
6981 | 6896 | ||
6982 | cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); | 6897 | for_each_cpu(i, span) { |
6983 | group = cpumask_first(nodemask); | 6898 | struct sched_group *sg; |
6899 | int group = get_group(i, sdd, &sg); | ||
6900 | int j; | ||
6984 | 6901 | ||
6985 | if (sg) | 6902 | if (cpumask_test_cpu(i, covered)) |
6986 | *sg = &per_cpu(sched_group_node, group).sg; | 6903 | continue; |
6987 | return group; | ||
6988 | } | ||
6989 | 6904 | ||
6990 | static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); | 6905 | cpumask_clear(sched_group_cpus(sg)); |
6991 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); | 6906 | sg->cpu_power = 0; |
6992 | 6907 | ||
6993 | static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, | 6908 | for_each_cpu(j, span) { |
6994 | struct sched_group **sg, | 6909 | if (get_group(j, sdd, NULL) != group) |
6995 | struct cpumask *nodemask) | 6910 | continue; |
6996 | { | ||
6997 | int group; | ||
6998 | 6911 | ||
6999 | cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); | 6912 | cpumask_set_cpu(j, covered); |
7000 | group = cpumask_first(nodemask); | 6913 | cpumask_set_cpu(j, sched_group_cpus(sg)); |
6914 | } | ||
7001 | 6915 | ||
7002 | if (sg) | 6916 | if (!first) |
7003 | *sg = &per_cpu(sched_group_allnodes, group).sg; | 6917 | first = sg; |
7004 | return group; | 6918 | if (last) |
6919 | last->next = sg; | ||
6920 | last = sg; | ||
6921 | } | ||
6922 | last->next = first; | ||
7005 | } | 6923 | } |
7006 | 6924 | ||
7007 | #endif /* CONFIG_NUMA */ | ||
7008 | |||
7009 | /* | 6925 | /* |
7010 | * Initialize sched groups cpu_power. | 6926 | * Initialize sched groups cpu_power. |
7011 | * | 6927 | * |
@@ -7039,15 +6955,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
7039 | # define SD_INIT_NAME(sd, type) do { } while (0) | 6955 | # define SD_INIT_NAME(sd, type) do { } while (0) |
7040 | #endif | 6956 | #endif |
7041 | 6957 | ||
7042 | #define SD_INIT(sd, type) sd_init_##type(sd) | 6958 | #define SD_INIT_FUNC(type) \ |
7043 | 6959 | static noinline struct sched_domain *sd_init_##type(struct s_data *d, int cpu) \ | |
7044 | #define SD_INIT_FUNC(type) \ | 6960 | { \ |
7045 | static noinline void sd_init_##type(struct sched_domain *sd) \ | 6961 | struct sched_domain *sd = *per_cpu_ptr(d->sdd[SD_LV_##type].sd, cpu); \ |
7046 | { \ | 6962 | *sd = SD_##type##_INIT; \ |
7047 | memset(sd, 0, sizeof(*sd)); \ | 6963 | sd->level = SD_LV_##type; \ |
7048 | *sd = SD_##type##_INIT; \ | 6964 | SD_INIT_NAME(sd, type); \ |
7049 | sd->level = SD_LV_##type; \ | 6965 | sd->private = &d->sdd[SD_LV_##type]; \ |
7050 | SD_INIT_NAME(sd, type); \ | 6966 | return sd; \ |
7051 | } | 6967 | } |
7052 | 6968 | ||
7053 | SD_INIT_FUNC(CPU) | 6969 | SD_INIT_FUNC(CPU) |
@@ -7103,13 +7019,22 @@ static void set_domain_attribute(struct sched_domain *sd, | |||
7103 | static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | 7019 | static void __free_domain_allocs(struct s_data *d, enum s_alloc what, |
7104 | const struct cpumask *cpu_map) | 7020 | const struct cpumask *cpu_map) |
7105 | { | 7021 | { |
7022 | int i, j; | ||
7023 | |||
7106 | switch (what) { | 7024 | switch (what) { |
7107 | case sa_rootdomain: | 7025 | case sa_rootdomain: |
7108 | free_rootdomain(d->rd); /* fall through */ | 7026 | free_rootdomain(&d->rd->rcu); /* fall through */ |
7109 | case sa_sd: | 7027 | case sa_sd: |
7110 | free_percpu(d->sd); /* fall through */ | 7028 | free_percpu(d->sd); /* fall through */ |
7111 | case sa_tmpmask: | 7029 | case sa_sd_storage: |
7112 | free_cpumask_var(d->tmpmask); /* fall through */ | 7030 | for (i = 0; i < SD_LV_MAX; i++) { |
7031 | for_each_cpu(j, cpu_map) { | ||
7032 | kfree(*per_cpu_ptr(d->sdd[i].sd, j)); | ||
7033 | kfree(*per_cpu_ptr(d->sdd[i].sg, j)); | ||
7034 | } | ||
7035 | free_percpu(d->sdd[i].sd); | ||
7036 | free_percpu(d->sdd[i].sg); | ||
7037 | } /* fall through */ | ||
7113 | case sa_send_covered: | 7038 | case sa_send_covered: |
7114 | free_cpumask_var(d->send_covered); /* fall through */ | 7039 | free_cpumask_var(d->send_covered); /* fall through */ |
7115 | case sa_nodemask: | 7040 | case sa_nodemask: |
@@ -7122,25 +7047,70 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | |||
7122 | static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | 7047 | static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, |
7123 | const struct cpumask *cpu_map) | 7048 | const struct cpumask *cpu_map) |
7124 | { | 7049 | { |
7050 | int i, j; | ||
7051 | |||
7052 | memset(d, 0, sizeof(*d)); | ||
7053 | |||
7125 | if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL)) | 7054 | if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL)) |
7126 | return sa_none; | 7055 | return sa_none; |
7127 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | 7056 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) |
7128 | return sa_nodemask; | 7057 | return sa_nodemask; |
7129 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) | 7058 | for (i = 0; i < SD_LV_MAX; i++) { |
7130 | return sa_send_covered; | 7059 | d->sdd[i].sd = alloc_percpu(struct sched_domain *); |
7131 | d->sd = alloc_percpu(struct sched_domain *); | 7060 | if (!d->sdd[i].sd) |
7132 | if (!d->sd) { | 7061 | return sa_sd_storage; |
7133 | printk(KERN_WARNING "Cannot alloc per-cpu pointers\n"); | 7062 | |
7134 | return sa_tmpmask; | 7063 | d->sdd[i].sg = alloc_percpu(struct sched_group *); |
7064 | if (!d->sdd[i].sg) | ||
7065 | return sa_sd_storage; | ||
7066 | |||
7067 | for_each_cpu(j, cpu_map) { | ||
7068 | struct sched_domain *sd; | ||
7069 | struct sched_group *sg; | ||
7070 | |||
7071 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), | ||
7072 | GFP_KERNEL, cpu_to_node(j)); | ||
7073 | if (!sd) | ||
7074 | return sa_sd_storage; | ||
7075 | |||
7076 | *per_cpu_ptr(d->sdd[i].sd, j) = sd; | ||
7077 | |||
7078 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
7079 | GFP_KERNEL, cpu_to_node(j)); | ||
7080 | if (!sg) | ||
7081 | return sa_sd_storage; | ||
7082 | |||
7083 | *per_cpu_ptr(d->sdd[i].sg, j) = sg; | ||
7084 | } | ||
7135 | } | 7085 | } |
7086 | d->sd = alloc_percpu(struct sched_domain *); | ||
7087 | if (!d->sd) | ||
7088 | return sa_sd_storage; | ||
7136 | d->rd = alloc_rootdomain(); | 7089 | d->rd = alloc_rootdomain(); |
7137 | if (!d->rd) { | 7090 | if (!d->rd) |
7138 | printk(KERN_WARNING "Cannot alloc root domain\n"); | ||
7139 | return sa_sd; | 7091 | return sa_sd; |
7140 | } | ||
7141 | return sa_rootdomain; | 7092 | return sa_rootdomain; |
7142 | } | 7093 | } |
7143 | 7094 | ||
7095 | /* | ||
7096 | * NULL the sd_data elements we've used to build the sched_domain and | ||
7097 | * sched_group structure so that the subsequent __free_domain_allocs() | ||
7098 | * will not free the data we're using. | ||
7099 | */ | ||
7100 | static void claim_allocations(int cpu, struct sched_domain *sd) | ||
7101 | { | ||
7102 | struct sd_data *sdd = sd->private; | ||
7103 | struct sched_group *sg = sd->groups; | ||
7104 | |||
7105 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); | ||
7106 | *per_cpu_ptr(sdd->sd, cpu) = NULL; | ||
7107 | |||
7108 | if (cpu == cpumask_first(sched_group_cpus(sg))) { | ||
7109 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); | ||
7110 | *per_cpu_ptr(sdd->sg, cpu) = NULL; | ||
7111 | } | ||
7112 | } | ||
7113 | |||
7144 | static struct sched_domain *__build_numa_sched_domains(struct s_data *d, | 7114 | static struct sched_domain *__build_numa_sched_domains(struct s_data *d, |
7145 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i) | 7115 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i) |
7146 | { | 7116 | { |
@@ -7151,24 +7121,20 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d, | |||
7151 | d->sd_allnodes = 0; | 7121 | d->sd_allnodes = 0; |
7152 | if (cpumask_weight(cpu_map) > | 7122 | if (cpumask_weight(cpu_map) > |
7153 | SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) { | 7123 | SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) { |
7154 | sd = &per_cpu(allnodes_domains, i).sd; | 7124 | sd = sd_init_ALLNODES(d, i); |
7155 | SD_INIT(sd, ALLNODES); | ||
7156 | set_domain_attribute(sd, attr); | 7125 | set_domain_attribute(sd, attr); |
7157 | cpumask_copy(sched_domain_span(sd), cpu_map); | 7126 | cpumask_copy(sched_domain_span(sd), cpu_map); |
7158 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7159 | d->sd_allnodes = 1; | 7127 | d->sd_allnodes = 1; |
7160 | } | 7128 | } |
7161 | parent = sd; | 7129 | parent = sd; |
7162 | 7130 | ||
7163 | sd = &per_cpu(node_domains, i).sd; | 7131 | sd = sd_init_NODE(d, i); |
7164 | SD_INIT(sd, NODE); | ||
7165 | set_domain_attribute(sd, attr); | 7132 | set_domain_attribute(sd, attr); |
7166 | sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); | 7133 | sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); |
7167 | sd->parent = parent; | 7134 | sd->parent = parent; |
7168 | if (parent) | 7135 | if (parent) |
7169 | parent->child = sd; | 7136 | parent->child = sd; |
7170 | cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map); | 7137 | cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map); |
7171 | cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7172 | #endif | 7138 | #endif |
7173 | return sd; | 7139 | return sd; |
7174 | } | 7140 | } |
@@ -7178,14 +7144,12 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, | |||
7178 | struct sched_domain *parent, int i) | 7144 | struct sched_domain *parent, int i) |
7179 | { | 7145 | { |
7180 | struct sched_domain *sd; | 7146 | struct sched_domain *sd; |
7181 | sd = &per_cpu(phys_domains, i).sd; | 7147 | sd = sd_init_CPU(d, i); |
7182 | SD_INIT(sd, CPU); | ||
7183 | set_domain_attribute(sd, attr); | 7148 | set_domain_attribute(sd, attr); |
7184 | cpumask_copy(sched_domain_span(sd), d->nodemask); | 7149 | cpumask_copy(sched_domain_span(sd), d->nodemask); |
7185 | sd->parent = parent; | 7150 | sd->parent = parent; |
7186 | if (parent) | 7151 | if (parent) |
7187 | parent->child = sd; | 7152 | parent->child = sd; |
7188 | cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7189 | return sd; | 7153 | return sd; |
7190 | } | 7154 | } |
7191 | 7155 | ||
@@ -7195,13 +7159,11 @@ static struct sched_domain *__build_book_sched_domain(struct s_data *d, | |||
7195 | { | 7159 | { |
7196 | struct sched_domain *sd = parent; | 7160 | struct sched_domain *sd = parent; |
7197 | #ifdef CONFIG_SCHED_BOOK | 7161 | #ifdef CONFIG_SCHED_BOOK |
7198 | sd = &per_cpu(book_domains, i).sd; | 7162 | sd = sd_init_BOOK(d, i); |
7199 | SD_INIT(sd, BOOK); | ||
7200 | set_domain_attribute(sd, attr); | 7163 | set_domain_attribute(sd, attr); |
7201 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); | 7164 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); |
7202 | sd->parent = parent; | 7165 | sd->parent = parent; |
7203 | parent->child = sd; | 7166 | parent->child = sd; |
7204 | cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7205 | #endif | 7167 | #endif |
7206 | return sd; | 7168 | return sd; |
7207 | } | 7169 | } |
@@ -7212,13 +7174,11 @@ static struct sched_domain *__build_mc_sched_domain(struct s_data *d, | |||
7212 | { | 7174 | { |
7213 | struct sched_domain *sd = parent; | 7175 | struct sched_domain *sd = parent; |
7214 | #ifdef CONFIG_SCHED_MC | 7176 | #ifdef CONFIG_SCHED_MC |
7215 | sd = &per_cpu(core_domains, i).sd; | 7177 | sd = sd_init_MC(d, i); |
7216 | SD_INIT(sd, MC); | ||
7217 | set_domain_attribute(sd, attr); | 7178 | set_domain_attribute(sd, attr); |
7218 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i)); | 7179 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i)); |
7219 | sd->parent = parent; | 7180 | sd->parent = parent; |
7220 | parent->child = sd; | 7181 | parent->child = sd; |
7221 | cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7222 | #endif | 7182 | #endif |
7223 | return sd; | 7183 | return sd; |
7224 | } | 7184 | } |
@@ -7229,92 +7189,32 @@ static struct sched_domain *__build_smt_sched_domain(struct s_data *d, | |||
7229 | { | 7189 | { |
7230 | struct sched_domain *sd = parent; | 7190 | struct sched_domain *sd = parent; |
7231 | #ifdef CONFIG_SCHED_SMT | 7191 | #ifdef CONFIG_SCHED_SMT |
7232 | sd = &per_cpu(cpu_domains, i).sd; | 7192 | sd = sd_init_SIBLING(d, i); |
7233 | SD_INIT(sd, SIBLING); | ||
7234 | set_domain_attribute(sd, attr); | 7193 | set_domain_attribute(sd, attr); |
7235 | cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i)); | 7194 | cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i)); |
7236 | sd->parent = parent; | 7195 | sd->parent = parent; |
7237 | parent->child = sd; | 7196 | parent->child = sd; |
7238 | cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7239 | #endif | 7197 | #endif |
7240 | return sd; | 7198 | return sd; |
7241 | } | 7199 | } |
7242 | 7200 | ||
7243 | static void build_sched_groups(struct s_data *d, struct sched_domain *sd, | ||
7244 | const struct cpumask *cpu_map, int cpu) | ||
7245 | { | ||
7246 | switch (sd->level) { | ||
7247 | #ifdef CONFIG_SCHED_SMT | ||
7248 | case SD_LV_SIBLING: /* set up CPU (sibling) groups */ | ||
7249 | if (cpu == cpumask_first(sched_domain_span(sd))) | ||
7250 | init_sched_build_groups(sched_domain_span(sd), cpu_map, | ||
7251 | &cpu_to_cpu_group, | ||
7252 | d->send_covered, d->tmpmask); | ||
7253 | break; | ||
7254 | #endif | ||
7255 | #ifdef CONFIG_SCHED_MC | ||
7256 | case SD_LV_MC: /* set up multi-core groups */ | ||
7257 | if (cpu == cpumask_first(sched_domain_span(sd))) | ||
7258 | init_sched_build_groups(sched_domain_span(sd), cpu_map, | ||
7259 | &cpu_to_core_group, | ||
7260 | d->send_covered, d->tmpmask); | ||
7261 | break; | ||
7262 | #endif | ||
7263 | #ifdef CONFIG_SCHED_BOOK | ||
7264 | case SD_LV_BOOK: /* set up book groups */ | ||
7265 | if (cpu == cpumask_first(sched_domain_span(sd))) | ||
7266 | init_sched_build_groups(sched_domain_span(sd), cpu_map, | ||
7267 | &cpu_to_book_group, | ||
7268 | d->send_covered, d->tmpmask); | ||
7269 | break; | ||
7270 | #endif | ||
7271 | case SD_LV_CPU: /* set up physical groups */ | ||
7272 | if (cpu == cpumask_first(sched_domain_span(sd))) | ||
7273 | init_sched_build_groups(sched_domain_span(sd), cpu_map, | ||
7274 | &cpu_to_phys_group, | ||
7275 | d->send_covered, d->tmpmask); | ||
7276 | break; | ||
7277 | #ifdef CONFIG_NUMA | ||
7278 | case SD_LV_NODE: | ||
7279 | if (cpu == cpumask_first(sched_domain_span(sd))) | ||
7280 | init_sched_build_groups(sched_domain_span(sd), cpu_map, | ||
7281 | &cpu_to_node_group, | ||
7282 | d->send_covered, d->tmpmask); | ||
7283 | |||
7284 | case SD_LV_ALLNODES: | ||
7285 | if (cpu == cpumask_first(cpu_map)) | ||
7286 | init_sched_build_groups(cpu_map, cpu_map, | ||
7287 | &cpu_to_allnodes_group, | ||
7288 | d->send_covered, d->tmpmask); | ||
7289 | break; | ||
7290 | #endif | ||
7291 | default: | ||
7292 | break; | ||
7293 | } | ||
7294 | } | ||
7295 | |||
7296 | /* | 7201 | /* |
7297 | * Build sched domains for a given set of cpus and attach the sched domains | 7202 | * Build sched domains for a given set of cpus and attach the sched domains |
7298 | * to the individual cpus | 7203 | * to the individual cpus |
7299 | */ | 7204 | */ |
7300 | static int __build_sched_domains(const struct cpumask *cpu_map, | 7205 | static int build_sched_domains(const struct cpumask *cpu_map, |
7301 | struct sched_domain_attr *attr) | 7206 | struct sched_domain_attr *attr) |
7302 | { | 7207 | { |
7303 | enum s_alloc alloc_state = sa_none; | 7208 | enum s_alloc alloc_state = sa_none; |
7209 | struct sched_domain *sd; | ||
7304 | struct s_data d; | 7210 | struct s_data d; |
7305 | struct sched_domain *sd, *tmp; | ||
7306 | int i; | 7211 | int i; |
7307 | #ifdef CONFIG_NUMA | ||
7308 | d.sd_allnodes = 0; | ||
7309 | #endif | ||
7310 | 7212 | ||
7311 | alloc_state = __visit_domain_allocation_hell(&d, cpu_map); | 7213 | alloc_state = __visit_domain_allocation_hell(&d, cpu_map); |
7312 | if (alloc_state != sa_rootdomain) | 7214 | if (alloc_state != sa_rootdomain) |
7313 | goto error; | 7215 | goto error; |
7314 | 7216 | ||
7315 | /* | 7217 | /* Set up domains for cpus specified by the cpu_map. */ |
7316 | * Set up domains for cpus specified by the cpu_map. | ||
7317 | */ | ||
7318 | for_each_cpu(i, cpu_map) { | 7218 | for_each_cpu(i, cpu_map) { |
7319 | cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)), | 7219 | cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)), |
7320 | cpu_map); | 7220 | cpu_map); |
@@ -7326,10 +7226,19 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
7326 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); | 7226 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); |
7327 | 7227 | ||
7328 | *per_cpu_ptr(d.sd, i) = sd; | 7228 | *per_cpu_ptr(d.sd, i) = sd; |
7229 | } | ||
7230 | |||
7231 | /* Build the groups for the domains */ | ||
7232 | for_each_cpu(i, cpu_map) { | ||
7233 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { | ||
7234 | sd->span_weight = cpumask_weight(sched_domain_span(sd)); | ||
7235 | get_group(i, sd->private, &sd->groups); | ||
7236 | atomic_inc(&sd->groups->ref); | ||
7329 | 7237 | ||
7330 | for (tmp = sd; tmp; tmp = tmp->parent) { | 7238 | if (i != cpumask_first(sched_domain_span(sd))) |
7331 | tmp->span_weight = cpumask_weight(sched_domain_span(tmp)); | 7239 | continue; |
7332 | build_sched_groups(&d, tmp, cpu_map, i); | 7240 | |
7241 | build_sched_groups(sd, d.send_covered); | ||
7333 | } | 7242 | } |
7334 | } | 7243 | } |
7335 | 7244 | ||
@@ -7338,18 +7247,21 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
7338 | if (!cpumask_test_cpu(i, cpu_map)) | 7247 | if (!cpumask_test_cpu(i, cpu_map)) |
7339 | continue; | 7248 | continue; |
7340 | 7249 | ||
7341 | sd = *per_cpu_ptr(d.sd, i); | 7250 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { |
7342 | for (; sd; sd = sd->parent) | 7251 | claim_allocations(i, sd); |
7343 | init_sched_groups_power(i, sd); | 7252 | init_sched_groups_power(i, sd); |
7253 | } | ||
7344 | } | 7254 | } |
7345 | 7255 | ||
7346 | /* Attach the domains */ | 7256 | /* Attach the domains */ |
7257 | rcu_read_lock(); | ||
7347 | for_each_cpu(i, cpu_map) { | 7258 | for_each_cpu(i, cpu_map) { |
7348 | sd = *per_cpu_ptr(d.sd, i); | 7259 | sd = *per_cpu_ptr(d.sd, i); |
7349 | cpu_attach_domain(sd, d.rd, i); | 7260 | cpu_attach_domain(sd, d.rd, i); |
7350 | } | 7261 | } |
7262 | rcu_read_unlock(); | ||
7351 | 7263 | ||
7352 | __free_domain_allocs(&d, sa_tmpmask, cpu_map); | 7264 | __free_domain_allocs(&d, sa_sd, cpu_map); |
7353 | return 0; | 7265 | return 0; |
7354 | 7266 | ||
7355 | error: | 7267 | error: |
@@ -7357,11 +7269,6 @@ error: | |||
7357 | return -ENOMEM; | 7269 | return -ENOMEM; |
7358 | } | 7270 | } |
7359 | 7271 | ||
7360 | static int build_sched_domains(const struct cpumask *cpu_map) | ||
7361 | { | ||
7362 | return __build_sched_domains(cpu_map, NULL); | ||
7363 | } | ||
7364 | |||
7365 | static cpumask_var_t *doms_cur; /* current sched domains */ | 7272 | static cpumask_var_t *doms_cur; /* current sched domains */ |
7366 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 7273 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
7367 | static struct sched_domain_attr *dattr_cur; | 7274 | static struct sched_domain_attr *dattr_cur; |
@@ -7425,31 +7332,24 @@ static int init_sched_domains(const struct cpumask *cpu_map) | |||
7425 | doms_cur = &fallback_doms; | 7332 | doms_cur = &fallback_doms; |
7426 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); | 7333 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
7427 | dattr_cur = NULL; | 7334 | dattr_cur = NULL; |
7428 | err = build_sched_domains(doms_cur[0]); | 7335 | err = build_sched_domains(doms_cur[0], NULL); |
7429 | register_sched_domain_sysctl(); | 7336 | register_sched_domain_sysctl(); |
7430 | 7337 | ||
7431 | return err; | 7338 | return err; |
7432 | } | 7339 | } |
7433 | 7340 | ||
7434 | static void destroy_sched_domains(const struct cpumask *cpu_map, | ||
7435 | struct cpumask *tmpmask) | ||
7436 | { | ||
7437 | } | ||
7438 | |||
7439 | /* | 7341 | /* |
7440 | * Detach sched domains from a group of cpus specified in cpu_map | 7342 | * Detach sched domains from a group of cpus specified in cpu_map |
7441 | * These cpus will now be attached to the NULL domain | 7343 | * These cpus will now be attached to the NULL domain |
7442 | */ | 7344 | */ |
7443 | static void detach_destroy_domains(const struct cpumask *cpu_map) | 7345 | static void detach_destroy_domains(const struct cpumask *cpu_map) |
7444 | { | 7346 | { |
7445 | /* Save because hotplug lock held. */ | ||
7446 | static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); | ||
7447 | int i; | 7347 | int i; |
7448 | 7348 | ||
7349 | rcu_read_lock(); | ||
7449 | for_each_cpu(i, cpu_map) | 7350 | for_each_cpu(i, cpu_map) |
7450 | cpu_attach_domain(NULL, &def_root_domain, i); | 7351 | cpu_attach_domain(NULL, &def_root_domain, i); |
7451 | synchronize_sched(); | 7352 | rcu_read_unlock(); |
7452 | destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); | ||
7453 | } | 7353 | } |
7454 | 7354 | ||
7455 | /* handle null as "default" */ | 7355 | /* handle null as "default" */ |
@@ -7538,8 +7438,7 @@ match1: | |||
7538 | goto match2; | 7438 | goto match2; |
7539 | } | 7439 | } |
7540 | /* no match - add a new doms_new */ | 7440 | /* no match - add a new doms_new */ |
7541 | __build_sched_domains(doms_new[i], | 7441 | build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); |
7542 | dattr_new ? dattr_new + i : NULL); | ||
7543 | match2: | 7442 | match2: |
7544 | ; | 7443 | ; |
7545 | } | 7444 | } |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 4ee50f0af8d1..4a8ac7c2a18e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1622,6 +1622,7 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1622 | /* | 1622 | /* |
1623 | * Otherwise, iterate the domains and find an elegible idle cpu. | 1623 | * Otherwise, iterate the domains and find an elegible idle cpu. |
1624 | */ | 1624 | */ |
1625 | rcu_read_lock(); | ||
1625 | for_each_domain(target, sd) { | 1626 | for_each_domain(target, sd) { |
1626 | if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) | 1627 | if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) |
1627 | break; | 1628 | break; |
@@ -1641,6 +1642,7 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1641 | cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) | 1642 | cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) |
1642 | break; | 1643 | break; |
1643 | } | 1644 | } |
1645 | rcu_read_unlock(); | ||
1644 | 1646 | ||
1645 | return target; | 1647 | return target; |
1646 | } | 1648 | } |
@@ -1673,6 +1675,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
1673 | new_cpu = prev_cpu; | 1675 | new_cpu = prev_cpu; |
1674 | } | 1676 | } |
1675 | 1677 | ||
1678 | rcu_read_lock(); | ||
1676 | for_each_domain(cpu, tmp) { | 1679 | for_each_domain(cpu, tmp) { |
1677 | if (!(tmp->flags & SD_LOAD_BALANCE)) | 1680 | if (!(tmp->flags & SD_LOAD_BALANCE)) |
1678 | continue; | 1681 | continue; |
@@ -1723,9 +1726,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
1723 | 1726 | ||
1724 | if (affine_sd) { | 1727 | if (affine_sd) { |
1725 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) | 1728 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) |
1726 | return select_idle_sibling(p, cpu); | 1729 | prev_cpu = cpu; |
1727 | else | 1730 | |
1728 | return select_idle_sibling(p, prev_cpu); | 1731 | new_cpu = select_idle_sibling(p, prev_cpu); |
1732 | goto unlock; | ||
1729 | } | 1733 | } |
1730 | 1734 | ||
1731 | while (sd) { | 1735 | while (sd) { |
@@ -1766,6 +1770,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
1766 | } | 1770 | } |
1767 | /* while loop will break here if sd == NULL */ | 1771 | /* while loop will break here if sd == NULL */ |
1768 | } | 1772 | } |
1773 | unlock: | ||
1774 | rcu_read_unlock(); | ||
1769 | 1775 | ||
1770 | return new_cpu; | 1776 | return new_cpu; |
1771 | } | 1777 | } |
@@ -3462,6 +3468,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3462 | raw_spin_unlock(&this_rq->lock); | 3468 | raw_spin_unlock(&this_rq->lock); |
3463 | 3469 | ||
3464 | update_shares(this_cpu); | 3470 | update_shares(this_cpu); |
3471 | rcu_read_lock(); | ||
3465 | for_each_domain(this_cpu, sd) { | 3472 | for_each_domain(this_cpu, sd) { |
3466 | unsigned long interval; | 3473 | unsigned long interval; |
3467 | int balance = 1; | 3474 | int balance = 1; |
@@ -3483,6 +3490,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3483 | break; | 3490 | break; |
3484 | } | 3491 | } |
3485 | } | 3492 | } |
3493 | rcu_read_unlock(); | ||
3486 | 3494 | ||
3487 | raw_spin_lock(&this_rq->lock); | 3495 | raw_spin_lock(&this_rq->lock); |
3488 | 3496 | ||
@@ -3531,6 +3539,7 @@ static int active_load_balance_cpu_stop(void *data) | |||
3531 | double_lock_balance(busiest_rq, target_rq); | 3539 | double_lock_balance(busiest_rq, target_rq); |
3532 | 3540 | ||
3533 | /* Search for an sd spanning us and the target CPU. */ | 3541 | /* Search for an sd spanning us and the target CPU. */ |
3542 | rcu_read_lock(); | ||
3534 | for_each_domain(target_cpu, sd) { | 3543 | for_each_domain(target_cpu, sd) { |
3535 | if ((sd->flags & SD_LOAD_BALANCE) && | 3544 | if ((sd->flags & SD_LOAD_BALANCE) && |
3536 | cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) | 3545 | cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) |
@@ -3546,6 +3555,7 @@ static int active_load_balance_cpu_stop(void *data) | |||
3546 | else | 3555 | else |
3547 | schedstat_inc(sd, alb_failed); | 3556 | schedstat_inc(sd, alb_failed); |
3548 | } | 3557 | } |
3558 | rcu_read_unlock(); | ||
3549 | double_unlock_balance(busiest_rq, target_rq); | 3559 | double_unlock_balance(busiest_rq, target_rq); |
3550 | out_unlock: | 3560 | out_unlock: |
3551 | busiest_rq->active_balance = 0; | 3561 | busiest_rq->active_balance = 0; |
@@ -3672,6 +3682,7 @@ static int find_new_ilb(int cpu) | |||
3672 | { | 3682 | { |
3673 | struct sched_domain *sd; | 3683 | struct sched_domain *sd; |
3674 | struct sched_group *ilb_group; | 3684 | struct sched_group *ilb_group; |
3685 | int ilb = nr_cpu_ids; | ||
3675 | 3686 | ||
3676 | /* | 3687 | /* |
3677 | * Have idle load balancer selection from semi-idle packages only | 3688 | * Have idle load balancer selection from semi-idle packages only |
@@ -3687,20 +3698,25 @@ static int find_new_ilb(int cpu) | |||
3687 | if (cpumask_weight(nohz.idle_cpus_mask) < 2) | 3698 | if (cpumask_weight(nohz.idle_cpus_mask) < 2) |
3688 | goto out_done; | 3699 | goto out_done; |
3689 | 3700 | ||
3701 | rcu_read_lock(); | ||
3690 | for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { | 3702 | for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { |
3691 | ilb_group = sd->groups; | 3703 | ilb_group = sd->groups; |
3692 | 3704 | ||
3693 | do { | 3705 | do { |
3694 | if (is_semi_idle_group(ilb_group)) | 3706 | if (is_semi_idle_group(ilb_group)) { |
3695 | return cpumask_first(nohz.grp_idle_mask); | 3707 | ilb = cpumask_first(nohz.grp_idle_mask); |
3708 | goto unlock; | ||
3709 | } | ||
3696 | 3710 | ||
3697 | ilb_group = ilb_group->next; | 3711 | ilb_group = ilb_group->next; |
3698 | 3712 | ||
3699 | } while (ilb_group != sd->groups); | 3713 | } while (ilb_group != sd->groups); |
3700 | } | 3714 | } |
3715 | unlock: | ||
3716 | rcu_read_unlock(); | ||
3701 | 3717 | ||
3702 | out_done: | 3718 | out_done: |
3703 | return nr_cpu_ids; | 3719 | return ilb; |
3704 | } | 3720 | } |
3705 | #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ | 3721 | #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ |
3706 | static inline int find_new_ilb(int call_cpu) | 3722 | static inline int find_new_ilb(int call_cpu) |
@@ -3845,6 +3861,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3845 | 3861 | ||
3846 | update_shares(cpu); | 3862 | update_shares(cpu); |
3847 | 3863 | ||
3864 | rcu_read_lock(); | ||
3848 | for_each_domain(cpu, sd) { | 3865 | for_each_domain(cpu, sd) { |
3849 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3866 | if (!(sd->flags & SD_LOAD_BALANCE)) |
3850 | continue; | 3867 | continue; |
@@ -3890,6 +3907,7 @@ out: | |||
3890 | if (!balance) | 3907 | if (!balance) |
3891 | break; | 3908 | break; |
3892 | } | 3909 | } |
3910 | rcu_read_unlock(); | ||
3893 | 3911 | ||
3894 | /* | 3912 | /* |
3895 | * next_balance will be updated only when there is a need. | 3913 | * next_balance will be updated only when there is a need. |