| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2011-04-07 08:09:50 -0400 |
| --- | --- | --- |
| committer | Ingo Molnar <mingo@elte.hu> | 2011-04-11 06:58:19 -0400 |
| commit | dce840a08702bd13a9a186e07e63d1ef82256b5e (patch) | |
| tree | 168bb98aed7f5761ebe31aa92c34959e9d0f238a /kernel/sched.c | |
| parent | a9c9a9b6bff27ac9c746344a9c1a19bf3327002c (diff) | |
sched: Dynamically allocate sched_domain/sched_group data-structures
Instead of relying on static allocations for the sched_domain and
sched_group trees, dynamically allocate and RCU free them.
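In miniature, the lifetime rule this introduces looks like the sketch below: each dynamically allocated object embeds a struct rcu_head, tear-down goes through call_rcu(), and the memory is only kfree()d after every pre-existing RCU read-side critical section has finished. The stub type and helper names here are illustrative only; the real code is in the free_sched_domain()/destroy_sched_domain() hunks further down.

```c
/* Illustrative kernel-context sketch only; stub names are not from the patch itself. */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct domain_stub {
        struct domain_stub *parent;     /* tree linkage, as in sched_domain */
        struct rcu_head rcu;            /* embedded head for call_rcu() */
};

static void domain_stub_free_rcu(struct rcu_head *rcu)
{
        /* Runs after a grace period: no reader can still see this object. */
        kfree(container_of(rcu, struct domain_stub, rcu));
}

static void domain_stub_destroy(struct domain_stub *d)
{
        /* Caller has already unpublished 'd', e.g. via rcu_assign_pointer(). */
        call_rcu(&d->rcu, domain_stub_free_rcu);
}
```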
Allocating this dynamically also allows for some build_sched_groups()
simplification since we can now (like with other simplifications) rely
on the sched_domain tree instead of hard-coded knowledge.
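That simplification is visible in the new get_group() helper: with the tree available, the group a CPU belongs to at any level is keyed by the first CPU spanned by that CPU's child domain, which replaces the per-level cpu_to_*_group() helpers. Below is a copy of the helper from the diff, with explanatory comments added for this write-up:

```c
/* get_group() as added by this patch; comments added here, not in the patch. */
static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
{
        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
        struct sched_domain *child = sd->child;

        /* A group is represented by the first CPU its child domain spans. */
        if (child)
                cpu = cpumask_first(sched_domain_span(child));

        if (sg)
                *sg = *per_cpu_ptr(sdd->sg, cpu);

        return cpu;
}
```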
One tricky thing to note is that detach_destroy_domains() needs to hold
rcu_read_lock() over the entire tear-down; per-cpu is not sufficient
since that can lead to partial sched_group existence (could possibly
be solved by doing the tear-down backwards but this is much more
robust).
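Concretely, the tear-down path in the diff below ends up as one read-side section around the whole loop; an annotated copy (comments added here, not in the patch):

```c
/* detach_destroy_domains() as changed by this patch. */
static void detach_destroy_domains(const struct cpumask *cpu_map)
{
        int i;

        rcu_read_lock();        /* one section across *all* CPUs, not per CPU */
        for_each_cpu(i, cpu_map)
                cpu_attach_domain(NULL, &def_root_domain, i);
        rcu_read_unlock();      /* only now may the queued call_rcu() callbacks free anything */
}
```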
A consequence of the above is that we can no longer print the
sched_domain debug stuff from cpu_attach_domain() since that might now
run with preemption disabled (due to classic RCU etc.) and
sched_domain_debug() does some GFP_KERNEL allocations.
Another thing to note is that we now fully rely on normal RCU and not
RCU-sched; this is because, with the new and exciting RCU flavours we
grew over the years, BH doesn't necessarily hold off RCU-sched grace
periods (-rt is known to break this). This would in fact already cause
us grief since we do sched_domain/sched_group iterations from softirq
context.
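The reader side being protected is not part of these hunks, but the intended pattern is a plain rcu_read_lock() around any walk of rq->sd (for example from the softirq load balancer); a sketch, not code from this patch:

```c
/* Reader-side sketch only; not taken from this patch. */
static void walk_domains(struct rq *rq)
{
        struct sched_domain *sd;

        rcu_read_lock();        /* plain RCU: softirq/BH context is not enough by itself */
        for (sd = rcu_dereference(rq->sd); sd; sd = sd->parent) {
                /* e.g. look at sched_domain_span(sd) or sd->groups */
        }
        rcu_read_unlock();
}
```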
This patch is somewhat larger than I would like it to be, but I didn't
find any means of shrinking/splitting this.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 479
1 file changed, 189 insertions, 290 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cca59ec4a49..65204845063e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -417,6 +417,7 @@ struct rt_rq {
  */
 struct root_domain {
         atomic_t refcount;
+        struct rcu_head rcu;
         cpumask_var_t span;
         cpumask_var_t online;

@@ -571,7 +572,7 @@ static inline int cpu_of(struct rq *rq)

 #define rcu_dereference_check_sched_domain(p) \
         rcu_dereference_check((p), \
-                              rcu_read_lock_sched_held() || \
+                              rcu_read_lock_held() || \
                               lockdep_is_held(&sched_domains_mutex))

 /*
@@ -6572,12 +6573,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
         return 1;
 }

-static void free_rootdomain(struct root_domain *rd)
+static void free_rootdomain(struct rcu_head *rcu)
 {
-        synchronize_sched();
+        struct root_domain *rd = container_of(rcu, struct root_domain, rcu);

         cpupri_cleanup(&rd->cpupri);
-
         free_cpumask_var(rd->rto_mask);
         free_cpumask_var(rd->online);
         free_cpumask_var(rd->span);
@@ -6618,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
         raw_spin_unlock_irqrestore(&rq->lock, flags);

         if (old_rd)
-                free_rootdomain(old_rd);
+                call_rcu_sched(&old_rd->rcu, free_rootdomain);
 }

 static int init_rootdomain(struct root_domain *rd)
@@ -6669,6 +6669,25 @@ static struct root_domain *alloc_rootdomain(void)
         return rd;
 }

+static void free_sched_domain(struct rcu_head *rcu)
+{
+        struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+        if (atomic_dec_and_test(&sd->groups->ref))
+                kfree(sd->groups);
+        kfree(sd);
+}
+
+static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+{
+        call_rcu(&sd->rcu, free_sched_domain);
+}
+
+static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+{
+        for (; sd; sd = sd->parent)
+                destroy_sched_domain(sd, cpu);
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
  * hold the hotplug lock.
@@ -6689,20 +6708,25 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
                         tmp->parent = parent->parent;
                         if (parent->parent)
                                 parent->parent->child = tmp;
+                        destroy_sched_domain(parent, cpu);
                 } else
                         tmp = tmp->parent;
         }

         if (sd && sd_degenerate(sd)) {
+                tmp = sd;
                 sd = sd->parent;
+                destroy_sched_domain(tmp, cpu);
                 if (sd)
                         sd->child = NULL;
         }

-        sched_domain_debug(sd, cpu);
+        /* sched_domain_debug(sd, cpu); */

         rq_attach_root(rq, rd);
+        tmp = rq->sd;
         rcu_assign_pointer(rq->sd, sd);
+        destroy_sched_domains(tmp, cpu);
 }

 /* cpus with isolated domains */
@@ -6718,56 +6742,6 @@ static int __init isolated_cpu_setup(char *str)

 __setup("isolcpus=", isolated_cpu_setup);

-/*
- * init_sched_build_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
- * init_sched_build_groups will build a circular linked list of the groups
- * covered by the given span, and will set each group's ->cpumask correctly,
- * and ->cpu_power to 0.
- */
-static void
-init_sched_build_groups(const struct cpumask *span,
-                        const struct cpumask *cpu_map,
-                        int (*group_fn)(int cpu, const struct cpumask *cpu_map,
-                                        struct sched_group **sg,
-                                        struct cpumask *tmpmask),
-                        struct cpumask *covered, struct cpumask *tmpmask)
-{
-        struct sched_group *first = NULL, *last = NULL;
-        int i;
-
-        cpumask_clear(covered);
-
-        for_each_cpu(i, span) {
-                struct sched_group *sg;
-                int group = group_fn(i, cpu_map, &sg, tmpmask);
-                int j;
-
-                if (cpumask_test_cpu(i, covered))
-                        continue;
-
-                cpumask_clear(sched_group_cpus(sg));
-                sg->cpu_power = 0;
-
-                for_each_cpu(j, span) {
-                        if (group_fn(j, cpu_map, NULL, tmpmask) != group)
-                                continue;
-
-                        cpumask_set_cpu(j, covered);
-                        cpumask_set_cpu(j, sched_group_cpus(sg));
-                }
-                if (!first)
-                        first = sg;
-                if (last)
-                        last->next = sg;
-                last = sg;
-        }
-        last->next = first;
-}
-
 #define SD_NODES_PER_DOMAIN 16

 #ifdef CONFIG_NUMA
@@ -6858,154 +6832,96 @@ struct static_sched_domain {
         DECLARE_BITMAP(span, CONFIG_NR_CPUS);
 };

+struct sd_data {
+        struct sched_domain **__percpu sd;
+        struct sched_group **__percpu sg;
+};
+
 struct s_data {
 #ifdef CONFIG_NUMA
         int sd_allnodes;
 #endif
         cpumask_var_t nodemask;
         cpumask_var_t send_covered;
-        cpumask_var_t tmpmask;
         struct sched_domain ** __percpu sd;
+        struct sd_data sdd[SD_LV_MAX];
         struct root_domain *rd;
 };

 enum s_alloc {
         sa_rootdomain,
         sa_sd,
-        sa_tmpmask,
+        sa_sd_storage,
         sa_send_covered,
         sa_nodemask,
         sa_none,
 };

 /*
- * SMT sched-domains:
+ * Assumes the sched_domain tree is fully constructed
  */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
-
-static int
-cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
-                 struct sched_group **sg, struct cpumask *unused)
+static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
-        if (sg)
-                *sg = &per_cpu(sched_groups, cpu).sg;
-        return cpu;
-}
-#endif /* CONFIG_SCHED_SMT */
+        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+        struct sched_domain *child = sd->child;

-/*
- * multi-core sched-domains:
- */
-#ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
+        if (child)
+                cpu = cpumask_first(sched_domain_span(child));

-static int
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
-                  struct sched_group **sg, struct cpumask *mask)
-{
-        int group;
-#ifdef CONFIG_SCHED_SMT
-        cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-        group = cpumask_first(mask);
-#else
-        group = cpu;
-#endif
         if (sg)
-                *sg = &per_cpu(sched_group_core, group).sg;
-        return group;
+                *sg = *per_cpu_ptr(sdd->sg, cpu);
+
+        return cpu;
 }
-#endif /* CONFIG_SCHED_MC */

 /*
- * book sched-domains:
+ * build_sched_groups takes the cpumask we wish to span, and a pointer
+ * to a function which identifies what group(along with sched group) a CPU
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
+ *
+ * build_sched_groups will build a circular linked list of the groups
+ * covered by the given span, and will set each group's ->cpumask correctly,
+ * and ->cpu_power to 0.
  */
-#ifdef CONFIG_SCHED_BOOK
-static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
-
-static int
-cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
-                  struct sched_group **sg, struct cpumask *mask)
-{
-        int group = cpu;
-#ifdef CONFIG_SCHED_MC
-        cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-        group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-        cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-        group = cpumask_first(mask);
-#endif
-        if (sg)
-                *sg = &per_cpu(sched_group_book, group).sg;
-        return group;
-}
-#endif /* CONFIG_SCHED_BOOK */
-
-static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
-
-static int
-cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
-                  struct sched_group **sg, struct cpumask *mask)
+static void
+build_sched_groups(struct sched_domain *sd, struct cpumask *covered)
 {
-        int group;
-#ifdef CONFIG_SCHED_BOOK
-        cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
-        group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_MC)
-        cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-        group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-        cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-        group = cpumask_first(mask);
-#else
-        group = cpu;
-#endif
-        if (sg)
-                *sg = &per_cpu(sched_group_phys, group).sg;
-        return group;
-}
-
-#ifdef CONFIG_NUMA
-static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_node);
+        struct sched_group *first = NULL, *last = NULL;
+        struct sd_data *sdd = sd->private;
+        const struct cpumask *span = sched_domain_span(sd);
+        int i;

-static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map,
-                             struct sched_group **sg,
-                             struct cpumask *nodemask)
-{
-        int group;
+        cpumask_clear(covered);

-        cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-        group = cpumask_first(nodemask);
+        for_each_cpu(i, span) {
+                struct sched_group *sg;
+                int group = get_group(i, sdd, &sg);
+                int j;

-        if (sg)
-                *sg = &per_cpu(sched_group_node, group).sg;
-        return group;
-}
+                if (cpumask_test_cpu(i, covered))
+                        continue;

-static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
+                cpumask_clear(sched_group_cpus(sg));
+                sg->cpu_power = 0;

-static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
-                                 struct sched_group **sg,
-                                 struct cpumask *nodemask)
-{
-        int group;
+                for_each_cpu(j, span) {
+                        if (get_group(j, sdd, NULL) != group)
+                                continue;

-        cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-        group = cpumask_first(nodemask);
+                        cpumask_set_cpu(j, covered);
+                        cpumask_set_cpu(j, sched_group_cpus(sg));
+                }

-        if (sg)
-                *sg = &per_cpu(sched_group_allnodes, group).sg;
-        return group;
+                if (!first)
+                        first = sg;
+                if (last)
+                        last->next = sg;
+                last = sg;
+        }
+        last->next = first;
 }

-#endif /* CONFIG_NUMA */
-
 /*
  * Initialize sched groups cpu_power.
  *
@@ -7039,15 +6955,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 # define SD_INIT_NAME(sd, type) do { } while (0)
 #endif

-#define SD_INIT(sd, type)       sd_init_##type(sd)
-
-#define SD_INIT_FUNC(type)      \
-static noinline void sd_init_##type(struct sched_domain *sd)    \
-{                                                               \
-        memset(sd, 0, sizeof(*sd));                             \
-        *sd = SD_##type##_INIT;                                 \
-        sd->level = SD_LV_##type;                               \
-        SD_INIT_NAME(sd, type);                                 \
+#define SD_INIT_FUNC(type)                                              \
+static noinline struct sched_domain *sd_init_##type(struct s_data *d, int cpu) \
+{                                                                       \
+        struct sched_domain *sd = *per_cpu_ptr(d->sdd[SD_LV_##type].sd, cpu); \
+        *sd = SD_##type##_INIT;                                         \
+        sd->level = SD_LV_##type;                                       \
+        SD_INIT_NAME(sd, type);                                         \
+        sd->private = &d->sdd[SD_LV_##type];                            \
+        return sd;                                                      \
 }

 SD_INIT_FUNC(CPU)
@@ -7103,13 +7019,22 @@ static void set_domain_attribute(struct sched_domain *sd,
 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
                                  const struct cpumask *cpu_map)
 {
+        int i, j;
+
         switch (what) {
         case sa_rootdomain:
-                free_rootdomain(d->rd); /* fall through */
+                free_rootdomain(&d->rd->rcu); /* fall through */
         case sa_sd:
                 free_percpu(d->sd); /* fall through */
-        case sa_tmpmask:
-                free_cpumask_var(d->tmpmask); /* fall through */
+        case sa_sd_storage:
+                for (i = 0; i < SD_LV_MAX; i++) {
+                        for_each_cpu(j, cpu_map) {
+                                kfree(*per_cpu_ptr(d->sdd[i].sd, j));
+                                kfree(*per_cpu_ptr(d->sdd[i].sg, j));
+                        }
+                        free_percpu(d->sdd[i].sd);
+                        free_percpu(d->sdd[i].sg);
+                } /* fall through */
         case sa_send_covered:
                 free_cpumask_var(d->send_covered); /* fall through */
         case sa_nodemask:
@@ -7122,25 +7047,70 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
                                                    const struct cpumask *cpu_map)
 {
+        int i, j;
+
+        memset(d, 0, sizeof(*d));
+
         if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
                 return sa_none;
         if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
                 return sa_nodemask;
-        if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
-                return sa_send_covered;
-        d->sd = alloc_percpu(struct sched_domain *);
-        if (!d->sd) {
-                printk(KERN_WARNING "Cannot alloc per-cpu pointers\n");
-                return sa_tmpmask;
+        for (i = 0; i < SD_LV_MAX; i++) {
+                d->sdd[i].sd = alloc_percpu(struct sched_domain *);
+                if (!d->sdd[i].sd)
+                        return sa_sd_storage;
+
+                d->sdd[i].sg = alloc_percpu(struct sched_group *);
+                if (!d->sdd[i].sg)
+                        return sa_sd_storage;
+
+                for_each_cpu(j, cpu_map) {
+                        struct sched_domain *sd;
+                        struct sched_group *sg;
+
+                        sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+                                        GFP_KERNEL, cpu_to_node(j));
+                        if (!sd)
+                                return sa_sd_storage;
+
+                        *per_cpu_ptr(d->sdd[i].sd, j) = sd;
+
+                        sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+                                        GFP_KERNEL, cpu_to_node(j));
+                        if (!sg)
+                                return sa_sd_storage;
+
+                        *per_cpu_ptr(d->sdd[i].sg, j) = sg;
+                }
         }
+        d->sd = alloc_percpu(struct sched_domain *);
+        if (!d->sd)
+                return sa_sd_storage;
         d->rd = alloc_rootdomain();
-        if (!d->rd) {
-                printk(KERN_WARNING "Cannot alloc root domain\n");
+        if (!d->rd)
                 return sa_sd;
-        }
         return sa_rootdomain;
 }

+/*
+ * NULL the sd_data elements we've used to build the sched_domain and
+ * sched_group structure so that the subsequent __free_domain_allocs()
+ * will not free the data we're using.
+ */
+static void claim_allocations(int cpu, struct sched_domain *sd)
+{
+        struct sd_data *sdd = sd->private;
+        struct sched_group *sg = sd->groups;
+
+        WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
+        *per_cpu_ptr(sdd->sd, cpu) = NULL;
+
+        if (cpu == cpumask_first(sched_group_cpus(sg))) {
+                WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+                *per_cpu_ptr(sdd->sg, cpu) = NULL;
+        }
+}
+
 static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
         const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
 {
@@ -7151,24 +7121,20 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
         d->sd_allnodes = 0;
         if (cpumask_weight(cpu_map) >
             SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
-                sd = &per_cpu(allnodes_domains, i).sd;
-                SD_INIT(sd, ALLNODES);
+                sd = sd_init_ALLNODES(d, i);
                 set_domain_attribute(sd, attr);
                 cpumask_copy(sched_domain_span(sd), cpu_map);
-                cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
                 d->sd_allnodes = 1;
         }
         parent = sd;

-        sd = &per_cpu(node_domains, i).sd;
-        SD_INIT(sd, NODE);
+        sd = sd_init_NODE(d, i);
         set_domain_attribute(sd, attr);
         sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
         sd->parent = parent;
         if (parent)
                 parent->child = sd;
         cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
-        cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
         return sd;
 }
@@ -7178,14 +7144,12 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
         struct sched_domain *parent, int i)
 {
         struct sched_domain *sd;
-        sd = &per_cpu(phys_domains, i).sd;
-        SD_INIT(sd, CPU);
+        sd = sd_init_CPU(d, i);
         set_domain_attribute(sd, attr);
         cpumask_copy(sched_domain_span(sd), d->nodemask);
         sd->parent = parent;
         if (parent)
                 parent->child = sd;
-        cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
         return sd;
 }

@@ -7195,13 +7159,11 @@ static struct sched_domain *__build_book_sched_domain(struct s_data *d,
 {
         struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_BOOK
-        sd = &per_cpu(book_domains, i).sd;
-        SD_INIT(sd, BOOK);
+        sd = sd_init_BOOK(d, i);
         set_domain_attribute(sd, attr);
         cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
         sd->parent = parent;
         parent->child = sd;
-        cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
         return sd;
 }
@@ -7212,13 +7174,11 @@ static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
 {
         struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_MC
-        sd = &per_cpu(core_domains, i).sd;
-        SD_INIT(sd, MC);
+        sd = sd_init_MC(d, i);
         set_domain_attribute(sd, attr);
         cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
         sd->parent = parent;
         parent->child = sd;
-        cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
         return sd;
 }
@@ -7229,92 +7189,32 @@ static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
 {
         struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_SMT
-        sd = &per_cpu(cpu_domains, i).sd;
-        SD_INIT(sd, SIBLING);
+        sd = sd_init_SIBLING(d, i);
         set_domain_attribute(sd, attr);
         cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
         sd->parent = parent;
         parent->child = sd;
-        cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
         return sd;
 }

-static void build_sched_groups(struct s_data *d, struct sched_domain *sd,
-                               const struct cpumask *cpu_map, int cpu)
-{
-        switch (sd->level) {
-#ifdef CONFIG_SCHED_SMT
-        case SD_LV_SIBLING: /* set up CPU (sibling) groups */
-                if (cpu == cpumask_first(sched_domain_span(sd)))
-                        init_sched_build_groups(sched_domain_span(sd), cpu_map,
-                                                &cpu_to_cpu_group,
-                                                d->send_covered, d->tmpmask);
-                break;
-#endif
-#ifdef CONFIG_SCHED_MC
-        case SD_LV_MC: /* set up multi-core groups */
-                if (cpu == cpumask_first(sched_domain_span(sd)))
-                        init_sched_build_groups(sched_domain_span(sd), cpu_map,
-                                                &cpu_to_core_group,
-                                                d->send_covered, d->tmpmask);
-                break;
-#endif
-#ifdef CONFIG_SCHED_BOOK
-        case SD_LV_BOOK: /* set up book groups */
-                if (cpu == cpumask_first(sched_domain_span(sd)))
-                        init_sched_build_groups(sched_domain_span(sd), cpu_map,
-                                                &cpu_to_book_group,
-                                                d->send_covered, d->tmpmask);
-                break;
-#endif
-        case SD_LV_CPU: /* set up physical groups */
-                if (cpu == cpumask_first(sched_domain_span(sd)))
-                        init_sched_build_groups(sched_domain_span(sd), cpu_map,
-                                                &cpu_to_phys_group,
-                                                d->send_covered, d->tmpmask);
-                break;
-#ifdef CONFIG_NUMA
-        case SD_LV_NODE:
-                if (cpu == cpumask_first(sched_domain_span(sd)))
-                        init_sched_build_groups(sched_domain_span(sd), cpu_map,
-                                                &cpu_to_node_group,
-                                                d->send_covered, d->tmpmask);
-
-        case SD_LV_ALLNODES:
-                if (cpu == cpumask_first(cpu_map))
-                        init_sched_build_groups(cpu_map, cpu_map,
-                                                &cpu_to_allnodes_group,
-                                                d->send_covered, d->tmpmask);
-                break;
-#endif
-        default:
-                break;
-        }
-}
-
 /*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const struct cpumask *cpu_map,
-                                 struct sched_domain_attr *attr)
+static int build_sched_domains(const struct cpumask *cpu_map,
+                               struct sched_domain_attr *attr)
 {
         enum s_alloc alloc_state = sa_none;
+        struct sched_domain *sd;
         struct s_data d;
-        struct sched_domain *sd, *tmp;
         int i;
-#ifdef CONFIG_NUMA
-        d.sd_allnodes = 0;
-#endif

         alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
         if (alloc_state != sa_rootdomain)
                 goto error;

-        /*
-         * Set up domains for cpus specified by the cpu_map.
-         */
+        /* Set up domains for cpus specified by the cpu_map. */
         for_each_cpu(i, cpu_map) {
                 cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
                             cpu_map);
@@ -7326,10 +7226,19 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);

                 *per_cpu_ptr(d.sd, i) = sd;
+        }
+
+        /* Build the groups for the domains */
+        for_each_cpu(i, cpu_map) {
+                for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+                        sd->span_weight = cpumask_weight(sched_domain_span(sd));
+                        get_group(i, sd->private, &sd->groups);
+                        atomic_inc(&sd->groups->ref);

-                for (tmp = sd; tmp; tmp = tmp->parent) {
-                        tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
-                        build_sched_groups(&d, tmp, cpu_map, i);
+                        if (i != cpumask_first(sched_domain_span(sd)))
+                                continue;
+
+                        build_sched_groups(sd, d.send_covered);
                 }
         }

@@ -7338,18 +7247,21 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 if (!cpumask_test_cpu(i, cpu_map))
                         continue;

-                sd = *per_cpu_ptr(d.sd, i);
-                for (; sd; sd = sd->parent)
+                for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+                        claim_allocations(i, sd);
                         init_sched_groups_power(i, sd);
+                }
         }

         /* Attach the domains */
+        rcu_read_lock();
         for_each_cpu(i, cpu_map) {
                 sd = *per_cpu_ptr(d.sd, i);
                 cpu_attach_domain(sd, d.rd, i);
         }
+        rcu_read_unlock();

-        __free_domain_allocs(&d, sa_tmpmask, cpu_map);
+        __free_domain_allocs(&d, sa_sd, cpu_map);
         return 0;

 error:
@@ -7357,11 +7269,6 @@ error:
         return -ENOMEM;
 }

-static int build_sched_domains(const struct cpumask *cpu_map)
-{
-        return __build_sched_domains(cpu_map, NULL);
-}
-
 static cpumask_var_t *doms_cur; /* current sched domains */
 static int ndoms_cur;           /* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
@@ -7425,31 +7332,24 @@ static int init_sched_domains(const struct cpumask *cpu_map)
         doms_cur = &fallback_doms;
         cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
         dattr_cur = NULL;
-        err = build_sched_domains(doms_cur[0]);
+        err = build_sched_domains(doms_cur[0], NULL);
         register_sched_domain_sysctl();

         return err;
 }

-static void destroy_sched_domains(const struct cpumask *cpu_map,
-                                  struct cpumask *tmpmask)
-{
-}
-
 /*
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
 static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-        /* Save because hotplug lock held. */
-        static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
         int i;

+        rcu_read_lock();
         for_each_cpu(i, cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
-        synchronize_sched();
-        destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
+        rcu_read_unlock();
 }

 /* handle null as "default" */
@@ -7538,8 +7438,7 @@ match1:
                 goto match2;
         }
         /* no match - add a new doms_new */
-        __build_sched_domains(doms_new[i],
-                              dattr_new ? dattr_new + i : NULL);
+        build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
 match2:
         ;
 }