author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-04-07 08:09:50 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-11 06:58:19 -0400
commit		dce840a08702bd13a9a186e07e63d1ef82256b5e (patch)
tree		168bb98aed7f5761ebe31aa92c34959e9d0f238a /kernel/sched.c
parent		a9c9a9b6bff27ac9c746344a9c1a19bf3327002c (diff)
sched: Dynamically allocate sched_domain/sched_group data-structures
Instead of relying on static allocations for the sched_domain and sched_group trees, dynamically allocate and RCU-free them.

Allocating this dynamically also allows for some build_sched_groups() simplification, since we can now (as with the other simplifications) rely on the sched_domain tree instead of hard-coded knowledge.

One tricky thing to note is that detach_destroy_domains() needs to hold rcu_read_lock() over the entire tear-down; per-cpu is not sufficient since that can lead to partial sched_group existence (this could possibly be solved by doing the tear-down backwards, but the current approach is much more robust).

A consequence of the above is that we can no longer print the sched_domain debug stuff from cpu_attach_domain(), since that might now run with preemption disabled (due to classic RCU etc.) and sched_domain_debug() does some GFP_KERNEL allocations.

Another thing to note is that we now fully rely on normal RCU and not RCU-sched. This is because, with the new and exciting RCU flavours we grew over the years, BH doesn't necessarily hold off RCU-sched grace periods (-rt is known to break this). This would in fact already cause us grief, since we do sched_domain/sched_group iterations from softirq context.

This patch is somewhat larger than I would like it to be, but I didn't find any means of shrinking/splitting it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
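The central pattern the patch moves to is deferring the free of an object through an RCU callback embedded in the object itself (as free_rootdomain() and free_sched_domain() do below), instead of blocking in synchronize_sched(). A minimal sketch of that pattern follows; the my_obj type and function names are illustrative only and are not part of this patch or of kernel/sched.c:

	#include <linux/slab.h>
	#include <linux/rcupdate.h>

	struct my_obj {
		int data;
		struct rcu_head rcu;	/* embedded so call_rcu() has something to queue */
	};

	static void my_obj_free_rcu(struct rcu_head *rcu)
	{
		/* Recover the enclosing object from the embedded rcu_head. */
		struct my_obj *obj = container_of(rcu, struct my_obj, rcu);

		kfree(obj);
	}

	static void my_obj_release(struct my_obj *obj)
	{
		/*
		 * Readers traverse the structure under rcu_read_lock(); the
		 * actual kfree() only happens after a grace period, without
		 * the releasing context having to block for it.
		 */
		call_rcu(&obj->rcu, my_obj_free_rcu);
	}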
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	479
1 file changed, 189 insertions, 290 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cca59ec4a4..65204845063 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -417,6 +417,7 @@ struct rt_rq {
  */
 struct root_domain {
 	atomic_t refcount;
+	struct rcu_head rcu;
 	cpumask_var_t span;
 	cpumask_var_t online;
 
@@ -571,7 +572,7 @@ static inline int cpu_of(struct rq *rq)
 
 #define rcu_dereference_check_sched_domain(p) \
 	rcu_dereference_check((p), \
-			      rcu_read_lock_sched_held() || \
+			      rcu_read_lock_held() || \
 			      lockdep_is_held(&sched_domains_mutex))
 
 /*
@@ -6572,12 +6573,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
-static void free_rootdomain(struct root_domain *rd)
+static void free_rootdomain(struct rcu_head *rcu)
 {
-	synchronize_sched();
+	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
 
 	cpupri_cleanup(&rd->cpupri);
-
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
@@ -6618,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	if (old_rd)
-		free_rootdomain(old_rd);
+		call_rcu_sched(&old_rd->rcu, free_rootdomain);
 }
 
 static int init_rootdomain(struct root_domain *rd)
@@ -6669,6 +6669,25 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_domain(struct rcu_head *rcu)
+{
+	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+	if (atomic_dec_and_test(&sd->groups->ref))
+		kfree(sd->groups);
+	kfree(sd);
+}
+
+static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+{
+	call_rcu(&sd->rcu, free_sched_domain);
+}
+
+static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+{
+	for (; sd; sd = sd->parent)
+		destroy_sched_domain(sd, cpu);
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
  * hold the hotplug lock.
@@ -6689,20 +6708,25 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 			tmp->parent = parent->parent;
 			if (parent->parent)
 				parent->parent->child = tmp;
+			destroy_sched_domain(parent, cpu);
 		} else
 			tmp = tmp->parent;
 	}
 
 	if (sd && sd_degenerate(sd)) {
+		tmp = sd;
 		sd = sd->parent;
+		destroy_sched_domain(tmp, cpu);
 		if (sd)
 			sd->child = NULL;
 	}
 
-	sched_domain_debug(sd, cpu);
+	/* sched_domain_debug(sd, cpu); */
 
 	rq_attach_root(rq, rd);
+	tmp = rq->sd;
 	rcu_assign_pointer(rq->sd, sd);
+	destroy_sched_domains(tmp, cpu);
 }
 
 /* cpus with isolated domains */
@@ -6718,56 +6742,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-/*
- * init_sched_build_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
- * init_sched_build_groups will build a circular linked list of the groups
- * covered by the given span, and will set each group's ->cpumask correctly,
- * and ->cpu_power to 0.
- */
-static void
-init_sched_build_groups(const struct cpumask *span,
-			const struct cpumask *cpu_map,
-			int (*group_fn)(int cpu, const struct cpumask *cpu_map,
-					struct sched_group **sg,
-					struct cpumask *tmpmask),
-			struct cpumask *covered, struct cpumask *tmpmask)
-{
-	struct sched_group *first = NULL, *last = NULL;
-	int i;
-
-	cpumask_clear(covered);
-
-	for_each_cpu(i, span) {
-		struct sched_group *sg;
-		int group = group_fn(i, cpu_map, &sg, tmpmask);
-		int j;
-
-		if (cpumask_test_cpu(i, covered))
-			continue;
-
-		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
-
-		for_each_cpu(j, span) {
-			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
-				continue;
-
-			cpumask_set_cpu(j, covered);
-			cpumask_set_cpu(j, sched_group_cpus(sg));
-		}
-		if (!first)
-			first = sg;
-		if (last)
-			last->next = sg;
-		last = sg;
-	}
-	last->next = first;
-}
-
 #define SD_NODES_PER_DOMAIN 16
 
 #ifdef CONFIG_NUMA
@@ -6858,154 +6832,96 @@ struct static_sched_domain {
 	DECLARE_BITMAP(span, CONFIG_NR_CPUS);
 };
 
+struct sd_data {
+	struct sched_domain **__percpu sd;
+	struct sched_group **__percpu sg;
+};
+
 struct s_data {
 #ifdef CONFIG_NUMA
 	int sd_allnodes;
 #endif
 	cpumask_var_t nodemask;
 	cpumask_var_t send_covered;
-	cpumask_var_t tmpmask;
 	struct sched_domain ** __percpu sd;
+	struct sd_data sdd[SD_LV_MAX];
 	struct root_domain *rd;
 };
 
 enum s_alloc {
 	sa_rootdomain,
 	sa_sd,
-	sa_tmpmask,
+	sa_sd_storage,
 	sa_send_covered,
 	sa_nodemask,
 	sa_none,
 };
 
 /*
- * SMT sched-domains:
+ * Assumes the sched_domain tree is fully constructed
  */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
-
-static int
-cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
-		 struct sched_group **sg, struct cpumask *unused)
+static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
-	if (sg)
-		*sg = &per_cpu(sched_groups, cpu).sg;
-	return cpu;
-}
-#endif /* CONFIG_SCHED_SMT */
+	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+	struct sched_domain *child = sd->child;
 
-/*
- * multi-core sched-domains:
- */
-#ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
+	if (child)
+		cpu = cpumask_first(sched_domain_span(child));
 
-static int
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
-{
-	int group;
-#ifdef CONFIG_SCHED_SMT
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#else
-	group = cpu;
-#endif
 	if (sg)
-		*sg = &per_cpu(sched_group_core, group).sg;
-	return group;
+		*sg = *per_cpu_ptr(sdd->sg, cpu);
+
+	return cpu;
 }
-#endif /* CONFIG_SCHED_MC */
 
 /*
- * book sched-domains:
+ * build_sched_groups takes the cpumask we wish to span, and a pointer
+ * to a function which identifies what group(along with sched group) a CPU
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
+ *
+ * build_sched_groups will build a circular linked list of the groups
+ * covered by the given span, and will set each group's ->cpumask correctly,
+ * and ->cpu_power to 0.
  */
-#ifdef CONFIG_SCHED_BOOK
-static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
-
-static int
-cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
-{
-	int group = cpu;
-#ifdef CONFIG_SCHED_MC
-	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#endif
-	if (sg)
-		*sg = &per_cpu(sched_group_book, group).sg;
-	return group;
-}
-#endif /* CONFIG_SCHED_BOOK */
-
-static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
-
-static int
-cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
+static void
+build_sched_groups(struct sched_domain *sd, struct cpumask *covered)
 {
-	int group;
-#ifdef CONFIG_SCHED_BOOK
-	cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_MC)
-	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#else
-	group = cpu;
-#endif
-	if (sg)
-		*sg = &per_cpu(sched_group_phys, group).sg;
-	return group;
-}
-
-#ifdef CONFIG_NUMA
-static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_node);
+	struct sched_group *first = NULL, *last = NULL;
+	struct sd_data *sdd = sd->private;
+	const struct cpumask *span = sched_domain_span(sd);
+	int i;
 
-static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map,
-			     struct sched_group **sg,
-			     struct cpumask *nodemask)
-{
-	int group;
+	cpumask_clear(covered);
 
-	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-	group = cpumask_first(nodemask);
+	for_each_cpu(i, span) {
+		struct sched_group *sg;
+		int group = get_group(i, sdd, &sg);
+		int j;
 
-	if (sg)
-		*sg = &per_cpu(sched_group_node, group).sg;
-	return group;
-}
+		if (cpumask_test_cpu(i, covered))
+			continue;
 
-static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
+		cpumask_clear(sched_group_cpus(sg));
+		sg->cpu_power = 0;
 
-static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
-				 struct sched_group **sg,
-				 struct cpumask *nodemask)
-{
-	int group;
+		for_each_cpu(j, span) {
+			if (get_group(j, sdd, NULL) != group)
+				continue;
 
-	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-	group = cpumask_first(nodemask);
+			cpumask_set_cpu(j, covered);
+			cpumask_set_cpu(j, sched_group_cpus(sg));
+		}
 
-	if (sg)
-		*sg = &per_cpu(sched_group_allnodes, group).sg;
-	return group;
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+	}
+	last->next = first;
 }
 
-#endif /* CONFIG_NUMA */
-
 /*
  * Initialize sched groups cpu_power.
  *
@@ -7039,15 +6955,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 # define SD_INIT_NAME(sd, type)	do { } while (0)
 #endif
 
-#define SD_INIT(sd, type)	sd_init_##type(sd)
-
-#define SD_INIT_FUNC(type)	\
-static noinline void sd_init_##type(struct sched_domain *sd)	\
-{	\
-	memset(sd, 0, sizeof(*sd));	\
-	*sd = SD_##type##_INIT;	\
-	sd->level = SD_LV_##type;	\
-	SD_INIT_NAME(sd, type);	\
+#define SD_INIT_FUNC(type)						\
+static noinline struct sched_domain *sd_init_##type(struct s_data *d, int cpu)	\
+{									\
+	struct sched_domain *sd = *per_cpu_ptr(d->sdd[SD_LV_##type].sd, cpu); \
+	*sd = SD_##type##_INIT;						\
+	sd->level = SD_LV_##type;					\
+	SD_INIT_NAME(sd, type);						\
+	sd->private = &d->sdd[SD_LV_##type];				\
+	return sd;							\
 }
 
 SD_INIT_FUNC(CPU)
@@ -7103,13 +7019,22 @@ static void set_domain_attribute(struct sched_domain *sd,
 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 				 const struct cpumask *cpu_map)
 {
+	int i, j;
+
 	switch (what) {
 	case sa_rootdomain:
-		free_rootdomain(d->rd); /* fall through */
+		free_rootdomain(&d->rd->rcu); /* fall through */
 	case sa_sd:
 		free_percpu(d->sd); /* fall through */
-	case sa_tmpmask:
-		free_cpumask_var(d->tmpmask); /* fall through */
+	case sa_sd_storage:
+		for (i = 0; i < SD_LV_MAX; i++) {
+			for_each_cpu(j, cpu_map) {
+				kfree(*per_cpu_ptr(d->sdd[i].sd, j));
+				kfree(*per_cpu_ptr(d->sdd[i].sg, j));
+			}
+			free_percpu(d->sdd[i].sd);
+			free_percpu(d->sdd[i].sg);
+		} /* fall through */
 	case sa_send_covered:
 		free_cpumask_var(d->send_covered); /* fall through */
 	case sa_nodemask:
@@ -7122,25 +7047,70 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 						   const struct cpumask *cpu_map)
 {
+	int i, j;
+
+	memset(d, 0, sizeof(*d));
+
 	if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
 		return sa_none;
 	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
 		return sa_nodemask;
-	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
-		return sa_send_covered;
-	d->sd = alloc_percpu(struct sched_domain *);
-	if (!d->sd) {
-		printk(KERN_WARNING "Cannot alloc per-cpu pointers\n");
-		return sa_tmpmask;
+	for (i = 0; i < SD_LV_MAX; i++) {
+		d->sdd[i].sd = alloc_percpu(struct sched_domain *);
+		if (!d->sdd[i].sd)
+			return sa_sd_storage;
+
+		d->sdd[i].sg = alloc_percpu(struct sched_group *);
+		if (!d->sdd[i].sg)
+			return sa_sd_storage;
+
+		for_each_cpu(j, cpu_map) {
+			struct sched_domain *sd;
+			struct sched_group *sg;
+
+			sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sd)
+				return sa_sd_storage;
+
+			*per_cpu_ptr(d->sdd[i].sd, j) = sd;
+
+			sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sg)
+				return sa_sd_storage;
+
+			*per_cpu_ptr(d->sdd[i].sg, j) = sg;
+		}
 	}
+	d->sd = alloc_percpu(struct sched_domain *);
+	if (!d->sd)
+		return sa_sd_storage;
+
 	d->rd = alloc_rootdomain();
-	if (!d->rd) {
-		printk(KERN_WARNING "Cannot alloc root domain\n");
+	if (!d->rd)
 		return sa_sd;
-	}
 	return sa_rootdomain;
 }
 
+/*
+ * NULL the sd_data elements we've used to build the sched_domain and
+ * sched_group structure so that the subsequent __free_domain_allocs()
+ * will not free the data we're using.
+ */
+static void claim_allocations(int cpu, struct sched_domain *sd)
+{
+	struct sd_data *sdd = sd->private;
+	struct sched_group *sg = sd->groups;
+
+	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
+	*per_cpu_ptr(sdd->sd, cpu) = NULL;
+
+	if (cpu == cpumask_first(sched_group_cpus(sg))) {
+		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+		*per_cpu_ptr(sdd->sg, cpu) = NULL;
+	}
+}
+
 static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
 {
@@ -7151,24 +7121,20 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
 	d->sd_allnodes = 0;
 	if (cpumask_weight(cpu_map) >
 			SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
-		sd = &per_cpu(allnodes_domains, i).sd;
-		SD_INIT(sd, ALLNODES);
+		sd = sd_init_ALLNODES(d, i);
 		set_domain_attribute(sd, attr);
 		cpumask_copy(sched_domain_span(sd), cpu_map);
-		cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
 		d->sd_allnodes = 1;
 	}
 	parent = sd;
 
-	sd = &per_cpu(node_domains, i).sd;
-	SD_INIT(sd, NODE);
+	sd = sd_init_NODE(d, i);
 	set_domain_attribute(sd, attr);
 	sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
 	sd->parent = parent;
 	if (parent)
 		parent->child = sd;
 	cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
-	cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
@@ -7178,14 +7144,12 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
 			struct sched_domain *parent, int i)
 {
 	struct sched_domain *sd;
-	sd = &per_cpu(phys_domains, i).sd;
-	SD_INIT(sd, CPU);
+	sd = sd_init_CPU(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_copy(sched_domain_span(sd), d->nodemask);
 	sd->parent = parent;
 	if (parent)
 		parent->child = sd;
-	cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
 	return sd;
 }
 
@@ -7195,13 +7159,11 @@ static struct sched_domain *__build_book_sched_domain(struct s_data *d,
 {
 	struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_BOOK
-	sd = &per_cpu(book_domains, i).sd;
-	SD_INIT(sd, BOOK);
+	sd = sd_init_BOOK(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
 	sd->parent = parent;
 	parent->child = sd;
-	cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
@@ -7212,13 +7174,11 @@ static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
 {
 	struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_MC
-	sd = &per_cpu(core_domains, i).sd;
-	SD_INIT(sd, MC);
+	sd = sd_init_MC(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
 	sd->parent = parent;
 	parent->child = sd;
-	cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
@@ -7229,92 +7189,32 @@ static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
 {
 	struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_SMT
-	sd = &per_cpu(cpu_domains, i).sd;
-	SD_INIT(sd, SIBLING);
+	sd = sd_init_SIBLING(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
 	sd->parent = parent;
 	parent->child = sd;
-	cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
 
-static void build_sched_groups(struct s_data *d, struct sched_domain *sd,
-			       const struct cpumask *cpu_map, int cpu)
-{
-	switch (sd->level) {
-#ifdef CONFIG_SCHED_SMT
-	case SD_LV_SIBLING: /* set up CPU (sibling) groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_cpu_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-#ifdef CONFIG_SCHED_MC
-	case SD_LV_MC: /* set up multi-core groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_core_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	case SD_LV_BOOK: /* set up book groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_book_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-	case SD_LV_CPU: /* set up physical groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_phys_group,
-						d->send_covered, d->tmpmask);
-		break;
-#ifdef CONFIG_NUMA
-	case SD_LV_NODE:
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_node_group,
-						d->send_covered, d->tmpmask);
-
-	case SD_LV_ALLNODES:
-		if (cpu == cpumask_first(cpu_map))
-			init_sched_build_groups(cpu_map, cpu_map,
-						&cpu_to_allnodes_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-	default:
-		break;
-	}
-}
-
 /*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const struct cpumask *cpu_map,
+static int build_sched_domains(const struct cpumask *cpu_map,
 				 struct sched_domain_attr *attr)
 {
 	enum s_alloc alloc_state = sa_none;
+	struct sched_domain *sd;
 	struct s_data d;
-	struct sched_domain *sd, *tmp;
 	int i;
-#ifdef CONFIG_NUMA
-	d.sd_allnodes = 0;
-#endif
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
 	if (alloc_state != sa_rootdomain)
 		goto error;
 
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
+	/* Set up domains for cpus specified by the cpu_map. */
 	for_each_cpu(i, cpu_map) {
 		cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
 			    cpu_map);
@@ -7326,10 +7226,19 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
 
 		*per_cpu_ptr(d.sd, i) = sd;
+	}
+
+	/* Build the groups for the domains */
+	for_each_cpu(i, cpu_map) {
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			sd->span_weight = cpumask_weight(sched_domain_span(sd));
+			get_group(i, sd->private, &sd->groups);
+			atomic_inc(&sd->groups->ref);
 
-		for (tmp = sd; tmp; tmp = tmp->parent) {
-			tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
-			build_sched_groups(&d, tmp, cpu_map, i);
+			if (i != cpumask_first(sched_domain_span(sd)))
+				continue;
+
+			build_sched_groups(sd, d.send_covered);
 		}
 	}
 
@@ -7338,18 +7247,21 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		if (!cpumask_test_cpu(i, cpu_map))
 			continue;
 
-		sd = *per_cpu_ptr(d.sd, i);
-		for (; sd; sd = sd->parent)
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			claim_allocations(i, sd);
 			init_sched_groups_power(i, sd);
+		}
 	}
 
 	/* Attach the domains */
+	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
 		sd = *per_cpu_ptr(d.sd, i);
 		cpu_attach_domain(sd, d.rd, i);
 	}
+	rcu_read_unlock();
 
-	__free_domain_allocs(&d, sa_tmpmask, cpu_map);
+	__free_domain_allocs(&d, sa_sd, cpu_map);
 	return 0;
 
 error:
@@ -7357,11 +7269,6 @@ error:
 	return -ENOMEM;
 }
 
-static int build_sched_domains(const struct cpumask *cpu_map)
-{
-	return __build_sched_domains(cpu_map, NULL);
-}
-
 static cpumask_var_t *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
@@ -7425,31 +7332,24 @@ static int init_sched_domains(const struct cpumask *cpu_map)
 	doms_cur = &fallback_doms;
 	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
-	err = build_sched_domains(doms_cur[0]);
+	err = build_sched_domains(doms_cur[0], NULL);
 	register_sched_domain_sysctl();
 
 	return err;
 }
 
-static void destroy_sched_domains(const struct cpumask *cpu_map,
-				  struct cpumask *tmpmask)
-{
-}
-
 /*
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
 static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-	/* Save because hotplug lock held. */
-	static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 	int i;
 
+	rcu_read_lock();
 	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
-	synchronize_sched();
-	destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
+	rcu_read_unlock();
 }
 
 /* handle null as "default" */
@@ -7538,8 +7438,7 @@ match1:
 		goto match2;
 	}
 	/* no match - add a new doms_new */
-	__build_sched_domains(doms_new[i],
-			      dattr_new ? dattr_new + i : NULL);
+	build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
 match2:
 		;
 	}