author		Mike Travis <travis@sgi.com>	2008-04-04 21:11:11 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-04-19 13:44:59 -0400
commit		7c16ec585c558960a508ccf9a08fcb9ed49b3754 (patch)
tree		cca2b12203a10944d7095a07df7292421f578dc9
parent		c5f59f0833df945eef7ff35f3dc6ba61c5f293dd (diff)
cpumask: reduce stack usage in SD_x_INIT initializers
  * Remove the empty cpumask_t (and all other zero/NULL) field initializers
    from the SD_*_INIT macros; a memset(0) clears those fields instead.
    Also, don't inline the initializer functions, to save on stack space in
    build_sched_domains().

  * Merge the change to include/linux/topology.h that uses the new
    node_to_cpumask_ptr function in the nr_cpus_node macro into this patch.

Depends on:
	[mm-patch]: asm-generic-add-node_to_cpumask_ptr-macro.patch
	[sched-devel]: sched: add new set_cpus_allowed_ptr function

Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	include/asm-x86/topology.h	|   5
-rw-r--r--	include/linux/topology.h	|  46
-rw-r--r--	kernel/sched.c			| 368
3 files changed, 256 insertions, 163 deletions
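
The kernel/sched.c hunks below introduce SD_INIT()/SD_INIT_FUNC(), so each sched_domain setup names only its non-zero fields and runs in a non-inlined helper instead of assigning a large compound literal inside build_sched_domains() itself. What follows is a minimal userspace sketch of that pattern, not the kernel code: struct sched_domain here is a reduced, hypothetical subset of the real one, and __attribute__((noinline)) stands in for the kernel's noinline.

#include <stdio.h>
#include <string.h>

struct sched_domain {
        unsigned int min_interval;      /* minimum balance interval (ms) */
        unsigned int max_interval;      /* maximum balance interval (ms) */
        unsigned int busy_factor;       /* scale interval when busy */
        int flags;                      /* SD_* flags, left zero here */
};

/* only non-zero fields are named; everything else is cleared by the helper */
#define SD_CPU_INIT (struct sched_domain) {     \
        .min_interval   = 1,                    \
        .max_interval   = 4,                    \
        .busy_factor    = 64,                   \
}

#define SD_INIT(sd, type)       sd_init_##type(sd)
#define SD_INIT_FUNC(type)                              \
static __attribute__((noinline))                        \
void sd_init_##type(struct sched_domain *sd)            \
{                                                       \
        memset(sd, 0, sizeof(*sd));                     \
        *sd = SD_##type##_INIT;                         \
}

SD_INIT_FUNC(CPU)

int main(void)
{
        struct sched_domain sd;

        SD_INIT(&sd, CPU);      /* replaces the old "*sd = SD_CPU_INIT" at the call site */
        printf("max_interval=%u flags=%d\n", sd.max_interval, sd.flags);
        return 0;
}

Because each sd_init_*() helper is non-inlined, whatever temporary the compiler builds for the compound literal lives in the helper's own stack frame rather than accumulating, one per domain level, on the stack of build_sched_domains().
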
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index b167ca90f96f..9ef74c5d5ad6 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -154,10 +154,6 @@ extern unsigned long node_remap_size[];
154 154
155/* sched_domains SD_NODE_INIT for NUMAQ machines */ 155/* sched_domains SD_NODE_INIT for NUMAQ machines */
156#define SD_NODE_INIT (struct sched_domain) { \ 156#define SD_NODE_INIT (struct sched_domain) { \
157 .span = CPU_MASK_NONE, \
158 .parent = NULL, \
159 .child = NULL, \
160 .groups = NULL, \
161 .min_interval = 8, \ 157 .min_interval = 8, \
162 .max_interval = 32, \ 158 .max_interval = 32, \
163 .busy_factor = 32, \ 159 .busy_factor = 32, \
@@ -175,7 +171,6 @@ extern unsigned long node_remap_size[];
175 | SD_WAKE_BALANCE, \ 171 | SD_WAKE_BALANCE, \
176 .last_balance = jiffies, \ 172 .last_balance = jiffies, \
177 .balance_interval = 1, \ 173 .balance_interval = 1, \
178 .nr_balance_failed = 0, \
179} 174}
180 175
181#ifdef CONFIG_X86_64_ACPI_NUMA 176#ifdef CONFIG_X86_64_ACPI_NUMA
diff --git a/include/linux/topology.h b/include/linux/topology.h
index bd14f8b30f09..4bb7074a2c3a 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -38,16 +38,15 @@
38#endif 38#endif
39 39
40#ifndef nr_cpus_node 40#ifndef nr_cpus_node
41#define nr_cpus_node(node) \ 41#define nr_cpus_node(node) \
42 ({ \ 42 ({ \
43 cpumask_t __tmp__; \ 43 node_to_cpumask_ptr(__tmp__, node); \
44 __tmp__ = node_to_cpumask(node); \ 44 cpus_weight(*__tmp__); \
45 cpus_weight(__tmp__); \
46 }) 45 })
47#endif 46#endif
48 47
49#define for_each_node_with_cpus(node) \ 48#define for_each_node_with_cpus(node) \
50 for_each_online_node(node) \ 49 for_each_online_node(node) \
51 if (nr_cpus_node(node)) 50 if (nr_cpus_node(node))
52 51
53void arch_update_cpu_topology(void); 52void arch_update_cpu_topology(void);
@@ -80,7 +79,9 @@ void arch_update_cpu_topology(void);
80 * by defining their own arch-specific initializer in include/asm/topology.h. 79 * by defining their own arch-specific initializer in include/asm/topology.h.
81 * A definition there will automagically override these default initializers 80 * A definition there will automagically override these default initializers
82 * and allow arch-specific performance tuning of sched_domains. 81 * and allow arch-specific performance tuning of sched_domains.
82 * (Only non-zero and non-null fields need be specified.)
83 */ 83 */
84
84#ifdef CONFIG_SCHED_SMT 85#ifdef CONFIG_SCHED_SMT
85/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, 86/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is,
86 * so can't we drop this in favor of CONFIG_SCHED_SMT? 87 * so can't we drop this in favor of CONFIG_SCHED_SMT?
@@ -89,20 +90,10 @@ void arch_update_cpu_topology(void);
89/* Common values for SMT siblings */ 90/* Common values for SMT siblings */
90#ifndef SD_SIBLING_INIT 91#ifndef SD_SIBLING_INIT
91#define SD_SIBLING_INIT (struct sched_domain) { \ 92#define SD_SIBLING_INIT (struct sched_domain) { \
92 .span = CPU_MASK_NONE, \
93 .parent = NULL, \
94 .child = NULL, \
95 .groups = NULL, \
96 .min_interval = 1, \ 93 .min_interval = 1, \
97 .max_interval = 2, \ 94 .max_interval = 2, \
98 .busy_factor = 64, \ 95 .busy_factor = 64, \
99 .imbalance_pct = 110, \ 96 .imbalance_pct = 110, \
100 .cache_nice_tries = 0, \
101 .busy_idx = 0, \
102 .idle_idx = 0, \
103 .newidle_idx = 0, \
104 .wake_idx = 0, \
105 .forkexec_idx = 0, \
106 .flags = SD_LOAD_BALANCE \ 97 .flags = SD_LOAD_BALANCE \
107 | SD_BALANCE_NEWIDLE \ 98 | SD_BALANCE_NEWIDLE \
108 | SD_BALANCE_FORK \ 99 | SD_BALANCE_FORK \
@@ -112,7 +103,6 @@ void arch_update_cpu_topology(void);
112 | SD_SHARE_CPUPOWER, \ 103 | SD_SHARE_CPUPOWER, \
113 .last_balance = jiffies, \ 104 .last_balance = jiffies, \
114 .balance_interval = 1, \ 105 .balance_interval = 1, \
115 .nr_balance_failed = 0, \
116} 106}
117#endif 107#endif
118#endif /* CONFIG_SCHED_SMT */ 108#endif /* CONFIG_SCHED_SMT */
@@ -121,18 +111,12 @@ void arch_update_cpu_topology(void);
121/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */ 111/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
122#ifndef SD_MC_INIT 112#ifndef SD_MC_INIT
123#define SD_MC_INIT (struct sched_domain) { \ 113#define SD_MC_INIT (struct sched_domain) { \
124 .span = CPU_MASK_NONE, \
125 .parent = NULL, \
126 .child = NULL, \
127 .groups = NULL, \
128 .min_interval = 1, \ 114 .min_interval = 1, \
129 .max_interval = 4, \ 115 .max_interval = 4, \
130 .busy_factor = 64, \ 116 .busy_factor = 64, \
131 .imbalance_pct = 125, \ 117 .imbalance_pct = 125, \
132 .cache_nice_tries = 1, \ 118 .cache_nice_tries = 1, \
133 .busy_idx = 2, \ 119 .busy_idx = 2, \
134 .idle_idx = 0, \
135 .newidle_idx = 0, \
136 .wake_idx = 1, \ 120 .wake_idx = 1, \
137 .forkexec_idx = 1, \ 121 .forkexec_idx = 1, \
138 .flags = SD_LOAD_BALANCE \ 122 .flags = SD_LOAD_BALANCE \
@@ -144,7 +128,6 @@ void arch_update_cpu_topology(void);
144 | BALANCE_FOR_MC_POWER, \ 128 | BALANCE_FOR_MC_POWER, \
145 .last_balance = jiffies, \ 129 .last_balance = jiffies, \
146 .balance_interval = 1, \ 130 .balance_interval = 1, \
147 .nr_balance_failed = 0, \
148} 131}
149#endif 132#endif
150#endif /* CONFIG_SCHED_MC */ 133#endif /* CONFIG_SCHED_MC */
@@ -152,10 +135,6 @@ void arch_update_cpu_topology(void);
152/* Common values for CPUs */ 135/* Common values for CPUs */
153#ifndef SD_CPU_INIT 136#ifndef SD_CPU_INIT
154#define SD_CPU_INIT (struct sched_domain) { \ 137#define SD_CPU_INIT (struct sched_domain) { \
155 .span = CPU_MASK_NONE, \
156 .parent = NULL, \
157 .child = NULL, \
158 .groups = NULL, \
159 .min_interval = 1, \ 138 .min_interval = 1, \
160 .max_interval = 4, \ 139 .max_interval = 4, \
161 .busy_factor = 64, \ 140 .busy_factor = 64, \
@@ -174,16 +153,11 @@ void arch_update_cpu_topology(void);
174 | BALANCE_FOR_PKG_POWER,\ 153 | BALANCE_FOR_PKG_POWER,\
175 .last_balance = jiffies, \ 154 .last_balance = jiffies, \
176 .balance_interval = 1, \ 155 .balance_interval = 1, \
177 .nr_balance_failed = 0, \
178} 156}
179#endif 157#endif
180 158
181/* sched_domains SD_ALLNODES_INIT for NUMA machines */ 159/* sched_domains SD_ALLNODES_INIT for NUMA machines */
182#define SD_ALLNODES_INIT (struct sched_domain) { \ 160#define SD_ALLNODES_INIT (struct sched_domain) { \
183 .span = CPU_MASK_NONE, \
184 .parent = NULL, \
185 .child = NULL, \
186 .groups = NULL, \
187 .min_interval = 64, \ 161 .min_interval = 64, \
188 .max_interval = 64*num_online_cpus(), \ 162 .max_interval = 64*num_online_cpus(), \
189 .busy_factor = 128, \ 163 .busy_factor = 128, \
@@ -191,14 +165,10 @@ void arch_update_cpu_topology(void);
191 .cache_nice_tries = 1, \ 165 .cache_nice_tries = 1, \
192 .busy_idx = 3, \ 166 .busy_idx = 3, \
193 .idle_idx = 3, \ 167 .idle_idx = 3, \
194 .newidle_idx = 0, /* unused */ \
195 .wake_idx = 0, /* unused */ \
196 .forkexec_idx = 0, /* unused */ \
197 .flags = SD_LOAD_BALANCE \ 168 .flags = SD_LOAD_BALANCE \
198 | SD_SERIALIZE, \ 169 | SD_SERIALIZE, \
199 .last_balance = jiffies, \ 170 .last_balance = jiffies, \
200 .balance_interval = 64, \ 171 .balance_interval = 64, \
201 .nr_balance_failed = 0, \
202} 172}
203 173
204#ifdef CONFIG_NUMA 174#ifdef CONFIG_NUMA
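
The nr_cpus_node() hunk above replaces an on-stack cpumask_t copy with node_to_cpumask_ptr(), which this patch pulls in from the asm-generic-add-node_to_cpumask_ptr-macro.patch dependency. Below is a simplified userspace model of the difference; cpumask_t, node_to_cpumask_map, cpus_weight() and the node_to_cpumask_ptr() stand-in are assumptions chosen for illustration, not the real kernel definitions.

#include <stdio.h>

#define NR_CPUS         4096
#define MAX_NUMNODES    4
#define BITS_PER_LONG   ((int)(8 * sizeof(unsigned long)))
#define MASK_LONGS      ((NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG)

typedef struct { unsigned long bits[MASK_LONGS]; } cpumask_t;   /* 512 bytes at NR_CPUS=4096 */

static cpumask_t node_to_cpumask_map[MAX_NUMNODES];

/* hypothetical stand-in: bind a pointer to the node's mask instead of copying it */
#define node_to_cpumask_ptr(v, node) \
        const cpumask_t *v = &node_to_cpumask_map[node]

static int __mask_weight(const cpumask_t *mask)
{
        int i, w = 0;

        for (i = 0; i < MASK_LONGS; i++)
                w += __builtin_popcountl(mask->bits[i]);
        return w;
}
#define cpus_weight(mask)       __mask_weight(&(mask))

/* new-style nr_cpus_node(): no cpumask_t temporary on the caller's stack */
#define nr_cpus_node(node)                              \
        ({                                              \
                node_to_cpumask_ptr(__tmp__, node);     \
                cpus_weight(*__tmp__);                  \
        })

int main(void)
{
        node_to_cpumask_map[0].bits[0] = 0xffUL;        /* pretend node 0 has CPUs 0-7 */
        printf("node 0 has %d cpus\n", nr_cpus_node(0));
        return 0;
}

With NR_CPUS=4096 a cpumask_t is 512 bytes, so binding a pointer into a per-node table instead of copying the mask onto the stack is where the saving comes from; the same pass-by-pointer theme runs through the cpumask_t *tmp, *cpus and *nodemask parameters added to the scheduler functions below.
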
diff --git a/kernel/sched.c b/kernel/sched.c
index 9f7980f8ec00..6809178eaa9d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1869,17 +1869,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
1869 * find_idlest_cpu - find the idlest cpu among the cpus in group. 1869 * find_idlest_cpu - find the idlest cpu among the cpus in group.
1870 */ 1870 */
1871static int 1871static int
1872find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) 1872find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
1873 cpumask_t *tmp)
1873{ 1874{
1874 cpumask_t tmp;
1875 unsigned long load, min_load = ULONG_MAX; 1875 unsigned long load, min_load = ULONG_MAX;
1876 int idlest = -1; 1876 int idlest = -1;
1877 int i; 1877 int i;
1878 1878
1879 /* Traverse only the allowed CPUs */ 1879 /* Traverse only the allowed CPUs */
1880 cpus_and(tmp, group->cpumask, p->cpus_allowed); 1880 cpus_and(*tmp, group->cpumask, p->cpus_allowed);
1881 1881
1882 for_each_cpu_mask(i, tmp) { 1882 for_each_cpu_mask(i, *tmp) {
1883 load = weighted_cpuload(i); 1883 load = weighted_cpuload(i);
1884 1884
1885 if (load < min_load || (load == min_load && i == this_cpu)) { 1885 if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -1918,7 +1918,7 @@ static int sched_balance_self(int cpu, int flag)
1918 } 1918 }
1919 1919
1920 while (sd) { 1920 while (sd) {
1921 cpumask_t span; 1921 cpumask_t span, tmpmask;
1922 struct sched_group *group; 1922 struct sched_group *group;
1923 int new_cpu, weight; 1923 int new_cpu, weight;
1924 1924
@@ -1934,7 +1934,7 @@ static int sched_balance_self(int cpu, int flag)
1934 continue; 1934 continue;
1935 } 1935 }
1936 1936
1937 new_cpu = find_idlest_cpu(group, t, cpu); 1937 new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
1938 if (new_cpu == -1 || new_cpu == cpu) { 1938 if (new_cpu == -1 || new_cpu == cpu) {
1939 /* Now try balancing at a lower domain level of cpu */ 1939 /* Now try balancing at a lower domain level of cpu */
1940 sd = sd->child; 1940 sd = sd->child;
@@ -2818,7 +2818,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
2818static struct sched_group * 2818static struct sched_group *
2819find_busiest_group(struct sched_domain *sd, int this_cpu, 2819find_busiest_group(struct sched_domain *sd, int this_cpu,
2820 unsigned long *imbalance, enum cpu_idle_type idle, 2820 unsigned long *imbalance, enum cpu_idle_type idle,
2821 int *sd_idle, cpumask_t *cpus, int *balance) 2821 int *sd_idle, const cpumask_t *cpus, int *balance)
2822{ 2822{
2823 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 2823 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
2824 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 2824 unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3119,7 +3119,7 @@ ret:
3119 */ 3119 */
3120static struct rq * 3120static struct rq *
3121find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, 3121find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3122 unsigned long imbalance, cpumask_t *cpus) 3122 unsigned long imbalance, const cpumask_t *cpus)
3123{ 3123{
3124 struct rq *busiest = NULL, *rq; 3124 struct rq *busiest = NULL, *rq;
3125 unsigned long max_load = 0; 3125 unsigned long max_load = 0;
@@ -3158,15 +3158,16 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3158 */ 3158 */
3159static int load_balance(int this_cpu, struct rq *this_rq, 3159static int load_balance(int this_cpu, struct rq *this_rq,
3160 struct sched_domain *sd, enum cpu_idle_type idle, 3160 struct sched_domain *sd, enum cpu_idle_type idle,
3161 int *balance) 3161 int *balance, cpumask_t *cpus)
3162{ 3162{
3163 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3163 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
3164 struct sched_group *group; 3164 struct sched_group *group;
3165 unsigned long imbalance; 3165 unsigned long imbalance;
3166 struct rq *busiest; 3166 struct rq *busiest;
3167 cpumask_t cpus = CPU_MASK_ALL;
3168 unsigned long flags; 3167 unsigned long flags;
3169 3168
3169 cpus_setall(*cpus);
3170
3170 /* 3171 /*
3171 * When power savings policy is enabled for the parent domain, idle 3172 * When power savings policy is enabled for the parent domain, idle
3172 * sibling can pick up load irrespective of busy siblings. In this case, 3173 * sibling can pick up load irrespective of busy siblings. In this case,
@@ -3181,7 +3182,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3181 3182
3182redo: 3183redo:
3183 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, 3184 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
3184 &cpus, balance); 3185 cpus, balance);
3185 3186
3186 if (*balance == 0) 3187 if (*balance == 0)
3187 goto out_balanced; 3188 goto out_balanced;
@@ -3191,7 +3192,7 @@ redo:
3191 goto out_balanced; 3192 goto out_balanced;
3192 } 3193 }
3193 3194
3194 busiest = find_busiest_queue(group, idle, imbalance, &cpus); 3195 busiest = find_busiest_queue(group, idle, imbalance, cpus);
3195 if (!busiest) { 3196 if (!busiest) {
3196 schedstat_inc(sd, lb_nobusyq[idle]); 3197 schedstat_inc(sd, lb_nobusyq[idle]);
3197 goto out_balanced; 3198 goto out_balanced;
@@ -3224,8 +3225,8 @@ redo:
3224 3225
3225 /* All tasks on this runqueue were pinned by CPU affinity */ 3226 /* All tasks on this runqueue were pinned by CPU affinity */
3226 if (unlikely(all_pinned)) { 3227 if (unlikely(all_pinned)) {
3227 cpu_clear(cpu_of(busiest), cpus); 3228 cpu_clear(cpu_of(busiest), *cpus);
3228 if (!cpus_empty(cpus)) 3229 if (!cpus_empty(*cpus))
3229 goto redo; 3230 goto redo;
3230 goto out_balanced; 3231 goto out_balanced;
3231 } 3232 }
@@ -3310,7 +3311,8 @@ out_one_pinned:
3310 * this_rq is locked. 3311 * this_rq is locked.
3311 */ 3312 */
3312static int 3313static int
3313load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) 3314load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3315 cpumask_t *cpus)
3314{ 3316{
3315 struct sched_group *group; 3317 struct sched_group *group;
3316 struct rq *busiest = NULL; 3318 struct rq *busiest = NULL;
@@ -3318,7 +3320,8 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
3318 int ld_moved = 0; 3320 int ld_moved = 0;
3319 int sd_idle = 0; 3321 int sd_idle = 0;
3320 int all_pinned = 0; 3322 int all_pinned = 0;
3321 cpumask_t cpus = CPU_MASK_ALL; 3323
3324 cpus_setall(*cpus);
3322 3325
3323 /* 3326 /*
3324 * When power savings policy is enabled for the parent domain, idle 3327 * When power savings policy is enabled for the parent domain, idle
@@ -3333,14 +3336,13 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
3333 schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); 3336 schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]);
3334redo: 3337redo:
3335 group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, 3338 group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE,
3336 &sd_idle, &cpus, NULL); 3339 &sd_idle, cpus, NULL);
3337 if (!group) { 3340 if (!group) {
3338 schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]); 3341 schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]);
3339 goto out_balanced; 3342 goto out_balanced;
3340 } 3343 }
3341 3344
3342 busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance, 3345 busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance, cpus);
3343 &cpus);
3344 if (!busiest) { 3346 if (!busiest) {
3345 schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]); 3347 schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]);
3346 goto out_balanced; 3348 goto out_balanced;
@@ -3362,8 +3364,8 @@ redo:
3362 spin_unlock(&busiest->lock); 3364 spin_unlock(&busiest->lock);
3363 3365
3364 if (unlikely(all_pinned)) { 3366 if (unlikely(all_pinned)) {
3365 cpu_clear(cpu_of(busiest), cpus); 3367 cpu_clear(cpu_of(busiest), *cpus);
3366 if (!cpus_empty(cpus)) 3368 if (!cpus_empty(*cpus))
3367 goto redo; 3369 goto redo;
3368 } 3370 }
3369 } 3371 }
@@ -3397,6 +3399,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3397 struct sched_domain *sd; 3399 struct sched_domain *sd;
3398 int pulled_task = -1; 3400 int pulled_task = -1;
3399 unsigned long next_balance = jiffies + HZ; 3401 unsigned long next_balance = jiffies + HZ;
3402 cpumask_t tmpmask;
3400 3403
3401 for_each_domain(this_cpu, sd) { 3404 for_each_domain(this_cpu, sd) {
3402 unsigned long interval; 3405 unsigned long interval;
@@ -3406,8 +3409,8 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3406 3409
3407 if (sd->flags & SD_BALANCE_NEWIDLE) 3410 if (sd->flags & SD_BALANCE_NEWIDLE)
3408 /* If we've pulled tasks over stop searching: */ 3411 /* If we've pulled tasks over stop searching: */
3409 pulled_task = load_balance_newidle(this_cpu, 3412 pulled_task = load_balance_newidle(this_cpu, this_rq,
3410 this_rq, sd); 3413 sd, &tmpmask);
3411 3414
3412 interval = msecs_to_jiffies(sd->balance_interval); 3415 interval = msecs_to_jiffies(sd->balance_interval);
3413 if (time_after(next_balance, sd->last_balance + interval)) 3416 if (time_after(next_balance, sd->last_balance + interval))
@@ -3566,6 +3569,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3566 /* Earliest time when we have to do rebalance again */ 3569 /* Earliest time when we have to do rebalance again */
3567 unsigned long next_balance = jiffies + 60*HZ; 3570 unsigned long next_balance = jiffies + 60*HZ;
3568 int update_next_balance = 0; 3571 int update_next_balance = 0;
3572 cpumask_t tmp;
3569 3573
3570 for_each_domain(cpu, sd) { 3574 for_each_domain(cpu, sd) {
3571 if (!(sd->flags & SD_LOAD_BALANCE)) 3575 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3589,7 +3593,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3589 } 3593 }
3590 3594
3591 if (time_after_eq(jiffies, sd->last_balance + interval)) { 3595 if (time_after_eq(jiffies, sd->last_balance + interval)) {
3592 if (load_balance(cpu, rq, sd, idle, &balance)) { 3596 if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) {
3593 /* 3597 /*
3594 * We've pulled tasks over so either we're no 3598 * We've pulled tasks over so either we're no
3595 * longer idle, or one of our SMT siblings is 3599 * longer idle, or one of our SMT siblings is
@@ -4945,7 +4949,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
4945 cpuset_cpus_allowed(p, &cpus_allowed); 4949 cpuset_cpus_allowed(p, &cpus_allowed);
4946 cpus_and(new_mask, new_mask, cpus_allowed); 4950 cpus_and(new_mask, new_mask, cpus_allowed);
4947 again: 4951 again:
4948 retval = set_cpus_allowed(p, new_mask); 4952 retval = set_cpus_allowed_ptr(p, &new_mask);
4949 4953
4950 if (!retval) { 4954 if (!retval) {
4951 cpuset_cpus_allowed(p, &cpus_allowed); 4955 cpuset_cpus_allowed(p, &cpus_allowed);
@@ -5700,7 +5704,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5700 */ 5704 */
5701static void migrate_nr_uninterruptible(struct rq *rq_src) 5705static void migrate_nr_uninterruptible(struct rq *rq_src)
5702{ 5706{
5703 struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); 5707 struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR));
5704 unsigned long flags; 5708 unsigned long flags;
5705 5709
5706 local_irq_save(flags); 5710 local_irq_save(flags);
@@ -6118,14 +6122,14 @@ EXPORT_SYMBOL(nr_cpu_ids);
6118 6122
6119#ifdef CONFIG_SCHED_DEBUG 6123#ifdef CONFIG_SCHED_DEBUG
6120 6124
6121static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level) 6125static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6126 cpumask_t *groupmask)
6122{ 6127{
6123 struct sched_group *group = sd->groups; 6128 struct sched_group *group = sd->groups;
6124 cpumask_t groupmask;
6125 char str[256]; 6129 char str[256];
6126 6130
6127 cpulist_scnprintf(str, sizeof(str), sd->span); 6131 cpulist_scnprintf(str, sizeof(str), sd->span);
6128 cpus_clear(groupmask); 6132 cpus_clear(*groupmask);
6129 6133
6130 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 6134 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
6131 6135
@@ -6169,13 +6173,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level)
6169 break; 6173 break;
6170 } 6174 }
6171 6175
6172 if (cpus_intersects(groupmask, group->cpumask)) { 6176 if (cpus_intersects(*groupmask, group->cpumask)) {
6173 printk(KERN_CONT "\n"); 6177 printk(KERN_CONT "\n");
6174 printk(KERN_ERR "ERROR: repeated CPUs\n"); 6178 printk(KERN_ERR "ERROR: repeated CPUs\n");
6175 break; 6179 break;
6176 } 6180 }
6177 6181
6178 cpus_or(groupmask, groupmask, group->cpumask); 6182 cpus_or(*groupmask, *groupmask, group->cpumask);
6179 6183
6180 cpulist_scnprintf(str, sizeof(str), group->cpumask); 6184 cpulist_scnprintf(str, sizeof(str), group->cpumask);
6181 printk(KERN_CONT " %s", str); 6185 printk(KERN_CONT " %s", str);
@@ -6184,10 +6188,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level)
6184 } while (group != sd->groups); 6188 } while (group != sd->groups);
6185 printk(KERN_CONT "\n"); 6189 printk(KERN_CONT "\n");
6186 6190
6187 if (!cpus_equal(sd->span, groupmask)) 6191 if (!cpus_equal(sd->span, *groupmask))
6188 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); 6192 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
6189 6193
6190 if (sd->parent && !cpus_subset(groupmask, sd->parent->span)) 6194 if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
6191 printk(KERN_ERR "ERROR: parent span is not a superset " 6195 printk(KERN_ERR "ERROR: parent span is not a superset "
6192 "of domain->span\n"); 6196 "of domain->span\n");
6193 return 0; 6197 return 0;
@@ -6195,6 +6199,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level)
6195 6199
6196static void sched_domain_debug(struct sched_domain *sd, int cpu) 6200static void sched_domain_debug(struct sched_domain *sd, int cpu)
6197{ 6201{
6202 cpumask_t *groupmask;
6198 int level = 0; 6203 int level = 0;
6199 6204
6200 if (!sd) { 6205 if (!sd) {
@@ -6204,14 +6209,21 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6204 6209
6205 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); 6210 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
6206 6211
6212 groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
6213 if (!groupmask) {
6214 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
6215 return;
6216 }
6217
6207 for (;;) { 6218 for (;;) {
6208 if (sched_domain_debug_one(sd, cpu, level)) 6219 if (sched_domain_debug_one(sd, cpu, level, groupmask))
6209 break; 6220 break;
6210 level++; 6221 level++;
6211 sd = sd->parent; 6222 sd = sd->parent;
6212 if (!sd) 6223 if (!sd)
6213 break; 6224 break;
6214 } 6225 }
6226 kfree(groupmask);
6215} 6227}
6216#else 6228#else
6217# define sched_domain_debug(sd, cpu) do { } while (0) 6229# define sched_domain_debug(sd, cpu) do { } while (0)
@@ -6399,30 +6411,33 @@ __setup("isolcpus=", isolated_cpu_setup);
6399 * and ->cpu_power to 0. 6411 * and ->cpu_power to 0.
6400 */ 6412 */
6401static void 6413static void
6402init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, 6414init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
6403 int (*group_fn)(int cpu, const cpumask_t *cpu_map, 6415 int (*group_fn)(int cpu, const cpumask_t *cpu_map,
6404 struct sched_group **sg)) 6416 struct sched_group **sg,
6417 cpumask_t *tmpmask),
6418 cpumask_t *covered, cpumask_t *tmpmask)
6405{ 6419{
6406 struct sched_group *first = NULL, *last = NULL; 6420 struct sched_group *first = NULL, *last = NULL;
6407 cpumask_t covered = CPU_MASK_NONE;
6408 int i; 6421 int i;
6409 6422
6410 for_each_cpu_mask(i, span) { 6423 cpus_clear(*covered);
6424
6425 for_each_cpu_mask(i, *span) {
6411 struct sched_group *sg; 6426 struct sched_group *sg;
6412 int group = group_fn(i, cpu_map, &sg); 6427 int group = group_fn(i, cpu_map, &sg, tmpmask);
6413 int j; 6428 int j;
6414 6429
6415 if (cpu_isset(i, covered)) 6430 if (cpu_isset(i, *covered))
6416 continue; 6431 continue;
6417 6432
6418 sg->cpumask = CPU_MASK_NONE; 6433 cpus_clear(sg->cpumask);
6419 sg->__cpu_power = 0; 6434 sg->__cpu_power = 0;
6420 6435
6421 for_each_cpu_mask(j, span) { 6436 for_each_cpu_mask(j, *span) {
6422 if (group_fn(j, cpu_map, NULL) != group) 6437 if (group_fn(j, cpu_map, NULL, tmpmask) != group)
6423 continue; 6438 continue;
6424 6439
6425 cpu_set(j, covered); 6440 cpu_set(j, *covered);
6426 cpu_set(j, sg->cpumask); 6441 cpu_set(j, sg->cpumask);
6427 } 6442 }
6428 if (!first) 6443 if (!first)
@@ -6520,7 +6535,8 @@ static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
6520static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); 6535static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
6521 6536
6522static int 6537static int
6523cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) 6538cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6539 cpumask_t *unused)
6524{ 6540{
6525 if (sg) 6541 if (sg)
6526 *sg = &per_cpu(sched_group_cpus, cpu); 6542 *sg = &per_cpu(sched_group_cpus, cpu);
@@ -6538,19 +6554,22 @@ static DEFINE_PER_CPU(struct sched_group, sched_group_core);
6538 6554
6539#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 6555#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
6540static int 6556static int
6541cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) 6557cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6558 cpumask_t *mask)
6542{ 6559{
6543 int group; 6560 int group;
6544 cpumask_t mask = per_cpu(cpu_sibling_map, cpu); 6561
6545 cpus_and(mask, mask, *cpu_map); 6562 *mask = per_cpu(cpu_sibling_map, cpu);
6546 group = first_cpu(mask); 6563 cpus_and(*mask, *mask, *cpu_map);
6564 group = first_cpu(*mask);
6547 if (sg) 6565 if (sg)
6548 *sg = &per_cpu(sched_group_core, group); 6566 *sg = &per_cpu(sched_group_core, group);
6549 return group; 6567 return group;
6550} 6568}
6551#elif defined(CONFIG_SCHED_MC) 6569#elif defined(CONFIG_SCHED_MC)
6552static int 6570static int
6553cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) 6571cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6572 cpumask_t *unused)
6554{ 6573{
6555 if (sg) 6574 if (sg)
6556 *sg = &per_cpu(sched_group_core, cpu); 6575 *sg = &per_cpu(sched_group_core, cpu);
@@ -6562,17 +6581,18 @@ static DEFINE_PER_CPU(struct sched_domain, phys_domains);
6562static DEFINE_PER_CPU(struct sched_group, sched_group_phys); 6581static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
6563 6582
6564static int 6583static int
6565cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) 6584cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6585 cpumask_t *mask)
6566{ 6586{
6567 int group; 6587 int group;
6568#ifdef CONFIG_SCHED_MC 6588#ifdef CONFIG_SCHED_MC
6569 cpumask_t mask = cpu_coregroup_map(cpu); 6589 *mask = cpu_coregroup_map(cpu);
6570 cpus_and(mask, mask, *cpu_map); 6590 cpus_and(*mask, *mask, *cpu_map);
6571 group = first_cpu(mask); 6591 group = first_cpu(*mask);
6572#elif defined(CONFIG_SCHED_SMT) 6592#elif defined(CONFIG_SCHED_SMT)
6573 cpumask_t mask = per_cpu(cpu_sibling_map, cpu); 6593 *mask = per_cpu(cpu_sibling_map, cpu);
6574 cpus_and(mask, mask, *cpu_map); 6594 cpus_and(*mask, *mask, *cpu_map);
6575 group = first_cpu(mask); 6595 group = first_cpu(*mask);
6576#else 6596#else
6577 group = cpu; 6597 group = cpu;
6578#endif 6598#endif
@@ -6594,13 +6614,13 @@ static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
6594static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); 6614static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
6595 6615
6596static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, 6616static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
6597 struct sched_group **sg) 6617 struct sched_group **sg, cpumask_t *nodemask)
6598{ 6618{
6599 cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu));
6600 int group; 6619 int group;
6601 6620
6602 cpus_and(nodemask, nodemask, *cpu_map); 6621 *nodemask = node_to_cpumask(cpu_to_node(cpu));
6603 group = first_cpu(nodemask); 6622 cpus_and(*nodemask, *nodemask, *cpu_map);
6623 group = first_cpu(*nodemask);
6604 6624
6605 if (sg) 6625 if (sg)
6606 *sg = &per_cpu(sched_group_allnodes, group); 6626 *sg = &per_cpu(sched_group_allnodes, group);
@@ -6636,7 +6656,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
6636 6656
6637#ifdef CONFIG_NUMA 6657#ifdef CONFIG_NUMA
6638/* Free memory allocated for various sched_group structures */ 6658/* Free memory allocated for various sched_group structures */
6639static void free_sched_groups(const cpumask_t *cpu_map) 6659static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
6640{ 6660{
6641 int cpu, i; 6661 int cpu, i;
6642 6662
@@ -6648,11 +6668,11 @@ static void free_sched_groups(const cpumask_t *cpu_map)
6648 continue; 6668 continue;
6649 6669
6650 for (i = 0; i < MAX_NUMNODES; i++) { 6670 for (i = 0; i < MAX_NUMNODES; i++) {
6651 cpumask_t nodemask = node_to_cpumask(i);
6652 struct sched_group *oldsg, *sg = sched_group_nodes[i]; 6671 struct sched_group *oldsg, *sg = sched_group_nodes[i];
6653 6672
6654 cpus_and(nodemask, nodemask, *cpu_map); 6673 *nodemask = node_to_cpumask(i);
6655 if (cpus_empty(nodemask)) 6674 cpus_and(*nodemask, *nodemask, *cpu_map);
6675 if (cpus_empty(*nodemask))
6656 continue; 6676 continue;
6657 6677
6658 if (sg == NULL) 6678 if (sg == NULL)
@@ -6670,7 +6690,7 @@ next_sg:
6670 } 6690 }
6671} 6691}
6672#else 6692#else
6673static void free_sched_groups(const cpumask_t *cpu_map) 6693static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
6674{ 6694{
6675} 6695}
6676#endif 6696#endif
@@ -6728,6 +6748,65 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6728} 6748}
6729 6749
6730/* 6750/*
6751 * Initializers for schedule domains
6752 * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
6753 */
6754
6755#define SD_INIT(sd, type) sd_init_##type(sd)
6756#define SD_INIT_FUNC(type) \
6757static noinline void sd_init_##type(struct sched_domain *sd) \
6758{ \
6759 memset(sd, 0, sizeof(*sd)); \
6760 *sd = SD_##type##_INIT; \
6761}
6762
6763SD_INIT_FUNC(CPU)
6764#ifdef CONFIG_NUMA
6765 SD_INIT_FUNC(ALLNODES)
6766 SD_INIT_FUNC(NODE)
6767#endif
6768#ifdef CONFIG_SCHED_SMT
6769 SD_INIT_FUNC(SIBLING)
6770#endif
6771#ifdef CONFIG_SCHED_MC
6772 SD_INIT_FUNC(MC)
6773#endif
6774
6775/*
6776 * To minimize stack usage kmalloc room for cpumasks and share the
6777 * space as the usage in build_sched_domains() dictates. Used only
6778 * if the amount of space is significant.
6779 */
6780struct allmasks {
6781 cpumask_t tmpmask; /* make this one first */
6782 union {
6783 cpumask_t nodemask;
6784 cpumask_t this_sibling_map;
6785 cpumask_t this_core_map;
6786 };
6787 cpumask_t send_covered;
6788
6789#ifdef CONFIG_NUMA
6790 cpumask_t domainspan;
6791 cpumask_t covered;
6792 cpumask_t notcovered;
6793#endif
6794};
6795
6796#if NR_CPUS > 128
6797#define SCHED_CPUMASK_ALLOC 1
6798#define SCHED_CPUMASK_FREE(v) kfree(v)
6799#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
6800#else
6801#define SCHED_CPUMASK_ALLOC 0
6802#define SCHED_CPUMASK_FREE(v)
6803#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
6804#endif
6805
6806#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
6807 ((unsigned long)(a) + offsetof(struct allmasks, v))
6808
6809/*
6731 * Build sched domains for a given set of cpus and attach the sched domains 6810 * Build sched domains for a given set of cpus and attach the sched domains
6732 * to the individual cpus 6811 * to the individual cpus
6733 */ 6812 */
@@ -6735,6 +6814,8 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6735{ 6814{
6736 int i; 6815 int i;
6737 struct root_domain *rd; 6816 struct root_domain *rd;
6817 SCHED_CPUMASK_DECLARE(allmasks);
6818 cpumask_t *tmpmask;
6738#ifdef CONFIG_NUMA 6819#ifdef CONFIG_NUMA
6739 struct sched_group **sched_group_nodes = NULL; 6820 struct sched_group **sched_group_nodes = NULL;
6740 int sd_allnodes = 0; 6821 int sd_allnodes = 0;
@@ -6748,38 +6829,60 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6748 printk(KERN_WARNING "Can not alloc sched group node list\n"); 6829 printk(KERN_WARNING "Can not alloc sched group node list\n");
6749 return -ENOMEM; 6830 return -ENOMEM;
6750 } 6831 }
6751 sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
6752#endif 6832#endif
6753 6833
6754 rd = alloc_rootdomain(); 6834 rd = alloc_rootdomain();
6755 if (!rd) { 6835 if (!rd) {
6756 printk(KERN_WARNING "Cannot alloc root domain\n"); 6836 printk(KERN_WARNING "Cannot alloc root domain\n");
6837#ifdef CONFIG_NUMA
6838 kfree(sched_group_nodes);
6839#endif
6757 return -ENOMEM; 6840 return -ENOMEM;
6758 } 6841 }
6759 6842
6843#if SCHED_CPUMASK_ALLOC
6844 /* get space for all scratch cpumask variables */
6845 allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
6846 if (!allmasks) {
6847 printk(KERN_WARNING "Cannot alloc cpumask array\n");
6848 kfree(rd);
6849#ifdef CONFIG_NUMA
6850 kfree(sched_group_nodes);
6851#endif
6852 return -ENOMEM;
6853 }
6854#endif
6855 tmpmask = (cpumask_t *)allmasks;
6856
6857
6858#ifdef CONFIG_NUMA
6859 sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
6860#endif
6861
6760 /* 6862 /*
6761 * Set up domains for cpus specified by the cpu_map. 6863 * Set up domains for cpus specified by the cpu_map.
6762 */ 6864 */
6763 for_each_cpu_mask(i, *cpu_map) { 6865 for_each_cpu_mask(i, *cpu_map) {
6764 struct sched_domain *sd = NULL, *p; 6866 struct sched_domain *sd = NULL, *p;
6765 cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); 6867 SCHED_CPUMASK_VAR(nodemask, allmasks);
6766 6868
6767 cpus_and(nodemask, nodemask, *cpu_map); 6869 *nodemask = node_to_cpumask(cpu_to_node(i));
6870 cpus_and(*nodemask, *nodemask, *cpu_map);
6768 6871
6769#ifdef CONFIG_NUMA 6872#ifdef CONFIG_NUMA
6770 if (cpus_weight(*cpu_map) > 6873 if (cpus_weight(*cpu_map) >
6771 SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { 6874 SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
6772 sd = &per_cpu(allnodes_domains, i); 6875 sd = &per_cpu(allnodes_domains, i);
6773 *sd = SD_ALLNODES_INIT; 6876 SD_INIT(sd, ALLNODES);
6774 sd->span = *cpu_map; 6877 sd->span = *cpu_map;
6775 cpu_to_allnodes_group(i, cpu_map, &sd->groups); 6878 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
6776 p = sd; 6879 p = sd;
6777 sd_allnodes = 1; 6880 sd_allnodes = 1;
6778 } else 6881 } else
6779 p = NULL; 6882 p = NULL;
6780 6883
6781 sd = &per_cpu(node_domains, i); 6884 sd = &per_cpu(node_domains, i);
6782 *sd = SD_NODE_INIT; 6885 SD_INIT(sd, NODE);
6783 sd->span = sched_domain_node_span(cpu_to_node(i)); 6886 sd->span = sched_domain_node_span(cpu_to_node(i));
6784 sd->parent = p; 6887 sd->parent = p;
6785 if (p) 6888 if (p)
@@ -6789,94 +6892,114 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6789 6892
6790 p = sd; 6893 p = sd;
6791 sd = &per_cpu(phys_domains, i); 6894 sd = &per_cpu(phys_domains, i);
6792 *sd = SD_CPU_INIT; 6895 SD_INIT(sd, CPU);
6793 sd->span = nodemask; 6896 sd->span = *nodemask;
6794 sd->parent = p; 6897 sd->parent = p;
6795 if (p) 6898 if (p)
6796 p->child = sd; 6899 p->child = sd;
6797 cpu_to_phys_group(i, cpu_map, &sd->groups); 6900 cpu_to_phys_group(i, cpu_map, &sd->groups, tmpmask);
6798 6901
6799#ifdef CONFIG_SCHED_MC 6902#ifdef CONFIG_SCHED_MC
6800 p = sd; 6903 p = sd;
6801 sd = &per_cpu(core_domains, i); 6904 sd = &per_cpu(core_domains, i);
6802 *sd = SD_MC_INIT; 6905 SD_INIT(sd, MC);
6803 sd->span = cpu_coregroup_map(i); 6906 sd->span = cpu_coregroup_map(i);
6804 cpus_and(sd->span, sd->span, *cpu_map); 6907 cpus_and(sd->span, sd->span, *cpu_map);
6805 sd->parent = p; 6908 sd->parent = p;
6806 p->child = sd; 6909 p->child = sd;
6807 cpu_to_core_group(i, cpu_map, &sd->groups); 6910 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
6808#endif 6911#endif
6809 6912
6810#ifdef CONFIG_SCHED_SMT 6913#ifdef CONFIG_SCHED_SMT
6811 p = sd; 6914 p = sd;
6812 sd = &per_cpu(cpu_domains, i); 6915 sd = &per_cpu(cpu_domains, i);
6813 *sd = SD_SIBLING_INIT; 6916 SD_INIT(sd, SIBLING);
6814 sd->span = per_cpu(cpu_sibling_map, i); 6917 sd->span = per_cpu(cpu_sibling_map, i);
6815 cpus_and(sd->span, sd->span, *cpu_map); 6918 cpus_and(sd->span, sd->span, *cpu_map);
6816 sd->parent = p; 6919 sd->parent = p;
6817 p->child = sd; 6920 p->child = sd;
6818 cpu_to_cpu_group(i, cpu_map, &sd->groups); 6921 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
6819#endif 6922#endif
6820 } 6923 }
6821 6924
6822#ifdef CONFIG_SCHED_SMT 6925#ifdef CONFIG_SCHED_SMT
6823 /* Set up CPU (sibling) groups */ 6926 /* Set up CPU (sibling) groups */
6824 for_each_cpu_mask(i, *cpu_map) { 6927 for_each_cpu_mask(i, *cpu_map) {
6825 cpumask_t this_sibling_map = per_cpu(cpu_sibling_map, i); 6928 SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
6826 cpus_and(this_sibling_map, this_sibling_map, *cpu_map); 6929 SCHED_CPUMASK_VAR(send_covered, allmasks);
6827 if (i != first_cpu(this_sibling_map)) 6930
6931 *this_sibling_map = per_cpu(cpu_sibling_map, i);
6932 cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
6933 if (i != first_cpu(*this_sibling_map))
6828 continue; 6934 continue;
6829 6935
6830 init_sched_build_groups(this_sibling_map, cpu_map, 6936 init_sched_build_groups(this_sibling_map, cpu_map,
6831 &cpu_to_cpu_group); 6937 &cpu_to_cpu_group,
6938 send_covered, tmpmask);
6832 } 6939 }
6833#endif 6940#endif
6834 6941
6835#ifdef CONFIG_SCHED_MC 6942#ifdef CONFIG_SCHED_MC
6836 /* Set up multi-core groups */ 6943 /* Set up multi-core groups */
6837 for_each_cpu_mask(i, *cpu_map) { 6944 for_each_cpu_mask(i, *cpu_map) {
6838 cpumask_t this_core_map = cpu_coregroup_map(i); 6945 SCHED_CPUMASK_VAR(this_core_map, allmasks);
6839 cpus_and(this_core_map, this_core_map, *cpu_map); 6946 SCHED_CPUMASK_VAR(send_covered, allmasks);
6840 if (i != first_cpu(this_core_map)) 6947
6948 *this_core_map = cpu_coregroup_map(i);
6949 cpus_and(*this_core_map, *this_core_map, *cpu_map);
6950 if (i != first_cpu(*this_core_map))
6841 continue; 6951 continue;
6952
6842 init_sched_build_groups(this_core_map, cpu_map, 6953 init_sched_build_groups(this_core_map, cpu_map,
6843 &cpu_to_core_group); 6954 &cpu_to_core_group,
6955 send_covered, tmpmask);
6844 } 6956 }
6845#endif 6957#endif
6846 6958
6847 /* Set up physical groups */ 6959 /* Set up physical groups */
6848 for (i = 0; i < MAX_NUMNODES; i++) { 6960 for (i = 0; i < MAX_NUMNODES; i++) {
6849 cpumask_t nodemask = node_to_cpumask(i); 6961 SCHED_CPUMASK_VAR(nodemask, allmasks);
6962 SCHED_CPUMASK_VAR(send_covered, allmasks);
6850 6963
6851 cpus_and(nodemask, nodemask, *cpu_map); 6964 *nodemask = node_to_cpumask(i);
6852 if (cpus_empty(nodemask)) 6965 cpus_and(*nodemask, *nodemask, *cpu_map);
6966 if (cpus_empty(*nodemask))
6853 continue; 6967 continue;
6854 6968
6855 init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group); 6969 init_sched_build_groups(nodemask, cpu_map,
6970 &cpu_to_phys_group,
6971 send_covered, tmpmask);
6856 } 6972 }
6857 6973
6858#ifdef CONFIG_NUMA 6974#ifdef CONFIG_NUMA
6859 /* Set up node groups */ 6975 /* Set up node groups */
6860 if (sd_allnodes) 6976 if (sd_allnodes) {
6861 init_sched_build_groups(*cpu_map, cpu_map, 6977 SCHED_CPUMASK_VAR(send_covered, allmasks);
6862 &cpu_to_allnodes_group); 6978
6979 init_sched_build_groups(cpu_map, cpu_map,
6980 &cpu_to_allnodes_group,
6981 send_covered, tmpmask);
6982 }
6863 6983
6864 for (i = 0; i < MAX_NUMNODES; i++) { 6984 for (i = 0; i < MAX_NUMNODES; i++) {
6865 /* Set up node groups */ 6985 /* Set up node groups */
6866 struct sched_group *sg, *prev; 6986 struct sched_group *sg, *prev;
6867 cpumask_t nodemask = node_to_cpumask(i); 6987 SCHED_CPUMASK_VAR(nodemask, allmasks);
6868 cpumask_t domainspan; 6988 SCHED_CPUMASK_VAR(domainspan, allmasks);
6869 cpumask_t covered = CPU_MASK_NONE; 6989 SCHED_CPUMASK_VAR(covered, allmasks);
6870 int j; 6990 int j;
6871 6991
6872 cpus_and(nodemask, nodemask, *cpu_map); 6992 *nodemask = node_to_cpumask(i);
6873 if (cpus_empty(nodemask)) { 6993 cpus_clear(*covered);
6994
6995 cpus_and(*nodemask, *nodemask, *cpu_map);
6996 if (cpus_empty(*nodemask)) {
6874 sched_group_nodes[i] = NULL; 6997 sched_group_nodes[i] = NULL;
6875 continue; 6998 continue;
6876 } 6999 }
6877 7000
6878 domainspan = sched_domain_node_span(i); 7001 *domainspan = sched_domain_node_span(i);
6879 cpus_and(domainspan, domainspan, *cpu_map); 7002 cpus_and(*domainspan, *domainspan, *cpu_map);
6880 7003
6881 sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); 7004 sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
6882 if (!sg) { 7005 if (!sg) {
@@ -6885,31 +7008,31 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6885 goto error; 7008 goto error;
6886 } 7009 }
6887 sched_group_nodes[i] = sg; 7010 sched_group_nodes[i] = sg;
6888 for_each_cpu_mask(j, nodemask) { 7011 for_each_cpu_mask(j, *nodemask) {
6889 struct sched_domain *sd; 7012 struct sched_domain *sd;
6890 7013
6891 sd = &per_cpu(node_domains, j); 7014 sd = &per_cpu(node_domains, j);
6892 sd->groups = sg; 7015 sd->groups = sg;
6893 } 7016 }
6894 sg->__cpu_power = 0; 7017 sg->__cpu_power = 0;
6895 sg->cpumask = nodemask; 7018 sg->cpumask = *nodemask;
6896 sg->next = sg; 7019 sg->next = sg;
6897 cpus_or(covered, covered, nodemask); 7020 cpus_or(*covered, *covered, *nodemask);
6898 prev = sg; 7021 prev = sg;
6899 7022
6900 for (j = 0; j < MAX_NUMNODES; j++) { 7023 for (j = 0; j < MAX_NUMNODES; j++) {
6901 cpumask_t tmp, notcovered; 7024 SCHED_CPUMASK_VAR(notcovered, allmasks);
6902 int n = (i + j) % MAX_NUMNODES; 7025 int n = (i + j) % MAX_NUMNODES;
6903 node_to_cpumask_ptr(pnodemask, n); 7026 node_to_cpumask_ptr(pnodemask, n);
6904 7027
6905 cpus_complement(notcovered, covered); 7028 cpus_complement(*notcovered, *covered);
6906 cpus_and(tmp, notcovered, *cpu_map); 7029 cpus_and(*tmpmask, *notcovered, *cpu_map);
6907 cpus_and(tmp, tmp, domainspan); 7030 cpus_and(*tmpmask, *tmpmask, *domainspan);
6908 if (cpus_empty(tmp)) 7031 if (cpus_empty(*tmpmask))
6909 break; 7032 break;
6910 7033
6911 cpus_and(tmp, tmp, *pnodemask); 7034 cpus_and(*tmpmask, *tmpmask, *pnodemask);
6912 if (cpus_empty(tmp)) 7035 if (cpus_empty(*tmpmask))
6913 continue; 7036 continue;
6914 7037
6915 sg = kmalloc_node(sizeof(struct sched_group), 7038 sg = kmalloc_node(sizeof(struct sched_group),
@@ -6920,9 +7043,9 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6920 goto error; 7043 goto error;
6921 } 7044 }
6922 sg->__cpu_power = 0; 7045 sg->__cpu_power = 0;
6923 sg->cpumask = tmp; 7046 sg->cpumask = *tmpmask;
6924 sg->next = prev->next; 7047 sg->next = prev->next;
6925 cpus_or(covered, covered, tmp); 7048 cpus_or(*covered, *covered, *tmpmask);
6926 prev->next = sg; 7049 prev->next = sg;
6927 prev = sg; 7050 prev = sg;
6928 } 7051 }
@@ -6958,7 +7081,8 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6958 if (sd_allnodes) { 7081 if (sd_allnodes) {
6959 struct sched_group *sg; 7082 struct sched_group *sg;
6960 7083
6961 cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg); 7084 cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
7085 tmpmask);
6962 init_numa_sched_groups_power(sg); 7086 init_numa_sched_groups_power(sg);
6963 } 7087 }
6964#endif 7088#endif
@@ -6976,11 +7100,13 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6976 cpu_attach_domain(sd, rd, i); 7100 cpu_attach_domain(sd, rd, i);
6977 } 7101 }
6978 7102
7103 SCHED_CPUMASK_FREE((void *)allmasks);
6979 return 0; 7104 return 0;
6980 7105
6981#ifdef CONFIG_NUMA 7106#ifdef CONFIG_NUMA
6982error: 7107error:
6983 free_sched_groups(cpu_map); 7108 free_sched_groups(cpu_map, tmpmask);
7109 SCHED_CPUMASK_FREE((void *)allmasks);
6984 return -ENOMEM; 7110 return -ENOMEM;
6985#endif 7111#endif
6986} 7112}
@@ -7020,9 +7146,10 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
7020 return err; 7146 return err;
7021} 7147}
7022 7148
7023static void arch_destroy_sched_domains(const cpumask_t *cpu_map) 7149static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
7150 cpumask_t *tmpmask)
7024{ 7151{
7025 free_sched_groups(cpu_map); 7152 free_sched_groups(cpu_map, tmpmask);
7026} 7153}
7027 7154
7028/* 7155/*
@@ -7031,6 +7158,7 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
7031 */ 7158 */
7032static void detach_destroy_domains(const cpumask_t *cpu_map) 7159static void detach_destroy_domains(const cpumask_t *cpu_map)
7033{ 7160{
7161 cpumask_t tmpmask;
7034 int i; 7162 int i;
7035 7163
7036 unregister_sched_domain_sysctl(); 7164 unregister_sched_domain_sysctl();
@@ -7038,7 +7166,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
7038 for_each_cpu_mask(i, *cpu_map) 7166 for_each_cpu_mask(i, *cpu_map)
7039 cpu_attach_domain(NULL, &def_root_domain, i); 7167 cpu_attach_domain(NULL, &def_root_domain, i);
7040 synchronize_sched(); 7168 synchronize_sched();
7041 arch_destroy_sched_domains(cpu_map); 7169 arch_destroy_sched_domains(cpu_map, &tmpmask);
7042} 7170}
7043 7171
7044/* 7172/*
@@ -7246,7 +7374,7 @@ void __init sched_init_smp(void)
7246 hotcpu_notifier(update_sched_domains, 0); 7374 hotcpu_notifier(update_sched_domains, 0);
7247 7375
7248 /* Move init over to a non-isolated CPU */ 7376 /* Move init over to a non-isolated CPU */
7249 if (set_cpus_allowed(current, non_isolated_cpus) < 0) 7377 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
7250 BUG(); 7378 BUG();
7251 sched_init_granularity(); 7379 sched_init_granularity();
7252} 7380}
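
The build_sched_domains() changes above route every scratch cpumask through a single struct allmasks: it is kmalloc()ed only when NR_CPUS > 128 (SCHED_CPUMASK_ALLOC), its first member doubles as tmpmask, and SCHED_CPUMASK_VAR() binds a named local pointer to a member via offsetof(). A self-contained userspace sketch of that scheme follows; the member list and the build_domains() body are trimmed, hypothetical stand-ins for the real build_sched_domains().

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

typedef struct { unsigned long bits[64]; } cpumask_t;   /* stand-in mask, 512 bytes */

struct allmasks {
        cpumask_t tmpmask;              /* kept first: the base pointer doubles as tmpmask */
        union {                         /* never live at the same time, so share the space */
                cpumask_t nodemask;
                cpumask_t this_sibling_map;
                cpumask_t this_core_map;
        };
        cpumask_t send_covered;
};

#define SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
#define SCHED_CPUMASK_FREE(v)           free(v)
#define SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
                ((unsigned long)(a) + offsetof(struct allmasks, v))

static int build_domains(void)
{
        SCHED_CPUMASK_DECLARE(allmasks);
        cpumask_t *tmpmask;

        allmasks = malloc(sizeof(*allmasks));   /* kmalloc(..., GFP_KERNEL) in the kernel */
        if (!allmasks)
                return -1;
        tmpmask = (cpumask_t *)allmasks;        /* tmpmask is the first member */

        {
                SCHED_CPUMASK_VAR(nodemask, allmasks);  /* pointer to allmasks->nodemask */

                nodemask->bits[0] = 1;          /* used like an ordinary local mask */
                tmpmask->bits[0] = ~0UL;
        }

        SCHED_CPUMASK_FREE((void *)allmasks);
        return 0;
}

int main(void)
{
        printf("build_domains() -> %d\n", build_domains());
        return 0;
}

The scheme keeps the natural per-mask names at each use site while the masks themselves live in one heap allocation; for small NR_CPUS the same macros fall back to a single on-stack struct declared by SCHED_CPUMASK_DECLARE, so no kmalloc()/kfree() cost is paid where the stack footprint is insignificant.
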