diff options
-rw-r--r-- | include/asm-x86/topology.h | 5 | ||||
-rw-r--r-- | include/linux/topology.h | 46 | ||||
-rw-r--r-- | kernel/sched.c | 368 |
3 files changed, 256 insertions, 163 deletions
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index b167ca90f96f..9ef74c5d5ad6 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h | |||
@@ -154,10 +154,6 @@ extern unsigned long node_remap_size[]; | |||
154 | 154 | ||
155 | /* sched_domains SD_NODE_INIT for NUMAQ machines */ | 155 | /* sched_domains SD_NODE_INIT for NUMAQ machines */ |
156 | #define SD_NODE_INIT (struct sched_domain) { \ | 156 | #define SD_NODE_INIT (struct sched_domain) { \ |
157 | .span = CPU_MASK_NONE, \ | ||
158 | .parent = NULL, \ | ||
159 | .child = NULL, \ | ||
160 | .groups = NULL, \ | ||
161 | .min_interval = 8, \ | 157 | .min_interval = 8, \ |
162 | .max_interval = 32, \ | 158 | .max_interval = 32, \ |
163 | .busy_factor = 32, \ | 159 | .busy_factor = 32, \ |
@@ -175,7 +171,6 @@ extern unsigned long node_remap_size[]; | |||
175 | | SD_WAKE_BALANCE, \ | 171 | | SD_WAKE_BALANCE, \ |
176 | .last_balance = jiffies, \ | 172 | .last_balance = jiffies, \ |
177 | .balance_interval = 1, \ | 173 | .balance_interval = 1, \ |
178 | .nr_balance_failed = 0, \ | ||
179 | } | 174 | } |
180 | 175 | ||
181 | #ifdef CONFIG_X86_64_ACPI_NUMA | 176 | #ifdef CONFIG_X86_64_ACPI_NUMA |
diff --git a/include/linux/topology.h b/include/linux/topology.h index bd14f8b30f09..4bb7074a2c3a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
@@ -38,16 +38,15 @@ | |||
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #ifndef nr_cpus_node | 40 | #ifndef nr_cpus_node |
41 | #define nr_cpus_node(node) \ | 41 | #define nr_cpus_node(node) \ |
42 | ({ \ | 42 | ({ \ |
43 | cpumask_t __tmp__; \ | 43 | node_to_cpumask_ptr(__tmp__, node); \ |
44 | __tmp__ = node_to_cpumask(node); \ | 44 | cpus_weight(*__tmp__); \ |
45 | cpus_weight(__tmp__); \ | ||
46 | }) | 45 | }) |
47 | #endif | 46 | #endif |
48 | 47 | ||
49 | #define for_each_node_with_cpus(node) \ | 48 | #define for_each_node_with_cpus(node) \ |
50 | for_each_online_node(node) \ | 49 | for_each_online_node(node) \ |
51 | if (nr_cpus_node(node)) | 50 | if (nr_cpus_node(node)) |
52 | 51 | ||
53 | void arch_update_cpu_topology(void); | 52 | void arch_update_cpu_topology(void); |
@@ -80,7 +79,9 @@ void arch_update_cpu_topology(void); | |||
80 | * by defining their own arch-specific initializer in include/asm/topology.h. | 79 | * by defining their own arch-specific initializer in include/asm/topology.h. |
81 | * A definition there will automagically override these default initializers | 80 | * A definition there will automagically override these default initializers |
82 | * and allow arch-specific performance tuning of sched_domains. | 81 | * and allow arch-specific performance tuning of sched_domains. |
82 | * (Only non-zero and non-null fields need be specified.) | ||
83 | */ | 83 | */ |
84 | |||
84 | #ifdef CONFIG_SCHED_SMT | 85 | #ifdef CONFIG_SCHED_SMT |
85 | /* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, | 86 | /* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, |
86 | * so can't we drop this in favor of CONFIG_SCHED_SMT? | 87 | * so can't we drop this in favor of CONFIG_SCHED_SMT? |
@@ -89,20 +90,10 @@ void arch_update_cpu_topology(void); | |||
89 | /* Common values for SMT siblings */ | 90 | /* Common values for SMT siblings */ |
90 | #ifndef SD_SIBLING_INIT | 91 | #ifndef SD_SIBLING_INIT |
91 | #define SD_SIBLING_INIT (struct sched_domain) { \ | 92 | #define SD_SIBLING_INIT (struct sched_domain) { \ |
92 | .span = CPU_MASK_NONE, \ | ||
93 | .parent = NULL, \ | ||
94 | .child = NULL, \ | ||
95 | .groups = NULL, \ | ||
96 | .min_interval = 1, \ | 93 | .min_interval = 1, \ |
97 | .max_interval = 2, \ | 94 | .max_interval = 2, \ |
98 | .busy_factor = 64, \ | 95 | .busy_factor = 64, \ |
99 | .imbalance_pct = 110, \ | 96 | .imbalance_pct = 110, \ |
100 | .cache_nice_tries = 0, \ | ||
101 | .busy_idx = 0, \ | ||
102 | .idle_idx = 0, \ | ||
103 | .newidle_idx = 0, \ | ||
104 | .wake_idx = 0, \ | ||
105 | .forkexec_idx = 0, \ | ||
106 | .flags = SD_LOAD_BALANCE \ | 97 | .flags = SD_LOAD_BALANCE \ |
107 | | SD_BALANCE_NEWIDLE \ | 98 | | SD_BALANCE_NEWIDLE \ |
108 | | SD_BALANCE_FORK \ | 99 | | SD_BALANCE_FORK \ |
@@ -112,7 +103,6 @@ void arch_update_cpu_topology(void); | |||
112 | | SD_SHARE_CPUPOWER, \ | 103 | | SD_SHARE_CPUPOWER, \ |
113 | .last_balance = jiffies, \ | 104 | .last_balance = jiffies, \ |
114 | .balance_interval = 1, \ | 105 | .balance_interval = 1, \ |
115 | .nr_balance_failed = 0, \ | ||
116 | } | 106 | } |
117 | #endif | 107 | #endif |
118 | #endif /* CONFIG_SCHED_SMT */ | 108 | #endif /* CONFIG_SCHED_SMT */ |
@@ -121,18 +111,12 @@ void arch_update_cpu_topology(void); | |||
121 | /* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */ | 111 | /* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */ |
122 | #ifndef SD_MC_INIT | 112 | #ifndef SD_MC_INIT |
123 | #define SD_MC_INIT (struct sched_domain) { \ | 113 | #define SD_MC_INIT (struct sched_domain) { \ |
124 | .span = CPU_MASK_NONE, \ | ||
125 | .parent = NULL, \ | ||
126 | .child = NULL, \ | ||
127 | .groups = NULL, \ | ||
128 | .min_interval = 1, \ | 114 | .min_interval = 1, \ |
129 | .max_interval = 4, \ | 115 | .max_interval = 4, \ |
130 | .busy_factor = 64, \ | 116 | .busy_factor = 64, \ |
131 | .imbalance_pct = 125, \ | 117 | .imbalance_pct = 125, \ |
132 | .cache_nice_tries = 1, \ | 118 | .cache_nice_tries = 1, \ |
133 | .busy_idx = 2, \ | 119 | .busy_idx = 2, \ |
134 | .idle_idx = 0, \ | ||
135 | .newidle_idx = 0, \ | ||
136 | .wake_idx = 1, \ | 120 | .wake_idx = 1, \ |
137 | .forkexec_idx = 1, \ | 121 | .forkexec_idx = 1, \ |
138 | .flags = SD_LOAD_BALANCE \ | 122 | .flags = SD_LOAD_BALANCE \ |
@@ -144,7 +128,6 @@ void arch_update_cpu_topology(void); | |||
144 | | BALANCE_FOR_MC_POWER, \ | 128 | | BALANCE_FOR_MC_POWER, \ |
145 | .last_balance = jiffies, \ | 129 | .last_balance = jiffies, \ |
146 | .balance_interval = 1, \ | 130 | .balance_interval = 1, \ |
147 | .nr_balance_failed = 0, \ | ||
148 | } | 131 | } |
149 | #endif | 132 | #endif |
150 | #endif /* CONFIG_SCHED_MC */ | 133 | #endif /* CONFIG_SCHED_MC */ |
@@ -152,10 +135,6 @@ void arch_update_cpu_topology(void); | |||
152 | /* Common values for CPUs */ | 135 | /* Common values for CPUs */ |
153 | #ifndef SD_CPU_INIT | 136 | #ifndef SD_CPU_INIT |
154 | #define SD_CPU_INIT (struct sched_domain) { \ | 137 | #define SD_CPU_INIT (struct sched_domain) { \ |
155 | .span = CPU_MASK_NONE, \ | ||
156 | .parent = NULL, \ | ||
157 | .child = NULL, \ | ||
158 | .groups = NULL, \ | ||
159 | .min_interval = 1, \ | 138 | .min_interval = 1, \ |
160 | .max_interval = 4, \ | 139 | .max_interval = 4, \ |
161 | .busy_factor = 64, \ | 140 | .busy_factor = 64, \ |
@@ -174,16 +153,11 @@ void arch_update_cpu_topology(void); | |||
174 | | BALANCE_FOR_PKG_POWER,\ | 153 | | BALANCE_FOR_PKG_POWER,\ |
175 | .last_balance = jiffies, \ | 154 | .last_balance = jiffies, \ |
176 | .balance_interval = 1, \ | 155 | .balance_interval = 1, \ |
177 | .nr_balance_failed = 0, \ | ||
178 | } | 156 | } |
179 | #endif | 157 | #endif |
180 | 158 | ||
181 | /* sched_domains SD_ALLNODES_INIT for NUMA machines */ | 159 | /* sched_domains SD_ALLNODES_INIT for NUMA machines */ |
182 | #define SD_ALLNODES_INIT (struct sched_domain) { \ | 160 | #define SD_ALLNODES_INIT (struct sched_domain) { \ |
183 | .span = CPU_MASK_NONE, \ | ||
184 | .parent = NULL, \ | ||
185 | .child = NULL, \ | ||
186 | .groups = NULL, \ | ||
187 | .min_interval = 64, \ | 161 | .min_interval = 64, \ |
188 | .max_interval = 64*num_online_cpus(), \ | 162 | .max_interval = 64*num_online_cpus(), \ |
189 | .busy_factor = 128, \ | 163 | .busy_factor = 128, \ |
@@ -191,14 +165,10 @@ void arch_update_cpu_topology(void); | |||
191 | .cache_nice_tries = 1, \ | 165 | .cache_nice_tries = 1, \ |
192 | .busy_idx = 3, \ | 166 | .busy_idx = 3, \ |
193 | .idle_idx = 3, \ | 167 | .idle_idx = 3, \ |
194 | .newidle_idx = 0, /* unused */ \ | ||
195 | .wake_idx = 0, /* unused */ \ | ||
196 | .forkexec_idx = 0, /* unused */ \ | ||
197 | .flags = SD_LOAD_BALANCE \ | 168 | .flags = SD_LOAD_BALANCE \ |
198 | | SD_SERIALIZE, \ | 169 | | SD_SERIALIZE, \ |
199 | .last_balance = jiffies, \ | 170 | .last_balance = jiffies, \ |
200 | .balance_interval = 64, \ | 171 | .balance_interval = 64, \ |
201 | .nr_balance_failed = 0, \ | ||
202 | } | 172 | } |
203 | 173 | ||
204 | #ifdef CONFIG_NUMA | 174 | #ifdef CONFIG_NUMA |
diff --git a/kernel/sched.c b/kernel/sched.c index 9f7980f8ec00..6809178eaa9d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1869,17 +1869,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
1869 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | 1869 | * find_idlest_cpu - find the idlest cpu among the cpus in group. |
1870 | */ | 1870 | */ |
1871 | static int | 1871 | static int |
1872 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | 1872 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, |
1873 | cpumask_t *tmp) | ||
1873 | { | 1874 | { |
1874 | cpumask_t tmp; | ||
1875 | unsigned long load, min_load = ULONG_MAX; | 1875 | unsigned long load, min_load = ULONG_MAX; |
1876 | int idlest = -1; | 1876 | int idlest = -1; |
1877 | int i; | 1877 | int i; |
1878 | 1878 | ||
1879 | /* Traverse only the allowed CPUs */ | 1879 | /* Traverse only the allowed CPUs */ |
1880 | cpus_and(tmp, group->cpumask, p->cpus_allowed); | 1880 | cpus_and(*tmp, group->cpumask, p->cpus_allowed); |
1881 | 1881 | ||
1882 | for_each_cpu_mask(i, tmp) { | 1882 | for_each_cpu_mask(i, *tmp) { |
1883 | load = weighted_cpuload(i); | 1883 | load = weighted_cpuload(i); |
1884 | 1884 | ||
1885 | if (load < min_load || (load == min_load && i == this_cpu)) { | 1885 | if (load < min_load || (load == min_load && i == this_cpu)) { |
@@ -1918,7 +1918,7 @@ static int sched_balance_self(int cpu, int flag) | |||
1918 | } | 1918 | } |
1919 | 1919 | ||
1920 | while (sd) { | 1920 | while (sd) { |
1921 | cpumask_t span; | 1921 | cpumask_t span, tmpmask; |
1922 | struct sched_group *group; | 1922 | struct sched_group *group; |
1923 | int new_cpu, weight; | 1923 | int new_cpu, weight; |
1924 | 1924 | ||
@@ -1934,7 +1934,7 @@ static int sched_balance_self(int cpu, int flag) | |||
1934 | continue; | 1934 | continue; |
1935 | } | 1935 | } |
1936 | 1936 | ||
1937 | new_cpu = find_idlest_cpu(group, t, cpu); | 1937 | new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); |
1938 | if (new_cpu == -1 || new_cpu == cpu) { | 1938 | if (new_cpu == -1 || new_cpu == cpu) { |
1939 | /* Now try balancing at a lower domain level of cpu */ | 1939 | /* Now try balancing at a lower domain level of cpu */ |
1940 | sd = sd->child; | 1940 | sd = sd->child; |
@@ -2818,7 +2818,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2818 | static struct sched_group * | 2818 | static struct sched_group * |
2819 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 2819 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
2820 | unsigned long *imbalance, enum cpu_idle_type idle, | 2820 | unsigned long *imbalance, enum cpu_idle_type idle, |
2821 | int *sd_idle, cpumask_t *cpus, int *balance) | 2821 | int *sd_idle, const cpumask_t *cpus, int *balance) |
2822 | { | 2822 | { |
2823 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 2823 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
2824 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 2824 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
@@ -3119,7 +3119,7 @@ ret: | |||
3119 | */ | 3119 | */ |
3120 | static struct rq * | 3120 | static struct rq * |
3121 | find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, | 3121 | find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, |
3122 | unsigned long imbalance, cpumask_t *cpus) | 3122 | unsigned long imbalance, const cpumask_t *cpus) |
3123 | { | 3123 | { |
3124 | struct rq *busiest = NULL, *rq; | 3124 | struct rq *busiest = NULL, *rq; |
3125 | unsigned long max_load = 0; | 3125 | unsigned long max_load = 0; |
@@ -3158,15 +3158,16 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, | |||
3158 | */ | 3158 | */ |
3159 | static int load_balance(int this_cpu, struct rq *this_rq, | 3159 | static int load_balance(int this_cpu, struct rq *this_rq, |
3160 | struct sched_domain *sd, enum cpu_idle_type idle, | 3160 | struct sched_domain *sd, enum cpu_idle_type idle, |
3161 | int *balance) | 3161 | int *balance, cpumask_t *cpus) |
3162 | { | 3162 | { |
3163 | int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 3163 | int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; |
3164 | struct sched_group *group; | 3164 | struct sched_group *group; |
3165 | unsigned long imbalance; | 3165 | unsigned long imbalance; |
3166 | struct rq *busiest; | 3166 | struct rq *busiest; |
3167 | cpumask_t cpus = CPU_MASK_ALL; | ||
3168 | unsigned long flags; | 3167 | unsigned long flags; |
3169 | 3168 | ||
3169 | cpus_setall(*cpus); | ||
3170 | |||
3170 | /* | 3171 | /* |
3171 | * When power savings policy is enabled for the parent domain, idle | 3172 | * When power savings policy is enabled for the parent domain, idle |
3172 | * sibling can pick up load irrespective of busy siblings. In this case, | 3173 | * sibling can pick up load irrespective of busy siblings. In this case, |
@@ -3181,7 +3182,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3181 | 3182 | ||
3182 | redo: | 3183 | redo: |
3183 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3184 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
3184 | &cpus, balance); | 3185 | cpus, balance); |
3185 | 3186 | ||
3186 | if (*balance == 0) | 3187 | if (*balance == 0) |
3187 | goto out_balanced; | 3188 | goto out_balanced; |
@@ -3191,7 +3192,7 @@ redo: | |||
3191 | goto out_balanced; | 3192 | goto out_balanced; |
3192 | } | 3193 | } |
3193 | 3194 | ||
3194 | busiest = find_busiest_queue(group, idle, imbalance, &cpus); | 3195 | busiest = find_busiest_queue(group, idle, imbalance, cpus); |
3195 | if (!busiest) { | 3196 | if (!busiest) { |
3196 | schedstat_inc(sd, lb_nobusyq[idle]); | 3197 | schedstat_inc(sd, lb_nobusyq[idle]); |
3197 | goto out_balanced; | 3198 | goto out_balanced; |
@@ -3224,8 +3225,8 @@ redo: | |||
3224 | 3225 | ||
3225 | /* All tasks on this runqueue were pinned by CPU affinity */ | 3226 | /* All tasks on this runqueue were pinned by CPU affinity */ |
3226 | if (unlikely(all_pinned)) { | 3227 | if (unlikely(all_pinned)) { |
3227 | cpu_clear(cpu_of(busiest), cpus); | 3228 | cpu_clear(cpu_of(busiest), *cpus); |
3228 | if (!cpus_empty(cpus)) | 3229 | if (!cpus_empty(*cpus)) |
3229 | goto redo; | 3230 | goto redo; |
3230 | goto out_balanced; | 3231 | goto out_balanced; |
3231 | } | 3232 | } |
@@ -3310,7 +3311,8 @@ out_one_pinned: | |||
3310 | * this_rq is locked. | 3311 | * this_rq is locked. |
3311 | */ | 3312 | */ |
3312 | static int | 3313 | static int |
3313 | load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | 3314 | load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, |
3315 | cpumask_t *cpus) | ||
3314 | { | 3316 | { |
3315 | struct sched_group *group; | 3317 | struct sched_group *group; |
3316 | struct rq *busiest = NULL; | 3318 | struct rq *busiest = NULL; |
@@ -3318,7 +3320,8 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
3318 | int ld_moved = 0; | 3320 | int ld_moved = 0; |
3319 | int sd_idle = 0; | 3321 | int sd_idle = 0; |
3320 | int all_pinned = 0; | 3322 | int all_pinned = 0; |
3321 | cpumask_t cpus = CPU_MASK_ALL; | 3323 | |
3324 | cpus_setall(*cpus); | ||
3322 | 3325 | ||
3323 | /* | 3326 | /* |
3324 | * When power savings policy is enabled for the parent domain, idle | 3327 | * When power savings policy is enabled for the parent domain, idle |
@@ -3333,14 +3336,13 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
3333 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); | 3336 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); |
3334 | redo: | 3337 | redo: |
3335 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, | 3338 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, |
3336 | &sd_idle, &cpus, NULL); | 3339 | &sd_idle, cpus, NULL); |
3337 | if (!group) { | 3340 | if (!group) { |
3338 | schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]); | 3341 | schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]); |
3339 | goto out_balanced; | 3342 | goto out_balanced; |
3340 | } | 3343 | } |
3341 | 3344 | ||
3342 | busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance, | 3345 | busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance, cpus); |
3343 | &cpus); | ||
3344 | if (!busiest) { | 3346 | if (!busiest) { |
3345 | schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]); | 3347 | schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]); |
3346 | goto out_balanced; | 3348 | goto out_balanced; |
@@ -3362,8 +3364,8 @@ redo: | |||
3362 | spin_unlock(&busiest->lock); | 3364 | spin_unlock(&busiest->lock); |
3363 | 3365 | ||
3364 | if (unlikely(all_pinned)) { | 3366 | if (unlikely(all_pinned)) { |
3365 | cpu_clear(cpu_of(busiest), cpus); | 3367 | cpu_clear(cpu_of(busiest), *cpus); |
3366 | if (!cpus_empty(cpus)) | 3368 | if (!cpus_empty(*cpus)) |
3367 | goto redo; | 3369 | goto redo; |
3368 | } | 3370 | } |
3369 | } | 3371 | } |
@@ -3397,6 +3399,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3397 | struct sched_domain *sd; | 3399 | struct sched_domain *sd; |
3398 | int pulled_task = -1; | 3400 | int pulled_task = -1; |
3399 | unsigned long next_balance = jiffies + HZ; | 3401 | unsigned long next_balance = jiffies + HZ; |
3402 | cpumask_t tmpmask; | ||
3400 | 3403 | ||
3401 | for_each_domain(this_cpu, sd) { | 3404 | for_each_domain(this_cpu, sd) { |
3402 | unsigned long interval; | 3405 | unsigned long interval; |
@@ -3406,8 +3409,8 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3406 | 3409 | ||
3407 | if (sd->flags & SD_BALANCE_NEWIDLE) | 3410 | if (sd->flags & SD_BALANCE_NEWIDLE) |
3408 | /* If we've pulled tasks over stop searching: */ | 3411 | /* If we've pulled tasks over stop searching: */ |
3409 | pulled_task = load_balance_newidle(this_cpu, | 3412 | pulled_task = load_balance_newidle(this_cpu, this_rq, |
3410 | this_rq, sd); | 3413 | sd, &tmpmask); |
3411 | 3414 | ||
3412 | interval = msecs_to_jiffies(sd->balance_interval); | 3415 | interval = msecs_to_jiffies(sd->balance_interval); |
3413 | if (time_after(next_balance, sd->last_balance + interval)) | 3416 | if (time_after(next_balance, sd->last_balance + interval)) |
@@ -3566,6 +3569,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3566 | /* Earliest time when we have to do rebalance again */ | 3569 | /* Earliest time when we have to do rebalance again */ |
3567 | unsigned long next_balance = jiffies + 60*HZ; | 3570 | unsigned long next_balance = jiffies + 60*HZ; |
3568 | int update_next_balance = 0; | 3571 | int update_next_balance = 0; |
3572 | cpumask_t tmp; | ||
3569 | 3573 | ||
3570 | for_each_domain(cpu, sd) { | 3574 | for_each_domain(cpu, sd) { |
3571 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3575 | if (!(sd->flags & SD_LOAD_BALANCE)) |
@@ -3589,7 +3593,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3589 | } | 3593 | } |
3590 | 3594 | ||
3591 | if (time_after_eq(jiffies, sd->last_balance + interval)) { | 3595 | if (time_after_eq(jiffies, sd->last_balance + interval)) { |
3592 | if (load_balance(cpu, rq, sd, idle, &balance)) { | 3596 | if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { |
3593 | /* | 3597 | /* |
3594 | * We've pulled tasks over so either we're no | 3598 | * We've pulled tasks over so either we're no |
3595 | * longer idle, or one of our SMT siblings is | 3599 | * longer idle, or one of our SMT siblings is |
@@ -4945,7 +4949,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) | |||
4945 | cpuset_cpus_allowed(p, &cpus_allowed); | 4949 | cpuset_cpus_allowed(p, &cpus_allowed); |
4946 | cpus_and(new_mask, new_mask, cpus_allowed); | 4950 | cpus_and(new_mask, new_mask, cpus_allowed); |
4947 | again: | 4951 | again: |
4948 | retval = set_cpus_allowed(p, new_mask); | 4952 | retval = set_cpus_allowed_ptr(p, &new_mask); |
4949 | 4953 | ||
4950 | if (!retval) { | 4954 | if (!retval) { |
4951 | cpuset_cpus_allowed(p, &cpus_allowed); | 4955 | cpuset_cpus_allowed(p, &cpus_allowed); |
@@ -5700,7 +5704,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5700 | */ | 5704 | */ |
5701 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 5705 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
5702 | { | 5706 | { |
5703 | struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); | 5707 | struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); |
5704 | unsigned long flags; | 5708 | unsigned long flags; |
5705 | 5709 | ||
5706 | local_irq_save(flags); | 5710 | local_irq_save(flags); |
@@ -6118,14 +6122,14 @@ EXPORT_SYMBOL(nr_cpu_ids); | |||
6118 | 6122 | ||
6119 | #ifdef CONFIG_SCHED_DEBUG | 6123 | #ifdef CONFIG_SCHED_DEBUG |
6120 | 6124 | ||
6121 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level) | 6125 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
6126 | cpumask_t *groupmask) | ||
6122 | { | 6127 | { |
6123 | struct sched_group *group = sd->groups; | 6128 | struct sched_group *group = sd->groups; |
6124 | cpumask_t groupmask; | ||
6125 | char str[256]; | 6129 | char str[256]; |
6126 | 6130 | ||
6127 | cpulist_scnprintf(str, sizeof(str), sd->span); | 6131 | cpulist_scnprintf(str, sizeof(str), sd->span); |
6128 | cpus_clear(groupmask); | 6132 | cpus_clear(*groupmask); |
6129 | 6133 | ||
6130 | printk(KERN_DEBUG "%*s domain %d: ", level, "", level); | 6134 | printk(KERN_DEBUG "%*s domain %d: ", level, "", level); |
6131 | 6135 | ||
@@ -6169,13 +6173,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level) | |||
6169 | break; | 6173 | break; |
6170 | } | 6174 | } |
6171 | 6175 | ||
6172 | if (cpus_intersects(groupmask, group->cpumask)) { | 6176 | if (cpus_intersects(*groupmask, group->cpumask)) { |
6173 | printk(KERN_CONT "\n"); | 6177 | printk(KERN_CONT "\n"); |
6174 | printk(KERN_ERR "ERROR: repeated CPUs\n"); | 6178 | printk(KERN_ERR "ERROR: repeated CPUs\n"); |
6175 | break; | 6179 | break; |
6176 | } | 6180 | } |
6177 | 6181 | ||
6178 | cpus_or(groupmask, groupmask, group->cpumask); | 6182 | cpus_or(*groupmask, *groupmask, group->cpumask); |
6179 | 6183 | ||
6180 | cpulist_scnprintf(str, sizeof(str), group->cpumask); | 6184 | cpulist_scnprintf(str, sizeof(str), group->cpumask); |
6181 | printk(KERN_CONT " %s", str); | 6185 | printk(KERN_CONT " %s", str); |
@@ -6184,10 +6188,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level) | |||
6184 | } while (group != sd->groups); | 6188 | } while (group != sd->groups); |
6185 | printk(KERN_CONT "\n"); | 6189 | printk(KERN_CONT "\n"); |
6186 | 6190 | ||
6187 | if (!cpus_equal(sd->span, groupmask)) | 6191 | if (!cpus_equal(sd->span, *groupmask)) |
6188 | printk(KERN_ERR "ERROR: groups don't span domain->span\n"); | 6192 | printk(KERN_ERR "ERROR: groups don't span domain->span\n"); |
6189 | 6193 | ||
6190 | if (sd->parent && !cpus_subset(groupmask, sd->parent->span)) | 6194 | if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) |
6191 | printk(KERN_ERR "ERROR: parent span is not a superset " | 6195 | printk(KERN_ERR "ERROR: parent span is not a superset " |
6192 | "of domain->span\n"); | 6196 | "of domain->span\n"); |
6193 | return 0; | 6197 | return 0; |
@@ -6195,6 +6199,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level) | |||
6195 | 6199 | ||
6196 | static void sched_domain_debug(struct sched_domain *sd, int cpu) | 6200 | static void sched_domain_debug(struct sched_domain *sd, int cpu) |
6197 | { | 6201 | { |
6202 | cpumask_t *groupmask; | ||
6198 | int level = 0; | 6203 | int level = 0; |
6199 | 6204 | ||
6200 | if (!sd) { | 6205 | if (!sd) { |
@@ -6204,14 +6209,21 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
6204 | 6209 | ||
6205 | printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); | 6210 | printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); |
6206 | 6211 | ||
6212 | groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | ||
6213 | if (!groupmask) { | ||
6214 | printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); | ||
6215 | return; | ||
6216 | } | ||
6217 | |||
6207 | for (;;) { | 6218 | for (;;) { |
6208 | if (sched_domain_debug_one(sd, cpu, level)) | 6219 | if (sched_domain_debug_one(sd, cpu, level, groupmask)) |
6209 | break; | 6220 | break; |
6210 | level++; | 6221 | level++; |
6211 | sd = sd->parent; | 6222 | sd = sd->parent; |
6212 | if (!sd) | 6223 | if (!sd) |
6213 | break; | 6224 | break; |
6214 | } | 6225 | } |
6226 | kfree(groupmask); | ||
6215 | } | 6227 | } |
6216 | #else | 6228 | #else |
6217 | # define sched_domain_debug(sd, cpu) do { } while (0) | 6229 | # define sched_domain_debug(sd, cpu) do { } while (0) |
@@ -6399,30 +6411,33 @@ __setup("isolcpus=", isolated_cpu_setup); | |||
6399 | * and ->cpu_power to 0. | 6411 | * and ->cpu_power to 0. |
6400 | */ | 6412 | */ |
6401 | static void | 6413 | static void |
6402 | init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, | 6414 | init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, |
6403 | int (*group_fn)(int cpu, const cpumask_t *cpu_map, | 6415 | int (*group_fn)(int cpu, const cpumask_t *cpu_map, |
6404 | struct sched_group **sg)) | 6416 | struct sched_group **sg, |
6417 | cpumask_t *tmpmask), | ||
6418 | cpumask_t *covered, cpumask_t *tmpmask) | ||
6405 | { | 6419 | { |
6406 | struct sched_group *first = NULL, *last = NULL; | 6420 | struct sched_group *first = NULL, *last = NULL; |
6407 | cpumask_t covered = CPU_MASK_NONE; | ||
6408 | int i; | 6421 | int i; |
6409 | 6422 | ||
6410 | for_each_cpu_mask(i, span) { | 6423 | cpus_clear(*covered); |
6424 | |||
6425 | for_each_cpu_mask(i, *span) { | ||
6411 | struct sched_group *sg; | 6426 | struct sched_group *sg; |
6412 | int group = group_fn(i, cpu_map, &sg); | 6427 | int group = group_fn(i, cpu_map, &sg, tmpmask); |
6413 | int j; | 6428 | int j; |
6414 | 6429 | ||
6415 | if (cpu_isset(i, covered)) | 6430 | if (cpu_isset(i, *covered)) |
6416 | continue; | 6431 | continue; |
6417 | 6432 | ||
6418 | sg->cpumask = CPU_MASK_NONE; | 6433 | cpus_clear(sg->cpumask); |
6419 | sg->__cpu_power = 0; | 6434 | sg->__cpu_power = 0; |
6420 | 6435 | ||
6421 | for_each_cpu_mask(j, span) { | 6436 | for_each_cpu_mask(j, *span) { |
6422 | if (group_fn(j, cpu_map, NULL) != group) | 6437 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) |
6423 | continue; | 6438 | continue; |
6424 | 6439 | ||
6425 | cpu_set(j, covered); | 6440 | cpu_set(j, *covered); |
6426 | cpu_set(j, sg->cpumask); | 6441 | cpu_set(j, sg->cpumask); |
6427 | } | 6442 | } |
6428 | if (!first) | 6443 | if (!first) |
@@ -6520,7 +6535,8 @@ static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | |||
6520 | static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); | 6535 | static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); |
6521 | 6536 | ||
6522 | static int | 6537 | static int |
6523 | cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) | 6538 | cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, |
6539 | cpumask_t *unused) | ||
6524 | { | 6540 | { |
6525 | if (sg) | 6541 | if (sg) |
6526 | *sg = &per_cpu(sched_group_cpus, cpu); | 6542 | *sg = &per_cpu(sched_group_cpus, cpu); |
@@ -6538,19 +6554,22 @@ static DEFINE_PER_CPU(struct sched_group, sched_group_core); | |||
6538 | 6554 | ||
6539 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6555 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6540 | static int | 6556 | static int |
6541 | cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) | 6557 | cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, |
6558 | cpumask_t *mask) | ||
6542 | { | 6559 | { |
6543 | int group; | 6560 | int group; |
6544 | cpumask_t mask = per_cpu(cpu_sibling_map, cpu); | 6561 | |
6545 | cpus_and(mask, mask, *cpu_map); | 6562 | *mask = per_cpu(cpu_sibling_map, cpu); |
6546 | group = first_cpu(mask); | 6563 | cpus_and(*mask, *mask, *cpu_map); |
6564 | group = first_cpu(*mask); | ||
6547 | if (sg) | 6565 | if (sg) |
6548 | *sg = &per_cpu(sched_group_core, group); | 6566 | *sg = &per_cpu(sched_group_core, group); |
6549 | return group; | 6567 | return group; |
6550 | } | 6568 | } |
6551 | #elif defined(CONFIG_SCHED_MC) | 6569 | #elif defined(CONFIG_SCHED_MC) |
6552 | static int | 6570 | static int |
6553 | cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) | 6571 | cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, |
6572 | cpumask_t *unused) | ||
6554 | { | 6573 | { |
6555 | if (sg) | 6574 | if (sg) |
6556 | *sg = &per_cpu(sched_group_core, cpu); | 6575 | *sg = &per_cpu(sched_group_core, cpu); |
@@ -6562,17 +6581,18 @@ static DEFINE_PER_CPU(struct sched_domain, phys_domains); | |||
6562 | static DEFINE_PER_CPU(struct sched_group, sched_group_phys); | 6581 | static DEFINE_PER_CPU(struct sched_group, sched_group_phys); |
6563 | 6582 | ||
6564 | static int | 6583 | static int |
6565 | cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) | 6584 | cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, |
6585 | cpumask_t *mask) | ||
6566 | { | 6586 | { |
6567 | int group; | 6587 | int group; |
6568 | #ifdef CONFIG_SCHED_MC | 6588 | #ifdef CONFIG_SCHED_MC |
6569 | cpumask_t mask = cpu_coregroup_map(cpu); | 6589 | *mask = cpu_coregroup_map(cpu); |
6570 | cpus_and(mask, mask, *cpu_map); | 6590 | cpus_and(*mask, *mask, *cpu_map); |
6571 | group = first_cpu(mask); | 6591 | group = first_cpu(*mask); |
6572 | #elif defined(CONFIG_SCHED_SMT) | 6592 | #elif defined(CONFIG_SCHED_SMT) |
6573 | cpumask_t mask = per_cpu(cpu_sibling_map, cpu); | 6593 | *mask = per_cpu(cpu_sibling_map, cpu); |
6574 | cpus_and(mask, mask, *cpu_map); | 6594 | cpus_and(*mask, *mask, *cpu_map); |
6575 | group = first_cpu(mask); | 6595 | group = first_cpu(*mask); |
6576 | #else | 6596 | #else |
6577 | group = cpu; | 6597 | group = cpu; |
6578 | #endif | 6598 | #endif |
@@ -6594,13 +6614,13 @@ static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); | |||
6594 | static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); | 6614 | static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); |
6595 | 6615 | ||
6596 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, | 6616 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, |
6597 | struct sched_group **sg) | 6617 | struct sched_group **sg, cpumask_t *nodemask) |
6598 | { | 6618 | { |
6599 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu)); | ||
6600 | int group; | 6619 | int group; |
6601 | 6620 | ||
6602 | cpus_and(nodemask, nodemask, *cpu_map); | 6621 | *nodemask = node_to_cpumask(cpu_to_node(cpu)); |
6603 | group = first_cpu(nodemask); | 6622 | cpus_and(*nodemask, *nodemask, *cpu_map); |
6623 | group = first_cpu(*nodemask); | ||
6604 | 6624 | ||
6605 | if (sg) | 6625 | if (sg) |
6606 | *sg = &per_cpu(sched_group_allnodes, group); | 6626 | *sg = &per_cpu(sched_group_allnodes, group); |
@@ -6636,7 +6656,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
6636 | 6656 | ||
6637 | #ifdef CONFIG_NUMA | 6657 | #ifdef CONFIG_NUMA |
6638 | /* Free memory allocated for various sched_group structures */ | 6658 | /* Free memory allocated for various sched_group structures */ |
6639 | static void free_sched_groups(const cpumask_t *cpu_map) | 6659 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) |
6640 | { | 6660 | { |
6641 | int cpu, i; | 6661 | int cpu, i; |
6642 | 6662 | ||
@@ -6648,11 +6668,11 @@ static void free_sched_groups(const cpumask_t *cpu_map) | |||
6648 | continue; | 6668 | continue; |
6649 | 6669 | ||
6650 | for (i = 0; i < MAX_NUMNODES; i++) { | 6670 | for (i = 0; i < MAX_NUMNODES; i++) { |
6651 | cpumask_t nodemask = node_to_cpumask(i); | ||
6652 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; | 6671 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; |
6653 | 6672 | ||
6654 | cpus_and(nodemask, nodemask, *cpu_map); | 6673 | *nodemask = node_to_cpumask(i); |
6655 | if (cpus_empty(nodemask)) | 6674 | cpus_and(*nodemask, *nodemask, *cpu_map); |
6675 | if (cpus_empty(*nodemask)) | ||
6656 | continue; | 6676 | continue; |
6657 | 6677 | ||
6658 | if (sg == NULL) | 6678 | if (sg == NULL) |
@@ -6670,7 +6690,7 @@ next_sg: | |||
6670 | } | 6690 | } |
6671 | } | 6691 | } |
6672 | #else | 6692 | #else |
6673 | static void free_sched_groups(const cpumask_t *cpu_map) | 6693 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) |
6674 | { | 6694 | { |
6675 | } | 6695 | } |
6676 | #endif | 6696 | #endif |
@@ -6728,6 +6748,65 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
6728 | } | 6748 | } |
6729 | 6749 | ||
6730 | /* | 6750 | /* |
6751 | * Initializers for schedule domains | ||
6752 | * Non-inlined to reduce accumulated stack pressure in build_sched_domains() | ||
6753 | */ | ||
6754 | |||
6755 | #define SD_INIT(sd, type) sd_init_##type(sd) | ||
6756 | #define SD_INIT_FUNC(type) \ | ||
6757 | static noinline void sd_init_##type(struct sched_domain *sd) \ | ||
6758 | { \ | ||
6759 | memset(sd, 0, sizeof(*sd)); \ | ||
6760 | *sd = SD_##type##_INIT; \ | ||
6761 | } | ||
6762 | |||
6763 | SD_INIT_FUNC(CPU) | ||
6764 | #ifdef CONFIG_NUMA | ||
6765 | SD_INIT_FUNC(ALLNODES) | ||
6766 | SD_INIT_FUNC(NODE) | ||
6767 | #endif | ||
6768 | #ifdef CONFIG_SCHED_SMT | ||
6769 | SD_INIT_FUNC(SIBLING) | ||
6770 | #endif | ||
6771 | #ifdef CONFIG_SCHED_MC | ||
6772 | SD_INIT_FUNC(MC) | ||
6773 | #endif | ||
6774 | |||
6775 | /* | ||
6776 | * To minimize stack usage kmalloc room for cpumasks and share the | ||
6777 | * space as the usage in build_sched_domains() dictates. Used only | ||
6778 | * if the amount of space is significant. | ||
6779 | */ | ||
6780 | struct allmasks { | ||
6781 | cpumask_t tmpmask; /* make this one first */ | ||
6782 | union { | ||
6783 | cpumask_t nodemask; | ||
6784 | cpumask_t this_sibling_map; | ||
6785 | cpumask_t this_core_map; | ||
6786 | }; | ||
6787 | cpumask_t send_covered; | ||
6788 | |||
6789 | #ifdef CONFIG_NUMA | ||
6790 | cpumask_t domainspan; | ||
6791 | cpumask_t covered; | ||
6792 | cpumask_t notcovered; | ||
6793 | #endif | ||
6794 | }; | ||
6795 | |||
6796 | #if NR_CPUS > 128 | ||
6797 | #define SCHED_CPUMASK_ALLOC 1 | ||
6798 | #define SCHED_CPUMASK_FREE(v) kfree(v) | ||
6799 | #define SCHED_CPUMASK_DECLARE(v) struct allmasks *v | ||
6800 | #else | ||
6801 | #define SCHED_CPUMASK_ALLOC 0 | ||
6802 | #define SCHED_CPUMASK_FREE(v) | ||
6803 | #define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v | ||
6804 | #endif | ||
6805 | |||
6806 | #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ | ||
6807 | ((unsigned long)(a) + offsetof(struct allmasks, v)) | ||
6808 | |||
6809 | /* | ||
6731 | * Build sched domains for a given set of cpus and attach the sched domains | 6810 | * Build sched domains for a given set of cpus and attach the sched domains |
6732 | * to the individual cpus | 6811 | * to the individual cpus |
6733 | */ | 6812 | */ |
@@ -6735,6 +6814,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6735 | { | 6814 | { |
6736 | int i; | 6815 | int i; |
6737 | struct root_domain *rd; | 6816 | struct root_domain *rd; |
6817 | SCHED_CPUMASK_DECLARE(allmasks); | ||
6818 | cpumask_t *tmpmask; | ||
6738 | #ifdef CONFIG_NUMA | 6819 | #ifdef CONFIG_NUMA |
6739 | struct sched_group **sched_group_nodes = NULL; | 6820 | struct sched_group **sched_group_nodes = NULL; |
6740 | int sd_allnodes = 0; | 6821 | int sd_allnodes = 0; |
@@ -6748,38 +6829,60 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6748 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | 6829 | printk(KERN_WARNING "Can not alloc sched group node list\n"); |
6749 | return -ENOMEM; | 6830 | return -ENOMEM; |
6750 | } | 6831 | } |
6751 | sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; | ||
6752 | #endif | 6832 | #endif |
6753 | 6833 | ||
6754 | rd = alloc_rootdomain(); | 6834 | rd = alloc_rootdomain(); |
6755 | if (!rd) { | 6835 | if (!rd) { |
6756 | printk(KERN_WARNING "Cannot alloc root domain\n"); | 6836 | printk(KERN_WARNING "Cannot alloc root domain\n"); |
6837 | #ifdef CONFIG_NUMA | ||
6838 | kfree(sched_group_nodes); | ||
6839 | #endif | ||
6757 | return -ENOMEM; | 6840 | return -ENOMEM; |
6758 | } | 6841 | } |
6759 | 6842 | ||
6843 | #if SCHED_CPUMASK_ALLOC | ||
6844 | /* get space for all scratch cpumask variables */ | ||
6845 | allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL); | ||
6846 | if (!allmasks) { | ||
6847 | printk(KERN_WARNING "Cannot alloc cpumask array\n"); | ||
6848 | kfree(rd); | ||
6849 | #ifdef CONFIG_NUMA | ||
6850 | kfree(sched_group_nodes); | ||
6851 | #endif | ||
6852 | return -ENOMEM; | ||
6853 | } | ||
6854 | #endif | ||
6855 | tmpmask = (cpumask_t *)allmasks; | ||
6856 | |||
6857 | |||
6858 | #ifdef CONFIG_NUMA | ||
6859 | sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; | ||
6860 | #endif | ||
6861 | |||
6760 | /* | 6862 | /* |
6761 | * Set up domains for cpus specified by the cpu_map. | 6863 | * Set up domains for cpus specified by the cpu_map. |
6762 | */ | 6864 | */ |
6763 | for_each_cpu_mask(i, *cpu_map) { | 6865 | for_each_cpu_mask(i, *cpu_map) { |
6764 | struct sched_domain *sd = NULL, *p; | 6866 | struct sched_domain *sd = NULL, *p; |
6765 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); | 6867 | SCHED_CPUMASK_VAR(nodemask, allmasks); |
6766 | 6868 | ||
6767 | cpus_and(nodemask, nodemask, *cpu_map); | 6869 | *nodemask = node_to_cpumask(cpu_to_node(i)); |
6870 | cpus_and(*nodemask, *nodemask, *cpu_map); | ||
6768 | 6871 | ||
6769 | #ifdef CONFIG_NUMA | 6872 | #ifdef CONFIG_NUMA |
6770 | if (cpus_weight(*cpu_map) > | 6873 | if (cpus_weight(*cpu_map) > |
6771 | SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | 6874 | SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { |
6772 | sd = &per_cpu(allnodes_domains, i); | 6875 | sd = &per_cpu(allnodes_domains, i); |
6773 | *sd = SD_ALLNODES_INIT; | 6876 | SD_INIT(sd, ALLNODES); |
6774 | sd->span = *cpu_map; | 6877 | sd->span = *cpu_map; |
6775 | cpu_to_allnodes_group(i, cpu_map, &sd->groups); | 6878 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); |
6776 | p = sd; | 6879 | p = sd; |
6777 | sd_allnodes = 1; | 6880 | sd_allnodes = 1; |
6778 | } else | 6881 | } else |
6779 | p = NULL; | 6882 | p = NULL; |
6780 | 6883 | ||
6781 | sd = &per_cpu(node_domains, i); | 6884 | sd = &per_cpu(node_domains, i); |
6782 | *sd = SD_NODE_INIT; | 6885 | SD_INIT(sd, NODE); |
6783 | sd->span = sched_domain_node_span(cpu_to_node(i)); | 6886 | sd->span = sched_domain_node_span(cpu_to_node(i)); |
6784 | sd->parent = p; | 6887 | sd->parent = p; |
6785 | if (p) | 6888 | if (p) |
@@ -6789,94 +6892,114 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6789 | 6892 | ||
6790 | p = sd; | 6893 | p = sd; |
6791 | sd = &per_cpu(phys_domains, i); | 6894 | sd = &per_cpu(phys_domains, i); |
6792 | *sd = SD_CPU_INIT; | 6895 | SD_INIT(sd, CPU); |
6793 | sd->span = nodemask; | 6896 | sd->span = *nodemask; |
6794 | sd->parent = p; | 6897 | sd->parent = p; |
6795 | if (p) | 6898 | if (p) |
6796 | p->child = sd; | 6899 | p->child = sd; |
6797 | cpu_to_phys_group(i, cpu_map, &sd->groups); | 6900 | cpu_to_phys_group(i, cpu_map, &sd->groups, tmpmask); |
6798 | 6901 | ||
6799 | #ifdef CONFIG_SCHED_MC | 6902 | #ifdef CONFIG_SCHED_MC |
6800 | p = sd; | 6903 | p = sd; |
6801 | sd = &per_cpu(core_domains, i); | 6904 | sd = &per_cpu(core_domains, i); |
6802 | *sd = SD_MC_INIT; | 6905 | SD_INIT(sd, MC); |
6803 | sd->span = cpu_coregroup_map(i); | 6906 | sd->span = cpu_coregroup_map(i); |
6804 | cpus_and(sd->span, sd->span, *cpu_map); | 6907 | cpus_and(sd->span, sd->span, *cpu_map); |
6805 | sd->parent = p; | 6908 | sd->parent = p; |
6806 | p->child = sd; | 6909 | p->child = sd; |
6807 | cpu_to_core_group(i, cpu_map, &sd->groups); | 6910 | cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); |
6808 | #endif | 6911 | #endif |
6809 | 6912 | ||
6810 | #ifdef CONFIG_SCHED_SMT | 6913 | #ifdef CONFIG_SCHED_SMT |
6811 | p = sd; | 6914 | p = sd; |
6812 | sd = &per_cpu(cpu_domains, i); | 6915 | sd = &per_cpu(cpu_domains, i); |
6813 | *sd = SD_SIBLING_INIT; | 6916 | SD_INIT(sd, SIBLING); |
6814 | sd->span = per_cpu(cpu_sibling_map, i); | 6917 | sd->span = per_cpu(cpu_sibling_map, i); |
6815 | cpus_and(sd->span, sd->span, *cpu_map); | 6918 | cpus_and(sd->span, sd->span, *cpu_map); |
6816 | sd->parent = p; | 6919 | sd->parent = p; |
6817 | p->child = sd; | 6920 | p->child = sd; |
6818 | cpu_to_cpu_group(i, cpu_map, &sd->groups); | 6921 | cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); |
6819 | #endif | 6922 | #endif |
6820 | } | 6923 | } |
6821 | 6924 | ||
6822 | #ifdef CONFIG_SCHED_SMT | 6925 | #ifdef CONFIG_SCHED_SMT |
6823 | /* Set up CPU (sibling) groups */ | 6926 | /* Set up CPU (sibling) groups */ |
6824 | for_each_cpu_mask(i, *cpu_map) { | 6927 | for_each_cpu_mask(i, *cpu_map) { |
6825 | cpumask_t this_sibling_map = per_cpu(cpu_sibling_map, i); | 6928 | SCHED_CPUMASK_VAR(this_sibling_map, allmasks); |
6826 | cpus_and(this_sibling_map, this_sibling_map, *cpu_map); | 6929 | SCHED_CPUMASK_VAR(send_covered, allmasks); |
6827 | if (i != first_cpu(this_sibling_map)) | 6930 | |
6931 | *this_sibling_map = per_cpu(cpu_sibling_map, i); | ||
6932 | cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map); | ||
6933 | if (i != first_cpu(*this_sibling_map)) | ||
6828 | continue; | 6934 | continue; |
6829 | 6935 | ||
6830 | init_sched_build_groups(this_sibling_map, cpu_map, | 6936 | init_sched_build_groups(this_sibling_map, cpu_map, |
6831 | &cpu_to_cpu_group); | 6937 | &cpu_to_cpu_group, |
6938 | send_covered, tmpmask); | ||
6832 | } | 6939 | } |
6833 | #endif | 6940 | #endif |
6834 | 6941 | ||
6835 | #ifdef CONFIG_SCHED_MC | 6942 | #ifdef CONFIG_SCHED_MC |
6836 | /* Set up multi-core groups */ | 6943 | /* Set up multi-core groups */ |
6837 | for_each_cpu_mask(i, *cpu_map) { | 6944 | for_each_cpu_mask(i, *cpu_map) { |
6838 | cpumask_t this_core_map = cpu_coregroup_map(i); | 6945 | SCHED_CPUMASK_VAR(this_core_map, allmasks); |
6839 | cpus_and(this_core_map, this_core_map, *cpu_map); | 6946 | SCHED_CPUMASK_VAR(send_covered, allmasks); |
6840 | if (i != first_cpu(this_core_map)) | 6947 | |
6948 | *this_core_map = cpu_coregroup_map(i); | ||
6949 | cpus_and(*this_core_map, *this_core_map, *cpu_map); | ||
6950 | if (i != first_cpu(*this_core_map)) | ||
6841 | continue; | 6951 | continue; |
6952 | |||
6842 | init_sched_build_groups(this_core_map, cpu_map, | 6953 | init_sched_build_groups(this_core_map, cpu_map, |
6843 | &cpu_to_core_group); | 6954 | &cpu_to_core_group, |
6955 | send_covered, tmpmask); | ||
6844 | } | 6956 | } |
6845 | #endif | 6957 | #endif |
6846 | 6958 | ||
6847 | /* Set up physical groups */ | 6959 | /* Set up physical groups */ |
6848 | for (i = 0; i < MAX_NUMNODES; i++) { | 6960 | for (i = 0; i < MAX_NUMNODES; i++) { |
6849 | cpumask_t nodemask = node_to_cpumask(i); | 6961 | SCHED_CPUMASK_VAR(nodemask, allmasks); |
6962 | SCHED_CPUMASK_VAR(send_covered, allmasks); | ||
6850 | 6963 | ||
6851 | cpus_and(nodemask, nodemask, *cpu_map); | 6964 | *nodemask = node_to_cpumask(i); |
6852 | if (cpus_empty(nodemask)) | 6965 | cpus_and(*nodemask, *nodemask, *cpu_map); |
6966 | if (cpus_empty(*nodemask)) | ||
6853 | continue; | 6967 | continue; |
6854 | 6968 | ||
6855 | init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group); | 6969 | init_sched_build_groups(nodemask, cpu_map, |
6970 | &cpu_to_phys_group, | ||
6971 | send_covered, tmpmask); | ||
6856 | } | 6972 | } |
6857 | 6973 | ||
6858 | #ifdef CONFIG_NUMA | 6974 | #ifdef CONFIG_NUMA |
6859 | /* Set up node groups */ | 6975 | /* Set up node groups */ |
6860 | if (sd_allnodes) | 6976 | if (sd_allnodes) { |
6861 | init_sched_build_groups(*cpu_map, cpu_map, | 6977 | SCHED_CPUMASK_VAR(send_covered, allmasks); |
6862 | &cpu_to_allnodes_group); | 6978 | |
6979 | init_sched_build_groups(cpu_map, cpu_map, | ||
6980 | &cpu_to_allnodes_group, | ||
6981 | send_covered, tmpmask); | ||
6982 | } | ||
6863 | 6983 | ||
6864 | for (i = 0; i < MAX_NUMNODES; i++) { | 6984 | for (i = 0; i < MAX_NUMNODES; i++) { |
6865 | /* Set up node groups */ | 6985 | /* Set up node groups */ |
6866 | struct sched_group *sg, *prev; | 6986 | struct sched_group *sg, *prev; |
6867 | cpumask_t nodemask = node_to_cpumask(i); | 6987 | SCHED_CPUMASK_VAR(nodemask, allmasks); |
6868 | cpumask_t domainspan; | 6988 | SCHED_CPUMASK_VAR(domainspan, allmasks); |
6869 | cpumask_t covered = CPU_MASK_NONE; | 6989 | SCHED_CPUMASK_VAR(covered, allmasks); |
6870 | int j; | 6990 | int j; |
6871 | 6991 | ||
6872 | cpus_and(nodemask, nodemask, *cpu_map); | 6992 | *nodemask = node_to_cpumask(i); |
6873 | if (cpus_empty(nodemask)) { | 6993 | cpus_clear(*covered); |
6994 | |||
6995 | cpus_and(*nodemask, *nodemask, *cpu_map); | ||
6996 | if (cpus_empty(*nodemask)) { | ||
6874 | sched_group_nodes[i] = NULL; | 6997 | sched_group_nodes[i] = NULL; |
6875 | continue; | 6998 | continue; |
6876 | } | 6999 | } |
6877 | 7000 | ||
6878 | domainspan = sched_domain_node_span(i); | 7001 | *domainspan = sched_domain_node_span(i); |
6879 | cpus_and(domainspan, domainspan, *cpu_map); | 7002 | cpus_and(*domainspan, *domainspan, *cpu_map); |
6880 | 7003 | ||
6881 | sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); | 7004 | sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); |
6882 | if (!sg) { | 7005 | if (!sg) { |
@@ -6885,31 +7008,31 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6885 | goto error; | 7008 | goto error; |
6886 | } | 7009 | } |
6887 | sched_group_nodes[i] = sg; | 7010 | sched_group_nodes[i] = sg; |
6888 | for_each_cpu_mask(j, nodemask) { | 7011 | for_each_cpu_mask(j, *nodemask) { |
6889 | struct sched_domain *sd; | 7012 | struct sched_domain *sd; |
6890 | 7013 | ||
6891 | sd = &per_cpu(node_domains, j); | 7014 | sd = &per_cpu(node_domains, j); |
6892 | sd->groups = sg; | 7015 | sd->groups = sg; |
6893 | } | 7016 | } |
6894 | sg->__cpu_power = 0; | 7017 | sg->__cpu_power = 0; |
6895 | sg->cpumask = nodemask; | 7018 | sg->cpumask = *nodemask; |
6896 | sg->next = sg; | 7019 | sg->next = sg; |
6897 | cpus_or(covered, covered, nodemask); | 7020 | cpus_or(*covered, *covered, *nodemask); |
6898 | prev = sg; | 7021 | prev = sg; |
6899 | 7022 | ||
6900 | for (j = 0; j < MAX_NUMNODES; j++) { | 7023 | for (j = 0; j < MAX_NUMNODES; j++) { |
6901 | cpumask_t tmp, notcovered; | 7024 | SCHED_CPUMASK_VAR(notcovered, allmasks); |
6902 | int n = (i + j) % MAX_NUMNODES; | 7025 | int n = (i + j) % MAX_NUMNODES; |
6903 | node_to_cpumask_ptr(pnodemask, n); | 7026 | node_to_cpumask_ptr(pnodemask, n); |
6904 | 7027 | ||
6905 | cpus_complement(notcovered, covered); | 7028 | cpus_complement(*notcovered, *covered); |
6906 | cpus_and(tmp, notcovered, *cpu_map); | 7029 | cpus_and(*tmpmask, *notcovered, *cpu_map); |
6907 | cpus_and(tmp, tmp, domainspan); | 7030 | cpus_and(*tmpmask, *tmpmask, *domainspan); |
6908 | if (cpus_empty(tmp)) | 7031 | if (cpus_empty(*tmpmask)) |
6909 | break; | 7032 | break; |
6910 | 7033 | ||
6911 | cpus_and(tmp, tmp, *pnodemask); | 7034 | cpus_and(*tmpmask, *tmpmask, *pnodemask); |
6912 | if (cpus_empty(tmp)) | 7035 | if (cpus_empty(*tmpmask)) |
6913 | continue; | 7036 | continue; |
6914 | 7037 | ||
6915 | sg = kmalloc_node(sizeof(struct sched_group), | 7038 | sg = kmalloc_node(sizeof(struct sched_group), |
@@ -6920,9 +7043,9 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6920 | goto error; | 7043 | goto error; |
6921 | } | 7044 | } |
6922 | sg->__cpu_power = 0; | 7045 | sg->__cpu_power = 0; |
6923 | sg->cpumask = tmp; | 7046 | sg->cpumask = *tmpmask; |
6924 | sg->next = prev->next; | 7047 | sg->next = prev->next; |
6925 | cpus_or(covered, covered, tmp); | 7048 | cpus_or(*covered, *covered, *tmpmask); |
6926 | prev->next = sg; | 7049 | prev->next = sg; |
6927 | prev = sg; | 7050 | prev = sg; |
6928 | } | 7051 | } |
@@ -6958,7 +7081,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6958 | if (sd_allnodes) { | 7081 | if (sd_allnodes) { |
6959 | struct sched_group *sg; | 7082 | struct sched_group *sg; |
6960 | 7083 | ||
6961 | cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg); | 7084 | cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, |
7085 | tmpmask); | ||
6962 | init_numa_sched_groups_power(sg); | 7086 | init_numa_sched_groups_power(sg); |
6963 | } | 7087 | } |
6964 | #endif | 7088 | #endif |
@@ -6976,11 +7100,13 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6976 | cpu_attach_domain(sd, rd, i); | 7100 | cpu_attach_domain(sd, rd, i); |
6977 | } | 7101 | } |
6978 | 7102 | ||
7103 | SCHED_CPUMASK_FREE((void *)allmasks); | ||
6979 | return 0; | 7104 | return 0; |
6980 | 7105 | ||
6981 | #ifdef CONFIG_NUMA | 7106 | #ifdef CONFIG_NUMA |
6982 | error: | 7107 | error: |
6983 | free_sched_groups(cpu_map); | 7108 | free_sched_groups(cpu_map, tmpmask); |
7109 | SCHED_CPUMASK_FREE((void *)allmasks); | ||
6984 | return -ENOMEM; | 7110 | return -ENOMEM; |
6985 | #endif | 7111 | #endif |
6986 | } | 7112 | } |
@@ -7020,9 +7146,10 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map) | |||
7020 | return err; | 7146 | return err; |
7021 | } | 7147 | } |
7022 | 7148 | ||
7023 | static void arch_destroy_sched_domains(const cpumask_t *cpu_map) | 7149 | static void arch_destroy_sched_domains(const cpumask_t *cpu_map, |
7150 | cpumask_t *tmpmask) | ||
7024 | { | 7151 | { |
7025 | free_sched_groups(cpu_map); | 7152 | free_sched_groups(cpu_map, tmpmask); |
7026 | } | 7153 | } |
7027 | 7154 | ||
7028 | /* | 7155 | /* |
@@ -7031,6 +7158,7 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map) | |||
7031 | */ | 7158 | */ |
7032 | static void detach_destroy_domains(const cpumask_t *cpu_map) | 7159 | static void detach_destroy_domains(const cpumask_t *cpu_map) |
7033 | { | 7160 | { |
7161 | cpumask_t tmpmask; | ||
7034 | int i; | 7162 | int i; |
7035 | 7163 | ||
7036 | unregister_sched_domain_sysctl(); | 7164 | unregister_sched_domain_sysctl(); |
@@ -7038,7 +7166,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
7038 | for_each_cpu_mask(i, *cpu_map) | 7166 | for_each_cpu_mask(i, *cpu_map) |
7039 | cpu_attach_domain(NULL, &def_root_domain, i); | 7167 | cpu_attach_domain(NULL, &def_root_domain, i); |
7040 | synchronize_sched(); | 7168 | synchronize_sched(); |
7041 | arch_destroy_sched_domains(cpu_map); | 7169 | arch_destroy_sched_domains(cpu_map, &tmpmask); |
7042 | } | 7170 | } |
7043 | 7171 | ||
7044 | /* | 7172 | /* |
@@ -7246,7 +7374,7 @@ void __init sched_init_smp(void) | |||
7246 | hotcpu_notifier(update_sched_domains, 0); | 7374 | hotcpu_notifier(update_sched_domains, 0); |
7247 | 7375 | ||
7248 | /* Move init over to a non-isolated CPU */ | 7376 | /* Move init over to a non-isolated CPU */ |
7249 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | 7377 | if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) |
7250 | BUG(); | 7378 | BUG(); |
7251 | sched_init_granularity(); | 7379 | sched_init_granularity(); |
7252 | } | 7380 | } |