Diffstat (limited to 'arch/ia64/kernel/domain.c')
-rw-r--r--  arch/ia64/kernel/domain.c  382
1 file changed, 382 insertions(+), 0 deletions(-)
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
new file mode 100644
index 000000000000..fe532c970438
--- /dev/null
+++ b/arch/ia64/kernel/domain.c
@@ -0,0 +1,382 @@
/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

#define SD_NODES_PER_DOMAIN 6

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}
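
/*
 * Example: building the domain for node 0 on a four-node box where all
 * nodes have cpus, node_distance(0, n) is 20 for nodes 1-2 and 40 for
 * node 3, and the caller has already marked node 0 in @used_nodes,
 * successive calls return 1, then 2, then 3, marking each node as used
 * along the way.
 */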

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t __devinit sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);
		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
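
/*
 * The span returned above is the union of the cpumasks of @node and of
 * (up to) its SD_NODES_PER_DOMAIN - 1 closest cpu-populated neighbours,
 * as selected by find_next_best_node().
 */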
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int __devinit cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
static int __devinit cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}
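
/*
 * Example: with CONFIG_SCHED_SMT and cpus 4 and 5 as siblings, both map
 * to physical group 4, so each physical package occupies a single
 * sched_group_phys[] slot.
 */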

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so roll our own.  Now each node has its own list of groups which
 * gets dynamically allocated.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int __devinit cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif
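
/*
 * Each entry of sched_group_nodes[] ends up as the head of a circular
 * list of groups covering that node's sched_domain span; the lists are
 * allocated in arch_init_sched_domains() below and freed again in
 * arch_destroy_sched_domains().
 */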

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void __devinit arch_init_sched_domains(void)
{
	int i;
	cpumask_t cpu_default_map;

	/*
	 * Set up the mask of cpus without special case scheduling
	 * requirements.  For now this just excludes isolated cpus, but could
	 * be used to exclude other special cases in the future.
	 */
	cpus_complement(cpu_default_map, cpu_isolated_map);
	cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);

	/*
	 * Set up domains.  Isolated domains just stay on the dummy domain.
	 */
	for_each_cpu_mask(i, cpu_default_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, cpu_default_map);

#ifdef CONFIG_NUMA
		if (num_online_cpus()
				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = cpu_default_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, cpu_default_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, cpu_default_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}
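
	/*
	 * At this point every non-isolated cpu has a ->parent-linked chain
	 * of domains: SMT sibling (if configured) -> physical -> node ->
	 * allnodes on NUMA, the allnodes level existing only when the
	 * machine has more online cpus than SD_NODES_PER_DOMAIN times the
	 * local node's share.  The groups these domains point at are
	 * filled in below.
	 */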

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, cpu_default_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, cpu_default_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
	init_sched_build_groups(sched_group_allnodes, cpu_default_map,
				&cpu_to_allnodes_group);

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, cpu_default_map);
		if (cpus_empty(nodemask))
			continue;

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, cpu_default_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;
			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			       "Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, cpu_default_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				       "Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		prev->next = sched_group_nodes[i];
	}
#endif

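	/*
	 * The power of a group below grows by SCHED_LOAD_SCALE/10 per cpu
	 * beyond the first: e.g. a physical group spanning 4 cpus gets
	 * SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * 3 / 10, i.e. 1.3 times the
	 * power of a single cpu.
	 */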
	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, cpu_default_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_online_cpu(i) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

void __devinit arch_destroy_sched_domains(void)
{
#ifdef CONFIG_NUMA
	int i;
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *oldsg, *sg = sched_group_nodes[i];
		if (sg == NULL)
			continue;
		sg = sg->next;
next_sg:
		oldsg = sg;
		sg = sg->next;
		kfree(oldsg);
		if (oldsg != sched_group_nodes[i])
			goto next_sg;
		sched_group_nodes[i] = NULL;
	}
#endif
}