path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/dma.c        10
-rw-r--r--  kernel/kallsyms.c  123
-rw-r--r--  kernel/module.c      3
-rw-r--r--  kernel/resource.c   83
-rw-r--r--  kernel/sched.c     321
5 files changed, 335 insertions, 205 deletions
diff --git a/kernel/dma.c b/kernel/dma.c
index aef0a45b78..2020644c93 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -62,6 +62,11 @@ static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
 };
 
 
+/**
+ * request_dma - request and reserve a system DMA channel
+ * @dmanr: DMA channel number
+ * @device_id: reserving device ID string, used in /proc/dma
+ */
 int request_dma(unsigned int dmanr, const char * device_id)
 {
 	if (dmanr >= MAX_DMA_CHANNELS)
@@ -76,7 +81,10 @@ int request_dma(unsigned int dmanr, const char * device_id)
 	return 0;
 } /* request_dma */
 
-
+/**
+ * free_dma - free a reserved system DMA channel
+ * @dmanr: DMA channel number
+ */
 void free_dma(unsigned int dmanr)
 {
 	if (dmanr >= MAX_DMA_CHANNELS) {
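For context, the new kerneldoc describes the usual pattern for ISA-style DMA users; a minimal hypothetical sketch (the channel number and ID string below are made up for illustration, not from this patch):

	/* Hypothetical probe: reserve ISA DMA channel 3, listed as "example-card"
	 * in /proc/dma, and release it again when the transfer is done. */
	static int example_probe(void)
	{
		int err;

		err = request_dma(3, "example-card");
		if (err)
			return err;	/* -EINVAL (bad channel) or -EBUSY (already reserved) */

		/* ... set up and run the transfer ... */

		free_dma(3);
		return 0;
	}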
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 342bca62c4..eeac3e313b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -69,6 +69,15 @@ static inline int is_kernel(unsigned long addr)
 	return in_gate_area_no_task(addr);
 }
 
+static int is_ksym_addr(unsigned long addr)
+{
+	if (all_var)
+		return is_kernel(addr);
+
+	return is_kernel_text(addr) || is_kernel_inittext(addr) ||
+		is_kernel_extratext(addr);
+}
+
 /* expand a compressed symbol data into the resulting uncompressed string,
    given the offset to where the symbol is in the compressed stream */
 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
@@ -155,6 +164,73 @@ unsigned long kallsyms_lookup_name(const char *name)
 	return module_kallsyms_lookup_name(name);
 }
 
+static unsigned long get_symbol_pos(unsigned long addr,
+				    unsigned long *symbolsize,
+				    unsigned long *offset)
+{
+	unsigned long symbol_start = 0, symbol_end = 0;
+	unsigned long i, low, high, mid;
+
+	/* This kernel should never had been booted. */
+	BUG_ON(!kallsyms_addresses);
+
+	/* do a binary search on the sorted kallsyms_addresses array */
+	low = 0;
+	high = kallsyms_num_syms;
+
+	while (high - low > 1) {
+		mid = (low + high) / 2;
+		if (kallsyms_addresses[mid] <= addr)
+			low = mid;
+		else
+			high = mid;
+	}
+
+	/*
+	 * search for the first aliased symbol. Aliased
+	 * symbols are symbols with the same address
+	 */
+	while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
+		--low;
+
+	symbol_start = kallsyms_addresses[low];
+
+	/* Search for next non-aliased symbol */
+	for (i = low + 1; i < kallsyms_num_syms; i++) {
+		if (kallsyms_addresses[i] > symbol_start) {
+			symbol_end = kallsyms_addresses[i];
+			break;
+		}
+	}
+
+	/* if we found no next symbol, we use the end of the section */
+	if (!symbol_end) {
+		if (is_kernel_inittext(addr))
+			symbol_end = (unsigned long)_einittext;
+		else if (all_var)
+			symbol_end = (unsigned long)_end;
+		else
+			symbol_end = (unsigned long)_etext;
+	}
+
+	*symbolsize = symbol_end - symbol_start;
+	*offset = addr - symbol_start;
+
+	return low;
+}
+
+/*
+ * Lookup an address but don't bother to find any names.
+ */
+int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
+				unsigned long *offset)
+{
+	if (is_ksym_addr(addr))
+		return !!get_symbol_pos(addr, symbolsize, offset);
+
+	return !!module_address_lookup(addr, symbolsize, offset, NULL);
+}
+
 /*
  * Lookup an address
  * - modname is set to NULL if it's in the kernel
@@ -167,57 +243,18 @@ const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *offset,
 			    char **modname, char *namebuf)
 {
-	unsigned long i, low, high, mid;
 	const char *msym;
 
-	/* This kernel should never had been booted. */
-	BUG_ON(!kallsyms_addresses);
-
 	namebuf[KSYM_NAME_LEN] = 0;
 	namebuf[0] = 0;
 
-	if ((all_var && is_kernel(addr)) ||
-	    (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) ||
-				is_kernel_extratext(addr)))) {
-		unsigned long symbol_end = 0;
-
-		/* do a binary search on the sorted kallsyms_addresses array */
-		low = 0;
-		high = kallsyms_num_syms;
-
-		while (high-low > 1) {
-			mid = (low + high) / 2;
-			if (kallsyms_addresses[mid] <= addr) low = mid;
-			else high = mid;
-		}
-
-		/* search for the first aliased symbol. Aliased symbols are
-		   symbols with the same address */
-		while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
-			--low;
+	if (is_ksym_addr(addr)) {
+		unsigned long pos;
 
+		pos = get_symbol_pos(addr, symbolsize, offset);
 		/* Grab name */
-		kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
-
-		/* Search for next non-aliased symbol */
-		for (i = low + 1; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
-				symbol_end = kallsyms_addresses[i];
-				break;
-			}
-		}
-
-		/* if we found no next symbol, we use the end of the section */
-		if (!symbol_end) {
-			if (is_kernel_inittext(addr))
-				symbol_end = (unsigned long)_einittext;
-			else
-				symbol_end = all_var ? (unsigned long)_end : (unsigned long)_etext;
-		}
-
-		*symbolsize = symbol_end - kallsyms_addresses[low];
+		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
 		*modname = NULL;
-		*offset = addr - kallsyms_addresses[low];
 		return namebuf;
 	}
 
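The factored-out get_symbol_pos() is essentially a lower-bound binary search over the sorted kallsyms_addresses[] table, followed by a backwards walk over aliases (symbols that share one start address). A standalone sketch of the same idea, with made-up names, that compiles outside the kernel:

	#include <stddef.h>

	/* Toy model of the lookup: return the index of the symbol whose
	 * [start, next start) range contains addr, preferring the first of
	 * any aliased entries. addrs[] must be sorted ascending. */
	static size_t symbol_index(const unsigned long *addrs, size_t nsyms,
				   unsigned long addr)
	{
		size_t low = 0, high = nsyms;

		while (high - low > 1) {
			size_t mid = (low + high) / 2;

			if (addrs[mid] <= addr)
				low = mid;
			else
				high = mid;
		}
		/* step back to the first aliased entry, as get_symbol_pos() does */
		while (low && addrs[low - 1] == addrs[low])
			low--;
		return low;
	}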
diff --git a/kernel/module.c b/kernel/module.c
index 7c77a0a927..7f60e782de 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2040,7 +2040,8 @@ const char *module_address_lookup(unsigned long addr,
 	list_for_each_entry(mod, &modules, list) {
 		if (within(addr, mod->module_init, mod->init_size)
 		    || within(addr, mod->module_core, mod->core_size)) {
-			*modname = mod->name;
+			if (modname)
+				*modname = mod->name;
 			return get_ksymbol(mod, addr, size, offset);
 		}
 	}
diff --git a/kernel/resource.c b/kernel/resource.c
index 9db38a1a75..6de60c1214 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -193,6 +193,13 @@ static int __release_resource(struct resource *old)
 	return -EINVAL;
 }
 
+/**
+ * request_resource - request and reserve an I/O or memory resource
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns 0 for success, negative error code on error.
+ */
 int request_resource(struct resource *root, struct resource *new)
 {
 	struct resource *conflict;
@@ -205,6 +212,15 @@ int request_resource(struct resource *root, struct resource *new)
 
 EXPORT_SYMBOL(request_resource);
 
+/**
+ * ____request_resource - reserve a resource, with resource conflict returned
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns:
+ * On success, NULL is returned.
+ * On error, a pointer to the conflicting resource is returned.
+ */
 struct resource *____request_resource(struct resource *root, struct resource *new)
 {
 	struct resource *conflict;
@@ -217,6 +233,10 @@ struct resource *____request_resource(struct resource *root, struct resource *new)
 
 EXPORT_SYMBOL(____request_resource);
 
+/**
+ * release_resource - release a previously reserved resource
+ * @old: resource pointer
+ */
 int release_resource(struct resource *old)
 {
 	int retval;
@@ -315,8 +335,16 @@ static int find_resource(struct resource *root, struct resource *new,
 	return -EBUSY;
 }
 
-/*
- * Allocate empty slot in the resource tree given range and alignment.
+/**
+ * allocate_resource - allocate empty slot in the resource tree given range & alignment
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ * @size: requested resource region size
+ * @min: minimum size to allocate
+ * @max: maximum size to allocate
+ * @align: alignment requested, in bytes
+ * @alignf: alignment function, optional, called if not NULL
+ * @alignf_data: arbitrary data to pass to the @alignf function
  */
 int allocate_resource(struct resource *root, struct resource *new,
 		      resource_size_t size, resource_size_t min,
@@ -407,10 +435,15 @@ int insert_resource(struct resource *parent, struct resource *new)
 	return result;
 }
 
-/*
+/**
+ * adjust_resource - modify a resource's start and size
+ * @res: resource to modify
+ * @start: new start value
+ * @size: new size
+ *
  * Given an existing resource, change its start and size to match the
- * arguments. Returns -EBUSY if it can't fit. Existing children of
- * the resource are assumed to be immutable.
+ * arguments. Returns 0 on success, -EBUSY if it can't fit.
+ * Existing children of the resource are assumed to be immutable.
  */
 int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size)
 {
@@ -456,11 +489,19 @@ EXPORT_SYMBOL(adjust_resource);
  * Note how this, unlike the above, knows about
  * the IO flag meanings (busy etc).
  *
- * Request-region creates a new busy region.
+ * request_region creates a new busy region.
  *
- * Check-region returns non-zero if the area is already busy
+ * check_region returns non-zero if the area is already busy.
  *
- * Release-region releases a matching busy region.
+ * release_region releases a matching busy region.
+ */
+
+/**
+ * __request_region - create a new busy resource region
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ * @name: reserving caller's ID string
  */
 struct resource * __request_region(struct resource *parent,
 				   resource_size_t start, resource_size_t n,
@@ -497,9 +538,23 @@ struct resource * __request_region(struct resource *parent,
 	}
 	return res;
 }
-
 EXPORT_SYMBOL(__request_region);
 
+/**
+ * __check_region - check if a resource region is busy or free
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ *
+ * Returns 0 if the region is free at the moment it is checked,
+ * returns %-EBUSY if the region is busy.
+ *
+ * NOTE:
+ * This function is deprecated because its use is racy.
+ * Even if it returns 0, a subsequent call to request_region()
+ * may fail because another driver etc. just allocated the region.
+ * Do NOT use it. It will be removed from the kernel.
+ */
 int __check_region(struct resource *parent, resource_size_t start,
 			resource_size_t n)
 {
@@ -513,9 +568,16 @@ int __check_region(struct resource *parent, resource_size_t start,
 	kfree(res);
 	return 0;
 }
-
 EXPORT_SYMBOL(__check_region);
 
+/**
+ * __release_region - release a previously reserved resource region
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ *
+ * The described resource region must match a currently busy region.
+ */
 void __release_region(struct resource *parent, resource_size_t start,
 			resource_size_t n)
 {
@@ -553,7 +615,6 @@ void __release_region(struct resource *parent, resource_size_t start,
 		"<%016llx-%016llx>\n", (unsigned long long)start,
 		(unsigned long long)end);
 }
-
 EXPORT_SYMBOL(__release_region);
 
 /*
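The new kerneldoc spells out the intended usage: drivers claim a busy region with request_region() and drop it with release_region(), rather than probing first with the now explicitly deprecated check_region(). A minimal hypothetical sketch (port range and name are illustrative only):

	/* Hypothetical port-I/O driver: claim 8 ports at 0x220 for the lifetime
	 * of the device, then release them on teardown. */
	static int example_ioport_probe(void)
	{
		if (!request_region(0x220, 8, "example-card"))
			return -EBUSY;		/* range already owned by another driver */

		/* ... talk to the hardware ... */

		release_region(0x220, 8);
		return 0;
	}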
diff --git a/kernel/sched.c b/kernel/sched.c
index e4e54e86f4..53608a59d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1232,7 +1232,7 @@ nextgroup:
 }
 
 /*
- * find_idlest_queue - find the idlest runqueue among the cpus in group.
+ * find_idlest_cpu - find the idlest cpu among the cpus in group.
  */
 static int
 find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
@@ -1286,21 +1286,29 @@ static int sched_balance_self(int cpu, int flag)
 	while (sd) {
 		cpumask_t span;
 		struct sched_group *group;
-		int new_cpu;
-		int weight;
+		int new_cpu, weight;
+
+		if (!(sd->flags & flag)) {
+			sd = sd->child;
+			continue;
+		}
 
 		span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
-		if (!group)
-			goto nextlevel;
+		if (!group) {
+			sd = sd->child;
+			continue;
+		}
 
 		new_cpu = find_idlest_cpu(group, t, cpu);
-		if (new_cpu == -1 || new_cpu == cpu)
-			goto nextlevel;
+		if (new_cpu == -1 || new_cpu == cpu) {
+			/* Now try balancing at a lower domain level of cpu */
+			sd = sd->child;
+			continue;
+		}
 
-		/* Now try balancing at a lower domain level */
+		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
-nextlevel:
 		sd = NULL;
 		weight = cpus_weight(span);
 		for_each_domain(cpu, tmp) {
@@ -2533,8 +2541,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct rq *busiest;
 	cpumask_t cpus = CPU_MASK_ALL;
 
+	/*
+	 * When power savings policy is enabled for the parent domain, idle
+	 * sibling can pick up load irrespective of busy siblings. In this case,
+	 * let the state of idle sibling percolate up as IDLE, instead of
+	 * portraying it as NOT_IDLE.
+	 */
 	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[idle]);
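test_sd_parent() is defined outside this file; judging purely from how it is used here, it is presumably a helper along these lines (an assumption, not part of this patch):

	/* Assumed shape of the helper: true when the domain has a parent and
	 * that parent carries the given flag (e.g. SD_POWERSAVINGS_BALANCE). */
	#define test_sd_parent(sd, flag) \
		((sd)->parent && ((sd)->parent->flags & (flag)))

With that reading, sd_idle is only forced when no enclosing domain has asked for power-savings balancing, which is exactly what the new comment describes.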
@@ -2630,7 +2644,7 @@ redo:
 	}
 
 	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
 	return nr_moved;
 
@@ -2646,7 +2660,7 @@ out_one_pinned:
 		sd->balance_interval *= 2;
 
 	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
 	return 0;
 }
@@ -2668,7 +2682,14 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	int sd_idle = 0;
 	cpumask_t cpus = CPU_MASK_ALL;
 
-	if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	/*
+	 * When power savings policy is enabled for the parent domain, idle
+	 * sibling can pick up load irrespective of busy siblings. In this case,
+	 * let the state of idle sibling percolate up as IDLE, instead of
+	 * portraying it as NOT_IDLE.
+	 */
+	if (sd->flags & SD_SHARE_CPUPOWER &&
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
@@ -2709,7 +2730,8 @@ redo:
 
 	if (!nr_moved) {
 		schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 			return -1;
 	} else
 		sd->nr_balance_failed = 0;
@@ -2719,7 +2741,7 @@ redo:
 out_balanced:
 	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
 	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
 	sd->nr_balance_failed = 0;
 
@@ -4817,7 +4839,7 @@ void show_state(void)
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void __devinit init_idle(struct task_struct *idle, int cpu)
+void __cpuinit init_idle(struct task_struct *idle, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
@@ -5392,7 +5414,9 @@ static int sd_degenerate(struct sched_domain *sd)
 	if (sd->flags & (SD_LOAD_BALANCE |
 			 SD_BALANCE_NEWIDLE |
 			 SD_BALANCE_FORK |
-			 SD_BALANCE_EXEC)) {
+			 SD_BALANCE_EXEC |
+			 SD_SHARE_CPUPOWER |
+			 SD_SHARE_PKG_RESOURCES)) {
 		if (sd->groups != sd->groups->next)
 			return 0;
 	}
@@ -5426,7 +5450,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 		pflags &= ~(SD_LOAD_BALANCE |
 			    SD_BALANCE_NEWIDLE |
 			    SD_BALANCE_FORK |
-			    SD_BALANCE_EXEC);
+			    SD_BALANCE_EXEC |
+			    SD_SHARE_CPUPOWER |
+			    SD_SHARE_PKG_RESOURCES);
 	}
 	if (~cflags & pflags)
 		return 0;
@@ -5448,12 +5474,18 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 		struct sched_domain *parent = tmp->parent;
 		if (!parent)
 			break;
-		if (sd_parent_degenerate(tmp, parent))
+		if (sd_parent_degenerate(tmp, parent)) {
 			tmp->parent = parent->parent;
+			if (parent->parent)
+				parent->parent->child = tmp;
+		}
 	}
 
-	if (sd && sd_degenerate(sd))
+	if (sd && sd_degenerate(sd)) {
 		sd = sd->parent;
+		if (sd)
+			sd->child = NULL;
+	}
 
 	sched_domain_debug(sd, cpu);
 
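The extra bookkeeping in cpu_attach_domain() is needed because domains are now doubly linked: when a degenerate parent level is spliced out of the chain, the surviving grandparent's ->child must be repointed at the level below, or the new back-links would keep referencing the removed domain. A toy model of that splice, with a made-up struct, for illustration only:

	/* Toy model of the domain-chain fix-up (not kernel code); the caller
	 * has already checked that tmp->parent exists. */
	struct toy_domain {
		struct toy_domain *parent, *child;
	};

	static void splice_out_parent(struct toy_domain *tmp)
	{
		struct toy_domain *parent = tmp->parent;

		tmp->parent = parent->parent;
		if (parent->parent)
			parent->parent->child = tmp;	/* keep the downward link consistent */
	}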
@@ -5461,7 +5493,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 }
 
 /* cpus with isolated domains */
-static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
@@ -5489,15 +5521,17 @@ __setup ("isolcpus=", isolated_cpu_setup);
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
-static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
-				    int (*group_fn)(int cpu))
+static void
+init_sched_build_groups(struct sched_group groups[], cpumask_t span,
+			const cpumask_t *cpu_map,
+			int (*group_fn)(int cpu, const cpumask_t *cpu_map))
 {
 	struct sched_group *first = NULL, *last = NULL;
 	cpumask_t covered = CPU_MASK_NONE;
 	int i;
 
 	for_each_cpu_mask(i, span) {
-		int group = group_fn(i);
+		int group = group_fn(i, cpu_map);
 		struct sched_group *sg = &groups[group];
 		int j;
 
@@ -5508,7 +5542,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 		sg->cpu_power = 0;
 
 		for_each_cpu_mask(j, span) {
-			if (group_fn(j) != group)
+			if (group_fn(j, cpu_map) != group)
 				continue;
 
 			cpu_set(j, covered);
@@ -5975,13 +6009,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
 #endif
 	);
 	if (system_state == SYSTEM_BOOTING) {
-		printk("migration_cost=");
-		for (distance = 0; distance <= max_distance; distance++) {
-			if (distance)
-				printk(",");
-			printk("%ld", (long)migration_cost[distance] / 1000);
+		if (num_online_cpus() > 1) {
+			printk("migration_cost=");
+			for (distance = 0; distance <= max_distance; distance++) {
+				if (distance)
+					printk(",");
+				printk("%ld", (long)migration_cost[distance] / 1000);
+			}
+			printk("\n");
 		}
-		printk("\n");
 	}
 	j1 = jiffies;
 	if (migration_debug)
@@ -6084,7 +6120,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
 static struct sched_group sched_group_cpus[NR_CPUS];
 
-static int cpu_to_cpu_group(int cpu)
+static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map)
 {
 	return cpu;
 }
@@ -6095,31 +6131,36 @@ static int cpu_to_cpu_group(int cpu)
  */
 #ifdef CONFIG_SCHED_MC
 static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static struct sched_group *sched_group_core_bycpu[NR_CPUS];
+static struct sched_group sched_group_core[NR_CPUS];
 #endif
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
-static int cpu_to_core_group(int cpu)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
 {
-	return first_cpu(cpu_sibling_map[cpu]);
+	cpumask_t mask = cpu_sibling_map[cpu];
+	cpus_and(mask, mask, *cpu_map);
+	return first_cpu(mask);
 }
 #elif defined(CONFIG_SCHED_MC)
-static int cpu_to_core_group(int cpu)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
 {
 	return cpu;
 }
 #endif
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
 
-static int cpu_to_phys_group(int cpu)
+static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map)
 {
 #ifdef CONFIG_SCHED_MC
 	cpumask_t mask = cpu_coregroup_map(cpu);
+	cpus_and(mask, mask, *cpu_map);
 	return first_cpu(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	return first_cpu(cpu_sibling_map[cpu]);
+	cpumask_t mask = cpu_sibling_map[cpu];
+	cpus_and(mask, mask, *cpu_map);
+	return first_cpu(mask);
 #else
 	return cpu;
 #endif
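All of the cpu_to_*_group() helpers now take the cpu_map being built and mask their candidate CPUs with it before calling first_cpu(). The point is that when domains are built over a partial map (for example with some CPUs isolated or offline), the group index must be a CPU that actually belongs to that map. A small illustration with made-up masks:

	/* Illustration only: siblings of cpu 1 are {0,1}, but the map being
	 * built is {1,2,3}. The old first_cpu(cpu_sibling_map[1]) picked 0,
	 * a CPU that is not being set up at all; masking first yields 1. */
	cpumask_t mask = cpu_sibling_map[1];	/* {0,1} */
	cpus_and(mask, mask, *cpu_map);		/* {1}   */
	int group = first_cpu(mask);		/* 1     */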
@@ -6137,7 +6178,7 @@ static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
 static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
 
-static int cpu_to_allnodes_group(int cpu)
+static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map)
 {
 	return cpu_to_node(cpu);
 }
@@ -6169,12 +6210,11 @@ next_sg:
 }
 #endif
 
+#ifdef CONFIG_NUMA
 /* Free memory allocated for various sched_group structures */
 static void free_sched_groups(const cpumask_t *cpu_map)
 {
-	int cpu;
-#ifdef CONFIG_NUMA
-	int i;
+	int cpu, i;
 
 	for_each_cpu_mask(cpu, *cpu_map) {
 		struct sched_group *sched_group_allnodes
@@ -6211,19 +6251,63 @@ next_sg:
 		kfree(sched_group_nodes);
 		sched_group_nodes_bycpu[cpu] = NULL;
 	}
+}
+#else
+static void free_sched_groups(const cpumask_t *cpu_map)
+{
+}
 #endif
-	for_each_cpu_mask(cpu, *cpu_map) {
-		if (sched_group_phys_bycpu[cpu]) {
-			kfree(sched_group_phys_bycpu[cpu]);
-			sched_group_phys_bycpu[cpu] = NULL;
-		}
-#ifdef CONFIG_SCHED_MC
-		if (sched_group_core_bycpu[cpu]) {
-			kfree(sched_group_core_bycpu[cpu]);
-			sched_group_core_bycpu[cpu] = NULL;
-		}
-#endif
+
+/*
+ * Initialize sched groups cpu_power.
+ *
+ * cpu_power indicates the capacity of sched group, which is used while
+ * distributing the load between different sched groups in a sched domain.
+ * Typically cpu_power for all the groups in a sched domain will be same unless
+ * there are asymmetries in the topology. If there are asymmetries, group
+ * having more cpu_power will pickup more load compared to the group having
+ * less cpu_power.
+ *
+ * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
+ * the maximum number of tasks a group can handle in the presence of other idle
+ * or lightly loaded groups in the same sched domain.
+ */
+static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+{
+	struct sched_domain *child;
+	struct sched_group *group;
+
+	WARN_ON(!sd || !sd->groups);
+
+	if (cpu != first_cpu(sd->groups->cpumask))
+		return;
+
+	child = sd->child;
+
+	/*
+	 * For perf policy, if the groups in child domain share resources
+	 * (for example cores sharing some portions of the cache hierarchy
+	 * or SMT), then set this domain groups cpu_power such that each group
+	 * can handle only one task, when there are other idle groups in the
+	 * same sched domain.
+	 */
+	if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
+		       (child->flags &
+			(SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+		return;
 	}
+
+	sd->groups->cpu_power = 0;
+
+	/*
+	 * add cpu_power of each child group to this groups cpu_power
+	 */
+	group = child->groups;
+	do {
+		sd->groups->cpu_power += group->cpu_power;
+		group = group->next;
+	} while (group != child->groups);
 }
 
 /*
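A short worked example of the new cpu_power scheme, assuming SCHED_LOAD_SCALE is 128 (its value at the time): the lowest-level SMT groups have no child domain, so each gets 128. For the package-level domain above a two-thread SMT core, the default performance policy caps the group at 128, since the child groups share SD_SHARE_CPUPOWER and should not attract two tasks while other packages sit idle; with SD_POWERSAVINGS_BALANCE set on that domain, the group instead sums its child groups to 2 * 128 = 256, so both siblings can be filled before load spills to another package.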
@@ -6233,10 +6317,7 @@ next_sg:
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
-	struct sched_group *sched_group_phys = NULL;
-#ifdef CONFIG_SCHED_MC
-	struct sched_group *sched_group_core = NULL;
-#endif
+	struct sched_domain *sd;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	struct sched_group *sched_group_allnodes = NULL;
@@ -6268,9 +6349,10 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		    > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
 			if (!sched_group_allnodes) {
 				sched_group_allnodes
-					= kmalloc(sizeof(struct sched_group)
+					= kmalloc_node(sizeof(struct sched_group)
 						  * MAX_NUMNODES,
-						  GFP_KERNEL);
+						  GFP_KERNEL,
+						  cpu_to_node(i));
 				if (!sched_group_allnodes) {
 					printk(KERN_WARNING
 					"Can not alloc allnodes sched group\n");
@@ -6282,7 +6364,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			sd = &per_cpu(allnodes_domains, i);
 			*sd = SD_ALLNODES_INIT;
 			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i);
+			group = cpu_to_allnodes_group(i, cpu_map);
 			sd->groups = &sched_group_allnodes[group];
 			p = sd;
 		} else
@@ -6292,60 +6374,42 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		*sd = SD_NODE_INIT;
 		sd->span = sched_domain_node_span(cpu_to_node(i));
 		sd->parent = p;
+		if (p)
+			p->child = sd;
 		cpus_and(sd->span, sd->span, *cpu_map);
 #endif
 
-		if (!sched_group_phys) {
-			sched_group_phys
-				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
-					  GFP_KERNEL);
-			if (!sched_group_phys) {
-				printk (KERN_WARNING "Can not alloc phys sched"
-						     "group\n");
-				goto error;
-			}
-			sched_group_phys_bycpu[i] = sched_group_phys;
-		}
-
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i);
+		group = cpu_to_phys_group(i, cpu_map);
 		*sd = SD_CPU_INIT;
 		sd->span = nodemask;
 		sd->parent = p;
+		if (p)
+			p->child = sd;
 		sd->groups = &sched_group_phys[group];
 
 #ifdef CONFIG_SCHED_MC
-		if (!sched_group_core) {
-			sched_group_core
-				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
-					  GFP_KERNEL);
-			if (!sched_group_core) {
-				printk (KERN_WARNING "Can not alloc core sched"
-						     "group\n");
-				goto error;
-			}
-			sched_group_core_bycpu[i] = sched_group_core;
-		}
-
 		p = sd;
 		sd = &per_cpu(core_domains, i);
-		group = cpu_to_core_group(i);
+		group = cpu_to_core_group(i, cpu_map);
 		*sd = SD_MC_INIT;
 		sd->span = cpu_coregroup_map(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
+		p->child = sd;
 		sd->groups = &sched_group_core[group];
 #endif
 
#ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i);
+		group = cpu_to_cpu_group(i, cpu_map);
 		*sd = SD_SIBLING_INIT;
 		sd->span = cpu_sibling_map[i];
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
+		p->child = sd;
 		sd->groups = &sched_group_cpus[group];
 #endif
 	}
@@ -6359,7 +6423,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			continue;
 
 		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-						&cpu_to_cpu_group);
+					cpu_map, &cpu_to_cpu_group);
 	}
 #endif
 
@@ -6371,7 +6435,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		if (i != first_cpu(this_core_map))
 			continue;
 		init_sched_build_groups(sched_group_core, this_core_map,
-						&cpu_to_core_group);
+					cpu_map, &cpu_to_core_group);
 	}
 #endif
 
@@ -6385,14 +6449,14 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			continue;
 
 		init_sched_build_groups(sched_group_phys, nodemask,
-						&cpu_to_phys_group);
+					cpu_map, &cpu_to_phys_group);
 	}
 
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
 	if (sched_group_allnodes)
 		init_sched_build_groups(sched_group_allnodes, *cpu_map,
-					&cpu_to_allnodes_group);
+					cpu_map, &cpu_to_allnodes_group);
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		/* Set up node groups */
@@ -6464,72 +6528,20 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
 	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
 		sd = &per_cpu(cpu_domains, i);
-		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
 	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
 		sd = &per_cpu(core_domains, i);
-		if (sched_smt_power_savings)
-			power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-		else
-			power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
-				* SCHED_LOAD_SCALE / 10;
-		sd->groups->cpu_power = power;
+		init_sched_groups_power(i, sd);
 	}
 #endif
 
 	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_MC
 		sd = &per_cpu(phys_domains, i);
-		if (i != first_cpu(sd->groups->cpumask))
-			continue;
-
-		sd->groups->cpu_power = 0;
-		if (sched_mc_power_savings || sched_smt_power_savings) {
-			int j;
-
-			for_each_cpu_mask(j, sd->groups->cpumask) {
-				struct sched_domain *sd1;
-				sd1 = &per_cpu(core_domains, j);
-				/*
-				 * for each core we will add once
-				 * to the group in physical domain
-				 */
-				if (j != first_cpu(sd1->groups->cpumask))
-					continue;
-
-				if (sched_smt_power_savings)
-					sd->groups->cpu_power += sd1->groups->cpu_power;
-				else
-					sd->groups->cpu_power += SCHED_LOAD_SCALE;
-			}
-		} else
-			/*
-			 * This has to be < 2 * SCHED_LOAD_SCALE
-			 * Lets keep it SCHED_LOAD_SCALE, so that
-			 * while calculating NUMA group's cpu_power
-			 * we can simply do
-			 *  numa_group->cpu_power += phys_group->cpu_power;
-			 *
-			 * See "only add power once for each physical pkg"
-			 * comment below
-			 */
-			sd->groups->cpu_power = SCHED_LOAD_SCALE;
-#else
-		int power;
-		sd = &per_cpu(phys_domains, i);
-		if (sched_smt_power_savings)
-			power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-		else
-			power = SCHED_LOAD_SCALE;
-		sd->groups->cpu_power = power;
-#endif
+		init_sched_groups_power(i, sd);
 	}
 
 #ifdef CONFIG_NUMA
@@ -6537,7 +6549,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
 	if (sched_group_allnodes) {
-		int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+		int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map);
 		struct sched_group *sg = &sched_group_allnodes[group];
 
 		init_numa_sched_groups_power(sg);
@@ -6563,9 +6575,11 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 
 	return 0;
 
+#ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map);
 	return -ENOMEM;
+#endif
 }
 /*
 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
@@ -6747,11 +6761,20 @@ static int update_sched_domains(struct notifier_block *nfb,
 
 void __init sched_init_smp(void)
 {
+	cpumask_t non_isolated_cpus;
+
 	lock_cpu_hotplug();
 	arch_init_sched_domains(&cpu_online_map);
+	cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map);
+	if (cpus_empty(non_isolated_cpus))
+		cpu_set(smp_processor_id(), non_isolated_cpus);
 	unlock_cpu_hotplug();
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
+
+	/* Move init over to a non-isolated CPU */
+	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
+		BUG();
 }
 #else
 void __init sched_init_smp(void)
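As a concrete example of the new isolation handling: booting a four-CPU machine with isolcpus=2,3 leaves cpu_isolated_map = {2,3}, so the code above computes non_isolated_cpus = {0,1} and restricts init (and, through inherited affinity, everything it forks) to those CPUs, leaving the isolated ones free of scheduler-placed tasks. If every online CPU were isolated, the boot CPU is added back to the mask so init still has somewhere to run.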