Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/dma.c      |  10
 -rw-r--r--  kernel/kallsyms.c | 123
 -rw-r--r--  kernel/module.c   |   3
 -rw-r--r--  kernel/resource.c |  83
 -rw-r--r--  kernel/sched.c    | 321
 5 files changed, 335 insertions, 205 deletions
diff --git a/kernel/dma.c b/kernel/dma.c
index aef0a45b78..2020644c93 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -62,6 +62,11 @@ static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
 };
 
 
+/**
+ * request_dma - request and reserve a system DMA channel
+ * @dmanr: DMA channel number
+ * @device_id: reserving device ID string, used in /proc/dma
+ */
 int request_dma(unsigned int dmanr, const char * device_id)
 {
 	if (dmanr >= MAX_DMA_CHANNELS)
@@ -76,7 +81,10 @@ int request_dma(unsigned int dmanr, const char * device_id)
 	return 0;
 } /* request_dma */
 
-
+/**
+ * free_dma - free a reserved system DMA channel
+ * @dmanr: DMA channel number
+ */
 void free_dma(unsigned int dmanr)
 {
 	if (dmanr >= MAX_DMA_CHANNELS) {
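The new kernel-doc above covers both halves of the ISA DMA channel API. Purely as an illustration (the channel number, ID string and error-code comment are assumptions, not taken from this patch), a driver-style caller would look roughly like:

#include <linux/errno.h>
#include <asm/dma.h>

static int example_claim_dma(void)
{
	int err;

	err = request_dma(3, "example-driver");	/* listed in /proc/dma */
	if (err)
		return err;	/* typically -EINVAL or -EBUSY */

	/* ... program and use the channel ... */

	free_dma(3);
	return 0;
}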
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 342bca62c4..eeac3e313b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -69,6 +69,15 @@ static inline int is_kernel(unsigned long addr)
 	return in_gate_area_no_task(addr);
 }
 
+static int is_ksym_addr(unsigned long addr)
+{
+	if (all_var)
+		return is_kernel(addr);
+
+	return is_kernel_text(addr) || is_kernel_inittext(addr) ||
+		is_kernel_extratext(addr);
+}
+
 /* expand a compressed symbol data into the resulting uncompressed string,
    given the offset to where the symbol is in the compressed stream */
 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
@@ -155,6 +164,73 @@ unsigned long kallsyms_lookup_name(const char *name)
 	return module_kallsyms_lookup_name(name);
 }
 
+static unsigned long get_symbol_pos(unsigned long addr,
+				    unsigned long *symbolsize,
+				    unsigned long *offset)
+{
+	unsigned long symbol_start = 0, symbol_end = 0;
+	unsigned long i, low, high, mid;
+
+	/* This kernel should never had been booted. */
+	BUG_ON(!kallsyms_addresses);
+
+	/* do a binary search on the sorted kallsyms_addresses array */
+	low = 0;
+	high = kallsyms_num_syms;
+
+	while (high - low > 1) {
+		mid = (low + high) / 2;
+		if (kallsyms_addresses[mid] <= addr)
+			low = mid;
+		else
+			high = mid;
+	}
+
+	/*
+	 * search for the first aliased symbol. Aliased
+	 * symbols are symbols with the same address
+	 */
+	while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
+		--low;
+
+	symbol_start = kallsyms_addresses[low];
+
+	/* Search for next non-aliased symbol */
+	for (i = low + 1; i < kallsyms_num_syms; i++) {
+		if (kallsyms_addresses[i] > symbol_start) {
+			symbol_end = kallsyms_addresses[i];
+			break;
+		}
+	}
+
+	/* if we found no next symbol, we use the end of the section */
+	if (!symbol_end) {
+		if (is_kernel_inittext(addr))
+			symbol_end = (unsigned long)_einittext;
+		else if (all_var)
+			symbol_end = (unsigned long)_end;
+		else
+			symbol_end = (unsigned long)_etext;
+	}
+
+	*symbolsize = symbol_end - symbol_start;
+	*offset = addr - symbol_start;
+
+	return low;
+}
+
+/*
+ * Lookup an address but don't bother to find any names.
+ */
+int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
+				unsigned long *offset)
+{
+	if (is_ksym_addr(addr))
+		return !!get_symbol_pos(addr, symbolsize, offset);
+
+	return !!module_address_lookup(addr, symbolsize, offset, NULL);
+}
+
 /*
  * Lookup an address
  * - modname is set to NULL if it's in the kernel
@@ -167,57 +243,18 @@ const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *offset,
 			    char **modname, char *namebuf)
 {
-	unsigned long i, low, high, mid;
 	const char *msym;
 
-	/* This kernel should never had been booted. */
-	BUG_ON(!kallsyms_addresses);
-
 	namebuf[KSYM_NAME_LEN] = 0;
 	namebuf[0] = 0;
 
-	if ((all_var && is_kernel(addr)) ||
-	    (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) ||
-				is_kernel_extratext(addr)))) {
-		unsigned long symbol_end = 0;
-
-		/* do a binary search on the sorted kallsyms_addresses array */
-		low = 0;
-		high = kallsyms_num_syms;
-
-		while (high-low > 1) {
-			mid = (low + high) / 2;
-			if (kallsyms_addresses[mid] <= addr) low = mid;
-			else high = mid;
-		}
-
-		/* search for the first aliased symbol. Aliased symbols are
-		   symbols with the same address */
-		while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
-			--low;
+	if (is_ksym_addr(addr)) {
+		unsigned long pos;
 
+		pos = get_symbol_pos(addr, symbolsize, offset);
 		/* Grab name */
-		kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
-
-		/* Search for next non-aliased symbol */
-		for (i = low + 1; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
-				symbol_end = kallsyms_addresses[i];
-				break;
-			}
-		}
-
-		/* if we found no next symbol, we use the end of the section */
-		if (!symbol_end) {
-			if (is_kernel_inittext(addr))
-				symbol_end = (unsigned long)_einittext;
-			else
-				symbol_end = all_var ? (unsigned long)_end : (unsigned long)_etext;
-		}
-
-		*symbolsize = symbol_end - kallsyms_addresses[low];
+		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
 		*modname = NULL;
-		*offset = addr - kallsyms_addresses[low];
 		return namebuf;
 	}
 
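The new kallsyms_lookup_size_offset() gives callers the size of the enclosing symbol and the offset into it without expanding the symbol name, which is all the factored-out get_symbol_pos() computes. A rough usage sketch follows; it assumes the matching declaration is exported through <linux/kallsyms.h>, which sits outside this kernel/-only diffstat:

#include <linux/kernel.h>
#include <linux/kallsyms.h>

static void example_report(unsigned long addr)
{
	unsigned long size, offset;

	if (kallsyms_lookup_size_offset(addr, &size, &offset))
		printk(KERN_DEBUG "addr is %lu bytes into a %lu byte symbol\n",
		       offset, size);
	else
		printk(KERN_DEBUG "addr not in any known symbol\n");
}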
diff --git a/kernel/module.c b/kernel/module.c
index 7c77a0a927..7f60e782de 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2040,7 +2040,8 @@ const char *module_address_lookup(unsigned long addr,
 	list_for_each_entry(mod, &modules, list) {
 		if (within(addr, mod->module_init, mod->init_size)
 		    || within(addr, mod->module_core, mod->core_size)) {
-			*modname = mod->name;
+			if (modname)
+				*modname = mod->name;
 			return get_ksymbol(mod, addr, size, offset);
 		}
 	}
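With this guard, modname becomes an optional output, which is exactly what the new kallsyms_lookup_size_offset() relies on when it passes NULL. A minimal, hypothetical caller that only cares whether an address falls inside a module's text:

static int example_addr_in_module(unsigned long addr)
{
	unsigned long size, offset;

	/* Passing NULL for modname is safe once the check above is applied. */
	return module_address_lookup(addr, &size, &offset, NULL) != NULL;
}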
diff --git a/kernel/resource.c b/kernel/resource.c
index 9db38a1a75..6de60c1214 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -193,6 +193,13 @@ static int __release_resource(struct resource *old)
 	return -EINVAL;
 }
 
+/**
+ * request_resource - request and reserve an I/O or memory resource
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns 0 for success, negative error code on error.
+ */
 int request_resource(struct resource *root, struct resource *new)
 {
 	struct resource *conflict;
@@ -205,6 +212,15 @@ int request_resource(struct resource *root, struct resource *new)
 
 EXPORT_SYMBOL(request_resource);
 
+/**
+ * ____request_resource - reserve a resource, with resource conflict returned
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns:
+ * On success, NULL is returned.
+ * On error, a pointer to the conflicting resource is returned.
+ */
 struct resource *____request_resource(struct resource *root, struct resource *new)
 {
 	struct resource *conflict;
@@ -217,6 +233,10 @@ struct resource *____request_resource(struct resource *root, struct resource *ne
 
 EXPORT_SYMBOL(____request_resource);
 
+/**
+ * release_resource - release a previously reserved resource
+ * @old: resource pointer
+ */
 int release_resource(struct resource *old)
 {
 	int retval;
@@ -315,8 +335,16 @@ static int find_resource(struct resource *root, struct resource *new,
 	return -EBUSY;
 }
 
-/*
- * Allocate empty slot in the resource tree given range and alignment.
+/**
+ * allocate_resource - allocate empty slot in the resource tree given range & alignment
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ * @size: requested resource region size
+ * @min: minimum size to allocate
+ * @max: maximum size to allocate
+ * @align: alignment requested, in bytes
+ * @alignf: alignment function, optional, called if not NULL
+ * @alignf_data: arbitrary data to pass to the @alignf function
  */
 int allocate_resource(struct resource *root, struct resource *new,
 		      resource_size_t size, resource_size_t min,
@@ -407,10 +435,15 @@ int insert_resource(struct resource *parent, struct resource *new)
 	return result;
 }
 
-/*
+/**
+ * adjust_resource - modify a resource's start and size
+ * @res: resource to modify
+ * @start: new start value
+ * @size: new size
+ *
  * Given an existing resource, change its start and size to match the
- * arguments. Returns -EBUSY if it can't fit. Existing children of
- * the resource are assumed to be immutable.
+ * arguments. Returns 0 on success, -EBUSY if it can't fit.
+ * Existing children of the resource are assumed to be immutable.
  */
 int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size)
 {
@@ -456,11 +489,19 @@ EXPORT_SYMBOL(adjust_resource);
  * Note how this, unlike the above, knows about
  * the IO flag meanings (busy etc).
  *
- * Request-region creates a new busy region.
+ * request_region creates a new busy region.
  *
- * Check-region returns non-zero if the area is already busy
+ * check_region returns non-zero if the area is already busy.
  *
- * Release-region releases a matching busy region.
+ * release_region releases a matching busy region.
+ */
+
+/**
+ * __request_region - create a new busy resource region
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ * @name: reserving caller's ID string
  */
 struct resource * __request_region(struct resource *parent,
 				   resource_size_t start, resource_size_t n,
@@ -497,9 +538,23 @@ struct resource * __request_region(struct resource *parent,
 	}
 	return res;
 }
-
 EXPORT_SYMBOL(__request_region);
 
+/**
+ * __check_region - check if a resource region is busy or free
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ *
+ * Returns 0 if the region is free at the moment it is checked,
+ * returns %-EBUSY if the region is busy.
+ *
+ * NOTE:
+ * This function is deprecated because its use is racy.
+ * Even if it returns 0, a subsequent call to request_region()
+ * may fail because another driver etc. just allocated the region.
+ * Do NOT use it.  It will be removed from the kernel.
+ */
 int __check_region(struct resource *parent, resource_size_t start,
 		resource_size_t n)
 {
@@ -513,9 +568,16 @@ int __check_region(struct resource *parent, resource_size_t start,
 	kfree(res);
 	return 0;
 }
-
 EXPORT_SYMBOL(__check_region);
 
+/**
+ * __release_region - release a previously reserved resource region
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ *
+ * The described resource region must match a currently busy region.
+ */
 void __release_region(struct resource *parent, resource_size_t start,
 			resource_size_t n)
 {
@@ -553,7 +615,6 @@ void __release_region(struct resource *parent, resource_size_t start,
 			"<%016llx-%016llx>\n", (unsigned long long)start,
 			(unsigned long long)end);
 }
-
 EXPORT_SYMBOL(__release_region);
 
 /*
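For reference, the busy-region API documented above is normally reached through the request_region()/release_region() wrappers; a minimal illustrative I/O-port reservation (the base address, extent and name are invented for the example) might look like:

#include <linux/errno.h>
#include <linux/ioport.h>

#define EX_BASE		0x220	/* hypothetical I/O base */
#define EX_EXTENT	16

static int example_claim_ports(void)
{
	if (!request_region(EX_BASE, EX_EXTENT, "example"))
		return -EBUSY;		/* somebody already owns the range */

	/* ... talk to the hardware ... */

	release_region(EX_BASE, EX_EXTENT);
	return 0;
}

As the new __check_region() comment stresses, checking first and requesting later is racy, so new code should call request_region() directly and handle failure rather than probe with check_region().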
diff --git a/kernel/sched.c b/kernel/sched.c
index e4e54e86f4..53608a59d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1232,7 +1232,7 @@ nextgroup:
 }
 
 /*
- * find_idlest_queue - find the idlest runqueue among the cpus in group.
+ * find_idlest_cpu - find the idlest cpu among the cpus in group.
  */
 static int
 find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
@@ -1286,21 +1286,29 @@ static int sched_balance_self(int cpu, int flag)
 	while (sd) {
 		cpumask_t span;
 		struct sched_group *group;
-		int new_cpu;
-		int weight;
+		int new_cpu, weight;
+
+		if (!(sd->flags & flag)) {
+			sd = sd->child;
+			continue;
+		}
 
 		span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
-		if (!group)
-			goto nextlevel;
+		if (!group) {
+			sd = sd->child;
+			continue;
+		}
 
 		new_cpu = find_idlest_cpu(group, t, cpu);
-		if (new_cpu == -1 || new_cpu == cpu)
-			goto nextlevel;
+		if (new_cpu == -1 || new_cpu == cpu) {
+			/* Now try balancing at a lower domain level of cpu */
+			sd = sd->child;
+			continue;
+		}
 
-		/* Now try balancing at a lower domain level */
+		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
-nextlevel:
 		sd = NULL;
 		weight = cpus_weight(span);
 		for_each_domain(cpu, tmp) {
@@ -2533,8 +2541,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct rq *busiest;
 	cpumask_t cpus = CPU_MASK_ALL;
 
+	/*
+	 * When power savings policy is enabled for the parent domain, idle
+	 * sibling can pick up load irrespective of busy siblings. In this case,
+	 * let the state of idle sibling percolate up as IDLE, instead of
+	 * portraying it as NOT_IDLE.
+	 */
 	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[idle]);
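The sched_smt_power_savings tests in these hunks are replaced by test_sd_parent(sd, SD_POWERSAVINGS_BALANCE). That helper is not defined in this file; presumably it arrives with the rest of the series in include/linux/sched.h. A plausible definition, consistent only with how the hunks here use it, would be something like:

/* Assumed sketch: true if the parent domain exists and has @flag set. */
#define test_sd_parent(sd, flag)	((sd)->parent && \
					 ((sd)->parent->flags & (flag)) ? 1 : 0)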
@@ -2630,7 +2644,7 @@ redo:
 	}
 
 	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
 	return nr_moved;
 
@@ -2646,7 +2660,7 @@ out_one_pinned:
 		sd->balance_interval *= 2;
 
 	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
 	return 0;
 }
@@ -2668,7 +2682,14 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	int sd_idle = 0;
 	cpumask_t cpus = CPU_MASK_ALL;
 
-	if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	/*
+	 * When power savings policy is enabled for the parent domain, idle
+	 * sibling can pick up load irrespective of busy siblings. In this case,
+	 * let the state of idle sibling percolate up as IDLE, instead of
+	 * portraying it as NOT_IDLE.
+	 */
+	if (sd->flags & SD_SHARE_CPUPOWER &&
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
@@ -2709,7 +2730,8 @@ redo:
 
 	if (!nr_moved) {
 		schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 			return -1;
 	} else
 		sd->nr_balance_failed = 0;
@@ -2719,7 +2741,7 @@ out_balanced:
 
 	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
 	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
 	sd->nr_balance_failed = 0;
 
@@ -4817,7 +4839,7 @@ void show_state(void)
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void __devinit init_idle(struct task_struct *idle, int cpu)
+void __cpuinit init_idle(struct task_struct *idle, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
@@ -5392,7 +5414,9 @@ static int sd_degenerate(struct sched_domain *sd)
 	if (sd->flags & (SD_LOAD_BALANCE |
 			 SD_BALANCE_NEWIDLE |
 			 SD_BALANCE_FORK |
-			 SD_BALANCE_EXEC)) {
+			 SD_BALANCE_EXEC |
+			 SD_SHARE_CPUPOWER |
+			 SD_SHARE_PKG_RESOURCES)) {
 		if (sd->groups != sd->groups->next)
 			return 0;
 	}
@@ -5426,7 +5450,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 		pflags &= ~(SD_LOAD_BALANCE |
 			    SD_BALANCE_NEWIDLE |
 			    SD_BALANCE_FORK |
-			    SD_BALANCE_EXEC);
+			    SD_BALANCE_EXEC |
+			    SD_SHARE_CPUPOWER |
+			    SD_SHARE_PKG_RESOURCES);
 	}
 	if (~cflags & pflags)
 		return 0;
@@ -5448,12 +5474,18 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 		struct sched_domain *parent = tmp->parent;
 		if (!parent)
 			break;
-		if (sd_parent_degenerate(tmp, parent))
+		if (sd_parent_degenerate(tmp, parent)) {
 			tmp->parent = parent->parent;
+			if (parent->parent)
+				parent->parent->child = tmp;
+		}
 	}
 
-	if (sd && sd_degenerate(sd))
+	if (sd && sd_degenerate(sd)) {
 		sd = sd->parent;
+		if (sd)
+			sd->child = NULL;
+	}
 
 	sched_domain_debug(sd, cpu);
 
@@ -5461,7 +5493,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 }
 
 /* cpus with isolated domains */
-static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
@@ -5489,15 +5521,17 @@ __setup ("isolcpus=", isolated_cpu_setup);
 * covered by the given span, and will set each group's ->cpumask correctly,
 * and ->cpu_power to 0.
 */
-static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
-				    int (*group_fn)(int cpu))
+static void
+init_sched_build_groups(struct sched_group groups[], cpumask_t span,
+			const cpumask_t *cpu_map,
+			int (*group_fn)(int cpu, const cpumask_t *cpu_map))
 {
 	struct sched_group *first = NULL, *last = NULL;
 	cpumask_t covered = CPU_MASK_NONE;
 	int i;
 
 	for_each_cpu_mask(i, span) {
-		int group = group_fn(i);
+		int group = group_fn(i, cpu_map);
 		struct sched_group *sg = &groups[group];
 		int j;
 
@@ -5508,7 +5542,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 		sg->cpu_power = 0;
 
 		for_each_cpu_mask(j, span) {
-			if (group_fn(j) != group)
+			if (group_fn(j, cpu_map) != group)
 				continue;
 
 			cpu_set(j, covered);
@@ -5975,13 +6009,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
 #endif
 	);
 	if (system_state == SYSTEM_BOOTING) {
-		printk("migration_cost=");
-		for (distance = 0; distance <= max_distance; distance++) {
-			if (distance)
-				printk(",");
-			printk("%ld", (long)migration_cost[distance] / 1000);
+		if (num_online_cpus() > 1) {
+			printk("migration_cost=");
+			for (distance = 0; distance <= max_distance; distance++) {
+				if (distance)
+					printk(",");
+				printk("%ld", (long)migration_cost[distance] / 1000);
+			}
+			printk("\n");
 		}
-		printk("\n");
 	}
 	j1 = jiffies;
 	if (migration_debug)
@@ -6084,7 +6120,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
 static struct sched_group sched_group_cpus[NR_CPUS];
 
-static int cpu_to_cpu_group(int cpu)
+static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map)
 {
 	return cpu;
 }
@@ -6095,31 +6131,36 @@ static int cpu_to_cpu_group(int cpu)
 */
 #ifdef CONFIG_SCHED_MC
 static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static struct sched_group *sched_group_core_bycpu[NR_CPUS];
+static struct sched_group sched_group_core[NR_CPUS];
 #endif
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
-static int cpu_to_core_group(int cpu)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
 {
-	return first_cpu(cpu_sibling_map[cpu]);
+	cpumask_t mask = cpu_sibling_map[cpu];
+	cpus_and(mask, mask, *cpu_map);
+	return first_cpu(mask);
 }
 #elif defined(CONFIG_SCHED_MC)
-static int cpu_to_core_group(int cpu)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
 {
 	return cpu;
 }
 #endif
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
 
-static int cpu_to_phys_group(int cpu)
+static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map)
 {
 #ifdef CONFIG_SCHED_MC
 	cpumask_t mask = cpu_coregroup_map(cpu);
+	cpus_and(mask, mask, *cpu_map);
 	return first_cpu(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	return first_cpu(cpu_sibling_map[cpu]);
+	cpumask_t mask = cpu_sibling_map[cpu];
+	cpus_and(mask, mask, *cpu_map);
+	return first_cpu(mask);
 #else
 	return cpu;
 #endif
@@ -6137,7 +6178,7 @@ static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
 static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
 
-static int cpu_to_allnodes_group(int cpu)
+static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map)
 {
 	return cpu_to_node(cpu);
 }
@@ -6169,12 +6210,11 @@ next_sg:
 }
 #endif
 
+#ifdef CONFIG_NUMA
 /* Free memory allocated for various sched_group structures */
 static void free_sched_groups(const cpumask_t *cpu_map)
 {
-	int cpu;
-#ifdef CONFIG_NUMA
-	int i;
+	int cpu, i;
 
 	for_each_cpu_mask(cpu, *cpu_map) {
 		struct sched_group *sched_group_allnodes
@@ -6211,19 +6251,63 @@ next_sg:
 		kfree(sched_group_nodes);
 		sched_group_nodes_bycpu[cpu] = NULL;
 	}
+}
+#else
+static void free_sched_groups(const cpumask_t *cpu_map)
+{
+}
 #endif
-	for_each_cpu_mask(cpu, *cpu_map) {
-		if (sched_group_phys_bycpu[cpu]) {
-			kfree(sched_group_phys_bycpu[cpu]);
-			sched_group_phys_bycpu[cpu] = NULL;
-		}
-#ifdef CONFIG_SCHED_MC
-		if (sched_group_core_bycpu[cpu]) {
-			kfree(sched_group_core_bycpu[cpu]);
-			sched_group_core_bycpu[cpu] = NULL;
-		}
-#endif
+
+/*
+ * Initialize sched groups cpu_power.
+ *
+ * cpu_power indicates the capacity of sched group, which is used while
+ * distributing the load between different sched groups in a sched domain.
+ * Typically cpu_power for all the groups in a sched domain will be same unless
+ * there are asymmetries in the topology. If there are asymmetries, group
+ * having more cpu_power will pickup more load compared to the group having
+ * less cpu_power.
+ *
+ * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
+ * the maximum number of tasks a group can handle in the presence of other idle
+ * or lightly loaded groups in the same sched domain.
+ */
+static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+{
+	struct sched_domain *child;
+	struct sched_group *group;
+
+	WARN_ON(!sd || !sd->groups);
+
+	if (cpu != first_cpu(sd->groups->cpumask))
+		return;
+
+	child = sd->child;
+
+	/*
+	 * For perf policy, if the groups in child domain share resources
+	 * (for example cores sharing some portions of the cache hierarchy
+	 * or SMT), then set this domain groups cpu_power such that each group
+	 * can handle only one task, when there are other idle groups in the
+	 * same sched domain.
+	 */
+	if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
+		       (child->flags &
+			(SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+		return;
 	}
+
+	sd->groups->cpu_power = 0;
+
+	/*
+	 * add cpu_power of each child group to this groups cpu_power
+	 */
+	group = child->groups;
+	do {
+		sd->groups->cpu_power += group->cpu_power;
+		group = group->next;
+	} while (group != child->groups);
 }
 
 /*
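The comment block above describes how group cpu_power is meant to scale; a small worked example may make the arithmetic concrete (the topology is hypothetical):

/*
 * Hypothetical topology: one physical package containing two cores, each
 * core-level group already initialized to SCHED_LOAD_SCALE.
 *
 * Power savings policy (SD_POWERSAVINGS_BALANCE set on the package domain):
 * the summing loop runs, so
 *
 *	package group cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE
 *				= 2 * SCHED_LOAD_SCALE
 *
 * i.e. the package-level group may absorb two tasks before load spills over
 * to another (possibly idle) package.
 *
 * Default perf policy with the cores sharing package resources
 * (SD_SHARE_PKG_RESOURCES): the early return clamps the group to a single
 * SCHED_LOAD_SCALE, so only one task is placed there while other groups in
 * the same domain sit idle.
 */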
@@ -6233,10 +6317,7 @@ next_sg:
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
-	struct sched_group *sched_group_phys = NULL;
-#ifdef CONFIG_SCHED_MC
-	struct sched_group *sched_group_core = NULL;
-#endif
+	struct sched_domain *sd;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	struct sched_group *sched_group_allnodes = NULL;
@@ -6268,9 +6349,10 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
 			if (!sched_group_allnodes) {
 				sched_group_allnodes
-					= kmalloc(sizeof(struct sched_group)
+					= kmalloc_node(sizeof(struct sched_group)
 						  * MAX_NUMNODES,
-						  GFP_KERNEL);
+						  GFP_KERNEL,
+						  cpu_to_node(i));
 				if (!sched_group_allnodes) {
 					printk(KERN_WARNING
 					"Can not alloc allnodes sched group\n");
@@ -6282,7 +6364,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			sd = &per_cpu(allnodes_domains, i);
 			*sd = SD_ALLNODES_INIT;
 			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i);
+			group = cpu_to_allnodes_group(i, cpu_map);
 			sd->groups = &sched_group_allnodes[group];
 			p = sd;
 		} else
@@ -6292,60 +6374,42 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		*sd = SD_NODE_INIT;
 		sd->span = sched_domain_node_span(cpu_to_node(i));
 		sd->parent = p;
+		if (p)
+			p->child = sd;
 		cpus_and(sd->span, sd->span, *cpu_map);
 #endif
 
-		if (!sched_group_phys) {
-			sched_group_phys
-				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
-					  GFP_KERNEL);
-			if (!sched_group_phys) {
-				printk (KERN_WARNING "Can not alloc phys sched"
-						"group\n");
-				goto error;
-			}
-			sched_group_phys_bycpu[i] = sched_group_phys;
-		}
-
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i);
+		group = cpu_to_phys_group(i, cpu_map);
 		*sd = SD_CPU_INIT;
 		sd->span = nodemask;
 		sd->parent = p;
+		if (p)
+			p->child = sd;
 		sd->groups = &sched_group_phys[group];
 
 #ifdef CONFIG_SCHED_MC
-		if (!sched_group_core) {
-			sched_group_core
-				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
-					  GFP_KERNEL);
-			if (!sched_group_core) {
-				printk (KERN_WARNING "Can not alloc core sched"
-						"group\n");
-				goto error;
-			}
-			sched_group_core_bycpu[i] = sched_group_core;
-		}
-
 		p = sd;
 		sd = &per_cpu(core_domains, i);
-		group = cpu_to_core_group(i);
+		group = cpu_to_core_group(i, cpu_map);
 		*sd = SD_MC_INIT;
 		sd->span = cpu_coregroup_map(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
+		p->child = sd;
 		sd->groups = &sched_group_core[group];
 #endif
 
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i);
+		group = cpu_to_cpu_group(i, cpu_map);
 		*sd = SD_SIBLING_INIT;
 		sd->span = cpu_sibling_map[i];
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
+		p->child = sd;
 		sd->groups = &sched_group_cpus[group];
 #endif
 	}
@@ -6359,7 +6423,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			continue;
 
 		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-						&cpu_to_cpu_group);
+					cpu_map, &cpu_to_cpu_group);
 	}
 #endif
 
@@ -6371,7 +6435,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		if (i != first_cpu(this_core_map))
 			continue;
 		init_sched_build_groups(sched_group_core, this_core_map,
-						&cpu_to_core_group);
+					cpu_map, &cpu_to_core_group);
 	}
 #endif
 
@@ -6385,14 +6449,14 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			continue;
 
 		init_sched_build_groups(sched_group_phys, nodemask,
-						&cpu_to_phys_group);
+					cpu_map, &cpu_to_phys_group);
 	}
 
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
 	if (sched_group_allnodes)
 		init_sched_build_groups(sched_group_allnodes, *cpu_map,
-					&cpu_to_allnodes_group);
+					cpu_map, &cpu_to_allnodes_group);
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		/* Set up node groups */
@@ -6464,72 +6528,20 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
 	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
 		sd = &per_cpu(cpu_domains, i);
-		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
 	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
 		sd = &per_cpu(core_domains, i);
-		if (sched_smt_power_savings)
-			power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-		else
-			power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
-						* SCHED_LOAD_SCALE / 10;
-		sd->groups->cpu_power = power;
+		init_sched_groups_power(i, sd);
 	}
 #endif
 
 	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_MC
 		sd = &per_cpu(phys_domains, i);
-		if (i != first_cpu(sd->groups->cpumask))
-			continue;
-
-		sd->groups->cpu_power = 0;
-		if (sched_mc_power_savings || sched_smt_power_savings) {
-			int j;
-
-			for_each_cpu_mask(j, sd->groups->cpumask) {
-				struct sched_domain *sd1;
-				sd1 = &per_cpu(core_domains, j);
-				/*
-				 * for each core we will add once
-				 * to the group in physical domain
-				 */
-				if (j != first_cpu(sd1->groups->cpumask))
-					continue;
-
-				if (sched_smt_power_savings)
-					sd->groups->cpu_power += sd1->groups->cpu_power;
-				else
-					sd->groups->cpu_power += SCHED_LOAD_SCALE;
-			}
-		} else
-			/*
-			 * This has to be < 2 * SCHED_LOAD_SCALE
-			 * Lets keep it SCHED_LOAD_SCALE, so that
-			 * while calculating NUMA group's cpu_power
-			 * we can simply do
-			 *  numa_group->cpu_power += phys_group->cpu_power;
-			 *
-			 * See "only add power once for each physical pkg"
-			 * comment below
-			 */
-			sd->groups->cpu_power = SCHED_LOAD_SCALE;
-#else
-		int power;
-		sd = &per_cpu(phys_domains, i);
-		if (sched_smt_power_savings)
-			power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-		else
-			power = SCHED_LOAD_SCALE;
-		sd->groups->cpu_power = power;
-#endif
+		init_sched_groups_power(i, sd);
 	}
 
 #ifdef CONFIG_NUMA
@@ -6537,7 +6549,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
 	if (sched_group_allnodes) {
-		int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+		int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map);
 		struct sched_group *sg = &sched_group_allnodes[group];
 
 		init_numa_sched_groups_power(sg);
@@ -6563,9 +6575,11 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 
 	return 0;
 
+#ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map);
 	return -ENOMEM;
+#endif
 }
 /*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
@@ -6747,11 +6761,20 @@ static int update_sched_domains(struct notifier_block *nfb,
 
 void __init sched_init_smp(void)
 {
+	cpumask_t non_isolated_cpus;
+
 	lock_cpu_hotplug();
 	arch_init_sched_domains(&cpu_online_map);
+	cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map);
+	if (cpus_empty(non_isolated_cpus))
+		cpu_set(smp_processor_id(), non_isolated_cpus);
 	unlock_cpu_hotplug();
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
+
+	/* Move init over to a non-isolated CPU */
+	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
+		BUG();
 }
 #else
 void __init sched_init_smp(void)