Diffstat (limited to 'kernel/sched/core.c')
 -rw-r--r--   kernel/sched/core.c   448
 1 file changed, 229 insertions(+), 219 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0533a688ce22..39eb6011bc38 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,6 +83,7 @@
 
 #include "sched.h"
 #include "../workqueue_sched.h"
+#include "../smpboot.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -692,8 +693,6 @@ int tg_nop(struct task_group *tg, void *data)
 }
 #endif
 
-void update_cpu_load(struct rq *this_rq);
-
 static void set_load_weight(struct task_struct *p)
 {
         int prio = p->static_prio - MAX_RT_PRIO;
@@ -2083,6 +2082,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
         /* Here we just switch the register state and the stack. */
+        rcu_switch_from(prev);
         switch_to(prev, next, prev);
 
         barrier();
@@ -2486,22 +2486,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
  * every tick. We fix it up based on jiffies.
  */
-void update_cpu_load(struct rq *this_rq)
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+                              unsigned long pending_updates)
 {
-        unsigned long this_load = this_rq->load.weight;
-        unsigned long curr_jiffies = jiffies;
-        unsigned long pending_updates;
         int i, scale;
 
         this_rq->nr_load_updates++;
 
-        /* Avoid repeated calls on same jiffy, when moving in and out of idle */
-        if (curr_jiffies == this_rq->last_load_update_tick)
-                return;
-
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        this_rq->last_load_update_tick = curr_jiffies;
-
         /* Update our load: */
         this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
         for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2526,9 +2517,45 @@ void update_cpu_load(struct rq *this_rq)
         sched_avg_update(this_rq);
 }
 
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+void update_idle_cpu_load(struct rq *this_rq)
+{
+        unsigned long curr_jiffies = jiffies;
+        unsigned long load = this_rq->load.weight;
+        unsigned long pending_updates;
+
+        /*
+         * Bloody broken means of dealing with nohz, but better than nothing..
+         * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
+         * update and see 0 difference the one time and 2 the next, even though
+         * we ticked at roughly the same rate.
+         *
+         * Hence we only use this from nohz_idle_balance() and skip this
+         * nonsense when called from the scheduler_tick() since that's
+         * guaranteed a stable rate.
+         */
+        if (load || curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        this_rq->last_load_update_tick = curr_jiffies;
+
+        __update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from scheduler_tick()
+ */
 static void update_cpu_load_active(struct rq *this_rq)
 {
-        update_cpu_load(this_rq);
+        /*
+         * See the mess in update_idle_cpu_load().
+         */
+        this_rq->last_load_update_tick = jiffies;
+        __update_cpu_load(this_rq, this_rq->load.weight, 1);
 
         calc_load_account_active(this_rq);
 }
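
A note on the two hunks above: the jiffies bookkeeping moves out of the common helper. scheduler_tick() always advances last_load_update_tick and passes pending_updates = 1, while the nohz path works out how many ticks it missed and hands that count to __update_cpu_load(), which decays the longer-term cpu_load[] indices accordingly. The sketch below is a simplified user-space model of that decay, not the kernel's table-driven decay_load_missed(); the sketch_ names are made up for illustration. Because update_idle_cpu_load() only proceeds when the runqueue load is zero, folding a zero sample in once per missed tick approximates what the real code collapses into a single lookup:

#include <stdio.h>

#define SKETCH_CPU_LOAD_IDX_MAX 5

static unsigned long sketch_cpu_load[SKETCH_CPU_LOAD_IDX_MAX];

/*
 * Toy model of __update_cpu_load(): index 0 tracks the instantaneous load,
 * higher indices are progressively slower exponential averages.
 */
static void sketch_update_cpu_load(unsigned long this_load,
                                   unsigned long pending_updates)
{
        unsigned long scale;
        int i;

        sketch_cpu_load[0] = this_load;

        for (i = 1, scale = 2; i < SKETCH_CPU_LOAD_IDX_MAX; i++, scale += scale) {
                unsigned long old = sketch_cpu_load[i];
                unsigned long n = pending_updates;

                /*
                 * Fold the current sample in once per elapsed tick; the
                 * kernel collapses the repeated folds into one table-driven
                 * decay (decay_load_missed()) plus a single fold.
                 */
                while (n--)
                        old = (old * (scale - 1) + this_load) / scale;

                sketch_cpu_load[i] = old;
        }
}

int main(void)
{
        int i;

        sketch_update_cpu_load(1024, 1);        /* a busy tick */
        sketch_update_cpu_load(0, 8);           /* waking up after 8 idle ticks */

        for (i = 0; i < SKETCH_CPU_LOAD_IDX_MAX; i++)
                printf("cpu_load[%d] = %lu\n", i, sketch_cpu_load[i]);
        return 0;
}

With pending_updates == 1 this degenerates to a single exponential-average step, which is the scheduler_tick() case.
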
@@ -3113,6 +3140,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
         if (irqs_disabled())
                 print_irqtrace_events(prev);
         dump_stack();
+        add_taint(TAINT_WARN);
 }
 
 /*
@@ -4042,11 +4070,8 @@ static bool check_same_owner(struct task_struct *p)
 
         rcu_read_lock();
         pcred = __task_cred(p);
-        if (cred->user->user_ns == pcred->user->user_ns)
-                match = (cred->euid == pcred->euid ||
-                         cred->euid == pcred->uid);
-        else
-                match = false;
+        match = (uid_eq(cred->euid, pcred->euid) ||
+                 uid_eq(cred->euid, pcred->uid));
         rcu_read_unlock();
         return match;
 }
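
The rewritten ownership check reflects the conversion of credential uids to a typed, namespace-mapped representation (kuid_t), where values are compared with uid_eq() instead of == plus an explicit user-namespace test. A minimal sketch of that pattern follows; the sketch_ types and helpers are illustrative stand-ins, not the definitions from include/linux/uidgid.h:

#include <stdio.h>

/* Stand-in for kuid_t: a uid already translated into the kernel's own view. */
typedef struct {
        unsigned int val;
} sketch_kuid_t;

static inline int sketch_uid_eq(sketch_kuid_t left, sketch_kuid_t right)
{
        /* Both sides live in the same namespace-independent space. */
        return left.val == right.val;
}

struct sketch_cred {
        sketch_kuid_t uid, euid;
};

/* Shape of check_same_owner() after the change: no namespace branch needed. */
static int sketch_same_owner(const struct sketch_cred *cred,
                             const struct sketch_cred *pcred)
{
        return sketch_uid_eq(cred->euid, pcred->euid) ||
               sketch_uid_eq(cred->euid, pcred->uid);
}

int main(void)
{
        struct sketch_cred a = { .uid = { 1000 }, .euid = { 1000 } };
        struct sketch_cred b = { .uid = { 1000 }, .euid = { 0 } };

        printf("same owner: %d\n", sketch_same_owner(&a, &b)); /* a's euid matches b's uid */
        return 0;
}
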
@@ -5560,7 +5585,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                         break;
                 }
 
-                if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
+                if (!(sd->flags & SD_OVERLAP) &&
+                    cpumask_intersects(groupmask, sched_group_cpus(group))) {
                         printk(KERN_CONT "\n");
                         printk(KERN_ERR "ERROR: repeated CPUs\n");
                         break;
@@ -5898,99 +5924,11 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-#ifdef CONFIG_NUMA
-
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int find_next_best_node(int node, nodemask_t *used_nodes)
-{
-        int i, n, val, min_val, best_node = -1;
-
-        min_val = INT_MAX;
-
-        for (i = 0; i < nr_node_ids; i++) {
-                /* Start at @node */
-                n = (node + i) % nr_node_ids;
-
-                if (!nr_cpus_node(n))
-                        continue;
-
-                /* Skip already used nodes */
-                if (node_isset(n, *used_nodes))
-                        continue;
-
-                /* Simple min distance search */
-                val = node_distance(node, n);
-
-                if (val < min_val) {
-                        min_val = val;
-                        best_node = n;
-                }
-        }
-
-        if (best_node != -1)
-                node_set(best_node, *used_nodes);
-        return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @span: resulting cpumask
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-static void sched_domain_node_span(int node, struct cpumask *span)
-{
-        nodemask_t used_nodes;
-        int i;
-
-        cpumask_clear(span);
-        nodes_clear(used_nodes);
-
-        cpumask_or(span, span, cpumask_of_node(node));
-        node_set(node, used_nodes);
-
-        for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
-                int next_node = find_next_best_node(node, &used_nodes);
-                if (next_node < 0)
-                        break;
-                cpumask_or(span, span, cpumask_of_node(next_node));
-        }
-}
-
-static const struct cpumask *cpu_node_mask(int cpu)
-{
-        lockdep_assert_held(&sched_domains_mutex);
-
-        sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask);
-
-        return sched_domains_tmpmask;
-}
-
-static const struct cpumask *cpu_allnodes_mask(int cpu)
-{
-        return cpu_possible_mask;
-}
-#endif /* CONFIG_NUMA */
-
 static const struct cpumask *cpu_cpu_mask(int cpu)
 {
         return cpumask_of_node(cpu_to_node(cpu));
 }
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
-
 struct sd_data {
         struct sched_domain **__percpu sd;
         struct sched_group **__percpu sg;
@@ -6020,6 +5958,7 @@ struct sched_domain_topology_level {
         sched_domain_init_f init;
         sched_domain_mask_f mask;
         int flags;
+        int numa_level;
         struct sd_data data;
 };
 
@@ -6211,10 +6150,6 @@ sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \
 }
 
 SD_INIT_FUNC(CPU)
-#ifdef CONFIG_NUMA
-SD_INIT_FUNC(ALLNODES)
-SD_INIT_FUNC(NODE)
-#endif
 #ifdef CONFIG_SCHED_SMT
 SD_INIT_FUNC(SIBLING)
 #endif
@@ -6336,15 +6271,184 @@ static struct sched_domain_topology_level default_topology[] = {
         { sd_init_BOOK, cpu_book_mask, },
 #endif
         { sd_init_CPU, cpu_cpu_mask, },
-#ifdef CONFIG_NUMA
-        { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
-        { sd_init_ALLNODES, cpu_allnodes_mask, },
-#endif
         { NULL, },
 };
 
 static struct sched_domain_topology_level *sched_domain_topology = default_topology;
 
+#ifdef CONFIG_NUMA
+
+static int sched_domains_numa_levels;
+static int sched_domains_numa_scale;
+static int *sched_domains_numa_distance;
+static struct cpumask ***sched_domains_numa_masks;
+static int sched_domains_curr_level;
+
+static inline int sd_local_flags(int level)
+{
+        if (sched_domains_numa_distance[level] > REMOTE_DISTANCE)
+                return 0;
+
+        return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
+}
+
+static struct sched_domain *
+sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+{
+        struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
+        int level = tl->numa_level;
+        int sd_weight = cpumask_weight(
+                        sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+
+        *sd = (struct sched_domain){
+                .min_interval = sd_weight,
+                .max_interval = 2*sd_weight,
+                .busy_factor = 32,
+                .imbalance_pct = 125,
+                .cache_nice_tries = 2,
+                .busy_idx = 3,
+                .idle_idx = 2,
+                .newidle_idx = 0,
+                .wake_idx = 0,
+                .forkexec_idx = 0,
+
+                .flags = 1*SD_LOAD_BALANCE
+                       | 1*SD_BALANCE_NEWIDLE
+                       | 0*SD_BALANCE_EXEC
+                       | 0*SD_BALANCE_FORK
+                       | 0*SD_BALANCE_WAKE
+                       | 0*SD_WAKE_AFFINE
+                       | 0*SD_PREFER_LOCAL
+                       | 0*SD_SHARE_CPUPOWER
+                       | 0*SD_SHARE_PKG_RESOURCES
+                       | 1*SD_SERIALIZE
+                       | 0*SD_PREFER_SIBLING
+                       | sd_local_flags(level)
+                       ,
+                .last_balance = jiffies,
+                .balance_interval = sd_weight,
+        };
+        SD_INIT_NAME(sd, NUMA);
+        sd->private = &tl->data;
+
+        /*
+         * Ugly hack to pass state to sd_numa_mask()...
+         */
+        sched_domains_curr_level = tl->numa_level;
+
+        return sd;
+}
+
+static const struct cpumask *sd_numa_mask(int cpu)
+{
+        return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
+}
+
+static void sched_init_numa(void)
+{
+        int next_distance, curr_distance = node_distance(0, 0);
+        struct sched_domain_topology_level *tl;
+        int level = 0;
+        int i, j, k;
+
+        sched_domains_numa_scale = curr_distance;
+        sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
+        if (!sched_domains_numa_distance)
+                return;
+
+        /*
+         * O(nr_nodes^2) deduplicating selection sort -- in order to find the
+         * unique distances in the node_distance() table.
+         *
+         * Assumes node_distance(0,j) includes all distances in
+         * node_distance(i,j) in order to avoid cubic time.
+         *
+         * XXX: could be optimized to O(n log n) by using sort()
+         */
+        next_distance = curr_distance;
+        for (i = 0; i < nr_node_ids; i++) {
+                for (j = 0; j < nr_node_ids; j++) {
+                        int distance = node_distance(0, j);
+                        if (distance > curr_distance &&
+                                        (distance < next_distance ||
+                                         next_distance == curr_distance))
+                                next_distance = distance;
+                }
+                if (next_distance != curr_distance) {
+                        sched_domains_numa_distance[level++] = next_distance;
+                        sched_domains_numa_levels = level;
+                        curr_distance = next_distance;
+                } else break;
+        }
+        /*
+         * 'level' contains the number of unique distances, excluding the
+         * identity distance node_distance(i,i).
+         *
+         * The sched_domains_numa_distance[] array includes the actual distance
+         * numbers.
+         */
+
+        sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
+        if (!sched_domains_numa_masks)
+                return;
+
+        /*
+         * Now for each level, construct a mask per node which contains all
+         * cpus of nodes that are that many hops away from us.
+         */
+        for (i = 0; i < level; i++) {
+                sched_domains_numa_masks[i] =
+                        kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
+                if (!sched_domains_numa_masks[i])
+                        return;
+
+                for (j = 0; j < nr_node_ids; j++) {
+                        struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j);
+                        if (!mask)
+                                return;
+
+                        sched_domains_numa_masks[i][j] = mask;
+
+                        for (k = 0; k < nr_node_ids; k++) {
+                                if (node_distance(j, k) > sched_domains_numa_distance[i])
+                                        continue;
+
+                                cpumask_or(mask, mask, cpumask_of_node(k));
+                        }
+                }
+        }
+
+        tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+                        sizeof(struct sched_domain_topology_level), GFP_KERNEL);
+        if (!tl)
+                return;
+
+        /*
+         * Copy the default topology bits..
+         */
+        for (i = 0; default_topology[i].init; i++)
+                tl[i] = default_topology[i];
+
+        /*
+         * .. and append 'j' levels of NUMA goodness.
+         */
+        for (j = 0; j < level; i++, j++) {
+                tl[i] = (struct sched_domain_topology_level){
+                        .init = sd_numa_init,
+                        .mask = sd_numa_mask,
+                        .flags = SDTL_OVERLAP,
+                        .numa_level = j,
+                };
+        }
+
+        sched_domain_topology = tl;
+}
+#else
+static inline void sched_init_numa(void)
+{
+}
+#endif /* CONFIG_NUMA */
+
 static int __sdt_alloc(const struct cpumask *cpu_map)
 {
         struct sched_domain_topology_level *tl;
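
The heart of the sched_init_numa() hunk above is the deduplication pass over node_distance(): every distinct distance becomes one NUMA topology level, and for each level every node gets a cpumask covering all nodes within that distance. The standalone program below reproduces just that selection logic on a made-up 4-node distance table; the table values and sketch names are assumptions for illustration, not kernel code or data from a real machine:

#include <stdio.h>

/* Toy 4-node distance table (assumed values; 10 is the local distance). */
#define NR_NODES 4
static const int dist[NR_NODES][NR_NODES] = {
        { 10, 20, 20, 30 },
        { 20, 10, 30, 20 },
        { 20, 30, 10, 20 },
        { 30, 20, 20, 10 },
};

int main(void)
{
        int levels[NR_NODES];
        int nr_levels = 0;
        int curr = dist[0][0];          /* identity distance, excluded */
        int i, j, k;

        /*
         * Same O(n^2) dedup as sched_init_numa(): repeatedly pick the next
         * larger distance found in row 0 of the table.
         */
        for (i = 0; i < NR_NODES; i++) {
                int next = curr;

                for (j = 0; j < NR_NODES; j++) {
                        int d = dist[0][j];
                        if (d > curr && (d < next || next == curr))
                                next = d;
                }
                if (next == curr)
                        break;
                levels[nr_levels++] = next;
                curr = next;
        }

        /* For each level, list the nodes node 0 would group with. */
        for (i = 0; i < nr_levels; i++) {
                printf("level %d (distance <= %d): nodes", i, levels[i]);
                for (k = 0; k < NR_NODES; k++)
                        if (dist[0][k] <= levels[i])
                                printf(" %d", k);
                printf("\n");
        }
        return 0;
}

On this toy table the program finds two levels (distances 20 and 30), mirroring how the kernel would end up with two NUMA sched-domain levels for such a topology.
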
@@ -6382,6 +6486,8 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                         if (!sg)
                                 return -ENOMEM;
 
+                        sg->next = sg;
+
                         *per_cpu_ptr(sdd->sg, j) = sg;
 
                         sgp = kzalloc_node(sizeof(struct sched_group_power),
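
The new sg->next = sg line above matters because scheduler groups are kept on a circular singly linked list that is walked with do/while loops; an allocated-but-not-yet-linked group therefore has to be a valid one-element ring, or an early error or teardown path could chase an uninitialized pointer. A generic illustration of that invariant (plain C, not the kernel's sched_group structures):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative group type; not the kernel's struct sched_group. */
struct sketch_group {
        int id;
        struct sketch_group *next;      /* circular: the last entry points at the first */
};

static struct sketch_group *sketch_group_alloc(int id)
{
        struct sketch_group *g = calloc(1, sizeof(*g));

        if (!g)
                return NULL;
        g->id = id;
        g->next = g;                    /* a lone group is already a one-element ring */
        return g;
}

static void sketch_group_walk(struct sketch_group *first)
{
        struct sketch_group *g = first;

        do {                            /* visits a one-element ring exactly once */
                printf("group %d\n", g->id);
                g = g->next;
        } while (g != first);
}

int main(void)
{
        struct sketch_group *g = sketch_group_alloc(0);

        if (g)
                sketch_group_walk(g);   /* safe even before the group is linked to others */
        free(g);
        return 0;
}
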
@@ -6710,97 +6816,6 @@ match2:
         mutex_unlock(&sched_domains_mutex);
 }
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static void reinit_sched_domains(void)
-{
-        get_online_cpus();
-
-        /* Destroy domains first to force the rebuild */
-        partition_sched_domains(0, NULL, NULL);
-
-        rebuild_sched_domains();
-        put_online_cpus();
-}
-
-static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
-{
-        unsigned int level = 0;
-
-        if (sscanf(buf, "%u", &level) != 1)
-                return -EINVAL;
-
-        /*
-         * level is always be positive so don't check for
-         * level < POWERSAVINGS_BALANCE_NONE which is 0
-         * What happens on 0 or 1 byte write,
-         * need to check for count as well?
-         */
-
-        if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
-                return -EINVAL;
-
-        if (smt)
-                sched_smt_power_savings = level;
-        else
-                sched_mc_power_savings = level;
-
-        reinit_sched_domains();
-
-        return count;
-}
-
-#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct device *dev,
-                                           struct device_attribute *attr,
-                                           char *buf)
-{
-        return sprintf(buf, "%u\n", sched_mc_power_savings);
-}
-static ssize_t sched_mc_power_savings_store(struct device *dev,
-                                            struct device_attribute *attr,
-                                            const char *buf, size_t count)
-{
-        return sched_power_savings_store(buf, count, 0);
-}
-static DEVICE_ATTR(sched_mc_power_savings, 0644,
-                   sched_mc_power_savings_show,
-                   sched_mc_power_savings_store);
-#endif
-
-#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct device *dev,
-                                            struct device_attribute *attr,
-                                            char *buf)
-{
-        return sprintf(buf, "%u\n", sched_smt_power_savings);
-}
-static ssize_t sched_smt_power_savings_store(struct device *dev,
-                                             struct device_attribute *attr,
-                                             const char *buf, size_t count)
-{
-        return sched_power_savings_store(buf, count, 1);
-}
-static DEVICE_ATTR(sched_smt_power_savings, 0644,
-                   sched_smt_power_savings_show,
-                   sched_smt_power_savings_store);
-#endif
-
-int __init sched_create_sysfs_power_savings_entries(struct device *dev)
-{
-        int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
-        if (smt_capable())
-                err = device_create_file(dev, &dev_attr_sched_smt_power_savings);
-#endif
-#ifdef CONFIG_SCHED_MC
-        if (!err && mc_capable())
-                err = device_create_file(dev, &dev_attr_sched_mc_power_savings);
-#endif
-        return err;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
@@ -6838,6 +6853,8 @@ void __init sched_init_smp(void)
         alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
         alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
+        sched_init_numa();
+
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
         init_sched_domains(cpu_active_mask);
@@ -7059,6 +7076,7 @@ void __init sched_init(void)
         /* May be allocated at isolcpus cmdline parse time */
         if (cpu_isolated_map == NULL)
                 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+        idle_thread_set_boot_cpu();
 #endif
         init_sched_fair_class();
 
@@ -7980,13 +7998,9 @@ static struct cftype cpu_files[] = {
                 .write_u64 = cpu_rt_period_write_uint,
         },
 #endif
+        { }     /* terminate */
 };
 
-static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-        return cgroup_add_files(cont, ss, cpu_files, ARRAY_SIZE(cpu_files));
-}
-
 struct cgroup_subsys cpu_cgroup_subsys = {
         .name = "cpu",
         .create = cpu_cgroup_create,
@@ -7994,8 +8008,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .can_attach = cpu_cgroup_can_attach,
         .attach = cpu_cgroup_attach,
         .exit = cpu_cgroup_exit,
-        .populate = cpu_cgroup_populate,
         .subsys_id = cpu_cgroup_subsys_id,
+        .base_cftypes = cpu_files,
         .early_init = 1,
 };
 
@@ -8180,13 +8194,9 @@ static struct cftype files[] = {
                 .name = "stat",
                 .read_map = cpuacct_stats_show,
         },
+        { }     /* terminate */
 };
 
-static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
-{
-        return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
-}
-
 /*
  * charge this task's execution time to its accounting group.
  *
@@ -8218,7 +8228,7 @@ struct cgroup_subsys cpuacct_subsys = {
         .name = "cpuacct",
         .create = cpuacct_create,
         .destroy = cpuacct_destroy,
-        .populate = cpuacct_populate,
         .subsys_id = cpuacct_subsys_id,
+        .base_cftypes = files,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
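
The four cgroup hunks above follow the same conversion: instead of a .populate() callback that registers control files by hand via cgroup_add_files(), each controller now declares a sentinel-terminated cftype array through .base_cftypes and lets the cgroup core register the files itself. The sketch below shows only the shape of that pattern; the sketch_ structures, file names, and registration loop are simplified stand-ins, not the cgroup API:

#include <stdio.h>

/* Simplified stand-ins for struct cftype and struct cgroup_subsys. */
struct sketch_cftype {
        const char *name;               /* a zeroed entry terminates the array */
};

struct sketch_subsys {
        const char *name;
        const struct sketch_cftype *base_cftypes;
};

static const struct sketch_cftype sketch_cpu_files[] = {
        { .name = "shares" },
        { .name = "rt_runtime_us" },
        { }                             /* terminator, like the "{ }" entries in the diff */
};

static const struct sketch_subsys sketch_cpu_subsys = {
        .name = "cpu",
        .base_cftypes = sketch_cpu_files,
};

/* Roughly what the core now does on behalf of every subsystem. */
static void sketch_register_base_files(const struct sketch_subsys *ss)
{
        const struct sketch_cftype *cft;

        for (cft = ss->base_cftypes; cft->name; cft++)
                printf("%s: adding control file %s\n", ss->name, cft->name);
}

int main(void)
{
        sketch_register_base_files(&sketch_cpu_subsys);
        return 0;
}
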
