Diffstat (limited to 'kernel/sched/core.c')

 -rw-r--r--  kernel/sched/core.c | 667
 1 file changed, 419 insertions(+), 248 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..d5594a4268d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,6 +83,7 @@
 
 #include "sched.h"
 #include "../workqueue_sched.h"
+#include "../smpboot.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -141,9 +142,8 @@ const_debug unsigned int sysctl_sched_features =
 #define SCHED_FEAT(name, enabled) \
         #name ,
 
-static __read_mostly char *sched_feat_names[] = {
+static const char * const sched_feat_names[] = {
 #include "features.h"
-        NULL
 };
 
 #undef SCHED_FEAT
@@ -692,8 +692,6 @@ int tg_nop(struct task_group *tg, void *data)
 }
 #endif
 
-void update_cpu_load(struct rq *this_rq);
-
 static void set_load_weight(struct task_struct *p)
 {
         int prio = p->static_prio - MAX_RT_PRIO;
@@ -2083,6 +2081,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
         /* Here we just switch the register state and the stack. */
+        rcu_switch_from(prev);
         switch_to(prev, next, prev);
 
         barrier();
@@ -2486,22 +2485,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
  * every tick. We fix it up based on jiffies.
  */
-void update_cpu_load(struct rq *this_rq)
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+                              unsigned long pending_updates)
 {
-        unsigned long this_load = this_rq->load.weight;
-        unsigned long curr_jiffies = jiffies;
-        unsigned long pending_updates;
         int i, scale;
 
         this_rq->nr_load_updates++;
 
-        /* Avoid repeated calls on same jiffy, when moving in and out of idle */
-        if (curr_jiffies == this_rq->last_load_update_tick)
-                return;
-
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        this_rq->last_load_update_tick = curr_jiffies;
-
         /* Update our load: */
         this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
         for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2526,9 +2516,78 @@ void update_cpu_load(struct rq *this_rq)
         sched_avg_update(this_rq);
 }
 
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+void update_idle_cpu_load(struct rq *this_rq)
+{
+        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+        unsigned long load = this_rq->load.weight;
+        unsigned long pending_updates;
+
+        /*
+         * bail if there's load or we're actually up-to-date.
+         */
+        if (load || curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        this_rq->last_load_update_tick = curr_jiffies;
+
+        __update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+        struct rq *this_rq = this_rq();
+        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+        unsigned long pending_updates;
+
+        if (curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        raw_spin_lock(&this_rq->lock);
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        if (pending_updates) {
+                this_rq->last_load_update_tick = curr_jiffies;
+                /*
+                 * We were idle, this means load 0, the current load might be
+                 * !0 due to remote wakeups and the sort.
+                 */
+                __update_cpu_load(this_rq, 0, pending_updates);
+        }
+        raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
+ * Called from scheduler_tick()
+ */
 static void update_cpu_load_active(struct rq *this_rq)
 {
-        update_cpu_load(this_rq);
+        /*
+         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+         */
+        this_rq->last_load_update_tick = jiffies;
+        __update_cpu_load(this_rq, this_rq->load.weight, 1);
 
         calc_load_account_active(this_rq);
 }
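
(Aside: the hunk above splits update_cpu_load() into __update_cpu_load() plus two NO_HZ entry points so that ticks missed while idle can be folded into the per-index load averages in a single call. The userspace sketch below only models that folding idea; the rq, its lock, the jiffies bookkeeping and the kernel's decay_load_missed() lookup table are all left out, with a plain decay loop assumed in their place.)

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

/* toy model: cpu_load[i] is a geometric average with time constant 2^i ticks */
static void model_update_cpu_load(unsigned long this_load,
                                  unsigned long pending_updates)
{
        int i, scale;

        cpu_load[0] = this_load;                /* fast path for index 0 */
        for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
                unsigned long old_load = cpu_load[i];
                unsigned long n;

                /* decay the old average once per tick missed while idle */
                for (n = 1; n < pending_updates; n++)
                        old_load = old_load * (scale - 1) / scale;

                /* then blend in this tick's instantaneous load */
                cpu_load[i] = (old_load * (scale - 1) + this_load) / scale;
        }
}

int main(void)
{
        int i;

        model_update_cpu_load(1024, 1);         /* a regular scheduler tick */
        model_update_cpu_load(0, 8);            /* leaving an 8-tick idle period */

        for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
                printf("cpu_load[%d] = %lu\n", i, cpu_load[i]);
        return 0;
}
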
@@ -3113,6 +3172,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
         if (irqs_disabled())
                 print_irqtrace_events(prev);
         dump_stack();
+        add_taint(TAINT_WARN);
 }
 
 /*
@@ -4042,11 +4102,8 @@ static bool check_same_owner(struct task_struct *p)
 
         rcu_read_lock();
         pcred = __task_cred(p);
-        if (cred->user->user_ns == pcred->user->user_ns)
-                match = (cred->euid == pcred->euid ||
-                         cred->euid == pcred->uid);
-        else
-                match = false;
+        match = (uid_eq(cred->euid, pcred->euid) ||
+                 uid_eq(cred->euid, pcred->uid));
         rcu_read_unlock();
         return match;
 }
@@ -4957,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
                 p->sched_class->set_cpus_allowed(p, new_mask);
 
         cpumask_copy(&p->cpus_allowed, new_mask);
-        p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+        p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
 /*
@@ -5499,15 +5556,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static __read_mostly int sched_domain_debug_enabled;
+static __read_mostly int sched_debug_enabled;
 
-static int __init sched_domain_debug_setup(char *str)
+static int __init sched_debug_setup(char *str)
 {
-        sched_domain_debug_enabled = 1;
+        sched_debug_enabled = 1;
 
         return 0;
 }
-early_param("sched_debug", sched_domain_debug_setup);
+early_param("sched_debug", sched_debug_setup);
+
+static inline bool sched_debug(void)
+{
+        return sched_debug_enabled;
+}
 
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                   struct cpumask *groupmask)
@@ -5547,7 +5609,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                         break;
                 }
 
-                if (!group->sgp->power) {
+                /*
+                 * Even though we initialize ->power to something semi-sane,
+                 * we leave power_orig unset. This allows us to detect if
+                 * domain iteration is still funny without causing /0 traps.
+                 */
+                if (!group->sgp->power_orig) {
                         printk(KERN_CONT "\n");
                         printk(KERN_ERR "ERROR: domain->cpu_power not "
                                         "set\n");
@@ -5560,7 +5627,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                         break;
                 }
 
-                if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
+                if (!(sd->flags & SD_OVERLAP) &&
+                    cpumask_intersects(groupmask, sched_group_cpus(group))) {
                         printk(KERN_CONT "\n");
                         printk(KERN_ERR "ERROR: repeated CPUs\n");
                         break;
@@ -5594,7 +5662,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
         int level = 0;
 
-        if (!sched_domain_debug_enabled)
+        if (!sched_debug_enabled)
                 return;
 
         if (!sd) {
@@ -5615,6 +5683,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
+static inline bool sched_debug(void)
+{
+        return false;
+}
 #endif /* CONFIG_SCHED_DEBUG */
 
 static int sd_degenerate(struct sched_domain *sd)
@@ -5898,99 +5970,11 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-#ifdef CONFIG_NUMA
-
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int find_next_best_node(int node, nodemask_t *used_nodes)
-{
-        int i, n, val, min_val, best_node = -1;
-
-        min_val = INT_MAX;
-
-        for (i = 0; i < nr_node_ids; i++) {
-                /* Start at @node */
-                n = (node + i) % nr_node_ids;
-
-                if (!nr_cpus_node(n))
-                        continue;
-
-                /* Skip already used nodes */
-                if (node_isset(n, *used_nodes))
-                        continue;
-
-                /* Simple min distance search */
-                val = node_distance(node, n);
-
-                if (val < min_val) {
-                        min_val = val;
-                        best_node = n;
-                }
-        }
-
-        if (best_node != -1)
-                node_set(best_node, *used_nodes);
-        return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @span: resulting cpumask
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-static void sched_domain_node_span(int node, struct cpumask *span)
-{
-        nodemask_t used_nodes;
-        int i;
-
-        cpumask_clear(span);
-        nodes_clear(used_nodes);
-
-        cpumask_or(span, span, cpumask_of_node(node));
-        node_set(node, used_nodes);
-
-        for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
-                int next_node = find_next_best_node(node, &used_nodes);
-                if (next_node < 0)
-                        break;
-                cpumask_or(span, span, cpumask_of_node(next_node));
-        }
-}
-
-static const struct cpumask *cpu_node_mask(int cpu)
-{
-        lockdep_assert_held(&sched_domains_mutex);
-
-        sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask);
-
-        return sched_domains_tmpmask;
-}
-
-static const struct cpumask *cpu_allnodes_mask(int cpu)
-{
-        return cpu_possible_mask;
-}
-#endif /* CONFIG_NUMA */
-
 static const struct cpumask *cpu_cpu_mask(int cpu)
 {
         return cpumask_of_node(cpu_to_node(cpu));
 }
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
-
 struct sd_data {
         struct sched_domain **__percpu sd;
         struct sched_group **__percpu sg;
@@ -6020,9 +6004,48 @@ struct sched_domain_topology_level {
         sched_domain_init_f init;
         sched_domain_mask_f mask;
         int flags;
+        int numa_level;
         struct sd_data data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+        const struct cpumask *span = sched_domain_span(sd);
+        struct sd_data *sdd = sd->private;
+        struct sched_domain *sibling;
+        int i;
+
+        for_each_cpu(i, span) {
+                sibling = *per_cpu_ptr(sdd->sd, i);
+                if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+                        continue;
+
+                cpumask_set_cpu(i, sched_group_mask(sg));
+        }
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+        return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
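
(Aside: group_balance_cpu() above is simply cpumask_first_and() over a group's CPU span and its new iteration mask, so CPUs whose sibling domain span came out empty can never be picked. The toy model below uses plain bitmasks, a GCC/Clang __builtin_ctzl(), and made-up values to illustrate the selection; nothing in it comes from the patch itself.)

#include <stdio.h>

/* lowest CPU set in both masks, -1 if the intersection is empty */
static int first_cpu_and(unsigned long a, unsigned long b)
{
        unsigned long both = a & b;

        return both ? __builtin_ctzl(both) : -1;
}

int main(void)
{
        unsigned long group_cpus = 0x0ful;      /* group spans CPUs 0-3        */
        unsigned long group_mask = 0x0eul;      /* CPU 0 excluded: its sibling */
                                                /* domain span was empty       */

        printf("balance cpu = %d\n", first_cpu_and(group_cpus, group_mask));
        return 0;
}
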
@@ -6041,6 +6064,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 if (cpumask_test_cpu(i, covered))
                         continue;
 
+                child = *per_cpu_ptr(sdd->sd, i);
+
+                /* See the comment near build_group_mask(). */
+                if (!cpumask_test_cpu(i, sched_domain_span(child)))
+                        continue;
+
                 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                 GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6048,8 +6077,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                         goto fail;
 
                 sg_span = sched_group_cpus(sg);
-
-                child = *per_cpu_ptr(sdd->sd, i);
                 if (child->child) {
                         child = child->child;
                         cpumask_copy(sg_span, sched_domain_span(child));
@@ -6058,10 +6085,24 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
                 cpumask_or(covered, covered, sg_span);
 
-                sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
-                atomic_inc(&sg->sgp->ref);
+                sg->sgp = *per_cpu_ptr(sdd->sgp, i);
+                if (atomic_inc_return(&sg->sgp->ref) == 1)
+                        build_group_mask(sd, sg);
 
-                if (cpumask_test_cpu(cpu, sg_span))
+                /*
+                 * Initialize sgp->power such that even if we mess up the
+                 * domains and no possible iteration will get us here, we won't
+                 * die on a /0 trap.
+                 */
+                sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+
+                /*
+                 * Make sure the first group of this domain contains the
+                 * canonical balance cpu. Otherwise the sched_domain iteration
+                 * breaks. See update_sg_lb_stats().
+                 */
+                if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
+                    group_balance_cpu(sg) == cpu)
                         groups = sg;
 
                 if (!first)
@@ -6135,6 +6176,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
                 cpumask_clear(sched_group_cpus(sg));
                 sg->sgp->power = 0;
+                cpumask_setall(sched_group_mask(sg));
 
                 for_each_cpu(j, span) {
                         if (get_group(j, sdd, NULL) != group)
@@ -6176,7 +6218,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
                 sg = sg->next;
         } while (sg != sd->groups);
 
-        if (cpu != group_first_cpu(sg))
+        if (cpu != group_balance_cpu(sg))
                 return;
 
         update_group_power(sd, cpu);
@@ -6211,10 +6253,6 @@ sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \
 }
 
 SD_INIT_FUNC(CPU)
-#ifdef CONFIG_NUMA
- SD_INIT_FUNC(ALLNODES)
- SD_INIT_FUNC(NODE)
-#endif
 #ifdef CONFIG_SCHED_SMT
  SD_INIT_FUNC(SIBLING)
 #endif
@@ -6230,11 +6268,8 @@ int sched_domain_level_max;
 
 static int __init setup_relax_domain_level(char *str)
 {
-        unsigned long val;
-
-        val = simple_strtoul(str, NULL, 0);
-        if (val < sched_domain_level_max)
-                default_relax_domain_level = val;
+        if (kstrtoint(str, 0, &default_relax_domain_level))
+                pr_warn("Unable to set relax_domain_level\n");
 
         return 1;
 }
@@ -6336,15 +6371,236 @@ static struct sched_domain_topology_level default_topology[] = {
         { sd_init_BOOK, cpu_book_mask, },
 #endif
         { sd_init_CPU, cpu_cpu_mask, },
-#ifdef CONFIG_NUMA
-        { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
-        { sd_init_ALLNODES, cpu_allnodes_mask, },
-#endif
         { NULL, },
 };
 
 static struct sched_domain_topology_level *sched_domain_topology = default_topology;
 
+#ifdef CONFIG_NUMA
+
+static int sched_domains_numa_levels;
+static int *sched_domains_numa_distance;
+static struct cpumask ***sched_domains_numa_masks;
+static int sched_domains_curr_level;
+
+static inline int sd_local_flags(int level)
+{
+        if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
+                return 0;
+
+        return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
+}
+
+static struct sched_domain *
+sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+{
+        struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
+        int level = tl->numa_level;
+        int sd_weight = cpumask_weight(
+                        sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+
+        *sd = (struct sched_domain){
+                .min_interval = sd_weight,
+                .max_interval = 2*sd_weight,
+                .busy_factor = 32,
+                .imbalance_pct = 125,
+                .cache_nice_tries = 2,
+                .busy_idx = 3,
+                .idle_idx = 2,
+                .newidle_idx = 0,
+                .wake_idx = 0,
+                .forkexec_idx = 0,
+
+                .flags = 1*SD_LOAD_BALANCE
+                       | 1*SD_BALANCE_NEWIDLE
+                       | 0*SD_BALANCE_EXEC
+                       | 0*SD_BALANCE_FORK
+                       | 0*SD_BALANCE_WAKE
+                       | 0*SD_WAKE_AFFINE
+                       | 0*SD_PREFER_LOCAL
+                       | 0*SD_SHARE_CPUPOWER
+                       | 0*SD_SHARE_PKG_RESOURCES
+                       | 1*SD_SERIALIZE
+                       | 0*SD_PREFER_SIBLING
+                       | sd_local_flags(level)
+                       ,
+                .last_balance = jiffies,
+                .balance_interval = sd_weight,
+        };
+        SD_INIT_NAME(sd, NUMA);
+        sd->private = &tl->data;
+
+        /*
+         * Ugly hack to pass state to sd_numa_mask()...
+         */
+        sched_domains_curr_level = tl->numa_level;
+
+        return sd;
+}
+
+static const struct cpumask *sd_numa_mask(int cpu)
+{
+        return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
+}
+
+static void sched_numa_warn(const char *str)
+{
+        static int done = false;
+        int i,j;
+
+        if (done)
+                return;
+
+        done = true;
+
+        printk(KERN_WARNING "ERROR: %s\n\n", str);
+
+        for (i = 0; i < nr_node_ids; i++) {
+                printk(KERN_WARNING "  ");
+                for (j = 0; j < nr_node_ids; j++)
+                        printk(KERN_CONT "%02d ", node_distance(i,j));
+                printk(KERN_CONT "\n");
+        }
+        printk(KERN_WARNING "\n");
+}
+
+static bool find_numa_distance(int distance)
+{
+        int i;
+
+        if (distance == node_distance(0, 0))
+                return true;
+
+        for (i = 0; i < sched_domains_numa_levels; i++) {
+                if (sched_domains_numa_distance[i] == distance)
+                        return true;
+        }
+
+        return false;
+}
+
+static void sched_init_numa(void)
+{
+        int next_distance, curr_distance = node_distance(0, 0);
+        struct sched_domain_topology_level *tl;
+        int level = 0;
+        int i, j, k;
+
+        sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
+        if (!sched_domains_numa_distance)
+                return;
+
+        /*
+         * O(nr_nodes^2) deduplicating selection sort -- in order to find the
+         * unique distances in the node_distance() table.
+         *
+         * Assumes node_distance(0,j) includes all distances in
+         * node_distance(i,j) in order to avoid cubic time.
+         */
+        next_distance = curr_distance;
+        for (i = 0; i < nr_node_ids; i++) {
+                for (j = 0; j < nr_node_ids; j++) {
+                        for (k = 0; k < nr_node_ids; k++) {
+                                int distance = node_distance(i, k);
+
+                                if (distance > curr_distance &&
+                                    (distance < next_distance ||
+                                     next_distance == curr_distance))
+                                        next_distance = distance;
+
+                                /*
+                                 * While not a strong assumption it would be nice to know
+                                 * about cases where if node A is connected to B, B is not
+                                 * equally connected to A.
+                                 */
+                                if (sched_debug() && node_distance(k, i) != distance)
+                                        sched_numa_warn("Node-distance not symmetric");
+
+                                if (sched_debug() && i && !find_numa_distance(distance))
+                                        sched_numa_warn("Node-0 not representative");
+                        }
+                        if (next_distance != curr_distance) {
+                                sched_domains_numa_distance[level++] = next_distance;
+                                sched_domains_numa_levels = level;
+                                curr_distance = next_distance;
+                        } else break;
+                }
+
+                /*
+                 * In case of sched_debug() we verify the above assumption.
+                 */
+                if (!sched_debug())
+                        break;
+        }
+        /*
+         * 'level' contains the number of unique distances, excluding the
+         * identity distance node_distance(i,i).
+         *
+         * The sched_domains_numa_distance[] array includes the actual distance
+         * numbers.
+         */
+
+        sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
+        if (!sched_domains_numa_masks)
+                return;
+
+        /*
+         * Now for each level, construct a mask per node which contains all
+         * cpus of nodes that are that many hops away from us.
+         */
+        for (i = 0; i < level; i++) {
+                sched_domains_numa_masks[i] =
+                        kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
+                if (!sched_domains_numa_masks[i])
+                        return;
+
+                for (j = 0; j < nr_node_ids; j++) {
+                        struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
+                        if (!mask)
+                                return;
+
+                        sched_domains_numa_masks[i][j] = mask;
+
+                        for (k = 0; k < nr_node_ids; k++) {
+                                if (node_distance(j, k) > sched_domains_numa_distance[i])
+                                        continue;
+
+                                cpumask_or(mask, mask, cpumask_of_node(k));
+                        }
+                }
+        }
+
+        tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+                        sizeof(struct sched_domain_topology_level), GFP_KERNEL);
+        if (!tl)
+                return;
+
+        /*
+         * Copy the default topology bits..
+         */
+        for (i = 0; default_topology[i].init; i++)
+                tl[i] = default_topology[i];
+
+        /*
+         * .. and append 'j' levels of NUMA goodness.
+         */
+        for (j = 0; j < level; i++, j++) {
+                tl[i] = (struct sched_domain_topology_level){
+                        .init = sd_numa_init,
+                        .mask = sd_numa_mask,
+                        .flags = SDTL_OVERLAP,
+                        .numa_level = j,
+                };
+        }
+
+        sched_domain_topology = tl;
+}
+#else
+static inline void sched_init_numa(void)
+{
+}
+#endif /* CONFIG_NUMA */
+
 static int __sdt_alloc(const struct cpumask *cpu_map)
 {
         struct sched_domain_topology_level *tl;
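
(Aside: the first pass of sched_init_numa() above walks the node_distance() table and records each unique distance once, in increasing order; every recorded distance later becomes one NUMA topology level. The standalone sketch below replays that idea on an invented 4-node distance table; NR_NODES and the table values are assumptions for illustration only.)

#include <stdio.h>
#include <limits.h>

#define NR_NODES 4

/* hypothetical symmetric distance table, 10 = local node */
static const int node_distance[NR_NODES][NR_NODES] = {
        { 10, 20, 20, 30 },
        { 20, 10, 30, 20 },
        { 20, 30, 10, 20 },
        { 30, 20, 20, 10 },
};

int main(void)
{
        int levels[NR_NODES * NR_NODES];
        int curr = node_distance[0][0];         /* the identity distance */
        int nlevels = 0;
        int i, j;

        for (;;) {
                int next = INT_MAX;

                /* smallest distance strictly greater than the current one */
                for (i = 0; i < NR_NODES; i++)
                        for (j = 0; j < NR_NODES; j++)
                                if (node_distance[i][j] > curr &&
                                    node_distance[i][j] < next)
                                        next = node_distance[i][j];

                if (next == INT_MAX)
                        break;
                levels[nlevels++] = curr = next;
        }

        for (i = 0; i < nlevels; i++)
                printf("NUMA level %d: distance %d\n", i, levels[i]);
        return 0;
}
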
@@ -6382,9 +6638,11 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                         if (!sg)
                                 return -ENOMEM;
 
+                        sg->next = sg;
+
                         *per_cpu_ptr(sdd->sg, j) = sg;
 
-                        sgp = kzalloc_node(sizeof(struct sched_group_power),
+                        sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
                                         GFP_KERNEL, cpu_to_node(j));
                         if (!sgp)
                                 return -ENOMEM;
@@ -6405,16 +6663,26 @@ static void __sdt_free(const struct cpumask *cpu_map)
                 struct sd_data *sdd = &tl->data;
 
                 for_each_cpu(j, cpu_map) {
-                        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
-                        if (sd && (sd->flags & SD_OVERLAP))
-                                free_sched_groups(sd->groups, 0);
-                        kfree(*per_cpu_ptr(sdd->sd, j));
-                        kfree(*per_cpu_ptr(sdd->sg, j));
-                        kfree(*per_cpu_ptr(sdd->sgp, j));
+                        struct sched_domain *sd;
+
+                        if (sdd->sd) {
+                                sd = *per_cpu_ptr(sdd->sd, j);
+                                if (sd && (sd->flags & SD_OVERLAP))
+                                        free_sched_groups(sd->groups, 0);
+                                kfree(*per_cpu_ptr(sdd->sd, j));
+                        }
+
+                        if (sdd->sg)
+                                kfree(*per_cpu_ptr(sdd->sg, j));
+                        if (sdd->sgp)
+                                kfree(*per_cpu_ptr(sdd->sgp, j));
                 }
                 free_percpu(sdd->sd);
+                sdd->sd = NULL;
                 free_percpu(sdd->sg);
+                sdd->sg = NULL;
                 free_percpu(sdd->sgp);
+                sdd->sgp = NULL;
         }
 }
 
@@ -6427,7 +6695,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
         if (!sd)
                 return child;
 
-        set_domain_attribute(sd, attr);
         cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
         if (child) {
                 sd->level = child->level + 1;
@@ -6435,6 +6702,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                 child->parent = sd;
         }
         sd->child = child;
+        set_domain_attribute(sd, attr);
 
         return sd;
 }
@@ -6575,7 +6843,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
         if (!doms_cur)
                 doms_cur = &fallback_doms;
         cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
-        dattr_cur = NULL;
         err = build_sched_domains(doms_cur[0], NULL);
         register_sched_domain_sysctl();
 
@@ -6700,97 +6967,6 @@ match2:
         mutex_unlock(&sched_domains_mutex);
 }
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static void reinit_sched_domains(void)
-{
-        get_online_cpus();
-
-        /* Destroy domains first to force the rebuild */
-        partition_sched_domains(0, NULL, NULL);
-
-        rebuild_sched_domains();
-        put_online_cpus();
-}
-
-static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
-{
-        unsigned int level = 0;
-
-        if (sscanf(buf, "%u", &level) != 1)
-                return -EINVAL;
-
-        /*
-         * level is always be positive so don't check for
-         * level < POWERSAVINGS_BALANCE_NONE which is 0
-         * What happens on 0 or 1 byte write,
-         * need to check for count as well?
-         */
-
-        if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
-                return -EINVAL;
-
-        if (smt)
-                sched_smt_power_savings = level;
-        else
-                sched_mc_power_savings = level;
-
-        reinit_sched_domains();
-
-        return count;
-}
-
-#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct device *dev,
-                                           struct device_attribute *attr,
-                                           char *buf)
-{
-        return sprintf(buf, "%u\n", sched_mc_power_savings);
-}
-static ssize_t sched_mc_power_savings_store(struct device *dev,
-                                            struct device_attribute *attr,
-                                            const char *buf, size_t count)
-{
-        return sched_power_savings_store(buf, count, 0);
-}
-static DEVICE_ATTR(sched_mc_power_savings, 0644,
-                   sched_mc_power_savings_show,
-                   sched_mc_power_savings_store);
-#endif
-
-#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct device *dev,
-                                            struct device_attribute *attr,
-                                            char *buf)
-{
-        return sprintf(buf, "%u\n", sched_smt_power_savings);
-}
-static ssize_t sched_smt_power_savings_store(struct device *dev,
-                                            struct device_attribute *attr,
-                                            const char *buf, size_t count)
-{
-        return sched_power_savings_store(buf, count, 1);
-}
-static DEVICE_ATTR(sched_smt_power_savings, 0644,
-                   sched_smt_power_savings_show,
-                   sched_smt_power_savings_store);
-#endif
-
-int __init sched_create_sysfs_power_savings_entries(struct device *dev)
-{
-        int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
-        if (smt_capable())
-                err = device_create_file(dev, &dev_attr_sched_smt_power_savings);
-#endif
-#ifdef CONFIG_SCHED_MC
-        if (!err && mc_capable())
-                err = device_create_file(dev, &dev_attr_sched_mc_power_savings);
-#endif
-        return err;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
@@ -6828,6 +7004,8 @@ void __init sched_init_smp(void)
         alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
         alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
+        sched_init_numa();
+
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
         init_sched_domains(cpu_active_mask);
@@ -7049,6 +7227,7 @@ void __init sched_init(void)
         /* May be allocated at isolcpus cmdline parse time */
         if (cpu_isolated_map == NULL)
                 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+        idle_thread_set_boot_cpu();
 #endif
         init_sched_fair_class();
 
@@ -7970,13 +8149,9 @@ static struct cftype cpu_files[] = {
                 .write_u64 = cpu_rt_period_write_uint,
         },
 #endif
+        { }     /* terminate */
 };
 
-static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-        return cgroup_add_files(cont, ss, cpu_files, ARRAY_SIZE(cpu_files));
-}
-
 struct cgroup_subsys cpu_cgroup_subsys = {
         .name = "cpu",
         .create = cpu_cgroup_create,
@@ -7984,8 +8159,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .can_attach = cpu_cgroup_can_attach,
         .attach = cpu_cgroup_attach,
         .exit = cpu_cgroup_exit,
-        .populate = cpu_cgroup_populate,
         .subsys_id = cpu_cgroup_subsys_id,
+        .base_cftypes = cpu_files,
         .early_init = 1,
 };
 
@@ -8170,13 +8345,9 @@ static struct cftype files[] = {
                 .name = "stat",
                 .read_map = cpuacct_stats_show,
         },
+        { }     /* terminate */
 };
 
-static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
-{
-        return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
-}
-
 /*
  * charge this task's execution time to its accounting group.
  *
@@ -8208,7 +8379,7 @@ struct cgroup_subsys cpuacct_subsys = {
         .name = "cpuacct",
         .create = cpuacct_create,
         .destroy = cpuacct_destroy,
-        .populate = cpuacct_populate,
         .subsys_id = cpuacct_subsys_id,
+        .base_cftypes = files,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */