Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  |  98
1 file changed, 86 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 468bdd44c1ba..82ad284f823b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1096,7 +1096,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
  *
  * sched_move_task() holds both and thus holding either pins the cgroup,
- * see set_task_rq().
+ * see task_group().
  *
  * Furthermore, all task_rq users should acquire both locks, see
  * task_rq_lock().
@@ -1910,12 +1910,12 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
                     struct task_struct *next)
 {
+        trace_sched_switch(prev, next);
         sched_info_switch(prev, next);
         perf_event_task_sched_out(prev, next);
         fire_sched_out_preempt_notifiers(prev, next);
         prepare_lock_switch(rq, next);
         prepare_arch_switch(next);
-        trace_sched_switch(prev, next);
 }
 
 /**
@@ -4340,9 +4340,7 @@ recheck:
          */
         if (unlikely(policy == p->policy && (!rt_policy(policy) ||
                         param->sched_priority == p->rt_priority))) {
-
-                __task_rq_unlock(rq);
-                raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+                task_rq_unlock(rq, p, &flags);
                 return 0;
         }
 
@@ -6024,6 +6022,11 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
  * allows us to avoid some pointer chasing select_idle_sibling().
  *
+ * Iterate domains and sched_groups downward, assigning CPUs to be
+ * select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing
+ * due to random perturbation self canceling, ie sw buddies pull
+ * their counterpart to their CPU's hw counterpart.
+ *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6037,8 +6040,40 @@ static void update_top_cache_domain(int cpu)
         int id = cpu;
 
         sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-        if (sd)
+        if (sd) {
+                struct sched_domain *tmp = sd;
+                struct sched_group *sg, *prev;
+                bool right;
+
+                /*
+                 * Traverse to first CPU in group, and count hops
+                 * to cpu from there, switching direction on each
+                 * hop, never ever pointing the last CPU rightward.
+                 */
+                do {
+                        id = cpumask_first(sched_domain_span(tmp));
+                        prev = sg = tmp->groups;
+                        right = 1;
+
+                        while (cpumask_first(sched_group_cpus(sg)) != id)
+                                sg = sg->next;
+
+                        while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
+                                prev = sg;
+                                sg = sg->next;
+                                right = !right;
+                        }
+
+                        /* A CPU went down, never point back to domain start. */
+                        if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
+                                right = false;
+
+                        sg = right ? sg->next : prev;
+                        tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
+                } while ((tmp = tmp->child));
+
                 id = cpumask_first(sched_domain_span(sd));
+        }
 
         rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
         per_cpu(sd_llc_id, cpu) = id;
@@ -7097,34 +7132,66 @@ match2:
         mutex_unlock(&sched_domains_mutex);
 }
 
+static int num_cpus_frozen;     /* used to mark begin/end of suspend/resume */
+
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
  * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore it back to its original state upon resume anyway.
  */
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                              void *hcpu)
 {
-        switch (action & ~CPU_TASKS_FROZEN) {
+        switch (action) {
+        case CPU_ONLINE_FROZEN:
+        case CPU_DOWN_FAILED_FROZEN:
+
+                /*
+                 * num_cpus_frozen tracks how many CPUs are involved in suspend
+                 * resume sequence. As long as this is not the last online
+                 * operation in the resume sequence, just build a single sched
+                 * domain, ignoring cpusets.
+                 */
+                num_cpus_frozen--;
+                if (likely(num_cpus_frozen)) {
+                        partition_sched_domains(1, NULL, NULL);
+                        break;
+                }
+
+                /*
+                 * This is the last CPU online operation. So fall through and
+                 * restore the original sched domains by considering the
+                 * cpuset configurations.
+                 */
+
         case CPU_ONLINE:
         case CPU_DOWN_FAILED:
-                cpuset_update_active_cpus();
-                return NOTIFY_OK;
+                cpuset_update_active_cpus(true);
+                break;
         default:
                 return NOTIFY_DONE;
         }
+        return NOTIFY_OK;
 }
 
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                                void *hcpu)
 {
-        switch (action & ~CPU_TASKS_FROZEN) {
+        switch (action) {
         case CPU_DOWN_PREPARE:
-                cpuset_update_active_cpus();
-                return NOTIFY_OK;
+                cpuset_update_active_cpus(false);
+                break;
+        case CPU_DOWN_PREPARE_FROZEN:
+                num_cpus_frozen++;
+                partition_sched_domains(1, NULL, NULL);
+                break;
         default:
                 return NOTIFY_DONE;
         }
+        return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
@@ -7589,6 +7656,7 @@ void sched_destroy_group(struct task_group *tg)
  */
 void sched_move_task(struct task_struct *tsk)
 {
+        struct task_group *tg;
         int on_rq, running;
         unsigned long flags;
         struct rq *rq;
@@ -7603,6 +7671,12 @@ void sched_move_task(struct task_struct *tsk)
         if (unlikely(running))
                 tsk->sched_class->put_prev_task(rq, tsk);
 
+        tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+                                lockdep_is_held(&tsk->sighand->siglock)),
+                          struct task_group, css);
+        tg = autogroup_task_group(tsk, tg);
+        tsk->sched_task_group = tg;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
         if (tsk->sched_class->task_move_group)
                 tsk->sched_class->task_move_group(tsk, on_rq);
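
For reference, the idle_buddy walk added to update_top_cache_domain() above is easier to follow outside the kernel. The following is a minimal userspace sketch, not kernel code: it assumes a single cache domain whose sched_groups each hold exactly one CPU and form a ring, so sched_group_cpus()/cpumask_first() collapse to plain integers, and NR_CPUS and pick_idle_buddy() are invented names for the illustration.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8       /* assumed: one single-CPU group per CPU, linked in a ring */

/*
 * Mirror the patch's traversal for one domain: start at the first CPU of
 * the domain span, hop group to group toward 'cpu' while flipping the
 * direction flag on every hop, then take the next or previous group.
 */
static int pick_idle_buddy(int cpu)
{
        int id = 0;                     /* cpumask_first(sched_domain_span(sd)) */
        int sg = id, prev = id;
        bool right = true;

        while (sg != cpu) {             /* !cpumask_test_cpu(cpu, sg) */
                prev = sg;
                sg = (sg + 1) % NR_CPUS;        /* sg = sg->next */
                right = !right;
        }

        /* Never point the last CPU back at the domain start. */
        if (right && (sg + 1) % NR_CPUS == id)
                right = false;

        return right ? (sg + 1) % NR_CPUS : prev;
}

int main(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu %d -> buddy %d\n", cpu, pick_idle_buddy(cpu));
        return 0;
}

With these assumptions the walk pairs neighbouring CPUs (0<->1, 2<->3, ...), which is the "cross-wiring" the new comment describes; the final check only matters when the rightward neighbour would wrap back around to the first CPU of the domain.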
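
The num_cpus_frozen bookkeeping in the two cpuset notifiers can likewise be modelled in a few lines. Again a hypothetical sketch rather than the kernel implementation: fake_cpu_down_frozen()/fake_cpu_up_frozen() stand in for the CPU_DOWN_PREPARE_FROZEN and CPU_ONLINE_FROZEN notifier calls, and printf() stands in for partition_sched_domains() and cpuset_update_active_cpus().

#include <stdio.h>

static int num_cpus_frozen;     /* CPUs currently held down by suspend */

/* Suspend path: every CPU taken down bumps the counter and collapses
 * scheduling into a single domain, leaving cpusets untouched. */
static void fake_cpu_down_frozen(int cpu)
{
        num_cpus_frozen++;
        printf("cpu%d down: partition_sched_domains(1, NULL, NULL)\n", cpu);
}

/* Resume path: only the CPU that drops the counter back to zero, i.e. the
 * last one to come up, rebuilds domains from the cpuset configuration. */
static void fake_cpu_up_frozen(int cpu)
{
        num_cpus_frozen--;
        if (num_cpus_frozen)
                printf("cpu%d up: partition_sched_domains(1, NULL, NULL)\n", cpu);
        else
                printf("cpu%d up: cpuset_update_active_cpus(true)\n", cpu);
}

int main(void)
{
        /* Suspend takes non-boot CPUs 1..3 down; resume brings them back up. */
        for (int cpu = 1; cpu <= 3; cpu++)
                fake_cpu_down_frozen(cpu);
        for (int cpu = 1; cpu <= 3; cpu++)
                fake_cpu_up_frozen(cpu);
        return 0;
}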
