Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  98
1 file changed, 86 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 468bdd44c1ba..82ad284f823b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1096,7 +1096,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
  *
  * sched_move_task() holds both and thus holding either pins the cgroup,
- * see set_task_rq().
+ * see task_group().
  *
  * Furthermore, all task_rq users should acquire both locks, see
  * task_rq_lock().
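
The rule spelled out above (either ->pi_lock or rq->lock pins the task's group, and task_rq users take both) is what task_rq_lock() is expected to provide. Below is a rough, simplified sketch of that double-lock pairing, written in kernel style for illustration only; it is deliberately named task_rq_lock_sketch() and is not a verbatim copy of the helper in this file.

/* Simplified sketch of the ->pi_lock + rq->lock pairing described above. */
static struct rq *task_rq_lock_sketch(struct task_struct *p, unsigned long *flags)
{
        struct rq *rq;

        for (;;) {
                raw_spin_lock_irqsave(&p->pi_lock, *flags);
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
                if (likely(rq == task_rq(p)))
                        return rq;      /* still on the same runqueue */
                /* the task migrated while we took rq->lock: drop both and retry */
                raw_spin_unlock(&rq->lock);
                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
        }
}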
@@ -1910,12 +1910,12 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
                     struct task_struct *next)
 {
+        trace_sched_switch(prev, next);
         sched_info_switch(prev, next);
         perf_event_task_sched_out(prev, next);
         fire_sched_out_preempt_notifiers(prev, next);
         prepare_lock_switch(rq, next);
         prepare_arch_switch(next);
-        trace_sched_switch(prev, next);
 }
 
 /**
@@ -4340,9 +4340,7 @@ recheck:
          */
         if (unlikely(policy == p->policy && (!rt_policy(policy) ||
                         param->sched_priority == p->rt_priority))) {
-
-                __task_rq_unlock(rq);
-                raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+                task_rq_unlock(rq, p, &flags);
                 return 0;
         }
 
@@ -6024,6 +6022,11 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
  * allows us to avoid some pointer chasing select_idle_sibling().
  *
+ * Iterate domains and sched_groups downward, assigning CPUs to be
+ * select_idle_sibling() hw buddy.  Cross-wiring hw makes bouncing
+ * due to random perturbation self canceling, ie sw buddies pull
+ * their counterpart to their CPU's hw counterpart.
+ *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6037,8 +6040,40 @@ static void update_top_cache_domain(int cpu)
         int id = cpu;
 
         sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-        if (sd)
+        if (sd) {
+                struct sched_domain *tmp = sd;
+                struct sched_group *sg, *prev;
+                bool right;
+
+                /*
+                 * Traverse to first CPU in group, and count hops
+                 * to cpu from there, switching direction on each
+                 * hop, never ever pointing the last CPU rightward.
+                 */
+                do {
+                        id = cpumask_first(sched_domain_span(tmp));
+                        prev = sg = tmp->groups;
+                        right = 1;
+
+                        while (cpumask_first(sched_group_cpus(sg)) != id)
+                                sg = sg->next;
+
+                        while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
+                                prev = sg;
+                                sg = sg->next;
+                                right = !right;
+                        }
+
+                        /* A CPU went down, never point back to domain start. */
+                        if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
+                                right = false;
+
+                        sg = right ? sg->next : prev;
+                        tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
+                } while ((tmp = tmp->child));
+
                 id = cpumask_first(sched_domain_span(sd));
+        }
 
         rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
         per_cpu(sd_llc_id, cpu) = id;
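
As a worked illustration of the hop-and-flip walk added above: the standalone userspace model below (not kernel code) applies the same logic to a hypothetical single-level domain whose sched_groups each hold one CPU, represented as indices modulo NR; NR is an assumed domain size chosen only for the example. For NR = 4 it pairs the CPUs as 0<->1 and 2<->3.

#include <stdbool.h>
#include <stdio.h>

#define NR 4    /* assumed size of the single illustrative cache domain */

int main(void)
{
        for (int cpu = 0; cpu < NR; cpu++) {
                int id = 0;                     /* first CPU in the domain span */
                int sg = id, prev = id;         /* groups hold one CPU each here */
                bool right = true;

                /*
                 * Hop from the first group to the group holding cpu,
                 * flipping direction on every hop, as in the patch.
                 */
                while (sg != cpu) {
                        prev = sg;
                        sg = (sg + 1) % NR;
                        right = !right;
                }

                /* never point the last CPU back at the domain start */
                if (right && (sg + 1) % NR == id)
                        right = false;

                sg = right ? (sg + 1) % NR : prev;
                printf("cpu %d -> idle_buddy %d\n", cpu, sg);
        }
        return 0;
}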
@@ -7097,34 +7132,66 @@ match2:
         mutex_unlock(&sched_domains_mutex);
 }
 
+static int num_cpus_frozen;        /* used to mark begin/end of suspend/resume */
+
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
  * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore it back to its original state upon resume anyway.
  */
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                              void *hcpu)
 {
-        switch (action & ~CPU_TASKS_FROZEN) {
+        switch (action) {
+        case CPU_ONLINE_FROZEN:
+        case CPU_DOWN_FAILED_FROZEN:
+
+                /*
+                 * num_cpus_frozen tracks how many CPUs are involved in suspend
+                 * resume sequence. As long as this is not the last online
+                 * operation in the resume sequence, just build a single sched
+                 * domain, ignoring cpusets.
+                 */
+                num_cpus_frozen--;
+                if (likely(num_cpus_frozen)) {
+                        partition_sched_domains(1, NULL, NULL);
+                        break;
+                }
+
+                /*
+                 * This is the last CPU online operation. So fall through and
+                 * restore the original sched domains by considering the
+                 * cpuset configurations.
+                 */
+
         case CPU_ONLINE:
         case CPU_DOWN_FAILED:
-                cpuset_update_active_cpus();
-                return NOTIFY_OK;
+                cpuset_update_active_cpus(true);
+                break;
         default:
                 return NOTIFY_DONE;
         }
+        return NOTIFY_OK;
 }
 
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                                void *hcpu)
 {
-        switch (action & ~CPU_TASKS_FROZEN) {
+        switch (action) {
         case CPU_DOWN_PREPARE:
-                cpuset_update_active_cpus();
-                return NOTIFY_OK;
+                cpuset_update_active_cpus(false);
+                break;
+        case CPU_DOWN_PREPARE_FROZEN:
+                num_cpus_frozen++;
+                partition_sched_domains(1, NULL, NULL);
+                break;
         default:
                 return NOTIFY_DONE;
         }
+        return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
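
To make the suspend/resume handling above concrete, here is a standalone userspace model (not kernel code) of the notifier flow on a hypothetical 4-CPU box: the three non-boot CPUs receive CPU_DOWN_PREPARE_FROZEN on suspend and CPU_ONLINE_FROZEN on resume, and the cpuset-driven domain rebuild happens only once num_cpus_frozen drops back to zero.

#include <stdio.h>

static int num_cpus_frozen;

/* models CPU_DOWN_PREPARE_FROZEN: count the CPU, fall back to one domain */
static void cpu_down_prepare_frozen(int cpu)
{
        num_cpus_frozen++;
        printf("cpu%d down: single sched domain, num_cpus_frozen=%d\n",
               cpu, num_cpus_frozen);
}

/* models CPU_ONLINE_FROZEN: only the last online op rebuilds from cpusets */
static void cpu_online_frozen(int cpu)
{
        num_cpus_frozen--;
        if (num_cpus_frozen)
                printf("cpu%d up: still resuming, keep single domain (%d left)\n",
                       cpu, num_cpus_frozen);
        else
                printf("cpu%d up: last one, rebuild domains from cpusets\n", cpu);
}

int main(void)
{
        int cpu;

        /* suspend takes non-boot CPUs 1..3 down, resume brings them back */
        for (cpu = 1; cpu <= 3; cpu++)
                cpu_down_prepare_frozen(cpu);
        for (cpu = 1; cpu <= 3; cpu++)
                cpu_online_frozen(cpu);
        return 0;
}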
@@ -7589,6 +7656,7 @@ void sched_destroy_group(struct task_group *tg)
  */
 void sched_move_task(struct task_struct *tsk)
 {
+        struct task_group *tg;
         int on_rq, running;
         unsigned long flags;
         struct rq *rq;
@@ -7603,6 +7671,12 @@ void sched_move_task(struct task_struct *tsk)
         if (unlikely(running))
                 tsk->sched_class->put_prev_task(rq, tsk);
 
+        tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+                                lockdep_is_held(&tsk->sighand->siglock)),
+                          struct task_group, css);
+        tg = autogroup_task_group(tsk, tg);
+        tsk->sched_task_group = tg;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
         if (tsk->sched_class->task_move_group)
                 tsk->sched_class->task_move_group(tsk, on_rq);
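
For context on the sched_task_group caching added above: the task_group() helper referenced in the first hunk presumably reads this cached pointer back under either ->pi_lock or rq->lock. A minimal sketch of such an accessor follows; the real definition lives outside this diff and may differ in detail.

/*
 * Sketch only: an accessor returning the pointer cached by sched_move_task()
 * above.  The actual helper in the scheduler headers may differ.
 */
static inline struct task_group *task_group(struct task_struct *p)
{
        return p->sched_task_group;
}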