Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--   kernel/sched/core.c   119
1 file changed, 69 insertions(+), 50 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62671f53202a..f9123a82cbb6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,6 +306,9 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/* cpus with isolated domains */
+cpumask_var_t cpu_isolated_map;
+
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
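The new global replaces the file-local "static cpumask_var_t cpu_isolated_map" removed further down, so other scheduler code can consult the mask directly. A minimal, hypothetical consumer (the helper name is illustrative only, and it assumes an extern declaration of cpu_isolated_map is in scope):

/* Hypothetical helper, kernel context; not part of this patch. */
#include <linux/cpumask.h>

static inline bool demo_cpu_is_isolated(int cpu)
{
	return cpumask_test_cpu(cpu, cpu_isolated_map);
}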
@@ -690,6 +693,23 @@ static inline bool got_nohz_idle_kick(void)
 bool sched_can_stop_tick(void)
 {
 	/*
+	 * FIFO realtime policy runs the highest priority task. Other runnable
+	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 */
+	if (current->policy == SCHED_FIFO)
+		return true;
+
+	/*
+	 * Round-robin realtime tasks time slice with other tasks at the same
+	 * realtime priority. Is this task the only one at this priority?
+	 */
+	if (current->policy == SCHED_RR) {
+		struct sched_rt_entity *rt_se = &current->rt;
+
+		return rt_se->run_list.prev == rt_se->run_list.next;
+	}
+
+	/*
 	 * More than one running task need preemption.
 	 * nr_running update is assumed to be visible
 	 * after IPI is sent from wakers.
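The SCHED_RR branch added above decides "I am the only task at this priority" purely from the run_list pointers. A stand-alone illustration (ordinary user-space C, not kernel code) of the circular-list property it relies on: an entry that is alone on a list headed by a separate head node has ->prev == ->next, both pointing back at the head.

#include <assert.h>
#include <stddef.h>

struct node { struct node *prev, *next; };

static void list_init(struct node *head)
{
	head->prev = head->next = head;
}

static void list_add_tail(struct node *n, struct node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

int main(void)
{
	struct node head, a, b;

	list_init(&head);
	list_add_tail(&a, &head);
	assert(a.prev == a.next);	/* sole entry: both point at the head */

	list_add_tail(&b, &head);
	assert(a.prev != a.next);	/* a second entry breaks the equality */
	return 0;
}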
@@ -996,6 +1016,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq_clock_skip_update(rq, true);
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1026,10 +1053,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
+		struct task_migration_notifier tmn;
+
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+
+		tmn.task = p;
+		tmn.from_cpu = task_cpu(p);
+		tmn.to_cpu = new_cpu;
+
+		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);
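Together, the two hunks above let other code hear about cross-CPU task migrations. A hypothetical subscriber might look like the sketch below; it is not part of this patch, and it assumes the struct task_migration_notifier definition (with the task/from_cpu/to_cpu fields filled in above, taken here to be int CPU numbers) and the register_task_migration_notifier() prototype are visible to the caller.

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

static int demo_migration_notify(struct notifier_block *nb,
				 unsigned long val, void *data)
{
	struct task_migration_notifier *tmn = data;

	/* Called atomically from set_task_cpu(); keep the work cheap. */
	pr_debug("task %d: CPU %d -> CPU %d\n",
		 task_pid_nr(tmn->task), tmn->from_cpu, tmn->to_cpu);
	return NOTIFY_OK;
}

static struct notifier_block demo_migration_nb = {
	.notifier_call = demo_migration_notify,
};

static int __init demo_migration_init(void)
{
	register_task_migration_notifier(&demo_migration_nb);
	return 0;
}

Note that tmn.from_cpu is read with task_cpu(p) before __set_task_cpu() runs, so callbacks see the source CPU in that field and the destination in to_cpu.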
@@ -2818,7 +2853,7 @@ asmlinkage __visible void __sched schedule_user(void)
 	 * we find a better solution.
 	 *
 	 * NB: There are buggy callers of this function.  Ideally we
-	 * should warn if prev_state != IN_USER, but that will trigger
+	 * should warn if prev_state != CONTEXT_USER, but that will trigger
 	 * too frequently to make sense yet.
 	 */
 	enum ctx_state prev_state = exception_enter();
@@ -5320,36 +5355,13 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
-	unsigned long flags;
-	long cpu = (long)hcpu;
-	struct dl_bw *dl_b;
-
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
-		set_cpu_active(cpu, false);
-
-		/* explicitly allow suspend */
-		if (!(action & CPU_TASKS_FROZEN)) {
-			bool overflow;
-			int cpus;
-
-			rcu_read_lock_sched();
-			dl_b = dl_bw_of(cpu);
-
-			raw_spin_lock_irqsave(&dl_b->lock, flags);
-			cpus = dl_bw_cpus(cpu);
-			overflow = __dl_overflow(dl_b, cpus, 0, 0);
-			raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-			rcu_read_unlock_sched();
-
-			if (overflow)
-				return notifier_from_errno(-EBUSY);
-		}
+		set_cpu_active((long)hcpu, false);
 		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
 	}
-
-	return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5430,17 +5442,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		/*
-		 * Even though we initialize ->capacity to something semi-sane,
-		 * we leave capacity_orig unset. This allows us to detect if
-		 * domain iteration is still funny without causing /0 traps.
-		 */
-		if (!group->sgc->capacity_orig) {
-			printk(KERN_CONT "\n");
-			printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-			break;
-		}
-
 		if (!cpumask_weight(sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
@@ -5813,9 +5814,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	update_top_cache_domain(cpu);
 }
 
-/* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
-
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
@@ -5924,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		 * die on a /0 trap.
 		 */
 		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -6235,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 	 */
 
 	if (sd->flags & SD_SHARE_CPUCAPACITY) {
+		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
@@ -7000,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 	 */
 
 	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
 		cpuset_update_active_cpus(true);
 		break;
 	default:
@@ -7012,8 +7009,30 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 			       void *hcpu)
 {
-	switch (action) {
+	unsigned long flags;
+	long cpu = (long)hcpu;
+	struct dl_bw *dl_b;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
+		/* explicitly allow suspend */
+		if (!(action & CPU_TASKS_FROZEN)) {
+			bool overflow;
+			int cpus;
+
+			rcu_read_lock_sched();
+			dl_b = dl_bw_of(cpu);
+
+			raw_spin_lock_irqsave(&dl_b->lock, flags);
+			cpus = dl_bw_cpus(cpu);
+			overflow = __dl_overflow(dl_b, cpus, 0, 0);
+			raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+			rcu_read_unlock_sched();
+
+			if (overflow)
+				return notifier_from_errno(-EBUSY);
+		}
 		cpuset_update_active_cpus(false);
 		break;
 	case CPU_DOWN_PREPARE_FROZEN:
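The admission check that moved into this handler refuses a CPU hot-unplug if the deadline bandwidth already granted would no longer fit on the remaining CPUs. A toy, user-space model of that test (the helper and the numbers are illustrative, not the kernel's __dl_overflow()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Would the already-granted deadline bandwidth still fit on 'cpus' CPUs? */
static bool dl_fits(uint64_t per_cpu_limit, uint64_t total_granted, int cpus)
{
	return total_granted <= per_cpu_limit * (uint64_t)cpus;
}

int main(void)
{
	uint64_t limit = 950000;    /* per-CPU budget, us per 1s period */
	uint64_t granted = 3000000; /* bandwidth already admitted */

	printf("4 CPUs: %s\n", dl_fits(limit, granted, 4) ? "ok" : "-EBUSY");
	printf("3 CPUs: %s\n", dl_fits(limit, granted, 3) ? "ok" : "-EBUSY");
	return 0;
}

With four CPUs the 3,000,000 units of granted bandwidth fit under 4 x 950,000; with three they do not, which is the case where the hunk above returns notifier_from_errno(-EBUSY).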
@@ -7158,8 +7177,8 @@ void __init sched_init(void)
 		rq->calc_load_active = 0;
 		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs);
-		init_rt_rq(&rq->rt, rq);
-		init_dl_rq(&rq->dl, rq);
+		init_rt_rq(&rq->rt);
+		init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7199,7 +7218,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
-		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
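With capacity_orig gone from the sched groups, the per-rq rq->cpu_capacity_orig initialized above keeps the CPU's full capacity while rq->cpu_capacity tracks what remains for CFS once RT/IRQ time is accounted for. A rough, stand-alone illustration of that relationship (the helper and the 25% figure are made up for the example; this is not the kernel's update_cpu_capacity()):

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024ULL

/* Capacity left for CFS once a given percentage is lost to RT/IRQ work. */
static uint64_t cfs_capacity(uint64_t capacity_orig, unsigned int lost_pct)
{
	return capacity_orig * (100 - lost_pct) / 100;
}

int main(void)
{
	uint64_t orig = SCHED_CAPACITY_SCALE;

	printf("cpu_capacity_orig = %llu, cpu_capacity ~= %llu\n",
	       (unsigned long long)orig,
	       (unsigned long long)cfs_capacity(orig, 25));
	return 0;
}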
@@ -7798,7 +7817,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
 	u64 runtime = global_rt_runtime();
 	u64 period = global_rt_period();
@@ -7899,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
 	if (ret)
 		goto undo;
 
-	ret = sched_rt_global_constraints();
+	ret = sched_dl_global_validate();
 	if (ret)
 		goto undo;
 
-	ret = sched_dl_global_constraints();
+	ret = sched_rt_global_constraints();
 	if (ret)
 		goto undo;
 
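Both validations are reached through the kernel.sched_rt_* sysctls; the reorder simply runs the (renamed) deadline check before the RT one. A small user-space sketch of how the handler gets exercised, using the standard procfs path for this sysctl (needs root, and it merely rewrites the default value):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_rt_runtime_us", "w");

	if (!f) {
		perror("sched_rt_runtime_us");
		return 1;
	}
	/* Re-write the default: 950000us of RT time per 1s period. */
	fprintf(f, "%d\n", 950000);
	return fclose(f) ? 1 : 0;
}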