Diffstat (limited to 'kernel/sched/core.c')
 -rw-r--r--  kernel/sched/core.c  121
 1 file changed, 71 insertions(+), 50 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e8a345..f9123a82cbb6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,6 +306,9 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/* cpus with isolated domains */
+cpumask_var_t cpu_isolated_map;
+
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
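
The hunk above turns cpu_isolated_map into a file-scope global (its old static definition is dropped further down, near cpu_attach_domain()), so scheduler code outside this translation unit can consult the mask built from the isolcpus= boot parameter. A minimal sketch of the kind of query this enables; the helper name is hypothetical, cpumask_test_cpu() is the standard cpumask accessor:

/*
 * Hypothetical helper (illustration only): with cpu_isolated_map no
 * longer static, a declaration like the extern below can live in a
 * shared header, and other scheduler code can test whether a CPU was
 * carved out via the isolcpus= boot parameter.
 */
#include <linux/cpumask.h>

extern cpumask_var_t cpu_isolated_map;

static inline bool demo_cpu_isolated(int cpu)
{
        return cpumask_test_cpu(cpu, cpu_isolated_map);
}
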
@@ -690,6 +693,23 @@ static inline bool got_nohz_idle_kick(void)
 bool sched_can_stop_tick(void)
 {
        /*
+        * FIFO realtime policy runs the highest priority task. Other runnable
+        * tasks are of a lower priority. The scheduler tick does nothing.
+        */
+       if (current->policy == SCHED_FIFO)
+               return true;
+
+       /*
+        * Round-robin realtime tasks time slice with other tasks at the same
+        * realtime priority. Is this task the only one at this priority?
+        */
+       if (current->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &current->rt;
+
+               return rt_se->run_list.prev == rt_se->run_list.next;
+       }
+
+       /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
         * after IPI is sent from wakers.
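
The SCHED_RR branch above decides "this task is alone at its priority" purely from the geometry of its run_list node: in a circular doubly linked list that contains only the queue head and one entry, that entry's prev and next both point at the head. A small userspace illustration of that property (plain C, not kernel code; the list helpers are re-implemented here just for the demo):

#include <stdio.h>

/* Minimal circular doubly linked list, same shape as the kernel's list_head. */
struct list_head {
        struct list_head *next, *prev;
};

static void list_init(struct list_head *head)
{
        head->next = head->prev = head;
}

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

int main(void)
{
        struct list_head queue, a, b;

        list_init(&queue);

        list_add_tail(&a, &queue);
        /* Only entry on the queue: both pointers go back to the head. */
        printf("a alone: %d\n", a.prev == a.next);      /* prints 1 */

        list_add_tail(&b, &queue);
        /* A second entry breaks the symmetry: a.next is now b. */
        printf("a alone: %d\n", a.prev == a.next);      /* prints 0 */
        return 0;
}
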
@@ -996,6 +1016,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
                rq_clock_skip_update(rq, true);
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+       atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1026,10 +1053,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
        trace_sched_migrate_task(p, new_cpu);
 
        if (task_cpu(p) != new_cpu) {
+               struct task_migration_notifier tmn;
+
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
                perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+
+               tmn.task = p;
+               tmn.from_cpu = task_cpu(p);
+               tmn.to_cpu = new_cpu;
+
+               atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
        }
 
        __set_task_cpu(p, new_cpu);
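
register_task_migration_notifier() and the atomic_notifier_call_chain() call in set_task_cpu() give other kernel code a hook that fires whenever a task changes CPU. Below is a sketch of a hypothetical consumer: the callback, the pr_debug() message, and the early_initcall() wiring are illustrative, and the struct task_migration_notifier fields are assumed to match the assignments made in the hunk above (task, from_cpu, to_cpu):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/sched.h>

/* Hypothetical consumer of the notifier chain added by this patch. */
static int demo_migration_cb(struct notifier_block *nb,
                             unsigned long val, void *data)
{
        struct task_migration_notifier *tmn = data;

        /*
         * Called from set_task_cpu(), i.e. from scheduler context, so
         * the callback must not sleep.
         */
        pr_debug("task %d: CPU %d -> CPU %d\n",
                 task_pid_nr(tmn->task), tmn->from_cpu, tmn->to_cpu);
        return NOTIFY_OK;
}

static struct notifier_block demo_migration_nb = {
        .notifier_call = demo_migration_cb,
};

static int __init demo_migration_init(void)
{
        /* The patch only adds a register helper; there is no unregister. */
        register_task_migration_notifier(&demo_migration_nb);
        return 0;
}
early_initcall(demo_migration_init);
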
@@ -2818,7 +2853,7 @@ asmlinkage __visible void __sched schedule_user(void)
         * we find a better solution.
         *
         * NB: There are buggy callers of this function. Ideally we
-        * should warn if prev_state != IN_USER, but that will trigger
+        * should warn if prev_state != CONTEXT_USER, but that will trigger
         * too frequently to make sense yet.
         */
        enum ctx_state prev_state = exception_enter();
@@ -3034,6 +3069,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
+               if (rt_prio(oldprio))
+                       p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
 
@@ -5318,36 +5355,13 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
-       unsigned long flags;
-       long cpu = (long)hcpu;
-       struct dl_bw *dl_b;
-
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
-               set_cpu_active(cpu, false);
-
-               /* explicitly allow suspend */
-               if (!(action & CPU_TASKS_FROZEN)) {
-                       bool overflow;
-                       int cpus;
-
-                       rcu_read_lock_sched();
-                       dl_b = dl_bw_of(cpu);
-
-                       raw_spin_lock_irqsave(&dl_b->lock, flags);
-                       cpus = dl_bw_cpus(cpu);
-                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
-                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-                       rcu_read_unlock_sched();
-
-                       if (overflow)
-                               return notifier_from_errno(-EBUSY);
-               }
+               set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
        }
-
-       return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5428,17 +5442,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
 
-               /*
-                * Even though we initialize ->capacity to something semi-sane,
-                * we leave capacity_orig unset. This allows us to detect if
-                * domain iteration is still funny without causing /0 traps.
-                */
-               if (!group->sgc->capacity_orig) {
-                       printk(KERN_CONT "\n");
-                       printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-                       break;
-               }
-
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@ -5811,9 +5814,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        update_top_cache_domain(cpu);
 }
 
-/* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
-
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
@@ -5922,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-               sg->sgc->capacity_orig = sg->sgc->capacity;
 
                /*
                 * Make sure the first group of this domain contains the
@@ -6233,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
         */
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
+               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -6998,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
         */
 
        case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
@@ -7010,8 +7009,30 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
-       switch (action) {
+       unsigned long flags;
+       long cpu = (long)hcpu;
+       struct dl_bw *dl_b;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
+               /* explicitly allow suspend */
+               if (!(action & CPU_TASKS_FROZEN)) {
+                       bool overflow;
+                       int cpus;
+
+                       rcu_read_lock_sched();
+                       dl_b = dl_bw_of(cpu);
+
+                       raw_spin_lock_irqsave(&dl_b->lock, flags);
+                       cpus = dl_bw_cpus(cpu);
+                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
+                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+                       rcu_read_unlock_sched();
+
+                       if (overflow)
+                               return notifier_from_errno(-EBUSY);
+               }
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
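
The block that moves into cpuset_cpu_inactive() is an admission check: CPU_DOWN_PREPARE is refused with -EBUSY when the SCHED_DEADLINE bandwidth already granted would no longer fit on the capacity that remains. A simplified, self-contained model of the overflow test it calls (the real __dl_overflow() lives in the scheduler's internal headers; the struct and function names below are illustrative):

#include <stdbool.h>

/* Illustrative stand-in for the kernel's struct dl_bw. */
struct dl_bw_model {
        long long bw;           /* allowed bandwidth per CPU, -1 == no limit */
        long long total_bw;     /* bandwidth already granted to -deadline tasks */
};

/*
 * Sketch of the admission test: with old_bw == new_bw == 0, as in the
 * hunk above, this simply asks whether what has already been promised
 * still fits on 'cpus' CPUs.
 */
static bool dl_overflow_model(const struct dl_bw_model *dl_b, int cpus,
                              long long old_bw, long long new_bw)
{
        return dl_b->bw != -1 &&
               dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}
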
@@ -7156,8 +7177,8 @@ void __init sched_init(void)
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
-               init_rt_rq(&rq->rt, rq);
-               init_dl_rq(&rq->dl, rq);
+               init_rt_rq(&rq->rt);
+               init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7197,7 +7218,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+               rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -7796,7 +7817,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@ -7897,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
                if (ret)
                        goto undo;
 
-               ret = sched_rt_global_constraints();
+               ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
 
-               ret = sched_dl_global_constraints();
+               ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;
 
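
sched_dl_global_validate() (renamed from sched_dl_global_constraints() above) now runs before sched_rt_global_constraints() when the RT bandwidth sysctls are written, so a new global runtime/period pair is checked against admitted deadline bandwidth first. The handler sits behind /proc/sys/kernel/sched_rt_runtime_us and /proc/sys/kernel/sched_rt_period_us; a small userspace sketch that only reads the current values (illustrative, not part of the patch):

#include <stdio.h>

static long read_long(const char *path)
{
        long val = -1;
        FILE *f = fopen(path, "r");

        if (f) {
                if (fscanf(f, "%ld", &val) != 1)
                        val = -1;
                fclose(f);
        }
        return val;
}

int main(void)
{
        /* Defaults are typically 950000us of RT time per 1000000us period. */
        printf("sched_rt_runtime_us: %ld\n",
               read_long("/proc/sys/kernel/sched_rt_runtime_us"));
        printf("sched_rt_period_us:  %ld\n",
               read_long("/proc/sys/kernel/sched_rt_period_us"));
        return 0;
}
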
