 kernel/sched/core.c  | 13
 kernel/sched/fair.c  | 72
 kernel/sched/sched.h |  2
 3 files changed, 66 insertions, 21 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 017d5394f5dc..51d7105f529a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1536,7 +1536,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_active(dest_cpu))
+			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
+				continue;
+			if (!cpu_online(dest_cpu))
 				continue;
 			goto out;
 		}
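The added PF_KTHREAD test changes the fallback rule: during CPU hotplug a kernel thread may be placed on a CPU that is online but not yet active, while user tasks still require an active CPU. A hypothetical helper (not part of the patch, shown only to restate the two checks from the hunk above):

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Illustration only: the per-CPU fallback rule encoded in the hunk above. */
static bool fallback_cpu_allowed(struct task_struct *p, int dest_cpu)
{
	/* User tasks must stay on active CPUs ... */
	if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
		return false;
	/* ... kernel threads only need the CPU to be online. */
	return cpu_online(dest_cpu);
}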
@@ -2535,10 +2537,9 @@ void wake_up_new_task(struct task_struct *p)
 	 */
 	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
-	/* Post initialize new task's util average when its cfs_rq is set */
+	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);
 
-	rq = __task_rq_lock(p, &rf);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
@@ -5148,14 +5149,16 @@ void show_state_filter(unsigned long state_filter)
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take a lot of time:
+		 * Also, reset softlockup watchdogs on all CPUs, because
+		 * another CPU might be blocked waiting for us to process
+		 * an IPI.
 		 */
 		touch_nmi_watchdog();
+		touch_all_softlockup_watchdogs();
 		if (!state_filter || (p->state & state_filter))
 			sched_show_task(p);
 	}
 
-	touch_all_softlockup_watchdogs();
-
 #ifdef CONFIG_SCHED_DEBUG
 	if (!state_filter)
 		sysrq_sched_debug_show();
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e83db73..bdcbeea90c95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	}
 }
 
+/*
+ * Unsigned subtract and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define sub_positive(_ptr, _val) do {				\
+	typeof(_ptr) ptr = (_ptr);				\
+	typeof(*ptr) val = (_val);				\
+	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
+	res = var - val;					\
+	if (res > var)						\
+		res = 0;					\
+	WRITE_ONCE(*ptr, res);					\
+} while (0)
+
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
@@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-		sa->load_avg = max_t(long, sa->load_avg - r, 0);
-		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->load_avg, r);
+		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
 		removed_load = 1;
 	}
 
 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-		sa->util_avg = max_t(long, sa->util_avg - r, 0);
-		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->util_avg, r);
+		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
 		removed_util = 1;
 	}
 
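The sub_positive() conversions above replace the old max_t()-based clamping with a single load-store, so a transiently negative value never hits memory. A minimal user-space sketch of the clamp-on-underflow arithmetic (plain C, with the kernel's READ_ONCE()/WRITE_ONCE() annotations omitted, so it illustrates only the clamping, not the lockless-observation guarantee; sub_positive_demo is a made-up name):

#include <assert.h>

/* Subtract and clamp at zero instead of wrapping around on underflow. */
#define sub_positive_demo(_ptr, _val) do {		\
	unsigned long *ptr = (_ptr);			\
	unsigned long val = (_val);			\
	unsigned long var = *ptr;			\
	unsigned long res = var - val;			\
	if (res > var)	/* wrapped => underflow */	\
		res = 0;				\
	*ptr = res;					\
} while (0)

int main(void)
{
	unsigned long load_avg = 5;

	sub_positive_demo(&load_avg, 7);	/* would underflow: clamps to 0 */
	assert(load_avg == 0);

	load_avg = 10;
	sub_positive_demo(&load_avg, 3);	/* normal case */
	assert(load_avg == 7);
	return 0;
}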
@@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
 
-	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-	cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3246,7 +3263,7 @@ static inline void check_schedstat_required(void)
 	    trace_sched_stat_iowait_enabled()  ||
 	    trace_sched_stat_blocked_enabled() ||
 	    trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");
@@ -4185,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
 
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
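The throttle_uptodate block above handles a cfs_rq being enqueued before the hierarchical throttle counter has ever been propagated down to it: on first enqueue the leaf walks up to the closest ancestor whose counter is already valid and inherits it. A stripped-down sketch of that lazy-inheritance pattern, using hypothetical names (struct node, sync_count) rather than the scheduler's types:

#include <stddef.h>

/*
 * Illustration only: each node lazily inherits a counter from the closest
 * ancestor that already holds a trustworthy value, mirroring the
 * throttle_uptodate walk in the hunk above.
 */
struct node {
	struct node *parent;
	int uptodate;
	int count;
};

static void sync_count(struct node *n)
{
	struct node *p;

	if (n->uptodate)
		return;
	n->uptodate = 1;

	/* Walk up until the first ancestor whose counter is up to date. */
	for (p = n->parent; p; p = p->parent)
		if (p->uptodate)
			break;
	if (p)
		n->count = p->count;	/* inherit; otherwise keep the default */
}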
@@ -4500,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
@@ -8496,8 +8532,9 @@ void free_fair_sched_group(struct task_group *tg)
 
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
-	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+	struct rq *rq;
 	int i;
 
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8512,6 +8549,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
 	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
@@ -8525,7 +8564,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+
+		raw_spin_lock_irq(&rq->lock);
 		post_init_entity_util_avg(se);
+		raw_spin_unlock_irq(&rq->lock);
 	}
 
 	return 1;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 72f1f3087b04..7cbeb92a1cb9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -437,7 +437,7 @@ struct cfs_rq {
 
 	u64 throttled_clock, throttled_clock_task;
 	u64 throttled_clock_task_time;
-	int throttled, throttle_count;
+	int throttled, throttle_count, throttle_uptodate;
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
