Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c | 218
 1 files changed, 114 insertions, 104 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index e7f2cfa6a257..ff39cadf621e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  * default: 0.25ms
  */
 unsigned int sysctl_sched_shares_ratelimit = 250000;
+unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
 
 /*
  * Inject some fuzzyness into changing the per-cpu group shares
@@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long weight, rq_weight = 0, shares = 0;
+	unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
 	unsigned long *usd_rq_weight;
 	struct sched_domain *sd = data;
 	unsigned long flags;
@@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 		weight = tg->cfs_rq[i]->load.weight;
 		usd_rq_weight[i] = weight;
 
+		rq_weight += weight;
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
@@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
 		if (!weight)
 			weight = NICE_0_LOAD;
 
-		rq_weight += weight;
+		sum_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
+	if (!rq_weight)
+		rq_weight = sum_weight;
+
 	if ((!shares && rq_weight) || shares > tg->shares)
 		shares = tg->shares;
 
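Note: rq_weight now accumulates the raw per-cpu load inside the loop, while sum_weight keeps the old behaviour of counting an assumed NICE_0_LOAD for idle CPUs; the boosted sum is only used as a fallback when the raw total is zero. A minimal userspace sketch of that arithmetic (a plain array stands in for the per-cpu data, and the NICE_0_LOAD value is assumed):

#include <stdio.h>

#define NICE_0_LOAD 1024UL	/* assumed stand-in for the kernel constant */

/* Illustrative model of the rq_weight/sum_weight split in tg_shares_up(). */
static unsigned long effective_rq_weight(const unsigned long *load, int ncpus)
{
	unsigned long rq_weight = 0, sum_weight = 0;
	int i;

	for (i = 0; i < ncpus; i++) {
		unsigned long weight = load[i];

		rq_weight += weight;		/* raw per-cpu load only */
		if (!weight)
			weight = NICE_0_LOAD;	/* pretend one average task on idle CPUs */
		sum_weight += weight;		/* load including the idle-CPU boost */
	}

	/* Only fall back to the boosted sum when the whole group is idle. */
	if (!rq_weight)
		rq_weight = sum_weight;
	return rq_weight;
}

int main(void)
{
	unsigned long busy[] = { 2048, 0, 512, 0 };
	unsigned long idle[] = { 0, 0, 0, 0 };

	printf("partly busy: %lu\n", effective_rq_weight(busy, 4)); /* 2560 */
	printf("all idle:    %lu\n", effective_rq_weight(idle, 4)); /* 4096 */
	return 0;
}

With at least one busy CPU the raw total is used unchanged; with an all-idle group the shares end up distributed as if each CPU carried one average task.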
@@ -1810,6 +1815,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 #endif
 
 static void calc_load_account_active(struct rq *this_rq);
+static void update_sysctl(void);
+static int get_update_sysctl_factor(void);
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfuly executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
+	task_thread_info(p)->cpu = cpu;
+#endif
+}
 
 #include "sched_stats.h"
 #include "sched_idletask.c"
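The relocated __set_task_cpu() keeps its ordering comment: the per-task updates must be visible before the new ->cpu value is published, which is what the smp_wmb() provides on the writer side. A loose userspace analogue using C11 atomics (this is not the kernel's barrier machinery; names and values are illustrative only):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical record: 'data' stands in for the per-task state that must be
 * visible before the new cpu value is published. */
static int data;
static atomic_int cpu = -1;

static void *writer(void *arg)
{
	(void)arg;
	data = 42;					/* per-task updates */
	atomic_thread_fence(memory_order_release);	/* roughly smp_wmb() */
	atomic_store_explicit(&cpu, 1, memory_order_relaxed);	/* publish */
	return NULL;
}

static void *reader(void *arg)
{
	(void)arg;
	/* The acquire load pairs with the release fence above, so once the new
	 * cpu value is observed, data is guaranteed to read 42. */
	while (atomic_load_explicit(&cpu, memory_order_acquire) != 1)
		;
	printf("data = %d\n", data);
	return NULL;
}

int main(void)
{
	pthread_t r, w;

	pthread_create(&r, NULL, reader, NULL);
	pthread_create(&w, NULL, writer, NULL);
	pthread_join(r, NULL);
	pthread_join(w, NULL);
	return 0;
}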
@@ -1967,20 +1988,6 @@ inline int task_curr(const struct task_struct *p)
 	return cpu_curr(task_cpu(p)) == p;
 }
 
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
-	set_task_rq(p, cpu);
-#ifdef CONFIG_SMP
-	/*
-	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
-	 * successfuly executed on another CPU. We must ensure that updates of
-	 * per-task data have been completed by this moment.
-	 */
-	smp_wmb();
-	task_thread_info(p)->cpu = cpu;
-#endif
-}
-
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 				       const struct sched_class *prev_class,
 				       int oldprio, int running)
@@ -2060,29 +2067,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
 	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
 		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
-	u64 clock_offset;
-
-	clock_offset = old_rq->clock - new_rq->clock;
 
 	trace_sched_migrate_task(p, new_cpu);
 
-#ifdef CONFIG_SCHEDSTATS
-	if (p->se.wait_start)
-		p->se.wait_start -= clock_offset;
-	if (p->se.sleep_start)
-		p->se.sleep_start -= clock_offset;
-	if (p->se.block_start)
-		p->se.block_start -= clock_offset;
-#endif
 	if (old_cpu != new_cpu) {
 		p->se.nr_migrations++;
-#ifdef CONFIG_SCHEDSTATS
-		if (task_hot(p, old_rq->clock, NULL))
-			schedstat_inc(p, se.nr_forced2_migrations);
-#endif
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
@@ -2323,6 +2314,14 @@ void task_oncpu_function_call(struct task_struct *p,
 	preempt_enable();
 }
 
+#ifdef CONFIG_SMP
+static inline
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+}
+#endif
+
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -2374,17 +2373,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 	p->state = TASK_WAKING;
-	task_rq_unlock(rq, &flags);
+	__task_rq_unlock(rq);
 
-	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu) {
-		local_irq_save(flags);
-		rq = cpu_rq(cpu);
-		update_rq_clock(rq);
+	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+	if (cpu != orig_cpu)
 		set_task_cpu(p, cpu);
-		local_irq_restore(flags);
-	}
-	rq = task_rq_lock(p, &flags);
+
+	rq = __task_rq_lock(p);
+	update_rq_clock(rq);
 
 	WARN_ON(p->state != TASK_WAKING);
 	cpu = task_cpu(p);
@@ -2499,7 +2495,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_overlap = 0;
 	p->se.start_runtime = 0;
 	p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
-	p->se.avg_running = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
@@ -2521,7 +2516,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.nr_failed_migrations_running = 0;
 	p->se.nr_failed_migrations_hot = 0;
 	p->se.nr_forced_migrations = 0;
-	p->se.nr_forced2_migrations = 0;
 
 	p->se.nr_wakeups = 0;
 	p->se.nr_wakeups_sync = 0;
@@ -2558,7 +2552,6 @@ static void __sched_fork(struct task_struct *p)
 void sched_fork(struct task_struct *p, int clone_flags)
 {
 	int cpu = get_cpu();
-	unsigned long flags;
 
 	__sched_fork(p);
 
@@ -2592,13 +2585,13 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (!rt_prio(p->prio))
 		p->sched_class = &fair_sched_class;
 
+	if (p->sched_class->task_fork)
+		p->sched_class->task_fork(p);
+
 #ifdef CONFIG_SMP
-	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+	cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
 #endif
-	local_irq_save(flags);
-	update_rq_clock(cpu_rq(cpu));
 	set_task_cpu(p, cpu);
-	local_irq_restore(flags);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -2631,17 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
 	update_rq_clock(rq);
-
-	if (!p->sched_class->task_new || !current->se.on_rq) {
-		activate_task(rq, p, 0);
-	} else {
-		/*
-		 * Let the scheduling class do new task startup
-		 * management (if any):
-		 */
-		p->sched_class->task_new(rq, p);
-		inc_nr_running(rq);
-	}
+	activate_task(rq, p, 0);
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
@@ -3156,7 +3139,7 @@ out:
 void sched_exec(void)
 {
 	int new_cpu, this_cpu = get_cpu();
-	new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
+	new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
 	put_cpu();
 	if (new_cpu != this_cpu)
 		sched_migrate_task(current, new_cpu);
@@ -3172,10 +3155,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
 	deactivate_task(src_rq, p, 0);
 	set_task_cpu(p, this_cpu);
 	activate_task(this_rq, p, 0);
-	/*
-	 * Note that idle threads have a prio of MAX_PRIO, for this test
-	 * to be always true for them.
-	 */
 	check_preempt_curr(this_rq, p, 0);
 }
 
@@ -4134,7 +4113,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	unsigned long flags;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_copy(cpus, cpu_online_mask);
+	cpumask_copy(cpus, cpu_active_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4297,7 +4276,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	int all_pinned = 0;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_copy(cpus, cpu_online_mask);
+	cpumask_copy(cpus, cpu_active_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4694,7 +4673,7 @@ int select_nohz_load_balancer(int stop_tick)
 		cpumask_set_cpu(cpu, nohz.cpu_mask);
 
 		/* time for ilb owner also to sleep */
-		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
+		if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
 				atomic_set(&nohz.load_balancer, -1);
 			return 0;
@@ -5396,13 +5375,14 @@ static inline void schedule_debug(struct task_struct *prev)
 #endif
 }
 
-static void put_prev_task(struct rq *rq, struct task_struct *p)
+static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
+	if (prev->state == TASK_RUNNING) {
+		u64 runtime = prev->se.sum_exec_runtime;
 
-	update_avg(&p->se.avg_running, runtime);
+		runtime -= prev->se.prev_sum_exec_runtime;
+		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
 
-	if (p->state == TASK_RUNNING) {
 		/*
 		 * In order to avoid avg_overlap growing stale when we are
 		 * indeed overlapping and hence not getting put to sleep, grow
@@ -5412,12 +5392,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
 		 * correlates to the amount of cache footprint a task can
 		 * build up.
 		 */
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-		update_avg(&p->se.avg_overlap, runtime);
-	} else {
-		update_avg(&p->se.avg_running, 0);
+		update_avg(&prev->se.avg_overlap, runtime);
 	}
-	p->sched_class->put_prev_task(rq, p);
+	prev->sched_class->put_prev_task(rq, prev);
 }
 
 /*
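put_prev_task() now folds the just-completed slice into avg_overlap only for tasks that are still runnable, after clamping it to 2*sysctl_sched_migration_cost. A small sketch of that update, assuming update_avg() behaves like the kernel's usual 1/8-weight moving average and using made-up nanosecond values:

#include <stdint.h>
#include <stdio.h>

/* Assumed value; the real knob is the sched_migration_cost sysctl (ns). */
static const uint64_t sysctl_sched_migration_cost = 500000;

/* Rough model of the kernel's update_avg(): an exponentially weighted
 * moving average that moves 1/8th of the way toward each new sample. */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)sample - (int64_t)*avg;
	*avg += diff >> 3;
}

int main(void)
{
	uint64_t avg_overlap = 0;
	uint64_t sum_exec_runtime = 10000000;		/* total runtime so far */
	uint64_t prev_sum_exec_runtime = 9200000;	/* total when last picked */

	/* Runtime since the task was last picked, as in put_prev_task(). */
	uint64_t runtime = sum_exec_runtime - prev_sum_exec_runtime;

	/* Clamp so one long slice cannot blow up the average. */
	if (runtime > 2 * sysctl_sched_migration_cost)
		runtime = 2 * sysctl_sched_migration_cost;

	update_avg(&avg_overlap, runtime);
	printf("clamped runtime %llu ns, avg_overlap %llu ns\n",
	       (unsigned long long)runtime, (unsigned long long)avg_overlap);
	return 0;
}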
@@ -6631,6 +6608,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
 long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
+	unsigned long flags;
+	struct rq *rq;
 	int retval;
 
 	get_online_cpus();
@@ -6645,7 +6624,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	if (retval)
 		goto out_unlock;
 
+	rq = task_rq_lock(p, &flags);
 	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+	task_rq_unlock(rq, &flags);
 
 out_unlock:
 	read_unlock(&tasklist_lock);
@@ -6883,6 +6864,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 {
 	struct task_struct *p;
 	unsigned int time_slice;
+	unsigned long flags;
+	struct rq *rq;
 	int retval;
 	struct timespec t;
 
@@ -6899,7 +6882,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	if (retval)
 		goto out_unlock;
 
-	time_slice = p->sched_class->get_rr_interval(p);
+	rq = task_rq_lock(p, &flags);
+	time_slice = p->sched_class->get_rr_interval(rq, p);
+	task_rq_unlock(rq, &flags);
 
 	read_unlock(&tasklist_lock);
 	jiffies_to_timespec(time_slice, &t);
@@ -7000,7 +6985,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	__sched_fork(idle);
 	idle->se.exec_start = sched_clock();
 
-	idle->prio = idle->normal_prio = MAX_PRIO;
 	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
 	__set_task_cpu(idle, cpu);
 
@@ -7041,22 +7025,43 @@ cpumask_var_t nohz_cpu_mask;
  *
  * This idea comes from the SD scheduler of Con Kolivas:
  */
-static inline void sched_init_granularity(void)
+static int get_update_sysctl_factor(void)
 {
-	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long limit = 200000000;
+	unsigned int cpus = min_t(int, num_online_cpus(), 8);
+	unsigned int factor;
+
+	switch (sysctl_sched_tunable_scaling) {
+	case SCHED_TUNABLESCALING_NONE:
+		factor = 1;
+		break;
+	case SCHED_TUNABLESCALING_LINEAR:
+		factor = cpus;
+		break;
+	case SCHED_TUNABLESCALING_LOG:
+	default:
+		factor = 1 + ilog2(cpus);
+		break;
+	}
 
-	sysctl_sched_min_granularity *= factor;
-	if (sysctl_sched_min_granularity > limit)
-		sysctl_sched_min_granularity = limit;
+	return factor;
+}
 
-	sysctl_sched_latency *= factor;
-	if (sysctl_sched_latency > limit)
-		sysctl_sched_latency = limit;
+static void update_sysctl(void)
+{
+	unsigned int factor = get_update_sysctl_factor();
 
-	sysctl_sched_wakeup_granularity *= factor;
+#define SET_SYSCTL(name) \
+	(sysctl_##name = (factor) * normalized_sysctl_##name)
+	SET_SYSCTL(sched_min_granularity);
+	SET_SYSCTL(sched_latency);
+	SET_SYSCTL(sched_wakeup_granularity);
+	SET_SYSCTL(sched_shares_ratelimit);
+#undef SET_SYSCTL
+}
 
-	sysctl_sched_shares_ratelimit *= factor;
+static inline void sched_init_granularity(void)
+{
+	update_sysctl();
 }
 
 #ifdef CONFIG_SMP
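get_update_sysctl_factor() caps the CPU count at 8 and scales each normalized tunable by 1 (none), cpus (linear), or 1 + ilog2(cpus) (log, the default). A worked example of the factor, using an assumed 6 ms normalized latency purely for illustration:

#include <stdio.h>

/* Mirror of the three scaling modes introduced by the hunk (names assumed
 * to track the SCHED_TUNABLESCALING_* values added alongside it). */
enum { TUNABLESCALING_NONE, TUNABLESCALING_LINEAR, TUNABLESCALING_LOG };

static unsigned int ilog2_u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int factor(int mode, unsigned int online_cpus)
{
	unsigned int cpus = online_cpus < 8 ? online_cpus : 8; /* capped at 8 */

	switch (mode) {
	case TUNABLESCALING_NONE:
		return 1;
	case TUNABLESCALING_LINEAR:
		return cpus;
	case TUNABLESCALING_LOG:
	default:
		return 1 + ilog2_u(cpus);
	}
}

int main(void)
{
	/* Assumed baseline for illustration only. */
	unsigned int normalized_sched_latency_ns = 6000000;
	unsigned int cpus[] = { 1, 2, 4, 8, 64 };

	for (unsigned int i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++) {
		unsigned int f = factor(TUNABLESCALING_LOG, cpus[i]);

		printf("%2u cpus -> factor %u -> sched_latency %u ns\n",
		       cpus[i], f, f * normalized_sched_latency_ns);
	}
	return 0;
}

With the default logarithmic mode, 8 CPUs give a factor of 1 + ilog2(8) = 4, and because the count is capped at 8 the factor stays at 4 on larger machines.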
@@ -7093,7 +7098,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpumask_intersects(new_mask, cpu_online_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -7115,7 +7120,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+	if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		struct task_struct *mt = rq->migration_thread;
 
@@ -7269,19 +7274,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 
 again:
 	/* Look for allowed, online CPU in same node. */
-	for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
 		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 			goto move;
 
 	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
 	if (dest_cpu < nr_cpu_ids)
 		goto move;
 
 	/* No more Mr. Nice Guy. */
 	if (dest_cpu >= nr_cpu_ids) {
 		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-		dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
+		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
 
 		/*
 		 * Don't tell them about moving exiting tasks or
@@ -7310,7 +7315,7 @@ move:
  */
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
+	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -7563,7 +7568,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 static struct ctl_table_header *sd_sysctl_header;
 static void register_sched_domain_sysctl(void)
 {
-	int i, cpu_num = num_online_cpus();
+	int i, cpu_num = num_possible_cpus();
 	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
 	char buf[32];
 
@@ -7573,7 +7578,7 @@ static void register_sched_domain_sysctl(void)
 	if (entry == NULL)
 		return;
 
-	for_each_online_cpu(i) {
+	for_each_possible_cpu(i) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
 		entry->mode = 0555;
@@ -7703,7 +7708,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		spin_lock_irq(&rq->lock);
 		update_rq_clock(rq);
 		deactivate_task(rq, rq->idle, 0);
-		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
 		rq->idle->sched_class = &idle_sched_class;
 		migrate_dead_tasks(cpu);
@@ -9099,7 +9103,7 @@ match1:
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
-		cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
+		cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
 
@@ -9230,8 +9234,10 @@ static int update_sched_domains(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		partition_sched_domains(1, NULL, NULL);
 		return NOTIFY_OK;
 
@@ -9278,7 +9284,7 @@ void __init sched_init_smp(void)
 #endif
 	get_online_cpus();
 	mutex_lock(&sched_domains_mutex);
-	arch_init_sched_domains(cpu_online_mask);
+	arch_init_sched_domains(cpu_active_mask);
 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 	if (cpumask_empty(non_isolated_cpus))
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -9842,13 +9848,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		se = kzalloc_node(sizeof(struct sched_entity),
 				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
-			goto err;
+			goto err_free_rq;
 
 		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
 	}
 
 	return 1;
 
+err_free_rq:
+	kfree(cfs_rq);
 err:
 	return 0;
 }
@@ -9930,13 +9938,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
 				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_se)
-			goto err;
+			goto err_free_rq;
 
 		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
 	}
 
 	return 1;
 
+err_free_rq:
+	kfree(rt_rq);
 err:
 	return 0;
 }
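Both allocators now unwind through a dedicated err_free_rq label so the cfs_rq/rt_rq allocated earlier in the same loop iteration is released when the matching entity allocation fails. A generic sketch of that goto-unwind shape (types and names hypothetical, userspace malloc/free standing in for the kernel allocators):

#include <stdlib.h>

struct a { int x; };
struct b { int y; };

/* Each later failure jumps to a label that frees everything allocated
 * before it, so nothing leaks on the error path. */
static int alloc_pair(struct a **pa, struct b **pb)
{
	struct a *a;
	struct b *b;

	a = malloc(sizeof(*a));
	if (!a)
		goto err;

	b = malloc(sizeof(*b));
	if (!b)
		goto err_free_a;	/* free what the earlier step allocated */

	*pa = a;
	*pb = b;
	return 1;

err_free_a:
	free(a);
err:
	return 0;
}

int main(void)
{
	struct a *a;
	struct b *b;

	return alloc_pair(&a, &b) ? 0 : 1;
}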