diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-12 14:34:10 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-12 14:34:10 -0500 |
commit | 702a7c7609bec3a940b6a46b0d6ab9ce45274580 (patch) | |
tree | 6c169691449259410b9b51a146acb0e837dae96a | |
parent | 053fe57ac249a9531c396175778160d9e9509399 (diff) | |
parent | b9889ed1ddeca5a3f3569c8de7354e9e97d803ae (diff) |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (21 commits)
sched: Remove forced2_migrations stats
sched: Fix memory leak in two error corner cases
sched: Fix build warning in get_update_sysctl_factor()
sched: Update normalized values on user updates via proc
sched: Make tunable scaling style configurable
sched: Fix missing sched tunable recalculation on cpu add/remove
sched: Fix task priority bug
sched: cgroup: Implement different treatment for idle shares
sched: Remove unnecessary RCU exclusion
sched: Discard some old bits
sched: Clean up check_preempt_wakeup()
sched: Move update_curr() in check_preempt_wakeup() to avoid redundant call
sched: Sanitize fork() handling
sched: Clean up ttwu() rq locking
sched: Remove rq->clock coupling from set_task_cpu()
sched: Consolidate select_task_rq() callers
sched: Remove sysctl.sched_features
sched: Protect sched_rr_get_param() access to task->sched_class
sched: Protect task->cpus_allowed access in sched_getaffinity()
sched: Fix balance vs hotplug race
...
Fixed up conflicts in kernel/sysctl.c (due to sysctl cleanup)
-rw-r--r-- | include/linux/cpumask.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 20 | ||||
-rw-r--r-- | kernel/cpu.c | 18 | ||||
-rw-r--r-- | kernel/cpuset.c | 18 | ||||
-rw-r--r-- | kernel/sched.c | 218 | ||||
-rw-r--r-- | kernel/sched_debug.c | 13 | ||||
-rw-r--r-- | kernel/sched_fair.c | 155 | ||||
-rw-r--r-- | kernel/sched_features.h | 5 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 2 | ||||
-rw-r--r-- | kernel/sched_rt.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 30 |
11 files changed, 271 insertions, 212 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 789cf5f920ce..d77b54733c5b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
@@ -84,6 +84,7 @@ extern const struct cpumask *const cpu_active_mask; | |||
84 | #define num_online_cpus() cpumask_weight(cpu_online_mask) | 84 | #define num_online_cpus() cpumask_weight(cpu_online_mask) |
85 | #define num_possible_cpus() cpumask_weight(cpu_possible_mask) | 85 | #define num_possible_cpus() cpumask_weight(cpu_possible_mask) |
86 | #define num_present_cpus() cpumask_weight(cpu_present_mask) | 86 | #define num_present_cpus() cpumask_weight(cpu_present_mask) |
87 | #define num_active_cpus() cpumask_weight(cpu_active_mask) | ||
87 | #define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) | 88 | #define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) |
88 | #define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) | 89 | #define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) |
89 | #define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) | 90 | #define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) |
@@ -92,6 +93,7 @@ extern const struct cpumask *const cpu_active_mask; | |||
92 | #define num_online_cpus() 1 | 93 | #define num_online_cpus() 1 |
93 | #define num_possible_cpus() 1 | 94 | #define num_possible_cpus() 1 |
94 | #define num_present_cpus() 1 | 95 | #define num_present_cpus() 1 |
96 | #define num_active_cpus() 1 | ||
95 | #define cpu_online(cpu) ((cpu) == 0) | 97 | #define cpu_online(cpu) ((cpu) == 0) |
96 | #define cpu_possible(cpu) ((cpu) == 0) | 98 | #define cpu_possible(cpu) ((cpu) == 0) |
97 | #define cpu_present(cpu) ((cpu) == 0) | 99 | #define cpu_present(cpu) ((cpu) == 0) |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 3f4fa73b512a..294eb2f80144 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1102,7 +1102,7 @@ struct sched_class { | |||
1102 | 1102 | ||
1103 | void (*set_curr_task) (struct rq *rq); | 1103 | void (*set_curr_task) (struct rq *rq); |
1104 | void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); | 1104 | void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); |
1105 | void (*task_new) (struct rq *rq, struct task_struct *p); | 1105 | void (*task_fork) (struct task_struct *p); |
1106 | 1106 | ||
1107 | void (*switched_from) (struct rq *this_rq, struct task_struct *task, | 1107 | void (*switched_from) (struct rq *this_rq, struct task_struct *task, |
1108 | int running); | 1108 | int running); |
@@ -1111,7 +1111,8 @@ struct sched_class { | |||
1111 | void (*prio_changed) (struct rq *this_rq, struct task_struct *task, | 1111 | void (*prio_changed) (struct rq *this_rq, struct task_struct *task, |
1112 | int oldprio, int running); | 1112 | int oldprio, int running); |
1113 | 1113 | ||
1114 | unsigned int (*get_rr_interval) (struct task_struct *task); | 1114 | unsigned int (*get_rr_interval) (struct rq *rq, |
1115 | struct task_struct *task); | ||
1115 | 1116 | ||
1116 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1117 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1117 | void (*moved_group) (struct task_struct *p); | 1118 | void (*moved_group) (struct task_struct *p); |
@@ -1151,8 +1152,6 @@ struct sched_entity { | |||
1151 | u64 start_runtime; | 1152 | u64 start_runtime; |
1152 | u64 avg_wakeup; | 1153 | u64 avg_wakeup; |
1153 | 1154 | ||
1154 | u64 avg_running; | ||
1155 | |||
1156 | #ifdef CONFIG_SCHEDSTATS | 1155 | #ifdef CONFIG_SCHEDSTATS |
1157 | u64 wait_start; | 1156 | u64 wait_start; |
1158 | u64 wait_max; | 1157 | u64 wait_max; |
@@ -1175,7 +1174,6 @@ struct sched_entity { | |||
1175 | u64 nr_failed_migrations_running; | 1174 | u64 nr_failed_migrations_running; |
1176 | u64 nr_failed_migrations_hot; | 1175 | u64 nr_failed_migrations_hot; |
1177 | u64 nr_forced_migrations; | 1176 | u64 nr_forced_migrations; |
1178 | u64 nr_forced2_migrations; | ||
1179 | 1177 | ||
1180 | u64 nr_wakeups; | 1178 | u64 nr_wakeups; |
1181 | u64 nr_wakeups_sync; | 1179 | u64 nr_wakeups_sync; |
@@ -1904,14 +1902,22 @@ extern unsigned int sysctl_sched_wakeup_granularity; | |||
1904 | extern unsigned int sysctl_sched_shares_ratelimit; | 1902 | extern unsigned int sysctl_sched_shares_ratelimit; |
1905 | extern unsigned int sysctl_sched_shares_thresh; | 1903 | extern unsigned int sysctl_sched_shares_thresh; |
1906 | extern unsigned int sysctl_sched_child_runs_first; | 1904 | extern unsigned int sysctl_sched_child_runs_first; |
1905 | |||
1906 | enum sched_tunable_scaling { | ||
1907 | SCHED_TUNABLESCALING_NONE, | ||
1908 | SCHED_TUNABLESCALING_LOG, | ||
1909 | SCHED_TUNABLESCALING_LINEAR, | ||
1910 | SCHED_TUNABLESCALING_END, | ||
1911 | }; | ||
1912 | extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; | ||
1913 | |||
1907 | #ifdef CONFIG_SCHED_DEBUG | 1914 | #ifdef CONFIG_SCHED_DEBUG |
1908 | extern unsigned int sysctl_sched_features; | ||
1909 | extern unsigned int sysctl_sched_migration_cost; | 1915 | extern unsigned int sysctl_sched_migration_cost; |
1910 | extern unsigned int sysctl_sched_nr_migrate; | 1916 | extern unsigned int sysctl_sched_nr_migrate; |
1911 | extern unsigned int sysctl_sched_time_avg; | 1917 | extern unsigned int sysctl_sched_time_avg; |
1912 | extern unsigned int sysctl_timer_migration; | 1918 | extern unsigned int sysctl_timer_migration; |
1913 | 1919 | ||
1914 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 1920 | int sched_proc_update_handler(struct ctl_table *table, int write, |
1915 | void __user *buffer, size_t *length, | 1921 | void __user *buffer, size_t *length, |
1916 | loff_t *ppos); | 1922 | loff_t *ppos); |
1917 | #endif | 1923 | #endif |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 7c4e2713df0a..291ac586f37f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
212 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, | 212 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, |
213 | hcpu, -1, &nr_calls); | 213 | hcpu, -1, &nr_calls); |
214 | if (err == NOTIFY_BAD) { | 214 | if (err == NOTIFY_BAD) { |
215 | set_cpu_active(cpu, true); | ||
216 | |||
215 | nr_calls--; | 217 | nr_calls--; |
216 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, | 218 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, |
217 | hcpu, nr_calls, NULL); | 219 | hcpu, nr_calls, NULL); |
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
223 | 225 | ||
224 | /* Ensure that we are not runnable on dying cpu */ | 226 | /* Ensure that we are not runnable on dying cpu */ |
225 | cpumask_copy(old_allowed, &current->cpus_allowed); | 227 | cpumask_copy(old_allowed, &current->cpus_allowed); |
226 | set_cpus_allowed_ptr(current, | 228 | set_cpus_allowed_ptr(current, cpu_active_mask); |
227 | cpumask_of(cpumask_any_but(cpu_online_mask, cpu))); | ||
228 | 229 | ||
229 | err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); | 230 | err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); |
230 | if (err) { | 231 | if (err) { |
232 | set_cpu_active(cpu, true); | ||
231 | /* CPU didn't die: tell everyone. Can't complain. */ | 233 | /* CPU didn't die: tell everyone. Can't complain. */ |
232 | if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, | 234 | if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, |
233 | hcpu) == NOTIFY_BAD) | 235 | hcpu) == NOTIFY_BAD) |
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu) | |||
292 | 294 | ||
293 | err = _cpu_down(cpu, 0); | 295 | err = _cpu_down(cpu, 0); |
294 | 296 | ||
295 | if (cpu_online(cpu)) | ||
296 | set_cpu_active(cpu, true); | ||
297 | |||
298 | out: | 297 | out: |
299 | cpu_maps_update_done(); | 298 | cpu_maps_update_done(); |
300 | stop_machine_destroy(); | 299 | stop_machine_destroy(); |
@@ -387,6 +386,15 @@ int disable_nonboot_cpus(void) | |||
387 | * with the userspace trying to use the CPU hotplug at the same time | 386 | * with the userspace trying to use the CPU hotplug at the same time |
388 | */ | 387 | */ |
389 | cpumask_clear(frozen_cpus); | 388 | cpumask_clear(frozen_cpus); |
389 | |||
390 | for_each_online_cpu(cpu) { | ||
391 | if (cpu == first_cpu) | ||
392 | continue; | ||
393 | set_cpu_active(cpu, false); | ||
394 | } | ||
395 | |||
396 | synchronize_sched(); | ||
397 | |||
390 | printk("Disabling non-boot CPUs ...\n"); | 398 | printk("Disabling non-boot CPUs ...\n"); |
391 | for_each_online_cpu(cpu) { | 399 | for_each_online_cpu(cpu) { |
392 | if (cpu == first_cpu) | 400 | if (cpu == first_cpu) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3cf2183b472d..ba401fab459f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused) | |||
737 | { | 737 | { |
738 | } | 738 | } |
739 | 739 | ||
740 | static int generate_sched_domains(struct cpumask **domains, | 740 | static int generate_sched_domains(cpumask_var_t **domains, |
741 | struct sched_domain_attr **attributes) | 741 | struct sched_domain_attr **attributes) |
742 | { | 742 | { |
743 | *domains = NULL; | 743 | *domains = NULL; |
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
872 | if (retval < 0) | 872 | if (retval < 0) |
873 | return retval; | 873 | return retval; |
874 | 874 | ||
875 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) | 875 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) |
876 | return -EINVAL; | 876 | return -EINVAL; |
877 | } | 877 | } |
878 | retval = validate_change(cs, trialcs); | 878 | retval = validate_change(cs, trialcs); |
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2010 | } | 2010 | } |
2011 | 2011 | ||
2012 | /* Continue past cpusets with all cpus, mems online */ | 2012 | /* Continue past cpusets with all cpus, mems online */ |
2013 | if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && | 2013 | if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) && |
2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
2015 | continue; | 2015 | continue; |
2016 | 2016 | ||
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2019 | /* Remove offline cpus and mems from this cpuset. */ | 2019 | /* Remove offline cpus and mems from this cpuset. */ |
2020 | mutex_lock(&callback_mutex); | 2020 | mutex_lock(&callback_mutex); |
2021 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, | 2021 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, |
2022 | cpu_online_mask); | 2022 | cpu_active_mask); |
2023 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2023 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
2024 | node_states[N_HIGH_MEMORY]); | 2024 | node_states[N_HIGH_MEMORY]); |
2025 | mutex_unlock(&callback_mutex); | 2025 | mutex_unlock(&callback_mutex); |
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2057 | switch (phase) { | 2057 | switch (phase) { |
2058 | case CPU_ONLINE: | 2058 | case CPU_ONLINE: |
2059 | case CPU_ONLINE_FROZEN: | 2059 | case CPU_ONLINE_FROZEN: |
2060 | case CPU_DEAD: | 2060 | case CPU_DOWN_PREPARE: |
2061 | case CPU_DEAD_FROZEN: | 2061 | case CPU_DOWN_PREPARE_FROZEN: |
2062 | case CPU_DOWN_FAILED: | ||
2063 | case CPU_DOWN_FAILED_FROZEN: | ||
2062 | break; | 2064 | break; |
2063 | 2065 | ||
2064 | default: | 2066 | default: |
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2067 | 2069 | ||
2068 | cgroup_lock(); | 2070 | cgroup_lock(); |
2069 | mutex_lock(&callback_mutex); | 2071 | mutex_lock(&callback_mutex); |
2070 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); | 2072 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2071 | mutex_unlock(&callback_mutex); | 2073 | mutex_unlock(&callback_mutex); |
2072 | scan_for_empty_cpusets(&top_cpuset); | 2074 | scan_for_empty_cpusets(&top_cpuset); |
2073 | ndoms = generate_sched_domains(&doms, &attr); | 2075 | ndoms = generate_sched_domains(&doms, &attr); |
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2114 | 2116 | ||
2115 | void __init cpuset_init_smp(void) | 2117 | void __init cpuset_init_smp(void) |
2116 | { | 2118 | { |
2117 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); | 2119 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2118 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2120 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2119 | 2121 | ||
2120 | hotcpu_notifier(cpuset_track_online_cpus, 0); | 2122 | hotcpu_notifier(cpuset_track_online_cpus, 0); |
diff --git a/kernel/sched.c b/kernel/sched.c index e7f2cfa6a257..ff39cadf621e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
814 | * default: 0.25ms | 814 | * default: 0.25ms |
815 | */ | 815 | */ |
816 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
817 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
817 | 818 | ||
818 | /* | 819 | /* |
819 | * Inject some fuzzyness into changing the per-cpu group shares | 820 | * Inject some fuzzyness into changing the per-cpu group shares |
@@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1614 | */ | 1615 | */ |
1615 | static int tg_shares_up(struct task_group *tg, void *data) | 1616 | static int tg_shares_up(struct task_group *tg, void *data) |
1616 | { | 1617 | { |
1617 | unsigned long weight, rq_weight = 0, shares = 0; | 1618 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; |
1618 | unsigned long *usd_rq_weight; | 1619 | unsigned long *usd_rq_weight; |
1619 | struct sched_domain *sd = data; | 1620 | struct sched_domain *sd = data; |
1620 | unsigned long flags; | 1621 | unsigned long flags; |
@@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1630 | weight = tg->cfs_rq[i]->load.weight; | 1631 | weight = tg->cfs_rq[i]->load.weight; |
1631 | usd_rq_weight[i] = weight; | 1632 | usd_rq_weight[i] = weight; |
1632 | 1633 | ||
1634 | rq_weight += weight; | ||
1633 | /* | 1635 | /* |
1634 | * If there are currently no tasks on the cpu pretend there | 1636 | * If there are currently no tasks on the cpu pretend there |
1635 | * is one of average load so that when a new task gets to | 1637 | * is one of average load so that when a new task gets to |
@@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1638 | if (!weight) | 1640 | if (!weight) |
1639 | weight = NICE_0_LOAD; | 1641 | weight = NICE_0_LOAD; |
1640 | 1642 | ||
1641 | rq_weight += weight; | 1643 | sum_weight += weight; |
1642 | shares += tg->cfs_rq[i]->shares; | 1644 | shares += tg->cfs_rq[i]->shares; |
1643 | } | 1645 | } |
1644 | 1646 | ||
1647 | if (!rq_weight) | ||
1648 | rq_weight = sum_weight; | ||
1649 | |||
1645 | if ((!shares && rq_weight) || shares > tg->shares) | 1650 | if ((!shares && rq_weight) || shares > tg->shares) |
1646 | shares = tg->shares; | 1651 | shares = tg->shares; |
1647 | 1652 | ||
@@ -1810,6 +1815,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1810 | #endif | 1815 | #endif |
1811 | 1816 | ||
1812 | static void calc_load_account_active(struct rq *this_rq); | 1817 | static void calc_load_account_active(struct rq *this_rq); |
1818 | static void update_sysctl(void); | ||
1819 | static int get_update_sysctl_factor(void); | ||
1820 | |||
1821 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1822 | { | ||
1823 | set_task_rq(p, cpu); | ||
1824 | #ifdef CONFIG_SMP | ||
1825 | /* | ||
1826 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1827 | * successfuly executed on another CPU. We must ensure that updates of | ||
1828 | * per-task data have been completed by this moment. | ||
1829 | */ | ||
1830 | smp_wmb(); | ||
1831 | task_thread_info(p)->cpu = cpu; | ||
1832 | #endif | ||
1833 | } | ||
1813 | 1834 | ||
1814 | #include "sched_stats.h" | 1835 | #include "sched_stats.h" |
1815 | #include "sched_idletask.c" | 1836 | #include "sched_idletask.c" |
@@ -1967,20 +1988,6 @@ inline int task_curr(const struct task_struct *p) | |||
1967 | return cpu_curr(task_cpu(p)) == p; | 1988 | return cpu_curr(task_cpu(p)) == p; |
1968 | } | 1989 | } |
1969 | 1990 | ||
1970 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1971 | { | ||
1972 | set_task_rq(p, cpu); | ||
1973 | #ifdef CONFIG_SMP | ||
1974 | /* | ||
1975 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1976 | * successfuly executed on another CPU. We must ensure that updates of | ||
1977 | * per-task data have been completed by this moment. | ||
1978 | */ | ||
1979 | smp_wmb(); | ||
1980 | task_thread_info(p)->cpu = cpu; | ||
1981 | #endif | ||
1982 | } | ||
1983 | |||
1984 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1991 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
1985 | const struct sched_class *prev_class, | 1992 | const struct sched_class *prev_class, |
1986 | int oldprio, int running) | 1993 | int oldprio, int running) |
@@ -2060,29 +2067,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2060 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2067 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2061 | { | 2068 | { |
2062 | int old_cpu = task_cpu(p); | 2069 | int old_cpu = task_cpu(p); |
2063 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | ||
2064 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2070 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), |
2065 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2071 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); |
2066 | u64 clock_offset; | ||
2067 | |||
2068 | clock_offset = old_rq->clock - new_rq->clock; | ||
2069 | 2072 | ||
2070 | trace_sched_migrate_task(p, new_cpu); | 2073 | trace_sched_migrate_task(p, new_cpu); |
2071 | 2074 | ||
2072 | #ifdef CONFIG_SCHEDSTATS | ||
2073 | if (p->se.wait_start) | ||
2074 | p->se.wait_start -= clock_offset; | ||
2075 | if (p->se.sleep_start) | ||
2076 | p->se.sleep_start -= clock_offset; | ||
2077 | if (p->se.block_start) | ||
2078 | p->se.block_start -= clock_offset; | ||
2079 | #endif | ||
2080 | if (old_cpu != new_cpu) { | 2075 | if (old_cpu != new_cpu) { |
2081 | p->se.nr_migrations++; | 2076 | p->se.nr_migrations++; |
2082 | #ifdef CONFIG_SCHEDSTATS | ||
2083 | if (task_hot(p, old_rq->clock, NULL)) | ||
2084 | schedstat_inc(p, se.nr_forced2_migrations); | ||
2085 | #endif | ||
2086 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | 2077 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
2087 | 1, 1, NULL, 0); | 2078 | 1, 1, NULL, 0); |
2088 | } | 2079 | } |
@@ -2323,6 +2314,14 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2323 | preempt_enable(); | 2314 | preempt_enable(); |
2324 | } | 2315 | } |
2325 | 2316 | ||
2317 | #ifdef CONFIG_SMP | ||
2318 | static inline | ||
2319 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | ||
2320 | { | ||
2321 | return p->sched_class->select_task_rq(p, sd_flags, wake_flags); | ||
2322 | } | ||
2323 | #endif | ||
2324 | |||
2326 | /*** | 2325 | /*** |
2327 | * try_to_wake_up - wake up a thread | 2326 | * try_to_wake_up - wake up a thread |
2328 | * @p: the to-be-woken-up thread | 2327 | * @p: the to-be-woken-up thread |
@@ -2374,17 +2373,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2374 | if (task_contributes_to_load(p)) | 2373 | if (task_contributes_to_load(p)) |
2375 | rq->nr_uninterruptible--; | 2374 | rq->nr_uninterruptible--; |
2376 | p->state = TASK_WAKING; | 2375 | p->state = TASK_WAKING; |
2377 | task_rq_unlock(rq, &flags); | 2376 | __task_rq_unlock(rq); |
2378 | 2377 | ||
2379 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2378 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2380 | if (cpu != orig_cpu) { | 2379 | if (cpu != orig_cpu) |
2381 | local_irq_save(flags); | ||
2382 | rq = cpu_rq(cpu); | ||
2383 | update_rq_clock(rq); | ||
2384 | set_task_cpu(p, cpu); | 2380 | set_task_cpu(p, cpu); |
2385 | local_irq_restore(flags); | 2381 | |
2386 | } | 2382 | rq = __task_rq_lock(p); |
2387 | rq = task_rq_lock(p, &flags); | 2383 | update_rq_clock(rq); |
2388 | 2384 | ||
2389 | WARN_ON(p->state != TASK_WAKING); | 2385 | WARN_ON(p->state != TASK_WAKING); |
2390 | cpu = task_cpu(p); | 2386 | cpu = task_cpu(p); |
@@ -2499,7 +2495,6 @@ static void __sched_fork(struct task_struct *p) | |||
2499 | p->se.avg_overlap = 0; | 2495 | p->se.avg_overlap = 0; |
2500 | p->se.start_runtime = 0; | 2496 | p->se.start_runtime = 0; |
2501 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2497 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2502 | p->se.avg_running = 0; | ||
2503 | 2498 | ||
2504 | #ifdef CONFIG_SCHEDSTATS | 2499 | #ifdef CONFIG_SCHEDSTATS |
2505 | p->se.wait_start = 0; | 2500 | p->se.wait_start = 0; |
@@ -2521,7 +2516,6 @@ static void __sched_fork(struct task_struct *p) | |||
2521 | p->se.nr_failed_migrations_running = 0; | 2516 | p->se.nr_failed_migrations_running = 0; |
2522 | p->se.nr_failed_migrations_hot = 0; | 2517 | p->se.nr_failed_migrations_hot = 0; |
2523 | p->se.nr_forced_migrations = 0; | 2518 | p->se.nr_forced_migrations = 0; |
2524 | p->se.nr_forced2_migrations = 0; | ||
2525 | 2519 | ||
2526 | p->se.nr_wakeups = 0; | 2520 | p->se.nr_wakeups = 0; |
2527 | p->se.nr_wakeups_sync = 0; | 2521 | p->se.nr_wakeups_sync = 0; |
@@ -2558,7 +2552,6 @@ static void __sched_fork(struct task_struct *p) | |||
2558 | void sched_fork(struct task_struct *p, int clone_flags) | 2552 | void sched_fork(struct task_struct *p, int clone_flags) |
2559 | { | 2553 | { |
2560 | int cpu = get_cpu(); | 2554 | int cpu = get_cpu(); |
2561 | unsigned long flags; | ||
2562 | 2555 | ||
2563 | __sched_fork(p); | 2556 | __sched_fork(p); |
2564 | 2557 | ||
@@ -2592,13 +2585,13 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2592 | if (!rt_prio(p->prio)) | 2585 | if (!rt_prio(p->prio)) |
2593 | p->sched_class = &fair_sched_class; | 2586 | p->sched_class = &fair_sched_class; |
2594 | 2587 | ||
2588 | if (p->sched_class->task_fork) | ||
2589 | p->sched_class->task_fork(p); | ||
2590 | |||
2595 | #ifdef CONFIG_SMP | 2591 | #ifdef CONFIG_SMP |
2596 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2592 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); |
2597 | #endif | 2593 | #endif |
2598 | local_irq_save(flags); | ||
2599 | update_rq_clock(cpu_rq(cpu)); | ||
2600 | set_task_cpu(p, cpu); | 2594 | set_task_cpu(p, cpu); |
2601 | local_irq_restore(flags); | ||
2602 | 2595 | ||
2603 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2596 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2604 | if (likely(sched_info_on())) | 2597 | if (likely(sched_info_on())) |
@@ -2631,17 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2631 | rq = task_rq_lock(p, &flags); | 2624 | rq = task_rq_lock(p, &flags); |
2632 | BUG_ON(p->state != TASK_RUNNING); | 2625 | BUG_ON(p->state != TASK_RUNNING); |
2633 | update_rq_clock(rq); | 2626 | update_rq_clock(rq); |
2634 | 2627 | activate_task(rq, p, 0); | |
2635 | if (!p->sched_class->task_new || !current->se.on_rq) { | ||
2636 | activate_task(rq, p, 0); | ||
2637 | } else { | ||
2638 | /* | ||
2639 | * Let the scheduling class do new task startup | ||
2640 | * management (if any): | ||
2641 | */ | ||
2642 | p->sched_class->task_new(rq, p); | ||
2643 | inc_nr_running(rq); | ||
2644 | } | ||
2645 | trace_sched_wakeup_new(rq, p, 1); | 2628 | trace_sched_wakeup_new(rq, p, 1); |
2646 | check_preempt_curr(rq, p, WF_FORK); | 2629 | check_preempt_curr(rq, p, WF_FORK); |
2647 | #ifdef CONFIG_SMP | 2630 | #ifdef CONFIG_SMP |
@@ -3156,7 +3139,7 @@ out: | |||
3156 | void sched_exec(void) | 3139 | void sched_exec(void) |
3157 | { | 3140 | { |
3158 | int new_cpu, this_cpu = get_cpu(); | 3141 | int new_cpu, this_cpu = get_cpu(); |
3159 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); | 3142 | new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); |
3160 | put_cpu(); | 3143 | put_cpu(); |
3161 | if (new_cpu != this_cpu) | 3144 | if (new_cpu != this_cpu) |
3162 | sched_migrate_task(current, new_cpu); | 3145 | sched_migrate_task(current, new_cpu); |
@@ -3172,10 +3155,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
3172 | deactivate_task(src_rq, p, 0); | 3155 | deactivate_task(src_rq, p, 0); |
3173 | set_task_cpu(p, this_cpu); | 3156 | set_task_cpu(p, this_cpu); |
3174 | activate_task(this_rq, p, 0); | 3157 | activate_task(this_rq, p, 0); |
3175 | /* | ||
3176 | * Note that idle threads have a prio of MAX_PRIO, for this test | ||
3177 | * to be always true for them. | ||
3178 | */ | ||
3179 | check_preempt_curr(this_rq, p, 0); | 3158 | check_preempt_curr(this_rq, p, 0); |
3180 | } | 3159 | } |
3181 | 3160 | ||
@@ -4134,7 +4113,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4134 | unsigned long flags; | 4113 | unsigned long flags; |
4135 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4114 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4136 | 4115 | ||
4137 | cpumask_copy(cpus, cpu_online_mask); | 4116 | cpumask_copy(cpus, cpu_active_mask); |
4138 | 4117 | ||
4139 | /* | 4118 | /* |
4140 | * When power savings policy is enabled for the parent domain, idle | 4119 | * When power savings policy is enabled for the parent domain, idle |
@@ -4297,7 +4276,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4297 | int all_pinned = 0; | 4276 | int all_pinned = 0; |
4298 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4277 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4299 | 4278 | ||
4300 | cpumask_copy(cpus, cpu_online_mask); | 4279 | cpumask_copy(cpus, cpu_active_mask); |
4301 | 4280 | ||
4302 | /* | 4281 | /* |
4303 | * When power savings policy is enabled for the parent domain, idle | 4282 | * When power savings policy is enabled for the parent domain, idle |
@@ -4694,7 +4673,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
4694 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 4673 | cpumask_set_cpu(cpu, nohz.cpu_mask); |
4695 | 4674 | ||
4696 | /* time for ilb owner also to sleep */ | 4675 | /* time for ilb owner also to sleep */ |
4697 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 4676 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { |
4698 | if (atomic_read(&nohz.load_balancer) == cpu) | 4677 | if (atomic_read(&nohz.load_balancer) == cpu) |
4699 | atomic_set(&nohz.load_balancer, -1); | 4678 | atomic_set(&nohz.load_balancer, -1); |
4700 | return 0; | 4679 | return 0; |
@@ -5396,13 +5375,14 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5396 | #endif | 5375 | #endif |
5397 | } | 5376 | } |
5398 | 5377 | ||
5399 | static void put_prev_task(struct rq *rq, struct task_struct *p) | 5378 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
5400 | { | 5379 | { |
5401 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; | 5380 | if (prev->state == TASK_RUNNING) { |
5381 | u64 runtime = prev->se.sum_exec_runtime; | ||
5402 | 5382 | ||
5403 | update_avg(&p->se.avg_running, runtime); | 5383 | runtime -= prev->se.prev_sum_exec_runtime; |
5384 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5404 | 5385 | ||
5405 | if (p->state == TASK_RUNNING) { | ||
5406 | /* | 5386 | /* |
5407 | * In order to avoid avg_overlap growing stale when we are | 5387 | * In order to avoid avg_overlap growing stale when we are |
5408 | * indeed overlapping and hence not getting put to sleep, grow | 5388 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5412,12 +5392,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p) | |||
5412 | * correlates to the amount of cache footprint a task can | 5392 | * correlates to the amount of cache footprint a task can |
5413 | * build up. | 5393 | * build up. |
5414 | */ | 5394 | */ |
5415 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | 5395 | update_avg(&prev->se.avg_overlap, runtime); |
5416 | update_avg(&p->se.avg_overlap, runtime); | ||
5417 | } else { | ||
5418 | update_avg(&p->se.avg_running, 0); | ||
5419 | } | 5396 | } |
5420 | p->sched_class->put_prev_task(rq, p); | 5397 | prev->sched_class->put_prev_task(rq, prev); |
5421 | } | 5398 | } |
5422 | 5399 | ||
5423 | /* | 5400 | /* |
@@ -6631,6 +6608,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, | |||
6631 | long sched_getaffinity(pid_t pid, struct cpumask *mask) | 6608 | long sched_getaffinity(pid_t pid, struct cpumask *mask) |
6632 | { | 6609 | { |
6633 | struct task_struct *p; | 6610 | struct task_struct *p; |
6611 | unsigned long flags; | ||
6612 | struct rq *rq; | ||
6634 | int retval; | 6613 | int retval; |
6635 | 6614 | ||
6636 | get_online_cpus(); | 6615 | get_online_cpus(); |
@@ -6645,7 +6624,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6645 | if (retval) | 6624 | if (retval) |
6646 | goto out_unlock; | 6625 | goto out_unlock; |
6647 | 6626 | ||
6627 | rq = task_rq_lock(p, &flags); | ||
6648 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 6628 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
6629 | task_rq_unlock(rq, &flags); | ||
6649 | 6630 | ||
6650 | out_unlock: | 6631 | out_unlock: |
6651 | read_unlock(&tasklist_lock); | 6632 | read_unlock(&tasklist_lock); |
@@ -6883,6 +6864,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6883 | { | 6864 | { |
6884 | struct task_struct *p; | 6865 | struct task_struct *p; |
6885 | unsigned int time_slice; | 6866 | unsigned int time_slice; |
6867 | unsigned long flags; | ||
6868 | struct rq *rq; | ||
6886 | int retval; | 6869 | int retval; |
6887 | struct timespec t; | 6870 | struct timespec t; |
6888 | 6871 | ||
@@ -6899,7 +6882,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6899 | if (retval) | 6882 | if (retval) |
6900 | goto out_unlock; | 6883 | goto out_unlock; |
6901 | 6884 | ||
6902 | time_slice = p->sched_class->get_rr_interval(p); | 6885 | rq = task_rq_lock(p, &flags); |
6886 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
6887 | task_rq_unlock(rq, &flags); | ||
6903 | 6888 | ||
6904 | read_unlock(&tasklist_lock); | 6889 | read_unlock(&tasklist_lock); |
6905 | jiffies_to_timespec(time_slice, &t); | 6890 | jiffies_to_timespec(time_slice, &t); |
@@ -7000,7 +6985,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
7000 | __sched_fork(idle); | 6985 | __sched_fork(idle); |
7001 | idle->se.exec_start = sched_clock(); | 6986 | idle->se.exec_start = sched_clock(); |
7002 | 6987 | ||
7003 | idle->prio = idle->normal_prio = MAX_PRIO; | ||
7004 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 6988 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
7005 | __set_task_cpu(idle, cpu); | 6989 | __set_task_cpu(idle, cpu); |
7006 | 6990 | ||
@@ -7041,22 +7025,43 @@ cpumask_var_t nohz_cpu_mask; | |||
7041 | * | 7025 | * |
7042 | * This idea comes from the SD scheduler of Con Kolivas: | 7026 | * This idea comes from the SD scheduler of Con Kolivas: |
7043 | */ | 7027 | */ |
7044 | static inline void sched_init_granularity(void) | 7028 | static int get_update_sysctl_factor(void) |
7045 | { | 7029 | { |
7046 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 7030 | unsigned int cpus = min_t(int, num_online_cpus(), 8); |
7047 | const unsigned long limit = 200000000; | 7031 | unsigned int factor; |
7032 | |||
7033 | switch (sysctl_sched_tunable_scaling) { | ||
7034 | case SCHED_TUNABLESCALING_NONE: | ||
7035 | factor = 1; | ||
7036 | break; | ||
7037 | case SCHED_TUNABLESCALING_LINEAR: | ||
7038 | factor = cpus; | ||
7039 | break; | ||
7040 | case SCHED_TUNABLESCALING_LOG: | ||
7041 | default: | ||
7042 | factor = 1 + ilog2(cpus); | ||
7043 | break; | ||
7044 | } | ||
7048 | 7045 | ||
7049 | sysctl_sched_min_granularity *= factor; | 7046 | return factor; |
7050 | if (sysctl_sched_min_granularity > limit) | 7047 | } |
7051 | sysctl_sched_min_granularity = limit; | ||
7052 | 7048 | ||
7053 | sysctl_sched_latency *= factor; | 7049 | static void update_sysctl(void) |
7054 | if (sysctl_sched_latency > limit) | 7050 | { |
7055 | sysctl_sched_latency = limit; | 7051 | unsigned int factor = get_update_sysctl_factor(); |
7056 | 7052 | ||
7057 | sysctl_sched_wakeup_granularity *= factor; | 7053 | #define SET_SYSCTL(name) \ |
7054 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
7055 | SET_SYSCTL(sched_min_granularity); | ||
7056 | SET_SYSCTL(sched_latency); | ||
7057 | SET_SYSCTL(sched_wakeup_granularity); | ||
7058 | SET_SYSCTL(sched_shares_ratelimit); | ||
7059 | #undef SET_SYSCTL | ||
7060 | } | ||
7058 | 7061 | ||
7059 | sysctl_sched_shares_ratelimit *= factor; | 7062 | static inline void sched_init_granularity(void) |
7063 | { | ||
7064 | update_sysctl(); | ||
7060 | } | 7065 | } |
7061 | 7066 | ||
7062 | #ifdef CONFIG_SMP | 7067 | #ifdef CONFIG_SMP |
@@ -7093,7 +7098,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7093 | int ret = 0; | 7098 | int ret = 0; |
7094 | 7099 | ||
7095 | rq = task_rq_lock(p, &flags); | 7100 | rq = task_rq_lock(p, &flags); |
7096 | if (!cpumask_intersects(new_mask, cpu_online_mask)) { | 7101 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
7097 | ret = -EINVAL; | 7102 | ret = -EINVAL; |
7098 | goto out; | 7103 | goto out; |
7099 | } | 7104 | } |
@@ -7115,7 +7120,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7115 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 7120 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
7116 | goto out; | 7121 | goto out; |
7117 | 7122 | ||
7118 | if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { | 7123 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { |
7119 | /* Need help from migration thread: drop lock and wait. */ | 7124 | /* Need help from migration thread: drop lock and wait. */ |
7120 | struct task_struct *mt = rq->migration_thread; | 7125 | struct task_struct *mt = rq->migration_thread; |
7121 | 7126 | ||
@@ -7269,19 +7274,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
7269 | 7274 | ||
7270 | again: | 7275 | again: |
7271 | /* Look for allowed, online CPU in same node. */ | 7276 | /* Look for allowed, online CPU in same node. */ |
7272 | for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) | 7277 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) |
7273 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7278 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7274 | goto move; | 7279 | goto move; |
7275 | 7280 | ||
7276 | /* Any allowed, online CPU? */ | 7281 | /* Any allowed, online CPU? */ |
7277 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); | 7282 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); |
7278 | if (dest_cpu < nr_cpu_ids) | 7283 | if (dest_cpu < nr_cpu_ids) |
7279 | goto move; | 7284 | goto move; |
7280 | 7285 | ||
7281 | /* No more Mr. Nice Guy. */ | 7286 | /* No more Mr. Nice Guy. */ |
7282 | if (dest_cpu >= nr_cpu_ids) { | 7287 | if (dest_cpu >= nr_cpu_ids) { |
7283 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | 7288 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); |
7284 | dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); | 7289 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); |
7285 | 7290 | ||
7286 | /* | 7291 | /* |
7287 | * Don't tell them about moving exiting tasks or | 7292 | * Don't tell them about moving exiting tasks or |
@@ -7310,7 +7315,7 @@ move: | |||
7310 | */ | 7315 | */ |
7311 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 7316 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
7312 | { | 7317 | { |
7313 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); | 7318 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
7314 | unsigned long flags; | 7319 | unsigned long flags; |
7315 | 7320 | ||
7316 | local_irq_save(flags); | 7321 | local_irq_save(flags); |
@@ -7563,7 +7568,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
7563 | static struct ctl_table_header *sd_sysctl_header; | 7568 | static struct ctl_table_header *sd_sysctl_header; |
7564 | static void register_sched_domain_sysctl(void) | 7569 | static void register_sched_domain_sysctl(void) |
7565 | { | 7570 | { |
7566 | int i, cpu_num = num_online_cpus(); | 7571 | int i, cpu_num = num_possible_cpus(); |
7567 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 7572 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
7568 | char buf[32]; | 7573 | char buf[32]; |
7569 | 7574 | ||
@@ -7573,7 +7578,7 @@ static void register_sched_domain_sysctl(void) | |||
7573 | if (entry == NULL) | 7578 | if (entry == NULL) |
7574 | return; | 7579 | return; |
7575 | 7580 | ||
7576 | for_each_online_cpu(i) { | 7581 | for_each_possible_cpu(i) { |
7577 | snprintf(buf, 32, "cpu%d", i); | 7582 | snprintf(buf, 32, "cpu%d", i); |
7578 | entry->procname = kstrdup(buf, GFP_KERNEL); | 7583 | entry->procname = kstrdup(buf, GFP_KERNEL); |
7579 | entry->mode = 0555; | 7584 | entry->mode = 0555; |
@@ -7703,7 +7708,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7703 | spin_lock_irq(&rq->lock); | 7708 | spin_lock_irq(&rq->lock); |
7704 | update_rq_clock(rq); | 7709 | update_rq_clock(rq); |
7705 | deactivate_task(rq, rq->idle, 0); | 7710 | deactivate_task(rq, rq->idle, 0); |
7706 | rq->idle->static_prio = MAX_PRIO; | ||
7707 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | 7711 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
7708 | rq->idle->sched_class = &idle_sched_class; | 7712 | rq->idle->sched_class = &idle_sched_class; |
7709 | migrate_dead_tasks(cpu); | 7713 | migrate_dead_tasks(cpu); |
@@ -9099,7 +9103,7 @@ match1: | |||
9099 | if (doms_new == NULL) { | 9103 | if (doms_new == NULL) { |
9100 | ndoms_cur = 0; | 9104 | ndoms_cur = 0; |
9101 | doms_new = &fallback_doms; | 9105 | doms_new = &fallback_doms; |
9102 | cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); | 9106 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); |
9103 | WARN_ON_ONCE(dattr_new); | 9107 | WARN_ON_ONCE(dattr_new); |
9104 | } | 9108 | } |
9105 | 9109 | ||
@@ -9230,8 +9234,10 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
9230 | switch (action) { | 9234 | switch (action) { |
9231 | case CPU_ONLINE: | 9235 | case CPU_ONLINE: |
9232 | case CPU_ONLINE_FROZEN: | 9236 | case CPU_ONLINE_FROZEN: |
9233 | case CPU_DEAD: | 9237 | case CPU_DOWN_PREPARE: |
9234 | case CPU_DEAD_FROZEN: | 9238 | case CPU_DOWN_PREPARE_FROZEN: |
9239 | case CPU_DOWN_FAILED: | ||
9240 | case CPU_DOWN_FAILED_FROZEN: | ||
9235 | partition_sched_domains(1, NULL, NULL); | 9241 | partition_sched_domains(1, NULL, NULL); |
9236 | return NOTIFY_OK; | 9242 | return NOTIFY_OK; |
9237 | 9243 | ||
@@ -9278,7 +9284,7 @@ void __init sched_init_smp(void) | |||
9278 | #endif | 9284 | #endif |
9279 | get_online_cpus(); | 9285 | get_online_cpus(); |
9280 | mutex_lock(&sched_domains_mutex); | 9286 | mutex_lock(&sched_domains_mutex); |
9281 | arch_init_sched_domains(cpu_online_mask); | 9287 | arch_init_sched_domains(cpu_active_mask); |
9282 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 9288 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
9283 | if (cpumask_empty(non_isolated_cpus)) | 9289 | if (cpumask_empty(non_isolated_cpus)) |
9284 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 9290 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
@@ -9842,13 +9848,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
9842 | se = kzalloc_node(sizeof(struct sched_entity), | 9848 | se = kzalloc_node(sizeof(struct sched_entity), |
9843 | GFP_KERNEL, cpu_to_node(i)); | 9849 | GFP_KERNEL, cpu_to_node(i)); |
9844 | if (!se) | 9850 | if (!se) |
9845 | goto err; | 9851 | goto err_free_rq; |
9846 | 9852 | ||
9847 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 9853 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); |
9848 | } | 9854 | } |
9849 | 9855 | ||
9850 | return 1; | 9856 | return 1; |
9851 | 9857 | ||
9858 | err_free_rq: | ||
9859 | kfree(cfs_rq); | ||
9852 | err: | 9860 | err: |
9853 | return 0; | 9861 | return 0; |
9854 | } | 9862 | } |
@@ -9930,13 +9938,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
9930 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 9938 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), |
9931 | GFP_KERNEL, cpu_to_node(i)); | 9939 | GFP_KERNEL, cpu_to_node(i)); |
9932 | if (!rt_se) | 9940 | if (!rt_se) |
9933 | goto err; | 9941 | goto err_free_rq; |
9934 | 9942 | ||
9935 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 9943 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); |
9936 | } | 9944 | } |
9937 | 9945 | ||
9938 | return 1; | 9946 | return 1; |
9939 | 9947 | ||
9948 | err_free_rq: | ||
9949 | kfree(rt_rq); | ||
9940 | err: | 9950 | err: |
9941 | return 0; | 9951 | return 0; |
9942 | } | 9952 | } |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6988cf08f705..5ae24fc65d75 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
309 | print_rq(m, rq, cpu); | 309 | print_rq(m, rq, cpu); |
310 | } | 310 | } |
311 | 311 | ||
312 | static const char *sched_tunable_scaling_names[] = { | ||
313 | "none", | ||
314 | "logaritmic", | ||
315 | "linear" | ||
316 | }; | ||
317 | |||
312 | static int sched_debug_show(struct seq_file *m, void *v) | 318 | static int sched_debug_show(struct seq_file *m, void *v) |
313 | { | 319 | { |
314 | u64 now = ktime_to_ns(ktime_get()); | 320 | u64 now = ktime_to_ns(ktime_get()); |
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
334 | #undef PN | 340 | #undef PN |
335 | #undef P | 341 | #undef P |
336 | 342 | ||
343 | SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", | ||
344 | sysctl_sched_tunable_scaling, | ||
345 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); | ||
346 | |||
337 | for_each_online_cpu(cpu) | 347 | for_each_online_cpu(cpu) |
338 | print_cpu(m, cpu); | 348 | print_cpu(m, cpu); |
339 | 349 | ||
@@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
399 | PN(se.sum_exec_runtime); | 409 | PN(se.sum_exec_runtime); |
400 | PN(se.avg_overlap); | 410 | PN(se.avg_overlap); |
401 | PN(se.avg_wakeup); | 411 | PN(se.avg_wakeup); |
402 | PN(se.avg_running); | ||
403 | 412 | ||
404 | nr_switches = p->nvcsw + p->nivcsw; | 413 | nr_switches = p->nvcsw + p->nivcsw; |
405 | 414 | ||
@@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
423 | P(se.nr_failed_migrations_running); | 432 | P(se.nr_failed_migrations_running); |
424 | P(se.nr_failed_migrations_hot); | 433 | P(se.nr_failed_migrations_hot); |
425 | P(se.nr_forced_migrations); | 434 | P(se.nr_forced_migrations); |
426 | P(se.nr_forced2_migrations); | ||
427 | P(se.nr_wakeups); | 435 | P(se.nr_wakeups); |
428 | P(se.nr_wakeups_sync); | 436 | P(se.nr_wakeups_sync); |
429 | P(se.nr_wakeups_migrate); | 437 | P(se.nr_wakeups_migrate); |
@@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p) | |||
499 | p->se.nr_failed_migrations_running = 0; | 507 | p->se.nr_failed_migrations_running = 0; |
500 | p->se.nr_failed_migrations_hot = 0; | 508 | p->se.nr_failed_migrations_hot = 0; |
501 | p->se.nr_forced_migrations = 0; | 509 | p->se.nr_forced_migrations = 0; |
502 | p->se.nr_forced2_migrations = 0; | ||
503 | p->se.nr_wakeups = 0; | 510 | p->se.nr_wakeups = 0; |
504 | p->se.nr_wakeups_sync = 0; | 511 | p->se.nr_wakeups_sync = 0; |
505 | p->se.nr_wakeups_migrate = 0; | 512 | p->se.nr_wakeups_migrate = 0; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f61837ad336d..804a411838f1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -21,6 +21,7 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
24 | #include <linux/sched.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Targeted preemption latency for CPU-bound tasks: | 27 | * Targeted preemption latency for CPU-bound tasks: |
@@ -35,12 +36,26 @@ | |||
35 | * run vmstat and monitor the context-switches (cs) field) | 36 | * run vmstat and monitor the context-switches (cs) field) |
36 | */ | 37 | */ |
37 | unsigned int sysctl_sched_latency = 5000000ULL; | 38 | unsigned int sysctl_sched_latency = 5000000ULL; |
39 | unsigned int normalized_sysctl_sched_latency = 5000000ULL; | ||
40 | |||
41 | /* | ||
42 | * The initial- and re-scaling of tunables is configurable | ||
43 | * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) | ||
44 | * | ||
45 | * Options are: | ||
46 | * SCHED_TUNABLESCALING_NONE - unscaled, always *1 | ||
47 | * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) | ||
48 | * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus | ||
49 | */ | ||
50 | enum sched_tunable_scaling sysctl_sched_tunable_scaling | ||
51 | = SCHED_TUNABLESCALING_LOG; | ||
38 | 52 | ||
39 | /* | 53 | /* |
40 | * Minimal preemption granularity for CPU-bound tasks: | 54 | * Minimal preemption granularity for CPU-bound tasks: |
41 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 55 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) |
42 | */ | 56 | */ |
43 | unsigned int sysctl_sched_min_granularity = 1000000ULL; | 57 | unsigned int sysctl_sched_min_granularity = 1000000ULL; |
58 | unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL; | ||
44 | 59 | ||
45 | /* | 60 | /* |
46 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity | 61 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity |
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield; | |||
70 | * have immediate wakeup/sleep latencies. | 85 | * have immediate wakeup/sleep latencies. |
71 | */ | 86 | */ |
72 | unsigned int sysctl_sched_wakeup_granularity = 1000000UL; | 87 | unsigned int sysctl_sched_wakeup_granularity = 1000000UL; |
88 | unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; | ||
73 | 89 | ||
74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
75 | 91 | ||
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
383 | */ | 399 | */ |
384 | 400 | ||
385 | #ifdef CONFIG_SCHED_DEBUG | 401 | #ifdef CONFIG_SCHED_DEBUG |
386 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 402 | int sched_proc_update_handler(struct ctl_table *table, int write, |
387 | void __user *buffer, size_t *lenp, | 403 | void __user *buffer, size_t *lenp, |
388 | loff_t *ppos) | 404 | loff_t *ppos) |
389 | { | 405 | { |
390 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 406 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
407 | int factor = get_update_sysctl_factor(); | ||
391 | 408 | ||
392 | if (ret || !write) | 409 | if (ret || !write) |
393 | return ret; | 410 | return ret; |
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
395 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, | 412 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, |
396 | sysctl_sched_min_granularity); | 413 | sysctl_sched_min_granularity); |
397 | 414 | ||
415 | #define WRT_SYSCTL(name) \ | ||
416 | (normalized_sysctl_##name = sysctl_##name / (factor)) | ||
417 | WRT_SYSCTL(sched_min_granularity); | ||
418 | WRT_SYSCTL(sched_latency); | ||
419 | WRT_SYSCTL(sched_wakeup_granularity); | ||
420 | WRT_SYSCTL(sched_shares_ratelimit); | ||
421 | #undef WRT_SYSCTL | ||
422 | |||
398 | return 0; | 423 | return 0; |
399 | } | 424 | } |
400 | #endif | 425 | #endif |
@@ -1403,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1403 | new_cpu = prev_cpu; | 1428 | new_cpu = prev_cpu; |
1404 | } | 1429 | } |
1405 | 1430 | ||
1406 | rcu_read_lock(); | ||
1407 | for_each_domain(cpu, tmp) { | 1431 | for_each_domain(cpu, tmp) { |
1408 | /* | 1432 | /* |
1409 | * If power savings logic is enabled for a domain, see if we | 1433 | * If power savings logic is enabled for a domain, see if we |
@@ -1484,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1484 | update_shares(tmp); | 1508 | update_shares(tmp); |
1485 | } | 1509 | } |
1486 | 1510 | ||
1487 | if (affine_sd && wake_affine(affine_sd, p, sync)) { | 1511 | if (affine_sd && wake_affine(affine_sd, p, sync)) |
1488 | new_cpu = cpu; | 1512 | return cpu; |
1489 | goto out; | ||
1490 | } | ||
1491 | 1513 | ||
1492 | while (sd) { | 1514 | while (sd) { |
1493 | int load_idx = sd->forkexec_idx; | 1515 | int load_idx = sd->forkexec_idx; |
@@ -1528,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1528 | /* while loop will break here if sd == NULL */ | 1550 | /* while loop will break here if sd == NULL */ |
1529 | } | 1551 | } |
1530 | 1552 | ||
1531 | out: | ||
1532 | rcu_read_unlock(); | ||
1533 | return new_cpu; | 1553 | return new_cpu; |
1534 | } | 1554 | } |
1535 | #endif /* CONFIG_SMP */ | 1555 | #endif /* CONFIG_SMP */ |
@@ -1651,12 +1671,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1651 | int sync = wake_flags & WF_SYNC; | 1671 | int sync = wake_flags & WF_SYNC; |
1652 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1672 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1653 | 1673 | ||
1654 | update_curr(cfs_rq); | 1674 | if (unlikely(rt_prio(p->prio))) |
1655 | 1675 | goto preempt; | |
1656 | if (unlikely(rt_prio(p->prio))) { | ||
1657 | resched_task(curr); | ||
1658 | return; | ||
1659 | } | ||
1660 | 1676 | ||
1661 | if (unlikely(p->sched_class != &fair_sched_class)) | 1677 | if (unlikely(p->sched_class != &fair_sched_class)) |
1662 | return; | 1678 | return; |
@@ -1682,50 +1698,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1682 | return; | 1698 | return; |
1683 | 1699 | ||
1684 | /* Idle tasks are by definition preempted by everybody. */ | 1700 | /* Idle tasks are by definition preempted by everybody. */ |
1685 | if (unlikely(curr->policy == SCHED_IDLE)) { | 1701 | if (unlikely(curr->policy == SCHED_IDLE)) |
1686 | resched_task(curr); | 1702 | goto preempt; |
1687 | return; | ||
1688 | } | ||
1689 | 1703 | ||
1690 | if ((sched_feat(WAKEUP_SYNC) && sync) || | 1704 | if (sched_feat(WAKEUP_SYNC) && sync) |
1691 | (sched_feat(WAKEUP_OVERLAP) && | 1705 | goto preempt; |
1692 | (se->avg_overlap < sysctl_sched_migration_cost && | ||
1693 | pse->avg_overlap < sysctl_sched_migration_cost))) { | ||
1694 | resched_task(curr); | ||
1695 | return; | ||
1696 | } | ||
1697 | 1706 | ||
1698 | if (sched_feat(WAKEUP_RUNNING)) { | 1707 | if (sched_feat(WAKEUP_OVERLAP) && |
1699 | if (pse->avg_running < se->avg_running) { | 1708 | se->avg_overlap < sysctl_sched_migration_cost && |
1700 | set_next_buddy(pse); | 1709 | pse->avg_overlap < sysctl_sched_migration_cost) |
1701 | resched_task(curr); | 1710 | goto preempt; |
1702 | return; | ||
1703 | } | ||
1704 | } | ||
1705 | 1711 | ||
1706 | if (!sched_feat(WAKEUP_PREEMPT)) | 1712 | if (!sched_feat(WAKEUP_PREEMPT)) |
1707 | return; | 1713 | return; |
1708 | 1714 | ||
1715 | update_curr(cfs_rq); | ||
1709 | find_matching_se(&se, &pse); | 1716 | find_matching_se(&se, &pse); |
1710 | |||
1711 | BUG_ON(!pse); | 1717 | BUG_ON(!pse); |
1718 | if (wakeup_preempt_entity(se, pse) == 1) | ||
1719 | goto preempt; | ||
1712 | 1720 | ||
1713 | if (wakeup_preempt_entity(se, pse) == 1) { | 1721 | return; |
1714 | resched_task(curr); | 1722 | |
1715 | /* | 1723 | preempt: |
1716 | * Only set the backward buddy when the current task is still | 1724 | resched_task(curr); |
1717 | * on the rq. This can happen when a wakeup gets interleaved | 1725 | /* |
1718 | * with schedule on the ->pre_schedule() or idle_balance() | 1726 | * Only set the backward buddy when the current task is still |
1719 | * point, either of which can * drop the rq lock. | 1727 | * on the rq. This can happen when a wakeup gets interleaved |
1720 | * | 1728 | * with schedule on the ->pre_schedule() or idle_balance() |
1721 | * Also, during early boot the idle thread is in the fair class, | 1729 | * point, either of which can * drop the rq lock. |
1722 | * for obvious reasons its a bad idea to schedule back to it. | 1730 | * |
1723 | */ | 1731 | * Also, during early boot the idle thread is in the fair class, |
1724 | if (unlikely(!se->on_rq || curr == rq->idle)) | 1732 | * for obvious reasons its a bad idea to schedule back to it. |
1725 | return; | 1733 | */ |
1726 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | 1734 | if (unlikely(!se->on_rq || curr == rq->idle)) |
1727 | set_last_buddy(se); | 1735 | return; |
1728 | } | 1736 | |
1737 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | ||
1738 | set_last_buddy(se); | ||
1729 | } | 1739 | } |
1730 | 1740 | ||
1731 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 1741 | static struct task_struct *pick_next_task_fair(struct rq *rq) |
@@ -1905,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1905 | 1915 | ||
1906 | return 0; | 1916 | return 0; |
1907 | } | 1917 | } |
1918 | |||
1919 | static void rq_online_fair(struct rq *rq) | ||
1920 | { | ||
1921 | update_sysctl(); | ||
1922 | } | ||
1923 | |||
1924 | static void rq_offline_fair(struct rq *rq) | ||
1925 | { | ||
1926 | update_sysctl(); | ||
1927 | } | ||
1928 | |||
1908 | #endif /* CONFIG_SMP */ | 1929 | #endif /* CONFIG_SMP */ |
1909 | 1930 | ||
1910 | /* | 1931 | /* |
@@ -1922,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) | |||
1922 | } | 1943 | } |
1923 | 1944 | ||
1924 | /* | 1945 | /* |
1925 | * Share the fairness runtime between parent and child, thus the | 1946 | * called on fork with the child task as argument from the parent's context |
1926 | * total amount of pressure for CPU stays equal - new tasks | 1947 | * - child not yet on the tasklist |
1927 | * get a chance to run but frequent forkers are not allowed to | 1948 | * - preemption disabled |
1928 | * monopolize the CPU. Note: the parent runqueue is locked, | ||
1929 | * the child is not running yet. | ||
1930 | */ | 1949 | */ |
1931 | static void task_new_fair(struct rq *rq, struct task_struct *p) | 1950 | static void task_fork_fair(struct task_struct *p) |
1932 | { | 1951 | { |
1933 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 1952 | struct cfs_rq *cfs_rq = task_cfs_rq(current); |
1934 | struct sched_entity *se = &p->se, *curr = cfs_rq->curr; | 1953 | struct sched_entity *se = &p->se, *curr = cfs_rq->curr; |
1935 | int this_cpu = smp_processor_id(); | 1954 | int this_cpu = smp_processor_id(); |
1955 | struct rq *rq = this_rq(); | ||
1956 | unsigned long flags; | ||
1957 | |||
1958 | spin_lock_irqsave(&rq->lock, flags); | ||
1936 | 1959 | ||
1937 | sched_info_queued(p); | 1960 | if (unlikely(task_cpu(p) != this_cpu)) |
1961 | __set_task_cpu(p, this_cpu); | ||
1938 | 1962 | ||
1939 | update_curr(cfs_rq); | 1963 | update_curr(cfs_rq); |
1964 | |||
1940 | if (curr) | 1965 | if (curr) |
1941 | se->vruntime = curr->vruntime; | 1966 | se->vruntime = curr->vruntime; |
1942 | place_entity(cfs_rq, se, 1); | 1967 | place_entity(cfs_rq, se, 1); |
1943 | 1968 | ||
1944 | /* 'curr' will be NULL if the child belongs to a different group */ | 1969 | if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { |
1945 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && | ||
1946 | curr && entity_before(curr, se)) { | ||
1947 | /* | 1970 | /* |
1948 | * Upon rescheduling, sched_class::put_prev_task() will place | 1971 | * Upon rescheduling, sched_class::put_prev_task() will place |
1949 | * 'current' within the tree based on its new key value. | 1972 | * 'current' within the tree based on its new key value. |
@@ -1952,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
1952 | resched_task(rq->curr); | 1975 | resched_task(rq->curr); |
1953 | } | 1976 | } |
1954 | 1977 | ||
1955 | enqueue_task_fair(rq, p, 0); | 1978 | spin_unlock_irqrestore(&rq->lock, flags); |
1956 | } | 1979 | } |
1957 | 1980 | ||
1958 | /* | 1981 | /* |
@@ -2014,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p) | |||
2014 | } | 2037 | } |
2015 | #endif | 2038 | #endif |
2016 | 2039 | ||
2017 | unsigned int get_rr_interval_fair(struct task_struct *task) | 2040 | unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) |
2018 | { | 2041 | { |
2019 | struct sched_entity *se = &task->se; | 2042 | struct sched_entity *se = &task->se; |
2020 | unsigned long flags; | ||
2021 | struct rq *rq; | ||
2022 | unsigned int rr_interval = 0; | 2043 | unsigned int rr_interval = 0; |
2023 | 2044 | ||
2024 | /* | 2045 | /* |
2025 | * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise | 2046 | * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise |
2026 | * idle runqueue: | 2047 | * idle runqueue: |
2027 | */ | 2048 | */ |
2028 | rq = task_rq_lock(task, &flags); | ||
2029 | if (rq->cfs.load.weight) | 2049 | if (rq->cfs.load.weight) |
2030 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); | 2050 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); |
2031 | task_rq_unlock(rq, &flags); | ||
2032 | 2051 | ||
2033 | return rr_interval; | 2052 | return rr_interval; |
2034 | } | 2053 | } |
@@ -2052,11 +2071,13 @@ static const struct sched_class fair_sched_class = { | |||
2052 | 2071 | ||
2053 | .load_balance = load_balance_fair, | 2072 | .load_balance = load_balance_fair, |
2054 | .move_one_task = move_one_task_fair, | 2073 | .move_one_task = move_one_task_fair, |
2074 | .rq_online = rq_online_fair, | ||
2075 | .rq_offline = rq_offline_fair, | ||
2055 | #endif | 2076 | #endif |
2056 | 2077 | ||
2057 | .set_curr_task = set_curr_task_fair, | 2078 | .set_curr_task = set_curr_task_fair, |
2058 | .task_tick = task_tick_fair, | 2079 | .task_tick = task_tick_fair, |
2059 | .task_new = task_new_fair, | 2080 | .task_fork = task_fork_fair, |
2060 | 2081 | ||
2061 | .prio_changed = prio_changed_fair, | 2082 | .prio_changed = prio_changed_fair, |
2062 | .switched_to = switched_to_fair, | 2083 | .switched_to = switched_to_fair, |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d94083582c7..d5059fd761d9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0) | |||
54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | 54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * Wakeup preemption towards tasks that run short | ||
58 | */ | ||
59 | SCHED_FEAT(WAKEUP_RUNNING, 0) | ||
60 | |||
61 | /* | ||
62 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate | 57 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate |
63 | * the remote end is likely to consume the data we just wrote, and | 58 | * the remote end is likely to consume the data we just wrote, and |
64 | * therefore has cache benefit from being placed on the same cpu, see | 59 | * therefore has cache benefit from being placed on the same cpu, see |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index b133a28fcde3..33d5384a73a8 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, | |||
97 | check_preempt_curr(rq, p, 0); | 97 | check_preempt_curr(rq, p, 0); |
98 | } | 98 | } |
99 | 99 | ||
100 | unsigned int get_rr_interval_idle(struct task_struct *task) | 100 | unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) |
101 | { | 101 | { |
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 5c5fef378415..aecbd9c6b20c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq) | |||
1721 | dequeue_pushable_task(rq, p); | 1721 | dequeue_pushable_task(rq, p); |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | unsigned int get_rr_interval_rt(struct task_struct *task) | 1724 | unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) |
1725 | { | 1725 | { |
1726 | /* | 1726 | /* |
1727 | * Time slice is 0 for SCHED_FIFO tasks | 1727 | * Time slice is 0 for SCHED_FIFO tasks |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9327a26765c5..554ac4894f0f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -244,6 +244,10 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */ | |||
244 | static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 244 | static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
245 | static int min_wakeup_granularity_ns; /* 0 usecs */ | 245 | static int min_wakeup_granularity_ns; /* 0 usecs */ |
246 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 246 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
247 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; | ||
248 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; | ||
249 | static int min_sched_shares_ratelimit = 100000; /* 100 usec */ | ||
250 | static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ | ||
247 | #endif | 251 | #endif |
248 | 252 | ||
249 | static struct ctl_table kern_table[] = { | 253 | static struct ctl_table kern_table[] = { |
@@ -260,7 +264,7 @@ static struct ctl_table kern_table[] = { | |||
260 | .data = &sysctl_sched_min_granularity, | 264 | .data = &sysctl_sched_min_granularity, |
261 | .maxlen = sizeof(unsigned int), | 265 | .maxlen = sizeof(unsigned int), |
262 | .mode = 0644, | 266 | .mode = 0644, |
263 | .proc_handler = sched_nr_latency_handler, | 267 | .proc_handler = sched_proc_update_handler, |
264 | .extra1 = &min_sched_granularity_ns, | 268 | .extra1 = &min_sched_granularity_ns, |
265 | .extra2 = &max_sched_granularity_ns, | 269 | .extra2 = &max_sched_granularity_ns, |
266 | }, | 270 | }, |
@@ -269,7 +273,7 @@ static struct ctl_table kern_table[] = { | |||
269 | .data = &sysctl_sched_latency, | 273 | .data = &sysctl_sched_latency, |
270 | .maxlen = sizeof(unsigned int), | 274 | .maxlen = sizeof(unsigned int), |
271 | .mode = 0644, | 275 | .mode = 0644, |
272 | .proc_handler = sched_nr_latency_handler, | 276 | .proc_handler = sched_proc_update_handler, |
273 | .extra1 = &min_sched_granularity_ns, | 277 | .extra1 = &min_sched_granularity_ns, |
274 | .extra2 = &max_sched_granularity_ns, | 278 | .extra2 = &max_sched_granularity_ns, |
275 | }, | 279 | }, |
@@ -278,7 +282,7 @@ static struct ctl_table kern_table[] = { | |||
278 | .data = &sysctl_sched_wakeup_granularity, | 282 | .data = &sysctl_sched_wakeup_granularity, |
279 | .maxlen = sizeof(unsigned int), | 283 | .maxlen = sizeof(unsigned int), |
280 | .mode = 0644, | 284 | .mode = 0644, |
281 | .proc_handler = proc_dointvec_minmax, | 285 | .proc_handler = sched_proc_update_handler, |
282 | .extra1 = &min_wakeup_granularity_ns, | 286 | .extra1 = &min_wakeup_granularity_ns, |
283 | .extra2 = &max_wakeup_granularity_ns, | 287 | .extra2 = &max_wakeup_granularity_ns, |
284 | }, | 288 | }, |
@@ -287,7 +291,18 @@ static struct ctl_table kern_table[] = { | |||
287 | .data = &sysctl_sched_shares_ratelimit, | 291 | .data = &sysctl_sched_shares_ratelimit, |
288 | .maxlen = sizeof(unsigned int), | 292 | .maxlen = sizeof(unsigned int), |
289 | .mode = 0644, | 293 | .mode = 0644, |
290 | .proc_handler = proc_dointvec, | 294 | .proc_handler = sched_proc_update_handler, |
295 | .extra1 = &min_sched_shares_ratelimit, | ||
296 | .extra2 = &max_sched_shares_ratelimit, | ||
297 | }, | ||
298 | { | ||
299 | .procname = "sched_tunable_scaling", | ||
300 | .data = &sysctl_sched_tunable_scaling, | ||
301 | .maxlen = sizeof(enum sched_tunable_scaling), | ||
302 | .mode = 0644, | ||
303 | .proc_handler = sched_proc_update_handler, | ||
304 | .extra1 = &min_sched_tunable_scaling, | ||
305 | .extra2 = &max_sched_tunable_scaling, | ||
291 | }, | 306 | }, |
292 | { | 307 | { |
293 | .procname = "sched_shares_thresh", | 308 | .procname = "sched_shares_thresh", |
@@ -298,13 +313,6 @@ static struct ctl_table kern_table[] = { | |||
298 | .extra1 = &zero, | 313 | .extra1 = &zero, |
299 | }, | 314 | }, |
300 | { | 315 | { |
301 | .procname = "sched_features", | ||
302 | .data = &sysctl_sched_features, | ||
303 | .maxlen = sizeof(unsigned int), | ||
304 | .mode = 0644, | ||
305 | .proc_handler = proc_dointvec, | ||
306 | }, | ||
307 | { | ||
308 | .procname = "sched_migration_cost", | 316 | .procname = "sched_migration_cost", |
309 | .data = &sysctl_sched_migration_cost, | 317 | .data = &sysctl_sched_migration_cost, |
310 | .maxlen = sizeof(unsigned int), | 318 | .maxlen = sizeof(unsigned int), |