Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 84
1 file changed, 60 insertions, 24 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 652e8bdef9aa..aa7f84121016 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -24,7 +24,7 @@
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
- * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 5ms * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * NOTE: this latency value is not the same as the concept of
  * 'timeslice length' - timeslices in CFS are of variable length
@@ -34,13 +34,13 @@
  * (to see the precise effective timeslice length of your workload,
  * run vmstat and monitor the context-switches (cs) field)
  */
-unsigned int sysctl_sched_latency = 20000000ULL;
+unsigned int sysctl_sched_latency = 5000000ULL;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
- * (default: 4 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
-unsigned int sysctl_sched_min_granularity = 4000000ULL;
+unsigned int sysctl_sched_min_granularity = 1000000ULL;
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -48,10 +48,10 @@ unsigned int sysctl_sched_min_granularity = 4000000ULL;
 static unsigned int sched_nr_latency = 5;
 
 /*
- * After fork, child runs first. (default) If set to 0 then
+ * After fork, child runs first. If set to 0 (default) then
  * parent will (try to) run first.
  */
-const_debug unsigned int sysctl_sched_child_runs_first = 1;
+unsigned int sysctl_sched_child_runs_first __read_mostly;
 
 /*
  * sys_sched_yield() compat mode
@@ -63,13 +63,13 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
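The comments in the hunks above describe how these tunables relate: sched_nr_latency is kept at sysctl_sched_latency / sysctl_sched_min_granularity, and with the new defaults of 5 ms and 1 ms that ratio is still 5, matching the unchanged sched_nr_latency. Both defaults are further scaled by (1 + ilog(ncpus)). A minimal user-space sketch of that arithmetic, assuming ilog means the integer base-2 logarithm and ignoring any cap the kernel may place on the CPU count:

#include <stdio.h>

/* integer base-2 logarithm, a stand-in for the kernel's ilog2() */
static unsigned int ilog2_u(unsigned int n)
{
	unsigned int r = 0;

	while (n >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long long latency = 5000000ULL;	/* new default, ns */
	unsigned long long min_gran = 1000000ULL;	/* new default, ns */
	unsigned int ncpus = 4;				/* hypothetical machine */
	unsigned int factor = 1 + ilog2_u(ncpus);	/* scaling from the comments */

	printf("scaled latency:     %llu ns\n", latency * factor);	/* 15000000 */
	printf("scaled granularity: %llu ns\n", min_gran * factor);	/*  3000000 */
	printf("nr_latency ratio:   %llu\n", latency / min_gran);	/* 5 */
	return 0;
}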
@@ -79,11 +79,6 @@ static const struct sched_class fair_sched_class;
  * CFS operations on generic schedulable entities:
  */
 
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-	return container_of(se, struct task_struct, se);
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
 /* cpu runqueue to which this cfs_rq is attached */
@@ -95,6 +90,14 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 /* An entity is a task if it doesn't "own" a runqueue */
 #define entity_is_task(se)	(!se->my_q)
 
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+#ifdef CONFIG_SCHED_DEBUG
+	WARN_ON_ONCE(!entity_is_task(se));
+#endif
+	return container_of(se, struct task_struct, se);
+}
+
 /* Walk up scheduling entities hierarchy */
 #define for_each_sched_entity(se) \
 		for (; se; se = se->parent)
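The relocated task_of() only makes sense when the entity is actually embedded in a task_struct; with group scheduling, a group's sched_entity owns a runqueue (se->my_q) and is not embedded in any task, which is what the new WARN_ON_ONCE(!entity_is_task(se)) debug check guards against. A self-contained sketch of the container_of() pattern the helper relies on, using simplified stand-in types rather than the kernel's:

#include <stdio.h>
#include <stddef.h>

/* same idea as the kernel's container_of(): map a pointer to an embedded
 * member back to the structure that contains it */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* simplified stand-ins for sched_entity and task_struct */
struct sched_entity { unsigned long vruntime; };
struct task_struct  { int pid; struct sched_entity se; };

static struct task_struct *task_of(struct sched_entity *se)
{
	return container_of(se, struct task_struct, se);
}

int main(void)
{
	struct task_struct t = { .pid = 42 };
	struct sched_entity *se = &t.se;

	/* recovers the enclosing task; only valid for task entities */
	printf("pid = %d\n", task_of(se)->pid);	/* prints 42 */
	return 0;
}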
@@ -186,7 +189,12 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 	}
 }
 
-#else	/* CONFIG_FAIR_GROUP_SCHED */
+#else	/* !CONFIG_FAIR_GROUP_SCHED */
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+	return container_of(se, struct task_struct, se);
+}
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 {
@@ -537,6 +545,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	schedstat_set(se->wait_count, se->wait_count + 1);
 	schedstat_set(se->wait_sum, se->wait_sum +
 			rq_of(cfs_rq)->clock - se->wait_start);
+#ifdef CONFIG_SCHEDSTATS
+	if (entity_is_task(se)) {
+		trace_sched_stat_wait(task_of(se),
+			rq_of(cfs_rq)->clock - se->wait_start);
+	}
+#endif
 	schedstat_set(se->wait_start, 0);
 }
 
@@ -628,8 +642,10 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		se->sleep_start = 0;
 		se->sum_sleep_runtime += delta;
 
-		if (tsk)
+		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
+			trace_sched_stat_sleep(tsk, delta);
+		}
 	}
 	if (se->block_start) {
 		u64 delta = rq_of(cfs_rq)->clock - se->block_start;
@@ -644,6 +660,12 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		se->sum_sleep_runtime += delta;
 
 		if (tsk) {
+			if (tsk->in_iowait) {
+				se->iowait_sum += delta;
+				se->iowait_count++;
+				trace_sched_stat_iowait(tsk, delta);
+			}
+
 			/*
 			 * Blocking time is in units of nanosecs, so shift by
 			 * 20 to get a milliseconds-range estimation of the
@@ -705,11 +727,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 			vruntime -= thresh;
 		}
-
-		/* ensure we never gain time by being placed backwards. */
-		vruntime = max_vruntime(se->vruntime, vruntime);
 	}
 
+	/* ensure we never gain time by being placed backwards. */
+	vruntime = max_vruntime(se->vruntime, vruntime);
+
 	se->vruntime = vruntime;
 }
 
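Moving the clamp out of the !initial branch means the "never gain time by being placed backwards" rule now also applies to the initial placement of a freshly forked entity (see the task_new_fair() change at the end of this patch, which seeds the child's vruntime from its parent). A minimal stand-alone sketch of a wraparound-safe max_vruntime(), on the assumption that the in-tree helper compares u64 vruntimes through a signed delta rather than directly:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of a wraparound-safe "max" for monotonically increasing u64
 * vruntimes: comparing through a signed delta keeps the result correct
 * even after the counter wraps. (Assumption: the kernel helper works
 * along these lines; this is not a copy of it.)
 */
static uint64_t max_vruntime(uint64_t max_vruntime, uint64_t vruntime)
{
	int64_t delta = (int64_t)(vruntime - max_vruntime);

	if (delta > 0)
		max_vruntime = vruntime;
	return max_vruntime;
}

int main(void)
{
	uint64_t near_wrap = UINT64_MAX - 10;	/* hypothetical values */
	uint64_t wrapped   = 5;			/* logically later, numerically smaller */

	/* picks the logically later value despite the wrap */
	printf("%llu\n", (unsigned long long)max_vruntime(near_wrap, wrapped));
	return 0;
}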
@@ -1046,17 +1068,21 @@ static void yield_task_fair(struct rq *rq)
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
  * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (cpu_active_mask)
+ * hence we need to mask them out (rq->rd->online)
  *
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
+
+#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
+
 static int wake_idle(int cpu, struct task_struct *p)
 {
 	struct sched_domain *sd;
 	int i;
 	unsigned int chosen_wakeup_cpu;
 	int this_cpu;
+	struct rq *task_rq = task_rq(p);
 
 	/*
 	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
@@ -1089,10 +1115,10 @@ static int wake_idle(int cpu, struct task_struct *p)
 	for_each_domain(cpu, sd) {
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
-			&& !task_hot(p, task_rq(p)->clock, sd))) {
+			&& !task_hot(p, task_rq->clock, sd))) {
 			for_each_cpu_and(i, sched_domain_span(sd),
 					 &p->cpus_allowed) {
-				if (cpu_active(i) && idle_cpu(i)) {
+				if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
 					if (i != task_cpu(p)) {
 						schedstat_inc(p,
 							      se.nr_wakeups_idle);
@@ -1235,7 +1261,17 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	tg = task_group(p);
 	weight = p->se.load.weight;
 
-	balanced = 100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+	/*
+	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
+	 * due to the sync cause above having dropped tl to 0, we'll always have
+	 * an imbalance, but there's really nothing you can do about that, so
+	 * that's good too.
+	 *
+	 * Otherwise check if either cpus are near enough in load to allow this
+	 * task to be woken on this_cpu.
+	 */
+	balanced = !tl ||
+		100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
 		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
 
 	/*
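The new !tl short-circuit makes the wakeup-affinity decision explicit for the idle case the added comment describes; otherwise the original load comparison still decides. A small stand-alone sketch of that predicate with hypothetical load numbers (the effective_load() adjustments are folded into the inputs, since reproducing that function is beyond this example):

#include <stdio.h>

/*
 * Hypothetical sketch of the "balanced" test in wake_affine(): tl is
 * this_cpu's load with the sync wakeup's contribution removed, load is
 * prev_cpu's load, and imbalance is a percentage-style factor.
 */
static int balanced(unsigned long tl, unsigned long load, unsigned long imbalance)
{
	return !tl || 100 * tl <= imbalance * load;
}

int main(void)
{
	/* both cpus effectively idle after the sync discount: always balanced */
	printf("%d\n", balanced(0, 0, 125));		/* 1 */

	/* this_cpu slightly busier than prev_cpu, within a 125% margin */
	printf("%d\n", balanced(1024, 900, 125));	/* 100*1024 <= 125*900 -> 1 */

	/* this_cpu far busier: not balanced, don't pull the task here */
	printf("%d\n", balanced(4096, 900, 125));	/* 0 */
	return 0;
}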
@@ -1278,8 +1314,6 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	this_rq = cpu_rq(this_cpu);
 	new_cpu = prev_cpu;
 
-	if (prev_cpu == this_cpu)
-		goto out;
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1721,6 +1755,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	sched_info_queued(p);
 
 	update_curr(cfs_rq);
+	if (curr)
+		se->vruntime = curr->vruntime;
 	place_entity(cfs_rq, se, 1);
 
 	/* 'curr' will be NULL if the child belongs to a different group */
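Seeding the child's vruntime from the parent before place_entity() works together with the place_entity() change earlier in this patch: the now-unconditional max_vruntime() clamp compares against the parent's position rather than whatever value the fresh entity carried, so a forked child should not end up placed ahead of its parent on the timeline. A small sketch of the intended ordering, with made-up numbers and a placeholder standing in for place_entity():

#include <stdint.h>
#include <stdio.h>

/* wraparound-safe max, as sketched earlier */
static uint64_t max_vruntime(uint64_t m, uint64_t v)
{
	return (int64_t)(v - m) > 0 ? v : m;
}

/*
 * Placeholder for place_entity(..., initial=1): start from the runqueue's
 * min_vruntime plus some new-task debit, then clamp so the entity never
 * moves backwards relative to the value it already carries. The debit and
 * min_vruntime values below are invented for illustration only.
 */
static uint64_t place_initial(uint64_t se_vruntime, uint64_t min_vruntime)
{
	uint64_t vruntime = min_vruntime + 3000000;	/* hypothetical debit slice */

	return max_vruntime(se_vruntime, vruntime);
}

int main(void)
{
	uint64_t min_vruntime    = 100000000;		/* hypothetical cfs_rq->min_vruntime */
	uint64_t parent_vruntime = 104500000;		/* parent is ahead of min_vruntime   */
	uint64_t child_vruntime  = parent_vruntime;	/* new: inherit from the parent */

	child_vruntime = place_initial(child_vruntime, min_vruntime);
	/* child is never placed before its parent */
	printf("%d\n", child_vruntime >= parent_vruntime);	/* 1 */
	return 0;
}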