Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 205
1 file changed, 118 insertions(+), 87 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2cc59080efa..86a93376282c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,13 +73,13 @@ unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
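The comment above gives the default as 5 msec scaled by 1 + ilog(ncpus) at boot. A minimal user-space sketch of that arithmetic, purely illustrative (ilog2_u() and the standalone program are stand-ins, not kernel code):

#include <stdio.h>

/* Illustrative stand-in for the kernel's ilog2(). */
static unsigned int ilog2_u(unsigned int n)
{
	unsigned int log = 0;

	while (n >>= 1)
		log++;
	return log;
}

int main(void)
{
	unsigned int base = 5000000U;	/* new default, in nanoseconds */
	unsigned int ncpus;

	for (ncpus = 1; ncpus <= 16; ncpus *= 2) {
		unsigned int factor = 1 + ilog2_u(ncpus);

		printf("%2u cpus: wakeup granularity = %u ns\n",
		       ncpus, base * factor);
	}
	return 0;
}

On a 4-CPU box this works out to roughly 15 msec, i.e. the effective granularity is halved relative to the old 10 msec base.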
@@ -302,11 +302,6 @@ static u64 __sched_vslice(unsigned long rq_weight, unsigned long nr_running)
 	return vslice;
 }
 
-static u64 sched_vslice(struct cfs_rq *cfs_rq)
-{
-	return __sched_vslice(cfs_rq->load.weight, cfs_rq->nr_running);
-}
-
 static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	return __sched_vslice(cfs_rq->load.weight + se->load.weight,
@@ -504,15 +499,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	} else
 		vruntime = cfs_rq->min_vruntime;
 
-	if (sched_feat(TREE_AVG)) {
-		struct sched_entity *last = __pick_last_entity(cfs_rq);
-		if (last) {
-			vruntime += last->vruntime;
-			vruntime >>= 1;
-		}
-	} else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running)
-		vruntime += sched_vslice(cfs_rq)/2;
-
 	/*
 	 * The 'current' period is already promised to the current tasks,
 	 * however the extra weight of the new task will slow them down a
@@ -556,6 +542,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	account_entity_enqueue(cfs_rq, se);
 }
 
+static void update_avg(u64 *avg, u64 sample)
+{
+	s64 diff = sample - *avg;
+	*avg += diff >> 3;
+}
+
+static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (!se->last_wakeup)
+		return;
+
+	update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
+	se->last_wakeup = 0;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
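The new update_avg() is an exponential moving average with a 1/8 weight per sample: avg += (sample - avg) >> 3. A minimal stand-alone sketch of how se->avg_overlap converges toward a stream of identical overlap samples (kernel types and struct members simplified away):

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as update_avg() above: move 1/8 of the way
 * towards each new sample. */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = sample - *avg;

	*avg += diff >> 3;
}

int main(void)
{
	uint64_t avg_overlap = 0;
	int i;

	/* Feed ten identical 1 ms (1000000 ns) overlap samples. */
	for (i = 0; i < 10; i++) {
		update_avg(&avg_overlap, 1000000);
		printf("sample %2d: avg_overlap = %llu ns\n",
		       i + 1, (unsigned long long)avg_overlap);
	}
	return 0;
}

The 1/8 weight means a single long overlap cannot immediately flip the affinity decision below; it takes several samples for the average to move.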
@@ -566,6 +567,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		update_avg_stats(cfs_rq, se);
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -980,96 +982,121 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 #endif
 
 #ifdef CONFIG_SMP
-static int select_task_rq_fair(struct task_struct *p, int sync)
+
+static const struct sched_class fair_sched_class;
+
+static int
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
+	    int idx, unsigned long load, unsigned long this_load,
+	    unsigned int imbalance)
 {
-	int cpu, this_cpu;
-	struct rq *rq;
-	struct sched_domain *sd, *this_sd = NULL;
-	int new_cpu;
+	struct task_struct *curr = this_rq->curr;
+	unsigned long tl = this_load;
+	unsigned long tl_per_task;
+
+	if (!(this_sd->flags & SD_WAKE_AFFINE))
+		return 0;
+
+	/*
+	 * If the currently running task will sleep within
+	 * a reasonable amount of time then attract this newly
+	 * woken task:
+	 */
+	if (sync && curr->sched_class == &fair_sched_class) {
+		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+		    p->se.avg_overlap < sysctl_sched_migration_cost)
+			return 1;
+	}
+
+	schedstat_inc(p, se.nr_wakeups_affine_attempts);
+	tl_per_task = cpu_avg_load_per_task(this_cpu);
+
+	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync)
+		tl -= current->se.load.weight;
+
+	if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
+	    100*(tl + p->se.load.weight) <= imbalance*load) {
+		/*
+		 * This domain has SD_WAKE_AFFINE and
+		 * p is cache cold in this domain, and
+		 * there is no bad imbalance.
+		 */
+		schedstat_inc(this_sd, ttwu_move_affine);
+		schedstat_inc(p, se.nr_wakeups_affine);
 
-	cpu = task_cpu(p);
-	rq = task_rq(p);
-	this_cpu = smp_processor_id();
-	new_cpu = cpu;
+		return 1;
+	}
+	return 0;
+}
 
-	if (cpu == this_cpu)
-		goto out_set_cpu;
+static int select_task_rq_fair(struct task_struct *p, int sync)
+{
+	struct sched_domain *sd, *this_sd = NULL;
+	int prev_cpu, this_cpu, new_cpu;
+	unsigned long load, this_load;
+	struct rq *rq, *this_rq;
+	unsigned int imbalance;
+	int idx;
+
+	prev_cpu = task_cpu(p);
+	rq = task_rq(p);
+	this_cpu = smp_processor_id();
+	this_rq = cpu_rq(this_cpu);
+	new_cpu = prev_cpu;
 
+	/*
+	 * 'this_sd' is the first domain that both
+	 * this_cpu and prev_cpu are present in:
+	 */
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(cpu, sd->span)) {
+		if (cpu_isset(prev_cpu, sd->span)) {
 			this_sd = sd;
 			break;
 		}
 	}
 
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
-		goto out_set_cpu;
+		goto out;
 
 	/*
 	 * Check for affine wakeup and passive balancing possibilities.
 	 */
-	if (this_sd) {
-		int idx = this_sd->wake_idx;
-		unsigned int imbalance;
-		unsigned long load, this_load;
-
-		imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
-		load = source_load(cpu, idx);
-		this_load = target_load(this_cpu, idx);
-
-		new_cpu = this_cpu; /* Wake to this CPU if we can */
-
-		if (this_sd->flags & SD_WAKE_AFFINE) {
-			unsigned long tl = this_load;
-			unsigned long tl_per_task;
-
-			/*
-			 * Attract cache-cold tasks on sync wakeups:
-			 */
-			if (sync && !task_hot(p, rq->clock, this_sd))
-				goto out_set_cpu;
-
-			schedstat_inc(p, se.nr_wakeups_affine_attempts);
-			tl_per_task = cpu_avg_load_per_task(this_cpu);
-
-			/*
-			 * If sync wakeup then subtract the (maximum possible)
-			 * effect of the currently running task from the load
-			 * of the current CPU:
-			 */
-			if (sync)
-				tl -= current->se.load.weight;
-
-			if ((tl <= load &&
-				tl + target_load(cpu, idx) <= tl_per_task) ||
-			       100*(tl + p->se.load.weight) <= imbalance*load) {
-				/*
-				 * This domain has SD_WAKE_AFFINE and
-				 * p is cache cold in this domain, and
-				 * there is no bad imbalance.
-				 */
-				schedstat_inc(this_sd, ttwu_move_affine);
-				schedstat_inc(p, se.nr_wakeups_affine);
-				goto out_set_cpu;
-			}
-		}
+	if (!this_sd)
+		goto out;
 
-		/*
-		 * Start passive balancing when half the imbalance_pct
-		 * limit is reached.
-		 */
-		if (this_sd->flags & SD_WAKE_BALANCE) {
-			if (imbalance*this_load <= 100*load) {
-				schedstat_inc(this_sd, ttwu_move_balance);
-				schedstat_inc(p, se.nr_wakeups_passive);
-				goto out_set_cpu;
-			}
+	idx = this_sd->wake_idx;
+
+	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+
+	load = source_load(prev_cpu, idx);
+	this_load = target_load(this_cpu, idx);
+
+	if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+			load, this_load, imbalance))
+		return this_cpu;
+
+	if (prev_cpu == this_cpu)
+		goto out;
+
+	/*
+	 * Start passive balancing when half the imbalance_pct
+	 * limit is reached.
+	 */
+	if (this_sd->flags & SD_WAKE_BALANCE) {
+		if (imbalance*this_load <= 100*load) {
+			schedstat_inc(this_sd, ttwu_move_balance);
+			schedstat_inc(p, se.nr_wakeups_passive);
+			return this_cpu;
 		}
 	}
 
-	new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
-out_set_cpu:
+out:
 	return wake_idle(new_cpu, p);
 }
 #endif /* CONFIG_SMP */
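wake_affine() now has two ways to keep a wakee on the waking CPU: a fast path for sync wakeups where both the waker's and the wakee's avg_overlap are below sysctl_sched_migration_cost, and the pre-existing load comparison. A minimal sketch of just the decision arithmetic, with the rq/sched_domain plumbing stripped out and every input a made-up sample value:

#include <stdio.h>

#define MIGRATION_COST	500000UL	/* sysctl_sched_migration_cost, ns */

/*
 * Same decision shape as wake_affine() above; all arguments are
 * hypothetical sample values, not taken from a real machine.
 */
static int affine_sketch(int sync, unsigned long curr_overlap,
			 unsigned long wakee_overlap,
			 unsigned long this_load, unsigned long prev_load,
			 unsigned long load_per_task,
			 unsigned long wakee_weight, unsigned int imbalance)
{
	/* Fast path: a sync waker that will soon sleep keeps its wakee local. */
	if (sync && curr_overlap < MIGRATION_COST &&
	    wakee_overlap < MIGRATION_COST)
		return 1;

	/* Otherwise fall back to the load comparison. */
	if ((this_load <= prev_load &&
	     this_load + prev_load <= load_per_task) ||
	    100 * (this_load + wakee_weight) <= imbalance * prev_load)
		return 1;

	return 0;
}

int main(void)
{
	/* A pipe-like pair: short overlaps and a sync wakeup stay affine. */
	printf("affine wakeup: %d\n",
	       affine_sketch(1, 100000, 150000, 2048, 1024, 1024, 1024, 112));
	return 0;
}

The practical effect is that the old "sync && !task_hot()" test is replaced by a measured property of the pair of tasks, so pipe- and socket-style partners get pulled together even when the load numbers alone would not justify it.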
@@ -1092,6 +1119,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
+	se->last_wakeup = se->sum_exec_runtime;
+	if (unlikely(se == pse))
+		return;
+
 	cfs_rq_of(pse)->next = pse;
 
 	/*
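The last_wakeup stamp taken here pairs with update_avg_stats() in dequeue_entity(): when the waker later goes to sleep, sum_exec_runtime - last_wakeup is how long it kept running after issuing the wakeup, and that becomes the overlap sample fed into avg_overlap. A minimal sketch of that two-point bookkeeping (the struct and field names are simplified stand-ins for sched_entity):

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for the fields the patch adds to sched_entity. */
struct entity {
	uint64_t sum_exec_runtime;
	uint64_t last_wakeup;
	uint64_t avg_overlap;
};

static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = sample - *avg;

	*avg += diff >> 3;
}

int main(void)
{
	struct entity waker = { 0, 0, 0 };

	/* t0: the waker wakes another task; check_preempt_wakeup() stamps it. */
	waker.sum_exec_runtime = 10000000;
	waker.last_wakeup = waker.sum_exec_runtime;

	/* t1: the waker runs another 300 us, then sleeps;
	 * dequeue_entity() -> update_avg_stats() folds the overlap in. */
	waker.sum_exec_runtime += 300000;
	update_avg(&waker.avg_overlap,
		   waker.sum_exec_runtime - waker.last_wakeup);
	waker.last_wakeup = 0;

	printf("overlap sample: 300000 ns, avg_overlap now %llu ns\n",
	       (unsigned long long)waker.avg_overlap);
	return 0;
}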