-rw-r--r--  include/linux/sched.h |  3
-rw-r--r--  kernel/sched/fair.c   | 47
2 files changed, 50 insertions, 0 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b92ceda..4f163a8ffabf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1034,6 +1034,9 @@ struct task_struct {
 #ifdef CONFIG_SMP
 	struct llist_node wake_entry;
 	int on_cpu;
+	struct task_struct *last_wakee;
+	unsigned long wakee_flips;
+	unsigned long wakee_flip_decay_ts;
 #endif
 	int on_rq;
 
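The three fields added here are the heuristic's entire per-task state: last_wakee remembers the previous wakeup partner, wakee_flips counts how often that partner has changed recently, and wakee_flip_decay_ts stamps the last time the counter was wiped. A self-contained sketch of how they are updated follows the record_wakee() hunk below.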
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 765d87acdf05..860063a8c849 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3017,6 +3017,23 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return 0;
 }
 
+static void record_wakee(struct task_struct *p)
+{
+	/*
+	 * Rough decay (wiping) keeps the cost down; don't worry
+	 * about boundary effects, a genuinely active task won't
+	 * care about the occasional loss.
+	 */
+	if (jiffies > current->wakee_flip_decay_ts + HZ) {
+		current->wakee_flips = 0;
+		current->wakee_flip_decay_ts = jiffies;
+	}
+
+	if (current->last_wakee != p) {
+		current->last_wakee = p;
+		current->wakee_flips++;
+	}
+}
 
 static void task_waking_fair(struct task_struct *p)
 {
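A minimal userspace sketch of the same bookkeeping, assuming a fake jiffies counter and HZ = 1000; the struct task below is a stand-in for the fields added to task_struct above, not kernel code. It shows that flips accumulate only while the wakeup partner keeps changing, and that the history is wiped at most once per second:

#include <stdio.h>

#define HZ 1000
static unsigned long jiffies;	/* stand-in for the kernel tick counter */

struct task {
	struct task *last_wakee;
	unsigned long wakee_flips;
	unsigned long wakee_flip_decay_ts;
};

static void record_wakee(struct task *waker, struct task *wakee)
{
	/* wipe stale history at most once per second */
	if (jiffies > waker->wakee_flip_decay_ts + HZ) {
		waker->wakee_flips = 0;
		waker->wakee_flip_decay_ts = jiffies;
	}
	/* count only changes of wakeup partner, not repeated wakeups */
	if (waker->last_wakee != wakee) {
		waker->last_wakee = wakee;
		waker->wakee_flips++;
	}
}

int main(void)
{
	struct task waker = {0}, a = {0}, b = {0};
	int i;

	for (i = 0; i < 10; i++) {
		jiffies++;
		record_wakee(&waker, (i & 1) ? &a : &b);
	}
	printf("flips: %lu\n", waker.wakee_flips);	/* 10: partner alternated */

	jiffies += 2 * HZ;	/* waker goes quiet for two seconds */
	record_wakee(&waker, &b);
	printf("flips: %lu\n", waker.wakee_flips);	/* 1: wiped, one fresh flip */
	return 0;
}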
@@ -3037,6 +3054,7 @@ static void task_waking_fair(struct task_struct *p)
 #endif
 
 	se->vruntime -= min_vruntime;
+	record_wakee(p);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3155,6 +3173,28 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
+static int wake_wide(struct task_struct *p)
+{
+	int factor = nr_cpus_node(cpu_to_node(smp_processor_id()));
+
+	/*
+	 * wakee_flips is the switching frequency: a high value can mean
+	 * many wakees or rapid partner switching. Using the node size as
+	 * the factor automatically adjusts the loose-degree, so a bigger
+	 * node will lead to more pulling.
+	 */
+	if (p->wakee_flips > factor) {
+		/*
+		 * The wakee is somewhat hot and needs a certain amount
+		 * of cpu resource, so if the waker is far hotter still,
+		 * prefer to leave the wakee alone.
+		 */
+		if (current->wakee_flips > (factor * p->wakee_flips))
+			return 1;
+	}
+
+	return 0;
+}
+
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
 	s64 this_load, load;
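To make the two thresholds concrete, here is a userspace sketch of the same predicate with factor passed in explicitly (the kernel derives it from the waker's node size via nr_cpus_node()); the numbers are made up for illustration:

#include <stdio.h>

/* same two-level check as wake_wide() above, factor made explicit */
static int wake_wide_sketch(unsigned long waker_flips,
			    unsigned long wakee_flips,
			    unsigned long factor)
{
	return wakee_flips > factor && waker_flips > factor * wakee_flips;
}

int main(void)
{
	/* assume a 16-cpu node, i.e. factor = 16 */
	printf("%d\n", wake_wide_sketch(400, 20, 16)); /* 1: 20 > 16 and 400 > 320 */
	printf("%d\n", wake_wide_sketch(300, 20, 16)); /* 0: waker not hot enough */
	printf("%d\n", wake_wide_sketch(900, 10, 16)); /* 0: wakee under the factor */
	return 0;
}

So on a 16-CPU node an affine wakeup is only refused when the wakee itself switches partners more than 16 times per decay period and the waker switches more than 16 times as often as the wakee.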
@@ -3164,6 +3204,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	unsigned long weight;
 	int balanced;
 
+	/*
+	 * If we wake multiple tasks be careful to not bounce
+	 * ourselves around too much.
+	 */
+	if (wake_wide(p))
+		return 0;
+
 	idx = sd->wake_idx;
 	this_cpu = smp_processor_id();
 	prev_cpu = task_cpu(p);
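Note the placement: wake_wide() is checked before any of the load computation, so a wide-waking pattern short-circuits wake_affine() entirely. In the select_task_rq_fair() of this era, a zero return here means the wakee is not pulled onto the waker's CPU and instead stays near its previous CPU, avoiding the bouncing the comment warns about.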