-rw-r--r--  kernel/sched.c        100
-rw-r--r--  kernel/sched_fair.c     5
2 files changed, 99 insertions, 6 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index f37a9618fac3..a757f6b11cbd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -457,6 +457,7 @@ struct rq {
         unsigned long nr_running;
         #define CPU_LOAD_IDX_MAX 5
         unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+        unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ
         u64 nohz_stamp;
         unsigned char in_nohz_recently;
@@ -1803,6 +1804,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
+static void update_cpu_load(struct rq *this_rq);

 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
@@ -3050,23 +3052,102 @@ static void calc_load_account_active(struct rq *this_rq)
 }

 /*
+ * The exact cpu_load at various idx values, calculated at every tick, would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and the update gets
+ * called on the nth tick, when the cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does an efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding the 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which the load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, with a row for each load idx.
+ * Each column corresponds to the degradation factor for a power of two ticks,
+ * based on the 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of the exact factor 3^8/4^8).
+ *
+ * With these power of 2 load factors, we can degrade the load n times
+ * by looking at the 1 bits in n and doing that many mult/shifts instead of
+ * the n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT           7
+static const unsigned char
+                degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+                degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+                        {0, 0, 0, 0, 0, 0, 0, 0},
+                        {64, 32, 8, 0, 0, 0, 0, 0},
+                        {96, 72, 40, 12, 1, 0, 0},
+                        {112, 98, 75, 43, 15, 1, 0},
+                        {120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when the CPU is idle, so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+        int j = 0;
+
+        if (!missed_updates)
+                return load;
+
+        if (missed_updates >= degrade_zero_ticks[idx])
+                return 0;
+
+        if (idx == 1)
+                return load >> missed_updates;
+
+        while (missed_updates) {
+                if (missed_updates % 2)
+                        load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+                missed_updates >>= 1;
+                j++;
+        }
+        return load;
+}
+
+/*
  * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
  */
 static void update_cpu_load(struct rq *this_rq)
 {
         unsigned long this_load = this_rq->load.weight;
+        unsigned long curr_jiffies = jiffies;
+        unsigned long pending_updates;
         int i, scale;

         this_rq->nr_load_updates++;

+        /* Avoid repeated calls on the same jiffy, when moving in and out of idle */
+        if (curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        this_rq->last_load_update_tick = curr_jiffies;
+
         /* Update our load: */
-        for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+        this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+        for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
                 unsigned long old_load, new_load;

                 /* scale is effectively 1 << i now, and >> i divides by scale */

                 old_load = this_rq->cpu_load[i];
+                old_load = decay_load_missed(old_load, pending_updates - 1, i);
                 new_load = this_load;
                 /*
@@ -3074,9 +3155,15 @@ static void update_cpu_load(struct rq *this_rq)
                  * example.
                  */
                 if (new_load > old_load)
-                        new_load += scale-1;
-                this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
+                        new_load += scale - 1;
+
+                this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
         }
+}
+
+static void update_cpu_load_active(struct rq *this_rq)
+{
+        update_cpu_load(this_rq);

         calc_load_account_active(this_rq);
 }
@@ -3464,7 +3551,7 @@ void scheduler_tick(void)

         raw_spin_lock(&rq->lock);
         update_rq_clock(rq);
-        update_cpu_load(rq);
+        update_cpu_load_active(rq);
         curr->sched_class->task_tick(rq, curr, 0);
         raw_spin_unlock(&rq->lock);

@@ -7688,6 +7775,9 @@ void __init sched_init(void)

                 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
                         rq->cpu_load[j] = 0;
+
+                rq->last_load_update_tick = jiffies;
+
 #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
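
For readers who want to poke at the decay table outside the kernel, here is a minimal user-space sketch of decay_load_missed() from the hunk above, compared against the naive tick-by-tick decay it avoids. The table values are copied from the patch; decay_load_exact(), the starting load of 2048 and the tick counts are illustrative choices, not part of the patch.

/*
 * decay_sketch.c - user-space sketch of decay_load_missed(), checked
 * against the exact one-tick-at-a-time decay described in the comment
 * block of the patch.
 *
 * Build: gcc -O2 -o decay_sketch decay_sketch.c
 */
#include <stdio.h>

#define CPU_LOAD_IDX_MAX        5
#define DEGRADE_SHIFT           7

static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] =
        {0, 8, 32, 64, 128};
static const unsigned char degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
        {0, 0, 0, 0, 0, 0, 0, 0},
        {64, 32, 8, 0, 0, 0, 0, 0},
        {96, 72, 40, 12, 1, 0, 0},
        {112, 98, 75, 43, 15, 1, 0},
        {120, 112, 98, 76, 45, 16, 2} };

/* Same logic as the kernel function: walk the set bits of missed_updates
 * and apply one precomputed power-of-two degradation factor per bit. */
static unsigned long
decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
{
        int j = 0;

        if (!missed_updates)
                return load;
        if (missed_updates >= degrade_zero_ticks[idx])
                return 0;
        if (idx == 1)
                return load >> missed_updates;

        while (missed_updates) {
                if (missed_updates % 2)
                        load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
                missed_updates >>= 1;
                j++;
        }
        return load;
}

/* The exact decay from the comment block: apply
 * load = (2^idx - 1) / 2^idx * load once per missed tick. */
static unsigned long
decay_load_exact(unsigned long load, unsigned long missed_updates, int idx)
{
        unsigned long scale = 1UL << idx;

        while (missed_updates--)
                load = load * (scale - 1) / scale;
        return load;
}

int main(void)
{
        const unsigned long load = 2048;
        const unsigned long ticks[] = {1, 5, 8, 33};
        unsigned int t;
        int idx;

        for (idx = 1; idx < CPU_LOAD_IDX_MAX; idx++)
                for (t = 0; t < sizeof(ticks) / sizeof(ticks[0]); t++)
                        printf("idx=%d missed=%lu table=%lu exact=%lu\n",
                               idx, ticks[t],
                               decay_load_missed(load, ticks[t], idx),
                               decay_load_exact(load, ticks[t], idx));
        return 0;
}

The two columns track each other closely; the small differences are the cost of the 128-point approximation that lets the kernel do one multiply/shift per set bit of the missed-tick count instead of one per missed tick.
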
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index eed35eded602..22b8b4f2b616 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3420,9 +3420,12 @@ static void run_rebalance_domains(struct softirq_action *h)
                 if (need_resched())
                         break;

+                rq = cpu_rq(balance_cpu);
+                raw_spin_lock_irq(&rq->lock);
+                update_cpu_load(rq);
+                raw_spin_unlock_irq(&rq->lock);
                 rebalance_domains(balance_cpu, CPU_IDLE);

-                rq = cpu_rq(balance_cpu);
                 if (time_after(this_rq->next_balance, rq->next_balance))
                         this_rq->next_balance = rq->next_balance;
         }
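
Stepping back to the averaging loop in update_cpu_load(): the per-index formula cpu_load[i] = (old * (2^i - 1) + new) >> i is easy to play with in user space. The sketch below is a toy model, not kernel code: the constant load of 10 and the 20 simulated ticks are invented, and only the arithmetic, including the round-up when the load is increasing, mirrors the patched loop.

/*
 * cpu_load_avg.c - toy model of the cpu_load[] averaging step in
 * update_cpu_load(). A constant load of 10 is fed in for 20 ticks to
 * show how quickly each index converges; without the round-up on an
 * increasing load, index 1 would stall at 9 and never reach 10.
 *
 * Build: gcc -O2 -o cpu_load_avg cpu_load_avg.c
 */
#include <stdio.h>

#define CPU_LOAD_IDX_MAX        5

int main(void)
{
        unsigned long cpu_load[CPU_LOAD_IDX_MAX] = {0};
        const unsigned long this_load = 10;     /* stand-in for rq->load.weight */
        unsigned long scale;
        int tick, i;

        for (tick = 0; tick < 20; tick++) {
                cpu_load[0] = this_load;        /* fasttrack for idx 0 */
                for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
                        unsigned long old_load = cpu_load[i];
                        unsigned long new_load = this_load;

                        /* Round up the averaging division when the load is
                         * increasing, so the average can actually reach
                         * new_load instead of stopping one below it. */
                        if (new_load > old_load)
                                new_load += scale - 1;
                        cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
                }
                printf("tick %2d: cpu_load = [%lu %lu %lu %lu %lu]\n", tick,
                       cpu_load[0], cpu_load[1], cpu_load[2],
                       cpu_load[3], cpu_load[4]);
        }
        return 0;
}

Combined with decay_load_missed() above, a CPU returning from a long tickless-idle period first decays these averages for the pending_updates - 1 ticks it slept through and only then folds in the current load once, rather than treating the stale values as if they had been refreshed every tick.
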