 kernel/sched.c      | 100
 kernel/sched_fair.c |   5
 2 files changed, 99 insertions(+), 6 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index f37a9618fac3..a757f6b11cbd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -457,6 +457,7 @@ struct rq {
 	unsigned long nr_running;
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ
 	u64 nohz_stamp;
 	unsigned char in_nohz_recently;
@@ -1803,6 +1804,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
+static void update_cpu_load(struct rq *this_rq);
 
 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
@@ -3050,23 +3052,102 @@ static void calc_load_account_active(struct rq *this_rq)
 }
 
 /*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
+ * on nth tick when cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to degradation factor for a power of two ticks,
+ * based on 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
+ *
+ * With this power of 2 load factors, we can degrade the load n times
+ * by looking at 1 bits in n and doing as many mult/shift instead of
+ * n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT	7
+static const unsigned char
+		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+					{0, 0, 0, 0, 0, 0, 0, 0},
+					{64, 32, 8, 0, 0, 0, 0, 0},
+					{96, 72, 40, 12, 1, 0, 0},
+					{112, 98, 75, 43, 15, 1, 0},
+					{120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when CPU is idle and so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+	int j = 0;
+
+	if (!missed_updates)
+		return load;
+
+	if (missed_updates >= degrade_zero_ticks[idx])
+		return 0;
+
+	if (idx == 1)
+		return load >> missed_updates;
+
+	while (missed_updates) {
+		if (missed_updates % 2)
+			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+		missed_updates >>= 1;
+		j++;
+	}
+	return load;
+}
+
+/*
  * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
  */
 static void update_cpu_load(struct rq *this_rq)
 {
 	unsigned long this_load = this_rq->load.weight;
+	unsigned long curr_jiffies = jiffies;
+	unsigned long pending_updates;
 	int i, scale;
 
 	this_rq->nr_load_updates++;
 
+	/* Avoid repeated calls on same jiffy, when moving in and out of idle */
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	this_rq->last_load_update_tick = curr_jiffies;
+
 	/* Update our load: */
-	for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+	this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
 		unsigned long old_load, new_load;
 
 		/* scale is effectively 1 << i now, and >> i divides by scale */
 
 		old_load = this_rq->cpu_load[i];
+		old_load = decay_load_missed(old_load, pending_updates - 1, i);
 		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
@@ -3074,9 +3155,15 @@ static void update_cpu_load(struct rq *this_rq)
 		 * example.
 		 */
 		if (new_load > old_load)
-			new_load += scale-1;
-		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
+			new_load += scale - 1;
+
+		this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
 	}
+}
+
+static void update_cpu_load_active(struct rq *this_rq)
+{
+	update_cpu_load(this_rq);
 
 	calc_load_account_active(this_rq);
 }
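
A quick sanity check on the degrade_factor table added above: for load idx 2 and 8 missed ticks, only bit 3 of 8 is set, so the loop does a single multiply by degrade_factor[2][3] = 12 and a shift by DEGRADE_SHIFT, i.e. load * 12/128, an approximation of the exact (3/4)^8 ≈ 0.100. The following stand-alone user-space sketch is not part of the patch (table values are copied from it) and just demonstrates the same bit-walk:

#include <stdio.h>

#define DEGRADE_SHIFT		7
#define CPU_LOAD_IDX_MAX	5

static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] =
	{0, 8, 32, 64, 128};
static const unsigned char degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
	{0, 0, 0, 0, 0, 0, 0, 0},
	{64, 32, 8, 0, 0, 0, 0, 0},
	{96, 72, 40, 12, 1, 0, 0},
	{112, 98, 75, 43, 15, 1, 0},
	{120, 112, 98, 76, 45, 16, 2},
};

/* Same bit-walk as the patch: one multiply/shift per set bit of 'missed'. */
static unsigned long decay_load_missed(unsigned long load,
				       unsigned long missed, int idx)
{
	int j = 0;

	if (!missed)
		return load;
	if (missed >= degrade_zero_ticks[idx])
		return 0;
	if (idx == 1)
		return load >> missed;	/* per-tick factor is exactly 1/2 */

	while (missed) {
		if (missed & 1)
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
		missed >>= 1;
		j++;
	}
	return load;
}

int main(void)
{
	/* idx 2, 8 missed ticks: 1024 * 12 / 128 = 96; exact (3/4)^8 gives ~102 */
	printf("%lu\n", decay_load_missed(1024, 8, 2));
	return 0;
}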
@@ -3464,7 +3551,7 @@ void scheduler_tick(void)
 
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
-	update_cpu_load(rq);
+	update_cpu_load_active(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	raw_spin_unlock(&rq->lock);
 
@@ -7688,6 +7775,9 @@ void __init sched_init(void)
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
 			rq->cpu_load[j] = 0;
+
+		rq->last_load_update_tick = jiffies;
+
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
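
Taken together, the update_cpu_load() hunks above mean that on the first update after an idle gap, pending_updates = curr_jiffies - last_load_update_tick, cpu_load[0] is set straight to the instantaneous load, and each higher index first decays its stale value over the pending_updates - 1 silent ticks before folding in the current load as on a normal tick. A condensed, illustrative sketch of that per-index step, reusing decay_load_missed() from the sketch above (names mirror the patch; this is not kernel code):

/* Illustrative only: one cpu_load[] slot, i >= 1, after an idle gap. */
static void update_one_index(unsigned long *cpu_load, int i,
			     unsigned long this_load,
			     unsigned long pending_updates)
{
	unsigned long scale = 1UL << i;
	unsigned long old_load, new_load;

	/* decay over the ticks that were skipped while idle */
	old_load = decay_load_missed(cpu_load[i], pending_updates - 1, i);
	new_load = this_load;
	if (new_load > old_load)
		new_load += scale - 1;	/* round the division up when rising */

	/* usual one-tick exponential average: (old*(2^i - 1) + new) / 2^i */
	cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
}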
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index eed35eded602..22b8b4f2b616 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3420,9 +3420,12 @@ static void run_rebalance_domains(struct softirq_action *h)
 		if (need_resched())
 			break;
 
+		rq = cpu_rq(balance_cpu);
+		raw_spin_lock_irq(&rq->lock);
+		update_cpu_load(rq);
+		raw_spin_unlock_irq(&rq->lock);
 		rebalance_domains(balance_cpu, CPU_IDLE);
 
-		rq = cpu_rq(balance_cpu);
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
 	}
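
For reference, a rough sketch of how the two call paths divide the work after this patch (a summary of the hunks above, not a literal copy of either file):

/*
 * scheduler_tick()                      - busy CPU, rq->lock held
 *     update_cpu_load_active(rq)
 *         update_cpu_load(rq)           - decay missed ticks, fold in load
 *         calc_load_account_active(rq)  - loadavg accounting, tick path only
 *
 * run_rebalance_domains()               - nohz idle balancing
 *     for each idle balance_cpu:
 *         raw_spin_lock_irq(&rq->lock)
 *         update_cpu_load(rq)           - fixes up the idle CPU's cpu_load[];
 *                                         returns early if already updated
 *                                         on this jiffy
 *         raw_spin_unlock_irq(&rq->lock)
 *         rebalance_domains(balance_cpu, CPU_IDLE)
 */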