Diffstat (limited to 'kernel/sched.c')

 kernel/sched.c | 100
 1 file changed, 95 insertions(+), 5 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index f37a9618fac3..a757f6b11cbd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -457,6 +457,7 @@ struct rq {
 	unsigned long nr_running;
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ
 	u64 nohz_stamp;
 	unsigned char in_nohz_recently;
@@ -1803,6 +1804,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
+static void update_cpu_load(struct rq *this_rq);
 
 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
@@ -3050,23 +3052,102 @@ static void calc_load_account_active(struct rq *this_rq)
 }
 
 /*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
+ * on the nth tick, when the cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does an efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding a 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to the degradation factor for a power of two ticks,
+ * based on the 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of the exact factor 3^8/4^8).
+ *
+ * With these power of 2 load factors, we can degrade the load n times
+ * by looking at the 1 bits in n and doing as many mult/shifts instead of
+ * the n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT		7
+static const unsigned char
+		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+					{0, 0, 0, 0, 0, 0, 0, 0},
+					{64, 32, 8, 0, 0, 0, 0, 0},
+					{96, 72, 40, 12, 1, 0, 0},
+					{112, 98, 75, 43, 15, 1, 0},
+					{120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when CPU is idle and so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+	int j = 0;
+
+	if (!missed_updates)
+		return load;
+
+	if (missed_updates >= degrade_zero_ticks[idx])
+		return 0;
+
+	if (idx == 1)
+		return load >> missed_updates;
+
+	while (missed_updates) {
+		if (missed_updates % 2)
+			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+		missed_updates >>= 1;
+		j++;
+	}
+	return load;
+}
+
+/*
  * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
  */
 static void update_cpu_load(struct rq *this_rq)
 {
 	unsigned long this_load = this_rq->load.weight;
+	unsigned long curr_jiffies = jiffies;
+	unsigned long pending_updates;
 	int i, scale;
 
 	this_rq->nr_load_updates++;
 
+	/* Avoid repeated calls on same jiffy, when moving in and out of idle */
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	this_rq->last_load_update_tick = curr_jiffies;
+
 	/* Update our load: */
-	for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+	this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
 		unsigned long old_load, new_load;
 
 		/* scale is effectively 1 << i now, and >> i divides by scale */
 
 		old_load = this_rq->cpu_load[i];
+		old_load = decay_load_missed(old_load, pending_updates - 1, i);
 		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
@@ -3074,9 +3155,15 @@ static void update_cpu_load(struct rq *this_rq)
 		 * example.
 		 */
 		if (new_load > old_load)
-			new_load += scale-1;
-		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
+			new_load += scale - 1;
+
+		this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
 	}
+}
+
+static void update_cpu_load_active(struct rq *this_rq)
+{
+	update_cpu_load(this_rq);
 
 	calc_load_account_active(this_rq);
 }
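
The trick in decay_load_missed() is plain binary decomposition: each degrade_factor row holds the decay factor for 1, 2, 4, ... 128 ticks on a 128 point scale, so decaying n ticks costs one multiply/shift per set bit of n instead of n multiply/shifts. For the comment's example, row 2, col 3: (3/4)^8 = 6561/65536, and 6561/65536 * 128 ~= 12.8, stored as 12. The minimal userspace sketch below reproduces the function next to the exact tick-by-tick decay it replaces; decay_exact() and main() are illustrative additions, not part of the patch.

#include <stdio.h>

#define CPU_LOAD_IDX_MAX	5
#define DEGRADE_SHIFT		7

static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] =
	{0, 8, 32, 64, 128};
static const unsigned char
degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
	{0, 0, 0, 0, 0, 0, 0, 0},
	{64, 32, 8, 0, 0, 0, 0, 0},
	{96, 72, 40, 12, 1, 0, 0},
	{112, 98, 75, 43, 15, 1, 0},
	{120, 112, 98, 76, 45, 16, 2} };

/* Same logic as the patch: decay 'load' at index 'idx' for 'missed' ticks. */
static unsigned long
decay_load_missed(unsigned long load, unsigned long missed, int idx)
{
	int j = 0;

	if (!missed)
		return load;
	if (missed >= degrade_zero_ticks[idx])
		return 0;		/* decayed below 1/128: call it zero */
	if (idx == 1)
		return load >> missed;	/* factor 1/2 is a plain shift */

	while (missed) {
		if (missed % 2)		/* bit j set: fold in the 2^j-tick factor */
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
		missed >>= 1;
		j++;
	}
	return load;
}

/* Reference: the n mult/shifts that the table lookup avoids. */
static unsigned long
decay_exact(unsigned long load, unsigned long missed, int idx)
{
	while (missed--)
		load = (load * ((1UL << idx) - 1)) >> idx;
	return load;
}

int main(void)
{
	/* idx 2, 8 missed ticks: one table hit (12/128) vs. eight 3/4 steps */
	printf("fast:  %lu\n", decay_load_missed(1024, 8, 2));	/* 96  */
	printf("exact: %lu\n", decay_exact(1024, 8, 2));	/* 102 */
	return 0;
}

The fast path lands at 96 where the exact loop gives 102 for this input; the gap comes from truncating each factor to the 128 point scale, a harmless loss for a load figure that only needs to fade while the cpu sleeps.
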
@@ -3464,7 +3551,7 @@ void scheduler_tick(void)
 
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
-	update_cpu_load(rq);
+	update_cpu_load_active(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	raw_spin_unlock(&rq->lock);
 
@@ -7688,6 +7775,9 @@ void __init sched_init(void)
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
 			rq->cpu_load[j] = 0;
+
+		rq->last_load_update_tick = jiffies;
+
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
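
When a cpu re-enters the tick after a long nohz sleep, pending_updates counts every jiffy since last_load_update_tick; all but the last of those were idle, so they contribute pure decay before the one real sample is averaged in. Below is a hypothetical, self-contained model of the patched update_cpu_load(), with jiffies passed in explicitly and struct rq pared down to the fields used (load_weight stands in for rq->load.weight; the decay table and decay_load_missed() are repeated from the sketch above so this compiles on its own).

#include <stdio.h>

#define CPU_LOAD_IDX_MAX	5
#define DEGRADE_SHIFT		7

static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] =
	{0, 8, 32, 64, 128};
static const unsigned char
degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
	{0, 0, 0, 0, 0, 0, 0, 0},
	{64, 32, 8, 0, 0, 0, 0, 0},
	{96, 72, 40, 12, 1, 0, 0},
	{112, 98, 75, 43, 15, 1, 0},
	{120, 112, 98, 76, 45, 16, 2} };

static unsigned long
decay_load_missed(unsigned long load, unsigned long missed, int idx)
{
	int j = 0;

	if (!missed)
		return load;
	if (missed >= degrade_zero_ticks[idx])
		return 0;
	if (idx == 1)
		return load >> missed;
	while (missed) {
		if (missed % 2)
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
		missed >>= 1;
		j++;
	}
	return load;
}

/* A pared-down rq: only the fields the load update touches. */
struct rq {
	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
	unsigned long last_load_update_tick;
	unsigned long load_weight;	/* stand-in for rq->load.weight */
};

/* Model of the patched update_cpu_load(), with jiffies as a parameter. */
static void update_cpu_load(struct rq *rq, unsigned long curr_jiffies)
{
	unsigned long this_load = rq->load_weight;
	unsigned long pending_updates;
	int i, scale;

	/* avoid repeated calls on the same jiffy */
	if (curr_jiffies == rq->last_load_update_tick)
		return;

	pending_updates = curr_jiffies - rq->last_load_update_tick;
	rq->last_load_update_tick = curr_jiffies;

	rq->cpu_load[0] = this_load;	/* fasttrack for idx 0 */
	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
		unsigned long old_load, new_load;

		/* all but the last pending tick were idle: decay them */
		old_load = decay_load_missed(rq->cpu_load[i],
					     pending_updates - 1, i);
		new_load = this_load;
		if (new_load > old_load)
			new_load += scale - 1;	/* round the division up */
		rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
	}
}

int main(void)
{
	struct rq rq = { .last_load_update_tick = 100 };
	int i;

	rq.load_weight = 2048;
	update_cpu_load(&rq, 101);	/* ordinary tick: pending_updates == 1 */

	rq.load_weight = 0;		/* cpu slept tickless for 10 jiffies */
	update_cpu_load(&rq, 111);	/* one call fixes up the missed ticks */

	for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
		printf("cpu_load[%d] = %lu\n", i, rq.cpu_load[i]);
	return 0;
}

Running it shows cpu_load[1] collapsing almost to zero across the 10-jiffy gap while the higher indexes retain progressively more history, which is exactly the per-index behaviour the degrade_zero_ticks cutoffs encode.
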
