Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	181
1 files changed, 157 insertions, 24 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 093df593e45d..b3f2b4187859 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -583,7 +583,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * cases. LITMUS^RT amplifies the effects of this problem. Hence, we
 	 * turn it off to avoid stalling clocks. */
 	/*
-	if (test_tsk_need_resched(p))
+	if (rq->curr->se.on_rq && test_tsk_need_resched(p))
 		rq->skip_clock_update = 1;
 	*/
 }
@@ -741,7 +741,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 		size_t cnt, loff_t *ppos)
 {
 	char buf[64];
-	char *cmp = buf;
+	char *cmp;
 	int neg = 0;
 	int i;
 
@@ -752,6 +752,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 		return -EFAULT;
 
 	buf[cnt] = 0;
+	cmp = strstrip(buf);
 
 	if (strncmp(buf, "NO_", 3) == 0) {
 		neg = 1;
@@ -759,9 +760,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	}
 
 	for (i = 0; sched_feat_names[i]; i++) {
-		int len = strlen(sched_feat_names[i]);
-
-		if (strncmp(cmp, sched_feat_names[i], len) == 0) {
+		if (strcmp(cmp, sched_feat_names[i]) == 0) {
 			if (neg)
 				sysctl_sched_features &= ~(1UL << i);
 			else
@@ -1877,12 +1876,6 @@ static void dec_nr_running(struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	if (task_has_rt_policy(p)) {
-		p->se.load.weight = 0;
-		p->se.load.inv_weight = WMULT_CONST;
-		return;
-	}
-
 	/*
 	 * SCHED_IDLE tasks get minimal weight:
 	 */
@@ -3011,6 +3004,15 @@ static long calc_load_fold_active(struct rq *this_rq)
 	return delta;
 }
 
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	load += 1UL << (FSHIFT - 1);
+	return load >> FSHIFT;
+}
+
 #ifdef CONFIG_NO_HZ
 /*
  * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3040,6 +3042,128 @@ static long calc_load_fold_idle(void)
 
 	return delta;
 }
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+	unsigned long result = 1UL << frac_bits;
+
+	if (n) for (;;) {
+		if (n & 1) {
+			result *= x;
+			result += 1UL << (frac_bits - 1);
+			result >>= frac_bits;
+		}
+		n >>= 1;
+		if (!n)
+			break;
+		x *= x;
+		x += 1UL << (frac_bits - 1);
+		x >>= frac_bits;
+	}
+
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0            1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+/*
+ * NO_HZ can leave us missing all per-cpu ticks calling
+ * calc_load_account_active(), but since an idle CPU folds its delta into
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
+ *
+ * Once we've updated the global active value, we need to apply the exponential
+ * weights adjusted to the number of cycles missed.
+ */
+static void calc_global_nohz(unsigned long ticks)
+{
+	long delta, active, n;
+
+	if (time_before(jiffies, calc_load_update))
+		return;
+
+	/*
+	 * If we crossed a calc_load_update boundary, make sure to fold
+	 * any pending idle changes, the respective CPUs might have
+	 * missed the tick driven calc_load_account_active() update
+	 * due to NO_HZ.
+	 */
+	delta = calc_load_fold_idle();
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+
+	/*
+	 * If we were idle for multiple load cycles, apply them.
+	 */
+	if (ticks >= LOAD_FREQ) {
+		n = ticks / LOAD_FREQ;
+
+		active = atomic_long_read(&calc_load_tasks);
+		active = active > 0 ? active * FIXED_1 : 0;
+
+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+		calc_load_update += n * LOAD_FREQ;
+	}
+
+	/*
+	 * Its possible the remainder of the above division also crosses
+	 * a LOAD_FREQ period, the regular check in calc_global_load()
+	 * which comes after this will take care of that.
+	 *
+	 * Consider us being 11 ticks before a cycle completion, and us
+	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
+	 * age us 4 cycles, and the test in calc_global_load() will
+	 * pick up the final one.
+	 */
+}
 #else
 static void calc_load_account_idle(struct rq *this_rq)
 {
@@ -3049,6 +3173,10 @@ static inline long calc_load_fold_idle(void)
 {
 	return 0;
 }
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
 #endif
 
 /**
@@ -3066,24 +3194,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 	loads[2] = (avenrun[2] + offset) << shift;
 }
 
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	return load >> FSHIFT;
-}
-
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
  */
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
 {
-	unsigned long upd = calc_load_update + 10;
 	long active;
 
-	if (time_before(jiffies, upd))
+	calc_global_nohz(ticks);
+
+	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
 	active = atomic_long_read(&calc_load_tasks);
@@ -3745,7 +3866,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
 	if (prev->se.on_rq)
 		update_rq_clock(rq);
-	rq->skip_clock_update = 0;
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
@@ -3821,7 +3941,6 @@ need_resched_nonpreemptible:
 	hrtick_clear(rq);
 
 	raw_spin_lock_irq(&rq->lock);
-	clear_tsk_need_resched(prev);
 
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3853,6 +3972,8 @@ need_resched_nonpreemptible:
 
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
+	clear_tsk_need_resched(prev);
+	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
@@ -5439,7 +5560,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	idle->se.exec_start = sched_clock();
 
 	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+	/*
+	 * We're having a chicken and egg problem, even though we are
+	 * holding rq->lock, the cpu isn't yet set to this cpu so the
+	 * lockdep check in task_group() will fail.
+	 *
+	 * Similar case to sched_fork(). / Alternatively we could
+	 * use task_rq_lock() here and obtain the other rq->lock.
+	 *
+	 * Silence PROVE_RCU
+	 */
+	rcu_read_lock();
 	__set_task_cpu(idle, cpu);
+	rcu_read_unlock();
 
 	rq->curr = rq->idle = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
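
The derivation in the calc_load_n() comment above is easy to sanity-check outside the kernel. The sketch below is a standalone userspace program, not part of this patch: it copies the three fixed-point helpers the patch introduces and compares aging a load average by four cycles in one calc_load_n() call against four individual calc_load() steps. The FSHIFT, FIXED_1 and EXP_1 values mirror include/linux/sched.h; main() and its example inputs are invented purely for illustration.

/*
 * Standalone userspace sketch -- not kernel code.  It mirrors the fixed-point
 * helpers introduced by this patch so the calc_load_n() shortcut can be
 * compared against n individual calc_load() steps.
 */
#include <stdio.h>

#define FSHIFT	11			/* bits of fixed-point precision */
#define FIXED_1	(1UL << FSHIFT)		/* 1.0 in fixed point */
#define EXP_1	1884			/* 1/exp(5s/1min) in fixed point */

/* One load-average step: load = load*exp + active*(1 - exp), rounded. */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);
	return load >> FSHIFT;
}

/* x^n in fixed point via repeated squaring, rounding after each multiply. */
static unsigned long fixed_power_int(unsigned long x, unsigned int frac_bits,
				     unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	while (n) {
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);
			result >>= frac_bits;
		}
		n >>= 1;
		if (!n)
			break;
		x *= x;
		x += 1UL << (frac_bits - 1);
		x >>= frac_bits;
	}
	return result;
}

/* n steps at once: a_n = a_0*e^n + a*(1 - e^n), per the patch comment. */
static unsigned long calc_load_n(unsigned long load, unsigned long exp,
				 unsigned long active, unsigned int n)
{
	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}

int main(void)
{
	unsigned long a0 = 3 * FIXED_1;	/* 1-minute average starts at 3.00 */
	unsigned long active = 0;	/* fully idle while ticks were stopped */
	unsigned long step = a0;
	unsigned long batch;
	unsigned int i, n = 4;		/* pretend we slept 4 LOAD_FREQ cycles */

	for (i = 0; i < n; i++)
		step = calc_load(step, EXP_1, active);
	batch = calc_load_n(a0, EXP_1, active, n);

	/* Both results print 2.14 here; they agree up to fixed-point rounding. */
	printf("repeated: %lu.%02lu  batched: %lu.%02lu\n",
	       step >> FSHIFT, ((step & (FIXED_1 - 1)) * 100) >> FSHIFT,
	       batch >> FSHIFT, ((batch & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}

Both paths land on the same 2.14 (the raw fixed-point values can differ by a couple of ULPs, since calc_load() rounds after every step while calc_load_n() rounds once per squaring), which is the property calc_global_nohz() relies on when it ages avenrun[] over several missed LOAD_FREQ periods in one go.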