path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched.c          | 274
-rw-r--r--  kernel/sched_debug.c    |   2
-rw-r--r--  kernel/sched_fair.c     | 271
-rw-r--r--  kernel/sched_idletask.c |  26
-rw-r--r--  kernel/sched_rt.c       |  19
-rw-r--r--  kernel/sched_stoptask.c |   7
-rw-r--r--  kernel/softirq.c        |   3
-rw-r--r--  kernel/sysctl.c         |   7
-rw-r--r--  kernel/time.c           |  23
9 files changed, 469 insertions(+), 163 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..2effcb71a478 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
324 * 'curr' points to currently running entity on this cfs_rq. 324 * 'curr' points to currently running entity on this cfs_rq.
325 * It is set to NULL otherwise (i.e when none are currently running). 325 * It is set to NULL otherwise (i.e when none are currently running).
326 */ 326 */
327 struct sched_entity *curr, *next, *last; 327 struct sched_entity *curr, *next, *last, *skip;
328 328
329 unsigned int nr_spread_over; 329 unsigned int nr_spread_over;
330 330
@@ -1686,6 +1686,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1686 __release(rq2->lock); 1686 __release(rq2->lock);
1687} 1687}
1688 1688
1689#else /* CONFIG_SMP */
1690
1691/*
1692 * double_rq_lock - safely lock two runqueues
1693 *
1694 * Note this does not disable interrupts like task_rq_lock,
1695 * you need to do so manually before calling.
1696 */
1697static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1698 __acquires(rq1->lock)
1699 __acquires(rq2->lock)
1700{
1701 BUG_ON(!irqs_disabled());
1702 BUG_ON(rq1 != rq2);
1703 raw_spin_lock(&rq1->lock);
1704 __acquire(rq2->lock); /* Fake it out ;) */
1705}
1706
1707/*
1708 * double_rq_unlock - safely unlock two runqueues
1709 *
1710 * Note this does not restore interrupts like task_rq_unlock,
1711 * you need to do so manually after calling.
1712 */
1713static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1714 __releases(rq1->lock)
1715 __releases(rq2->lock)
1716{
1717 BUG_ON(rq1 != rq2);
1718 raw_spin_unlock(&rq1->lock);
1719 __release(rq2->lock);
1720}
1721
1689#endif 1722#endif
1690 1723
1691static void calc_load_account_idle(struct rq *this_rq); 1724static void calc_load_account_idle(struct rq *this_rq);
@@ -1880,7 +1913,7 @@ void account_system_vtime(struct task_struct *curr)
1880 */ 1913 */
1881 if (hardirq_count()) 1914 if (hardirq_count())
1882 __this_cpu_add(cpu_hardirq_time, delta); 1915 __this_cpu_add(cpu_hardirq_time, delta);
1883 else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) 1916 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
1884 __this_cpu_add(cpu_softirq_time, delta); 1917 __this_cpu_add(cpu_softirq_time, delta);
1885 1918
1886 irq_time_write_end(); 1919 irq_time_write_end();
@@ -1920,8 +1953,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
1920 sched_rt_avg_update(rq, irq_delta); 1953 sched_rt_avg_update(rq, irq_delta);
1921} 1954}
1922 1955
1956static int irqtime_account_hi_update(void)
1957{
1958 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
1959 unsigned long flags;
1960 u64 latest_ns;
1961 int ret = 0;
1962
1963 local_irq_save(flags);
1964 latest_ns = this_cpu_read(cpu_hardirq_time);
1965 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
1966 ret = 1;
1967 local_irq_restore(flags);
1968 return ret;
1969}
1970
1971static int irqtime_account_si_update(void)
1972{
1973 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
1974 unsigned long flags;
1975 u64 latest_ns;
1976 int ret = 0;
1977
1978 local_irq_save(flags);
1979 latest_ns = this_cpu_read(cpu_softirq_time);
1980 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
1981 ret = 1;
1982 local_irq_restore(flags);
1983 return ret;
1984}
1985
1923#else /* CONFIG_IRQ_TIME_ACCOUNTING */ 1986#else /* CONFIG_IRQ_TIME_ACCOUNTING */
1924 1987
1988#define sched_clock_irqtime (0)
1989
1925static void update_rq_clock_task(struct rq *rq, s64 delta) 1990static void update_rq_clock_task(struct rq *rq, s64 delta)
1926{ 1991{
1927 rq->clock_task += delta; 1992 rq->clock_task += delta;
@@ -2025,14 +2090,14 @@ inline int task_curr(const struct task_struct *p)
2025 2090
2026static inline void check_class_changed(struct rq *rq, struct task_struct *p, 2091static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2027 const struct sched_class *prev_class, 2092 const struct sched_class *prev_class,
2028 int oldprio, int running) 2093 int oldprio)
2029{ 2094{
2030 if (prev_class != p->sched_class) { 2095 if (prev_class != p->sched_class) {
2031 if (prev_class->switched_from) 2096 if (prev_class->switched_from)
2032 prev_class->switched_from(rq, p, running); 2097 prev_class->switched_from(rq, p);
2033 p->sched_class->switched_to(rq, p, running); 2098 p->sched_class->switched_to(rq, p);
2034 } else 2099 } else if (oldprio != p->prio)
2035 p->sched_class->prio_changed(rq, p, oldprio, running); 2100 p->sched_class->prio_changed(rq, p, oldprio);
2036} 2101}
2037 2102
2038static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) 2103static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2566,6 +2631,7 @@ static void __sched_fork(struct task_struct *p)
2566 p->se.sum_exec_runtime = 0; 2631 p->se.sum_exec_runtime = 0;
2567 p->se.prev_sum_exec_runtime = 0; 2632 p->se.prev_sum_exec_runtime = 0;
2568 p->se.nr_migrations = 0; 2633 p->se.nr_migrations = 0;
2634 p->se.vruntime = 0;
2569 2635
2570#ifdef CONFIG_SCHEDSTATS 2636#ifdef CONFIG_SCHEDSTATS
2571 memset(&p->se.statistics, 0, sizeof(p->se.statistics)); 2637 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3568,6 +3634,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
3568} 3634}
3569 3635
3570/* 3636/*
3637 * Account system cpu time to a process and desired cpustat field
3638 * @p: the process that the cpu time gets accounted to
3639 * @cputime: the cpu time spent in kernel space since the last update
3640 * @cputime_scaled: cputime scaled by cpu frequency
3641 * @target_cputime64: pointer to cpustat field that has to be updated
3642 */
3643static inline
3644void __account_system_time(struct task_struct *p, cputime_t cputime,
3645 cputime_t cputime_scaled, cputime64_t *target_cputime64)
3646{
3647 cputime64_t tmp = cputime_to_cputime64(cputime);
3648
3649 /* Add system time to process. */
3650 p->stime = cputime_add(p->stime, cputime);
3651 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3652 account_group_system_time(p, cputime);
3653
3654 /* Add system time to cpustat. */
3655 *target_cputime64 = cputime64_add(*target_cputime64, tmp);
3656 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
3657
3658 /* Account for system time used */
3659 acct_update_integrals(p);
3660}
3661
3662/*
3571 * Account system cpu time to a process. 3663 * Account system cpu time to a process.
3572 * @p: the process that the cpu time gets accounted to 3664 * @p: the process that the cpu time gets accounted to
3573 * @hardirq_offset: the offset to subtract from hardirq_count() 3665 * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3578,33 +3670,90 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
3578 cputime_t cputime, cputime_t cputime_scaled) 3670 cputime_t cputime, cputime_t cputime_scaled)
3579{ 3671{
3580 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 3672 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3581 cputime64_t tmp; 3673 cputime64_t *target_cputime64;
3582 3674
3583 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { 3675 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
3584 account_guest_time(p, cputime, cputime_scaled); 3676 account_guest_time(p, cputime, cputime_scaled);
3585 return; 3677 return;
3586 } 3678 }
3587 3679
3588 /* Add system time to process. */
3589 p->stime = cputime_add(p->stime, cputime);
3590 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3591 account_group_system_time(p, cputime);
3592
3593 /* Add system time to cpustat. */
3594 tmp = cputime_to_cputime64(cputime);
3595 if (hardirq_count() - hardirq_offset) 3680 if (hardirq_count() - hardirq_offset)
3596 cpustat->irq = cputime64_add(cpustat->irq, tmp); 3681 target_cputime64 = &cpustat->irq;
3597 else if (in_serving_softirq()) 3682 else if (in_serving_softirq())
3598 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 3683 target_cputime64 = &cpustat->softirq;
3599 else 3684 else
3600 cpustat->system = cputime64_add(cpustat->system, tmp); 3685 target_cputime64 = &cpustat->system;
3601 3686
3602 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); 3687 __account_system_time(p, cputime, cputime_scaled, target_cputime64);
3688}
3603 3689
3604 /* Account for system time used */ 3690#ifdef CONFIG_IRQ_TIME_ACCOUNTING
3605 acct_update_integrals(p); 3691/*
3692 * Account a tick to a process and cpustat
3693 * @p: the process that the cpu time gets accounted to
3694 * @user_tick: is the tick from userspace
3695 * @rq: the pointer to rq
3696 *
3697 * Tick demultiplexing follows the order
3698 * - pending hardirq update
3699 * - pending softirq update
3700 * - user_time
3701 * - idle_time
3702 * - system time
3703 * - check for guest_time
3704 * - else account as system_time
3705 *
3706 * Check for hardirq is done both for system and user time as there is
3707 * no timer going off while we are on hardirq and hence we may never get an
3708 * opportunity to update it solely in system time.
3709 * p->stime and friends are only updated on system time and not on irq
3710 * softirq as those do not count in task exec_runtime any more.
3711 */
3712static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3713 struct rq *rq)
3714{
3715 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3716 cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
3717 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3718
3719 if (irqtime_account_hi_update()) {
3720 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3721 } else if (irqtime_account_si_update()) {
3722 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3723 } else if (this_cpu_ksoftirqd() == p) {
3724 /*
3725 * ksoftirqd time do not get accounted in cpu_softirq_time.
3726 * So, we have to handle it separately here.
3727 * Also, p->stime needs to be updated for ksoftirqd.
3728 */
3729 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3730 &cpustat->softirq);
3731 } else if (user_tick) {
3732 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3733 } else if (p == rq->idle) {
3734 account_idle_time(cputime_one_jiffy);
3735 } else if (p->flags & PF_VCPU) { /* System time or guest time */
3736 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
3737 } else {
3738 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3739 &cpustat->system);
3740 }
3606} 3741}
3607 3742
3743static void irqtime_account_idle_ticks(int ticks)
3744{
3745 int i;
3746 struct rq *rq = this_rq();
3747
3748 for (i = 0; i < ticks; i++)
3749 irqtime_account_process_tick(current, 0, rq);
3750}
3751#else
3752static void irqtime_account_idle_ticks(int ticks) {}
3753static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3754 struct rq *rq) {}
3755#endif
3756
3608/* 3757/*
3609 * Account for involuntary wait time. 3758 * Account for involuntary wait time.
3610 * @steal: the cpu time spent in involuntary wait 3759 * @steal: the cpu time spent in involuntary wait
@@ -3645,6 +3794,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
3645 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 3794 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3646 struct rq *rq = this_rq(); 3795 struct rq *rq = this_rq();
3647 3796
3797 if (sched_clock_irqtime) {
3798 irqtime_account_process_tick(p, user_tick, rq);
3799 return;
3800 }
3801
3648 if (user_tick) 3802 if (user_tick)
3649 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 3803 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3650 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) 3804 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3670,6 +3824,12 @@ void account_steal_ticks(unsigned long ticks)
3670 */ 3824 */
3671void account_idle_ticks(unsigned long ticks) 3825void account_idle_ticks(unsigned long ticks)
3672{ 3826{
3827
3828 if (sched_clock_irqtime) {
3829 irqtime_account_idle_ticks(ticks);
3830 return;
3831 }
3832
3673 account_idle_time(jiffies_to_cputime(ticks)); 3833 account_idle_time(jiffies_to_cputime(ticks));
3674} 3834}
3675 3835
@@ -4570,11 +4730,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
4570 4730
4571 if (running) 4731 if (running)
4572 p->sched_class->set_curr_task(rq); 4732 p->sched_class->set_curr_task(rq);
4573 if (on_rq) { 4733 if (on_rq)
4574 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); 4734 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
4575 4735
4576 check_class_changed(rq, p, prev_class, oldprio, running); 4736 check_class_changed(rq, p, prev_class, oldprio);
4577 }
4578 task_rq_unlock(rq, &flags); 4737 task_rq_unlock(rq, &flags);
4579} 4738}
4580 4739
@@ -4902,11 +5061,10 @@ recheck:
4902 5061
4903 if (running) 5062 if (running)
4904 p->sched_class->set_curr_task(rq); 5063 p->sched_class->set_curr_task(rq);
4905 if (on_rq) { 5064 if (on_rq)
4906 activate_task(rq, p, 0); 5065 activate_task(rq, p, 0);
4907 5066
4908 check_class_changed(rq, p, prev_class, oldprio, running); 5067 check_class_changed(rq, p, prev_class, oldprio);
4909 }
4910 __task_rq_unlock(rq); 5068 __task_rq_unlock(rq);
4911 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 5069 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4912 5070
@@ -5323,6 +5481,58 @@ void __sched yield(void)
5323} 5481}
5324EXPORT_SYMBOL(yield); 5482EXPORT_SYMBOL(yield);
5325 5483
5484/**
5485 * yield_to - yield the current processor to another thread in
5486 * your thread group, or accelerate that thread toward the
5487 * processor it's on.
5488 *
5489 * It's the caller's job to ensure that the target task struct
5490 * can't go away on us before we can do any checks.
5491 *
5492 * Returns true if we indeed boosted the target task.
5493 */
5494bool __sched yield_to(struct task_struct *p, bool preempt)
5495{
5496 struct task_struct *curr = current;
5497 struct rq *rq, *p_rq;
5498 unsigned long flags;
5499 bool yielded = 0;
5500
5501 local_irq_save(flags);
5502 rq = this_rq();
5503
5504again:
5505 p_rq = task_rq(p);
5506 double_rq_lock(rq, p_rq);
5507 while (task_rq(p) != p_rq) {
5508 double_rq_unlock(rq, p_rq);
5509 goto again;
5510 }
5511
5512 if (!curr->sched_class->yield_to_task)
5513 goto out;
5514
5515 if (curr->sched_class != p->sched_class)
5516 goto out;
5517
5518 if (task_running(p_rq, p) || p->state)
5519 goto out;
5520
5521 yielded = curr->sched_class->yield_to_task(rq, p, preempt);
5522 if (yielded)
5523 schedstat_inc(rq, yld_count);
5524
5525out:
5526 double_rq_unlock(rq, p_rq);
5527 local_irq_restore(flags);
5528
5529 if (yielded)
5530 schedule();
5531
5532 return yielded;
5533}
5534EXPORT_SYMBOL_GPL(yield_to);
5535
5326/* 5536/*
5327 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 5537 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
5328 * that process accounting knows that this is a task in IO wait state. 5538 * that process accounting knows that this is a task in IO wait state.
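The yield_to() added above pins the target's runqueue with a lock-then-revalidate loop: take both rq locks, re-read task_rq(p), and retry if the task migrated in between. A minimal userspace sketch of that retry idiom follows, assuming invented names (box, item, lock_mine_and_items_box); it models the pattern only and is not the kernel's double_rq_lock().

/*
 * Sketch: lock my container and the target item's current container,
 * then re-check that the item did not move before the second lock landed.
 * All types and names here are invented for illustration.
 */
#include <pthread.h>
#include <stdatomic.h>

struct box { pthread_mutex_t lock; };		/* stands in for struct rq */
struct item { _Atomic(struct box *) home; };	/* stands in for a task's rq */

static void lock_two(struct box *a, struct box *b)
{
	/* lock in address order so two callers cannot deadlock (one
	 * conventional choice; the sketch does not mirror double_rq_lock) */
	if (a == b) { pthread_mutex_lock(&a->lock); return; }
	if (a < b) { pthread_mutex_lock(&a->lock); pthread_mutex_lock(&b->lock); }
	else       { pthread_mutex_lock(&b->lock); pthread_mutex_lock(&a->lock); }
}

static void unlock_two(struct box *a, struct box *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a != b)
		pthread_mutex_unlock(&b->lock);
}

/* Mirrors yield_to()'s "again:" loop: double-lock, revalidate, retry. */
static struct box *lock_mine_and_items_box(struct box *mine, struct item *it)
{
	for (;;) {
		struct box *b = atomic_load(&it->home);

		lock_two(mine, b);
		if (atomic_load(&it->home) == b)
			return b;		/* still where we locked it */
		unlock_two(mine, b);		/* it migrated; try again */
	}
}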
@@ -7796,6 +8006,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
7796 INIT_LIST_HEAD(&cfs_rq->tasks); 8006 INIT_LIST_HEAD(&cfs_rq->tasks);
7797#ifdef CONFIG_FAIR_GROUP_SCHED 8007#ifdef CONFIG_FAIR_GROUP_SCHED
7798 cfs_rq->rq = rq; 8008 cfs_rq->rq = rq;
8009 /* allow initial update_cfs_load() to truncate */
8010#ifdef CONFIG_SMP
8011 cfs_rq->load_stamp = 1;
8012#endif
7799#endif 8013#endif
7800 cfs_rq->min_vruntime = (u64)(-(1LL << 20)); 8014 cfs_rq->min_vruntime = (u64)(-(1LL << 20));
7801} 8015}
@@ -8109,6 +8323,8 @@ EXPORT_SYMBOL(__might_sleep);
8109#ifdef CONFIG_MAGIC_SYSRQ 8323#ifdef CONFIG_MAGIC_SYSRQ
8110static void normalize_task(struct rq *rq, struct task_struct *p) 8324static void normalize_task(struct rq *rq, struct task_struct *p)
8111{ 8325{
8326 const struct sched_class *prev_class = p->sched_class;
8327 int old_prio = p->prio;
8112 int on_rq; 8328 int on_rq;
8113 8329
8114 on_rq = p->se.on_rq; 8330 on_rq = p->se.on_rq;
@@ -8119,6 +8335,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
8119 activate_task(rq, p, 0); 8335 activate_task(rq, p, 0);
8120 resched_task(rq->curr); 8336 resched_task(rq->curr);
8121 } 8337 }
8338
8339 check_class_changed(rq, p, prev_class, old_prio);
8122} 8340}
8123 8341
8124void normalize_rt_tasks(void) 8342void normalize_rt_tasks(void)
@@ -8510,7 +8728,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8510 /* Propagate contribution to hierarchy */ 8728 /* Propagate contribution to hierarchy */
8511 raw_spin_lock_irqsave(&rq->lock, flags); 8729 raw_spin_lock_irqsave(&rq->lock, flags);
8512 for_each_sched_entity(se) 8730 for_each_sched_entity(se)
8513 update_cfs_shares(group_cfs_rq(se), 0); 8731 update_cfs_shares(group_cfs_rq(se));
8514 raw_spin_unlock_irqrestore(&rq->lock, flags); 8732 raw_spin_unlock_irqrestore(&rq->lock, flags);
8515 } 8733 }
8516 8734
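The irqtime_account_process_tick() added above demultiplexes each tick in a fixed priority order: pending hardirq time, pending softirq time, ksoftirqd, user time, idle, guest, then plain system time. A small standalone C model of that decision order follows; the struct, enum, and function names are invented for the sketch and mirror only the branch order, not the kernel's cputime bookkeeping.

/* Illustrative model of the tick demultiplexing order, not kernel code. */
#include <stdbool.h>

struct tick_ctx {
	bool pending_hardirq;	/* irqtime_account_hi_update() returned 1 */
	bool pending_softirq;	/* irqtime_account_si_update() returned 1 */
	bool is_ksoftirqd;	/* p == this_cpu_ksoftirqd() */
	bool user_tick;
	bool is_idle;		/* p == rq->idle */
	bool is_guest;		/* p->flags & PF_VCPU */
};

enum tick_target {
	T_IRQ,			/* cpustat->irq */
	T_SOFTIRQ,		/* cpustat->softirq */
	T_KSOFTIRQD_SYS,	/* system time for p, charged to cpustat->softirq */
	T_USER, T_IDLE, T_GUEST, T_SYSTEM
};

/* Same priority order as the kernel function above. */
static enum tick_target classify_tick(const struct tick_ctx *c)
{
	if (c->pending_hardirq)
		return T_IRQ;
	if (c->pending_softirq)
		return T_SOFTIRQ;
	if (c->is_ksoftirqd)
		return T_KSOFTIRQD_SYS;
	if (c->user_tick)
		return T_USER;
	if (c->is_idle)
		return T_IDLE;
	if (c->is_guest)
		return T_GUEST;
	return T_SYSTEM;
}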
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd075..7bacd83a4158 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
179 179
180 raw_spin_lock_irqsave(&rq->lock, flags); 180 raw_spin_lock_irqsave(&rq->lock, flags);
181 if (cfs_rq->rb_leftmost) 181 if (cfs_rq->rb_leftmost)
182 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; 182 MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
183 last = __pick_last_entity(cfs_rq); 183 last = __pick_last_entity(cfs_rq);
184 if (last) 184 if (last)
185 max_vruntime = last->vruntime; 185 max_vruntime = last->vruntime;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450e..027024694043 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8;
69unsigned int sysctl_sched_child_runs_first __read_mostly; 69unsigned int sysctl_sched_child_runs_first __read_mostly;
70 70
71/* 71/*
72 * sys_sched_yield() compat mode
73 *
74 * This option switches the agressive yield implementation of the
75 * old scheduler back on.
76 */
77unsigned int __read_mostly sysctl_sched_compat_yield;
78
79/*
80 * SCHED_OTHER wake-up granularity. 72 * SCHED_OTHER wake-up granularity.
81 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) 73 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
82 * 74 *
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
419 rb_erase(&se->run_node, &cfs_rq->tasks_timeline); 411 rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
420} 412}
421 413
422static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) 414static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
423{ 415{
424 struct rb_node *left = cfs_rq->rb_leftmost; 416 struct rb_node *left = cfs_rq->rb_leftmost;
425 417
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
429 return rb_entry(left, struct sched_entity, run_node); 421 return rb_entry(left, struct sched_entity, run_node);
430} 422}
431 423
424static struct sched_entity *__pick_next_entity(struct sched_entity *se)
425{
426 struct rb_node *next = rb_next(&se->run_node);
427
428 if (!next)
429 return NULL;
430
431 return rb_entry(next, struct sched_entity, run_node);
432}
433
434#ifdef CONFIG_SCHED_DEBUG
432static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) 435static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
433{ 436{
434 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); 437 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
443 * Scheduling class statistics methods: 446 * Scheduling class statistics methods:
444 */ 447 */
445 448
446#ifdef CONFIG_SCHED_DEBUG
447int sched_proc_update_handler(struct ctl_table *table, int write, 449int sched_proc_update_handler(struct ctl_table *table, int write,
448 void __user *buffer, size_t *lenp, 450 void __user *buffer, size_t *lenp,
449 loff_t *ppos) 451 loff_t *ppos)
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
540} 542}
541 543
542static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); 544static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
543static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); 545static void update_cfs_shares(struct cfs_rq *cfs_rq);
544 546
545/* 547/*
546 * Update the current task's runtime statistics. Skip current tasks that 548 * Update the current task's runtime statistics. Skip current tasks that
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
733 now - cfs_rq->load_last > 4 * period) { 735 now - cfs_rq->load_last > 4 * period) {
734 cfs_rq->load_period = 0; 736 cfs_rq->load_period = 0;
735 cfs_rq->load_avg = 0; 737 cfs_rq->load_avg = 0;
738 delta = period - 1;
736 } 739 }
737 740
738 cfs_rq->load_stamp = now; 741 cfs_rq->load_stamp = now;
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
763 list_del_leaf_cfs_rq(cfs_rq); 766 list_del_leaf_cfs_rq(cfs_rq);
764} 767}
765 768
766static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, 769static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
767 long weight_delta)
768{ 770{
769 long load_weight, load, shares; 771 long load_weight, load, shares;
770 772
771 load = cfs_rq->load.weight + weight_delta; 773 load = cfs_rq->load.weight;
772 774
773 load_weight = atomic_read(&tg->load_weight); 775 load_weight = atomic_read(&tg->load_weight);
774 load_weight -= cfs_rq->load_contribution;
775 load_weight += load; 776 load_weight += load;
777 load_weight -= cfs_rq->load_contribution;
776 778
777 shares = (tg->shares * load); 779 shares = (tg->shares * load);
778 if (load_weight) 780 if (load_weight)
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
790{ 792{
791 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { 793 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
792 update_cfs_load(cfs_rq, 0); 794 update_cfs_load(cfs_rq, 0);
793 update_cfs_shares(cfs_rq, 0); 795 update_cfs_shares(cfs_rq);
794 } 796 }
795} 797}
796# else /* CONFIG_SMP */ 798# else /* CONFIG_SMP */
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
798{ 800{
799} 801}
800 802
801static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, 803static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
802 long weight_delta)
803{ 804{
804 return tg->shares; 805 return tg->shares;
805} 806}
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
824 account_entity_enqueue(cfs_rq, se); 825 account_entity_enqueue(cfs_rq, se);
825} 826}
826 827
827static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) 828static void update_cfs_shares(struct cfs_rq *cfs_rq)
828{ 829{
829 struct task_group *tg; 830 struct task_group *tg;
830 struct sched_entity *se; 831 struct sched_entity *se;
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
838 if (likely(se->load.weight == tg->shares)) 839 if (likely(se->load.weight == tg->shares))
839 return; 840 return;
840#endif 841#endif
841 shares = calc_cfs_shares(cfs_rq, tg, weight_delta); 842 shares = calc_cfs_shares(cfs_rq, tg);
842 843
843 reweight_entity(cfs_rq_of(se), se, shares); 844 reweight_entity(cfs_rq_of(se), se, shares);
844} 845}
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
847{ 848{
848} 849}
849 850
850static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) 851static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
851{ 852{
852} 853}
853 854
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
978 */ 979 */
979 update_curr(cfs_rq); 980 update_curr(cfs_rq);
980 update_cfs_load(cfs_rq, 0); 981 update_cfs_load(cfs_rq, 0);
981 update_cfs_shares(cfs_rq, se->load.weight);
982 account_entity_enqueue(cfs_rq, se); 982 account_entity_enqueue(cfs_rq, se);
983 update_cfs_shares(cfs_rq);
983 984
984 if (flags & ENQUEUE_WAKEUP) { 985 if (flags & ENQUEUE_WAKEUP) {
985 place_entity(cfs_rq, se, 0); 986 place_entity(cfs_rq, se, 0);
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
996 list_add_leaf_cfs_rq(cfs_rq); 997 list_add_leaf_cfs_rq(cfs_rq);
997} 998}
998 999
999static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 1000static void __clear_buddies_last(struct sched_entity *se)
1001{
1002 for_each_sched_entity(se) {
1003 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1004 if (cfs_rq->last == se)
1005 cfs_rq->last = NULL;
1006 else
1007 break;
1008 }
1009}
1010
1011static void __clear_buddies_next(struct sched_entity *se)
1000{ 1012{
1001 if (!se || cfs_rq->last == se) 1013 for_each_sched_entity(se) {
1002 cfs_rq->last = NULL; 1014 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1015 if (cfs_rq->next == se)
1016 cfs_rq->next = NULL;
1017 else
1018 break;
1019 }
1020}
1003 1021
1004 if (!se || cfs_rq->next == se) 1022static void __clear_buddies_skip(struct sched_entity *se)
1005 cfs_rq->next = NULL; 1023{
1024 for_each_sched_entity(se) {
1025 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1026 if (cfs_rq->skip == se)
1027 cfs_rq->skip = NULL;
1028 else
1029 break;
1030 }
1006} 1031}
1007 1032
1008static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 1033static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
1009{ 1034{
1010 for_each_sched_entity(se) 1035 if (cfs_rq->last == se)
1011 __clear_buddies(cfs_rq_of(se), se); 1036 __clear_buddies_last(se);
1037
1038 if (cfs_rq->next == se)
1039 __clear_buddies_next(se);
1040
1041 if (cfs_rq->skip == se)
1042 __clear_buddies_skip(se);
1012} 1043}
1013 1044
1014static void 1045static void
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1041 update_cfs_load(cfs_rq, 0); 1072 update_cfs_load(cfs_rq, 0);
1042 account_entity_dequeue(cfs_rq, se); 1073 account_entity_dequeue(cfs_rq, se);
1043 update_min_vruntime(cfs_rq); 1074 update_min_vruntime(cfs_rq);
1044 update_cfs_shares(cfs_rq, 0); 1075 update_cfs_shares(cfs_rq);
1045 1076
1046 /* 1077 /*
1047 * Normalize the entity after updating the min_vruntime because the 1078 * Normalize the entity after updating the min_vruntime because the
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
1084 return; 1115 return;
1085 1116
1086 if (cfs_rq->nr_running > 1) { 1117 if (cfs_rq->nr_running > 1) {
1087 struct sched_entity *se = __pick_next_entity(cfs_rq); 1118 struct sched_entity *se = __pick_first_entity(cfs_rq);
1088 s64 delta = curr->vruntime - se->vruntime; 1119 s64 delta = curr->vruntime - se->vruntime;
1089 1120
1090 if (delta < 0) 1121 if (delta < 0)
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
1128static int 1159static int
1129wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); 1160wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
1130 1161
1162/*
1163 * Pick the next process, keeping these things in mind, in this order:
1164 * 1) keep things fair between processes/task groups
1165 * 2) pick the "next" process, since someone really wants that to run
1166 * 3) pick the "last" process, for cache locality
1167 * 4) do not run the "skip" process, if something else is available
1168 */
1131static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) 1169static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
1132{ 1170{
1133 struct sched_entity *se = __pick_next_entity(cfs_rq); 1171 struct sched_entity *se = __pick_first_entity(cfs_rq);
1134 struct sched_entity *left = se; 1172 struct sched_entity *left = se;
1135 1173
1136 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) 1174 /*
1137 se = cfs_rq->next; 1175 * Avoid running the skip buddy, if running something else can
1176 * be done without getting too unfair.
1177 */
1178 if (cfs_rq->skip == se) {
1179 struct sched_entity *second = __pick_next_entity(se);
1180 if (second && wakeup_preempt_entity(second, left) < 1)
1181 se = second;
1182 }
1138 1183
1139 /* 1184 /*
1140 * Prefer last buddy, try to return the CPU to a preempted task. 1185 * Prefer last buddy, try to return the CPU to a preempted task.
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
1142 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) 1187 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
1143 se = cfs_rq->last; 1188 se = cfs_rq->last;
1144 1189
1190 /*
1191 * Someone really wants this to run. If it's not unfair, run it.
1192 */
1193 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
1194 se = cfs_rq->next;
1195
1145 clear_buddies(cfs_rq, se); 1196 clear_buddies(cfs_rq, se);
1146 1197
1147 return se; 1198 return se;
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1282 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1333 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1283 1334
1284 update_cfs_load(cfs_rq, 0); 1335 update_cfs_load(cfs_rq, 0);
1285 update_cfs_shares(cfs_rq, 0); 1336 update_cfs_shares(cfs_rq);
1286 } 1337 }
1287 1338
1288 hrtick_update(rq); 1339 hrtick_update(rq);
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1312 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1363 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1313 1364
1314 update_cfs_load(cfs_rq, 0); 1365 update_cfs_load(cfs_rq, 0);
1315 update_cfs_shares(cfs_rq, 0); 1366 update_cfs_shares(cfs_rq);
1316 } 1367 }
1317 1368
1318 hrtick_update(rq); 1369 hrtick_update(rq);
1319} 1370}
1320 1371
1321/*
1322 * sched_yield() support is very simple - we dequeue and enqueue.
1323 *
1324 * If compat_yield is turned on then we requeue to the end of the tree.
1325 */
1326static void yield_task_fair(struct rq *rq)
1327{
1328 struct task_struct *curr = rq->curr;
1329 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1330 struct sched_entity *rightmost, *se = &curr->se;
1331
1332 /*
1333 * Are we the only task in the tree?
1334 */
1335 if (unlikely(cfs_rq->nr_running == 1))
1336 return;
1337
1338 clear_buddies(cfs_rq, se);
1339
1340 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
1341 update_rq_clock(rq);
1342 /*
1343 * Update run-time statistics of the 'current'.
1344 */
1345 update_curr(cfs_rq);
1346
1347 return;
1348 }
1349 /*
1350 * Find the rightmost entry in the rbtree:
1351 */
1352 rightmost = __pick_last_entity(cfs_rq);
1353 /*
1354 * Already in the rightmost position?
1355 */
1356 if (unlikely(!rightmost || entity_before(rightmost, se)))
1357 return;
1358
1359 /*
1360 * Minimally necessary key value to be last in the tree:
1361 * Upon rescheduling, sched_class::put_prev_task() will place
1362 * 'current' within the tree based on its new key value.
1363 */
1364 se->vruntime = rightmost->vruntime + 1;
1365}
1366
1367#ifdef CONFIG_SMP 1372#ifdef CONFIG_SMP
1368 1373
1369static void task_waking_fair(struct rq *rq, struct task_struct *p) 1374static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
1834 } 1839 }
1835} 1840}
1836 1841
1842static void set_skip_buddy(struct sched_entity *se)
1843{
1844 if (likely(task_of(se)->policy != SCHED_IDLE)) {
1845 for_each_sched_entity(se)
1846 cfs_rq_of(se)->skip = se;
1847 }
1848}
1849
1837/* 1850/*
1838 * Preempt the current task with a newly woken task if needed: 1851 * Preempt the current task with a newly woken task if needed:
1839 */ 1852 */
@@ -1932,6 +1945,55 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
1932 } 1945 }
1933} 1946}
1934 1947
1948/*
1949 * sched_yield() is very simple
1950 *
1951 * The magic of dealing with the ->skip buddy is in pick_next_entity.
1952 */
1953static void yield_task_fair(struct rq *rq)
1954{
1955 struct task_struct *curr = rq->curr;
1956 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1957 struct sched_entity *se = &curr->se;
1958
1959 /*
1960 * Are we the only task in the tree?
1961 */
1962 if (unlikely(rq->nr_running == 1))
1963 return;
1964
1965 clear_buddies(cfs_rq, se);
1966
1967 if (curr->policy != SCHED_BATCH) {
1968 update_rq_clock(rq);
1969 /*
1970 * Update run-time statistics of the 'current'.
1971 */
1972 update_curr(cfs_rq);
1973 }
1974
1975 set_skip_buddy(se);
1976}
1977
1978static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
1979{
1980 struct sched_entity *se = &p->se;
1981
1982 if (!se->on_rq)
1983 return false;
1984
1985 /* Tell the scheduler that we'd really like pse to run next. */
1986 set_next_buddy(se);
1987
1988 /* Make p's CPU reschedule; pick_next_entity takes care of fairness. */
1989 if (preempt)
1990 resched_task(rq->curr);
1991
1992 yield_task_fair(rq);
1993
1994 return true;
1995}
1996
1935#ifdef CONFIG_SMP 1997#ifdef CONFIG_SMP
1936/************************************************** 1998/**************************************************
1937 * Fair scheduling class load-balancing methods: 1999 * Fair scheduling class load-balancing methods:
@@ -2123,7 +2185,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
2123 * We need to update shares after updating tg->load_weight in 2185 * We need to update shares after updating tg->load_weight in
2124 * order to adjust the weight of groups with long running tasks. 2186 * order to adjust the weight of groups with long running tasks.
2125 */ 2187 */
2126 update_cfs_shares(cfs_rq, 0); 2188 update_cfs_shares(cfs_rq);
2127 2189
2128 raw_spin_unlock_irqrestore(&rq->lock, flags); 2190 raw_spin_unlock_irqrestore(&rq->lock, flags);
2129 2191
@@ -4079,33 +4141,62 @@ static void task_fork_fair(struct task_struct *p)
4079 * Priority of the task has changed. Check to see if we preempt 4141 * Priority of the task has changed. Check to see if we preempt
4080 * the current task. 4142 * the current task.
4081 */ 4143 */
4082static void prio_changed_fair(struct rq *rq, struct task_struct *p, 4144static void
4083 int oldprio, int running) 4145prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
4084{ 4146{
4147 if (!p->se.on_rq)
4148 return;
4149
4085 /* 4150 /*
4086 * Reschedule if we are currently running on this runqueue and 4151 * Reschedule if we are currently running on this runqueue and
4087 * our priority decreased, or if we are not currently running on 4152 * our priority decreased, or if we are not currently running on
4088 * this runqueue and our priority is higher than the current's 4153 * this runqueue and our priority is higher than the current's
4089 */ 4154 */
4090 if (running) { 4155 if (rq->curr == p) {
4091 if (p->prio > oldprio) 4156 if (p->prio > oldprio)
4092 resched_task(rq->curr); 4157 resched_task(rq->curr);
4093 } else 4158 } else
4094 check_preempt_curr(rq, p, 0); 4159 check_preempt_curr(rq, p, 0);
4095} 4160}
4096 4161
4162static void switched_from_fair(struct rq *rq, struct task_struct *p)
4163{
4164 struct sched_entity *se = &p->se;
4165 struct cfs_rq *cfs_rq = cfs_rq_of(se);
4166
4167 /*
4168 * Ensure the task's vruntime is normalized, so that when its
4169 * switched back to the fair class the enqueue_entity(.flags=0) will
4170 * do the right thing.
4171 *
4172 * If it was on_rq, then the dequeue_entity(.flags=0) will already
4173 * have normalized the vruntime, if it was !on_rq, then only when
4174 * the task is sleeping will it still have non-normalized vruntime.
4175 */
4176 if (!se->on_rq && p->state != TASK_RUNNING) {
4177 /*
4178 * Fix up our vruntime so that the current sleep doesn't
4179 * cause 'unlimited' sleep bonus.
4180 */
4181 place_entity(cfs_rq, se, 0);
4182 se->vruntime -= cfs_rq->min_vruntime;
4183 }
4184}
4185
4097/* 4186/*
4098 * We switched to the sched_fair class. 4187 * We switched to the sched_fair class.
4099 */ 4188 */
4100static void switched_to_fair(struct rq *rq, struct task_struct *p, 4189static void switched_to_fair(struct rq *rq, struct task_struct *p)
4101 int running)
4102{ 4190{
4191 if (!p->se.on_rq)
4192 return;
4193
4103 /* 4194 /*
4104 * We were most likely switched from sched_rt, so 4195 * We were most likely switched from sched_rt, so
4105 * kick off the schedule if running, otherwise just see 4196 * kick off the schedule if running, otherwise just see
4106 * if we can still preempt the current task. 4197 * if we can still preempt the current task.
4107 */ 4198 */
4108 if (running) 4199 if (rq->curr == p)
4109 resched_task(rq->curr); 4200 resched_task(rq->curr);
4110 else 4201 else
4111 check_preempt_curr(rq, p, 0); 4202 check_preempt_curr(rq, p, 0);
@@ -4171,6 +4262,7 @@ static const struct sched_class fair_sched_class = {
4171 .enqueue_task = enqueue_task_fair, 4262 .enqueue_task = enqueue_task_fair,
4172 .dequeue_task = dequeue_task_fair, 4263 .dequeue_task = dequeue_task_fair,
4173 .yield_task = yield_task_fair, 4264 .yield_task = yield_task_fair,
4265 .yield_to_task = yield_to_task_fair,
4174 4266
4175 .check_preempt_curr = check_preempt_wakeup, 4267 .check_preempt_curr = check_preempt_wakeup,
4176 4268
@@ -4191,6 +4283,7 @@ static const struct sched_class fair_sched_class = {
4191 .task_fork = task_fork_fair, 4283 .task_fork = task_fork_fair,
4192 4284
4193 .prio_changed = prio_changed_fair, 4285 .prio_changed = prio_changed_fair,
4286 .switched_from = switched_from_fair,
4194 .switched_to = switched_to_fair, 4287 .switched_to = switched_to_fair,
4195 4288
4196 .get_rr_interval = get_rr_interval_fair, 4289 .get_rr_interval = get_rr_interval_fair,
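pick_next_entity() now honours three buddies in a fixed order: step past the skip buddy when a fair alternative exists, then prefer last for cache locality, then next because someone explicitly asked for it. The standalone sketch below models only that ordering, with wakeup_preempt_ok() standing in for the wakeup_preempt_entity() fairness test and all type names invented for illustration.

/* Illustrative model of the buddy preference order, not kernel code. */
struct entity;

struct cfs_model {
	struct entity *leftmost;	/* __pick_first_entity() */
	struct entity *second;		/* __pick_next_entity(leftmost) */
	struct entity *next, *last, *skip;
};

/* Placeholder for wakeup_preempt_entity(buddy, left) < 1, i.e. running
 * the buddy instead of the leftmost entity is not too unfair. */
static int wakeup_preempt_ok(const struct entity *buddy,
			     const struct entity *left)
{
	(void)buddy; (void)left;
	return 1;
}

static struct entity *pick_next_model(const struct cfs_model *q)
{
	struct entity *left = q->leftmost;
	struct entity *se = left;

	/* 4) avoid the skip buddy if something else can run fairly */
	if (q->skip == se && q->second && wakeup_preempt_ok(q->second, left))
		se = q->second;

	/* 3) prefer the last buddy, for cache locality */
	if (q->last && wakeup_preempt_ok(q->last, left))
		se = q->last;

	/* 2) prefer the next buddy, since someone really wants it to run */
	if (q->next && wakeup_preempt_ok(q->next, left))
		se = q->next;

	return se;
}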
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87c..c82f26c1b7c3 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq)
52{ 52{
53} 53}
54 54
55static void switched_to_idle(struct rq *rq, struct task_struct *p, 55static void switched_to_idle(struct rq *rq, struct task_struct *p)
56 int running)
57{ 56{
58 /* Can this actually happen?? */ 57 BUG();
59 if (running)
60 resched_task(rq->curr);
61 else
62 check_preempt_curr(rq, p, 0);
63} 58}
64 59
65static void prio_changed_idle(struct rq *rq, struct task_struct *p, 60static void
66 int oldprio, int running) 61prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
67{ 62{
68 /* This can happen for hot plug CPUS */ 63 BUG();
69
70 /*
71 * Reschedule if we are currently running on this runqueue and
72 * our priority decreased, or if we are not currently running on
73 * this runqueue and our priority is higher than the current's
74 */
75 if (running) {
76 if (p->prio > oldprio)
77 resched_task(rq->curr);
78 } else
79 check_preempt_curr(rq, p, 0);
80} 64}
81 65
82static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) 66static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..4e108f8ecb6a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1595,8 +1595,7 @@ static void rq_offline_rt(struct rq *rq)
1595 * When switch from the rt queue, we bring ourselves to a position 1595 * When switch from the rt queue, we bring ourselves to a position
1596 * that we might want to pull RT tasks from other runqueues. 1596 * that we might want to pull RT tasks from other runqueues.
1597 */ 1597 */
1598static void switched_from_rt(struct rq *rq, struct task_struct *p, 1598static void switched_from_rt(struct rq *rq, struct task_struct *p)
1599 int running)
1600{ 1599{
1601 /* 1600 /*
1602 * If there are other RT tasks then we will reschedule 1601 * If there are other RT tasks then we will reschedule
@@ -1605,7 +1604,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
1605 * we may need to handle the pulling of RT tasks 1604 * we may need to handle the pulling of RT tasks
1606 * now. 1605 * now.
1607 */ 1606 */
1608 if (!rq->rt.rt_nr_running) 1607 if (p->se.on_rq && !rq->rt.rt_nr_running)
1609 pull_rt_task(rq); 1608 pull_rt_task(rq);
1610} 1609}
1611 1610
@@ -1624,8 +1623,7 @@ static inline void init_sched_rt_class(void)
1624 * with RT tasks. In this case we try to push them off to 1623 * with RT tasks. In this case we try to push them off to
1625 * other runqueues. 1624 * other runqueues.
1626 */ 1625 */
1627static void switched_to_rt(struct rq *rq, struct task_struct *p, 1626static void switched_to_rt(struct rq *rq, struct task_struct *p)
1628 int running)
1629{ 1627{
1630 int check_resched = 1; 1628 int check_resched = 1;
1631 1629
@@ -1636,7 +1634,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
1636 * If that current running task is also an RT task 1634 * If that current running task is also an RT task
1637 * then see if we can move to another run queue. 1635 * then see if we can move to another run queue.
1638 */ 1636 */
1639 if (!running) { 1637 if (p->se.on_rq && rq->curr != p) {
1640#ifdef CONFIG_SMP 1638#ifdef CONFIG_SMP
1641 if (rq->rt.overloaded && push_rt_task(rq) && 1639 if (rq->rt.overloaded && push_rt_task(rq) &&
1642 /* Don't resched if we changed runqueues */ 1640 /* Don't resched if we changed runqueues */
@@ -1652,10 +1650,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
1652 * Priority of the task has changed. This may cause 1650 * Priority of the task has changed. This may cause
1653 * us to initiate a push or pull. 1651 * us to initiate a push or pull.
1654 */ 1652 */
1655static void prio_changed_rt(struct rq *rq, struct task_struct *p, 1653static void
1656 int oldprio, int running) 1654prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
1657{ 1655{
1658 if (running) { 1656 if (!p->se.on_rq)
1657 return;
1658
1659 if (rq->curr == p) {
1659#ifdef CONFIG_SMP 1660#ifdef CONFIG_SMP
1660 /* 1661 /*
1661 * If our priority decreases while running, we 1662 * If our priority decreases while running, we
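With the 'running' argument gone, the prio_changed_*() and switched_to_*() callbacks now derive the state from the task itself: bail out if the task is not queued, resched the current task if it was demoted in place, otherwise check whether the task should preempt rq->curr. The compact model below follows the shape of prio_changed_fair() (the rt, idle, and stop classes vary the branches); the type names are invented for the sketch.

/* Illustrative model of the on_rq / rq->curr checks, not kernel code. */
#include <stdbool.h>

struct task { bool on_rq; int prio; };		/* larger prio = lower priority */
struct runq { struct task *curr; };

enum action { DO_NOTHING, RESCHED_CURR, CHECK_PREEMPT };

static enum action on_prio_change(struct runq *rq, struct task *p, int oldprio)
{
	if (!p->on_rq)
		return DO_NOTHING;		/* dequeued tasks are ignored */
	if (rq->curr == p)			/* running and demoted: give up CPU */
		return (p->prio > oldprio) ? RESCHED_CURR : DO_NOTHING;
	return CHECK_PREEMPT;			/* queued: maybe preempt rq->curr */
}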
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c1..84ec9bcf82d9 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq)
59{ 59{
60} 60}
61 61
62static void switched_to_stop(struct rq *rq, struct task_struct *p, 62static void switched_to_stop(struct rq *rq, struct task_struct *p)
63 int running)
64{ 63{
65 BUG(); /* its impossible to change to this class */ 64 BUG(); /* its impossible to change to this class */
66} 65}
67 66
68static void prio_changed_stop(struct rq *rq, struct task_struct *p, 67static void
69 int oldprio, int running) 68prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
70{ 69{
71 BUG(); /* how!?, what priority? */ 70 BUG(); /* how!?, what priority? */
72} 71}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec388..0cee50487629 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat);
54 54
55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; 55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56 56
57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); 57DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -721,7 +721,6 @@ static int run_ksoftirqd(void * __bind_cpu)
721{ 721{
722 set_current_state(TASK_INTERRUPTIBLE); 722 set_current_state(TASK_INTERRUPTIBLE);
723 723
724 current->flags |= PF_KSOFTIRQD;
725 while (!kthread_should_stop()) { 724 while (!kthread_should_stop()) {
726 preempt_disable(); 725 preempt_disable();
727 if (!local_softirq_pending()) { 726 if (!local_softirq_pending()) {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83db985..cb7c830f7faa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -361,13 +361,6 @@ static struct ctl_table kern_table[] = {
361 .mode = 0644, 361 .mode = 0644,
362 .proc_handler = sched_rt_handler, 362 .proc_handler = sched_rt_handler,
363 }, 363 },
364 {
365 .procname = "sched_compat_yield",
366 .data = &sysctl_sched_compat_yield,
367 .maxlen = sizeof(unsigned int),
368 .mode = 0644,
369 .proc_handler = proc_dointvec,
370 },
371#ifdef CONFIG_SCHED_AUTOGROUP 364#ifdef CONFIG_SCHED_AUTOGROUP
372 { 365 {
373 .procname = "sched_autogroup_enabled", 366 .procname = "sched_autogroup_enabled",
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576f..55337a816b20 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x)
645} 645}
646 646
647/** 647/**
648 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies 648 * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
649 * 649 *
650 * @n: nsecs in u64 650 * @n: nsecs in u64
651 * 651 *
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x)
657 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) 657 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
658 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years 658 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
659 */ 659 */
660unsigned long nsecs_to_jiffies(u64 n) 660u64 nsecs_to_jiffies64(u64 n)
661{ 661{
662#if (NSEC_PER_SEC % HZ) == 0 662#if (NSEC_PER_SEC % HZ) == 0
663 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ 663 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
@@ -674,6 +674,25 @@ unsigned long nsecs_to_jiffies(u64 n)
674#endif 674#endif
675} 675}
676 676
677
678/**
679 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
680 *
681 * @n: nsecs in u64
682 *
683 * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
684 * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
685 * for scheduler, not for use in device drivers to calculate timeout value.
686 *
687 * note:
688 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
689 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
690 */
691unsigned long nsecs_to_jiffies(u64 n)
692{
693 return (unsigned long)nsecs_to_jiffies64(n);
694}
695
677#if (BITS_PER_LONG < 64) 696#if (BITS_PER_LONG < 64)
678u64 get_jiffies_64(void) 697u64 get_jiffies_64(void)
679{ 698{
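nsecs_to_jiffies64() reduces to a single division when HZ divides NSEC_PER_SEC evenly, which is the common-case branch above. A userspace sketch of that arithmetic, assuming an example HZ of 250:

/* Illustrative model of the common-case conversion, not kernel code. */
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL
#define HZ 250ULL	/* example config value; any divisor of 10^9 fits this branch */

static uint64_t nsecs_to_jiffies64_model(uint64_t n)
{
	/* one jiffy lasts NSEC_PER_SEC / HZ nanoseconds */
	return n / (NSEC_PER_SEC / HZ);
}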