Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched.c           | 274
-rw-r--r--  kernel/sched_debug.c     |   2
-rw-r--r--  kernel/sched_fair.c      | 271
-rw-r--r--  kernel/sched_idletask.c  |  26
-rw-r--r--  kernel/sched_rt.c        |  19
-rw-r--r--  kernel/sched_stoptask.c  |   7
-rw-r--r--  kernel/softirq.c         |   3
-rw-r--r--  kernel/sysctl.c          |   7
-rw-r--r--  kernel/time.c            |  23
9 files changed, 469 insertions, 163 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..2effcb71a478 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq { | |||
324 | * 'curr' points to currently running entity on this cfs_rq. | 324 | * 'curr' points to currently running entity on this cfs_rq. |
325 | * It is set to NULL otherwise (i.e when none are currently running). | 325 | * It is set to NULL otherwise (i.e when none are currently running). |
326 | */ | 326 | */ |
327 | struct sched_entity *curr, *next, *last; | 327 | struct sched_entity *curr, *next, *last, *skip; |
328 | 328 | ||
329 | unsigned int nr_spread_over; | 329 | unsigned int nr_spread_over; |
330 | 330 | ||
@@ -1686,6 +1686,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
1686 | __release(rq2->lock); | 1686 | __release(rq2->lock); |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | #else /* CONFIG_SMP */ | ||
1690 | |||
1691 | /* | ||
1692 | * double_rq_lock - safely lock two runqueues | ||
1693 | * | ||
1694 | * Note this does not disable interrupts like task_rq_lock, | ||
1695 | * you need to do so manually before calling. | ||
1696 | */ | ||
1697 | static void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
1698 | __acquires(rq1->lock) | ||
1699 | __acquires(rq2->lock) | ||
1700 | { | ||
1701 | BUG_ON(!irqs_disabled()); | ||
1702 | BUG_ON(rq1 != rq2); | ||
1703 | raw_spin_lock(&rq1->lock); | ||
1704 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
1705 | } | ||
1706 | |||
1707 | /* | ||
1708 | * double_rq_unlock - safely unlock two runqueues | ||
1709 | * | ||
1710 | * Note this does not restore interrupts like task_rq_unlock, | ||
1711 | * you need to do so manually after calling. | ||
1712 | */ | ||
1713 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
1714 | __releases(rq1->lock) | ||
1715 | __releases(rq2->lock) | ||
1716 | { | ||
1717 | BUG_ON(rq1 != rq2); | ||
1718 | raw_spin_unlock(&rq1->lock); | ||
1719 | __release(rq2->lock); | ||
1720 | } | ||
1721 | |||
1689 | #endif | 1722 | #endif |
1690 | 1723 | ||
1691 | static void calc_load_account_idle(struct rq *this_rq); | 1724 | static void calc_load_account_idle(struct rq *this_rq); |
@@ -1880,7 +1913,7 @@ void account_system_vtime(struct task_struct *curr) | |||
1880 | */ | 1913 | */ |
1881 | if (hardirq_count()) | 1914 | if (hardirq_count()) |
1882 | __this_cpu_add(cpu_hardirq_time, delta); | 1915 | __this_cpu_add(cpu_hardirq_time, delta); |
1883 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 1916 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) |
1884 | __this_cpu_add(cpu_softirq_time, delta); | 1917 | __this_cpu_add(cpu_softirq_time, delta); |
1885 | 1918 | ||
1886 | irq_time_write_end(); | 1919 | irq_time_write_end(); |
@@ -1920,8 +1953,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
1920 | sched_rt_avg_update(rq, irq_delta); | 1953 | sched_rt_avg_update(rq, irq_delta); |
1921 | } | 1954 | } |
1922 | 1955 | ||
1956 | static int irqtime_account_hi_update(void) | ||
1957 | { | ||
1958 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
1959 | unsigned long flags; | ||
1960 | u64 latest_ns; | ||
1961 | int ret = 0; | ||
1962 | |||
1963 | local_irq_save(flags); | ||
1964 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
1965 | if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq)) | ||
1966 | ret = 1; | ||
1967 | local_irq_restore(flags); | ||
1968 | return ret; | ||
1969 | } | ||
1970 | |||
1971 | static int irqtime_account_si_update(void) | ||
1972 | { | ||
1973 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
1974 | unsigned long flags; | ||
1975 | u64 latest_ns; | ||
1976 | int ret = 0; | ||
1977 | |||
1978 | local_irq_save(flags); | ||
1979 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
1980 | if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq)) | ||
1981 | ret = 1; | ||
1982 | local_irq_restore(flags); | ||
1983 | return ret; | ||
1984 | } | ||
1985 | |||
1923 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 1986 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
1924 | 1987 | ||
1988 | #define sched_clock_irqtime (0) | ||
1989 | |||
1925 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 1990 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
1926 | { | 1991 | { |
1927 | rq->clock_task += delta; | 1992 | rq->clock_task += delta; |
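The two helpers above only answer the question "is there hardirq/softirq time that has not yet been folded into cpustat?"; the actual per-jiffy charging happens in irqtime_account_process_tick() further down. As a rough standalone model of that check (plain userspace C, not kernel code; the tick granularity and field names are assumptions for the example), it boils down to comparing the accumulated nanoseconds, converted to tick units, with what has already been charged:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_TICK 1000000ULL        /* assume HZ == 1000 */

    struct irq_account {
            uint64_t hardirq_ns;            /* accumulated by the irq-entry hooks */
            uint64_t charged_ticks;         /* already folded into the stats */
    };

    /* Spirit of irqtime_account_hi_update(): is there at least one whole
     * tick of hardirq time that has not been charged to cpustat yet? */
    static bool hi_update_pending(const struct irq_account *a)
    {
            return a->hardirq_ns / NSEC_PER_TICK > a->charged_ticks;
    }

    int main(void)
    {
            struct irq_account a = { .hardirq_ns = 2500000, .charged_ticks = 1 };

            if (hi_update_pending(&a))
                    a.charged_ticks++;      /* this jiffy goes to the irq bucket */
            printf("charged ticks: %llu\n", (unsigned long long)a.charged_ticks);
            return 0;
    }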
@@ -2025,14 +2090,14 @@ inline int task_curr(const struct task_struct *p) | |||
2025 | 2090 | ||
2026 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 2091 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
2027 | const struct sched_class *prev_class, | 2092 | const struct sched_class *prev_class, |
2028 | int oldprio, int running) | 2093 | int oldprio) |
2029 | { | 2094 | { |
2030 | if (prev_class != p->sched_class) { | 2095 | if (prev_class != p->sched_class) { |
2031 | if (prev_class->switched_from) | 2096 | if (prev_class->switched_from) |
2032 | prev_class->switched_from(rq, p, running); | 2097 | prev_class->switched_from(rq, p); |
2033 | p->sched_class->switched_to(rq, p, running); | 2098 | p->sched_class->switched_to(rq, p); |
2034 | } else | 2099 | } else if (oldprio != p->prio) |
2035 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2100 | p->sched_class->prio_changed(rq, p, oldprio); |
2036 | } | 2101 | } |
2037 | 2102 | ||
2038 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | 2103 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
@@ -2566,6 +2631,7 @@ static void __sched_fork(struct task_struct *p) | |||
2566 | p->se.sum_exec_runtime = 0; | 2631 | p->se.sum_exec_runtime = 0; |
2567 | p->se.prev_sum_exec_runtime = 0; | 2632 | p->se.prev_sum_exec_runtime = 0; |
2568 | p->se.nr_migrations = 0; | 2633 | p->se.nr_migrations = 0; |
2634 | p->se.vruntime = 0; | ||
2569 | 2635 | ||
2570 | #ifdef CONFIG_SCHEDSTATS | 2636 | #ifdef CONFIG_SCHEDSTATS |
2571 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2637 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
@@ -3568,6 +3634,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
3568 | } | 3634 | } |
3569 | 3635 | ||
3570 | /* | 3636 | /* |
3637 | * Account system cpu time to a process and desired cpustat field | ||
3638 | * @p: the process that the cpu time gets accounted to | ||
3639 | * @cputime: the cpu time spent in kernel space since the last update | ||
3640 | * @cputime_scaled: cputime scaled by cpu frequency | ||
3641 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
3642 | */ | ||
3643 | static inline | ||
3644 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
3645 | cputime_t cputime_scaled, cputime64_t *target_cputime64) | ||
3646 | { | ||
3647 | cputime64_t tmp = cputime_to_cputime64(cputime); | ||
3648 | |||
3649 | /* Add system time to process. */ | ||
3650 | p->stime = cputime_add(p->stime, cputime); | ||
3651 | p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | ||
3652 | account_group_system_time(p, cputime); | ||
3653 | |||
3654 | /* Add system time to cpustat. */ | ||
3655 | *target_cputime64 = cputime64_add(*target_cputime64, tmp); | ||
3656 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
3657 | |||
3658 | /* Account for system time used */ | ||
3659 | acct_update_integrals(p); | ||
3660 | } | ||
3661 | |||
3662 | /* | ||
3571 | * Account system cpu time to a process. | 3663 | * Account system cpu time to a process. |
3572 | * @p: the process that the cpu time gets accounted to | 3664 | * @p: the process that the cpu time gets accounted to |
3573 | * @hardirq_offset: the offset to subtract from hardirq_count() | 3665 | * @hardirq_offset: the offset to subtract from hardirq_count() |
@@ -3578,33 +3670,90 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3578 | cputime_t cputime, cputime_t cputime_scaled) | 3670 | cputime_t cputime, cputime_t cputime_scaled) |
3579 | { | 3671 | { |
3580 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 3672 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
3581 | cputime64_t tmp; | 3673 | cputime64_t *target_cputime64; |
3582 | 3674 | ||
3583 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | 3675 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { |
3584 | account_guest_time(p, cputime, cputime_scaled); | 3676 | account_guest_time(p, cputime, cputime_scaled); |
3585 | return; | 3677 | return; |
3586 | } | 3678 | } |
3587 | 3679 | ||
3588 | /* Add system time to process. */ | ||
3589 | p->stime = cputime_add(p->stime, cputime); | ||
3590 | p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | ||
3591 | account_group_system_time(p, cputime); | ||
3592 | |||
3593 | /* Add system time to cpustat. */ | ||
3594 | tmp = cputime_to_cputime64(cputime); | ||
3595 | if (hardirq_count() - hardirq_offset) | 3680 | if (hardirq_count() - hardirq_offset) |
3596 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3681 | target_cputime64 = &cpustat->irq; |
3597 | else if (in_serving_softirq()) | 3682 | else if (in_serving_softirq()) |
3598 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3683 | target_cputime64 = &cpustat->softirq; |
3599 | else | 3684 | else |
3600 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3685 | target_cputime64 = &cpustat->system; |
3601 | 3686 | ||
3602 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | 3687 | __account_system_time(p, cputime, cputime_scaled, target_cputime64); |
3688 | } | ||
3603 | 3689 | ||
3604 | /* Account for system time used */ | 3690 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
3605 | acct_update_integrals(p); | 3691 | /* |
3692 | * Account a tick to a process and cpustat | ||
3693 | * @p: the process that the cpu time gets accounted to | ||
3694 | * @user_tick: is the tick from userspace | ||
3695 | * @rq: the pointer to rq | ||
3696 | * | ||
3697 | * Tick demultiplexing follows the order | ||
3698 | * - pending hardirq update | ||
3699 | * - pending softirq update | ||
3700 | * - user_time | ||
3701 | * - idle_time | ||
3702 | * - system time | ||
3703 | * - check for guest_time | ||
3704 | * - else account as system_time | ||
3705 | * | ||
3706 | * Check for hardirq is done both for system and user time as there is | ||
3707 | * no timer going off while we are on hardirq and hence we may never get an | ||
3708 | * opportunity to update it solely in system time. | ||
3709 | * p->stime and friends are only updated on system time and not on irq | ||
3710 | * softirq as those do not count in task exec_runtime any more. | ||
3711 | */ | ||
3712 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
3713 | struct rq *rq) | ||
3714 | { | ||
3715 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
3716 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); | ||
3717 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
3718 | |||
3719 | if (irqtime_account_hi_update()) { | ||
3720 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | ||
3721 | } else if (irqtime_account_si_update()) { | ||
3722 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | ||
3723 | } else if (this_cpu_ksoftirqd() == p) { | ||
3724 | /* | ||
3725 | * ksoftirqd time do not get accounted in cpu_softirq_time. | ||
3726 | * So, we have to handle it separately here. | ||
3727 | * Also, p->stime needs to be updated for ksoftirqd. | ||
3728 | */ | ||
3729 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
3730 | &cpustat->softirq); | ||
3731 | } else if (user_tick) { | ||
3732 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3733 | } else if (p == rq->idle) { | ||
3734 | account_idle_time(cputime_one_jiffy); | ||
3735 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
3736 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3737 | } else { | ||
3738 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
3739 | &cpustat->system); | ||
3740 | } | ||
3606 | } | 3741 | } |
3607 | 3742 | ||
3743 | static void irqtime_account_idle_ticks(int ticks) | ||
3744 | { | ||
3745 | int i; | ||
3746 | struct rq *rq = this_rq(); | ||
3747 | |||
3748 | for (i = 0; i < ticks; i++) | ||
3749 | irqtime_account_process_tick(current, 0, rq); | ||
3750 | } | ||
3751 | #else | ||
3752 | static void irqtime_account_idle_ticks(int ticks) {} | ||
3753 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
3754 | struct rq *rq) {} | ||
3755 | #endif | ||
3756 | |||
3608 | /* | 3757 | /* |
3609 | * Account for involuntary wait time. | 3758 | * Account for involuntary wait time. |
3610 | * @steal: the cpu time spent in involuntary wait | 3759 | * @steal: the cpu time spent in involuntary wait |
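For illustration, the demultiplexing order documented above can be condensed into a standalone model (plain userspace C, not kernel code; the boolean flags stand in for the checks the kernel function performs): pending hardirq time wins, then pending softirq time, then ksoftirqd, user time, idle, guest, and finally plain system time:

    #include <stdbool.h>
    #include <stdio.h>

    enum tick_bucket { HARDIRQ, SOFTIRQ, KSOFTIRQD_SYS, USER, IDLE, GUEST, SYSTEM };

    struct tick_ctx {
            bool hi_pending;        /* unaccounted hardirq time exists */
            bool si_pending;        /* unaccounted softirq time exists */
            bool is_ksoftirqd;      /* current is this CPU's ksoftirqd */
            bool user_tick;         /* the tick interrupted user space */
            bool is_idle;           /* current is the idle task */
            bool is_guest;          /* PF_VCPU: running guest code */
    };

    /* Same ordering as irqtime_account_process_tick() above. */
    static enum tick_bucket classify_tick(const struct tick_ctx *c)
    {
            if (c->hi_pending)      return HARDIRQ;
            if (c->si_pending)      return SOFTIRQ;
            if (c->is_ksoftirqd)    return KSOFTIRQD_SYS;
            if (c->user_tick)       return USER;
            if (c->is_idle)         return IDLE;
            if (c->is_guest)        return GUEST;
            return SYSTEM;
    }

    int main(void)
    {
            struct tick_ctx c = { .si_pending = true, .user_tick = true };

            /* Softirq wins over user time, matching the kernel ordering. */
            printf("bucket = %d\n", classify_tick(&c));
            return 0;
    }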
@@ -3645,6 +3794,11 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
3645 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 3794 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
3646 | struct rq *rq = this_rq(); | 3795 | struct rq *rq = this_rq(); |
3647 | 3796 | ||
3797 | if (sched_clock_irqtime) { | ||
3798 | irqtime_account_process_tick(p, user_tick, rq); | ||
3799 | return; | ||
3800 | } | ||
3801 | |||
3648 | if (user_tick) | 3802 | if (user_tick) |
3649 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 3803 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
3650 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 3804 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
@@ -3670,6 +3824,12 @@ void account_steal_ticks(unsigned long ticks) | |||
3670 | */ | 3824 | */ |
3671 | void account_idle_ticks(unsigned long ticks) | 3825 | void account_idle_ticks(unsigned long ticks) |
3672 | { | 3826 | { |
3827 | |||
3828 | if (sched_clock_irqtime) { | ||
3829 | irqtime_account_idle_ticks(ticks); | ||
3830 | return; | ||
3831 | } | ||
3832 | |||
3673 | account_idle_time(jiffies_to_cputime(ticks)); | 3833 | account_idle_time(jiffies_to_cputime(ticks)); |
3674 | } | 3834 | } |
3675 | 3835 | ||
@@ -4570,11 +4730,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4570 | 4730 | ||
4571 | if (running) | 4731 | if (running) |
4572 | p->sched_class->set_curr_task(rq); | 4732 | p->sched_class->set_curr_task(rq); |
4573 | if (on_rq) { | 4733 | if (on_rq) |
4574 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4734 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
4575 | 4735 | ||
4576 | check_class_changed(rq, p, prev_class, oldprio, running); | 4736 | check_class_changed(rq, p, prev_class, oldprio); |
4577 | } | ||
4578 | task_rq_unlock(rq, &flags); | 4737 | task_rq_unlock(rq, &flags); |
4579 | } | 4738 | } |
4580 | 4739 | ||
@@ -4902,11 +5061,10 @@ recheck: | |||
4902 | 5061 | ||
4903 | if (running) | 5062 | if (running) |
4904 | p->sched_class->set_curr_task(rq); | 5063 | p->sched_class->set_curr_task(rq); |
4905 | if (on_rq) { | 5064 | if (on_rq) |
4906 | activate_task(rq, p, 0); | 5065 | activate_task(rq, p, 0); |
4907 | 5066 | ||
4908 | check_class_changed(rq, p, prev_class, oldprio, running); | 5067 | check_class_changed(rq, p, prev_class, oldprio); |
4909 | } | ||
4910 | __task_rq_unlock(rq); | 5068 | __task_rq_unlock(rq); |
4911 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 5069 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
4912 | 5070 | ||
@@ -5323,6 +5481,58 @@ void __sched yield(void) | |||
5323 | } | 5481 | } |
5324 | EXPORT_SYMBOL(yield); | 5482 | EXPORT_SYMBOL(yield); |
5325 | 5483 | ||
5484 | /** | ||
5485 | * yield_to - yield the current processor to another thread in | ||
5486 | * your thread group, or accelerate that thread toward the | ||
5487 | * processor it's on. | ||
5488 | * | ||
5489 | * It's the caller's job to ensure that the target task struct | ||
5490 | * can't go away on us before we can do any checks. | ||
5491 | * | ||
5492 | * Returns true if we indeed boosted the target task. | ||
5493 | */ | ||
5494 | bool __sched yield_to(struct task_struct *p, bool preempt) | ||
5495 | { | ||
5496 | struct task_struct *curr = current; | ||
5497 | struct rq *rq, *p_rq; | ||
5498 | unsigned long flags; | ||
5499 | bool yielded = 0; | ||
5500 | |||
5501 | local_irq_save(flags); | ||
5502 | rq = this_rq(); | ||
5503 | |||
5504 | again: | ||
5505 | p_rq = task_rq(p); | ||
5506 | double_rq_lock(rq, p_rq); | ||
5507 | while (task_rq(p) != p_rq) { | ||
5508 | double_rq_unlock(rq, p_rq); | ||
5509 | goto again; | ||
5510 | } | ||
5511 | |||
5512 | if (!curr->sched_class->yield_to_task) | ||
5513 | goto out; | ||
5514 | |||
5515 | if (curr->sched_class != p->sched_class) | ||
5516 | goto out; | ||
5517 | |||
5518 | if (task_running(p_rq, p) || p->state) | ||
5519 | goto out; | ||
5520 | |||
5521 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); | ||
5522 | if (yielded) | ||
5523 | schedstat_inc(rq, yld_count); | ||
5524 | |||
5525 | out: | ||
5526 | double_rq_unlock(rq, p_rq); | ||
5527 | local_irq_restore(flags); | ||
5528 | |||
5529 | if (yielded) | ||
5530 | schedule(); | ||
5531 | |||
5532 | return yielded; | ||
5533 | } | ||
5534 | EXPORT_SYMBOL_GPL(yield_to); | ||
5535 | |||
5326 | /* | 5536 | /* |
5327 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 5537 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
5328 | * that process accounting knows that this is a task in IO wait state. | 5538 | * that process accounting knows that this is a task in IO wait state. |
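A minimal sketch of a caller, to show the intended usage (kernel-style code; boost_owner() is a hypothetical helper, not part of this patch). As the comment above says, the caller must keep the target task_struct from going away, here by pinning it around the call. The interface is aimed at directed yields between sibling threads, e.g. a spinning vCPU handing the CPU to the vCPU thread that holds the lock it is waiting for:

    #include <linux/sched.h>

    /* Hypothetical helper: nudge the scheduler toward 'owner', a thread in
     * our group that holds something we are busy-waiting on. */
    static bool boost_owner(struct task_struct *owner)
    {
            bool boosted;

            get_task_struct(owner);                 /* pin the task_struct */
            boosted = yield_to(owner, false);       /* don't force preemption */
            put_task_struct(owner);

            return boosted;                         /* true if we boosted it */
    }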
@@ -7796,6 +8006,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | |||
7796 | INIT_LIST_HEAD(&cfs_rq->tasks); | 8006 | INIT_LIST_HEAD(&cfs_rq->tasks); |
7797 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8007 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7798 | cfs_rq->rq = rq; | 8008 | cfs_rq->rq = rq; |
8009 | /* allow initial update_cfs_load() to truncate */ | ||
8010 | #ifdef CONFIG_SMP | ||
8011 | cfs_rq->load_stamp = 1; | ||
8012 | #endif | ||
7799 | #endif | 8013 | #endif |
7800 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 8014 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
7801 | } | 8015 | } |
@@ -8109,6 +8323,8 @@ EXPORT_SYMBOL(__might_sleep); | |||
8109 | #ifdef CONFIG_MAGIC_SYSRQ | 8323 | #ifdef CONFIG_MAGIC_SYSRQ |
8110 | static void normalize_task(struct rq *rq, struct task_struct *p) | 8324 | static void normalize_task(struct rq *rq, struct task_struct *p) |
8111 | { | 8325 | { |
8326 | const struct sched_class *prev_class = p->sched_class; | ||
8327 | int old_prio = p->prio; | ||
8112 | int on_rq; | 8328 | int on_rq; |
8113 | 8329 | ||
8114 | on_rq = p->se.on_rq; | 8330 | on_rq = p->se.on_rq; |
@@ -8119,6 +8335,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
8119 | activate_task(rq, p, 0); | 8335 | activate_task(rq, p, 0); |
8120 | resched_task(rq->curr); | 8336 | resched_task(rq->curr); |
8121 | } | 8337 | } |
8338 | |||
8339 | check_class_changed(rq, p, prev_class, old_prio); | ||
8122 | } | 8340 | } |
8123 | 8341 | ||
8124 | void normalize_rt_tasks(void) | 8342 | void normalize_rt_tasks(void) |
@@ -8510,7 +8728,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8510 | /* Propagate contribution to hierarchy */ | 8728 | /* Propagate contribution to hierarchy */ |
8511 | raw_spin_lock_irqsave(&rq->lock, flags); | 8729 | raw_spin_lock_irqsave(&rq->lock, flags); |
8512 | for_each_sched_entity(se) | 8730 | for_each_sched_entity(se) |
8513 | update_cfs_shares(group_cfs_rq(se), 0); | 8731 | update_cfs_shares(group_cfs_rq(se)); |
8514 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 8732 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
8515 | } | 8733 | } |
8516 | 8734 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd075..7bacd83a4158 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
179 | 179 | ||
180 | raw_spin_lock_irqsave(&rq->lock, flags); | 180 | raw_spin_lock_irqsave(&rq->lock, flags); |
181 | if (cfs_rq->rb_leftmost) | 181 | if (cfs_rq->rb_leftmost) |
182 | MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; | 182 | MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; |
183 | last = __pick_last_entity(cfs_rq); | 183 | last = __pick_last_entity(cfs_rq); |
184 | if (last) | 184 | if (last) |
185 | max_vruntime = last->vruntime; | 185 | max_vruntime = last->vruntime; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450e..027024694043 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8; | |||
69 | unsigned int sysctl_sched_child_runs_first __read_mostly; | 69 | unsigned int sysctl_sched_child_runs_first __read_mostly; |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * sys_sched_yield() compat mode | ||
73 | * | ||
74 | * This option switches the agressive yield implementation of the | ||
75 | * old scheduler back on. | ||
76 | */ | ||
77 | unsigned int __read_mostly sysctl_sched_compat_yield; | ||
78 | |||
79 | /* | ||
80 | * SCHED_OTHER wake-up granularity. | 72 | * SCHED_OTHER wake-up granularity. |
81 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 73 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) |
82 | * | 74 | * |
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
419 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | 411 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); |
420 | } | 412 | } |
421 | 413 | ||
422 | static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | 414 | static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) |
423 | { | 415 | { |
424 | struct rb_node *left = cfs_rq->rb_leftmost; | 416 | struct rb_node *left = cfs_rq->rb_leftmost; |
425 | 417 | ||
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | |||
429 | return rb_entry(left, struct sched_entity, run_node); | 421 | return rb_entry(left, struct sched_entity, run_node); |
430 | } | 422 | } |
431 | 423 | ||
424 | static struct sched_entity *__pick_next_entity(struct sched_entity *se) | ||
425 | { | ||
426 | struct rb_node *next = rb_next(&se->run_node); | ||
427 | |||
428 | if (!next) | ||
429 | return NULL; | ||
430 | |||
431 | return rb_entry(next, struct sched_entity, run_node); | ||
432 | } | ||
433 | |||
434 | #ifdef CONFIG_SCHED_DEBUG | ||
432 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 435 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
433 | { | 436 | { |
434 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); | 437 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
443 | * Scheduling class statistics methods: | 446 | * Scheduling class statistics methods: |
444 | */ | 447 | */ |
445 | 448 | ||
446 | #ifdef CONFIG_SCHED_DEBUG | ||
447 | int sched_proc_update_handler(struct ctl_table *table, int write, | 449 | int sched_proc_update_handler(struct ctl_table *table, int write, |
448 | void __user *buffer, size_t *lenp, | 450 | void __user *buffer, size_t *lenp, |
449 | loff_t *ppos) | 451 | loff_t *ppos) |
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
540 | } | 542 | } |
541 | 543 | ||
542 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); | 544 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); |
543 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); | 545 | static void update_cfs_shares(struct cfs_rq *cfs_rq); |
544 | 546 | ||
545 | /* | 547 | /* |
546 | * Update the current task's runtime statistics. Skip current tasks that | 548 | * Update the current task's runtime statistics. Skip current tasks that |
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
733 | now - cfs_rq->load_last > 4 * period) { | 735 | now - cfs_rq->load_last > 4 * period) { |
734 | cfs_rq->load_period = 0; | 736 | cfs_rq->load_period = 0; |
735 | cfs_rq->load_avg = 0; | 737 | cfs_rq->load_avg = 0; |
738 | delta = period - 1; | ||
736 | } | 739 | } |
737 | 740 | ||
738 | cfs_rq->load_stamp = now; | 741 | cfs_rq->load_stamp = now; |
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
763 | list_del_leaf_cfs_rq(cfs_rq); | 766 | list_del_leaf_cfs_rq(cfs_rq); |
764 | } | 767 | } |
765 | 768 | ||
766 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | 769 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
767 | long weight_delta) | ||
768 | { | 770 | { |
769 | long load_weight, load, shares; | 771 | long load_weight, load, shares; |
770 | 772 | ||
771 | load = cfs_rq->load.weight + weight_delta; | 773 | load = cfs_rq->load.weight; |
772 | 774 | ||
773 | load_weight = atomic_read(&tg->load_weight); | 775 | load_weight = atomic_read(&tg->load_weight); |
774 | load_weight -= cfs_rq->load_contribution; | ||
775 | load_weight += load; | 776 | load_weight += load; |
777 | load_weight -= cfs_rq->load_contribution; | ||
776 | 778 | ||
777 | shares = (tg->shares * load); | 779 | shares = (tg->shares * load); |
778 | if (load_weight) | 780 | if (load_weight) |
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | |||
790 | { | 792 | { |
791 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | 793 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { |
792 | update_cfs_load(cfs_rq, 0); | 794 | update_cfs_load(cfs_rq, 0); |
793 | update_cfs_shares(cfs_rq, 0); | 795 | update_cfs_shares(cfs_rq); |
794 | } | 796 | } |
795 | } | 797 | } |
796 | # else /* CONFIG_SMP */ | 798 | # else /* CONFIG_SMP */ |
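As a rough worked model of the calculation above (standalone C, not kernel code; it ignores the MIN_SHARES clamping done by the surrounding kernel code): the group entity's weight on this CPU is the group's total shares scaled by this cfs_rq's load relative to the group-wide load, where the group-wide figure has this CPU's stale contribution replaced by its current load:

    #include <stdio.h>

    /* Simplified model of calc_cfs_shares(). */
    static long calc_shares(long tg_shares,         /* tg->shares */
                            long tg_weight,         /* atomic tg->load_weight */
                            long local_load,        /* cfs_rq->load.weight */
                            long local_contrib)     /* cfs_rq->load_contribution */
    {
            long load_weight = tg_weight + local_load - local_contrib;
            long shares = tg_shares * local_load;

            if (load_weight)
                    shares /= load_weight;

            return shares;
    }

    int main(void)
    {
            /* Group weight 3072 across all CPUs, 1024 of it on this CPU. */
            printf("shares = %ld\n", calc_shares(1024, 3072, 1024, 1024)); /* 341 */
            return 0;
    }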
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
798 | { | 800 | { |
799 | } | 801 | } |
800 | 802 | ||
801 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | 803 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
802 | long weight_delta) | ||
803 | { | 804 | { |
804 | return tg->shares; | 805 | return tg->shares; |
805 | } | 806 | } |
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | |||
824 | account_entity_enqueue(cfs_rq, se); | 825 | account_entity_enqueue(cfs_rq, se); |
825 | } | 826 | } |
826 | 827 | ||
827 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | 828 | static void update_cfs_shares(struct cfs_rq *cfs_rq) |
828 | { | 829 | { |
829 | struct task_group *tg; | 830 | struct task_group *tg; |
830 | struct sched_entity *se; | 831 | struct sched_entity *se; |
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | |||
838 | if (likely(se->load.weight == tg->shares)) | 839 | if (likely(se->load.weight == tg->shares)) |
839 | return; | 840 | return; |
840 | #endif | 841 | #endif |
841 | shares = calc_cfs_shares(cfs_rq, tg, weight_delta); | 842 | shares = calc_cfs_shares(cfs_rq, tg); |
842 | 843 | ||
843 | reweight_entity(cfs_rq_of(se), se, shares); | 844 | reweight_entity(cfs_rq_of(se), se, shares); |
844 | } | 845 | } |
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
847 | { | 848 | { |
848 | } | 849 | } |
849 | 850 | ||
850 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | 851 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq) |
851 | { | 852 | { |
852 | } | 853 | } |
853 | 854 | ||
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
978 | */ | 979 | */ |
979 | update_curr(cfs_rq); | 980 | update_curr(cfs_rq); |
980 | update_cfs_load(cfs_rq, 0); | 981 | update_cfs_load(cfs_rq, 0); |
981 | update_cfs_shares(cfs_rq, se->load.weight); | ||
982 | account_entity_enqueue(cfs_rq, se); | 982 | account_entity_enqueue(cfs_rq, se); |
983 | update_cfs_shares(cfs_rq); | ||
983 | 984 | ||
984 | if (flags & ENQUEUE_WAKEUP) { | 985 | if (flags & ENQUEUE_WAKEUP) { |
985 | place_entity(cfs_rq, se, 0); | 986 | place_entity(cfs_rq, se, 0); |
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
996 | list_add_leaf_cfs_rq(cfs_rq); | 997 | list_add_leaf_cfs_rq(cfs_rq); |
997 | } | 998 | } |
998 | 999 | ||
999 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 1000 | static void __clear_buddies_last(struct sched_entity *se) |
1001 | { | ||
1002 | for_each_sched_entity(se) { | ||
1003 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1004 | if (cfs_rq->last == se) | ||
1005 | cfs_rq->last = NULL; | ||
1006 | else | ||
1007 | break; | ||
1008 | } | ||
1009 | } | ||
1010 | |||
1011 | static void __clear_buddies_next(struct sched_entity *se) | ||
1000 | { | 1012 | { |
1001 | if (!se || cfs_rq->last == se) | 1013 | for_each_sched_entity(se) { |
1002 | cfs_rq->last = NULL; | 1014 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1015 | if (cfs_rq->next == se) | ||
1016 | cfs_rq->next = NULL; | ||
1017 | else | ||
1018 | break; | ||
1019 | } | ||
1020 | } | ||
1003 | 1021 | ||
1004 | if (!se || cfs_rq->next == se) | 1022 | static void __clear_buddies_skip(struct sched_entity *se) |
1005 | cfs_rq->next = NULL; | 1023 | { |
1024 | for_each_sched_entity(se) { | ||
1025 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1026 | if (cfs_rq->skip == se) | ||
1027 | cfs_rq->skip = NULL; | ||
1028 | else | ||
1029 | break; | ||
1030 | } | ||
1006 | } | 1031 | } |
1007 | 1032 | ||
1008 | static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 1033 | static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
1009 | { | 1034 | { |
1010 | for_each_sched_entity(se) | 1035 | if (cfs_rq->last == se) |
1011 | __clear_buddies(cfs_rq_of(se), se); | 1036 | __clear_buddies_last(se); |
1037 | |||
1038 | if (cfs_rq->next == se) | ||
1039 | __clear_buddies_next(se); | ||
1040 | |||
1041 | if (cfs_rq->skip == se) | ||
1042 | __clear_buddies_skip(se); | ||
1012 | } | 1043 | } |
1013 | 1044 | ||
1014 | static void | 1045 | static void |
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
1041 | update_cfs_load(cfs_rq, 0); | 1072 | update_cfs_load(cfs_rq, 0); |
1042 | account_entity_dequeue(cfs_rq, se); | 1073 | account_entity_dequeue(cfs_rq, se); |
1043 | update_min_vruntime(cfs_rq); | 1074 | update_min_vruntime(cfs_rq); |
1044 | update_cfs_shares(cfs_rq, 0); | 1075 | update_cfs_shares(cfs_rq); |
1045 | 1076 | ||
1046 | /* | 1077 | /* |
1047 | * Normalize the entity after updating the min_vruntime because the | 1078 | * Normalize the entity after updating the min_vruntime because the |
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
1084 | return; | 1115 | return; |
1085 | 1116 | ||
1086 | if (cfs_rq->nr_running > 1) { | 1117 | if (cfs_rq->nr_running > 1) { |
1087 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1118 | struct sched_entity *se = __pick_first_entity(cfs_rq); |
1088 | s64 delta = curr->vruntime - se->vruntime; | 1119 | s64 delta = curr->vruntime - se->vruntime; |
1089 | 1120 | ||
1090 | if (delta < 0) | 1121 | if (delta < 0) |
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
1128 | static int | 1159 | static int |
1129 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | 1160 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); |
1130 | 1161 | ||
1162 | /* | ||
1163 | * Pick the next process, keeping these things in mind, in this order: | ||
1164 | * 1) keep things fair between processes/task groups | ||
1165 | * 2) pick the "next" process, since someone really wants that to run | ||
1166 | * 3) pick the "last" process, for cache locality | ||
1167 | * 4) do not run the "skip" process, if something else is available | ||
1168 | */ | ||
1131 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 1169 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) |
1132 | { | 1170 | { |
1133 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1171 | struct sched_entity *se = __pick_first_entity(cfs_rq); |
1134 | struct sched_entity *left = se; | 1172 | struct sched_entity *left = se; |
1135 | 1173 | ||
1136 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) | 1174 | /* |
1137 | se = cfs_rq->next; | 1175 | * Avoid running the skip buddy, if running something else can |
1176 | * be done without getting too unfair. | ||
1177 | */ | ||
1178 | if (cfs_rq->skip == se) { | ||
1179 | struct sched_entity *second = __pick_next_entity(se); | ||
1180 | if (second && wakeup_preempt_entity(second, left) < 1) | ||
1181 | se = second; | ||
1182 | } | ||
1138 | 1183 | ||
1139 | /* | 1184 | /* |
1140 | * Prefer last buddy, try to return the CPU to a preempted task. | 1185 | * Prefer last buddy, try to return the CPU to a preempted task. |
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | |||
1142 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) | 1187 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) |
1143 | se = cfs_rq->last; | 1188 | se = cfs_rq->last; |
1144 | 1189 | ||
1190 | /* | ||
1191 | * Someone really wants this to run. If it's not unfair, run it. | ||
1192 | */ | ||
1193 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) | ||
1194 | se = cfs_rq->next; | ||
1195 | |||
1145 | clear_buddies(cfs_rq, se); | 1196 | clear_buddies(cfs_rq, se); |
1146 | 1197 | ||
1147 | return se; | 1198 | return se; |
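The selection order spelled out in the comment above can be modelled in a few lines of standalone C (not kernel code; wakeup_preempt_entity() is reduced to a plain vruntime-gap check here): start from the leftmost entity, step past it if it is the skip buddy and the runner-up is close enough, then let the last buddy and finally the next buddy override the choice as long as that stays fair:

    #include <stddef.h>
    #include <stdio.h>

    struct entity { const char *name; long long vruntime; };

    /* Stand-in for wakeup_preempt_entity(): 'cand' may replace 'left' only
     * if it has not run unfairly far ahead of it. */
    static int too_unfair(const struct entity *cand, const struct entity *left)
    {
            return cand->vruntime - left->vruntime > 1000000;  /* ~1ms gran */
    }

    static const struct entity *
    pick_next(const struct entity *first, const struct entity *second,
              const struct entity *skip, const struct entity *last,
              const struct entity *next)
    {
            const struct entity *left = first;
            const struct entity *se = first;

            if (skip == se && second && !too_unfair(second, left))
                    se = second;                    /* avoid the skip buddy */
            if (last && !too_unfair(last, left))
                    se = last;                      /* prefer cache locality */
            if (next && !too_unfair(next, left))
                    se = next;                      /* someone wants this one */
            return se;
    }

    int main(void)
    {
            struct entity a = { "A", 100 }, b = { "B", 200 }, c = { "C", 5000000 };

            /* A yielded (skip buddy); B is close enough, so B runs. */
            printf("%s\n", pick_next(&a, &b, &a, NULL, NULL)->name);
            /* C as "next" buddy is too far ahead, so A keeps the CPU. */
            printf("%s\n", pick_next(&a, &b, NULL, NULL, &c)->name);
            return 0;
    }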
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1282 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1333 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1283 | 1334 | ||
1284 | update_cfs_load(cfs_rq, 0); | 1335 | update_cfs_load(cfs_rq, 0); |
1285 | update_cfs_shares(cfs_rq, 0); | 1336 | update_cfs_shares(cfs_rq); |
1286 | } | 1337 | } |
1287 | 1338 | ||
1288 | hrtick_update(rq); | 1339 | hrtick_update(rq); |
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1312 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1363 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1313 | 1364 | ||
1314 | update_cfs_load(cfs_rq, 0); | 1365 | update_cfs_load(cfs_rq, 0); |
1315 | update_cfs_shares(cfs_rq, 0); | 1366 | update_cfs_shares(cfs_rq); |
1316 | } | 1367 | } |
1317 | 1368 | ||
1318 | hrtick_update(rq); | 1369 | hrtick_update(rq); |
1319 | } | 1370 | } |
1320 | 1371 | ||
1321 | /* | ||
1322 | * sched_yield() support is very simple - we dequeue and enqueue. | ||
1323 | * | ||
1324 | * If compat_yield is turned on then we requeue to the end of the tree. | ||
1325 | */ | ||
1326 | static void yield_task_fair(struct rq *rq) | ||
1327 | { | ||
1328 | struct task_struct *curr = rq->curr; | ||
1329 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
1330 | struct sched_entity *rightmost, *se = &curr->se; | ||
1331 | |||
1332 | /* | ||
1333 | * Are we the only task in the tree? | ||
1334 | */ | ||
1335 | if (unlikely(cfs_rq->nr_running == 1)) | ||
1336 | return; | ||
1337 | |||
1338 | clear_buddies(cfs_rq, se); | ||
1339 | |||
1340 | if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { | ||
1341 | update_rq_clock(rq); | ||
1342 | /* | ||
1343 | * Update run-time statistics of the 'current'. | ||
1344 | */ | ||
1345 | update_curr(cfs_rq); | ||
1346 | |||
1347 | return; | ||
1348 | } | ||
1349 | /* | ||
1350 | * Find the rightmost entry in the rbtree: | ||
1351 | */ | ||
1352 | rightmost = __pick_last_entity(cfs_rq); | ||
1353 | /* | ||
1354 | * Already in the rightmost position? | ||
1355 | */ | ||
1356 | if (unlikely(!rightmost || entity_before(rightmost, se))) | ||
1357 | return; | ||
1358 | |||
1359 | /* | ||
1360 | * Minimally necessary key value to be last in the tree: | ||
1361 | * Upon rescheduling, sched_class::put_prev_task() will place | ||
1362 | * 'current' within the tree based on its new key value. | ||
1363 | */ | ||
1364 | se->vruntime = rightmost->vruntime + 1; | ||
1365 | } | ||
1366 | |||
1367 | #ifdef CONFIG_SMP | 1372 | #ifdef CONFIG_SMP |
1368 | 1373 | ||
1369 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | 1374 | static void task_waking_fair(struct rq *rq, struct task_struct *p) |
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se) | |||
1834 | } | 1839 | } |
1835 | } | 1840 | } |
1836 | 1841 | ||
1842 | static void set_skip_buddy(struct sched_entity *se) | ||
1843 | { | ||
1844 | if (likely(task_of(se)->policy != SCHED_IDLE)) { | ||
1845 | for_each_sched_entity(se) | ||
1846 | cfs_rq_of(se)->skip = se; | ||
1847 | } | ||
1848 | } | ||
1849 | |||
1837 | /* | 1850 | /* |
1838 | * Preempt the current task with a newly woken task if needed: | 1851 | * Preempt the current task with a newly woken task if needed: |
1839 | */ | 1852 | */ |
@@ -1932,6 +1945,55 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) | |||
1932 | } | 1945 | } |
1933 | } | 1946 | } |
1934 | 1947 | ||
1948 | /* | ||
1949 | * sched_yield() is very simple | ||
1950 | * | ||
1951 | * The magic of dealing with the ->skip buddy is in pick_next_entity. | ||
1952 | */ | ||
1953 | static void yield_task_fair(struct rq *rq) | ||
1954 | { | ||
1955 | struct task_struct *curr = rq->curr; | ||
1956 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
1957 | struct sched_entity *se = &curr->se; | ||
1958 | |||
1959 | /* | ||
1960 | * Are we the only task in the tree? | ||
1961 | */ | ||
1962 | if (unlikely(rq->nr_running == 1)) | ||
1963 | return; | ||
1964 | |||
1965 | clear_buddies(cfs_rq, se); | ||
1966 | |||
1967 | if (curr->policy != SCHED_BATCH) { | ||
1968 | update_rq_clock(rq); | ||
1969 | /* | ||
1970 | * Update run-time statistics of the 'current'. | ||
1971 | */ | ||
1972 | update_curr(cfs_rq); | ||
1973 | } | ||
1974 | |||
1975 | set_skip_buddy(se); | ||
1976 | } | ||
1977 | |||
1978 | static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt) | ||
1979 | { | ||
1980 | struct sched_entity *se = &p->se; | ||
1981 | |||
1982 | if (!se->on_rq) | ||
1983 | return false; | ||
1984 | |||
1985 | /* Tell the scheduler that we'd really like pse to run next. */ | ||
1986 | set_next_buddy(se); | ||
1987 | |||
1988 | /* Make p's CPU reschedule; pick_next_entity takes care of fairness. */ | ||
1989 | if (preempt) | ||
1990 | resched_task(rq->curr); | ||
1991 | |||
1992 | yield_task_fair(rq); | ||
1993 | |||
1994 | return true; | ||
1995 | } | ||
1996 | |||
1935 | #ifdef CONFIG_SMP | 1997 | #ifdef CONFIG_SMP |
1936 | /************************************************** | 1998 | /************************************************** |
1937 | * Fair scheduling class load-balancing methods: | 1999 | * Fair scheduling class load-balancing methods: |
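From user space the interface is unchanged: sched_yield() still just gives up the CPU. What changes is the effect for SCHED_OTHER tasks: the sched_compat_yield knob and the old "requeue to the rightmost position" variant are gone, and a yield now marks the caller as the skip buddy, which pick_next_entity() steps past whenever it can do so without becoming unfair. A trivial caller, for reference:

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            /* Busy loop that offers the CPU to other runnable tasks on
             * every iteration; with the change above this sets the skip
             * buddy rather than requeueing the caller. */
            for (int i = 0; i < 5; i++) {
                    printf("iteration %d\n", i);
                    sched_yield();
            }
            return 0;
    }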
@@ -2123,7 +2185,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu) | |||
2123 | * We need to update shares after updating tg->load_weight in | 2185 | * We need to update shares after updating tg->load_weight in |
2124 | * order to adjust the weight of groups with long running tasks. | 2186 | * order to adjust the weight of groups with long running tasks. |
2125 | */ | 2187 | */ |
2126 | update_cfs_shares(cfs_rq, 0); | 2188 | update_cfs_shares(cfs_rq); |
2127 | 2189 | ||
2128 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 2190 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
2129 | 2191 | ||
@@ -4079,33 +4141,62 @@ static void task_fork_fair(struct task_struct *p) | |||
4079 | * Priority of the task has changed. Check to see if we preempt | 4141 | * Priority of the task has changed. Check to see if we preempt |
4080 | * the current task. | 4142 | * the current task. |
4081 | */ | 4143 | */ |
4082 | static void prio_changed_fair(struct rq *rq, struct task_struct *p, | 4144 | static void |
4083 | int oldprio, int running) | 4145 | prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) |
4084 | { | 4146 | { |
4147 | if (!p->se.on_rq) | ||
4148 | return; | ||
4149 | |||
4085 | /* | 4150 | /* |
4086 | * Reschedule if we are currently running on this runqueue and | 4151 | * Reschedule if we are currently running on this runqueue and |
4087 | * our priority decreased, or if we are not currently running on | 4152 | * our priority decreased, or if we are not currently running on |
4088 | * this runqueue and our priority is higher than the current's | 4153 | * this runqueue and our priority is higher than the current's |
4089 | */ | 4154 | */ |
4090 | if (running) { | 4155 | if (rq->curr == p) { |
4091 | if (p->prio > oldprio) | 4156 | if (p->prio > oldprio) |
4092 | resched_task(rq->curr); | 4157 | resched_task(rq->curr); |
4093 | } else | 4158 | } else |
4094 | check_preempt_curr(rq, p, 0); | 4159 | check_preempt_curr(rq, p, 0); |
4095 | } | 4160 | } |
4096 | 4161 | ||
4162 | static void switched_from_fair(struct rq *rq, struct task_struct *p) | ||
4163 | { | ||
4164 | struct sched_entity *se = &p->se; | ||
4165 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
4166 | |||
4167 | /* | ||
4168 | * Ensure the task's vruntime is normalized, so that when its | ||
4169 | * switched back to the fair class the enqueue_entity(.flags=0) will | ||
4170 | * do the right thing. | ||
4171 | * | ||
4172 | * If it was on_rq, then the dequeue_entity(.flags=0) will already | ||
4173 | * have normalized the vruntime, if it was !on_rq, then only when | ||
4174 | * the task is sleeping will it still have non-normalized vruntime. | ||
4175 | */ | ||
4176 | if (!se->on_rq && p->state != TASK_RUNNING) { | ||
4177 | /* | ||
4178 | * Fix up our vruntime so that the current sleep doesn't | ||
4179 | * cause 'unlimited' sleep bonus. | ||
4180 | */ | ||
4181 | place_entity(cfs_rq, se, 0); | ||
4182 | se->vruntime -= cfs_rq->min_vruntime; | ||
4183 | } | ||
4184 | } | ||
4185 | |||
4097 | /* | 4186 | /* |
4098 | * We switched to the sched_fair class. | 4187 | * We switched to the sched_fair class. |
4099 | */ | 4188 | */ |
4100 | static void switched_to_fair(struct rq *rq, struct task_struct *p, | 4189 | static void switched_to_fair(struct rq *rq, struct task_struct *p) |
4101 | int running) | ||
4102 | { | 4190 | { |
4191 | if (!p->se.on_rq) | ||
4192 | return; | ||
4193 | |||
4103 | /* | 4194 | /* |
4104 | * We were most likely switched from sched_rt, so | 4195 | * We were most likely switched from sched_rt, so |
4105 | * kick off the schedule if running, otherwise just see | 4196 | * kick off the schedule if running, otherwise just see |
4106 | * if we can still preempt the current task. | 4197 | * if we can still preempt the current task. |
4107 | */ | 4198 | */ |
4108 | if (running) | 4199 | if (rq->curr == p) |
4109 | resched_task(rq->curr); | 4200 | resched_task(rq->curr); |
4110 | else | 4201 | else |
4111 | check_preempt_curr(rq, p, 0); | 4202 | check_preempt_curr(rq, p, 0); |
@@ -4171,6 +4262,7 @@ static const struct sched_class fair_sched_class = { | |||
4171 | .enqueue_task = enqueue_task_fair, | 4262 | .enqueue_task = enqueue_task_fair, |
4172 | .dequeue_task = dequeue_task_fair, | 4263 | .dequeue_task = dequeue_task_fair, |
4173 | .yield_task = yield_task_fair, | 4264 | .yield_task = yield_task_fair, |
4265 | .yield_to_task = yield_to_task_fair, | ||
4174 | 4266 | ||
4175 | .check_preempt_curr = check_preempt_wakeup, | 4267 | .check_preempt_curr = check_preempt_wakeup, |
4176 | 4268 | ||
@@ -4191,6 +4283,7 @@ static const struct sched_class fair_sched_class = { | |||
4191 | .task_fork = task_fork_fair, | 4283 | .task_fork = task_fork_fair, |
4192 | 4284 | ||
4193 | .prio_changed = prio_changed_fair, | 4285 | .prio_changed = prio_changed_fair, |
4286 | .switched_from = switched_from_fair, | ||
4194 | .switched_to = switched_to_fair, | 4287 | .switched_to = switched_to_fair, |
4195 | 4288 | ||
4196 | .get_rr_interval = get_rr_interval_fair, | 4289 | .get_rr_interval = get_rr_interval_fair, |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87c..c82f26c1b7c3 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq) | |||
52 | { | 52 | { |
53 | } | 53 | } |
54 | 54 | ||
55 | static void switched_to_idle(struct rq *rq, struct task_struct *p, | 55 | static void switched_to_idle(struct rq *rq, struct task_struct *p) |
56 | int running) | ||
57 | { | 56 | { |
58 | /* Can this actually happen?? */ | 57 | BUG(); |
59 | if (running) | ||
60 | resched_task(rq->curr); | ||
61 | else | ||
62 | check_preempt_curr(rq, p, 0); | ||
63 | } | 58 | } |
64 | 59 | ||
65 | static void prio_changed_idle(struct rq *rq, struct task_struct *p, | 60 | static void |
66 | int oldprio, int running) | 61 | prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio) |
67 | { | 62 | { |
68 | /* This can happen for hot plug CPUS */ | 63 | BUG(); |
69 | |||
70 | /* | ||
71 | * Reschedule if we are currently running on this runqueue and | ||
72 | * our priority decreased, or if we are not currently running on | ||
73 | * this runqueue and our priority is higher than the current's | ||
74 | */ | ||
75 | if (running) { | ||
76 | if (p->prio > oldprio) | ||
77 | resched_task(rq->curr); | ||
78 | } else | ||
79 | check_preempt_curr(rq, p, 0); | ||
80 | } | 64 | } |
81 | 65 | ||
82 | static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) | 66 | static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..4e108f8ecb6a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1595,8 +1595,7 @@ static void rq_offline_rt(struct rq *rq) | |||
1595 | * When switch from the rt queue, we bring ourselves to a position | 1595 | * When switch from the rt queue, we bring ourselves to a position |
1596 | * that we might want to pull RT tasks from other runqueues. | 1596 | * that we might want to pull RT tasks from other runqueues. |
1597 | */ | 1597 | */ |
1598 | static void switched_from_rt(struct rq *rq, struct task_struct *p, | 1598 | static void switched_from_rt(struct rq *rq, struct task_struct *p) |
1599 | int running) | ||
1600 | { | 1599 | { |
1601 | /* | 1600 | /* |
1602 | * If there are other RT tasks then we will reschedule | 1601 | * If there are other RT tasks then we will reschedule |
@@ -1605,7 +1604,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, | |||
1605 | * we may need to handle the pulling of RT tasks | 1604 | * we may need to handle the pulling of RT tasks |
1606 | * now. | 1605 | * now. |
1607 | */ | 1606 | */ |
1608 | if (!rq->rt.rt_nr_running) | 1607 | if (p->se.on_rq && !rq->rt.rt_nr_running) |
1609 | pull_rt_task(rq); | 1608 | pull_rt_task(rq); |
1610 | } | 1609 | } |
1611 | 1610 | ||
@@ -1624,8 +1623,7 @@ static inline void init_sched_rt_class(void) | |||
1624 | * with RT tasks. In this case we try to push them off to | 1623 | * with RT tasks. In this case we try to push them off to |
1625 | * other runqueues. | 1624 | * other runqueues. |
1626 | */ | 1625 | */ |
1627 | static void switched_to_rt(struct rq *rq, struct task_struct *p, | 1626 | static void switched_to_rt(struct rq *rq, struct task_struct *p) |
1628 | int running) | ||
1629 | { | 1627 | { |
1630 | int check_resched = 1; | 1628 | int check_resched = 1; |
1631 | 1629 | ||
@@ -1636,7 +1634,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p, | |||
1636 | * If that current running task is also an RT task | 1634 | * If that current running task is also an RT task |
1637 | * then see if we can move to another run queue. | 1635 | * then see if we can move to another run queue. |
1638 | */ | 1636 | */ |
1639 | if (!running) { | 1637 | if (p->se.on_rq && rq->curr != p) { |
1640 | #ifdef CONFIG_SMP | 1638 | #ifdef CONFIG_SMP |
1641 | if (rq->rt.overloaded && push_rt_task(rq) && | 1639 | if (rq->rt.overloaded && push_rt_task(rq) && |
1642 | /* Don't resched if we changed runqueues */ | 1640 | /* Don't resched if we changed runqueues */ |
@@ -1652,10 +1650,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p, | |||
1652 | * Priority of the task has changed. This may cause | 1650 | * Priority of the task has changed. This may cause |
1653 | * us to initiate a push or pull. | 1651 | * us to initiate a push or pull. |
1654 | */ | 1652 | */ |
1655 | static void prio_changed_rt(struct rq *rq, struct task_struct *p, | 1653 | static void |
1656 | int oldprio, int running) | 1654 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) |
1657 | { | 1655 | { |
1658 | if (running) { | 1656 | if (!p->se.on_rq) |
1657 | return; | ||
1658 | |||
1659 | if (rq->curr == p) { | ||
1659 | #ifdef CONFIG_SMP | 1660 | #ifdef CONFIG_SMP |
1660 | /* | 1661 | /* |
1661 | * If our priority decreases while running, we | 1662 | * If our priority decreases while running, we |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c1..84ec9bcf82d9 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq) | |||
59 | { | 59 | { |
60 | } | 60 | } |
61 | 61 | ||
62 | static void switched_to_stop(struct rq *rq, struct task_struct *p, | 62 | static void switched_to_stop(struct rq *rq, struct task_struct *p) |
63 | int running) | ||
64 | { | 63 | { |
65 | BUG(); /* its impossible to change to this class */ | 64 | BUG(); /* its impossible to change to this class */ |
66 | } | 65 | } |
67 | 66 | ||
68 | static void prio_changed_stop(struct rq *rq, struct task_struct *p, | 67 | static void |
69 | int oldprio, int running) | 68 | prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio) |
70 | { | 69 | { |
71 | BUG(); /* how!?, what priority? */ | 70 | BUG(); /* how!?, what priority? */ |
72 | } | 71 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec388..0cee50487629 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat); | |||
54 | 54 | ||
55 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | 55 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; |
56 | 56 | ||
57 | static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | 57 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); |
58 | 58 | ||
59 | char *softirq_to_name[NR_SOFTIRQS] = { | 59 | char *softirq_to_name[NR_SOFTIRQS] = { |
60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
@@ -721,7 +721,6 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
721 | { | 721 | { |
722 | set_current_state(TASK_INTERRUPTIBLE); | 722 | set_current_state(TASK_INTERRUPTIBLE); |
723 | 723 | ||
724 | current->flags |= PF_KSOFTIRQD; | ||
725 | while (!kthread_should_stop()) { | 724 | while (!kthread_should_stop()) { |
726 | preempt_disable(); | 725 | preempt_disable(); |
727 | if (!local_softirq_pending()) { | 726 | if (!local_softirq_pending()) { |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83db985..cb7c830f7faa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -361,13 +361,6 @@ static struct ctl_table kern_table[] = { | |||
361 | .mode = 0644, | 361 | .mode = 0644, |
362 | .proc_handler = sched_rt_handler, | 362 | .proc_handler = sched_rt_handler, |
363 | }, | 363 | }, |
364 | { | ||
365 | .procname = "sched_compat_yield", | ||
366 | .data = &sysctl_sched_compat_yield, | ||
367 | .maxlen = sizeof(unsigned int), | ||
368 | .mode = 0644, | ||
369 | .proc_handler = proc_dointvec, | ||
370 | }, | ||
371 | #ifdef CONFIG_SCHED_AUTOGROUP | 364 | #ifdef CONFIG_SCHED_AUTOGROUP |
372 | { | 365 | { |
373 | .procname = "sched_autogroup_enabled", | 366 | .procname = "sched_autogroup_enabled", |
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576f..55337a816b20 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x) | |||
645 | } | 645 | } |
646 | 646 | ||
647 | /** | 647 | /** |
648 | * nsecs_to_jiffies - Convert nsecs in u64 to jiffies | 648 | * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 |
649 | * | 649 | * |
650 | * @n: nsecs in u64 | 650 | * @n: nsecs in u64 |
651 | * | 651 | * |
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x) | |||
657 | * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) | 657 | * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) |
658 | * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years | 658 | * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years |
659 | */ | 659 | */ |
660 | unsigned long nsecs_to_jiffies(u64 n) | 660 | u64 nsecs_to_jiffies64(u64 n) |
661 | { | 661 | { |
662 | #if (NSEC_PER_SEC % HZ) == 0 | 662 | #if (NSEC_PER_SEC % HZ) == 0 |
663 | /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ | 663 | /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ |
@@ -674,6 +674,25 @@ unsigned long nsecs_to_jiffies(u64 n) | |||
674 | #endif | 674 | #endif |
675 | } | 675 | } |
676 | 676 | ||
677 | |||
678 | /** | ||
679 | * nsecs_to_jiffies - Convert nsecs in u64 to jiffies | ||
680 | * | ||
681 | * @n: nsecs in u64 | ||
682 | * | ||
683 | * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. | ||
684 | * And this doesn't return MAX_JIFFY_OFFSET since this function is designed | ||
685 | * for scheduler, not for use in device drivers to calculate timeout value. | ||
686 | * | ||
687 | * note: | ||
688 | * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) | ||
689 | * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years | ||
690 | */ | ||
691 | unsigned long nsecs_to_jiffies(u64 n) | ||
692 | { | ||
693 | return (unsigned long)nsecs_to_jiffies64(n); | ||
694 | } | ||
695 | |||
677 | #if (BITS_PER_LONG < 64) | 696 | #if (BITS_PER_LONG < 64) |
678 | u64 get_jiffies_64(void) | 697 | u64 get_jiffies_64(void) |
679 | { | 698 | { |
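The split above adds a 64-bit variant and keeps nsecs_to_jiffies() as a thin wrapper around it. In the common case where HZ divides NSEC_PER_SEC the conversion is a single division; a standalone illustration (plain C, assuming HZ == 1000 purely for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL
    #define HZ           1000ULL            /* assumption for the example */

    /* Common case of nsecs_to_jiffies64(): HZ divides NSEC_PER_SEC evenly. */
    static uint64_t nsecs_to_jiffies64_model(uint64_t n)
    {
            return n / (NSEC_PER_SEC / HZ);  /* one jiffy == 1,000,000 ns here */
    }

    int main(void)
    {
            /* 2.5 ms of accumulated time -> 2 whole jiffies */
            printf("%llu\n", (unsigned long long)nsecs_to_jiffies64_model(2500000));
            return 0;
    }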