diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-15 21:37:30 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-15 21:37:30 -0400 |
| commit | 9620639b7ea3843983f4ced8b4c81eb4d8974838 (patch) | |
| tree | 54266fac3bcf89e61ae06c7d36ca708df6e0ea33 /kernel | |
| parent | a926021cb1f8a99a275eaf6eb546102e9469dc59 (diff) | |
| parent | 6d1cafd8b56ea726c10a5a104de57cc3ed8fa953 (diff) | |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (26 commits)
sched: Resched proper CPU on yield_to()
sched: Allow users with sufficient RLIMIT_NICE to change from SCHED_IDLE policy
sched: Allow SCHED_BATCH to preempt SCHED_IDLE tasks
sched: Clean up the IRQ_TIME_ACCOUNTING code
sched: Add #ifdef around irq time accounting functions
sched, autogroup: Stop claiming ownership of the root task group
sched, autogroup: Stop going ahead if autogroup is disabled
sched, autogroup, sysctl: Use proc_dointvec_minmax() instead
sched: Fix the group_imb logic
sched: Clean up some f_b_g() comments
sched: Clean up remnants of sd_idle
sched: Wholesale removal of sd_idle logic
sched: Add yield_to(task, preempt) functionality
sched: Use a buddy to implement yield_task_fair()
sched: Limit the scope of clear_buddies
sched: Check the right ->nr_running in yield_task_fair()
sched: Avoid expensive initial update_cfs_load(), on UP too
sched: Fix switch_from_fair()
sched: Simplify the idle scheduling class
softirqs: Account ksoftirqd time as cpustat softirq
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched.c | 296 | ||||
| -rw-r--r-- | kernel/sched_autogroup.c | 15 | ||||
| -rw-r--r-- | kernel/sched_autogroup.h | 5 | ||||
| -rw-r--r-- | kernel/sched_debug.c | 2 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 397 | ||||
| -rw-r--r-- | kernel/sched_idletask.c | 26 | ||||
| -rw-r--r-- | kernel/sched_rt.c | 19 | ||||
| -rw-r--r-- | kernel/sched_stoptask.c | 7 | ||||
| -rw-r--r-- | kernel/softirq.c | 3 | ||||
| -rw-r--r-- | kernel/sysctl.c | 9 | ||||
| -rw-r--r-- | kernel/time.c | 23 |
11 files changed, 549 insertions, 253 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 57a18e8d28c8..27125e413576 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -324,7 +324,7 @@ struct cfs_rq { | |||
| 324 | * 'curr' points to currently running entity on this cfs_rq. | 324 | * 'curr' points to currently running entity on this cfs_rq. |
| 325 | * It is set to NULL otherwise (i.e. when none are currently running). | 325 | * It is set to NULL otherwise (i.e. when none are currently running). |
| 326 | */ | 326 | */ |
| 327 | struct sched_entity *curr, *next, *last; | 327 | struct sched_entity *curr, *next, *last, *skip; |
| 328 | 328 | ||
| 329 | unsigned int nr_spread_over; | 329 | unsigned int nr_spread_over; |
| 330 | 330 | ||
| @@ -1683,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
| 1683 | __release(rq2->lock); | 1683 | __release(rq2->lock); |
| 1684 | } | 1684 | } |
| 1685 | 1685 | ||
| 1686 | #else /* CONFIG_SMP */ | ||
| 1687 | |||
| 1688 | /* | ||
| 1689 | * double_rq_lock - safely lock two runqueues | ||
| 1690 | * | ||
| 1691 | * Note this does not disable interrupts like task_rq_lock, | ||
| 1692 | * you need to do so manually before calling. | ||
| 1693 | */ | ||
| 1694 | static void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
| 1695 | __acquires(rq1->lock) | ||
| 1696 | __acquires(rq2->lock) | ||
| 1697 | { | ||
| 1698 | BUG_ON(!irqs_disabled()); | ||
| 1699 | BUG_ON(rq1 != rq2); | ||
| 1700 | raw_spin_lock(&rq1->lock); | ||
| 1701 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
| 1702 | } | ||
| 1703 | |||
| 1704 | /* | ||
| 1705 | * double_rq_unlock - safely unlock two runqueues | ||
| 1706 | * | ||
| 1707 | * Note this does not restore interrupts like task_rq_unlock, | ||
| 1708 | * you need to do so manually after calling. | ||
| 1709 | */ | ||
| 1710 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
| 1711 | __releases(rq1->lock) | ||
| 1712 | __releases(rq2->lock) | ||
| 1713 | { | ||
| 1714 | BUG_ON(rq1 != rq2); | ||
| 1715 | raw_spin_unlock(&rq1->lock); | ||
| 1716 | __release(rq2->lock); | ||
| 1717 | } | ||
| 1718 | |||
| 1686 | #endif | 1719 | #endif |
| 1687 | 1720 | ||
| 1688 | static void calc_load_account_idle(struct rq *this_rq); | 1721 | static void calc_load_account_idle(struct rq *this_rq); |
| @@ -1877,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1877 | */ | 1910 | */ |
| 1878 | if (hardirq_count()) | 1911 | if (hardirq_count()) |
| 1879 | __this_cpu_add(cpu_hardirq_time, delta); | 1912 | __this_cpu_add(cpu_hardirq_time, delta); |
| 1880 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 1913 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) |
| 1881 | __this_cpu_add(cpu_softirq_time, delta); | 1914 | __this_cpu_add(cpu_softirq_time, delta); |
| 1882 | 1915 | ||
| 1883 | irq_time_write_end(); | 1916 | irq_time_write_end(); |
| @@ -1917,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 1917 | sched_rt_avg_update(rq, irq_delta); | 1950 | sched_rt_avg_update(rq, irq_delta); |
| 1918 | } | 1951 | } |
| 1919 | 1952 | ||
| 1953 | static int irqtime_account_hi_update(void) | ||
| 1954 | { | ||
| 1955 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
| 1956 | unsigned long flags; | ||
| 1957 | u64 latest_ns; | ||
| 1958 | int ret = 0; | ||
| 1959 | |||
| 1960 | local_irq_save(flags); | ||
| 1961 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
| 1962 | if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq)) | ||
| 1963 | ret = 1; | ||
| 1964 | local_irq_restore(flags); | ||
| 1965 | return ret; | ||
| 1966 | } | ||
| 1967 | |||
| 1968 | static int irqtime_account_si_update(void) | ||
| 1969 | { | ||
| 1970 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
| 1971 | unsigned long flags; | ||
| 1972 | u64 latest_ns; | ||
| 1973 | int ret = 0; | ||
| 1974 | |||
| 1975 | local_irq_save(flags); | ||
| 1976 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
| 1977 | if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq)) | ||
| 1978 | ret = 1; | ||
| 1979 | local_irq_restore(flags); | ||
| 1980 | return ret; | ||
| 1981 | } | ||
| 1982 | |||
| 1920 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 1983 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 1921 | 1984 | ||
| 1985 | #define sched_clock_irqtime (0) | ||
| 1986 | |||
| 1922 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 1987 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
| 1923 | { | 1988 | { |
| 1924 | rq->clock_task += delta; | 1989 | rq->clock_task += delta; |
| @@ -2022,14 +2087,14 @@ inline int task_curr(const struct task_struct *p) | |||
| 2022 | 2087 | ||
| 2023 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 2088 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
| 2024 | const struct sched_class *prev_class, | 2089 | const struct sched_class *prev_class, |
| 2025 | int oldprio, int running) | 2090 | int oldprio) |
| 2026 | { | 2091 | { |
| 2027 | if (prev_class != p->sched_class) { | 2092 | if (prev_class != p->sched_class) { |
| 2028 | if (prev_class->switched_from) | 2093 | if (prev_class->switched_from) |
| 2029 | prev_class->switched_from(rq, p, running); | 2094 | prev_class->switched_from(rq, p); |
| 2030 | p->sched_class->switched_to(rq, p, running); | 2095 | p->sched_class->switched_to(rq, p); |
| 2031 | } else | 2096 | } else if (oldprio != p->prio) |
| 2032 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2097 | p->sched_class->prio_changed(rq, p, oldprio); |
| 2033 | } | 2098 | } |
| 2034 | 2099 | ||
| 2035 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | 2100 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
| @@ -2542,6 +2607,7 @@ static void __sched_fork(struct task_struct *p) | |||
| 2542 | p->se.sum_exec_runtime = 0; | 2607 | p->se.sum_exec_runtime = 0; |
| 2543 | p->se.prev_sum_exec_runtime = 0; | 2608 | p->se.prev_sum_exec_runtime = 0; |
| 2544 | p->se.nr_migrations = 0; | 2609 | p->se.nr_migrations = 0; |
| 2610 | p->se.vruntime = 0; | ||
| 2545 | 2611 | ||
| 2546 | #ifdef CONFIG_SCHEDSTATS | 2612 | #ifdef CONFIG_SCHEDSTATS |
| 2547 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2613 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
| @@ -3547,6 +3613,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
| 3547 | } | 3613 | } |
| 3548 | 3614 | ||
| 3549 | /* | 3615 | /* |
| 3616 | * Account system cpu time to a process and desired cpustat field | ||
| 3617 | * @p: the process that the cpu time gets accounted to | ||
| 3618 | * @cputime: the cpu time spent in kernel space since the last update | ||
| 3619 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 3620 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
| 3621 | */ | ||
| 3622 | static inline | ||
| 3623 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
| 3624 | cputime_t cputime_scaled, cputime64_t *target_cputime64) | ||
| 3625 | { | ||
| 3626 | cputime64_t tmp = cputime_to_cputime64(cputime); | ||
| 3627 | |||
| 3628 | /* Add system time to process. */ | ||
| 3629 | p->stime = cputime_add(p->stime, cputime); | ||
| 3630 | p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | ||
| 3631 | account_group_system_time(p, cputime); | ||
| 3632 | |||
| 3633 | /* Add system time to cpustat. */ | ||
| 3634 | *target_cputime64 = cputime64_add(*target_cputime64, tmp); | ||
| 3635 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
| 3636 | |||
| 3637 | /* Account for system time used */ | ||
| 3638 | acct_update_integrals(p); | ||
| 3639 | } | ||
| 3640 | |||
| 3641 | /* | ||
| 3550 | * Account system cpu time to a process. | 3642 | * Account system cpu time to a process. |
| 3551 | * @p: the process that the cpu time gets accounted to | 3643 | * @p: the process that the cpu time gets accounted to |
| 3552 | * @hardirq_offset: the offset to subtract from hardirq_count() | 3644 | * @hardirq_offset: the offset to subtract from hardirq_count() |
| @@ -3557,36 +3649,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
| 3557 | cputime_t cputime, cputime_t cputime_scaled) | 3649 | cputime_t cputime, cputime_t cputime_scaled) |
| 3558 | { | 3650 | { |
| 3559 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 3651 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
| 3560 | cputime64_t tmp; | 3652 | cputime64_t *target_cputime64; |
| 3561 | 3653 | ||
| 3562 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | 3654 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { |
| 3563 | account_guest_time(p, cputime, cputime_scaled); | 3655 | account_guest_time(p, cputime, cputime_scaled); |
| 3564 | return; | 3656 | return; |
| 3565 | } | 3657 | } |
| 3566 | 3658 | ||
| 3567 | /* Add system time to process. */ | ||
| 3568 | p->stime = cputime_add(p->stime, cputime); | ||
| 3569 | p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | ||
| 3570 | account_group_system_time(p, cputime); | ||
| 3571 | |||
| 3572 | /* Add system time to cpustat. */ | ||
| 3573 | tmp = cputime_to_cputime64(cputime); | ||
| 3574 | if (hardirq_count() - hardirq_offset) | 3659 | if (hardirq_count() - hardirq_offset) |
| 3575 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3660 | target_cputime64 = &cpustat->irq; |
| 3576 | else if (in_serving_softirq()) | 3661 | else if (in_serving_softirq()) |
| 3577 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3662 | target_cputime64 = &cpustat->softirq; |
| 3578 | else | 3663 | else |
| 3579 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3664 | target_cputime64 = &cpustat->system; |
| 3580 | |||
| 3581 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
| 3582 | 3665 | ||
| 3583 | /* Account for system time used */ | 3666 | __account_system_time(p, cputime, cputime_scaled, target_cputime64); |
| 3584 | acct_update_integrals(p); | ||
| 3585 | } | 3667 | } |
| 3586 | 3668 | ||
| 3587 | /* | 3669 | /* |
| 3588 | * Account for involuntary wait time. | 3670 | * Account for involuntary wait time. |
| 3589 | * @steal: the cpu time spent in involuntary wait | 3671 | * @cputime: the cpu time spent in involuntary wait |
| 3590 | */ | 3672 | */ |
| 3591 | void account_steal_time(cputime_t cputime) | 3673 | void account_steal_time(cputime_t cputime) |
| 3592 | { | 3674 | { |
| @@ -3614,6 +3696,73 @@ void account_idle_time(cputime_t cputime) | |||
| 3614 | 3696 | ||
| 3615 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 3697 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 3616 | 3698 | ||
| 3699 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 3700 | /* | ||
| 3701 | * Account a tick to a process and cpustat | ||
| 3702 | * @p: the process that the cpu time gets accounted to | ||
| 3703 | * @user_tick: is the tick from userspace | ||
| 3704 | * @rq: the pointer to rq | ||
| 3705 | * | ||
| 3706 | * Tick demultiplexing follows the order | ||
| 3707 | * - pending hardirq update | ||
| 3708 | * - pending softirq update | ||
| 3709 | * - user_time | ||
| 3710 | * - idle_time | ||
| 3711 | * - system time | ||
| 3712 | * - check for guest_time | ||
| 3713 | * - else account as system_time | ||
| 3714 | * | ||
| 3715 | * Check for hardirq is done both for system and user time as there is | ||
| 3716 | * no timer going off while we are on hardirq and hence we may never get an | ||
| 3717 | * opportunity to update it solely in system time. | ||
| 3718 | * p->stime and friends are only updated on system time and not on irq | ||
| 3719 | * or softirq time, as those do not count in task exec_runtime any more. | ||
| 3720 | */ | ||
| 3721 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
| 3722 | struct rq *rq) | ||
| 3723 | { | ||
| 3724 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
| 3725 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); | ||
| 3726 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
| 3727 | |||
| 3728 | if (irqtime_account_hi_update()) { | ||
| 3729 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | ||
| 3730 | } else if (irqtime_account_si_update()) { | ||
| 3731 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | ||
| 3732 | } else if (this_cpu_ksoftirqd() == p) { | ||
| 3733 | /* | ||
| 3734 | * ksoftirqd time does not get accounted in cpu_softirq_time. | ||
| 3735 | * So, we have to handle it separately here. | ||
| 3736 | * Also, p->stime needs to be updated for ksoftirqd. | ||
| 3737 | */ | ||
| 3738 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
| 3739 | &cpustat->softirq); | ||
| 3740 | } else if (user_tick) { | ||
| 3741 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 3742 | } else if (p == rq->idle) { | ||
| 3743 | account_idle_time(cputime_one_jiffy); | ||
| 3744 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
| 3745 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 3746 | } else { | ||
| 3747 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
| 3748 | &cpustat->system); | ||
| 3749 | } | ||
| 3750 | } | ||
| 3751 | |||
| 3752 | static void irqtime_account_idle_ticks(int ticks) | ||
| 3753 | { | ||
| 3754 | int i; | ||
| 3755 | struct rq *rq = this_rq(); | ||
| 3756 | |||
| 3757 | for (i = 0; i < ticks; i++) | ||
| 3758 | irqtime_account_process_tick(current, 0, rq); | ||
| 3759 | } | ||
| 3760 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 3761 | static void irqtime_account_idle_ticks(int ticks) {} | ||
| 3762 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
| 3763 | struct rq *rq) {} | ||
| 3764 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 3765 | |||
| 3617 | /* | 3766 | /* |
| 3618 | * Account a single tick of cpu time. | 3767 | * Account a single tick of cpu time. |
| 3619 | * @p: the process that the cpu time gets accounted to | 3768 | * @p: the process that the cpu time gets accounted to |
| @@ -3624,6 +3773,11 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
| 3624 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 3773 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
| 3625 | struct rq *rq = this_rq(); | 3774 | struct rq *rq = this_rq(); |
| 3626 | 3775 | ||
| 3776 | if (sched_clock_irqtime) { | ||
| 3777 | irqtime_account_process_tick(p, user_tick, rq); | ||
| 3778 | return; | ||
| 3779 | } | ||
| 3780 | |||
| 3627 | if (user_tick) | 3781 | if (user_tick) |
| 3628 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 3782 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
| 3629 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 3783 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
| @@ -3649,6 +3803,12 @@ void account_steal_ticks(unsigned long ticks) | |||
| 3649 | */ | 3803 | */ |
| 3650 | void account_idle_ticks(unsigned long ticks) | 3804 | void account_idle_ticks(unsigned long ticks) |
| 3651 | { | 3805 | { |
| 3806 | |||
| 3807 | if (sched_clock_irqtime) { | ||
| 3808 | irqtime_account_idle_ticks(ticks); | ||
| 3809 | return; | ||
| 3810 | } | ||
| 3811 | |||
| 3652 | account_idle_time(jiffies_to_cputime(ticks)); | 3812 | account_idle_time(jiffies_to_cputime(ticks)); |
| 3653 | } | 3813 | } |
| 3654 | 3814 | ||
| @@ -4547,11 +4707,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 4547 | 4707 | ||
| 4548 | if (running) | 4708 | if (running) |
| 4549 | p->sched_class->set_curr_task(rq); | 4709 | p->sched_class->set_curr_task(rq); |
| 4550 | if (on_rq) { | 4710 | if (on_rq) |
| 4551 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4711 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
| 4552 | 4712 | ||
| 4553 | check_class_changed(rq, p, prev_class, oldprio, running); | 4713 | check_class_changed(rq, p, prev_class, oldprio); |
| 4554 | } | ||
| 4555 | task_rq_unlock(rq, &flags); | 4714 | task_rq_unlock(rq, &flags); |
| 4556 | } | 4715 | } |
| 4557 | 4716 | ||
| @@ -4799,12 +4958,15 @@ recheck: | |||
| 4799 | param->sched_priority > rlim_rtprio) | 4958 | param->sched_priority > rlim_rtprio) |
| 4800 | return -EPERM; | 4959 | return -EPERM; |
| 4801 | } | 4960 | } |
| 4961 | |||
| 4802 | /* | 4962 | /* |
| 4803 | * Like positive nice levels, dont allow tasks to | 4963 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
| 4804 | * move out of SCHED_IDLE either: | 4964 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
| 4805 | */ | 4965 | */ |
| 4806 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) | 4966 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { |
| 4807 | return -EPERM; | 4967 | if (!can_nice(p, TASK_NICE(p))) |
| 4968 | return -EPERM; | ||
| 4969 | } | ||
| 4808 | 4970 | ||
| 4809 | /* can't change other user's priorities */ | 4971 | /* can't change other user's priorities */ |
| 4810 | if (!check_same_owner(p)) | 4972 | if (!check_same_owner(p)) |
| @@ -4879,11 +5041,10 @@ recheck: | |||
| 4879 | 5041 | ||
| 4880 | if (running) | 5042 | if (running) |
| 4881 | p->sched_class->set_curr_task(rq); | 5043 | p->sched_class->set_curr_task(rq); |
| 4882 | if (on_rq) { | 5044 | if (on_rq) |
| 4883 | activate_task(rq, p, 0); | 5045 | activate_task(rq, p, 0); |
| 4884 | 5046 | ||
| 4885 | check_class_changed(rq, p, prev_class, oldprio, running); | 5047 | check_class_changed(rq, p, prev_class, oldprio); |
| 4886 | } | ||
| 4887 | __task_rq_unlock(rq); | 5048 | __task_rq_unlock(rq); |
| 4888 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 5049 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
| 4889 | 5050 | ||
| @@ -5300,6 +5461,65 @@ void __sched yield(void) | |||
| 5300 | } | 5461 | } |
| 5301 | EXPORT_SYMBOL(yield); | 5462 | EXPORT_SYMBOL(yield); |
| 5302 | 5463 | ||
| 5464 | /** | ||
| 5465 | * yield_to - yield the current processor to another thread in | ||
| 5466 | * your thread group, or accelerate that thread toward the | ||
| 5467 | * processor it's on. | ||
| 5468 | * | ||
| 5469 | * It's the caller's job to ensure that the target task struct | ||
| 5470 | * can't go away on us before we can do any checks. | ||
| 5471 | * | ||
| 5472 | * Returns true if we indeed boosted the target task. | ||
| 5473 | */ | ||
| 5474 | bool __sched yield_to(struct task_struct *p, bool preempt) | ||
| 5475 | { | ||
| 5476 | struct task_struct *curr = current; | ||
| 5477 | struct rq *rq, *p_rq; | ||
| 5478 | unsigned long flags; | ||
| 5479 | bool yielded = 0; | ||
| 5480 | |||
| 5481 | local_irq_save(flags); | ||
| 5482 | rq = this_rq(); | ||
| 5483 | |||
| 5484 | again: | ||
| 5485 | p_rq = task_rq(p); | ||
| 5486 | double_rq_lock(rq, p_rq); | ||
| 5487 | while (task_rq(p) != p_rq) { | ||
| 5488 | double_rq_unlock(rq, p_rq); | ||
| 5489 | goto again; | ||
| 5490 | } | ||
| 5491 | |||
| 5492 | if (!curr->sched_class->yield_to_task) | ||
| 5493 | goto out; | ||
| 5494 | |||
| 5495 | if (curr->sched_class != p->sched_class) | ||
| 5496 | goto out; | ||
| 5497 | |||
| 5498 | if (task_running(p_rq, p) || p->state) | ||
| 5499 | goto out; | ||
| 5500 | |||
| 5501 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); | ||
| 5502 | if (yielded) { | ||
| 5503 | schedstat_inc(rq, yld_count); | ||
| 5504 | /* | ||
| 5505 | * Make p's CPU reschedule; pick_next_entity takes care of | ||
| 5506 | * fairness. | ||
| 5507 | */ | ||
| 5508 | if (preempt && rq != p_rq) | ||
| 5509 | resched_task(p_rq->curr); | ||
| 5510 | } | ||
| 5511 | |||
| 5512 | out: | ||
| 5513 | double_rq_unlock(rq, p_rq); | ||
| 5514 | local_irq_restore(flags); | ||
| 5515 | |||
| 5516 | if (yielded) | ||
| 5517 | schedule(); | ||
| 5518 | |||
| 5519 | return yielded; | ||
| 5520 | } | ||
| 5521 | EXPORT_SYMBOL_GPL(yield_to); | ||
| 5522 | |||
| 5303 | /* | 5523 | /* |
| 5304 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 5524 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
| 5305 | * that process accounting knows that this is a task in IO wait state. | 5525 | * that process accounting knows that this is a task in IO wait state. |
| @@ -7773,6 +7993,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | |||
| 7773 | INIT_LIST_HEAD(&cfs_rq->tasks); | 7993 | INIT_LIST_HEAD(&cfs_rq->tasks); |
| 7774 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7994 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7775 | cfs_rq->rq = rq; | 7995 | cfs_rq->rq = rq; |
| 7996 | /* allow initial update_cfs_load() to truncate */ | ||
| 7997 | #ifdef CONFIG_SMP | ||
| 7998 | cfs_rq->load_stamp = 1; | ||
| 7999 | #endif | ||
| 7776 | #endif | 8000 | #endif |
| 7777 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 8001 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
| 7778 | } | 8002 | } |
| @@ -8086,6 +8310,8 @@ EXPORT_SYMBOL(__might_sleep); | |||
| 8086 | #ifdef CONFIG_MAGIC_SYSRQ | 8310 | #ifdef CONFIG_MAGIC_SYSRQ |
| 8087 | static void normalize_task(struct rq *rq, struct task_struct *p) | 8311 | static void normalize_task(struct rq *rq, struct task_struct *p) |
| 8088 | { | 8312 | { |
| 8313 | const struct sched_class *prev_class = p->sched_class; | ||
| 8314 | int old_prio = p->prio; | ||
| 8089 | int on_rq; | 8315 | int on_rq; |
| 8090 | 8316 | ||
| 8091 | on_rq = p->se.on_rq; | 8317 | on_rq = p->se.on_rq; |
| @@ -8096,6 +8322,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
| 8096 | activate_task(rq, p, 0); | 8322 | activate_task(rq, p, 0); |
| 8097 | resched_task(rq->curr); | 8323 | resched_task(rq->curr); |
| 8098 | } | 8324 | } |
| 8325 | |||
| 8326 | check_class_changed(rq, p, prev_class, old_prio); | ||
| 8099 | } | 8327 | } |
| 8100 | 8328 | ||
| 8101 | void normalize_rt_tasks(void) | 8329 | void normalize_rt_tasks(void) |
| @@ -8487,7 +8715,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 8487 | /* Propagate contribution to hierarchy */ | 8715 | /* Propagate contribution to hierarchy */ |
| 8488 | raw_spin_lock_irqsave(&rq->lock, flags); | 8716 | raw_spin_lock_irqsave(&rq->lock, flags); |
| 8489 | for_each_sched_entity(se) | 8717 | for_each_sched_entity(se) |
| 8490 | update_cfs_shares(group_cfs_rq(se), 0); | 8718 | update_cfs_shares(group_cfs_rq(se)); |
| 8491 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 8719 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
| 8492 | } | 8720 | } |
| 8493 | 8721 | ||
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 9fb656283157..5946ac515602 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
| @@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr; | |||
| 12 | static void __init autogroup_init(struct task_struct *init_task) | 12 | static void __init autogroup_init(struct task_struct *init_task) |
| 13 | { | 13 | { |
| 14 | autogroup_default.tg = &root_task_group; | 14 | autogroup_default.tg = &root_task_group; |
| 15 | root_task_group.autogroup = &autogroup_default; | ||
| 16 | kref_init(&autogroup_default.kref); | 15 | kref_init(&autogroup_default.kref); |
| 17 | init_rwsem(&autogroup_default.lock); | 16 | init_rwsem(&autogroup_default.lock); |
| 18 | init_task->signal->autogroup = &autogroup_default; | 17 | init_task->signal->autogroup = &autogroup_default; |
| @@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
| 130 | 129 | ||
| 131 | static inline bool task_group_is_autogroup(struct task_group *tg) | 130 | static inline bool task_group_is_autogroup(struct task_group *tg) |
| 132 | { | 131 | { |
| 133 | return tg != &root_task_group && tg->autogroup; | 132 | return !!tg->autogroup; |
| 134 | } | 133 | } |
| 135 | 134 | ||
| 136 | static inline struct task_group * | 135 | static inline struct task_group * |
| @@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) | |||
| 161 | 160 | ||
| 162 | p->signal->autogroup = autogroup_kref_get(ag); | 161 | p->signal->autogroup = autogroup_kref_get(ag); |
| 163 | 162 | ||
| 163 | if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled)) | ||
| 164 | goto out; | ||
| 165 | |||
| 164 | t = p; | 166 | t = p; |
| 165 | do { | 167 | do { |
| 166 | sched_move_task(t); | 168 | sched_move_task(t); |
| 167 | } while_each_thread(p, t); | 169 | } while_each_thread(p, t); |
| 168 | 170 | ||
| 171 | out: | ||
| 169 | unlock_task_sighand(p, &flags); | 172 | unlock_task_sighand(p, &flags); |
| 170 | autogroup_kref_put(prev); | 173 | autogroup_kref_put(prev); |
| 171 | } | 174 | } |
| @@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | |||
| 247 | { | 250 | { |
| 248 | struct autogroup *ag = autogroup_task_get(p); | 251 | struct autogroup *ag = autogroup_task_get(p); |
| 249 | 252 | ||
| 253 | if (!task_group_is_autogroup(ag->tg)) | ||
| 254 | goto out; | ||
| 255 | |||
| 250 | down_read(&ag->lock); | 256 | down_read(&ag->lock); |
| 251 | seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); | 257 | seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); |
| 252 | up_read(&ag->lock); | 258 | up_read(&ag->lock); |
| 253 | 259 | ||
| 260 | out: | ||
| 254 | autogroup_kref_put(ag); | 261 | autogroup_kref_put(ag); |
| 255 | } | 262 | } |
| 256 | #endif /* CONFIG_PROC_FS */ | 263 | #endif /* CONFIG_PROC_FS */ |
| @@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | |||
| 258 | #ifdef CONFIG_SCHED_DEBUG | 265 | #ifdef CONFIG_SCHED_DEBUG |
| 259 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | 266 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) |
| 260 | { | 267 | { |
| 261 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | 268 | if (!task_group_is_autogroup(tg)) |
| 262 | |||
| 263 | if (!enabled || !tg->autogroup) | ||
| 264 | return 0; | 269 | return 0; |
| 265 | 270 | ||
| 266 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | 271 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); |
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index 7b859ffe5dad..05577055cfca 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h | |||
| @@ -1,6 +1,11 @@ | |||
| 1 | #ifdef CONFIG_SCHED_AUTOGROUP | 1 | #ifdef CONFIG_SCHED_AUTOGROUP |
| 2 | 2 | ||
| 3 | struct autogroup { | 3 | struct autogroup { |
| 4 | /* | ||
| 5 | * The reference count is not the number of threads currently | ||
| 6 | * attached to this autogroup. It only counts the tasks that | ||
| 7 | * could use this autogroup. | ||
| 8 | */ | ||
| 4 | struct kref kref; | 9 | struct kref kref; |
| 5 | struct task_group *tg; | 10 | struct task_group *tg; |
| 6 | struct rw_semaphore lock; | 11 | struct rw_semaphore lock; |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index eb6cb8edd075..7bacd83a4158 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
| @@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
| 179 | 179 | ||
| 180 | raw_spin_lock_irqsave(&rq->lock, flags); | 180 | raw_spin_lock_irqsave(&rq->lock, flags); |
| 181 | if (cfs_rq->rb_leftmost) | 181 | if (cfs_rq->rb_leftmost) |
| 182 | MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; | 182 | MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; |
| 183 | last = __pick_last_entity(cfs_rq); | 183 | last = __pick_last_entity(cfs_rq); |
| 184 | if (last) | 184 | if (last) |
| 185 | max_vruntime = last->vruntime; | 185 | max_vruntime = last->vruntime; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0c26e2df450e..3f7ec9e27ee1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8; | |||
| 69 | unsigned int sysctl_sched_child_runs_first __read_mostly; | 69 | unsigned int sysctl_sched_child_runs_first __read_mostly; |
| 70 | 70 | ||
| 71 | /* | 71 | /* |
| 72 | * sys_sched_yield() compat mode | ||
| 73 | * | ||
| 74 | * This option switches the agressive yield implementation of the | ||
| 75 | * old scheduler back on. | ||
| 76 | */ | ||
| 77 | unsigned int __read_mostly sysctl_sched_compat_yield; | ||
| 78 | |||
| 79 | /* | ||
| 80 | * SCHED_OTHER wake-up granularity. | 72 | * SCHED_OTHER wake-up granularity. |
| 81 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 73 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) |
| 82 | * | 74 | * |
| @@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 419 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | 411 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); |
| 420 | } | 412 | } |
| 421 | 413 | ||
| 422 | static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | 414 | static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) |
| 423 | { | 415 | { |
| 424 | struct rb_node *left = cfs_rq->rb_leftmost; | 416 | struct rb_node *left = cfs_rq->rb_leftmost; |
| 425 | 417 | ||
| @@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | |||
| 429 | return rb_entry(left, struct sched_entity, run_node); | 421 | return rb_entry(left, struct sched_entity, run_node); |
| 430 | } | 422 | } |
| 431 | 423 | ||
| 424 | static struct sched_entity *__pick_next_entity(struct sched_entity *se) | ||
| 425 | { | ||
| 426 | struct rb_node *next = rb_next(&se->run_node); | ||
| 427 | |||
| 428 | if (!next) | ||
| 429 | return NULL; | ||
| 430 | |||
| 431 | return rb_entry(next, struct sched_entity, run_node); | ||
| 432 | } | ||
| 433 | |||
| 434 | #ifdef CONFIG_SCHED_DEBUG | ||
| 432 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 435 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
| 433 | { | 436 | { |
| 434 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); | 437 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
| @@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
| 443 | * Scheduling class statistics methods: | 446 | * Scheduling class statistics methods: |
| 444 | */ | 447 | */ |
| 445 | 448 | ||
| 446 | #ifdef CONFIG_SCHED_DEBUG | ||
| 447 | int sched_proc_update_handler(struct ctl_table *table, int write, | 449 | int sched_proc_update_handler(struct ctl_table *table, int write, |
| 448 | void __user *buffer, size_t *lenp, | 450 | void __user *buffer, size_t *lenp, |
| 449 | loff_t *ppos) | 451 | loff_t *ppos) |
| @@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 540 | } | 542 | } |
| 541 | 543 | ||
| 542 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); | 544 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); |
| 543 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); | 545 | static void update_cfs_shares(struct cfs_rq *cfs_rq); |
| 544 | 546 | ||
| 545 | /* | 547 | /* |
| 546 | * Update the current task's runtime statistics. Skip current tasks that | 548 | * Update the current task's runtime statistics. Skip current tasks that |
| @@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
| 733 | now - cfs_rq->load_last > 4 * period) { | 735 | now - cfs_rq->load_last > 4 * period) { |
| 734 | cfs_rq->load_period = 0; | 736 | cfs_rq->load_period = 0; |
| 735 | cfs_rq->load_avg = 0; | 737 | cfs_rq->load_avg = 0; |
| 738 | delta = period - 1; | ||
| 736 | } | 739 | } |
| 737 | 740 | ||
| 738 | cfs_rq->load_stamp = now; | 741 | cfs_rq->load_stamp = now; |
| @@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
| 763 | list_del_leaf_cfs_rq(cfs_rq); | 766 | list_del_leaf_cfs_rq(cfs_rq); |
| 764 | } | 767 | } |
| 765 | 768 | ||
| 766 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | 769 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
| 767 | long weight_delta) | ||
| 768 | { | 770 | { |
| 769 | long load_weight, load, shares; | 771 | long load_weight, load, shares; |
| 770 | 772 | ||
| 771 | load = cfs_rq->load.weight + weight_delta; | 773 | load = cfs_rq->load.weight; |
| 772 | 774 | ||
| 773 | load_weight = atomic_read(&tg->load_weight); | 775 | load_weight = atomic_read(&tg->load_weight); |
| 774 | load_weight -= cfs_rq->load_contribution; | ||
| 775 | load_weight += load; | 776 | load_weight += load; |
| 777 | load_weight -= cfs_rq->load_contribution; | ||
| 776 | 778 | ||
| 777 | shares = (tg->shares * load); | 779 | shares = (tg->shares * load); |
| 778 | if (load_weight) | 780 | if (load_weight) |
| @@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | |||
| 790 | { | 792 | { |
| 791 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | 793 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { |
| 792 | update_cfs_load(cfs_rq, 0); | 794 | update_cfs_load(cfs_rq, 0); |
| 793 | update_cfs_shares(cfs_rq, 0); | 795 | update_cfs_shares(cfs_rq); |
| 794 | } | 796 | } |
| 795 | } | 797 | } |
| 796 | # else /* CONFIG_SMP */ | 798 | # else /* CONFIG_SMP */ |
| @@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
| 798 | { | 800 | { |
| 799 | } | 801 | } |
| 800 | 802 | ||
| 801 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | 803 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
| 802 | long weight_delta) | ||
| 803 | { | 804 | { |
| 804 | return tg->shares; | 805 | return tg->shares; |
| 805 | } | 806 | } |
| @@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | |||
| 824 | account_entity_enqueue(cfs_rq, se); | 825 | account_entity_enqueue(cfs_rq, se); |
| 825 | } | 826 | } |
| 826 | 827 | ||
| 827 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | 828 | static void update_cfs_shares(struct cfs_rq *cfs_rq) |
| 828 | { | 829 | { |
| 829 | struct task_group *tg; | 830 | struct task_group *tg; |
| 830 | struct sched_entity *se; | 831 | struct sched_entity *se; |
| @@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | |||
| 838 | if (likely(se->load.weight == tg->shares)) | 839 | if (likely(se->load.weight == tg->shares)) |
| 839 | return; | 840 | return; |
| 840 | #endif | 841 | #endif |
| 841 | shares = calc_cfs_shares(cfs_rq, tg, weight_delta); | 842 | shares = calc_cfs_shares(cfs_rq, tg); |
| 842 | 843 | ||
| 843 | reweight_entity(cfs_rq_of(se), se, shares); | 844 | reweight_entity(cfs_rq_of(se), se, shares); |
| 844 | } | 845 | } |
| @@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
| 847 | { | 848 | { |
| 848 | } | 849 | } |
| 849 | 850 | ||
| 850 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | 851 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq) |
| 851 | { | 852 | { |
| 852 | } | 853 | } |
| 853 | 854 | ||
| @@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
| 978 | */ | 979 | */ |
| 979 | update_curr(cfs_rq); | 980 | update_curr(cfs_rq); |
| 980 | update_cfs_load(cfs_rq, 0); | 981 | update_cfs_load(cfs_rq, 0); |
| 981 | update_cfs_shares(cfs_rq, se->load.weight); | ||
| 982 | account_entity_enqueue(cfs_rq, se); | 982 | account_entity_enqueue(cfs_rq, se); |
| 983 | update_cfs_shares(cfs_rq); | ||
| 983 | 984 | ||
| 984 | if (flags & ENQUEUE_WAKEUP) { | 985 | if (flags & ENQUEUE_WAKEUP) { |
| 985 | place_entity(cfs_rq, se, 0); | 986 | place_entity(cfs_rq, se, 0); |
| @@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
| 996 | list_add_leaf_cfs_rq(cfs_rq); | 997 | list_add_leaf_cfs_rq(cfs_rq); |
| 997 | } | 998 | } |
| 998 | 999 | ||
| 999 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 1000 | static void __clear_buddies_last(struct sched_entity *se) |
| 1001 | { | ||
| 1002 | for_each_sched_entity(se) { | ||
| 1003 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
| 1004 | if (cfs_rq->last == se) | ||
| 1005 | cfs_rq->last = NULL; | ||
| 1006 | else | ||
| 1007 | break; | ||
| 1008 | } | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | static void __clear_buddies_next(struct sched_entity *se) | ||
| 1000 | { | 1012 | { |
| 1001 | if (!se || cfs_rq->last == se) | 1013 | for_each_sched_entity(se) { |
| 1002 | cfs_rq->last = NULL; | 1014 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 1015 | if (cfs_rq->next == se) | ||
| 1016 | cfs_rq->next = NULL; | ||
| 1017 | else | ||
| 1018 | break; | ||
| 1019 | } | ||
| 1020 | } | ||
| 1003 | 1021 | ||
| 1004 | if (!se || cfs_rq->next == se) | 1022 | static void __clear_buddies_skip(struct sched_entity *se) |
| 1005 | cfs_rq->next = NULL; | 1023 | { |
| 1024 | for_each_sched_entity(se) { | ||
| 1025 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
| 1026 | if (cfs_rq->skip == se) | ||
| 1027 | cfs_rq->skip = NULL; | ||
| 1028 | else | ||
| 1029 | break; | ||
| 1030 | } | ||
| 1006 | } | 1031 | } |
| 1007 | 1032 | ||
| 1008 | static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 1033 | static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 1009 | { | 1034 | { |
| 1010 | for_each_sched_entity(se) | 1035 | if (cfs_rq->last == se) |
| 1011 | __clear_buddies(cfs_rq_of(se), se); | 1036 | __clear_buddies_last(se); |
| 1037 | |||
| 1038 | if (cfs_rq->next == se) | ||
| 1039 | __clear_buddies_next(se); | ||
| 1040 | |||
| 1041 | if (cfs_rq->skip == se) | ||
| 1042 | __clear_buddies_skip(se); | ||
| 1012 | } | 1043 | } |
| 1013 | 1044 | ||
| 1014 | static void | 1045 | static void |
| @@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
| 1041 | update_cfs_load(cfs_rq, 0); | 1072 | update_cfs_load(cfs_rq, 0); |
| 1042 | account_entity_dequeue(cfs_rq, se); | 1073 | account_entity_dequeue(cfs_rq, se); |
| 1043 | update_min_vruntime(cfs_rq); | 1074 | update_min_vruntime(cfs_rq); |
| 1044 | update_cfs_shares(cfs_rq, 0); | 1075 | update_cfs_shares(cfs_rq); |
| 1045 | 1076 | ||
| 1046 | /* | 1077 | /* |
| 1047 | * Normalize the entity after updating the min_vruntime because the | 1078 | * Normalize the entity after updating the min_vruntime because the |
| @@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
| 1084 | return; | 1115 | return; |
| 1085 | 1116 | ||
| 1086 | if (cfs_rq->nr_running > 1) { | 1117 | if (cfs_rq->nr_running > 1) { |
| 1087 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1118 | struct sched_entity *se = __pick_first_entity(cfs_rq); |
| 1088 | s64 delta = curr->vruntime - se->vruntime; | 1119 | s64 delta = curr->vruntime - se->vruntime; |
| 1089 | 1120 | ||
| 1090 | if (delta < 0) | 1121 | if (delta < 0) |
| @@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 1128 | static int | 1159 | static int |
| 1129 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | 1160 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); |
| 1130 | 1161 | ||
| 1162 | /* | ||
| 1163 | * Pick the next process, keeping these things in mind, in this order: | ||
| 1164 | * 1) keep things fair between processes/task groups | ||
| 1165 | * 2) pick the "next" process, since someone really wants that to run | ||
| 1166 | * 3) pick the "last" process, for cache locality | ||
| 1167 | * 4) do not run the "skip" process, if something else is available | ||
| 1168 | */ | ||
| 1131 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 1169 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) |
| 1132 | { | 1170 | { |
| 1133 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1171 | struct sched_entity *se = __pick_first_entity(cfs_rq); |
| 1134 | struct sched_entity *left = se; | 1172 | struct sched_entity *left = se; |
| 1135 | 1173 | ||
| 1136 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) | 1174 | /* |
| 1137 | se = cfs_rq->next; | 1175 | * Avoid running the skip buddy, if running something else can |
| 1176 | * be done without getting too unfair. | ||
| 1177 | */ | ||
| 1178 | if (cfs_rq->skip == se) { | ||
| 1179 | struct sched_entity *second = __pick_next_entity(se); | ||
| 1180 | if (second && wakeup_preempt_entity(second, left) < 1) | ||
| 1181 | se = second; | ||
| 1182 | } | ||
| 1138 | 1183 | ||
| 1139 | /* | 1184 | /* |
| 1140 | * Prefer last buddy, try to return the CPU to a preempted task. | 1185 | * Prefer last buddy, try to return the CPU to a preempted task. |
| @@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | |||
| 1142 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) | 1187 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) |
| 1143 | se = cfs_rq->last; | 1188 | se = cfs_rq->last; |
| 1144 | 1189 | ||
| 1190 | /* | ||
| 1191 | * Someone really wants this to run. If it's not unfair, run it. | ||
| 1192 | */ | ||
| 1193 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) | ||
| 1194 | se = cfs_rq->next; | ||
| 1195 | |||
| 1145 | clear_buddies(cfs_rq, se); | 1196 | clear_buddies(cfs_rq, se); |
| 1146 | 1197 | ||
| 1147 | return se; | 1198 | return se; |
| @@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
| 1282 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1333 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 1283 | 1334 | ||
| 1284 | update_cfs_load(cfs_rq, 0); | 1335 | update_cfs_load(cfs_rq, 0); |
| 1285 | update_cfs_shares(cfs_rq, 0); | 1336 | update_cfs_shares(cfs_rq); |
| 1286 | } | 1337 | } |
| 1287 | 1338 | ||
| 1288 | hrtick_update(rq); | 1339 | hrtick_update(rq); |
| @@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
| 1312 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1363 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 1313 | 1364 | ||
| 1314 | update_cfs_load(cfs_rq, 0); | 1365 | update_cfs_load(cfs_rq, 0); |
| 1315 | update_cfs_shares(cfs_rq, 0); | 1366 | update_cfs_shares(cfs_rq); |
| 1316 | } | 1367 | } |
| 1317 | 1368 | ||
| 1318 | hrtick_update(rq); | 1369 | hrtick_update(rq); |
| 1319 | } | 1370 | } |
| 1320 | 1371 | ||
| 1321 | /* | ||
| 1322 | * sched_yield() support is very simple - we dequeue and enqueue. | ||
| 1323 | * | ||
| 1324 | * If compat_yield is turned on then we requeue to the end of the tree. | ||
| 1325 | */ | ||
| 1326 | static void yield_task_fair(struct rq *rq) | ||
| 1327 | { | ||
| 1328 | struct task_struct *curr = rq->curr; | ||
| 1329 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
| 1330 | struct sched_entity *rightmost, *se = &curr->se; | ||
| 1331 | |||
| 1332 | /* | ||
| 1333 | * Are we the only task in the tree? | ||
| 1334 | */ | ||
| 1335 | if (unlikely(cfs_rq->nr_running == 1)) | ||
| 1336 | return; | ||
| 1337 | |||
| 1338 | clear_buddies(cfs_rq, se); | ||
| 1339 | |||
| 1340 | if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { | ||
| 1341 | update_rq_clock(rq); | ||
| 1342 | /* | ||
| 1343 | * Update run-time statistics of the 'current'. | ||
| 1344 | */ | ||
| 1345 | update_curr(cfs_rq); | ||
| 1346 | |||
| 1347 | return; | ||
| 1348 | } | ||
| 1349 | /* | ||
| 1350 | * Find the rightmost entry in the rbtree: | ||
| 1351 | */ | ||
| 1352 | rightmost = __pick_last_entity(cfs_rq); | ||
| 1353 | /* | ||
| 1354 | * Already in the rightmost position? | ||
| 1355 | */ | ||
| 1356 | if (unlikely(!rightmost || entity_before(rightmost, se))) | ||
| 1357 | return; | ||
| 1358 | |||
| 1359 | /* | ||
| 1360 | * Minimally necessary key value to be last in the tree: | ||
| 1361 | * Upon rescheduling, sched_class::put_prev_task() will place | ||
| 1362 | * 'current' within the tree based on its new key value. | ||
| 1363 | */ | ||
| 1364 | se->vruntime = rightmost->vruntime + 1; | ||
| 1365 | } | ||
| 1366 | |||
| 1367 | #ifdef CONFIG_SMP | 1372 | #ifdef CONFIG_SMP |
| 1368 | 1373 | ||
| 1369 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | 1374 | static void task_waking_fair(struct rq *rq, struct task_struct *p) |
| @@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se) | |||
| 1834 | } | 1839 | } |
| 1835 | } | 1840 | } |
| 1836 | 1841 | ||
| 1842 | static void set_skip_buddy(struct sched_entity *se) | ||
| 1843 | { | ||
| 1844 | if (likely(task_of(se)->policy != SCHED_IDLE)) { | ||
| 1845 | for_each_sched_entity(se) | ||
| 1846 | cfs_rq_of(se)->skip = se; | ||
| 1847 | } | ||
| 1848 | } | ||
| 1849 | |||
| 1837 | /* | 1850 | /* |
| 1838 | * Preempt the current task with a newly woken task if needed: | 1851 | * Preempt the current task with a newly woken task if needed: |
| 1839 | */ | 1852 | */ |
| @@ -1857,16 +1870,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1857 | if (test_tsk_need_resched(curr)) | 1870 | if (test_tsk_need_resched(curr)) |
| 1858 | return; | 1871 | return; |
| 1859 | 1872 | ||
| 1873 | /* Idle tasks are by definition preempted by non-idle tasks. */ | ||
| 1874 | if (unlikely(curr->policy == SCHED_IDLE) && | ||
| 1875 | likely(p->policy != SCHED_IDLE)) | ||
| 1876 | goto preempt; | ||
| 1877 | |||
| 1860 | /* | 1878 | /* |
| 1861 | * Batch and idle tasks do not preempt (their preemption is driven by | 1879 | * Batch and idle tasks do not preempt non-idle tasks (their preemption |
| 1862 | * the tick): | 1880 | * is driven by the tick): |
| 1863 | */ | 1881 | */ |
| 1864 | if (unlikely(p->policy != SCHED_NORMAL)) | 1882 | if (unlikely(p->policy != SCHED_NORMAL)) |
| 1865 | return; | 1883 | return; |
| 1866 | 1884 | ||
| 1867 | /* Idle tasks are by definition preempted by everybody. */ | ||
| 1868 | if (unlikely(curr->policy == SCHED_IDLE)) | ||
| 1869 | goto preempt; | ||
| 1870 | 1885 | ||
| 1871 | if (!sched_feat(WAKEUP_PREEMPT)) | 1886 | if (!sched_feat(WAKEUP_PREEMPT)) |
| 1872 | return; | 1887 | return; |
| @@ -1932,6 +1947,51 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) | |||
| 1932 | } | 1947 | } |
| 1933 | } | 1948 | } |
| 1934 | 1949 | ||
| 1950 | /* | ||
| 1951 | * sched_yield() is very simple | ||
| 1952 | * | ||
| 1953 | * The magic of dealing with the ->skip buddy is in pick_next_entity. | ||
| 1954 | */ | ||
| 1955 | static void yield_task_fair(struct rq *rq) | ||
| 1956 | { | ||
| 1957 | struct task_struct *curr = rq->curr; | ||
| 1958 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
| 1959 | struct sched_entity *se = &curr->se; | ||
| 1960 | |||
| 1961 | /* | ||
| 1962 | * Are we the only task in the tree? | ||
| 1963 | */ | ||
| 1964 | if (unlikely(rq->nr_running == 1)) | ||
| 1965 | return; | ||
| 1966 | |||
| 1967 | clear_buddies(cfs_rq, se); | ||
| 1968 | |||
| 1969 | if (curr->policy != SCHED_BATCH) { | ||
| 1970 | update_rq_clock(rq); | ||
| 1971 | /* | ||
| 1972 | * Update run-time statistics of the 'current'. | ||
| 1973 | */ | ||
| 1974 | update_curr(cfs_rq); | ||
| 1975 | } | ||
| 1976 | |||
| 1977 | set_skip_buddy(se); | ||
| 1978 | } | ||
| 1979 | |||
| 1980 | static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt) | ||
| 1981 | { | ||
| 1982 | struct sched_entity *se = &p->se; | ||
| 1983 | |||
| 1984 | if (!se->on_rq) | ||
| 1985 | return false; | ||
| 1986 | |||
| 1987 | /* Tell the scheduler that we'd really like se to run next. */ | ||
| 1988 | set_next_buddy(se); | ||
| 1989 | |||
| 1990 | yield_task_fair(rq); | ||
| 1991 | |||
| 1992 | return true; | ||
| 1993 | } | ||
| 1994 | |||
| 1935 | #ifdef CONFIG_SMP | 1995 | #ifdef CONFIG_SMP |
| 1936 | /************************************************** | 1996 | /************************************************** |
| 1937 | * Fair scheduling class load-balancing methods: | 1997 | * Fair scheduling class load-balancing methods: |
| @@ -2123,7 +2183,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu) | |||
| 2123 | * We need to update shares after updating tg->load_weight in | 2183 | * We need to update shares after updating tg->load_weight in |
| 2124 | * order to adjust the weight of groups with long running tasks. | 2184 | * order to adjust the weight of groups with long running tasks. |
| 2125 | */ | 2185 | */ |
| 2126 | update_cfs_shares(cfs_rq, 0); | 2186 | update_cfs_shares(cfs_rq); |
| 2127 | 2187 | ||
| 2128 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 2188 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
| 2129 | 2189 | ||
| @@ -2610,7 +2670,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
| 2610 | * @this_cpu: Cpu for which load balance is currently performed. | 2670 | * @this_cpu: Cpu for which load balance is currently performed. |
| 2611 | * @idle: Idle status of this_cpu | 2671 | * @idle: Idle status of this_cpu |
| 2612 | * @load_idx: Load index of sched_domain of this_cpu for load calc. | 2672 | * @load_idx: Load index of sched_domain of this_cpu for load calc. |
| 2613 | * @sd_idle: Idle status of the sched_domain containing group. | ||
| 2614 | * @local_group: Does group contain this_cpu. | 2673 | * @local_group: Does group contain this_cpu. |
| 2615 | * @cpus: Set of cpus considered for load balancing. | 2674 | * @cpus: Set of cpus considered for load balancing. |
| 2616 | * @balance: Should we balance. | 2675 | * @balance: Should we balance. |
| @@ -2618,7 +2677,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
| 2618 | */ | 2677 | */ |
| 2619 | static inline void update_sg_lb_stats(struct sched_domain *sd, | 2678 | static inline void update_sg_lb_stats(struct sched_domain *sd, |
| 2620 | struct sched_group *group, int this_cpu, | 2679 | struct sched_group *group, int this_cpu, |
| 2621 | enum cpu_idle_type idle, int load_idx, int *sd_idle, | 2680 | enum cpu_idle_type idle, int load_idx, |
| 2622 | int local_group, const struct cpumask *cpus, | 2681 | int local_group, const struct cpumask *cpus, |
| 2623 | int *balance, struct sg_lb_stats *sgs) | 2682 | int *balance, struct sg_lb_stats *sgs) |
| 2624 | { | 2683 | { |
| @@ -2638,9 +2697,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2638 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { | 2697 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { |
| 2639 | struct rq *rq = cpu_rq(i); | 2698 | struct rq *rq = cpu_rq(i); |
| 2640 | 2699 | ||
| 2641 | if (*sd_idle && rq->nr_running) | ||
| 2642 | *sd_idle = 0; | ||
| 2643 | |||
| 2644 | /* Bias balancing toward cpus of our domain */ | 2700 | /* Bias balancing toward cpus of our domain */ |
| 2645 | if (local_group) { | 2701 | if (local_group) { |
| 2646 | if (idle_cpu(i) && !first_idle_cpu) { | 2702 | if (idle_cpu(i) && !first_idle_cpu) { |
| @@ -2685,7 +2741,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2685 | 2741 | ||
| 2686 | /* | 2742 | /* |
| 2687 | * Consider the group unbalanced when the imbalance is larger | 2743 | * Consider the group unbalanced when the imbalance is larger |
| 2688 | * than the average weight of two tasks. | 2744 | * than the average weight of a task. |
| 2689 | * | 2745 | * |
| 2690 | * APZ: with cgroup the avg task weight can vary wildly and | 2746 | * APZ: with cgroup the avg task weight can vary wildly and |
| 2691 | * might not be a suitable number - should we keep a | 2747 | * might not be a suitable number - should we keep a |
| @@ -2695,7 +2751,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2695 | if (sgs->sum_nr_running) | 2751 | if (sgs->sum_nr_running) |
| 2696 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; | 2752 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; |
| 2697 | 2753 | ||
| 2698 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) | 2754 | if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) |
| 2699 | sgs->group_imb = 1; | 2755 | sgs->group_imb = 1; |
| 2700 | 2756 | ||
| 2701 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2757 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
| @@ -2755,15 +2811,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, | |||
| 2755 | * @sd: sched_domain whose statistics are to be updated. | 2811 | * @sd: sched_domain whose statistics are to be updated. |
| 2756 | * @this_cpu: Cpu for which load balance is currently performed. | 2812 | * @this_cpu: Cpu for which load balance is currently performed. |
| 2757 | * @idle: Idle status of this_cpu | 2813 | * @idle: Idle status of this_cpu |
| 2758 | * @sd_idle: Idle status of the sched_domain containing sg. | ||
| 2759 | * @cpus: Set of cpus considered for load balancing. | 2814 | * @cpus: Set of cpus considered for load balancing. |
| 2760 | * @balance: Should we balance. | 2815 | * @balance: Should we balance. |
| 2761 | * @sds: variable to hold the statistics for this sched_domain. | 2816 | * @sds: variable to hold the statistics for this sched_domain. |
| 2762 | */ | 2817 | */ |
| 2763 | static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | 2818 | static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, |
| 2764 | enum cpu_idle_type idle, int *sd_idle, | 2819 | enum cpu_idle_type idle, const struct cpumask *cpus, |
| 2765 | const struct cpumask *cpus, int *balance, | 2820 | int *balance, struct sd_lb_stats *sds) |
| 2766 | struct sd_lb_stats *sds) | ||
| 2767 | { | 2821 | { |
| 2768 | struct sched_domain *child = sd->child; | 2822 | struct sched_domain *child = sd->child; |
| 2769 | struct sched_group *sg = sd->groups; | 2823 | struct sched_group *sg = sd->groups; |
| @@ -2781,7 +2835,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2781 | 2835 | ||
| 2782 | local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg)); | 2836 | local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg)); |
| 2783 | memset(&sgs, 0, sizeof(sgs)); | 2837 | memset(&sgs, 0, sizeof(sgs)); |
| 2784 | update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle, | 2838 | update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, |
| 2785 | local_group, cpus, balance, &sgs); | 2839 | local_group, cpus, balance, &sgs); |
| 2786 | 2840 | ||
| 2787 | if (local_group && !(*balance)) | 2841 | if (local_group && !(*balance)) |
| @@ -3033,7 +3087,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
| 3033 | * @imbalance: Variable which stores amount of weighted load which should | 3087 | * @imbalance: Variable which stores amount of weighted load which should |
| 3034 | * be moved to restore balance/put a group to idle. | 3088 | * be moved to restore balance/put a group to idle. |
| 3035 | * @idle: The idle status of this_cpu. | 3089 | * @idle: The idle status of this_cpu. |
| 3036 | * @sd_idle: The idleness of sd | ||
| 3037 | * @cpus: The set of CPUs under consideration for load-balancing. | 3090 | * @cpus: The set of CPUs under consideration for load-balancing. |
| 3038 | * @balance: Pointer to a variable indicating if this_cpu | 3091 | * @balance: Pointer to a variable indicating if this_cpu |
| 3039 | * is the appropriate cpu to perform load balancing at this_level. | 3092 | * is the appropriate cpu to perform load balancing at this_level. |
| @@ -3046,7 +3099,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
| 3046 | static struct sched_group * | 3099 | static struct sched_group * |
| 3047 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 3100 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
| 3048 | unsigned long *imbalance, enum cpu_idle_type idle, | 3101 | unsigned long *imbalance, enum cpu_idle_type idle, |
| 3049 | int *sd_idle, const struct cpumask *cpus, int *balance) | 3102 | const struct cpumask *cpus, int *balance) |
| 3050 | { | 3103 | { |
| 3051 | struct sd_lb_stats sds; | 3104 | struct sd_lb_stats sds; |
| 3052 | 3105 | ||
| @@ -3056,22 +3109,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 3056 | * Compute the various statistics relavent for load balancing at | 3109 | * Compute the various statistics relavent for load balancing at |
| 3057 | * this level. | 3110 | * this level. |
| 3058 | */ | 3111 | */ |
| 3059 | update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus, | 3112 | update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds); |
| 3060 | balance, &sds); | 3113 | |
| 3061 | 3114 | /* | |
| 3062 | /* Cases where imbalance does not exist from POV of this_cpu */ | 3115 | * this_cpu is not the appropriate cpu to perform load balancing at |
| 3063 | /* 1) this_cpu is not the appropriate cpu to perform load balancing | 3116 | * this level. |
| 3064 | * at this level. | ||
| 3065 | * 2) There is no busy sibling group to pull from. | ||
| 3066 | * 3) This group is the busiest group. | ||
| 3067 | * 4) This group is more busy than the avg busyness at this | ||
| 3068 | * sched_domain. | ||
| 3069 | * 5) The imbalance is within the specified limit. | ||
| 3070 | * | ||
| 3071 | * Note: when doing newidle balance, if the local group has excess | ||
| 3072 | * capacity (i.e. nr_running < group_capacity) and the busiest group | ||
| 3073 | * does not have any capacity, we force a load balance to pull tasks | ||
| 3074 | * to the local group. In this case, we skip past checks 3, 4 and 5. | ||
| 3075 | */ | 3117 | */ |
| 3076 | if (!(*balance)) | 3118 | if (!(*balance)) |
| 3077 | goto ret; | 3119 | goto ret; |
| @@ -3080,41 +3122,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 3080 | check_asym_packing(sd, &sds, this_cpu, imbalance)) | 3122 | check_asym_packing(sd, &sds, this_cpu, imbalance)) |
| 3081 | return sds.busiest; | 3123 | return sds.busiest; |
| 3082 | 3124 | ||
| 3125 | /* There is no busy sibling group to pull tasks from */ | ||
| 3083 | if (!sds.busiest || sds.busiest_nr_running == 0) | 3126 | if (!sds.busiest || sds.busiest_nr_running == 0) |
| 3084 | goto out_balanced; | 3127 | goto out_balanced; |
| 3085 | 3128 | ||
| 3086 | /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ | 3129 | /* |
| 3130 | * If the busiest group is imbalanced the below checks don't | ||
| 3131 | * work because they assume all things are equal, which typically | ||
| 3132 | * isn't true due to cpus_allowed constraints and the like. | ||
| 3133 | */ | ||
| 3134 | if (sds.group_imb) | ||
| 3135 | goto force_balance; | ||
| 3136 | |||
| 3137 | /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ | ||
| 3087 | if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && | 3138 | if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && |
| 3088 | !sds.busiest_has_capacity) | 3139 | !sds.busiest_has_capacity) |
| 3089 | goto force_balance; | 3140 | goto force_balance; |
| 3090 | 3141 | ||
| 3142 | /* | ||
| 3143 | * If the local group is more busy than the selected busiest group | ||
| 3144 | * don't try and pull any tasks. | ||
| 3145 | */ | ||
| 3091 | if (sds.this_load >= sds.max_load) | 3146 | if (sds.this_load >= sds.max_load) |
| 3092 | goto out_balanced; | 3147 | goto out_balanced; |
| 3093 | 3148 | ||
| 3149 | /* | ||
| 3150 | * Don't pull any tasks if this group is already above the domain | ||
| 3151 | * average load. | ||
| 3152 | */ | ||
| 3094 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | 3153 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; |
| 3095 | |||
| 3096 | if (sds.this_load >= sds.avg_load) | 3154 | if (sds.this_load >= sds.avg_load) |
| 3097 | goto out_balanced; | 3155 | goto out_balanced; |
| 3098 | 3156 | ||
| 3099 | /* | 3157 | if (idle == CPU_IDLE) { |
| 3100 | * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. | ||
| 3101 | * And to check for busy balance use !idle_cpu instead of | ||
| 3102 | * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE | ||
| 3103 | * even when they are idle. | ||
| 3104 | */ | ||
| 3105 | if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { | ||
| 3106 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
| 3107 | goto out_balanced; | ||
| 3108 | } else { | ||
| 3109 | /* | 3158 | /* |
| 3110 | * This cpu is idle. If the busiest group load doesn't | 3159 | * This cpu is idle. If the busiest group load doesn't |
| 3111 | * have more tasks than the number of available cpu's and | 3160 | * have more tasks than the number of available cpu's and |
| 3112 | * there is no imbalance between this and busiest group | 3161 | * there is no imbalance between this and busiest group |
| 3113 | * wrt to idle cpu's, it is balanced. | 3162 | * wrt to idle cpu's, it is balanced. |
| 3114 | */ | 3163 | */ |
| 3115 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && | 3164 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && |
| 3116 | sds.busiest_nr_running <= sds.busiest_group_weight) | 3165 | sds.busiest_nr_running <= sds.busiest_group_weight) |
| 3117 | goto out_balanced; | 3166 | goto out_balanced; |
| 3167 | } else { | ||
| 3168 | /* | ||
| 3169 | * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use | ||
| 3170 | * imbalance_pct to be conservative. | ||
| 3171 | */ | ||
| 3172 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
| 3173 | goto out_balanced; | ||
| 3118 | } | 3174 | } |
| 3119 | 3175 | ||
| 3120 | force_balance: | 3176 | force_balance: |
| @@ -3193,7 +3249,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, | |||
| 3193 | /* Working cpumask for load_balance and load_balance_newidle. */ | 3249 | /* Working cpumask for load_balance and load_balance_newidle. */ |
| 3194 | static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); | 3250 | static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); |
| 3195 | 3251 | ||
| 3196 | static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle, | 3252 | static int need_active_balance(struct sched_domain *sd, int idle, |
| 3197 | int busiest_cpu, int this_cpu) | 3253 | int busiest_cpu, int this_cpu) |
| 3198 | { | 3254 | { |
| 3199 | if (idle == CPU_NEWLY_IDLE) { | 3255 | if (idle == CPU_NEWLY_IDLE) { |
| @@ -3225,10 +3281,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle, | |||
| 3225 | * move_tasks() will succeed. ld_moved will be true and this | 3281 | * move_tasks() will succeed. ld_moved will be true and this |
| 3226 | * active balance code will not be triggered. | 3282 | * active balance code will not be triggered. |
| 3227 | */ | 3283 | */ |
| 3228 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | ||
| 3229 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
| 3230 | return 0; | ||
| 3231 | |||
| 3232 | if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) | 3284 | if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) |
| 3233 | return 0; | 3285 | return 0; |
| 3234 | } | 3286 | } |
| @@ -3246,7 +3298,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 3246 | struct sched_domain *sd, enum cpu_idle_type idle, | 3298 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 3247 | int *balance) | 3299 | int *balance) |
| 3248 | { | 3300 | { |
| 3249 | int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 3301 | int ld_moved, all_pinned = 0, active_balance = 0; |
| 3250 | struct sched_group *group; | 3302 | struct sched_group *group; |
| 3251 | unsigned long imbalance; | 3303 | unsigned long imbalance; |
| 3252 | struct rq *busiest; | 3304 | struct rq *busiest; |
| @@ -3255,20 +3307,10 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 3255 | 3307 | ||
| 3256 | cpumask_copy(cpus, cpu_active_mask); | 3308 | cpumask_copy(cpus, cpu_active_mask); |
| 3257 | 3309 | ||
| 3258 | /* | ||
| 3259 | * When power savings policy is enabled for the parent domain, idle | ||
| 3260 | * sibling can pick up load irrespective of busy siblings. In this case, | ||
| 3261 | * let the state of idle sibling percolate up as CPU_IDLE, instead of | ||
| 3262 | * portraying it as CPU_NOT_IDLE. | ||
| 3263 | */ | ||
| 3264 | if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && | ||
| 3265 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
| 3266 | sd_idle = 1; | ||
| 3267 | |||
| 3268 | schedstat_inc(sd, lb_count[idle]); | 3310 | schedstat_inc(sd, lb_count[idle]); |
| 3269 | 3311 | ||
| 3270 | redo: | 3312 | redo: |
| 3271 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3313 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, |
| 3272 | cpus, balance); | 3314 | cpus, balance); |
| 3273 | 3315 | ||
| 3274 | if (*balance == 0) | 3316 | if (*balance == 0) |
| @@ -3330,8 +3372,7 @@ redo: | |||
| 3330 | if (idle != CPU_NEWLY_IDLE) | 3372 | if (idle != CPU_NEWLY_IDLE) |
| 3331 | sd->nr_balance_failed++; | 3373 | sd->nr_balance_failed++; |
| 3332 | 3374 | ||
| 3333 | if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), | 3375 | if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) { |
| 3334 | this_cpu)) { | ||
| 3335 | raw_spin_lock_irqsave(&busiest->lock, flags); | 3376 | raw_spin_lock_irqsave(&busiest->lock, flags); |
| 3336 | 3377 | ||
| 3337 | /* don't kick the active_load_balance_cpu_stop, | 3378 | /* don't kick the active_load_balance_cpu_stop, |
| @@ -3386,10 +3427,6 @@ redo: | |||
| 3386 | sd->balance_interval *= 2; | 3427 | sd->balance_interval *= 2; |
| 3387 | } | 3428 | } |
| 3388 | 3429 | ||
| 3389 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | ||
| 3390 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
| 3391 | ld_moved = -1; | ||
| 3392 | |||
| 3393 | goto out; | 3430 | goto out; |
| 3394 | 3431 | ||
| 3395 | out_balanced: | 3432 | out_balanced: |
| @@ -3403,11 +3440,7 @@ out_one_pinned: | |||
| 3403 | (sd->balance_interval < sd->max_interval)) | 3440 | (sd->balance_interval < sd->max_interval)) |
| 3404 | sd->balance_interval *= 2; | 3441 | sd->balance_interval *= 2; |
| 3405 | 3442 | ||
| 3406 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3443 | ld_moved = 0; |
| 3407 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
| 3408 | ld_moved = -1; | ||
| 3409 | else | ||
| 3410 | ld_moved = 0; | ||
| 3411 | out: | 3444 | out: |
| 3412 | return ld_moved; | 3445 | return ld_moved; |
| 3413 | } | 3446 | } |
| @@ -3831,8 +3864,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
| 3831 | if (load_balance(cpu, rq, sd, idle, &balance)) { | 3864 | if (load_balance(cpu, rq, sd, idle, &balance)) { |
| 3832 | /* | 3865 | /* |
| 3833 | * We've pulled tasks over so either we're no | 3866 | * We've pulled tasks over so either we're no |
| 3834 | * longer idle, or one of our SMT siblings is | 3867 | * longer idle. |
| 3835 | * not idle. | ||
| 3836 | */ | 3868 | */ |
| 3837 | idle = CPU_NOT_IDLE; | 3869 | idle = CPU_NOT_IDLE; |
| 3838 | } | 3870 | } |
| @@ -4079,33 +4111,62 @@ static void task_fork_fair(struct task_struct *p) | |||
| 4079 | * Priority of the task has changed. Check to see if we preempt | 4111 | * Priority of the task has changed. Check to see if we preempt |
| 4080 | * the current task. | 4112 | * the current task. |
| 4081 | */ | 4113 | */ |
| 4082 | static void prio_changed_fair(struct rq *rq, struct task_struct *p, | 4114 | static void |
| 4083 | int oldprio, int running) | 4115 | prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) |
| 4084 | { | 4116 | { |
| 4117 | if (!p->se.on_rq) | ||
| 4118 | return; | ||
| 4119 | |||
| 4085 | /* | 4120 | /* |
| 4086 | * Reschedule if we are currently running on this runqueue and | 4121 | * Reschedule if we are currently running on this runqueue and |
| 4087 | * our priority decreased, or if we are not currently running on | 4122 | * our priority decreased, or if we are not currently running on |
| 4088 | * this runqueue and our priority is higher than the current's | 4123 | * this runqueue and our priority is higher than the current's |
| 4089 | */ | 4124 | */ |
| 4090 | if (running) { | 4125 | if (rq->curr == p) { |
| 4091 | if (p->prio > oldprio) | 4126 | if (p->prio > oldprio) |
| 4092 | resched_task(rq->curr); | 4127 | resched_task(rq->curr); |
| 4093 | } else | 4128 | } else |
| 4094 | check_preempt_curr(rq, p, 0); | 4129 | check_preempt_curr(rq, p, 0); |
| 4095 | } | 4130 | } |
| 4096 | 4131 | ||
| 4132 | static void switched_from_fair(struct rq *rq, struct task_struct *p) | ||
| 4133 | { | ||
| 4134 | struct sched_entity *se = &p->se; | ||
| 4135 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
| 4136 | |||
| 4137 | /* | ||
| 4138 | * Ensure the task's vruntime is normalized, so that when its | ||
| 4139 | * switched back to the fair class the enqueue_entity(.flags=0) will | ||
| 4140 | * do the right thing. | ||
| 4141 | * | ||
| 4142 | * If it was on_rq, then the dequeue_entity(.flags=0) will already | ||
| 4143 | * have normalized the vruntime, if it was !on_rq, then only when | ||
| 4144 | * the task is sleeping will it still have non-normalized vruntime. | ||
| 4145 | */ | ||
| 4146 | if (!se->on_rq && p->state != TASK_RUNNING) { | ||
| 4147 | /* | ||
| 4148 | * Fix up our vruntime so that the current sleep doesn't | ||
| 4149 | * cause 'unlimited' sleep bonus. | ||
| 4150 | */ | ||
| 4151 | place_entity(cfs_rq, se, 0); | ||
| 4152 | se->vruntime -= cfs_rq->min_vruntime; | ||
| 4153 | } | ||
| 4154 | } | ||
| 4155 | |||
| 4097 | /* | 4156 | /* |
| 4098 | * We switched to the sched_fair class. | 4157 | * We switched to the sched_fair class. |
| 4099 | */ | 4158 | */ |
| 4100 | static void switched_to_fair(struct rq *rq, struct task_struct *p, | 4159 | static void switched_to_fair(struct rq *rq, struct task_struct *p) |
| 4101 | int running) | ||
| 4102 | { | 4160 | { |
| 4161 | if (!p->se.on_rq) | ||
| 4162 | return; | ||
| 4163 | |||
| 4103 | /* | 4164 | /* |
| 4104 | * We were most likely switched from sched_rt, so | 4165 | * We were most likely switched from sched_rt, so |
| 4105 | * kick off the schedule if running, otherwise just see | 4166 | * kick off the schedule if running, otherwise just see |
| 4106 | * if we can still preempt the current task. | 4167 | * if we can still preempt the current task. |
| 4107 | */ | 4168 | */ |
| 4108 | if (running) | 4169 | if (rq->curr == p) |
| 4109 | resched_task(rq->curr); | 4170 | resched_task(rq->curr); |
| 4110 | else | 4171 | else |
| 4111 | check_preempt_curr(rq, p, 0); | 4172 | check_preempt_curr(rq, p, 0); |
| @@ -4171,6 +4232,7 @@ static const struct sched_class fair_sched_class = { | |||
| 4171 | .enqueue_task = enqueue_task_fair, | 4232 | .enqueue_task = enqueue_task_fair, |
| 4172 | .dequeue_task = dequeue_task_fair, | 4233 | .dequeue_task = dequeue_task_fair, |
| 4173 | .yield_task = yield_task_fair, | 4234 | .yield_task = yield_task_fair, |
| 4235 | .yield_to_task = yield_to_task_fair, | ||
| 4174 | 4236 | ||
| 4175 | .check_preempt_curr = check_preempt_wakeup, | 4237 | .check_preempt_curr = check_preempt_wakeup, |
| 4176 | 4238 | ||
| @@ -4191,6 +4253,7 @@ static const struct sched_class fair_sched_class = { | |||
| 4191 | .task_fork = task_fork_fair, | 4253 | .task_fork = task_fork_fair, |
| 4192 | 4254 | ||
| 4193 | .prio_changed = prio_changed_fair, | 4255 | .prio_changed = prio_changed_fair, |
| 4256 | .switched_from = switched_from_fair, | ||
| 4194 | .switched_to = switched_to_fair, | 4257 | .switched_to = switched_to_fair, |
| 4195 | 4258 | ||
| 4196 | .get_rr_interval = get_rr_interval_fair, | 4259 | .get_rr_interval = get_rr_interval_fair, |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 9fa0f402c87c..c82f26c1b7c3 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
| @@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq) | |||
| 52 | { | 52 | { |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | static void switched_to_idle(struct rq *rq, struct task_struct *p, | 55 | static void switched_to_idle(struct rq *rq, struct task_struct *p) |
| 56 | int running) | ||
| 57 | { | 56 | { |
| 58 | /* Can this actually happen?? */ | 57 | BUG(); |
| 59 | if (running) | ||
| 60 | resched_task(rq->curr); | ||
| 61 | else | ||
| 62 | check_preempt_curr(rq, p, 0); | ||
| 63 | } | 58 | } |
| 64 | 59 | ||
| 65 | static void prio_changed_idle(struct rq *rq, struct task_struct *p, | 60 | static void |
| 66 | int oldprio, int running) | 61 | prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio) |
| 67 | { | 62 | { |
| 68 | /* This can happen for hot plug CPUS */ | 63 | BUG(); |
| 69 | |||
| 70 | /* | ||
| 71 | * Reschedule if we are currently running on this runqueue and | ||
| 72 | * our priority decreased, or if we are not currently running on | ||
| 73 | * this runqueue and our priority is higher than the current's | ||
| 74 | */ | ||
| 75 | if (running) { | ||
| 76 | if (p->prio > oldprio) | ||
| 77 | resched_task(rq->curr); | ||
| 78 | } else | ||
| 79 | check_preempt_curr(rq, p, 0); | ||
| 80 | } | 64 | } |
| 81 | 65 | ||
| 82 | static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) | 66 | static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 01f75a5f17af..db308cb08b75 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -1599,8 +1599,7 @@ static void rq_offline_rt(struct rq *rq) | |||
| 1599 | * When switch from the rt queue, we bring ourselves to a position | 1599 | * When switch from the rt queue, we bring ourselves to a position |
| 1600 | * that we might want to pull RT tasks from other runqueues. | 1600 | * that we might want to pull RT tasks from other runqueues. |
| 1601 | */ | 1601 | */ |
| 1602 | static void switched_from_rt(struct rq *rq, struct task_struct *p, | 1602 | static void switched_from_rt(struct rq *rq, struct task_struct *p) |
| 1603 | int running) | ||
| 1604 | { | 1603 | { |
| 1605 | /* | 1604 | /* |
| 1606 | * If there are other RT tasks then we will reschedule | 1605 | * If there are other RT tasks then we will reschedule |
| @@ -1609,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, | |||
| 1609 | * we may need to handle the pulling of RT tasks | 1608 | * we may need to handle the pulling of RT tasks |
| 1610 | * now. | 1609 | * now. |
| 1611 | */ | 1610 | */ |
| 1612 | if (!rq->rt.rt_nr_running) | 1611 | if (p->se.on_rq && !rq->rt.rt_nr_running) |
| 1613 | pull_rt_task(rq); | 1612 | pull_rt_task(rq); |
| 1614 | } | 1613 | } |
| 1615 | 1614 | ||
| @@ -1628,8 +1627,7 @@ static inline void init_sched_rt_class(void) | |||
| 1628 | * with RT tasks. In this case we try to push them off to | 1627 | * with RT tasks. In this case we try to push them off to |
| 1629 | * other runqueues. | 1628 | * other runqueues. |
| 1630 | */ | 1629 | */ |
| 1631 | static void switched_to_rt(struct rq *rq, struct task_struct *p, | 1630 | static void switched_to_rt(struct rq *rq, struct task_struct *p) |
| 1632 | int running) | ||
| 1633 | { | 1631 | { |
| 1634 | int check_resched = 1; | 1632 | int check_resched = 1; |
| 1635 | 1633 | ||
| @@ -1640,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p, | |||
| 1640 | * If that current running task is also an RT task | 1638 | * If that current running task is also an RT task |
| 1641 | * then see if we can move to another run queue. | 1639 | * then see if we can move to another run queue. |
| 1642 | */ | 1640 | */ |
| 1643 | if (!running) { | 1641 | if (p->se.on_rq && rq->curr != p) { |
| 1644 | #ifdef CONFIG_SMP | 1642 | #ifdef CONFIG_SMP |
| 1645 | if (rq->rt.overloaded && push_rt_task(rq) && | 1643 | if (rq->rt.overloaded && push_rt_task(rq) && |
| 1646 | /* Don't resched if we changed runqueues */ | 1644 | /* Don't resched if we changed runqueues */ |
| @@ -1656,10 +1654,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p, | |||
| 1656 | * Priority of the task has changed. This may cause | 1654 | * Priority of the task has changed. This may cause |
| 1657 | * us to initiate a push or pull. | 1655 | * us to initiate a push or pull. |
| 1658 | */ | 1656 | */ |
| 1659 | static void prio_changed_rt(struct rq *rq, struct task_struct *p, | 1657 | static void |
| 1660 | int oldprio, int running) | 1658 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) |
| 1661 | { | 1659 | { |
| 1662 | if (running) { | 1660 | if (!p->se.on_rq) |
| 1661 | return; | ||
| 1662 | |||
| 1663 | if (rq->curr == p) { | ||
| 1663 | #ifdef CONFIG_SMP | 1664 | #ifdef CONFIG_SMP |
| 1664 | /* | 1665 | /* |
| 1665 | * If our priority decreases while running, we | 1666 | * If our priority decreases while running, we |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 2bf6b47058c1..84ec9bcf82d9 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
| @@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq) | |||
| 59 | { | 59 | { |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static void switched_to_stop(struct rq *rq, struct task_struct *p, | 62 | static void switched_to_stop(struct rq *rq, struct task_struct *p) |
| 63 | int running) | ||
| 64 | { | 63 | { |
| 65 | BUG(); /* its impossible to change to this class */ | 64 | BUG(); /* its impossible to change to this class */ |
| 66 | } | 65 | } |
| 67 | 66 | ||
| 68 | static void prio_changed_stop(struct rq *rq, struct task_struct *p, | 67 | static void |
| 69 | int oldprio, int running) | 68 | prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio) |
| 70 | { | 69 | { |
| 71 | BUG(); /* how!?, what priority? */ | 70 | BUG(); /* how!?, what priority? */ |
| 72 | } | 71 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 68eb5efec388..0cee50487629 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat); | |||
| 54 | 54 | ||
| 55 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | 55 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; |
| 56 | 56 | ||
| 57 | static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | 57 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); |
| 58 | 58 | ||
| 59 | char *softirq_to_name[NR_SOFTIRQS] = { | 59 | char *softirq_to_name[NR_SOFTIRQS] = { |
| 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
| @@ -721,7 +721,6 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
| 721 | { | 721 | { |
| 722 | set_current_state(TASK_INTERRUPTIBLE); | 722 | set_current_state(TASK_INTERRUPTIBLE); |
| 723 | 723 | ||
| 724 | current->flags |= PF_KSOFTIRQD; | ||
| 725 | while (!kthread_should_stop()) { | 724 | while (!kthread_should_stop()) { |
| 726 | preempt_disable(); | 725 | preempt_disable(); |
| 727 | if (!local_softirq_pending()) { | 726 | if (!local_softirq_pending()) { |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 19b9d85e06cc..51054fea5d99 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -361,20 +361,13 @@ static struct ctl_table kern_table[] = { | |||
| 361 | .mode = 0644, | 361 | .mode = 0644, |
| 362 | .proc_handler = sched_rt_handler, | 362 | .proc_handler = sched_rt_handler, |
| 363 | }, | 363 | }, |
| 364 | { | ||
| 365 | .procname = "sched_compat_yield", | ||
| 366 | .data = &sysctl_sched_compat_yield, | ||
| 367 | .maxlen = sizeof(unsigned int), | ||
| 368 | .mode = 0644, | ||
| 369 | .proc_handler = proc_dointvec, | ||
| 370 | }, | ||
| 371 | #ifdef CONFIG_SCHED_AUTOGROUP | 364 | #ifdef CONFIG_SCHED_AUTOGROUP |
| 372 | { | 365 | { |
| 373 | .procname = "sched_autogroup_enabled", | 366 | .procname = "sched_autogroup_enabled", |
| 374 | .data = &sysctl_sched_autogroup_enabled, | 367 | .data = &sysctl_sched_autogroup_enabled, |
| 375 | .maxlen = sizeof(unsigned int), | 368 | .maxlen = sizeof(unsigned int), |
| 376 | .mode = 0644, | 369 | .mode = 0644, |
| 377 | .proc_handler = proc_dointvec, | 370 | .proc_handler = proc_dointvec_minmax, |
| 378 | .extra1 = &zero, | 371 | .extra1 = &zero, |
| 379 | .extra2 = &one, | 372 | .extra2 = &one, |
| 380 | }, | 373 | }, |
diff --git a/kernel/time.c b/kernel/time.c index 32174359576f..55337a816b20 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x) | |||
| 645 | } | 645 | } |
| 646 | 646 | ||
| 647 | /** | 647 | /** |
| 648 | * nsecs_to_jiffies - Convert nsecs in u64 to jiffies | 648 | * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 |
| 649 | * | 649 | * |
| 650 | * @n: nsecs in u64 | 650 | * @n: nsecs in u64 |
| 651 | * | 651 | * |
| @@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x) | |||
| 657 | * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) | 657 | * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) |
| 658 | * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years | 658 | * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years |
| 659 | */ | 659 | */ |
| 660 | unsigned long nsecs_to_jiffies(u64 n) | 660 | u64 nsecs_to_jiffies64(u64 n) |
| 661 | { | 661 | { |
| 662 | #if (NSEC_PER_SEC % HZ) == 0 | 662 | #if (NSEC_PER_SEC % HZ) == 0 |
| 663 | /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ | 663 | /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ |
| @@ -674,6 +674,25 @@ unsigned long nsecs_to_jiffies(u64 n) | |||
| 674 | #endif | 674 | #endif |
| 675 | } | 675 | } |
| 676 | 676 | ||
| 677 | |||
| 678 | /** | ||
| 679 | * nsecs_to_jiffies - Convert nsecs in u64 to jiffies | ||
| 680 | * | ||
| 681 | * @n: nsecs in u64 | ||
| 682 | * | ||
| 683 | * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. | ||
| 684 | * And this doesn't return MAX_JIFFY_OFFSET since this function is designed | ||
| 685 | * for scheduler, not for use in device drivers to calculate timeout value. | ||
| 686 | * | ||
| 687 | * note: | ||
| 688 | * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) | ||
| 689 | * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years | ||
| 690 | */ | ||
| 691 | unsigned long nsecs_to_jiffies(u64 n) | ||
| 692 | { | ||
| 693 | return (unsigned long)nsecs_to_jiffies64(n); | ||
| 694 | } | ||
| 695 | |||
| 677 | #if (BITS_PER_LONG < 64) | 696 | #if (BITS_PER_LONG < 64) |
| 678 | u64 get_jiffies_64(void) | 697 | u64 get_jiffies_64(void) |
| 679 | { | 698 | { |
