Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  334
1 files changed, 270 insertions, 64 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 66ca5d9ba83c..c8e40b7005c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
          * 'curr' points to currently running entity on this cfs_rq.
          * It is set to NULL otherwise (i.e when none are currently running).
          */
-        struct sched_entity *curr, *next, *last;
+        struct sched_entity *curr, *next, *last, *skip;
 
         unsigned int nr_spread_over;
 
@@ -606,9 +606,6 @@ static inline struct task_group *task_group(struct task_struct *p)
         struct task_group *tg;
         struct cgroup_subsys_state *css;
 
-        if (p->flags & PF_EXITING)
-                return &root_task_group;
-
         css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
                         lockdep_is_held(&task_rq(p)->lock));
         tg = container_of(css, struct task_group, css);
@@ -1686,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
         __release(rq2->lock);
 }
 
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
+        __acquires(rq1->lock)
+        __acquires(rq2->lock)
+{
+        BUG_ON(!irqs_disabled());
+        BUG_ON(rq1 != rq2);
+        raw_spin_lock(&rq1->lock);
+        __acquire(rq2->lock);   /* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+        __releases(rq1->lock)
+        __releases(rq2->lock)
+{
+        BUG_ON(rq1 != rq2);
+        raw_spin_unlock(&rq1->lock);
+        __release(rq2->lock);
+}
+
 #endif
 
 static void calc_load_account_idle(struct rq *this_rq);
@@ -1880,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr)
          */
         if (hardirq_count())
                 __this_cpu_add(cpu_hardirq_time, delta);
-        else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                 __this_cpu_add(cpu_softirq_time, delta);
 
         irq_time_write_end();
@@ -1920,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
                 sched_rt_avg_update(rq, irq_delta);
 }
 
+static int irqtime_account_hi_update(void)
+{
+        struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+        unsigned long flags;
+        u64 latest_ns;
+        int ret = 0;
+
+        local_irq_save(flags);
+        latest_ns = this_cpu_read(cpu_hardirq_time);
+        if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
+                ret = 1;
+        local_irq_restore(flags);
+        return ret;
+}
+
+static int irqtime_account_si_update(void)
+{
+        struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+        unsigned long flags;
+        u64 latest_ns;
+        int ret = 0;
+
+        local_irq_save(flags);
+        latest_ns = this_cpu_read(cpu_softirq_time);
+        if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
+                ret = 1;
+        local_irq_restore(flags);
+        return ret;
+}
+
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+#define sched_clock_irqtime (0)
+
 static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
         rq->clock_task += delta;
@@ -2025,14 +2087,14 @@ inline int task_curr(const struct task_struct *p)
 
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                                        const struct sched_class *prev_class,
-                                       int oldprio, int running)
+                                       int oldprio)
 {
         if (prev_class != p->sched_class) {
                 if (prev_class->switched_from)
-                        prev_class->switched_from(rq, p, running);
-                p->sched_class->switched_to(rq, p, running);
-        } else
-                p->sched_class->prio_changed(rq, p, oldprio, running);
+                        prev_class->switched_from(rq, p);
+                p->sched_class->switched_to(rq, p);
+        } else if (oldprio != p->prio)
+                p->sched_class->prio_changed(rq, p, oldprio);
 }
 
 static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2268,27 +2330,6 @@ void kick_process(struct task_struct *p)
 EXPORT_SYMBOL_GPL(kick_process);
 #endif /* CONFIG_SMP */
 
-/**
- * task_oncpu_function_call - call a function on the cpu on which a task runs
- * @p: the task to evaluate
- * @func: the function to be called
- * @info: the function call argument
- *
- * Calls the function @func when the task is currently running. This might
- * be on the current CPU, which just calls the function directly
- */
-void task_oncpu_function_call(struct task_struct *p,
-                              void (*func) (void *info), void *info)
-{
-        int cpu;
-
-        preempt_disable();
-        cpu = task_cpu(p);
-        if (task_curr(p))
-                smp_call_function_single(cpu, func, info, 1);
-        preempt_enable();
-}
-
 #ifdef CONFIG_SMP
 /*
  * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -2569,6 +2610,7 @@ static void __sched_fork(struct task_struct *p)
         p->se.sum_exec_runtime = 0;
         p->se.prev_sum_exec_runtime = 0;
         p->se.nr_migrations = 0;
+        p->se.vruntime = 0;
 
 #ifdef CONFIG_SCHEDSTATS
         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2779,9 +2821,12 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
                     struct task_struct *next)
 {
+        sched_info_switch(prev, next);
+        perf_event_task_sched_out(prev, next);
         fire_sched_out_preempt_notifiers(prev, next);
         prepare_lock_switch(rq, next);
         prepare_arch_switch(next);
+        trace_sched_switch(prev, next);
 }
 
 /**
@@ -2914,7 +2959,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
         struct mm_struct *mm, *oldmm;
 
         prepare_task_switch(rq, prev, next);
-        trace_sched_switch(prev, next);
+
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@ -3571,6 +3616,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
 }
 
 /*
+ * Account system cpu time to a process and desired cpustat field
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
+ * @target_cputime64: pointer to cpustat field that has to be updated
+ */
+static inline
+void __account_system_time(struct task_struct *p, cputime_t cputime,
+                        cputime_t cputime_scaled, cputime64_t *target_cputime64)
+{
+        cputime64_t tmp = cputime_to_cputime64(cputime);
+
+        /* Add system time to process. */
+        p->stime = cputime_add(p->stime, cputime);
+        p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
+        account_group_system_time(p, cputime);
+
+        /* Add system time to cpustat. */
+        *target_cputime64 = cputime64_add(*target_cputime64, tmp);
+        cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
+        /* Account for system time used */
+        acct_update_integrals(p);
+}
+
+/*
  * Account system cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3581,36 +3652,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
                          cputime_t cputime, cputime_t cputime_scaled)
 {
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-        cputime64_t tmp;
+        cputime64_t *target_cputime64;
 
         if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                 account_guest_time(p, cputime, cputime_scaled);
                 return;
         }
 
-        /* Add system time to process. */
-        p->stime = cputime_add(p->stime, cputime);
-        p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
-        account_group_system_time(p, cputime);
-
-        /* Add system time to cpustat. */
-        tmp = cputime_to_cputime64(cputime);
         if (hardirq_count() - hardirq_offset)
-                cpustat->irq = cputime64_add(cpustat->irq, tmp);
+                target_cputime64 = &cpustat->irq;
         else if (in_serving_softirq())
-                cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+                target_cputime64 = &cpustat->softirq;
         else
-                cpustat->system = cputime64_add(cpustat->system, tmp);
+                target_cputime64 = &cpustat->system;
 
-        cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
-
-        /* Account for system time used */
-        acct_update_integrals(p);
+        __account_system_time(p, cputime, cputime_scaled, target_cputime64);
 }
 
 /*
  * Account for involuntary wait time.
- * @steal: the cpu time spent in involuntary wait
+ * @cputime: the cpu time spent in involuntary wait
  */
 void account_steal_time(cputime_t cputime)
 {
@@ -3638,6 +3699,73 @@ void account_idle_time(cputime_t cputime)
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * Account a tick to a process and cpustat
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: is the tick from userspace
+ * @rq: the pointer to rq
+ *
+ * Tick demultiplexing follows the order
+ * - pending hardirq update
+ * - pending softirq update
+ * - user_time
+ * - idle_time
+ * - system time
+ *   - check for guest_time
+ *   - else account as system_time
+ *
+ * Check for hardirq is done both for system and user time as there is
+ * no timer going off while we are on hardirq and hence we may never get an
+ * opportunity to update it solely in system time.
+ * p->stime and friends are only updated on system time and not on irq
+ * softirq as those do not count in task exec_runtime any more.
+ */
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+                                                struct rq *rq)
+{
+        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+        cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
+        struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+
+        if (irqtime_account_hi_update()) {
+                cpustat->irq = cputime64_add(cpustat->irq, tmp);
+        } else if (irqtime_account_si_update()) {
+                cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+        } else if (this_cpu_ksoftirqd() == p) {
+                /*
+                 * ksoftirqd time do not get accounted in cpu_softirq_time.
+                 * So, we have to handle it separately here.
+                 * Also, p->stime needs to be updated for ksoftirqd.
+                 */
+                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+                                        &cpustat->softirq);
+        } else if (user_tick) {
+                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+        } else if (p == rq->idle) {
+                account_idle_time(cputime_one_jiffy);
+        } else if (p->flags & PF_VCPU) { /* System time or guest time */
+                account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+        } else {
+                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+                                        &cpustat->system);
+        }
+}
+
+static void irqtime_account_idle_ticks(int ticks)
+{
+        int i;
+        struct rq *rq = this_rq();
+
+        for (i = 0; i < ticks; i++)
+                irqtime_account_process_tick(current, 0, rq);
+}
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
+static void irqtime_account_idle_ticks(int ticks) {}
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+                                                struct rq *rq) {}
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -3648,6 +3776,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
         cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
         struct rq *rq = this_rq();
 
+        if (sched_clock_irqtime) {
+                irqtime_account_process_tick(p, user_tick, rq);
+                return;
+        }
+
         if (user_tick)
                 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
         else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3673,6 +3806,12 @@ void account_steal_ticks(unsigned long ticks)
  */
 void account_idle_ticks(unsigned long ticks)
 {
+
+        if (sched_clock_irqtime) {
+                irqtime_account_idle_ticks(ticks);
+                return;
+        }
+
         account_idle_time(jiffies_to_cputime(ticks));
 }
 
@@ -3992,9 +4131,6 @@ need_resched_nonpreemptible:
         rq->skip_clock_update = 0;
 
         if (likely(prev != next)) {
-                sched_info_switch(prev, next);
-                perf_event_task_sched_out(prev, next);
-
                 rq->nr_switches++;
                 rq->curr = next;
                 ++*switch_count;
@@ -4216,6 +4352,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 {
         __wake_up_common(q, mode, 1, 0, key);
 }
+EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@ -4573,11 +4710,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
         if (running)
                 p->sched_class->set_curr_task(rq);
-        if (on_rq) {
+        if (on_rq)
                 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
-                check_class_changed(rq, p, prev_class, oldprio, running);
-        }
+        check_class_changed(rq, p, prev_class, oldprio);
         task_rq_unlock(rq, &flags);
 }
 
@@ -4825,12 +4961,15 @@ recheck:
                             param->sched_priority > rlim_rtprio)
                                 return -EPERM;
                 }
+
                 /*
-                 * Like positive nice levels, dont allow tasks to
-                 * move out of SCHED_IDLE either:
+                 * Treat SCHED_IDLE as nice 20. Only allow a switch to
+                 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
                  */
-                if (p->policy == SCHED_IDLE && policy != SCHED_IDLE)
-                        return -EPERM;
+                if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
+                        if (!can_nice(p, TASK_NICE(p)))
+                                return -EPERM;
+                }
 
                 /* can't change other user's priorities */
                 if (!check_same_owner(p))
@@ -4905,11 +5044,10 @@ recheck:
 
         if (running)
                 p->sched_class->set_curr_task(rq);
-        if (on_rq) {
+        if (on_rq)
                 activate_task(rq, p, 0);
 
-                check_class_changed(rq, p, prev_class, oldprio, running);
-        }
+        check_class_changed(rq, p, prev_class, oldprio);
         __task_rq_unlock(rq);
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -5326,6 +5464,65 @@ void __sched yield(void)
 }
 EXPORT_SYMBOL(yield);
 
+/**
+ * yield_to - yield the current processor to another thread in
+ * your thread group, or accelerate that thread toward the
+ * processor it's on.
+ *
+ * It's the caller's job to ensure that the target task struct
+ * can't go away on us before we can do any checks.
+ *
+ * Returns true if we indeed boosted the target task.
+ */
+bool __sched yield_to(struct task_struct *p, bool preempt)
+{
+        struct task_struct *curr = current;
+        struct rq *rq, *p_rq;
+        unsigned long flags;
+        bool yielded = 0;
+
+        local_irq_save(flags);
+        rq = this_rq();
+
+again:
+        p_rq = task_rq(p);
+        double_rq_lock(rq, p_rq);
+        while (task_rq(p) != p_rq) {
+                double_rq_unlock(rq, p_rq);
+                goto again;
+        }
+
+        if (!curr->sched_class->yield_to_task)
+                goto out;
+
+        if (curr->sched_class != p->sched_class)
+                goto out;
+
+        if (task_running(p_rq, p) || p->state)
+                goto out;
+
+        yielded = curr->sched_class->yield_to_task(rq, p, preempt);
+        if (yielded) {
+                schedstat_inc(rq, yld_count);
+                /*
+                 * Make p's CPU reschedule; pick_next_entity takes care of
+                 * fairness.
+                 */
+                if (preempt && rq != p_rq)
+                        resched_task(p_rq->curr);
+        }
+
+out:
+        double_rq_unlock(rq, p_rq);
+        local_irq_restore(flags);
+
+        if (yielded)
+                schedule();
+
+        return yielded;
+}
+EXPORT_SYMBOL_GPL(yield_to);
+
 /*
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
@@ -5574,7 +5771,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
          * The idle tasks have their own, simple scheduling class:
          */
         idle->sched_class = &idle_sched_class;
-        ftrace_graph_init_task(idle);
+        ftrace_graph_init_idle_task(idle, cpu);
 }
 
 /*
@@ -7799,6 +7996,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
         INIT_LIST_HEAD(&cfs_rq->tasks);
 #ifdef CONFIG_FAIR_GROUP_SCHED
         cfs_rq->rq = rq;
+        /* allow initial update_cfs_load() to truncate */
+#ifdef CONFIG_SMP
+        cfs_rq->load_stamp = 1;
+#endif
 #endif
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 }
@@ -8112,6 +8313,8 @@ EXPORT_SYMBOL(__might_sleep);
 #ifdef CONFIG_MAGIC_SYSRQ
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
+        const struct sched_class *prev_class = p->sched_class;
+        int old_prio = p->prio;
         int on_rq;
 
         on_rq = p->se.on_rq;
@@ -8122,6 +8325,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
                 activate_task(rq, p, 0);
                 resched_task(rq->curr);
         }
+
+        check_class_changed(rq, p, prev_class, old_prio);
 }
 
 void normalize_rt_tasks(void)
@@ -8513,7 +8718,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
                 /* Propagate contribution to hierarchy */
                 raw_spin_lock_irqsave(&rq->lock, flags);
                 for_each_sched_entity(se)
-                        update_cfs_shares(group_cfs_rq(se), 0);
+                        update_cfs_shares(group_cfs_rq(se));
                 raw_spin_unlock_irqrestore(&rq->lock, flags);
         }
 
@@ -8887,7 +9092,8 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 }
 
 static void
-cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                struct cgroup *old_cgrp, struct task_struct *task)
 {
         /*
          * cgroup_exit() is called in the copy_process() failure path.