diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 351 |
1 files changed, 276 insertions, 75 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4ec7b..a172494a9a6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/uaccess.h> | 33 | #include <linux/uaccess.h> |
34 | #include <linux/highmem.h> | 34 | #include <linux/highmem.h> |
35 | #include <linux/smp_lock.h> | ||
36 | #include <asm/mmu_context.h> | 35 | #include <asm/mmu_context.h> |
37 | #include <linux/interrupt.h> | 36 | #include <linux/interrupt.h> |
38 | #include <linux/capability.h> | 37 | #include <linux/capability.h> |
@@ -324,7 +323,7 @@ struct cfs_rq { | |||
324 | * 'curr' points to currently running entity on this cfs_rq. | 323 | * 'curr' points to currently running entity on this cfs_rq. |
325 | * It is set to NULL otherwise (i.e when none are currently running). | 324 | * It is set to NULL otherwise (i.e when none are currently running). |
326 | */ | 325 | */ |
327 | struct sched_entity *curr, *next, *last; | 326 | struct sched_entity *curr, *next, *last, *skip; |
328 | 327 | ||
329 | unsigned int nr_spread_over; | 328 | unsigned int nr_spread_over; |
330 | 329 | ||
@@ -606,9 +605,6 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
606 | struct task_group *tg; | 605 | struct task_group *tg; |
607 | struct cgroup_subsys_state *css; | 606 | struct cgroup_subsys_state *css; |
608 | 607 | ||
609 | if (p->flags & PF_EXITING) | ||
610 | return &root_task_group; | ||
611 | |||
612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 608 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
613 | lockdep_is_held(&task_rq(p)->lock)); | 609 | lockdep_is_held(&task_rq(p)->lock)); |
614 | tg = container_of(css, struct task_group, css); | 610 | tg = container_of(css, struct task_group, css); |
@@ -664,10 +660,9 @@ static void update_rq_clock(struct rq *rq) | |||
664 | #endif | 660 | #endif |
665 | 661 | ||
666 | /** | 662 | /** |
667 | * runqueue_is_locked | 663 | * runqueue_is_locked - Returns true if the current cpu runqueue is locked |
668 | * @cpu: the processor in question. | 664 | * @cpu: the processor in question. |
669 | * | 665 | * |
670 | * Returns true if the current cpu runqueue is locked. | ||
671 | * This interface allows printk to be called with the runqueue lock | 666 | * This interface allows printk to be called with the runqueue lock |
672 | * held and know whether or not it is OK to wake up the klogd. | 667 | * held and know whether or not it is OK to wake up the klogd. |
673 | */ | 668 | */ |
@@ -1686,6 +1681,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
1686 | __release(rq2->lock); | 1681 | __release(rq2->lock); |
1687 | } | 1682 | } |
1688 | 1683 | ||
1684 | #else /* CONFIG_SMP */ | ||
1685 | |||
1686 | /* | ||
1687 | * double_rq_lock - safely lock two runqueues | ||
1688 | * | ||
1689 | * Note this does not disable interrupts like task_rq_lock, | ||
1690 | * you need to do so manually before calling. | ||
1691 | */ | ||
1692 | static void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
1693 | __acquires(rq1->lock) | ||
1694 | __acquires(rq2->lock) | ||
1695 | { | ||
1696 | BUG_ON(!irqs_disabled()); | ||
1697 | BUG_ON(rq1 != rq2); | ||
1698 | raw_spin_lock(&rq1->lock); | ||
1699 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
1700 | } | ||
1701 | |||
1702 | /* | ||
1703 | * double_rq_unlock - safely unlock two runqueues | ||
1704 | * | ||
1705 | * Note this does not restore interrupts like task_rq_unlock, | ||
1706 | * you need to do so manually after calling. | ||
1707 | */ | ||
1708 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
1709 | __releases(rq1->lock) | ||
1710 | __releases(rq2->lock) | ||
1711 | { | ||
1712 | BUG_ON(rq1 != rq2); | ||
1713 | raw_spin_unlock(&rq1->lock); | ||
1714 | __release(rq2->lock); | ||
1715 | } | ||
1716 | |||
1689 | #endif | 1717 | #endif |
1690 | 1718 | ||
1691 | static void calc_load_account_idle(struct rq *this_rq); | 1719 | static void calc_load_account_idle(struct rq *this_rq); |
@@ -1880,7 +1908,7 @@ void account_system_vtime(struct task_struct *curr) | |||
1880 | */ | 1908 | */ |
1881 | if (hardirq_count()) | 1909 | if (hardirq_count()) |
1882 | __this_cpu_add(cpu_hardirq_time, delta); | 1910 | __this_cpu_add(cpu_hardirq_time, delta); |
1883 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 1911 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) |
1884 | __this_cpu_add(cpu_softirq_time, delta); | 1912 | __this_cpu_add(cpu_softirq_time, delta); |
1885 | 1913 | ||
1886 | irq_time_write_end(); | 1914 | irq_time_write_end(); |
@@ -1920,8 +1948,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
1920 | sched_rt_avg_update(rq, irq_delta); | 1948 | sched_rt_avg_update(rq, irq_delta); |
1921 | } | 1949 | } |
1922 | 1950 | ||
1951 | static int irqtime_account_hi_update(void) | ||
1952 | { | ||
1953 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
1954 | unsigned long flags; | ||
1955 | u64 latest_ns; | ||
1956 | int ret = 0; | ||
1957 | |||
1958 | local_irq_save(flags); | ||
1959 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
1960 | if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq)) | ||
1961 | ret = 1; | ||
1962 | local_irq_restore(flags); | ||
1963 | return ret; | ||
1964 | } | ||
1965 | |||
1966 | static int irqtime_account_si_update(void) | ||
1967 | { | ||
1968 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
1969 | unsigned long flags; | ||
1970 | u64 latest_ns; | ||
1971 | int ret = 0; | ||
1972 | |||
1973 | local_irq_save(flags); | ||
1974 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
1975 | if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq)) | ||
1976 | ret = 1; | ||
1977 | local_irq_restore(flags); | ||
1978 | return ret; | ||
1979 | } | ||
1980 | |||
1923 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 1981 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
1924 | 1982 | ||
1983 | #define sched_clock_irqtime (0) | ||
1984 | |||
1925 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 1985 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
1926 | { | 1986 | { |
1927 | rq->clock_task += delta; | 1987 | rq->clock_task += delta; |
@@ -2025,14 +2085,14 @@ inline int task_curr(const struct task_struct *p) | |||
2025 | 2085 | ||
2026 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 2086 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
2027 | const struct sched_class *prev_class, | 2087 | const struct sched_class *prev_class, |
2028 | int oldprio, int running) | 2088 | int oldprio) |
2029 | { | 2089 | { |
2030 | if (prev_class != p->sched_class) { | 2090 | if (prev_class != p->sched_class) { |
2031 | if (prev_class->switched_from) | 2091 | if (prev_class->switched_from) |
2032 | prev_class->switched_from(rq, p, running); | 2092 | prev_class->switched_from(rq, p); |
2033 | p->sched_class->switched_to(rq, p, running); | 2093 | p->sched_class->switched_to(rq, p); |
2034 | } else | 2094 | } else if (oldprio != p->prio) |
2035 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2095 | p->sched_class->prio_changed(rq, p, oldprio); |
2036 | } | 2096 | } |
2037 | 2097 | ||
2038 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | 2098 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
@@ -2224,7 +2284,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
2224 | * yield - it could be a while. | 2284 | * yield - it could be a while. |
2225 | */ | 2285 | */ |
2226 | if (unlikely(on_rq)) { | 2286 | if (unlikely(on_rq)) { |
2227 | schedule_timeout_uninterruptible(1); | 2287 | ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ); |
2288 | |||
2289 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
2290 | schedule_hrtimeout(&to, HRTIMER_MODE_REL); | ||
2228 | continue; | 2291 | continue; |
2229 | } | 2292 | } |
2230 | 2293 | ||
@@ -2265,27 +2328,6 @@ void kick_process(struct task_struct *p) | |||
2265 | EXPORT_SYMBOL_GPL(kick_process); | 2328 | EXPORT_SYMBOL_GPL(kick_process); |
2266 | #endif /* CONFIG_SMP */ | 2329 | #endif /* CONFIG_SMP */ |
2267 | 2330 | ||
2268 | /** | ||
2269 | * task_oncpu_function_call - call a function on the cpu on which a task runs | ||
2270 | * @p: the task to evaluate | ||
2271 | * @func: the function to be called | ||
2272 | * @info: the function call argument | ||
2273 | * | ||
2274 | * Calls the function @func when the task is currently running. This might | ||
2275 | * be on the current CPU, which just calls the function directly | ||
2276 | */ | ||
2277 | void task_oncpu_function_call(struct task_struct *p, | ||
2278 | void (*func) (void *info), void *info) | ||
2279 | { | ||
2280 | int cpu; | ||
2281 | |||
2282 | preempt_disable(); | ||
2283 | cpu = task_cpu(p); | ||
2284 | if (task_curr(p)) | ||
2285 | smp_call_function_single(cpu, func, info, 1); | ||
2286 | preempt_enable(); | ||
2287 | } | ||
2288 | |||
2289 | #ifdef CONFIG_SMP | 2331 | #ifdef CONFIG_SMP |
2290 | /* | 2332 | /* |
2291 | * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. | 2333 | * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. |
@@ -2566,6 +2608,7 @@ static void __sched_fork(struct task_struct *p) | |||
2566 | p->se.sum_exec_runtime = 0; | 2608 | p->se.sum_exec_runtime = 0; |
2567 | p->se.prev_sum_exec_runtime = 0; | 2609 | p->se.prev_sum_exec_runtime = 0; |
2568 | p->se.nr_migrations = 0; | 2610 | p->se.nr_migrations = 0; |
2611 | p->se.vruntime = 0; | ||
2569 | 2612 | ||
2570 | #ifdef CONFIG_SCHEDSTATS | 2613 | #ifdef CONFIG_SCHEDSTATS |
2571 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2614 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
@@ -2776,9 +2819,12 @@ static inline void | |||
2776 | prepare_task_switch(struct rq *rq, struct task_struct *prev, | 2819 | prepare_task_switch(struct rq *rq, struct task_struct *prev, |
2777 | struct task_struct *next) | 2820 | struct task_struct *next) |
2778 | { | 2821 | { |
2822 | sched_info_switch(prev, next); | ||
2823 | perf_event_task_sched_out(prev, next); | ||
2779 | fire_sched_out_preempt_notifiers(prev, next); | 2824 | fire_sched_out_preempt_notifiers(prev, next); |
2780 | prepare_lock_switch(rq, next); | 2825 | prepare_lock_switch(rq, next); |
2781 | prepare_arch_switch(next); | 2826 | prepare_arch_switch(next); |
2827 | trace_sched_switch(prev, next); | ||
2782 | } | 2828 | } |
2783 | 2829 | ||
2784 | /** | 2830 | /** |
@@ -2911,7 +2957,7 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2911 | struct mm_struct *mm, *oldmm; | 2957 | struct mm_struct *mm, *oldmm; |
2912 | 2958 | ||
2913 | prepare_task_switch(rq, prev, next); | 2959 | prepare_task_switch(rq, prev, next); |
2914 | trace_sched_switch(prev, next); | 2960 | |
2915 | mm = next->mm; | 2961 | mm = next->mm; |
2916 | oldmm = prev->active_mm; | 2962 | oldmm = prev->active_mm; |
2917 | /* | 2963 | /* |
@@ -3568,6 +3614,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
3568 | } | 3614 | } |
3569 | 3615 | ||
3570 | /* | 3616 | /* |
3617 | * Account system cpu time to a process and desired cpustat field | ||
3618 | * @p: the process that the cpu time gets accounted to | ||
3619 | * @cputime: the cpu time spent in kernel space since the last update | ||
3620 | * @cputime_scaled: cputime scaled by cpu frequency | ||
3621 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
3622 | */ | ||
3623 | static inline | ||
3624 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
3625 | cputime_t cputime_scaled, cputime64_t *target_cputime64) | ||
3626 | { | ||
3627 | cputime64_t tmp = cputime_to_cputime64(cputime); | ||
3628 | |||
3629 | /* Add system time to process. */ | ||
3630 | p->stime = cputime_add(p->stime, cputime); | ||
3631 | p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | ||
3632 | account_group_system_time(p, cputime); | ||
3633 | |||
3634 | /* Add system time to cpustat. */ | ||
3635 | *target_cputime64 = cputime64_add(*target_cputime64, tmp); | ||
3636 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
3637 | |||
3638 | /* Account for system time used */ | ||
3639 | acct_update_integrals(p); | ||
3640 | } | ||
3641 | |||
3642 | /* | ||
3571 | * Account system cpu time to a process. | 3643 | * Account system cpu time to a process. |
3572 | * @p: the process that the cpu time gets accounted to | 3644 | * @p: the process that the cpu time gets accounted to |
3573 | * @hardirq_offset: the offset to subtract from hardirq_count() | 3645 | * @hardirq_offset: the offset to subtract from hardirq_count() |
@@ -3578,36 +3650,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3578 | cputime_t cputime, cputime_t cputime_scaled) | 3650 | cputime_t cputime, cputime_t cputime_scaled) |
3579 | { | 3651 | { |
3580 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 3652 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
3581 | cputime64_t tmp; | 3653 | cputime64_t *target_cputime64; |
3582 | 3654 | ||
3583 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | 3655 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { |
3584 | account_guest_time(p, cputime, cputime_scaled); | 3656 | account_guest_time(p, cputime, cputime_scaled); |
3585 | return; | 3657 | return; |
3586 | } | 3658 | } |
3587 | 3659 | ||
3588 | /* Add system time to process. */ | ||
3589 | p->stime = cputime_add(p->stime, cputime); | ||
3590 | p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | ||
3591 | account_group_system_time(p, cputime); | ||
3592 | |||
3593 | /* Add system time to cpustat. */ | ||
3594 | tmp = cputime_to_cputime64(cputime); | ||
3595 | if (hardirq_count() - hardirq_offset) | 3660 | if (hardirq_count() - hardirq_offset) |
3596 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3661 | target_cputime64 = &cpustat->irq; |
3597 | else if (in_serving_softirq()) | 3662 | else if (in_serving_softirq()) |
3598 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3663 | target_cputime64 = &cpustat->softirq; |
3599 | else | 3664 | else |
3600 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3665 | target_cputime64 = &cpustat->system; |
3601 | |||
3602 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
3603 | 3666 | ||
3604 | /* Account for system time used */ | 3667 | __account_system_time(p, cputime, cputime_scaled, target_cputime64); |
3605 | acct_update_integrals(p); | ||
3606 | } | 3668 | } |
3607 | 3669 | ||
3608 | /* | 3670 | /* |
3609 | * Account for involuntary wait time. | 3671 | * Account for involuntary wait time. |
3610 | * @steal: the cpu time spent in involuntary wait | 3672 | * @cputime: the cpu time spent in involuntary wait |
3611 | */ | 3673 | */ |
3612 | void account_steal_time(cputime_t cputime) | 3674 | void account_steal_time(cputime_t cputime) |
3613 | { | 3675 | { |
@@ -3635,6 +3697,73 @@ void account_idle_time(cputime_t cputime) | |||
3635 | 3697 | ||
3636 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 3698 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
3637 | 3699 | ||
3700 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
3701 | /* | ||
3702 | * Account a tick to a process and cpustat | ||
3703 | * @p: the process that the cpu time gets accounted to | ||
3704 | * @user_tick: is the tick from userspace | ||
3705 | * @rq: the pointer to rq | ||
3706 | * | ||
3707 | * Tick demultiplexing follows the order | ||
3708 | * - pending hardirq update | ||
3709 | * - pending softirq update | ||
3710 | * - user_time | ||
3711 | * - idle_time | ||
3712 | * - system time | ||
3713 | * - check for guest_time | ||
3714 | * - else account as system_time | ||
3715 | * | ||
3716 | * Check for hardirq is done both for system and user time as there is | ||
3717 | * no timer going off while we are on hardirq and hence we may never get an | ||
3718 | * opportunity to update it solely in system time. | ||
3719 | * p->stime and friends are only updated on system time and not on irq | ||
3720 | * softirq as those do not count in task exec_runtime any more. | ||
3721 | */ | ||
3722 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
3723 | struct rq *rq) | ||
3724 | { | ||
3725 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
3726 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); | ||
3727 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
3728 | |||
3729 | if (irqtime_account_hi_update()) { | ||
3730 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | ||
3731 | } else if (irqtime_account_si_update()) { | ||
3732 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | ||
3733 | } else if (this_cpu_ksoftirqd() == p) { | ||
3734 | /* | ||
3735 | * ksoftirqd time do not get accounted in cpu_softirq_time. | ||
3736 | * So, we have to handle it separately here. | ||
3737 | * Also, p->stime needs to be updated for ksoftirqd. | ||
3738 | */ | ||
3739 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
3740 | &cpustat->softirq); | ||
3741 | } else if (user_tick) { | ||
3742 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3743 | } else if (p == rq->idle) { | ||
3744 | account_idle_time(cputime_one_jiffy); | ||
3745 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
3746 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3747 | } else { | ||
3748 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
3749 | &cpustat->system); | ||
3750 | } | ||
3751 | } | ||
3752 | |||
3753 | static void irqtime_account_idle_ticks(int ticks) | ||
3754 | { | ||
3755 | int i; | ||
3756 | struct rq *rq = this_rq(); | ||
3757 | |||
3758 | for (i = 0; i < ticks; i++) | ||
3759 | irqtime_account_process_tick(current, 0, rq); | ||
3760 | } | ||
3761 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
3762 | static void irqtime_account_idle_ticks(int ticks) {} | ||
3763 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
3764 | struct rq *rq) {} | ||
3765 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
3766 | |||
3638 | /* | 3767 | /* |
3639 | * Account a single tick of cpu time. | 3768 | * Account a single tick of cpu time. |
3640 | * @p: the process that the cpu time gets accounted to | 3769 | * @p: the process that the cpu time gets accounted to |
@@ -3645,6 +3774,11 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
3645 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 3774 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
3646 | struct rq *rq = this_rq(); | 3775 | struct rq *rq = this_rq(); |
3647 | 3776 | ||
3777 | if (sched_clock_irqtime) { | ||
3778 | irqtime_account_process_tick(p, user_tick, rq); | ||
3779 | return; | ||
3780 | } | ||
3781 | |||
3648 | if (user_tick) | 3782 | if (user_tick) |
3649 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 3783 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
3650 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 3784 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
@@ -3670,6 +3804,12 @@ void account_steal_ticks(unsigned long ticks) | |||
3670 | */ | 3804 | */ |
3671 | void account_idle_ticks(unsigned long ticks) | 3805 | void account_idle_ticks(unsigned long ticks) |
3672 | { | 3806 | { |
3807 | |||
3808 | if (sched_clock_irqtime) { | ||
3809 | irqtime_account_idle_ticks(ticks); | ||
3810 | return; | ||
3811 | } | ||
3812 | |||
3673 | account_idle_time(jiffies_to_cputime(ticks)); | 3813 | account_idle_time(jiffies_to_cputime(ticks)); |
3674 | } | 3814 | } |
3675 | 3815 | ||
@@ -3945,9 +4085,6 @@ need_resched: | |||
3945 | rcu_note_context_switch(cpu); | 4085 | rcu_note_context_switch(cpu); |
3946 | prev = rq->curr; | 4086 | prev = rq->curr; |
3947 | 4087 | ||
3948 | release_kernel_lock(prev); | ||
3949 | need_resched_nonpreemptible: | ||
3950 | |||
3951 | schedule_debug(prev); | 4088 | schedule_debug(prev); |
3952 | 4089 | ||
3953 | if (sched_feat(HRTICK)) | 4090 | if (sched_feat(HRTICK)) |
@@ -3989,9 +4126,6 @@ need_resched_nonpreemptible: | |||
3989 | rq->skip_clock_update = 0; | 4126 | rq->skip_clock_update = 0; |
3990 | 4127 | ||
3991 | if (likely(prev != next)) { | 4128 | if (likely(prev != next)) { |
3992 | sched_info_switch(prev, next); | ||
3993 | perf_event_task_sched_out(prev, next); | ||
3994 | |||
3995 | rq->nr_switches++; | 4129 | rq->nr_switches++; |
3996 | rq->curr = next; | 4130 | rq->curr = next; |
3997 | ++*switch_count; | 4131 | ++*switch_count; |
@@ -4010,9 +4144,6 @@ need_resched_nonpreemptible: | |||
4010 | 4144 | ||
4011 | post_schedule(rq); | 4145 | post_schedule(rq); |
4012 | 4146 | ||
4013 | if (unlikely(reacquire_kernel_lock(prev))) | ||
4014 | goto need_resched_nonpreemptible; | ||
4015 | |||
4016 | preempt_enable_no_resched(); | 4147 | preempt_enable_no_resched(); |
4017 | if (need_resched()) | 4148 | if (need_resched()) |
4018 | goto need_resched; | 4149 | goto need_resched; |
@@ -4213,6 +4344,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) | |||
4213 | { | 4344 | { |
4214 | __wake_up_common(q, mode, 1, 0, key); | 4345 | __wake_up_common(q, mode, 1, 0, key); |
4215 | } | 4346 | } |
4347 | EXPORT_SYMBOL_GPL(__wake_up_locked_key); | ||
4216 | 4348 | ||
4217 | /** | 4349 | /** |
4218 | * __wake_up_sync_key - wake up threads blocked on a waitqueue. | 4350 | * __wake_up_sync_key - wake up threads blocked on a waitqueue. |
@@ -4570,11 +4702,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4570 | 4702 | ||
4571 | if (running) | 4703 | if (running) |
4572 | p->sched_class->set_curr_task(rq); | 4704 | p->sched_class->set_curr_task(rq); |
4573 | if (on_rq) { | 4705 | if (on_rq) |
4574 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4706 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
4575 | 4707 | ||
4576 | check_class_changed(rq, p, prev_class, oldprio, running); | 4708 | check_class_changed(rq, p, prev_class, oldprio); |
4577 | } | ||
4578 | task_rq_unlock(rq, &flags); | 4709 | task_rq_unlock(rq, &flags); |
4579 | } | 4710 | } |
4580 | 4711 | ||
@@ -4822,12 +4953,15 @@ recheck: | |||
4822 | param->sched_priority > rlim_rtprio) | 4953 | param->sched_priority > rlim_rtprio) |
4823 | return -EPERM; | 4954 | return -EPERM; |
4824 | } | 4955 | } |
4956 | |||
4825 | /* | 4957 | /* |
4826 | * Like positive nice levels, dont allow tasks to | 4958 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
4827 | * move out of SCHED_IDLE either: | 4959 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
4828 | */ | 4960 | */ |
4829 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) | 4961 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { |
4830 | return -EPERM; | 4962 | if (!can_nice(p, TASK_NICE(p))) |
4963 | return -EPERM; | ||
4964 | } | ||
4831 | 4965 | ||
4832 | /* can't change other user's priorities */ | 4966 | /* can't change other user's priorities */ |
4833 | if (!check_same_owner(p)) | 4967 | if (!check_same_owner(p)) |
@@ -4902,11 +5036,10 @@ recheck: | |||
4902 | 5036 | ||
4903 | if (running) | 5037 | if (running) |
4904 | p->sched_class->set_curr_task(rq); | 5038 | p->sched_class->set_curr_task(rq); |
4905 | if (on_rq) { | 5039 | if (on_rq) |
4906 | activate_task(rq, p, 0); | 5040 | activate_task(rq, p, 0); |
4907 | 5041 | ||
4908 | check_class_changed(rq, p, prev_class, oldprio, running); | 5042 | check_class_changed(rq, p, prev_class, oldprio); |
4909 | } | ||
4910 | __task_rq_unlock(rq); | 5043 | __task_rq_unlock(rq); |
4911 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 5044 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
4912 | 5045 | ||
@@ -5323,6 +5456,65 @@ void __sched yield(void) | |||
5323 | } | 5456 | } |
5324 | EXPORT_SYMBOL(yield); | 5457 | EXPORT_SYMBOL(yield); |
5325 | 5458 | ||
5459 | /** | ||
5460 | * yield_to - yield the current processor to another thread in | ||
5461 | * your thread group, or accelerate that thread toward the | ||
5462 | * processor it's on. | ||
5463 | * | ||
5464 | * It's the caller's job to ensure that the target task struct | ||
5465 | * can't go away on us before we can do any checks. | ||
5466 | * | ||
5467 | * Returns true if we indeed boosted the target task. | ||
5468 | */ | ||
5469 | bool __sched yield_to(struct task_struct *p, bool preempt) | ||
5470 | { | ||
5471 | struct task_struct *curr = current; | ||
5472 | struct rq *rq, *p_rq; | ||
5473 | unsigned long flags; | ||
5474 | bool yielded = 0; | ||
5475 | |||
5476 | local_irq_save(flags); | ||
5477 | rq = this_rq(); | ||
5478 | |||
5479 | again: | ||
5480 | p_rq = task_rq(p); | ||
5481 | double_rq_lock(rq, p_rq); | ||
5482 | while (task_rq(p) != p_rq) { | ||
5483 | double_rq_unlock(rq, p_rq); | ||
5484 | goto again; | ||
5485 | } | ||
5486 | |||
5487 | if (!curr->sched_class->yield_to_task) | ||
5488 | goto out; | ||
5489 | |||
5490 | if (curr->sched_class != p->sched_class) | ||
5491 | goto out; | ||
5492 | |||
5493 | if (task_running(p_rq, p) || p->state) | ||
5494 | goto out; | ||
5495 | |||
5496 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); | ||
5497 | if (yielded) { | ||
5498 | schedstat_inc(rq, yld_count); | ||
5499 | /* | ||
5500 | * Make p's CPU reschedule; pick_next_entity takes care of | ||
5501 | * fairness. | ||
5502 | */ | ||
5503 | if (preempt && rq != p_rq) | ||
5504 | resched_task(p_rq->curr); | ||
5505 | } | ||
5506 | |||
5507 | out: | ||
5508 | double_rq_unlock(rq, p_rq); | ||
5509 | local_irq_restore(flags); | ||
5510 | |||
5511 | if (yielded) | ||
5512 | schedule(); | ||
5513 | |||
5514 | return yielded; | ||
5515 | } | ||
5516 | EXPORT_SYMBOL_GPL(yield_to); | ||
5517 | |||
5326 | /* | 5518 | /* |
5327 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 5519 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
5328 | * that process accounting knows that this is a task in IO wait state. | 5520 | * that process accounting knows that this is a task in IO wait state. |
@@ -5571,7 +5763,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5571 | * The idle tasks have their own, simple scheduling class: | 5763 | * The idle tasks have their own, simple scheduling class: |
5572 | */ | 5764 | */ |
5573 | idle->sched_class = &idle_sched_class; | 5765 | idle->sched_class = &idle_sched_class; |
5574 | ftrace_graph_init_task(idle); | 5766 | ftrace_graph_init_idle_task(idle, cpu); |
5575 | } | 5767 | } |
5576 | 5768 | ||
5577 | /* | 5769 | /* |
@@ -7796,6 +7988,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | |||
7796 | INIT_LIST_HEAD(&cfs_rq->tasks); | 7988 | INIT_LIST_HEAD(&cfs_rq->tasks); |
7797 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7989 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7798 | cfs_rq->rq = rq; | 7990 | cfs_rq->rq = rq; |
7991 | /* allow initial update_cfs_load() to truncate */ | ||
7992 | #ifdef CONFIG_SMP | ||
7993 | cfs_rq->load_stamp = 1; | ||
7994 | #endif | ||
7799 | #endif | 7995 | #endif |
7800 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 7996 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
7801 | } | 7997 | } |
@@ -8074,7 +8270,7 @@ static inline int preempt_count_equals(int preempt_offset) | |||
8074 | { | 8270 | { |
8075 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); | 8271 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
8076 | 8272 | ||
8077 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); | 8273 | return (nested == preempt_offset); |
8078 | } | 8274 | } |
8079 | 8275 | ||
8080 | void __might_sleep(const char *file, int line, int preempt_offset) | 8276 | void __might_sleep(const char *file, int line, int preempt_offset) |
@@ -8109,6 +8305,8 @@ EXPORT_SYMBOL(__might_sleep); | |||
8109 | #ifdef CONFIG_MAGIC_SYSRQ | 8305 | #ifdef CONFIG_MAGIC_SYSRQ |
8110 | static void normalize_task(struct rq *rq, struct task_struct *p) | 8306 | static void normalize_task(struct rq *rq, struct task_struct *p) |
8111 | { | 8307 | { |
8308 | const struct sched_class *prev_class = p->sched_class; | ||
8309 | int old_prio = p->prio; | ||
8112 | int on_rq; | 8310 | int on_rq; |
8113 | 8311 | ||
8114 | on_rq = p->se.on_rq; | 8312 | on_rq = p->se.on_rq; |
@@ -8119,6 +8317,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
8119 | activate_task(rq, p, 0); | 8317 | activate_task(rq, p, 0); |
8120 | resched_task(rq->curr); | 8318 | resched_task(rq->curr); |
8121 | } | 8319 | } |
8320 | |||
8321 | check_class_changed(rq, p, prev_class, old_prio); | ||
8122 | } | 8322 | } |
8123 | 8323 | ||
8124 | void normalize_rt_tasks(void) | 8324 | void normalize_rt_tasks(void) |
@@ -8510,7 +8710,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8510 | /* Propagate contribution to hierarchy */ | 8710 | /* Propagate contribution to hierarchy */ |
8511 | raw_spin_lock_irqsave(&rq->lock, flags); | 8711 | raw_spin_lock_irqsave(&rq->lock, flags); |
8512 | for_each_sched_entity(se) | 8712 | for_each_sched_entity(se) |
8513 | update_cfs_shares(group_cfs_rq(se), 0); | 8713 | update_cfs_shares(group_cfs_rq(se)); |
8514 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 8714 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
8515 | } | 8715 | } |
8516 | 8716 | ||
@@ -8884,7 +9084,8 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
8884 | } | 9084 | } |
8885 | 9085 | ||
8886 | static void | 9086 | static void |
8887 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) | 9087 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, |
9088 | struct cgroup *old_cgrp, struct task_struct *task) | ||
8888 | { | 9089 | { |
8889 | /* | 9090 | /* |
8890 | * cgroup_exit() is called in the copy_process() failure path. | 9091 | * cgroup_exit() is called in the copy_process() failure path. |