diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 180 |
1 files changed, 147 insertions, 33 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 0236958addcb..1a5f73c1fcdc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -600,7 +600,6 @@ struct rq { | |||
600 | /* BKL stats */ | 600 | /* BKL stats */ |
601 | unsigned int bkl_count; | 601 | unsigned int bkl_count; |
602 | #endif | 602 | #endif |
603 | struct lock_class_key rq_lock_key; | ||
604 | }; | 603 | }; |
605 | 604 | ||
606 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 605 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
@@ -809,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
809 | 808 | ||
810 | /* | 809 | /* |
811 | * ratelimit for updating the group shares. | 810 | * ratelimit for updating the group shares. |
812 | * default: 0.5ms | 811 | * default: 0.25ms |
813 | */ | 812 | */ |
814 | const_debug unsigned int sysctl_sched_shares_ratelimit = 500000; | 813 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
815 | 814 | ||
816 | /* | 815 | /* |
817 | * period over which we measure -rt task cpu usage in us. | 816 | * period over which we measure -rt task cpu usage in us. |
@@ -834,7 +833,7 @@ static inline u64 global_rt_period(void) | |||
834 | 833 | ||
835 | static inline u64 global_rt_runtime(void) | 834 | static inline u64 global_rt_runtime(void) |
836 | { | 835 | { |
837 | if (sysctl_sched_rt_period < 0) | 836 | if (sysctl_sched_rt_runtime < 0) |
838 | return RUNTIME_INF; | 837 | return RUNTIME_INF; |
839 | 838 | ||
840 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | 839 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; |
@@ -2759,10 +2758,10 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) | |||
2759 | } else { | 2758 | } else { |
2760 | if (rq1 < rq2) { | 2759 | if (rq1 < rq2) { |
2761 | spin_lock(&rq1->lock); | 2760 | spin_lock(&rq1->lock); |
2762 | spin_lock(&rq2->lock); | 2761 | spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); |
2763 | } else { | 2762 | } else { |
2764 | spin_lock(&rq2->lock); | 2763 | spin_lock(&rq2->lock); |
2765 | spin_lock(&rq1->lock); | 2764 | spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); |
2766 | } | 2765 | } |
2767 | } | 2766 | } |
2768 | update_rq_clock(rq1); | 2767 | update_rq_clock(rq1); |
@@ -2805,14 +2804,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
2805 | if (busiest < this_rq) { | 2804 | if (busiest < this_rq) { |
2806 | spin_unlock(&this_rq->lock); | 2805 | spin_unlock(&this_rq->lock); |
2807 | spin_lock(&busiest->lock); | 2806 | spin_lock(&busiest->lock); |
2808 | spin_lock(&this_rq->lock); | 2807 | spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); |
2809 | ret = 1; | 2808 | ret = 1; |
2810 | } else | 2809 | } else |
2811 | spin_lock(&busiest->lock); | 2810 | spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); |
2812 | } | 2811 | } |
2813 | return ret; | 2812 | return ret; |
2814 | } | 2813 | } |
2815 | 2814 | ||
2815 | static void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | ||
2816 | __releases(busiest->lock) | ||
2817 | { | ||
2818 | spin_unlock(&busiest->lock); | ||
2819 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | ||
2820 | } | ||
2821 | |||
2816 | /* | 2822 | /* |
2817 | * If dest_cpu is allowed for this process, migrate the task to it. | 2823 | * If dest_cpu is allowed for this process, migrate the task to it. |
2818 | * This is accomplished by forcing the cpus_allowed mask to only | 2824 | * This is accomplished by forcing the cpus_allowed mask to only |
@@ -3637,7 +3643,7 @@ redo: | |||
3637 | ld_moved = move_tasks(this_rq, this_cpu, busiest, | 3643 | ld_moved = move_tasks(this_rq, this_cpu, busiest, |
3638 | imbalance, sd, CPU_NEWLY_IDLE, | 3644 | imbalance, sd, CPU_NEWLY_IDLE, |
3639 | &all_pinned); | 3645 | &all_pinned); |
3640 | spin_unlock(&busiest->lock); | 3646 | double_unlock_balance(this_rq, busiest); |
3641 | 3647 | ||
3642 | if (unlikely(all_pinned)) { | 3648 | if (unlikely(all_pinned)) { |
3643 | cpu_clear(cpu_of(busiest), *cpus); | 3649 | cpu_clear(cpu_of(busiest), *cpus); |
@@ -3752,7 +3758,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
3752 | else | 3758 | else |
3753 | schedstat_inc(sd, alb_failed); | 3759 | schedstat_inc(sd, alb_failed); |
3754 | } | 3760 | } |
3755 | spin_unlock(&target_rq->lock); | 3761 | double_unlock_balance(busiest_rq, target_rq); |
3756 | } | 3762 | } |
3757 | 3763 | ||
3758 | #ifdef CONFIG_NO_HZ | 3764 | #ifdef CONFIG_NO_HZ |
@@ -4173,6 +4179,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
4173 | } | 4179 | } |
4174 | 4180 | ||
4175 | /* | 4181 | /* |
4182 | * Use precise platform statistics if available: | ||
4183 | */ | ||
4184 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
4185 | cputime_t task_utime(struct task_struct *p) | ||
4186 | { | ||
4187 | return p->utime; | ||
4188 | } | ||
4189 | |||
4190 | cputime_t task_stime(struct task_struct *p) | ||
4191 | { | ||
4192 | return p->stime; | ||
4193 | } | ||
4194 | #else | ||
4195 | cputime_t task_utime(struct task_struct *p) | ||
4196 | { | ||
4197 | clock_t utime = cputime_to_clock_t(p->utime), | ||
4198 | total = utime + cputime_to_clock_t(p->stime); | ||
4199 | u64 temp; | ||
4200 | |||
4201 | /* | ||
4202 | * Use CFS's precise accounting: | ||
4203 | */ | ||
4204 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
4205 | |||
4206 | if (total) { | ||
4207 | temp *= utime; | ||
4208 | do_div(temp, total); | ||
4209 | } | ||
4210 | utime = (clock_t)temp; | ||
4211 | |||
4212 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
4213 | return p->prev_utime; | ||
4214 | } | ||
4215 | |||
4216 | cputime_t task_stime(struct task_struct *p) | ||
4217 | { | ||
4218 | clock_t stime; | ||
4219 | |||
4220 | /* | ||
4221 | * Use CFS's precise accounting. (we subtract utime from | ||
4222 | * the total, to make sure the total observed by userspace | ||
4223 | * grows monotonically - apps rely on that): | ||
4224 | */ | ||
4225 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
4226 | cputime_to_clock_t(task_utime(p)); | ||
4227 | |||
4228 | if (stime >= 0) | ||
4229 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
4230 | |||
4231 | return p->prev_stime; | ||
4232 | } | ||
4233 | #endif | ||
4234 | |||
4235 | inline cputime_t task_gtime(struct task_struct *p) | ||
4236 | { | ||
4237 | return p->gtime; | ||
4238 | } | ||
4239 | |||
4240 | /* | ||
4176 | * This function gets called by the timer code, with HZ frequency. | 4241 | * This function gets called by the timer code, with HZ frequency. |
4177 | * We call it with interrupts disabled. | 4242 | * We call it with interrupts disabled. |
4178 | * | 4243 | * |
@@ -4663,6 +4728,52 @@ int __sched wait_for_completion_killable(struct completion *x) | |||
4663 | } | 4728 | } |
4664 | EXPORT_SYMBOL(wait_for_completion_killable); | 4729 | EXPORT_SYMBOL(wait_for_completion_killable); |
4665 | 4730 | ||
4731 | /** | ||
4732 | * try_wait_for_completion - try to decrement a completion without blocking | ||
4733 | * @x: completion structure | ||
4734 | * | ||
4735 | * Returns: 0 if a decrement cannot be done without blocking | ||
4736 | * 1 if a decrement succeeded. | ||
4737 | * | ||
4738 | * If a completion is being used as a counting completion, | ||
4739 | * attempt to decrement the counter without blocking. This | ||
4740 | * enables us to avoid waiting if the resource the completion | ||
4741 | * is protecting is not available. | ||
4742 | */ | ||
4743 | bool try_wait_for_completion(struct completion *x) | ||
4744 | { | ||
4745 | int ret = 1; | ||
4746 | |||
4747 | spin_lock_irq(&x->wait.lock); | ||
4748 | if (!x->done) | ||
4749 | ret = 0; | ||
4750 | else | ||
4751 | x->done--; | ||
4752 | spin_unlock_irq(&x->wait.lock); | ||
4753 | return ret; | ||
4754 | } | ||
4755 | EXPORT_SYMBOL(try_wait_for_completion); | ||
4756 | |||
4757 | /** | ||
4758 | * completion_done - Test to see if a completion has any waiters | ||
4759 | * @x: completion structure | ||
4760 | * | ||
4761 | * Returns: 0 if there are waiters (wait_for_completion() in progress) | ||
4762 | * 1 if there are no waiters. | ||
4763 | * | ||
4764 | */ | ||
4765 | bool completion_done(struct completion *x) | ||
4766 | { | ||
4767 | int ret = 1; | ||
4768 | |||
4769 | spin_lock_irq(&x->wait.lock); | ||
4770 | if (!x->done) | ||
4771 | ret = 0; | ||
4772 | spin_unlock_irq(&x->wait.lock); | ||
4773 | return ret; | ||
4774 | } | ||
4775 | EXPORT_SYMBOL(completion_done); | ||
4776 | |||
4666 | static long __sched | 4777 | static long __sched |
4667 | sleep_on_common(wait_queue_head_t *q, int state, long timeout) | 4778 | sleep_on_common(wait_queue_head_t *q, int state, long timeout) |
4668 | { | 4779 | { |
@@ -5004,19 +5115,21 @@ recheck: | |||
5004 | return -EPERM; | 5115 | return -EPERM; |
5005 | } | 5116 | } |
5006 | 5117 | ||
5118 | if (user) { | ||
5007 | #ifdef CONFIG_RT_GROUP_SCHED | 5119 | #ifdef CONFIG_RT_GROUP_SCHED |
5008 | /* | 5120 | /* |
5009 | * Do not allow realtime tasks into groups that have no runtime | 5121 | * Do not allow realtime tasks into groups that have no runtime |
5010 | * assigned. | 5122 | * assigned. |
5011 | */ | 5123 | */ |
5012 | if (user | 5124 | if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) |
5013 | && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) | 5125 | return -EPERM; |
5014 | return -EPERM; | ||
5015 | #endif | 5126 | #endif |
5016 | 5127 | ||
5017 | retval = security_task_setscheduler(p, policy, param); | 5128 | retval = security_task_setscheduler(p, policy, param); |
5018 | if (retval) | 5129 | if (retval) |
5019 | return retval; | 5130 | return retval; |
5131 | } | ||
5132 | |||
5020 | /* | 5133 | /* |
5021 | * make sure no PI-waiters arrive (or leave) while we are | 5134 | * make sure no PI-waiters arrive (or leave) while we are |
5022 | * changing the priority of the task: | 5135 | * changing the priority of the task: |
@@ -5732,6 +5845,8 @@ static inline void sched_init_granularity(void) | |||
5732 | sysctl_sched_latency = limit; | 5845 | sysctl_sched_latency = limit; |
5733 | 5846 | ||
5734 | sysctl_sched_wakeup_granularity *= factor; | 5847 | sysctl_sched_wakeup_granularity *= factor; |
5848 | |||
5849 | sysctl_sched_shares_ratelimit *= factor; | ||
5735 | } | 5850 | } |
5736 | 5851 | ||
5737 | #ifdef CONFIG_SMP | 5852 | #ifdef CONFIG_SMP |
@@ -7671,34 +7786,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | |||
7671 | } | 7786 | } |
7672 | 7787 | ||
7673 | #ifdef CONFIG_SCHED_MC | 7788 | #ifdef CONFIG_SCHED_MC |
7674 | static ssize_t sched_mc_power_savings_show(struct sys_device *dev, | 7789 | static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, |
7675 | struct sysdev_attribute *attr, char *page) | 7790 | char *page) |
7676 | { | 7791 | { |
7677 | return sprintf(page, "%u\n", sched_mc_power_savings); | 7792 | return sprintf(page, "%u\n", sched_mc_power_savings); |
7678 | } | 7793 | } |
7679 | static ssize_t sched_mc_power_savings_store(struct sys_device *dev, | 7794 | static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, |
7680 | struct sysdev_attribute *attr, | ||
7681 | const char *buf, size_t count) | 7795 | const char *buf, size_t count) |
7682 | { | 7796 | { |
7683 | return sched_power_savings_store(buf, count, 0); | 7797 | return sched_power_savings_store(buf, count, 0); |
7684 | } | 7798 | } |
7685 | static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, | 7799 | static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, |
7686 | sched_mc_power_savings_store); | 7800 | sched_mc_power_savings_show, |
7801 | sched_mc_power_savings_store); | ||
7687 | #endif | 7802 | #endif |
7688 | 7803 | ||
7689 | #ifdef CONFIG_SCHED_SMT | 7804 | #ifdef CONFIG_SCHED_SMT |
7690 | static ssize_t sched_smt_power_savings_show(struct sys_device *dev, | 7805 | static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, |
7691 | struct sysdev_attribute *attr, char *page) | 7806 | char *page) |
7692 | { | 7807 | { |
7693 | return sprintf(page, "%u\n", sched_smt_power_savings); | 7808 | return sprintf(page, "%u\n", sched_smt_power_savings); |
7694 | } | 7809 | } |
7695 | static ssize_t sched_smt_power_savings_store(struct sys_device *dev, | 7810 | static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, |
7696 | struct sysdev_attribute *attr, | ||
7697 | const char *buf, size_t count) | 7811 | const char *buf, size_t count) |
7698 | { | 7812 | { |
7699 | return sched_power_savings_store(buf, count, 1); | 7813 | return sched_power_savings_store(buf, count, 1); |
7700 | } | 7814 | } |
7701 | static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, | 7815 | static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, |
7816 | sched_smt_power_savings_show, | ||
7702 | sched_smt_power_savings_store); | 7817 | sched_smt_power_savings_store); |
7703 | #endif | 7818 | #endif |
7704 | 7819 | ||
@@ -7998,7 +8113,6 @@ void __init sched_init(void) | |||
7998 | 8113 | ||
7999 | rq = cpu_rq(i); | 8114 | rq = cpu_rq(i); |
8000 | spin_lock_init(&rq->lock); | 8115 | spin_lock_init(&rq->lock); |
8001 | lockdep_set_class(&rq->lock, &rq->rq_lock_key); | ||
8002 | rq->nr_running = 0; | 8116 | rq->nr_running = 0; |
8003 | init_cfs_rq(&rq->cfs, rq); | 8117 | init_cfs_rq(&rq->cfs, rq); |
8004 | init_rt_rq(&rq->rt, rq); | 8118 | init_rt_rq(&rq->rt, rq); |
@@ -8455,8 +8569,8 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
8455 | WARN_ON(!parent); /* root should already exist */ | 8569 | WARN_ON(!parent); /* root should already exist */ |
8456 | 8570 | ||
8457 | tg->parent = parent; | 8571 | tg->parent = parent; |
8458 | list_add_rcu(&tg->siblings, &parent->children); | ||
8459 | INIT_LIST_HEAD(&tg->children); | 8572 | INIT_LIST_HEAD(&tg->children); |
8573 | list_add_rcu(&tg->siblings, &parent->children); | ||
8460 | spin_unlock_irqrestore(&task_group_lock, flags); | 8574 | spin_unlock_irqrestore(&task_group_lock, flags); |
8461 | 8575 | ||
8462 | return tg; | 8576 | return tg; |