Diffstat (limited to 'kernel/sched.c')
-rw-r--r--   kernel/sched.c   144
1 file changed, 132 insertions, 12 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d601fb0406ca..ad1962dc0aa2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
         hrtimer_init(&rt_b->rt_period_timer,
                         CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rt_b->rt_period_timer.function = sched_rt_period_timer;
-        rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+        rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }

 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
@@ -808,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;

 /*
  * ratelimit for updating the group shares.
- * default: 0.5ms
+ * default: 0.25ms
  */
-const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+unsigned int sysctl_sched_shares_ratelimit = 250000;

 /*
  * period over which we measure -rt task cpu usage in us.
@@ -1087,7 +1087,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
         return NOTIFY_DONE;
 }

-static void init_hrtick(void)
+static __init void init_hrtick(void)
 {
         hotcpu_notifier(hotplug_hrtick, 0);
 }
@@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq)

         hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rq->hrtick_timer.function = hrtick;
-        rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+        rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
@@ -4179,6 +4179,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 }

 /*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+        return p->utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+        return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+        clock_t utime = cputime_to_clock_t(p->utime),
+                total = utime + cputime_to_clock_t(p->stime);
+        u64 temp;
+
+        /*
+         * Use CFS's precise accounting:
+         */
+        temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+        if (total) {
+                temp *= utime;
+                do_div(temp, total);
+        }
+        utime = (clock_t)temp;
+
+        p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+        return p->prev_utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+        clock_t stime;
+
+        /*
+         * Use CFS's precise accounting. (we subtract utime from
+         * the total, to make sure the total observed by userspace
+         * grows monotonically - apps rely on that):
+         */
+        stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+                        cputime_to_clock_t(task_utime(p));
+
+        if (stime >= 0)
+                p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
+        return p->prev_stime;
+}
+#endif
+
+inline cputime_t task_gtime(struct task_struct *p)
+{
+        return p->gtime;
+}
+
+/*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  *
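Note: the proportional split done by task_utime() above is easier to follow with concrete numbers. The following is an illustration only (plain userspace C with made-up sample values, not part of the patch); it mirrors the arithmetic of scaling the CFS-measured runtime by the tick-sampled user/total ratio and taking stime as the remainder.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* Hypothetical tick-based samples: 300 user ticks and 100 system
         * ticks, while CFS measured 500 ticks' worth of total runtime. */
        uint64_t utime_ticks = 300;
        uint64_t stime_ticks = 100;
        uint64_t total_ticks = utime_ticks + stime_ticks;
        uint64_t cfs_total   = 500;  /* nsec_to_clock_t(se.sum_exec_runtime) */

        /* utime: the CFS total scaled by the user/total ratio (375 here). */
        uint64_t utime = total_ticks ? cfs_total * utime_ticks / total_ticks
                                     : cfs_total;
        /* stime: the remainder, so utime + stime equals the CFS total and
         * the sum observed by userspace stays monotonic (125 here). */
        uint64_t stime = cfs_total - utime;

        printf("utime=%llu stime=%llu\n",
               (unsigned long long)utime, (unsigned long long)stime);
        return 0;
}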
@@ -4669,6 +4728,52 @@ int __sched wait_for_completion_killable(struct completion *x)
 }
 EXPORT_SYMBOL(wait_for_completion_killable);

+/**
+ * try_wait_for_completion - try to decrement a completion without blocking
+ * @x: completion structure
+ *
+ * Returns: 0 if a decrement cannot be done without blocking
+ *          1 if a decrement succeeded.
+ *
+ * If a completion is being used as a counting completion,
+ * attempt to decrement the counter without blocking. This
+ * enables us to avoid waiting if the resource the completion
+ * is protecting is not available.
+ */
+bool try_wait_for_completion(struct completion *x)
+{
+        int ret = 1;
+
+        spin_lock_irq(&x->wait.lock);
+        if (!x->done)
+                ret = 0;
+        else
+                x->done--;
+        spin_unlock_irq(&x->wait.lock);
+        return ret;
+}
+EXPORT_SYMBOL(try_wait_for_completion);
+
+/**
+ * completion_done - Test to see if a completion has any waiters
+ * @x: completion structure
+ *
+ * Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *          1 if there are no waiters.
+ *
+ */
+bool completion_done(struct completion *x)
+{
+        int ret = 1;
+
+        spin_lock_irq(&x->wait.lock);
+        if (!x->done)
+                ret = 0;
+        spin_unlock_irq(&x->wait.lock);
+        return ret;
+}
+EXPORT_SYMBOL(completion_done);
+
 static long __sched
 sleep_on_common(wait_queue_head_t *q, int state, long timeout)
 {
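Note: a usage sketch for the two new completion helpers (illustration only, not part of the patch; the structure and function names are hypothetical). try_wait_for_completion() consumes one pending completion without sleeping; completion_done() only tests whether the completion has already been signalled, without consuming it.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical device that uses a completion as a counting event. */
struct my_dev {
        struct completion resource_ready;
};

static int my_claim_resource(struct my_dev *dev, bool may_block)
{
        /* Fast path: consume a pending "resource ready" event, if any,
         * without sleeping. */
        if (try_wait_for_completion(&dev->resource_ready))
                return 0;

        if (!may_block)
                return -EAGAIN;

        /* Slow path: sleep until complete() is called for us. */
        wait_for_completion(&dev->resource_ready);
        return 0;
}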
@@ -5740,6 +5845,8 @@ static inline void sched_init_granularity(void)
                 sysctl_sched_latency = limit;

         sysctl_sched_wakeup_granularity *= factor;
+
+        sysctl_sched_shares_ratelimit *= factor;
 }

 #ifdef CONFIG_SMP
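Note: assuming factor is derived from the online CPU count at the top of sched_init_granularity() (1 + ilog2(num_online_cpus())), this hunk makes the shares ratelimit scale like the other granularity knobs: on an 8-CPU machine factor = 1 + 3 = 4, so the new 250000 ns default becomes 250000 * 4 = 1000000 ns, i.e. 1 ms.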
@@ -7589,24 +7696,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * and partition_sched_domains() will fallback to the single partition
  * 'fallback_doms', it also forces the domains to be rebuilt.
  *
+ * If doms_new==NULL it will be replaced with cpu_online_map.
+ * ndoms_new==0 is a special case for destroying existing domains.
+ * It will not create the default domain.
+ *
  * Call with hotplug lock held
  */
 void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
                              struct sched_domain_attr *dattr_new)
 {
-        int i, j;
+        int i, j, n;

         mutex_lock(&sched_domains_mutex);

         /* always unregister in case we don't destroy any domains */
         unregister_sched_domain_sysctl();

-        if (doms_new == NULL)
-                ndoms_new = 0;
+        n = doms_new ? ndoms_new : 0;

         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
-                for (j = 0; j < ndoms_new; j++) {
+                for (j = 0; j < n; j++) {
                         if (cpus_equal(doms_cur[i], doms_new[j])
                             && dattrs_equal(dattr_cur, i, dattr_new, j))
                                 goto match1;
@@ -7619,7 +7729,6 @@ match1:

         if (doms_new == NULL) {
                 ndoms_cur = 0;
-                ndoms_new = 1;
                 doms_new = &fallback_doms;
                 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
                 dattr_new = NULL;
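Note: the two calling conventions that fall out of this change, as exercised by the callers later in this diff (shown only as an illustration, not as part of the patch):

        /* Destroy all current domains and do not build a default one; used
         * by arch_reinit_sched_domains() below to force a full rebuild. */
        partition_sched_domains(0, NULL, NULL);

        /* Rebuild the single default domain spanning cpu_online_map (minus
         * cpu_isolated_map); used by the CPU hotplug notifier below. */
        partition_sched_domains(1, NULL, NULL);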
@@ -7656,8 +7765,13 @@ match2:
 int arch_reinit_sched_domains(void)
 {
         get_online_cpus();
+
+        /* Destroy domains first to force the rebuild */
+        partition_sched_domains(0, NULL, NULL);
+
         rebuild_sched_domains();
         put_online_cpus();
+
         return 0;
 }

@@ -7741,7 +7855,7 @@ static int update_sched_domains(struct notifier_block *nfb,
         case CPU_ONLINE_FROZEN:
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
-                partition_sched_domains(0, NULL, NULL);
+                partition_sched_domains(1, NULL, NULL);
                 return NOTIFY_OK;

         default:
@@ -8462,8 +8576,8 @@ struct task_group *sched_create_group(struct task_group *parent)
         WARN_ON(!parent); /* root should already exist */

         tg->parent = parent;
-        list_add_rcu(&tg->siblings, &parent->children);
         INIT_LIST_HEAD(&tg->children);
+        list_add_rcu(&tg->siblings, &parent->children);
         spin_unlock_irqrestore(&task_group_lock, flags);

         return tg;
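Note: the reorder above follows the usual RCU publication rule: the new task group must be fully initialized before list_add_rcu() makes it visible to lockless walkers of parent->children, otherwise a concurrent reader could traverse an uninitialized tg->children list. A generic sketch of the pattern (illustration only, hypothetical names):

#include <linux/list.h>
#include <linux/rculist.h>

/* Hypothetical node published on an RCU-protected sibling list. */
struct my_node {
        struct list_head siblings;   /* linkage on the parent's list */
        struct list_head children;   /* must be valid before publication */
};

static void my_publish(struct my_node *n, struct list_head *parent_children)
{
        INIT_LIST_HEAD(&n->children);                   /* initialize first... */
        list_add_rcu(&n->siblings, parent_children);    /* ...then publish */
}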
@@ -8795,6 +8909,9 @@ static int sched_rt_global_constraints(void)
         u64 rt_runtime, rt_period;
         int ret = 0;

+        if (sysctl_sched_rt_period <= 0)
+                return -EINVAL;
+
         rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
         rt_runtime = tg->rt_bandwidth.rt_runtime;

@@ -8811,6 +8928,9 @@ static int sched_rt_global_constraints(void)
         unsigned long flags;
         int i;

+        if (sysctl_sched_rt_period <= 0)
+                return -EINVAL;
+
         spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
         for_each_possible_cpu(i) {
                 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
