Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	144
1 file changed, 132 insertions(+), 12 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index d601fb0406ca..ad1962dc0aa2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
-	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
@@ -808,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
  * ratelimit for updating the group shares.
- * default: 0.5ms
+ * default: 0.25ms
  */
-const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+unsigned int sysctl_sched_shares_ratelimit = 250000;
 
 /*
  * period over which we measure -rt task cpu usage in us.
@@ -1087,7 +1087,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_DONE;
 }
 
-static void init_hrtick(void)
+static __init void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
@@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq)
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
@@ -4179,6 +4179,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 }
 
 /*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+	return p->utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+	clock_t utime = cputime_to_clock_t(p->utime),
+		total = utime + cputime_to_clock_t(p->stime);
+	u64 temp;
+
+	/*
+	 * Use CFS's precise accounting:
+	 */
+	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+	if (total) {
+		temp *= utime;
+		do_div(temp, total);
+	}
+	utime = (clock_t)temp;
+
+	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+	return p->prev_utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	clock_t stime;
+
+	/*
+	 * Use CFS's precise accounting. (we subtract utime from
+	 * the total, to make sure the total observed by userspace
+	 * grows monotonically - apps rely on that):
+	 */
+	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+			cputime_to_clock_t(task_utime(p));
+
+	if (stime >= 0)
+		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
+	return p->prev_stime;
+}
+#endif
+
+inline cputime_t task_gtime(struct task_struct *p)
+{
+	return p->gtime;
+}
+
+/*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  *
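Worked example for the !CONFIG_VIRT_CPU_ACCOUNTING variant added above (the numbers are purely illustrative, not taken from this commit): suppose tick-based sampling has charged utime = 300 and stime = 100 clock ticks (total = 400), while p->se.sum_exec_runtime converts to 420 ticks. task_utime() redistributes the precise runtime in the sampled ratio, and task_stime() takes the remainder:

	utime = 420 * 300 / 400 = 315 ticks
	stime = 420 - 315       = 105 ticks

prev_utime/prev_stime only ever move forward (via max()), so the split exported to userspace stays monotonic even if a later sample would yield smaller values.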
@@ -4669,6 +4728,52 @@ int __sched wait_for_completion_killable(struct completion *x)
 }
 EXPORT_SYMBOL(wait_for_completion_killable);
 
+/**
+ * try_wait_for_completion - try to decrement a completion without blocking
+ * @x:	completion structure
+ *
+ * Returns: 0 if a decrement cannot be done without blocking
+ *	    1 if a decrement succeeded.
+ *
+ * If a completion is being used as a counting completion,
+ * attempt to decrement the counter without blocking. This
+ * enables us to avoid waiting if the resource the completion
+ * is protecting is not available.
+ */
+bool try_wait_for_completion(struct completion *x)
+{
+	int ret = 1;
+
+	spin_lock_irq(&x->wait.lock);
+	if (!x->done)
+		ret = 0;
+	else
+		x->done--;
+	spin_unlock_irq(&x->wait.lock);
+	return ret;
+}
+EXPORT_SYMBOL(try_wait_for_completion);
+
+/**
+ * completion_done - Test to see if a completion has any waiters
+ * @x:	completion structure
+ *
+ * Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *	    1 if there are no waiters.
+ *
+ */
+bool completion_done(struct completion *x)
+{
+	int ret = 1;
+
+	spin_lock_irq(&x->wait.lock);
+	if (!x->done)
+		ret = 0;
+	spin_unlock_irq(&x->wait.lock);
+	return ret;
+}
+EXPORT_SYMBOL(completion_done);
+
 static long __sched
 sleep_on_common(wait_queue_head_t *q, int state, long timeout)
 {
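A minimal usage sketch for the two helpers introduced above; init_completion()/complete() are the existing completion API, while the resource names are hypothetical and only for illustration:

	struct completion avail;

	init_completion(&avail);
	complete(&avail);			/* producer: one resource available */

	/* consumer: take the resource only if it will not block */
	if (try_wait_for_completion(&avail))
		use_resource();			/* hypothetical helper */

	/* skip work while wait_for_completion() callers are still pending */
	if (completion_done(&avail))
		pr_debug("no waiters\n");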
@@ -5740,6 +5845,8 @@ static inline void sched_init_granularity(void)
 		sysctl_sched_latency = limit;
 
 	sysctl_sched_wakeup_granularity *= factor;
+
+	sysctl_sched_shares_ratelimit *= factor;
 }
 
 #ifdef CONFIG_SMP
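For context on the new line above (a rough sketch, not part of this hunk): sched_init_granularity() derives factor from the online CPU count, roughly 1 + ilog2(ncpus) in this era of the code, so on an 8-CPU machine factor = 4 and the 0.25 ms default becomes 250000 * 4 = 1000000 ns, i.e. group-share updates are ratelimited to about once per millisecond.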
@@ -7589,24 +7696,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * and partition_sched_domains() will fallback to the single partition
  * 'fallback_doms', it also forces the domains to be rebuilt.
  *
+ * If doms_new==NULL it will be replaced with cpu_online_map.
+ * ndoms_new==0 is a special case for destroying existing domains.
+ * It will not create the default domain.
+ *
  * Call with hotplug lock held
  */
 void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 			     struct sched_domain_attr *dattr_new)
 {
-	int i, j;
+	int i, j, n;
 
 	mutex_lock(&sched_domains_mutex);
 
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
 
-	if (doms_new == NULL)
-		ndoms_new = 0;
+	n = doms_new ? ndoms_new : 0;
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
-		for (j = 0; j < ndoms_new; j++) {
+		for (j = 0; j < n; j++) {
 			if (cpus_equal(doms_cur[i], doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
@@ -7619,7 +7729,6 @@ match1:
 
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
-		ndoms_new = 1;
 		doms_new = &fallback_doms;
 		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
 		dattr_new = NULL;
@@ -7656,8 +7765,13 @@ match2:
 int arch_reinit_sched_domains(void)
 {
 	get_online_cpus();
+
+	/* Destroy domains first to force the rebuild */
+	partition_sched_domains(0, NULL, NULL);
+
 	rebuild_sched_domains();
 	put_online_cpus();
+
 	return 0;
 }
 
@@ -7741,7 +7855,7 @@ static int update_sched_domains(struct notifier_block *nfb,
 	case CPU_ONLINE_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		partition_sched_domains(0, NULL, NULL);
+		partition_sched_domains(1, NULL, NULL);
 		return NOTIFY_OK;
 
 	default:
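Taken together with the comment added earlier, the calling convention these hunks establish looks roughly like this (a sketch only; ndoms/doms/dattr are placeholders):

	/* ndoms_new == 0: tear down all current domains, build nothing */
	partition_sched_domains(0, NULL, NULL);

	/* doms_new == NULL: fall back to one default domain from cpu_online_map */
	partition_sched_domains(1, NULL, NULL);

	/* install an explicit set of ndoms domains with their attributes */
	partition_sched_domains(ndoms, doms, dattr);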
@@ -8462,8 +8576,8 @@ struct task_group *sched_create_group(struct task_group *parent)
 	WARN_ON(!parent); /* root should already exist */
 
 	tg->parent = parent;
-	list_add_rcu(&tg->siblings, &parent->children);
 	INIT_LIST_HEAD(&tg->children);
+	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	return tg;
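The reordering above follows the usual initialize-then-publish rule for RCU-visible lists: once list_add_rcu() has run, a concurrent lockless reader may already walk the new group, so every field it might touch (here tg->children) must be set up first. A generic sketch of the reader this protects against (walk_tg() is a hypothetical callback, not from sched.c):

	struct task_group *child;

	rcu_read_lock();
	list_for_each_entry_rcu(child, &parent->children, siblings)
		walk_tg(child);		/* must never see a half-initialized child */
	rcu_read_unlock();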
@@ -8795,6 +8909,9 @@ static int sched_rt_global_constraints(void)
 	u64 rt_runtime, rt_period;
 	int ret = 0;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
@@ -8811,6 +8928,9 @@ static int sched_rt_global_constraints(void)
 	unsigned long flags;
 	int i;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;