Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  112
1 file changed, 33 insertions(+), 79 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d325c4b2dcbb..649c9f876cb1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
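Note: the hunks above replace the open-coded do_div() scaling with the new scale_utime() helper. do_div() takes a 32-bit divisor, so the old "(__force u32) total" cast could truncate "total" on configurations where cputime_t is 64 bits wide; the helper instead picks div_u64() or div64_u64() based on sizeof(cputime_t). A stand-alone sketch of the same scaling idea (hypothetical names, not part of this patch):

    #include <stdint.h>

    /*
     * Split rtime in the accounted utime:total proportion, doing the
     * multiply and divide in 64 bits. Assumes rtime * utime fits in
     * 64 bits, as the in-kernel helper does.
     */
    static uint64_t scale_time(uint64_t utime, uint64_t rtime, uint64_t total)
    {
    	if (!total)
    		return rtime;	/* nothing accounted yet: hand back rtime */

    	return (rtime * utime) / total;
    }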
@@ -4340,9 +4346,7 @@ recheck:
 	 */
 	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
 			param->sched_priority == p->rt_priority))) {
-
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
 
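Here task_rq_unlock() simply pairs with the earlier task_rq_lock() and drops both locks in one place, in the same order as the two calls it replaces. A sketch of that helper (defined elsewhere in the scheduler code, shown for context only):

    static inline void
    task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
    	__releases(rq->lock)
    	__releases(p->pi_lock)
    {
    	raw_spin_unlock(&rq->lock);			/* what __task_rq_unlock() did */
    	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
    }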
@@ -5300,27 +5304,17 @@ void idle_task_exit(void)
 }
 
 /*
- * While a dead CPU has no uninterruptible tasks queued at this point,
- * it might still have a nonzero ->nr_uninterruptible counter, because
- * for performance reasons the counter is not stricly tracking tasks to
- * their home CPUs. So we just add the counter to another CPU's counter,
- * to keep the global sum constant after CPU-down:
- */
-static void migrate_nr_uninterruptible(struct rq *rq_src)
-{
-	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-
-	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
-	rq_src->nr_uninterruptible = 0;
-}
-
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
+ * we might have. Assumes we're called after migrate_tasks() so that the
+ * nr_active count is stable.
+ *
+ * Also see the comment "Global load-average calculations".
  */
-static void calc_global_load_remove(struct rq *rq)
+static void calc_load_migrate(struct rq *rq)
 {
-	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-	rq->calc_load_active = 0;
+	long delta = calc_load_fold_active(rq);
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
 }
 
 /*
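The replacement leans on calc_load_fold_active(), defined earlier in core.c near the "Global load-average calculations" comment that the new text points to. Roughly, it returns how far the runqueue's active count (nr_running plus nr_uninterruptible) has drifted since it last contributed to calc_load_tasks, so the dying CPU's share is folded in rather than the per-CPU counters being shuffled around by hand (a context sketch, not part of this hunk):

    static long calc_load_fold_active(struct rq *this_rq)
    {
    	long nr_active, delta = 0;

    	nr_active = this_rq->nr_running;
    	nr_active += (long)this_rq->nr_uninterruptible;

    	if (nr_active != this_rq->calc_load_active) {
    		delta = nr_active - this_rq->calc_load_active;
    		this_rq->calc_load_active = nr_active;
    	}

    	return delta;
    }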
@@ -5348,9 +5342,6 @@ static void migrate_tasks(unsigned int dead_cpu)
 	 */
 	rq->stop = NULL;
 
-	/* Ensure any throttled groups are reachable by pick_next_task */
-	unthrottle_offline_cfs_rqs(rq);
-
 	for ( ; ; ) {
 		/*
 		 * There's this thread running, bail when that's the only
@@ -5614,8 +5605,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 
-		migrate_nr_uninterruptible(rq);
-		calc_global_load_remove(rq);
+		calc_load_migrate(rq);
 		break;
 #endif
 	}
@@ -6024,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
  * allows us to avoid some pointer chasing select_idle_sibling().
  *
- * Iterate domains and sched_groups downward, assigning CPUs to be
- * select_idle_sibling() hw buddy.  Cross-wiring hw makes bouncing
- * due to random perturbation self canceling, ie sw buddies pull
- * their counterpart to their CPU's hw counterpart.
- *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6042,40 +6027,8 @@ static void update_top_cache_domain(int cpu)
 	int id = cpu;
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-	if (sd) {
-		struct sched_domain *tmp = sd;
-		struct sched_group *sg, *prev;
-		bool right;
-
-		/*
-		 * Traverse to first CPU in group, and count hops
-		 * to cpu from there, switching direction on each
-		 * hop, never ever pointing the last CPU rightward.
-		 */
-		do {
-			id = cpumask_first(sched_domain_span(tmp));
-			prev = sg = tmp->groups;
-			right = 1;
-
-			while (cpumask_first(sched_group_cpus(sg)) != id)
-				sg = sg->next;
-
-			while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
-				prev = sg;
-				sg = sg->next;
-				right = !right;
-			}
-
-			/* A CPU went down, never point back to domain start. */
-			if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
-				right = false;
-
-			sg = right ? sg->next : prev;
-			tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
-		} while ((tmp = tmp->child));
-
+	if (sd)
 		id = cpumask_first(sched_domain_span(sd));
-	}
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_id, cpu) = id;
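With the idle-buddy cross-wiring removed, update_top_cache_domain() only caches the LLC domain pointer (sd_llc) and an id, the first CPU in that domain's span. As the surviving comment says, the id lets cpus_share_cache() decide cache-domain membership with a single compare; in this file that boils down to roughly:

    bool cpus_share_cache(int this_cpu, int that_cpu)
    {
    	/* same LLC domain id => the two CPUs share a last-level cache */
    	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
    }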
@@ -7248,6 +7201,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);