1 files changed, 196 insertions, 40 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 9769c756ad6..fde6ff90352 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+static void sched_ttwu_do_pending(struct task_struct *list)
 {
        struct rq *rq = this_rq();
-        struct task_struct *list = xchg(&rq->wake_list, NULL);
-        if (!list)
-                return;
        raw_spin_lock(&rq->lock);
@@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
        raw_spin_unlock(&rq->lock);
 }
+#ifdef CONFIG_HOTPLUG_CPU
+static void sched_ttwu_pending(void)
+{
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+        if (!list)
+                return;
+        sched_ttwu_do_pending(list);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 void scheduler_ipi(void)
 {
-        sched_ttwu_pending();
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+        if (!list)
+                return;
+        /*
+         * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+         * traditionally all their work was done from the interrupt return
+         * path. Now that we actually do some work, we need to make sure
+         * we do call them.
+         *
+         * Some archs already do call them, luckily irq_enter/exit nest
+         * properly.
+         *
+         * Arguably we should visit all archs and update all handlers,
+         * however a fair share of IPIs are still resched only so this would
+         * somewhat pessimize the simple resched case.
+         */
+        irq_enter();
+        sched_ttwu_do_pending(list);
+        irq_exit();
 }
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
@@ -6557,7 +6589,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
-                if (!group->cpu_power) {
+                if (!group->sgp->power) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: domain->cpu_power not "
                                        "set\n");
@@ -6581,9 +6613,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
                printk(KERN_CONT " %s", str);
-                if (group->cpu_power != SCHED_POWER_SCALE) {
+                if (group->sgp->power != SCHED_POWER_SCALE) {
                        printk(KERN_CONT " (cpu_power = %d)",
-                                group->cpu_power);
+                                group->sgp->power);
                }
                group = group->next;
@@ -6774,11 +6806,39 @@ static struct root_domain *alloc_rootdomain(void)
        return rd;
 }
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+        struct sched_group *tmp, *first;
+        if (!sg)
+                return;
+        first = sg;
+        do {
+                tmp = sg->next;
+                if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+                        kfree(sg->sgp);
+                kfree(sg);
+                sg = tmp;
+        } while (sg != first);
+}
 static void free_sched_domain(struct rcu_head *rcu)
 {
        struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-        if (atomic_dec_and_test(&sd->groups->ref))
+        /*
+         * If its an overlapping domain it has private groups, iterate and
+         * nuke them all.
+         */
+        if (sd->flags & SD_OVERLAP) {
+                free_sched_groups(sd->groups, 1);
+        } else if (atomic_dec_and_test(&sd->groups->ref)) {
+                kfree(sd->groups->sgp);
                kfree(sd->groups);
+        }
        kfree(sd);
 }
@@ -6945,6 +7005,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 struct sd_data {
        struct sched_domain **__percpu sd;
        struct sched_group **__percpu sg;
+        struct sched_group_power **__percpu sgp;
 };
 struct s_data {
@@ -6964,15 +7025,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+#define SDTL_OVERLAP    0x01
 struct sched_domain_topology_level {
        sched_domain_init_f init;
        sched_domain_mask_f mask;
+        int                 flags;
        struct sd_data      data;
 };
-/*
+static int
- * Assumes the sched_domain tree is fully constructed
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
- */
+{
+        struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+        const struct cpumask *span = sched_domain_span(sd);
+        struct cpumask *covered = sched_domains_tmpmask;
+        struct sd_data *sdd = sd->private;
+        struct sched_domain *child;
+        int i;
+        cpumask_clear(covered);
+        for_each_cpu(i, span) {
+                struct cpumask *sg_span;
+                if (cpumask_test_cpu(i, covered))
+                        continue;
+                sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+                                GFP_KERNEL, cpu_to_node(i));
+                if (!sg)
+                        goto fail;
+                sg_span = sched_group_cpus(sg);
+                child = *per_cpu_ptr(sdd->sd, i);
+                if (child->child) {
+                        child = child->child;
+                        cpumask_copy(sg_span, sched_domain_span(child));
+                } else
+                        cpumask_set_cpu(i, sg_span);
+                cpumask_or(covered, covered, sg_span);
+                sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+                atomic_inc(&sg->sgp->ref);
+                if (cpumask_test_cpu(cpu, sg_span))
+                        groups = sg;
+                if (!first)
+                        first = sg;
+                if (last)
+                        last->next = sg;
+                last = sg;
+                last->next = first;
+        }
+        sd->groups = groups;
+        return 0;
+fail:
+        free_sched_groups(first, 0);
+        return -ENOMEM;
+}
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6981,24 +7100,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
        if (child)
                cpu = cpumask_first(sched_domain_span(child));
-        if (sg)
+        if (sg) {
                *sg = *per_cpu_ptr(sdd->sg, cpu);
+                (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+                atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+        }
        return cpu;
 }
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
 * build_sched_groups will build a circular linked list of the groups
 * covered by the given span, and will set each group's ->cpumask correctly,
 * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
 */
-static void
+static int
-build_sched_groups(struct sched_domain *sd)
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
        struct sched_group *first = NULL, *last = NULL;
        struct sd_data *sdd = sd->private;
@@ -7006,6 +7125,12 @@ build_sched_groups(struct sched_domain *sd)
        struct cpumask *covered;
        int i;
+        get_group(cpu, sdd, &sd->groups);
+        atomic_inc(&sd->groups->ref);
+        if (cpu != cpumask_first(sched_domain_span(sd)))
+                return 0;
        lockdep_assert_held(&sched_domains_mutex);
        covered = sched_domains_tmpmask;
@@ -7020,7 +7145,7 @@ build_sched_groups(struct sched_domain *sd)
                        continue;
                cpumask_clear(sched_group_cpus(sg));
-                sg->cpu_power = 0;
+                sg->sgp->power = 0;
                for_each_cpu(j, span) {
                        if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7162,8 @@ build_sched_groups(struct sched_domain *sd)
                last = sg;
        }
        last->next = first;
+        return 0;
 }
 /*
@@ -7051,12 +7178,17 @@ build_sched_groups(struct sched_domain *sd)
 */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-        WARN_ON(!sd || !sd->groups);
+        struct sched_group *sg = sd->groups;
-        if (cpu != group_first_cpu(sd->groups))
+        WARN_ON(!sd || !sg);
-                return;
+        do {
+                sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+                sg = sg->next;
+        } while (sg != sd->groups);
-        sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+        if (cpu != group_first_cpu(sg))
+                return;
        update_group_power(sd, cpu);
 }
@@ -7177,15 +7309,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
        struct sd_data *sdd = sd->private;
-        struct sched_group *sg = sd->groups;
        WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
        *per_cpu_ptr(sdd->sd, cpu) = NULL;
-        if (cpu == cpumask_first(sched_group_cpus(sg))) {
+        if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
-                WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
                *per_cpu_ptr(sdd->sg, cpu) = NULL;
-        }
+        if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
+                *per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 #ifdef CONFIG_SCHED_SMT
@@ -7210,7 +7342,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
        { sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-        { sd_init_NODE, cpu_node_mask, },
+        { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
        { sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
        { NULL, },
@@ -7234,9 +7366,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                if (!sdd->sg)
                        return -ENOMEM;
+                sdd->sgp = alloc_percpu(struct sched_group_power *);
+                if (!sdd->sgp)
+                        return -ENOMEM;
                for_each_cpu(j, cpu_map) {
                        struct sched_domain *sd;
                        struct sched_group *sg;
+                        struct sched_group_power *sgp;
                        sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
                                        GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7388,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                                return -ENOMEM;
                        *per_cpu_ptr(sdd->sg, j) = sg;
+                        sgp = kzalloc_node(sizeof(struct sched_group_power),
+                                        GFP_KERNEL, cpu_to_node(j));
+                        if (!sgp)
+                                return -ENOMEM;
+                        *per_cpu_ptr(sdd->sgp, j) = sgp;
                }
        }
@@ -7266,11 +7410,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
                struct sd_data *sdd = &tl->data;
                for_each_cpu(j, cpu_map) {
-                        kfree(*per_cpu_ptr(sdd->sd, j));
+                        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+                        if (sd && (sd->flags & SD_OVERLAP))
+                                free_sched_groups(sd->groups, 0);
                        kfree(*per_cpu_ptr(sdd->sg, j));
+                        kfree(*per_cpu_ptr(sdd->sgp, j));
                }
                free_percpu(sdd->sd);
                free_percpu(sdd->sg);
+                free_percpu(sdd->sgp);
        }
 }
@@ -7316,8 +7464,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
                struct sched_domain_topology_level *tl;
                sd = NULL;
-                for (tl = sched_domain_topology; tl->init; tl++)
+                for (tl = sched_domain_topology; tl->init; tl++) {
                        sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+                        if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+                                sd->flags |= SD_OVERLAP;
+                        if (cpumask_equal(cpu_map, sched_domain_span(sd)))
+                                break;
+                }
                while (sd->child)
                        sd = sd->child;
@@ -7329,13 +7482,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
        for_each_cpu(i, cpu_map) {
                for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
                        sd->span_weight = cpumask_weight(sched_domain_span(sd));
-                        get_group(i, sd->private, &sd->groups);
+                        if (sd->flags & SD_OVERLAP) {
-                        atomic_inc(&sd->groups->ref);
+                                if (build_overlap_sched_groups(sd, i))
+                                        goto error;
-                        if (i != cpumask_first(sched_domain_span(sd)))
+                        } else {
-                                continue;
+                                if (build_sched_groups(sd, i))
+                                        goto error;
-                        build_sched_groups(sd);
+                        }
                }
        }
@@ -7757,6 +7910,9 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 #endif
 #endif
        cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+#ifndef CONFIG_64BIT
+        cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)

diff --git a/kernel/sched.c b/kernel/sched.c index 9769c756ad6..fde6ff90352 100644 --- a/kernel/sched.c +++ b/kernel/sched.c
@@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
2544	}	2544	}
2545		2545
2546	#ifdef CONFIG_SMP	2546	#ifdef CONFIG_SMP
2547	static void sched_ttwu_pending(void)	2547	static void sched_ttwu_do_pending(struct task_struct *list)
2548	{	2548	{
2549	struct rq *rq = this_rq();	2549	struct rq *rq = this_rq();
2550	struct task_struct *list = xchg(&rq->wake_list, NULL);
2551
2552	if (!list)
2553	return;
2554		2550
2555	raw_spin_lock(&rq->lock);	2551	raw_spin_lock(&rq->lock);
2556		2552
@@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
2563	raw_spin_unlock(&rq->lock);	2559	raw_spin_unlock(&rq->lock);
2564	}	2560	}
2565		2561
		2562	#ifdef CONFIG_HOTPLUG_CPU
		2563
		2564	static void sched_ttwu_pending(void)
		2565	{
		2566	struct rq *rq = this_rq();
		2567	struct task_struct *list = xchg(&rq->wake_list, NULL);
		2568
		2569	if (!list)
		2570	return;
		2571
		2572	sched_ttwu_do_pending(list);
		2573	}
		2574
		2575	#endif /* CONFIG_HOTPLUG_CPU */
		2576
2566	void scheduler_ipi(void)	2577	void scheduler_ipi(void)
2567	{	2578	{
2568	sched_ttwu_pending();	2579	struct rq *rq = this_rq();
		2580	struct task_struct *list = xchg(&rq->wake_list, NULL);
		2581
		2582	if (!list)
		2583	return;
		2584
		2585	/*
		2586	* Not all reschedule IPI handlers call irq_enter/irq_exit, since
		2587	* traditionally all their work was done from the interrupt return
		2588	* path. Now that we actually do some work, we need to make sure
		2589	* we do call them.
		2590	*
		2591	* Some archs already do call them, luckily irq_enter/exit nest
		2592	* properly.
		2593	*
		2594	* Arguably we should visit all archs and update all handlers,
		2595	* however a fair share of IPIs are still resched only so this would
		2596	* somewhat pessimize the simple resched case.
		2597	*/
		2598	irq_enter();
		2599	sched_ttwu_do_pending(list);
		2600	irq_exit();
2569	}	2601	}
2570		2602
2571	static void ttwu_queue_remote(struct task_struct *p, int cpu)	2603	static void ttwu_queue_remote(struct task_struct *p, int cpu)
@@ -6557,7 +6589,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6557	break;	6589	break;
6558	}	6590	}
6559		6591
6560	if (!group->cpu_power) {	6592	if (!group->sgp->power) {
6561	printk(KERN_CONT "\n");	6593	printk(KERN_CONT "\n");
6562	printk(KERN_ERR "ERROR: domain->cpu_power not "	6594	printk(KERN_ERR "ERROR: domain->cpu_power not "
6563	"set\n");	6595	"set\n");
@@ -6581,9 +6613,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6581	cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));	6613	cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
6582		6614
6583	printk(KERN_CONT " %s", str);	6615	printk(KERN_CONT " %s", str);
6584	if (group->cpu_power != SCHED_POWER_SCALE) {	6616	if (group->sgp->power != SCHED_POWER_SCALE) {
6585	printk(KERN_CONT " (cpu_power = %d)",	6617	printk(KERN_CONT " (cpu_power = %d)",
6586	group->cpu_power);	6618	group->sgp->power);
6587	}	6619	}
6588		6620
6589	group = group->next;	6621	group = group->next;
@@ -6774,11 +6806,39 @@ static struct root_domain *alloc_rootdomain(void)
6774	return rd;	6806	return rd;
6775	}	6807	}
6776		6808
		6809	static void free_sched_groups(struct sched_group *sg, int free_sgp)
		6810	{
		6811	struct sched_group *tmp, *first;
		6812
		6813	if (!sg)
		6814	return;
		6815
		6816	first = sg;
		6817	do {
		6818	tmp = sg->next;
		6819
		6820	if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
		6821	kfree(sg->sgp);
		6822
		6823	kfree(sg);
		6824	sg = tmp;
		6825	} while (sg != first);
		6826	}
		6827
6777	static void free_sched_domain(struct rcu_head *rcu)	6828	static void free_sched_domain(struct rcu_head *rcu)
6778	{	6829	{
6779	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);	6830	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
6780	if (atomic_dec_and_test(&sd->groups->ref))	6831
		6832	/*
		6833	* If its an overlapping domain it has private groups, iterate and
		6834	* nuke them all.
		6835	*/
		6836	if (sd->flags & SD_OVERLAP) {
		6837	free_sched_groups(sd->groups, 1);
		6838	} else if (atomic_dec_and_test(&sd->groups->ref)) {
		6839	kfree(sd->groups->sgp);
6781	kfree(sd->groups);	6840	kfree(sd->groups);
		6841	}
6782	kfree(sd);	6842	kfree(sd);
6783	}	6843	}
6784		6844
@@ -6945,6 +7005,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6945	struct sd_data {	7005	struct sd_data {
6946	struct sched_domain **__percpu sd;	7006	struct sched_domain **__percpu sd;
6947	struct sched_group **__percpu sg;	7007	struct sched_group **__percpu sg;
		7008	struct sched_group_power **__percpu sgp;
6948	};	7009	};
6949		7010
6950	struct s_data {	7011	struct s_data {
@@ -6964,15 +7025,73 @@ struct sched_domain_topology_level;
6964	typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);	7025	typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
6965	typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);	7026	typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
6966		7027
		7028	#define SDTL_OVERLAP 0x01
		7029
6967	struct sched_domain_topology_level {	7030	struct sched_domain_topology_level {
6968	sched_domain_init_f init;	7031	sched_domain_init_f init;
6969	sched_domain_mask_f mask;	7032	sched_domain_mask_f mask;
		7033	int flags;
6970	struct sd_data data;	7034	struct sd_data data;
6971	};	7035	};
6972		7036
6973	/*	7037	static int
6974	* Assumes the sched_domain tree is fully constructed	7038	build_overlap_sched_groups(struct sched_domain *sd, int cpu)
6975	*/	7039	{
		7040	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
		7041	const struct cpumask *span = sched_domain_span(sd);
		7042	struct cpumask *covered = sched_domains_tmpmask;
		7043	struct sd_data *sdd = sd->private;
		7044	struct sched_domain *child;
		7045	int i;
		7046
		7047	cpumask_clear(covered);
		7048
		7049	for_each_cpu(i, span) {
		7050	struct cpumask *sg_span;
		7051
		7052	if (cpumask_test_cpu(i, covered))
		7053	continue;
		7054
		7055	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
		7056	GFP_KERNEL, cpu_to_node(i));
		7057
		7058	if (!sg)
		7059	goto fail;
		7060
		7061	sg_span = sched_group_cpus(sg);
		7062
		7063	child = *per_cpu_ptr(sdd->sd, i);
		7064	if (child->child) {
		7065	child = child->child;
		7066	cpumask_copy(sg_span, sched_domain_span(child));
		7067	} else
		7068	cpumask_set_cpu(i, sg_span);
		7069
		7070	cpumask_or(covered, covered, sg_span);
		7071
		7072	sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
		7073	atomic_inc(&sg->sgp->ref);
		7074
		7075	if (cpumask_test_cpu(cpu, sg_span))
		7076	groups = sg;
		7077
		7078	if (!first)
		7079	first = sg;
		7080	if (last)
		7081	last->next = sg;
		7082	last = sg;
		7083	last->next = first;
		7084	}
		7085	sd->groups = groups;
		7086
		7087	return 0;
		7088
		7089	fail:
		7090	free_sched_groups(first, 0);
		7091
		7092	return -ENOMEM;
		7093	}
		7094
6976	static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)	7095	static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
6977	{	7096	{
6978	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);	7097	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6981,24 +7100,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
6981	if (child)	7100	if (child)
6982	cpu = cpumask_first(sched_domain_span(child));	7101	cpu = cpumask_first(sched_domain_span(child));
6983		7102
6984	if (sg)	7103	if (sg) {
6985	*sg = *per_cpu_ptr(sdd->sg, cpu);	7104	*sg = *per_cpu_ptr(sdd->sg, cpu);
		7105	(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
		7106	atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
		7107	}
6986		7108
6987	return cpu;	7109	return cpu;
6988	}	7110	}
6989		7111
6990	/*	7112	/*
6991	* build_sched_groups takes the cpumask we wish to span, and a pointer
6992	* to a function which identifies what group(along with sched group) a CPU
6993	* belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
6994	* (due to the fact that we keep track of groups covered with a struct cpumask).
6995	*
6996	* build_sched_groups will build a circular linked list of the groups	7113	* build_sched_groups will build a circular linked list of the groups
6997	* covered by the given span, and will set each group's ->cpumask correctly,	7114	* covered by the given span, and will set each group's ->cpumask correctly,
6998	* and ->cpu_power to 0.	7115	* and ->cpu_power to 0.
		7116	*
		7117	* Assumes the sched_domain tree is fully constructed
6999	*/	7118	*/
7000	static void	7119	static int
7001	build_sched_groups(struct sched_domain *sd)	7120	build_sched_groups(struct sched_domain *sd, int cpu)
7002	{	7121	{
7003	struct sched_group *first = NULL, *last = NULL;	7122	struct sched_group *first = NULL, *last = NULL;
7004	struct sd_data *sdd = sd->private;	7123	struct sd_data *sdd = sd->private;
@@ -7006,6 +7125,12 @@ build_sched_groups(struct sched_domain *sd)
7006	struct cpumask *covered;	7125	struct cpumask *covered;
7007	int i;	7126	int i;
7008		7127
		7128	get_group(cpu, sdd, &sd->groups);
		7129	atomic_inc(&sd->groups->ref);
		7130
		7131	if (cpu != cpumask_first(sched_domain_span(sd)))
		7132	return 0;
		7133
7009	lockdep_assert_held(&sched_domains_mutex);	7134	lockdep_assert_held(&sched_domains_mutex);
7010	covered = sched_domains_tmpmask;	7135	covered = sched_domains_tmpmask;
7011		7136
@@ -7020,7 +7145,7 @@ build_sched_groups(struct sched_domain *sd)
7020	continue;	7145	continue;
7021		7146
7022	cpumask_clear(sched_group_cpus(sg));	7147	cpumask_clear(sched_group_cpus(sg));
7023	sg->cpu_power = 0;	7148	sg->sgp->power = 0;
7024		7149
7025	for_each_cpu(j, span) {	7150	for_each_cpu(j, span) {
7026	if (get_group(j, sdd, NULL) != group)	7151	if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7162,8 @@ build_sched_groups(struct sched_domain *sd)
7037	last = sg;	7162	last = sg;
7038	}	7163	}
7039	last->next = first;	7164	last->next = first;
		7165
		7166	return 0;
7040	}	7167	}
7041		7168
7042	/*	7169	/*
@@ -7051,12 +7178,17 @@ build_sched_groups(struct sched_domain *sd)
7051	*/	7178	*/
7052	static void init_sched_groups_power(int cpu, struct sched_domain *sd)	7179	static void init_sched_groups_power(int cpu, struct sched_domain *sd)
7053	{	7180	{
7054	WARN_ON(!sd || !sd->groups);	7181	struct sched_group *sg = sd->groups;
7055		7182
7056	if (cpu != group_first_cpu(sd->groups))	7183	WARN_ON(!sd || !sg);
7057	return;	7184
		7185	do {
		7186	sg->group_weight = cpumask_weight(sched_group_cpus(sg));
		7187	sg = sg->next;
		7188	} while (sg != sd->groups);
7058		7189
7059	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));	7190	if (cpu != group_first_cpu(sg))
		7191	return;
7060		7192
7061	update_group_power(sd, cpu);	7193	update_group_power(sd, cpu);
7062	}	7194	}
@@ -7177,15 +7309,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
7177	static void claim_allocations(int cpu, struct sched_domain *sd)	7309	static void claim_allocations(int cpu, struct sched_domain *sd)
7178	{	7310	{
7179	struct sd_data *sdd = sd->private;	7311	struct sd_data *sdd = sd->private;
7180	struct sched_group *sg = sd->groups;
7181		7312
7182	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);	7313	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
7183	*per_cpu_ptr(sdd->sd, cpu) = NULL;	7314	*per_cpu_ptr(sdd->sd, cpu) = NULL;
7184		7315
7185	if (cpu == cpumask_first(sched_group_cpus(sg))) {	7316	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
7186	WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
7187	*per_cpu_ptr(sdd->sg, cpu) = NULL;	7317	*per_cpu_ptr(sdd->sg, cpu) = NULL;
7188	}	7318
		7319	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
		7320	*per_cpu_ptr(sdd->sgp, cpu) = NULL;
7189	}	7321	}
7190		7322
7191	#ifdef CONFIG_SCHED_SMT	7323	#ifdef CONFIG_SCHED_SMT
@@ -7210,7 +7342,7 @@ static struct sched_domain_topology_level default_topology[] = {
7210	#endif	7342	#endif
7211	{ sd_init_CPU, cpu_cpu_mask, },	7343	{ sd_init_CPU, cpu_cpu_mask, },
7212	#ifdef CONFIG_NUMA	7344	#ifdef CONFIG_NUMA
7213	{ sd_init_NODE, cpu_node_mask, },	7345	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
7214	{ sd_init_ALLNODES, cpu_allnodes_mask, },	7346	{ sd_init_ALLNODES, cpu_allnodes_mask, },
7215	#endif	7347	#endif
7216	{ NULL, },	7348	{ NULL, },
@@ -7234,9 +7366,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
7234	if (!sdd->sg)	7366	if (!sdd->sg)
7235	return -ENOMEM;	7367	return -ENOMEM;
7236		7368
		7369	sdd->sgp = alloc_percpu(struct sched_group_power *);
		7370	if (!sdd->sgp)
		7371	return -ENOMEM;
		7372
7237	for_each_cpu(j, cpu_map) {	7373	for_each_cpu(j, cpu_map) {
7238	struct sched_domain *sd;	7374	struct sched_domain *sd;
7239	struct sched_group *sg;	7375	struct sched_group *sg;
		7376	struct sched_group_power *sgp;
7240		7377
7241	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),	7378	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
7242	GFP_KERNEL, cpu_to_node(j));	7379	GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7388,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
7251	return -ENOMEM;	7388	return -ENOMEM;
7252		7389
7253	*per_cpu_ptr(sdd->sg, j) = sg;	7390	*per_cpu_ptr(sdd->sg, j) = sg;
		7391
		7392	sgp = kzalloc_node(sizeof(struct sched_group_power),
		7393	GFP_KERNEL, cpu_to_node(j));
		7394	if (!sgp)
		7395	return -ENOMEM;
		7396
		7397	*per_cpu_ptr(sdd->sgp, j) = sgp;
7254	}	7398	}
7255	}	7399	}
7256		7400
@@ -7266,11 +7410,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
7266	struct sd_data *sdd = &tl->data;	7410	struct sd_data *sdd = &tl->data;
7267		7411
7268	for_each_cpu(j, cpu_map) {	7412	for_each_cpu(j, cpu_map) {
7269	kfree(*per_cpu_ptr(sdd->sd, j));	7413	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
		7414	if (sd && (sd->flags & SD_OVERLAP))
		7415	free_sched_groups(sd->groups, 0);
7270	kfree(*per_cpu_ptr(sdd->sg, j));	7416	kfree(*per_cpu_ptr(sdd->sg, j));
		7417	kfree(*per_cpu_ptr(sdd->sgp, j));
7271	}	7418	}
7272	free_percpu(sdd->sd);	7419	free_percpu(sdd->sd);
7273	free_percpu(sdd->sg);	7420	free_percpu(sdd->sg);
		7421	free_percpu(sdd->sgp);
7274	}	7422	}
7275	}	7423	}
7276		7424
@@ -7316,8 +7464,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
7316	struct sched_domain_topology_level *tl;	7464	struct sched_domain_topology_level *tl;
7317		7465
7318	sd = NULL;	7466	sd = NULL;
7319	for (tl = sched_domain_topology; tl->init; tl++)	7467	for (tl = sched_domain_topology; tl->init; tl++) {
7320	sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);	7468	sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
		7469	if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
		7470	sd->flags |= SD_OVERLAP;
		7471	if (cpumask_equal(cpu_map, sched_domain_span(sd)))
		7472	break;
		7473	}
7321		7474
7322	while (sd->child)	7475	while (sd->child)
7323	sd = sd->child;	7476	sd = sd->child;
@@ -7329,13 +7482,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
7329	for_each_cpu(i, cpu_map) {	7482	for_each_cpu(i, cpu_map) {
7330	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {	7483	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
7331	sd->span_weight = cpumask_weight(sched_domain_span(sd));	7484	sd->span_weight = cpumask_weight(sched_domain_span(sd));
7332	get_group(i, sd->private, &sd->groups);	7485	if (sd->flags & SD_OVERLAP) {
7333	atomic_inc(&sd->groups->ref);	7486	if (build_overlap_sched_groups(sd, i))
7334		7487	goto error;
7335	if (i != cpumask_first(sched_domain_span(sd)))	7488	} else {
7336	continue;	7489	if (build_sched_groups(sd, i))
7337		7490	goto error;
7338	build_sched_groups(sd);	7491	}
7339	}	7492	}
7340	}	7493	}
7341		7494
@@ -7757,6 +7910,9 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
7757	#endif	7910	#endif
7758	#endif	7911	#endif
7759	cfs_rq->min_vruntime = (u64)(-(1LL << 20));	7912	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
		7913	#ifndef CONFIG_64BIT
		7914	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
		7915	#endif
7760	}	7916	}
7761		7917
7762	static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)	7918	static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)