path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  192
1 file changed, 73 insertions(+), 119 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 700aa9a1413f..3d1ee429219b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -703,45 +703,18 @@ static __read_mostly char *sched_feat_names[] = {
 
 #undef SCHED_FEAT
 
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-        filp->private_data = inode->i_private;
-        return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-                size_t cnt, loff_t *ppos)
+static int sched_feat_show(struct seq_file *m, void *v)
 {
-        char *buf;
-        int r = 0;
-        int len = 0;
         int i;
 
         for (i = 0; sched_feat_names[i]; i++) {
-                len += strlen(sched_feat_names[i]);
-                len += 4;
-        }
-
-        buf = kmalloc(len + 2, GFP_KERNEL);
-        if (!buf)
-                return -ENOMEM;
-
-        for (i = 0; sched_feat_names[i]; i++) {
-                if (sysctl_sched_features & (1UL << i))
-                        r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-                else
-                        r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+                if (!(sysctl_sched_features & (1UL << i)))
+                        seq_puts(m, "NO_");
+                seq_printf(m, "%s ", sched_feat_names[i]);
         }
+        seq_puts(m, "\n");
 
-        r += sprintf(buf + r, "\n");
-        WARN_ON(r >= len + 2);
-
-        r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-        kfree(buf);
-
-        return r;
+        return 0;
 }
 
 static ssize_t
@@ -786,10 +759,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
         return cnt;
 }
 
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+        return single_open(filp, sched_feat_show, NULL);
+}
+
 static struct file_operations sched_feat_fops = {
         .open           = sched_feat_open,
-        .read           = sched_feat_read,
-        .write          = sched_feat_write,
+        .write          = sched_feat_write,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
 };
 
 static __init int sched_init_debug(void)
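
The two hunks above replace a hand-rolled read() implementation — size the output by hand, kmalloc() a buffer, sprintf() into it, copy it out with simple_read_from_buffer() — with the seq_file single-record pattern, which manages the output buffer itself. A minimal sketch of that pattern, using hypothetical demo_* names rather than anything from this commit:

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

/* single_open() arranges for this callback to produce the whole file body */
static int demo_show(struct seq_file *m, void *v)
{
        /* seq_file grows the buffer as needed; no manual length math */
        seq_puts(m, "feature_a NO_feature_b\n");
        return 0;
}

static int demo_open(struct inode *inode, struct file *filp)
{
        return single_open(filp, demo_show, NULL);
}

static const struct file_operations demo_fops = {
        .open           = demo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init demo_init(void)
{
        /* exposed under debugfs, analogous to what sched_init_debug() does */
        debugfs_create_file("demo_features", 0444, NULL, NULL, &demo_fops);
        return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");
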
@@ -1474,27 +1454,13 @@ static void
 update_group_shares_cpu(struct task_group *tg, int cpu,
                         unsigned long sd_shares, unsigned long sd_rq_weight)
 {
-        int boost = 0;
         unsigned long shares;
         unsigned long rq_weight;
 
         if (!tg->se[cpu])
                 return;
 
-        rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-        /*
-         * If there are currently no tasks on the cpu pretend there is one of
-         * average load so that when a new task gets to run here it will not
-         * get delayed by group starvation.
-         */
-        if (!rq_weight) {
-                boost = 1;
-                rq_weight = NICE_0_LOAD;
-        }
-
-        if (unlikely(rq_weight > sd_rq_weight))
-                rq_weight = sd_rq_weight;
+        rq_weight = tg->cfs_rq[cpu]->rq_weight;
 
         /*
          *              \Sum shares * rq_weight
@@ -1502,7 +1468,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
          *              \Sum rq_weight
          *
          */
-        shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+        shares = (sd_shares * rq_weight) / sd_rq_weight;
         shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
         if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1511,11 +1477,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
                 unsigned long flags;
 
                 spin_lock_irqsave(&rq->lock, flags);
-                /*
-                 * record the actual number of shares, not the boosted amount.
-                 */
-                tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-                tg->cfs_rq[cpu]->rq_weight = rq_weight;
+                tg->cfs_rq[cpu]->shares = shares;
 
                 __set_se_shares(tg->se[cpu], shares);
                 spin_unlock_irqrestore(&rq->lock, flags);
@@ -1529,13 +1491,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-        unsigned long rq_weight = 0;
+        unsigned long weight, rq_weight = 0;
         unsigned long shares = 0;
         struct sched_domain *sd = data;
         int i;
 
         for_each_cpu_mask(i, sd->span) {
-                rq_weight += tg->cfs_rq[i]->load.weight;
+                /*
+                 * If there are currently no tasks on the cpu pretend there
+                 * is one of average load so that when a new task gets to
+                 * run here it will not get delayed by group starvation.
+                 */
+                weight = tg->cfs_rq[i]->load.weight;
+                if (!weight)
+                        weight = NICE_0_LOAD;
+
+                tg->cfs_rq[i]->rq_weight = weight;
+                rq_weight += weight;
                 shares += tg->cfs_rq[i]->shares;
         }
 
@@ -1545,9 +1517,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
         if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
                 shares = tg->shares;
 
-        if (!rq_weight)
-                rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
         for_each_cpu_mask(i, sd->span)
                 update_group_shares_cpu(tg, i, shares, rq_weight);
 
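
Taken together, the four hunks above move the idle-cpu boost out of update_group_shares_cpu() and into tg_shares_up(): the pretend NICE_0_LOAD weight is now folded into the cached per-cpu rq_weight and into the \Sum rq_weight denominator, so the denominator can no longer be zero and the old "+ 1" fudge drops out. A worked example of the resulting arithmetic — illustrative userspace C, not kernel code:

#include <stdio.h>

#define NICE_0_LOAD 1024UL

/* shares_i = sd_shares * rq_weight_i / \Sum rq_weight */
static unsigned long group_share(unsigned long sd_shares,
                                 unsigned long cpu_weight,
                                 unsigned long total_weight)
{
        return sd_shares * cpu_weight / total_weight;
}

int main(void)
{
        /* cpu0 idle: weight 0 is replaced by one average (nice-0) task */
        unsigned long w0 = NICE_0_LOAD;
        /* cpu1 busy: three nice-0 tasks */
        unsigned long w1 = 3 * NICE_0_LOAD;
        unsigned long total = w0 + w1;

        printf("cpu0 %lu cpu1 %lu\n",
               group_share(1024, w0, total),   /* 256 */
               group_share(1024, w1, total));  /* 768 */
        return 0;
}
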
@@ -2838,7 +2807,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
         return ret;
 }
 
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(busiest->lock)
 {
         spin_unlock(&busiest->lock);
@@ -6126,7 +6095,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
@@ -6636,28 +6604,6 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-        switch (lvl) {
-        case SD_LV_NONE:
-                return "NONE";
-        case SD_LV_SIBLING:
-                return "SIBLING";
-        case SD_LV_MC:
-                return "MC";
-        case SD_LV_CPU:
-                return "CPU";
-        case SD_LV_NODE:
-                return "NODE";
-        case SD_LV_ALLNODES:
-                return "ALLNODES";
-        case SD_LV_MAX:
-                return "MAX";
-
-        }
-        return "MAX";
-}
-
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                   cpumask_t *groupmask)
 {
@@ -6677,8 +6623,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 return -1;
         }
 
-        printk(KERN_CONT "span %s level %s\n",
-                str, sd_level_to_string(sd->level));
+        printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
         if (!cpu_isset(cpu, sd->span)) {
                 printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -7334,13 +7279,21 @@ struct allmasks {
 };
 
 #if NR_CPUS > 128
-#define SCHED_CPUMASK_ALLOC             1
-#define SCHED_CPUMASK_FREE(v)           kfree(v)
-#define SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
+#define SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+        *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+        kfree(masks);
+}
 #else
-#define SCHED_CPUMASK_ALLOC             0
-#define SCHED_CPUMASK_FREE(v)
-#define SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
+#define SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
 #endif
 
 #define SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
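
The hunk above trades the SCHED_CPUMASK_ALLOC/SCHED_CPUMASK_FREE() macros for static inline functions, so both configurations type-check their arguments and the call sites below lose their #if guards; in the on-stack branch the alloc is a no-op and the pointer already refers to the local struct, so the !allmasks error path is dead code the compiler can discard. A stripped-down sketch of the pattern, with made-up names (USE_HEAP_MASKS standing in for NR_CPUS > 128):

#include <stdlib.h>

struct scratch { unsigned long masks[16]; };

#ifdef USE_HEAP_MASKS
#define SCRATCH_DECLARE(v)      struct scratch *v
static inline void scratch_alloc(struct scratch **s)
{
        *s = malloc(sizeof(**s));       /* may fail: caller checks *s */
}
static inline void scratch_free(struct scratch *s)
{
        free(s);
}
#else
#define SCRATCH_DECLARE(v)      struct scratch _v, *v = &_v
static inline void scratch_alloc(struct scratch **s) { }  /* v already set */
static inline void scratch_free(struct scratch *s) { }
#endif
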
@@ -7416,9 +7369,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                 return -ENOMEM;
         }
 
-#if SCHED_CPUMASK_ALLOC
         /* get space for all scratch cpumask variables */
-        allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+        sched_cpumask_alloc(&allmasks);
         if (!allmasks) {
                 printk(KERN_WARNING "Cannot alloc cpumask array\n");
                 kfree(rd);
@@ -7427,7 +7379,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
                 return -ENOMEM;
         }
-#endif
+
         tmpmask = (cpumask_t *)allmasks;
 
 
@@ -7681,13 +7633,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                 cpu_attach_domain(sd, rd, i);
         }
 
-        SCHED_CPUMASK_FREE((void *)allmasks);
+        sched_cpumask_free(allmasks);
         return 0;
 
 #ifdef CONFIG_NUMA
 error:
         free_sched_groups(cpu_map, tmpmask);
-        SCHED_CPUMASK_FREE((void *)allmasks);
+        sched_cpumask_free(allmasks);
         kfree(rd);
         return -ENOMEM;
 #endif
@@ -7751,8 +7703,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
         cpumask_t tmpmask;
         int i;
 
-        unregister_sched_domain_sysctl();
-
         for_each_cpu_mask_nr(i, *cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
         synchronize_sched();
@@ -7830,7 +7780,7 @@ match1:
                 ndoms_cur = 0;
                 doms_new = &fallback_doms;
                 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-                dattr_new = NULL;
+                WARN_ON_ONCE(dattr_new);
         }
 
         /* Build new domains */
@@ -8490,7 +8440,7 @@ static
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
         struct cfs_rq *cfs_rq;
-        struct sched_entity *se, *parent_se;
+        struct sched_entity *se;
         struct rq *rq;
         int i;
 
@@ -8506,18 +8456,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
 
-                cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-                                      GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+                                      GFP_KERNEL, cpu_to_node(i));
                 if (!cfs_rq)
                         goto err;
 
-                se = kmalloc_node(sizeof(struct sched_entity),
-                                  GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                se = kzalloc_node(sizeof(struct sched_entity),
+                                  GFP_KERNEL, cpu_to_node(i));
                 if (!se)
                         goto err;
 
-                parent_se = parent ? parent->se[i] : NULL;
-                init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+                init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
         }
 
         return 1;
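
The allocation switch here is mechanical — kzalloc_node() is kmalloc_node() with __GFP_ZERO implied — while the parent handling tightens: the parent ? ... : NULL fallback goes away, the new code assuming every caller now passes a valid parent group. A one-function sketch of the equivalence (hypothetical helper, kernel-style C):

#include <linux/slab.h>
#include <linux/topology.h>

/* both forms below return zeroed, node-local memory */
static struct cfs_rq *alloc_cfs_rq_on(int cpu)
{
        return kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
                            cpu_to_node(cpu));
        /* equivalent to:
         * kmalloc_node(sizeof(struct cfs_rq),
         *              GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
         */
}
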
@@ -8578,7 +8527,7 @@ static
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
         struct rt_rq *rt_rq;
-        struct sched_rt_entity *rt_se, *parent_se;
+        struct sched_rt_entity *rt_se;
         struct rq *rq;
         int i;
 
@@ -8595,18 +8544,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
 
-                rt_rq = kmalloc_node(sizeof(struct rt_rq),
-                                GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                rt_rq = kzalloc_node(sizeof(struct rt_rq),
+                                GFP_KERNEL, cpu_to_node(i));
                 if (!rt_rq)
                         goto err;
 
-                rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-                                GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+                                GFP_KERNEL, cpu_to_node(i));
                 if (!rt_se)
                         goto err;
 
-                parent_se = parent ? parent->rt_se[i] : NULL;
-                init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+                init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
         }
 
         return 1;
@@ -9249,11 +9197,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
         struct cgroup_subsys_state css;
         /* cpuusage holds pointer to a u64-type object on every cpu */
         u64 *cpuusage;
+        struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9287,6 +9236,9 @@ static struct cgroup_subsys_state *cpuacct_create(
                 return ERR_PTR(-ENOMEM);
         }
 
+        if (cgrp->parent)
+                ca->parent = cgroup_ca(cgrp->parent);
+
         return &ca->css;
 }
 
@@ -9366,14 +9318,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
         struct cpuacct *ca;
+        int cpu;
 
         if (!cpuacct_subsys.active)
                 return;
 
+        cpu = task_cpu(tsk);
         ca = task_ca(tsk);
-        if (ca) {
-                u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+        for (; ca; ca = ca->parent) {
+                u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
 }
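
The three cpuacct hunks give each group a parent pointer at creation time and turn the single charge into a walk up the hierarchy, so CPU time consumed in a child group is also visible in every ancestor's usage. A self-contained model of that walk — illustrative userspace C, not the kernel code:

#include <stdio.h>

struct acct {
        unsigned long long usage;
        struct acct *parent;
};

/* charge propagates from the task's group up to the root */
static void charge(struct acct *ca, unsigned long long cputime)
{
        for (; ca; ca = ca->parent)
                ca->usage += cputime;
}

int main(void)
{
        struct acct root  = { 0, NULL };
        struct acct child = { 0, &root };

        charge(&child, 100);
        printf("child=%llu root=%llu\n", child.usage, root.usage);
        /* prints: child=100 root=100 */
        return 0;
}
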