-rw-r--r--  Documentation/controllers/cpuacct.txt    32
-rw-r--r--  Documentation/scheduler/sched-arch.txt    4
-rw-r--r--  arch/ia64/Kconfig                          2
-rw-r--r--  arch/m32r/Kconfig                          2
-rw-r--r--  arch/mips/Kconfig                          2
-rw-r--r--  arch/powerpc/Kconfig                       2
-rw-r--r--  arch/x86/Kconfig                           4
-rw-r--r--  include/asm-m32r/system.h                  2
-rw-r--r--  include/linux/sched.h                      3
-rw-r--r--  kernel/Makefile                            3
-rw-r--r--  kernel/sched.c                           267
-rw-r--r--  kernel/sched_debug.c                      57
-rw-r--r--  kernel/sched_rt.c                          7
-rw-r--r--  kernel/user.c                              2
14 files changed, 205 insertions, 184 deletions
diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt
new file mode 100644
index 000000000000..bb775fbe43d7
--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group includes all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/cgroups/cpuacct.usage also.
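
As a quick illustration of the interface the new document describes, a user-space reader of the per-group counter might look like the sketch below. It is not part of this patch; the /cgroups/g1 path is simply the example hierarchy created in the text above.

/* Sketch: read a group's accumulated CPU time from cpuacct.usage. */
#include <stdio.h>

int main(void)
{
	unsigned long long ns;
	FILE *f = fopen("/cgroups/g1/cpuacct.usage", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &ns) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected cpuacct.usage format\n");
		return 1;
	}
	fclose(f);
	/* the kernel reports the group's total CPU time in nanoseconds */
	printf("g1 used %.3f s of CPU time\n", ns / 1e9);
	return 0;
}
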
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 941615a9769b..d43dbcbd163b 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -8,7 +8,7 @@ Context switch
 By default, the switch_to arch function is called with the runqueue
 locked. This is usually not a problem unless switch_to may need to
 take the runqueue lock. This is usually due to a wake up operation in
-the context switch. See include/asm-ia64/system.h for an example.
+the context switch. See arch/ia64/include/asm/system.h for an example.
 
 To request the scheduler call switch_to with the runqueue unlocked,
 you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
@@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
 introduce a significant interrupt latency by adding the line
 `#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
 unlocked context switches. This define also implies
-`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
+`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
 example.
 
 
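
For reference, the two opt-ins described in the text above amount to an architecture adding one or both defines to its system.h. The fragment below uses a made-up arch ("foo") purely for illustration and is not part of this patch.

/* Hypothetical arch/foo/include/asm/system.h fragment (illustration only) */
#ifndef _ASM_FOO_SYSTEM_H
#define _ASM_FOO_SYSTEM_H

/* Ask the scheduler to call switch_to() with the runqueue unlocked. */
#define __ARCH_WANT_UNLOCKED_CTXSW

/* Additionally allow interrupts to stay enabled across the context
 * switch; per the documentation this implies __ARCH_WANT_UNLOCKED_CTXSW. */
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW

#endif /* _ASM_FOO_SYSTEM_H */
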
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6bd91ed7cd03..7fa8f615ba6e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -99,7 +99,7 @@ config GENERIC_IOMAP
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index dbaed4a63815..29047d5c259a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f4af967a6b30..a5255e7c79e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 525c13a4de93..adb23ea1c1ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,7 +141,7 @@ config GENERIC_NVRAM
 	bool
 	default y if PPC32
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..7b7d2764a215 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -367,10 +367,10 @@ config X86_RDC321X
 	  as R-8610-(G).
 	  If you don't have one of these chips, you should say N here.
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
-	depends on X86_32
+	depends on X86
 	help
 	  Calculate simpler /proc/<PID>/wchan values. If this option
 	  is disabled then wchan values will recurse back to the
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 70a57c8c002b..c980f5ba8de7 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -23,7 +23,7 @@
  */
 
 #if defined(CONFIG_FRAME_POINTER) || \
-	!defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER)
+	!defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
 #define M32R_PUSH_FP "	push fp\n"
 #define M32R_POP_FP  "	pop fp\n"
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 644ffbda17ca..d8733f07d80b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -259,8 +259,6 @@ static inline int select_nohz_load_balancer(int cpu)
 }
 #endif
 
-extern unsigned long rt_needs_cpu(int cpu);
-
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
@@ -2220,6 +2218,7 @@ extern void normalize_rt_tasks(void);
 extern struct task_group init_task_group;
 #ifdef CONFIG_USER_SCHED
 extern struct task_group root_task_group;
+extern void set_tg_uid(struct user_struct *user);
 #endif
 
 extern struct task_group *sched_create_group(struct task_group *parent);
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..6a212b842d86 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -19,7 +19,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -pg
 endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -90,7 +89,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 
-ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only. Why this used to be enabled for all architectures is beyond
 # me. I suspect most platforms don't need this, but until we know that for sure
diff --git a/kernel/sched.c b/kernel/sched.c
index b7480fb5c3dc..152828239ef0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -261,6 +261,10 @@ struct task_group {
 	struct cgroup_subsys_state css;
 #endif
 
+#ifdef CONFIG_USER_SCHED
+	uid_t uid;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
@@ -286,6 +290,12 @@ struct task_group {
 
 #ifdef CONFIG_USER_SCHED
 
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+	user->tg->uid = user->uid;
+}
+
 /*
  * Root task group.
  * Every UID task group (including init_task_group aka UID-0) will
@@ -703,45 +713,18 @@ static __read_mostly char *sched_feat_names[] = {
 
 #undef SCHED_FEAT
 
-static int sched_feat_open(struct inode *inode, struct file *filp)
+static int sched_feat_show(struct seq_file *m, void *v)
 {
-	filp->private_data = inode->i_private;
-	return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-		size_t cnt, loff_t *ppos)
-{
-	char *buf;
-	int r = 0;
-	int len = 0;
 	int i;
 
 	for (i = 0; sched_feat_names[i]; i++) {
-		len += strlen(sched_feat_names[i]);
-		len += 4;
-	}
-
-	buf = kmalloc(len + 2, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	for (i = 0; sched_feat_names[i]; i++) {
-		if (sysctl_sched_features & (1UL << i))
-			r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-		else
-			r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+		if (!(sysctl_sched_features & (1UL << i)))
+			seq_puts(m, "NO_");
+		seq_printf(m, "%s ", sched_feat_names[i]);
 	}
+	seq_puts(m, "\n");
 
-	r += sprintf(buf + r, "\n");
-	WARN_ON(r >= len + 2);
-
-	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-	kfree(buf);
-
-	return r;
+	return 0;
 }
 
 static ssize_t
@@ -786,10 +769,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_feat_show, NULL);
+}
+
 static struct file_operations sched_feat_fops = {
 	.open		= sched_feat_open,
-	.read		= sched_feat_read,
-	.write		= sched_feat_write,
+	.write		= sched_feat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 static __init int sched_init_debug(void)
@@ -1474,27 +1464,13 @@ static void
 update_group_shares_cpu(struct task_group *tg, int cpu,
 			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
-	int boost = 0;
 	unsigned long shares;
 	unsigned long rq_weight;
 
 	if (!tg->se[cpu])
 		return;
 
-	rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-	/*
-	 * If there are currently no tasks on the cpu pretend there is one of
-	 * average load so that when a new task gets to run here it will not
-	 * get delayed by group starvation.
-	 */
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	if (unlikely(rq_weight > sd_rq_weight))
-		rq_weight = sd_rq_weight;
+	rq_weight = tg->cfs_rq[cpu]->rq_weight;
 
 	/*
 	 *		\Sum shares * rq_weight
@@ -1502,7 +1478,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *		\Sum rq_weight
 	 *
 	 */
-	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = (sd_shares * rq_weight) / sd_rq_weight;
 	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
 	if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1511,11 +1487,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
-		/*
-		 * record the actual number of shares, not the boosted amount.
-		 */
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		tg->cfs_rq[cpu]->rq_weight = rq_weight;
+		tg->cfs_rq[cpu]->shares = shares;
 
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1529,13 +1501,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long rq_weight = 0;
+	unsigned long weight, rq_weight = 0;
 	unsigned long shares = 0;
 	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
-		rq_weight += tg->cfs_rq[i]->load.weight;
+		/*
+		 * If there are currently no tasks on the cpu pretend there
+		 * is one of average load so that when a new task gets to
+		 * run here it will not get delayed by group starvation.
+		 */
+		weight = tg->cfs_rq[i]->load.weight;
+		if (!weight)
+			weight = NICE_0_LOAD;
+
+		tg->cfs_rq[i]->rq_weight = weight;
+		rq_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
@@ -1545,9 +1527,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	if (!rq_weight)
-		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
 	for_each_cpu_mask(i, sd->span)
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
@@ -1612,6 +1591,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+	if (unlikely(!spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			spin_unlock(&this_rq->lock);
+			spin_lock(&busiest->lock);
+			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2812,40 +2824,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 }
 
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	int ret = 0;
-
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
-			ret = 1;
-		} else
-			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
-	}
-	return ret;
-}
-
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(busiest->lock)
-{
-	spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -6126,7 +6104,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
@@ -6636,28 +6613,6 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-	switch (lvl) {
-	case SD_LV_NONE:
-		return "NONE";
-	case SD_LV_SIBLING:
-		return "SIBLING";
-	case SD_LV_MC:
-		return "MC";
-	case SD_LV_CPU:
-		return "CPU";
-	case SD_LV_NODE:
-		return "NODE";
-	case SD_LV_ALLNODES:
-		return "ALLNODES";
-	case SD_LV_MAX:
-		return "MAX";
-
-	}
-	return "MAX";
-}
-
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				  cpumask_t *groupmask)
 {
@@ -6677,8 +6632,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %s level %s\n",
-		str, sd_level_to_string(sd->level));
+	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
 	if (!cpu_isset(cpu, sd->span)) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -7334,13 +7288,21 @@ struct allmasks {
 };
 
 #if NR_CPUS > 128
-#define	SCHED_CPUMASK_ALLOC		1
-#define	SCHED_CPUMASK_FREE(v)		kfree(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+	*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+	kfree(masks);
+}
 #else
-#define	SCHED_CPUMASK_ALLOC		0
-#define	SCHED_CPUMASK_FREE(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
 #endif
 
 #define	SCHED_CPUMASK_VAR(v, a)	cpumask_t *v = (cpumask_t *) \
@@ -7416,9 +7378,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		return -ENOMEM;
 	}
 
-#if SCHED_CPUMASK_ALLOC
 	/* get space for all scratch cpumask variables */
-	allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+	sched_cpumask_alloc(&allmasks);
 	if (!allmasks) {
 		printk(KERN_WARNING "Cannot alloc cpumask array\n");
 		kfree(rd);
@@ -7427,7 +7388,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 		return -ENOMEM;
 	}
-#endif
+
 	tmpmask = (cpumask_t *)allmasks;
 
 
@@ -7681,13 +7642,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	return 0;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	kfree(rd);
 	return -ENOMEM;
 #endif
@@ -7751,8 +7712,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	cpumask_t tmpmask;
 	int i;
 
-	unregister_sched_domain_sysctl();
-
 	for_each_cpu_mask_nr(i, *cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
@@ -7830,7 +7789,7 @@ match1:
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
 		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
+		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
@@ -8490,7 +8449,7 @@ static
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se, *parent_se;
+	struct sched_entity *se;
 	struct rq *rq;
 	int i;
 
@@ -8506,18 +8465,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-				      GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
 			goto err;
 
-		se = kmalloc_node(sizeof(struct sched_entity),
-				  GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		se = kzalloc_node(sizeof(struct sched_entity),
+				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err;
 
-		parent_se = parent ? parent->se[i] : NULL;
-		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
 	}
 
 	return 1;
@@ -8578,7 +8536,7 @@ static
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se, *parent_se;
+	struct sched_rt_entity *rt_se;
 	struct rq *rq;
 	int i;
 
@@ -8595,18 +8553,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		rt_rq = kmalloc_node(sizeof(struct rt_rq),
-				     GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_rq)
 			goto err;
 
-		rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-				     GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_se)
 			goto err;
 
-		parent_se = parent ? parent->rt_se[i] : NULL;
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
 	}
 
 	return 1;
@@ -9249,11 +9206,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9287,6 +9245,9 @@ static struct cgroup_subsys_state *cpuacct_create(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9366,14 +9327,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }
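
The update_group_shares_cpu() hunks above keep the comment shares = (\Sum shares * rq_weight) / \Sum rq_weight, now dividing by sd_rq_weight directly instead of sd_rq_weight + 1. As a quick sanity check, the sketch below runs that formula on made-up numbers; it is only an illustration, not code from the patch.

#include <stdio.h>

int main(void)
{
	unsigned long sd_shares = 1024;              /* tg->shares            */
	unsigned long rq_weight[2] = { 2048, 1024 }; /* per-cpu cfs_rq weight */
	unsigned long sd_rq_weight = rq_weight[0] + rq_weight[1];
	int i;

	for (i = 0; i < 2; i++)
		printf("cpu%d: shares = %lu\n", i,
		       sd_shares * rq_weight[i] / sd_rq_weight);
	/* prints 682 and 341: each cpu receives a slice of the group's
	 * shares proportional to the group's runqueue weight there */
	return 0;
}
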
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 26ed8e3d1c15..4293cfa9681d 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec)
 
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void print_cfs_group_stats(struct seq_file *m, int cpu,
+		struct task_group *tg)
+{
+	struct sched_entity *se = tg->se[cpu];
+	if (!se)
+		return;
+
+#define P(F) \
+	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
+#define PN(F) \
+	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+
+	PN(se->exec_start);
+	PN(se->vruntime);
+	PN(se->sum_exec_runtime);
+#ifdef CONFIG_SCHEDSTATS
+	PN(se->wait_start);
+	PN(se->sleep_start);
+	PN(se->block_start);
+	PN(se->sleep_max);
+	PN(se->block_max);
+	PN(se->exec_max);
+	PN(se->slice_max);
+	PN(se->wait_max);
+	PN(se->wait_sum);
+	P(se->wait_count);
+#endif
+	P(se->load.weight);
+#undef PN
+#undef P
+}
+#endif
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -121,20 +155,19 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
 	char path[128] = "";
-	struct cgroup *cgroup = NULL;
 	struct task_group *tg = cfs_rq->tg;
 
-	if (tg)
-		cgroup = tg->css.cgroup;
-
-	if (cgroup)
-		cgroup_path(cgroup, path, sizeof(path));
+	cgroup_path(tg->css.cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
+#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
+	{
+		uid_t uid = cfs_rq->tg->uid;
+		SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
+	}
 #else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
 #endif
-
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -168,6 +201,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #ifdef CONFIG_SMP
 	SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
 #endif
+	print_cfs_group_stats(m, cpu, cfs_rq->tg);
 #endif
 }
 
@@ -175,14 +209,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
 #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
 	char path[128] = "";
-	struct cgroup *cgroup = NULL;
 	struct task_group *tg = rt_rq->tg;
 
-	if (tg)
-		cgroup = tg->css.cgroup;
-
-	if (cgroup)
-		cgroup_path(cgroup, path, sizeof(path));
+	cgroup_path(tg->css.cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
 #else
@@ -272,7 +301,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
+	SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d9ba9d5f99d6..587a16e2a8f5 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq)
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 
-		spin_lock(&rt_rq->rt_runtime_lock);
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
+			spin_lock(&rt_rq->rt_runtime_lock);
 			rt_rq->rt_time += delta_exec;
 			if (sched_rt_runtime_exceeded(rt_rq))
 				resched_task(curr);
+			spin_unlock(&rt_rq->rt_runtime_lock);
 		}
-		spin_unlock(&rt_rq->rt_runtime_lock);
 	}
 }
 
@@ -909,9 +909,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
-
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
diff --git a/kernel/user.c b/kernel/user.c
index 39d6159fae43..cec2224bc9f5 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,6 +101,8 @@ static int sched_create_user(struct user_struct *up)
 	if (IS_ERR(up->tg))
 		rc = -ENOMEM;
 
+	set_tg_uid(up);
+
 	return rc;
 }
 