Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.freezer      2
-rw-r--r--  kernel/Makefile             2
-rw-r--r--  kernel/cgroup.c           265
-rw-r--r--  kernel/cgroup_debug.c       4
-rw-r--r--  kernel/cgroup_freezer.c   379
-rw-r--r--  kernel/configs.c            9
-rw-r--r--  kernel/cpuset.c            17
-rw-r--r--  kernel/freezer.c          154
-rw-r--r--  kernel/kexec.c              2
-rw-r--r--  kernel/kthread.c            5
-rw-r--r--  kernel/power/process.c    119
-rw-r--r--  kernel/ptrace.c             2
-rw-r--r--  kernel/rcupreempt.c         2
-rw-r--r--  kernel/sysctl.c            10
14 files changed, 723 insertions, 249 deletions
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 00000000000..a3bb4cb5253
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
+config FREEZER
+	def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e..066550aa61c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d..046c1609606 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
 
-	write_lock(&css_set_lock);
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
-
-	write_unlock(&css_set_lock);
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	int i;
-	struct css_set *cg = container_of(k, struct css_set, ref);
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 	unlink_css_set(cg);
+	write_unlock(&css_set_lock);
 
 	rcu_read_lock();
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
 	kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-	__release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-	__release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-	kref_get(&cg->ref);
+	atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set);
+	__put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set_taskexit);
+	__put_css_set(cg, 1);
 }
 
 /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
 		return NULL;
 	}
 
-	kref_init(&res->ref);
+	atomic_set(&res->refcount, 1);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
 	INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
 	.remount_fs = cgroup_remount,
 };
 
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+	init_rwsem(&cgrp->pids_mutex);
+}
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
 	struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 	root->number_of_cgroups = 1;
 	cgrp->root = root;
 	cgrp->top_cgroup = cgrp;
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
-	INIT_LIST_HEAD(&cgrp->css_sets);
-	INIT_LIST_HEAD(&cgrp->release_list);
+	init_cgroup_housekeeping(cgrp);
 }
 
 static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
 
 	read_lock(&css_set_lock);
 	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-		count += atomic_read(&link->cg->ref.refcount);
+		count += atomic_read(&link->cg->refcount);
 	}
 	read_unlock(&css_set_lock);
 	return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  * but we cannot guarantee that the information we produce is correct
  * unless we produce it entirely atomically.
  *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here). The struct
- * ctr_struct * is stored in file->private_data. Its resources will
- * be freed by release() when the file is closed. The array is used
- * to sprintf the PIDs and then used by read().
  */
-struct ctr_struct {
-	char *buf;
-	int bufsz;
-};
 
 /*
  * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
 	return *(pid_t *)a - *(pid_t *)b;
 }
 
+
 /*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
  */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 {
-	int cnt = 0;
-	int i;
+	/*
+	 * Initially we receive a position value that corresponds to
+	 * one more than the last pid shown (or 0 on the first call or
+	 * after a seek to the start). Use a binary-search to find the
+	 * next pid to display, if any
+	 */
+	struct cgroup *cgrp = s->private;
+	int index = 0, pid = *pos;
+	int *iter;
 
-	for (i = 0; i < npids; i++)
-		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-	return cnt;
+	down_read(&cgrp->pids_mutex);
+	if (pid) {
+		int end = cgrp->pids_length;
+		int i;
+		while (index < end) {
+			int mid = (index + end) / 2;
+			if (cgrp->tasks_pids[mid] == pid) {
+				index = mid;
+				break;
+			} else if (cgrp->tasks_pids[mid] <= pid)
+				index = mid + 1;
+			else
+				end = mid;
+		}
+	}
+	/* If we're off the end of the array, we're done */
+	if (index >= cgrp->pids_length)
+		return NULL;
+	/* Update the abstract position to be the actual pid that we found */
+	iter = cgrp->tasks_pids + index;
+	*pos = *iter;
+	return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+	struct cgroup *cgrp = s->private;
+	up_read(&cgrp->pids_mutex);
 }
 
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct cgroup *cgrp = s->private;
+	int *p = v;
+	int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+	/*
+	 * Advance to the next pid in the array. If this goes off the
+	 * end, we're done
+	 */
+	p++;
+	if (p >= end) {
+		return NULL;
+	} else {
+		*pos = *p;
+		return p;
+	}
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+	return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+	.start = cgroup_tasks_start,
+	.stop = cgroup_tasks_stop,
+	.next = cgroup_tasks_next,
+	.show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+	down_write(&cgrp->pids_mutex);
+	BUG_ON(!cgrp->pids_use_count);
+	if (!--cgrp->pids_use_count) {
+		kfree(cgrp->tasks_pids);
+		cgrp->tasks_pids = NULL;
+		cgrp->pids_length = 0;
+	}
+	up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	release_cgroup_pid_array(cgrp);
+	return seq_release(inode, file);
+}
+
+static struct file_operations cgroup_tasks_operations = {
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = cgroup_file_write,
+	.release = cgroup_tasks_release,
+};
+
 /*
- * Handle an open on 'tasks' file. Prepare a buffer listing the
+ * Handle an open on 'tasks' file. Prepare an array containing the
  * process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
  */
+
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
 	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-	struct ctr_struct *ctr;
 	pid_t *pidarray;
 	int npids;
-	char c;
+	int retval;
 
+	/* Nothing to do for write-only files */
 	if (!(file->f_mode & FMODE_READ))
 		return 0;
 
-	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-	if (!ctr)
-		goto err0;
-
 	/*
 	 * If cgroup gets more users after we read count, we won't have
 	 * enough space - tough. This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
 	 * show up until sometime later on.
 	 */
 	npids = cgroup_task_count(cgrp);
-	if (npids) {
-		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-		if (!pidarray)
-			goto err1;
-
-		npids = pid_array_load(pidarray, npids, cgrp);
-		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-		/* Call pid_array_to_buf() twice, first just to get bufsz */
-		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-		if (!ctr->buf)
-			goto err2;
-		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-		kfree(pidarray);
-	} else {
-		ctr->buf = NULL;
-		ctr->bufsz = 0;
-	}
-	file->private_data = ctr;
-	return 0;
-
-err2:
-	kfree(pidarray);
-err1:
-	kfree(ctr);
-err0:
-	return -ENOMEM;
-}
+	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+	if (!pidarray)
+		return -ENOMEM;
+	npids = pid_array_load(pidarray, npids, cgrp);
+	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
 
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
-				 struct cftype *cft,
-				 struct file *file, char __user *buf,
-				 size_t nbytes, loff_t *ppos)
-{
-	struct ctr_struct *ctr = file->private_data;
+	/*
+	 * Store the array in the cgroup, freeing the old
+	 * array if necessary
+	 */
+	down_write(&cgrp->pids_mutex);
+	kfree(cgrp->tasks_pids);
+	cgrp->tasks_pids = pidarray;
+	cgrp->pids_length = npids;
+	cgrp->pids_use_count++;
+	up_write(&cgrp->pids_mutex);
 
-	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+	file->f_op = &cgroup_tasks_operations;
 
-static int cgroup_tasks_release(struct inode *unused_inode,
-				struct file *file)
-{
-	struct ctr_struct *ctr;
-
-	if (file->f_mode & FMODE_READ) {
-		ctr = file->private_data;
-		kfree(ctr->buf);
-		kfree(ctr);
+	retval = seq_open(file, &cgroup_tasks_seq_operations);
+	if (retval) {
+		release_cgroup_pid_array(cgrp);
+		return retval;
 	}
+	((struct seq_file *)file->private_data)->private = cgrp;
 	return 0;
 }
 
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
 	{
 		.name = "tasks",
 		.open = cgroup_tasks_open,
-		.read = cgroup_tasks_read,
 		.write_u64 = cgroup_tasks_write,
 		.release = cgroup_tasks_release,
 		.private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	mutex_lock(&cgroup_mutex);
 
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
-	INIT_LIST_HEAD(&cgrp->css_sets);
-	INIT_LIST_HEAD(&cgrp->release_list);
+	init_cgroup_housekeeping(cgrp);
 
 	cgrp->parent = parent;
 	cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 int __init cgroup_init_early(void)
 {
 	int i;
-	kref_init(&init_css_set.ref);
-	kref_get(&init_css_set.ref);
+	atomic_set(&init_css_set.refcount, 1);
 	INIT_LIST_HEAD(&init_css_set.cg_links);
 	INIT_LIST_HEAD(&init_css_set.tasks);
 	INIT_HLIST_NODE(&init_css_set.hlist);
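
The css_set refcounting rework above replaces the embedded kref with a bare atomic_t so that the final reference drop can be serialized against css_set_lock readers. A minimal stand-alone sketch of that "atomic_dec_and_lock(), but for an rwlock" idiom follows; struct obj, obj_list, obj_lock and obj_free() are hypothetical names used only for illustration, not kernel API.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>		/* <linux/atomic.h> in later kernels */

struct obj {
	atomic_t refcount;
	struct list_head node;	/* lives on obj_list, walked under read_lock */
};

static LIST_HEAD(obj_list);
static DEFINE_RWLOCK(obj_lock);

static void obj_free(struct obj *o);	/* hypothetical destructor */

static void put_obj(struct obj *o)
{
	/* Fast path: drop a reference without the lock as long as the
	 * count cannot reach zero here (i.e. it is currently > 1). */
	if (atomic_add_unless(&o->refcount, -1, 1))
		return;

	/* Slow path: take the write lock so no reader holding
	 * read_lock(&obj_lock) can observe the count hitting zero. */
	write_lock(&obj_lock);
	if (!atomic_dec_and_test(&o->refcount)) {
		/* Somebody re-got the object in the meantime. */
		write_unlock(&obj_lock);
		return;
	}
	list_del(&o->node);	/* unlink while still write-locked */
	write_unlock(&obj_lock);
	obj_free(o);		/* safe: no new lookups can find it */
}
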
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c0..daca6209202 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
 	u64 count;
 
 	rcu_read_lock();
-	count = atomic_read(&current->cgroups->ref.refcount);
+	count = atomic_read(&current->cgroups->refcount);
 	rcu_read_unlock();
 	return count;
 }
@@ -90,7 +90,7 @@ static struct cftype files[] = {
 	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
-	}
+	},
 };
 
 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 00000000000..e9505695449
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,379 @@
+/*
+ * cgroup_freezer.c - control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+
+enum freezer_state {
+	CGROUP_THAWED = 0,
+	CGROUP_FREEZING,
+	CGROUP_FROZEN,
+};
+
+struct freezer {
+	struct cgroup_subsys_state css;
+	enum freezer_state state;
+	spinlock_t lock; /* protects _writes_ to state */
+};
+
+static inline struct freezer *cgroup_freezer(
+		struct cgroup *cgroup)
+{
+	return container_of(
+		cgroup_subsys_state(cgroup, freezer_subsys_id),
+		struct freezer, css);
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, freezer_subsys_id),
+			    struct freezer, css);
+}
+
+int cgroup_frozen(struct task_struct *task)
+{
+	struct freezer *freezer;
+	enum freezer_state state;
+
+	task_lock(task);
+	freezer = task_freezer(task);
+	state = freezer->state;
+	task_unlock(task);
+
+	return state == CGROUP_FROZEN;
+}
+
+/*
+ * cgroups_write_string() limits the size of freezer state strings to
+ * CGROUP_LOCAL_BUFFER_SIZE
+ */
+static const char *freezer_state_strs[] = {
+	"THAWED",
+	"FREEZING",
+	"FROZEN",
+};
+
+/*
+ * State diagram
+ * Transitions are caused by userspace writes to the freezer.state file.
+ * The values in parenthesis are state labels. The rest are edge labels.
+ *
+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ *    ^ ^                    |                     |
+ *    | \_______THAWED_______/                     |
+ *    \__________________________THAWED____________/
+ */
+
+struct cgroup_subsys freezer_subsys;
+
+/* Locks taken and their ordering
+ * ------------------------------
+ * css_set_lock
+ * cgroup_mutex (AKA cgroup_lock)
+ * task->alloc_lock (AKA task_lock)
+ * freezer->lock
+ * task->sighand->siglock
+ *
+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
+ *
+ * freezer_create(), freezer_destroy():
+ * cgroup_mutex [ by cgroup core ]
+ *
+ * can_attach():
+ * cgroup_mutex
+ *
+ * cgroup_frozen():
+ * task->alloc_lock (to get task's cgroup)
+ *
+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
+ * task->alloc_lock (to get task's cgroup)
+ *  freezer->lock
+ *   sighand->siglock (if the cgroup is freezing)
+ *
+ * freezer_read():
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *
+ * freezer_write() (freeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    sighand->siglock
+ *
+ * freezer_write() (unfreeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock (to prevent races with freeze_task())
+ *     sighand->siglock
+ */
+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
+						  struct cgroup *cgroup)
+{
+	struct freezer *freezer;
+
+	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+	if (!freezer)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&freezer->lock);
+	freezer->state = CGROUP_THAWED;
+	return &freezer->css;
+}
+
+static void freezer_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cgroup)
+{
+	kfree(cgroup_freezer(cgroup));
+}
+
+/* Task is frozen or will freeze immediately when next it gets woken */
+static bool is_task_frozen_enough(struct task_struct *task)
+{
+	return frozen(task) ||
+		(task_is_stopped_or_traced(task) && freezing(task));
+}
+
+/*
+ * The call to cgroup_lock() in the freezer.state write method prevents
+ * a write to that file racing against an attach, and hence the
+ * can_attach() result will remain valid until the attach completes.
+ */
+static int freezer_can_attach(struct cgroup_subsys *ss,
+			      struct cgroup *new_cgroup,
+			      struct task_struct *task)
+{
+	struct freezer *freezer;
+	int retval;
+
+	/* Anything frozen can't move or be moved to/from */
+
+	if (is_task_frozen_enough(task))
+		return -EBUSY;
+
+	freezer = cgroup_freezer(new_cgroup);
+	if (freezer->state == CGROUP_FROZEN)
+		return -EBUSY;
+
+	retval = 0;
+	task_lock(task);
+	freezer = task_freezer(task);
+	if (freezer->state == CGROUP_FROZEN)
+		retval = -EBUSY;
+	task_unlock(task);
+	return retval;
+}
+
+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	struct freezer *freezer;
+
+	task_lock(task);
+	freezer = task_freezer(task);
+	task_unlock(task);
+
+	BUG_ON(freezer->state == CGROUP_FROZEN);
+	spin_lock_irq(&freezer->lock);
+	/* Locking avoids race with FREEZING -> THAWED transitions. */
+	if (freezer->state == CGROUP_FREEZING)
+		freeze_task(task, true);
+	spin_unlock_irq(&freezer->lock);
+}
+
+/*
+ * caller must hold freezer->lock
+ */
+static void update_freezer_state(struct cgroup *cgroup,
+				 struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+	unsigned int nfrozen = 0, ntotal = 0;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		ntotal++;
+		if (is_task_frozen_enough(task))
+			nfrozen++;
+	}
+
+	/*
+	 * Transition to FROZEN when no new tasks can be added ensures
+	 * that we never exist in the FROZEN state while there are unfrozen
+	 * tasks.
+	 */
+	if (nfrozen == ntotal)
+		freezer->state = CGROUP_FROZEN;
+	else if (nfrozen > 0)
+		freezer->state = CGROUP_FREEZING;
+	else
+		freezer->state = CGROUP_THAWED;
+	cgroup_iter_end(cgroup, &it);
+}
+
+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+			struct seq_file *m)
+{
+	struct freezer *freezer;
+	enum freezer_state state;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+
+	freezer = cgroup_freezer(cgroup);
+	spin_lock_irq(&freezer->lock);
+	state = freezer->state;
+	if (state == CGROUP_FREEZING) {
+		/* We change from FREEZING to FROZEN lazily if the cgroup was
+		 * only partially frozen when we exitted write. */
+		update_freezer_state(cgroup, freezer);
+		state = freezer->state;
+	}
+	spin_unlock_irq(&freezer->lock);
+	cgroup_unlock();
+
+	seq_puts(m, freezer_state_strs[state]);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+	unsigned int num_cant_freeze_now = 0;
+
+	freezer->state = CGROUP_FREEZING;
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		if (!freeze_task(task, true))
+			continue;
+		if (is_task_frozen_enough(task))
+			continue;
+		if (!freezing(task) && !freezer_should_skip(task))
+			num_cant_freeze_now++;
+	}
+	cgroup_iter_end(cgroup, &it);
+
+	return num_cant_freeze_now ? -EBUSY : 0;
+}
+
+static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		int do_wake;
+
+		task_lock(task);
+		do_wake = __thaw_process(task);
+		task_unlock(task);
+		if (do_wake)
+			wake_up_process(task);
+	}
+	cgroup_iter_end(cgroup, &it);
+	freezer->state = CGROUP_THAWED;
+
+	return 0;
+}
+
+static int freezer_change_state(struct cgroup *cgroup,
+				enum freezer_state goal_state)
+{
+	struct freezer *freezer;
+	int retval = 0;
+
+	freezer = cgroup_freezer(cgroup);
+	spin_lock_irq(&freezer->lock);
+	update_freezer_state(cgroup, freezer);
+	if (goal_state == freezer->state)
+		goto out;
+	switch (freezer->state) {
+	case CGROUP_THAWED:
+		retval = try_to_freeze_cgroup(cgroup, freezer);
+		break;
+	case CGROUP_FREEZING:
+		if (goal_state == CGROUP_FROZEN) {
+			/* Userspace is retrying after
+			 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
+			retval = try_to_freeze_cgroup(cgroup, freezer);
+			break;
+		}
+		/* state == FREEZING and goal_state == THAWED, so unfreeze */
+	case CGROUP_FROZEN:
+		retval = unfreeze_cgroup(cgroup, freezer);
+		break;
+	default:
+		break;
+	}
+out:
+	spin_unlock_irq(&freezer->lock);
+
+	return retval;
+}
+
+static int freezer_write(struct cgroup *cgroup,
+			 struct cftype *cft,
+			 const char *buffer)
+{
+	int retval;
+	enum freezer_state goal_state;
+
+	if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
+		goal_state = CGROUP_THAWED;
+	else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
+		goal_state = CGROUP_FROZEN;
+	else
+		return -EIO;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+	retval = freezer_change_state(cgroup, goal_state);
+	cgroup_unlock();
+	return retval;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "state",
+		.read_seq_string = freezer_read,
+		.write_string = freezer_write,
+	},
+};
+
+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
+{
+	return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys freezer_subsys = {
+	.name		= "freezer",
+	.create		= freezer_create,
+	.destroy	= freezer_destroy,
+	.populate	= freezer_populate,
+	.subsys_id	= freezer_subsys_id,
+	.can_attach	= freezer_can_attach,
+	.attach		= NULL,
+	.fork		= freezer_fork,
+	.exit		= NULL,
+};
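
A rough userspace sketch of driving the interface that cgroup_freezer.c implements, not part of the patch itself: the freezer.state file accepts the THAWED and FROZEN strings above and reports THAWED, FREEZING or FROZEN on read. The mount point /cgroup/freezer and the group name frozen_group below are assumptions made for illustration; only the file names, state strings, and the EBUSY retry behaviour come from the patch.

/* Hypothetical example: attach a pid to a freezer cgroup and freeze it. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(int argc, char **argv)
{
	const char *grp = "/cgroup/freezer/frozen_group";	/* assumed mount/group */
	char path[256], state[32];
	int fd, n;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <pid-to-freeze>\n", argv[0]);
		return 1;
	}

	/* Attach the task, then request the FROZEN state.  The write may
	 * legitimately fail with EBUSY and be retried, as the FREEZING ->
	 * FROZEN retry comment in freezer_change_state() notes. */
	snprintf(path, sizeof(path), "%s/tasks", grp);
	if (write_file(path, argv[1]))
		perror("attach task");
	snprintf(path, sizeof(path), "%s/freezer.state", grp);
	if (write_file(path, "FROZEN"))
		perror("write FROZEN");

	/* Read the state back; it reports THAWED, FREEZING or FROZEN. */
	fd = open(path, O_RDONLY);
	if (fd >= 0 && (n = read(fd, state, sizeof(state) - 1)) > 0) {
		state[n] = '\0';
		printf("state: %s", state);
	}
	if (fd >= 0)
		close(fd);
	return 0;
}
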
diff --git a/kernel/configs.c b/kernel/configs.c
index 4c345210ed8..abaee684ecb 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -54,9 +54,6 @@
 
 #ifdef CONFIG_IKCONFIG_PROC
 
-/**************************************************/
-/* globals and useful constants */
-
 static ssize_t
 ikconfig_read_current(struct file *file, char __user *buf,
 		      size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
 	.read = ikconfig_read_current,
 };
 
-/***************************************************/
-/* ikconfig_init: start up everything we need to */
-
 static int __init ikconfig_init(void)
 {
 	struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
 	return 0;
 }
 
-/***************************************************/
-/* ikconfig_cleanup: clean up our mess */
-
 static void __exit ikconfig_cleanup(void)
 {
 	remove_proc_entry("config.gz", NULL);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index eab7bd6628e..3e00526f52e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 {
 	struct cpuset trialcs;
 	int err;
-	int cpus_nonempty, balance_flag_changed;
+	int balance_flag_changed;
 
 	trialcs = *cs;
 	if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	if (err < 0)
 		return err;
 
-	cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
 	balance_flag_changed = (is_sched_load_balance(cs) !=
 				is_sched_load_balance(&trialcs));
 
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpus_nonempty && balance_flag_changed)
+	if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
 		async_rebuild_sched_domains();
 
 	return 0;
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t");
-	m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
-				      task->cpus_allowed);
+	seq_cpumask(m, &task->cpus_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Cpus_allowed_list:\t");
-	m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
-				      task->cpus_allowed);
+	seq_cpumask_list(m, &task->cpus_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed:\t");
-	m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
-				       task->mems_allowed);
+	seq_nodemask(m, &task->mems_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed_list:\t");
-	m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
-				       task->mems_allowed);
+	seq_nodemask_list(m, &task->mems_allowed);
 	seq_printf(m, "\n");
 }
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644
index 00000000000..ba6248b323e
--- /dev/null
+++ b/kernel/freezer.c
@@ -0,0 +1,154 @@
+/*
+ * kernel/freezer.c - Function to freeze a process
+ *
+ * Originally from kernel/power/process.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+	if (!unlikely(current->flags & PF_NOFREEZE)) {
+		current->flags |= PF_FROZEN;
+		wmb();
+	}
+	clear_freeze_flag(current);
+}
+
+/* Refrigerator is place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+	/* Hmm, should we be allowed to suspend when there are realtime
+	   processes around? */
+	long save;
+
+	task_lock(current);
+	if (freezing(current)) {
+		frozen_process();
+		task_unlock(current);
+	} else {
+		task_unlock(current);
+		return;
+	}
+	save = current->state;
+	pr_debug("%s entered refrigerator\n", current->comm);
+
+	spin_lock_irq(&current->sighand->siglock);
+	recalc_sigpending(); /* We sent fake signal, clean it up */
+	spin_unlock_irq(&current->sighand->siglock);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!frozen(current))
+			break;
+		schedule();
+	}
+	pr_debug("%s left refrigerator\n", current->comm);
+	__set_current_state(save);
+}
+EXPORT_SYMBOL(refrigerator);
+
+static void fake_signal_wake_up(struct task_struct *p)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->sighand->siglock, flags);
+	signal_wake_up(p, 0);
+	spin_unlock_irqrestore(&p->sighand->siglock, flags);
+}
+
+/**
+ *	freeze_task - send a freeze request to given task
+ *	@p: task to send the request to
+ *	@sig_only: if set, the request will only be sent if the task has the
+ *		PF_FREEZER_NOSIG flag unset
+ *	Return value: 'false', if @sig_only is set and the task has
+ *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ *
+ *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
+ *	either sending a fake signal to it or waking it up, depending on whether
+ *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
+ *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ *	TIF_FREEZE flag will not be set.
+ */
+bool freeze_task(struct task_struct *p, bool sig_only)
+{
+	/*
+	 * We first check if the task is freezing and next if it has already
+	 * been frozen to avoid the race with frozen_process() which first marks
+	 * the task as frozen and next clears its TIF_FREEZE.
+	 */
+	if (!freezing(p)) {
+		rmb();
+		if (frozen(p))
+			return false;
+
+		if (!sig_only || should_send_signal(p))
+			set_freeze_flag(p);
+		else
+			return false;
+	}
+
+	if (should_send_signal(p)) {
+		if (!signal_pending(p))
+			fake_signal_wake_up(p);
+	} else if (sig_only) {
+		return false;
+	} else {
+		wake_up_state(p, TASK_INTERRUPTIBLE);
+	}
+
+	return true;
+}
+
+void cancel_freezing(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (freezing(p)) {
+		pr_debug("  clean up: %s\n", p->comm);
+		clear_freeze_flag(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		recalc_sigpending_and_wake(p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails.  Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
+int __thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		return 1;
+	}
+	clear_freeze_flag(p);
+	return 0;
+}
+
+int thaw_process(struct task_struct *p)
+{
+	task_lock(p);
+	if (__thaw_process(p) == 1) {
+		task_unlock(p);
+		wake_up_process(p);
+		return 1;
+	}
+	task_unlock(p);
+	return 0;
+}
+EXPORT_SYMBOL(thaw_process);
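
For context on how a task ends up in refrigerator() above, here is a minimal sketch of a freezable kernel thread from the same kernel generation, not code from the patch; the thread function name and the one-second period are illustrative, while set_freezable(), try_to_freeze() and kthread_should_stop() are the standard helpers.

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_thread(void *unused)
{
	set_freezable();	/* clear PF_NOFREEZE so the freezer may target us */

	while (!kthread_should_stop()) {
		/*
		 * If freeze_task() has set TIF_FREEZE on us (suspend, or a
		 * cgroup freezer write), this calls refrigerator() and
		 * sleeps until __thaw_process()/thaw_process() runs.
		 */
		try_to_freeze();

		/* ... periodic work would go here ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}
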
diff --git a/kernel/kexec.c b/kernel/kexec.c
index aef265325cd..777ac458ac9 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1371,6 +1371,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(node_online_map);
 	VMCOREINFO_SYMBOL(swapper_pg_dir);
 	VMCOREINFO_SYMBOL(_stext);
+	VMCOREINFO_SYMBOL(vmlist);
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 	VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1407,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(free_area, free_list);
 	VMCOREINFO_OFFSET(list_head, next);
 	VMCOREINFO_OFFSET(list_head, prev);
+	VMCOREINFO_OFFSET(vm_struct, addr);
 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710..14ec64fe175 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -171,12 +171,11 @@ EXPORT_SYMBOL(kthread_create);
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
-	if (k->state != TASK_UNINTERRUPTIBLE) {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
 		WARN_ON(1);
 		return;
 	}
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	wait_task_inactive(k, 0);
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
 	k->rt.nr_cpus_allowed = 1;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 278946aecaf..ca634019497 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
 	return 1;
 }
 
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
-	if (!unlikely(current->flags & PF_NOFREEZE)) {
-		current->flags |= PF_FROZEN;
-		wmb();
-	}
-	clear_freeze_flag(current);
-}
-
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
-	/* Hmm, should we be allowed to suspend when there are realtime
-	   processes around? */
-	long save;
-
-	task_lock(current);
-	if (freezing(current)) {
-		frozen_process();
-		task_unlock(current);
-	} else {
-		task_unlock(current);
-		return;
-	}
-	save = current->state;
-	pr_debug("%s entered refrigerator\n", current->comm);
-
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
-	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current))
-			break;
-		schedule();
-	}
-	pr_debug("%s left refrigerator\n", current->comm);
-	__set_current_state(save);
-}
-
-static void fake_signal_wake_up(struct task_struct *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, 0);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-}
-
-static inline bool should_send_signal(struct task_struct *p)
-{
-	return !(p->flags & PF_FREEZER_NOSIG);
-}
-
-/**
- *	freeze_task - send a freeze request to given task
- *	@p: task to send the request to
- *	@sig_only: if set, the request will only be sent if the task has the
- *		PF_FREEZER_NOSIG flag unset
- *	Return value: 'false', if @sig_only is set and the task has
- *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
- *
- *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *	either sending a fake signal to it or waking it up, depending on whether
- *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *	TIF_FREEZE flag will not be set.
- */
-static bool freeze_task(struct task_struct *p, bool sig_only)
-{
-	/*
-	 * We first check if the task is freezing and next if it has already
-	 * been frozen to avoid the race with frozen_process() which first marks
-	 * the task as frozen and next clears its TIF_FREEZE.
-	 */
-	if (!freezing(p)) {
-		rmb();
-		if (frozen(p))
-			return false;
-
-		if (!sig_only || should_send_signal(p))
-			set_freeze_flag(p);
-		else
-			return false;
-	}
-
-	if (should_send_signal(p)) {
-		if (!signal_pending(p))
-			fake_signal_wake_up(p);
-	} else if (sig_only) {
-		return false;
-	} else {
-		wake_up_state(p, TASK_INTERRUPTIBLE);
-	}
-
-	return true;
-}
-
-static void cancel_freezing(struct task_struct *p)
-{
-	unsigned long flags;
-
-	if (freezing(p)) {
-		pr_debug("  clean up: %s\n", p->comm);
-		clear_freeze_flag(p);
-		spin_lock_irqsave(&p->sighand->siglock, flags);
-		recalc_sigpending_and_wake(p);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	}
-}
-
 static int try_to_freeze_tasks(bool sig_only)
 {
 	struct task_struct *g, *p;
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
 		if (nosig_only && should_send_signal(p))
 			continue;
 
+		if (cgroup_frozen(p))
+			continue;
+
 		thaw_process(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
@@ -264,4 +152,3 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
-EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 356699a96d5..1e68e4c39e2 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  * TASK_TRACED, resume it now.
  * Requires that irqs be disabled.
  */
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
 {
 	spin_lock(&child->sighand->siglock);
 	if (task_is_traced(child)) {
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe04aa..59236e8b9da 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -54,9 +54,9 @@
 #include <linux/cpu.h>
 #include <linux/random.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
+#include <asm/byteorder.h>
 
 /*
  * PREEMPT_RCU data structures.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a..b3cc73931d1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "scan_unevictable_pages",
+		.data		= &scan_unevictable_pages,
+		.maxlen		= sizeof(scan_unevictable_pages),
+		.mode		= 0644,
+		.proc_handler	= &scan_unevictable_handler,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt