Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c            |  19
-rw-r--r--  kernel/auditsc.c          |   7
-rw-r--r--  kernel/cgroup.c           | 157
-rw-r--r--  kernel/cpuset.c           |   4
-rw-r--r--  kernel/exit.c             |  98
-rw-r--r--  kernel/futex.c            |  50
-rw-r--r--  kernel/futex_compat.c     |   9
-rw-r--r--  kernel/irq/chip.c         |  20
-rw-r--r--  kernel/irq/spurious.c     |   3
-rw-r--r--  kernel/kprobes.c          |  52
-rw-r--r--  kernel/lockdep.c          |   8
-rw-r--r--  kernel/marker.c           |  13
-rw-r--r--  kernel/module.c           |  16
-rw-r--r--  kernel/power/disk.c       |   4
-rw-r--r--  kernel/power/process.c    |  29
-rw-r--r--  kernel/power/snapshot.c   |  42
-rw-r--r--  kernel/printk.c           |   2
-rw-r--r--  kernel/rcupreempt.c       | 233
-rw-r--r--  kernel/res_counter.c      |   1
-rw-r--r--  kernel/sched.c            | 358
-rw-r--r--  kernel/sched_fair.c       | 142
-rw-r--r--  kernel/sched_rt.c         |  10
-rw-r--r--  kernel/signal.c           |  16
-rw-r--r--  kernel/softirq.c          |   1
-rw-r--r--  kernel/softlockup.c       |  13
-rw-r--r--  kernel/sysctl.c           |  18
-rw-r--r--  kernel/time/tick-sched.c  |   3
-rw-r--r--  kernel/time/timer_list.c  |   4
28 files changed, 713 insertions(+), 619 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 2eeea9a14240..10c4930c2bbf 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -170,7 +170,9 @@ void audit_panic(const char *message)
 		printk(KERN_ERR "audit: %s\n", message);
 		break;
 	case AUDIT_FAIL_PANIC:
-		panic("audit: %s\n", message);
+		/* test audit_pid since printk is always lossy, why bother? */
+		if (audit_pid)
+			panic("audit: %s\n", message);
 		break;
 	}
 }
@@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy)
 		if (err < 0) {
 			BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
 			printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+			audit_log_lost("auditd disappeared\n");
 			audit_pid = 0;
 		}
 	} else {
@@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab)
 	if (!audit_rate_check()) {
 		audit_log_lost("rate limit exceeded");
 	} else {
+		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 		if (audit_pid) {
-			struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 			nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
 			skb_queue_tail(&audit_skb_queue, ab->skb);
 			ab->skb = NULL;
 			wake_up_interruptible(&kauditd_wait);
-		} else if (printk_ratelimit()) {
-			struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-			printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));
-		} else {
-			audit_log_lost("printk limit exceeded\n");
+		} else if (nlh->nlmsg_type != AUDIT_EOE) {
+			if (printk_ratelimit()) {
+				printk(KERN_NOTICE "type=%d %s\n",
+					nlh->nlmsg_type,
+					ab->skb->data + NLMSG_SPACE(0));
+			} else
+				audit_log_lost("printk limit exceeded\n");
 		}
 	}
 	audit_buffer_free(ab);
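
The audit_log_end() hunk above has a simple contract: queue the record for the audit daemon when one is registered, otherwise fall back to a rate-limited printk, and count every record the limiter drops rather than losing it silently. A minimal user-space sketch of that queue-or-count pattern (hypothetical names, not the kernel API):

    #include <stdio.h>

    static int daemon_pid;      /* stands in for audit_pid */
    static long records_lost;   /* stands in for the audit_log_lost() counter */

    /* Pretend rate limiter: allow two messages, then refuse. */
    static int ratelimit_ok(void)
    {
        static int budget = 2;
        return budget-- > 0;
    }

    static void log_record(int type, const char *msg)
    {
        if (daemon_pid)
            printf("queued type=%d for pid %d\n", type, daemon_pid);
        else if (ratelimit_ok())
            printf("type=%d %s\n", type, msg);
        else
            records_lost++;     /* never drop silently */
    }

    int main(void)
    {
        for (int i = 0; i < 4; i++)
            log_record(1300, "example record");
        printf("lost: %ld\n", records_lost);
        return 0;
    }
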
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ac6d9b23b018..782262e4107d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1000,9 +1000,10 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 	 * for strings that are too long, we should not have created
 	 * any.
 	 */
-	if (unlikely((len = -1) || len > MAX_ARG_STRLEN - 1)) {
+	if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) {
 		WARN_ON(1);
 		send_sig(SIGKILL, current, 0);
+		return -1;
 	}

 	/* walk the whole argument looking for non-ascii chars */
@@ -1020,6 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 	if (ret) {
 		WARN_ON(1);
 		send_sig(SIGKILL, current, 0);
+		return -1;
 	}
 	buf[to_send] = '\0';
 	has_cntl = audit_string_contains_control(buf, to_send);
@@ -1068,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 	 * so we can be sure nothing was lost.
 	 */
 	if ((i == 0) && (too_long))
-		audit_log_format(*ab, "a%d_len=%ld ", arg_num,
+		audit_log_format(*ab, "a%d_len=%zu ", arg_num,
 				 has_cntl ? 2*len : len);

 	/*
@@ -1083,6 +1085,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 	if (ret) {
 		WARN_ON(1);
 		send_sig(SIGKILL, current, 0);
+		return -1;
 	}
 	buf[to_send] = '\0';

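
The first auditsc.c hunk is a classic assignment-for-comparison fix: `(len = -1)` stored -1 in len and always evaluated true, so the error branch fired for every argument. A standalone illustration of the difference (plain C, not kernel code):

    #include <stdio.h>

    int main(void)
    {
        long len = 42;

        if ((len = -1))         /* assigns: len becomes -1, condition is true */
            printf("assignment: len=%ld, branch taken\n", len);

        len = 42;
        if (len == -1)          /* compares: false, len stays 42 */
            printf("never reached\n");
        printf("comparison: len=%ld\n", len);
        return 0;
    }
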
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4766bb65e4d9..e9c2fb01e89b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -113,9 +113,9 @@ static int root_count;
 #define dummytop (&rootnode.top_cgroup)

 /* This flag indicates whether tasks in the fork and exit paths should
- * take callback_mutex and check for fork/exit handlers to call. This
- * avoids us having to do extra work in the fork/exit path if none of the
- * subsystems need to be called.
+ * check for fork/exit handlers to call. This avoids us having to do
+ * extra work in the fork/exit path if none of the subsystems need to
+ * be called.
  */
 static int need_forkexit_callback;

@@ -307,7 +307,6 @@ static inline void put_css_set_taskexit(struct css_set *cg)
  * template: location in which to build the desired set of subsystem
  * state objects for the new cgroup group
  */
-
 static struct css_set *find_existing_css_set(
 	struct css_set *oldcg,
 	struct cgroup *cgrp,
@@ -320,7 +319,7 @@ static struct css_set *find_existing_css_set(
 	/* Built the set of subsystem state objects that we want to
 	 * see in the new css_set */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		if (root->subsys_bits & (1ull << i)) {
+		if (root->subsys_bits & (1UL << i)) {
 			/* Subsystem is in this hierarchy. So we want
 			 * the subsystem state from the new
 			 * cgroup */
@@ -354,7 +353,6 @@ static struct css_set *find_existing_css_set(
  * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
  * success or a negative error
  */
-
 static int allocate_cg_links(int count, struct list_head *tmp)
 {
 	struct cg_cgroup_link *link;
@@ -396,7 +394,6 @@ static void free_cg_links(struct list_head *tmp)
  * substituted into the appropriate hierarchy. Must be called with
  * cgroup_mutex held
  */
-
 static struct css_set *find_css_set(
 	struct css_set *oldcg, struct cgroup *cgrp)
 {
@@ -473,7 +470,6 @@ static struct css_set *find_css_set(
 	/* Link this cgroup group into the list */
 	list_add(&res->list, &init_css_set.list);
 	css_set_count++;
-	INIT_LIST_HEAD(&res->tasks);
 	write_unlock(&css_set_lock);

 	return res;
@@ -507,8 +503,8 @@ static struct css_set *find_css_set(
  * critical pieces of code here. The exception occurs on cgroup_exit(),
  * when a task in a notify_on_release cgroup exits. Then cgroup_mutex
  * is taken, and if the cgroup count is zero, a usermode call made
- * to /sbin/cgroup_release_agent with the name of the cgroup (path
- * relative to the root of cgroup file system) as the argument.
+ * to the release agent with the name of the cgroup (path relative to
+ * the root of cgroup file system) as the argument.
  *
  * A cgroup can only be deleted if both its 'count' of using tasks
  * is zero, and its list of 'children' cgroups is empty. Since all
@@ -521,7 +517,7 @@ static struct css_set *find_css_set(
  *
  * The need for this exception arises from the action of
  * cgroup_attach_task(), which overwrites one task's cgroup pointer with
- * another. It does so using cgroup_mutexe, however there are
+ * another. It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
  * task->cgroup without the expense of grabbing a system global
  * mutex. Therefore except as noted below, when dereferencing or, as
@@ -537,7 +533,6 @@ static struct css_set *find_css_set(
  * cgroup_lock - lock out any changes to cgroup structures
  *
  */
-
 void cgroup_lock(void)
 {
 	mutex_lock(&cgroup_mutex);
@@ -548,7 +543,6 @@ void cgroup_lock(void)
  *
  * Undo the lock taken in a previous cgroup_lock() call.
  */
-
 void cgroup_unlock(void)
 {
 	mutex_unlock(&cgroup_mutex);
@@ -590,7 +584,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
  * Call subsys's pre_destroy handler.
  * This is called before css refcnt check.
  */
-
 static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
 	struct cgroup_subsys *ss;
@@ -600,7 +593,6 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 	return;
 }

-
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -696,7 +688,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	added_bits = final_bits & ~root->actual_subsys_bits;
 	/* Check that any added subsystems are currently free */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		unsigned long long bit = 1ull << i;
+		unsigned long bit = 1UL << i;
 		struct cgroup_subsys *ss = subsys[i];
 		if (!(bit & added_bits))
 			continue;
@@ -927,7 +919,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
 	if (!inode)
 		return -ENOMEM;

-	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	inode->i_op = &cgroup_dir_inode_operations;
 	/* directories start off with i_nlink == 2 (for "." entry) */
@@ -961,8 +952,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	}

 	root = kzalloc(sizeof(*root), GFP_KERNEL);
-	if (!root)
+	if (!root) {
+		if (opts.release_agent)
+			kfree(opts.release_agent);
 		return -ENOMEM;
+	}

 	init_cgroup_root(root);
 	root->subsys_bits = opts.subsys_bits;
@@ -1129,8 +1123,13 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
 	return dentry->d_fsdata;
 }

-/*
- * Called with cgroup_mutex held. Writes path of cgroup into buf.
+/**
+ * cgroup_path - generate the path of a cgroup
+ * @cgrp: the cgroup in question
+ * @buf: the buffer to write the path into
+ * @buflen: the length of the buffer
+ *
+ * Called with cgroup_mutex held. Writes path of cgroup into buf.
  * Returns 0 on success, -errno on error.
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
@@ -1188,11 +1187,13 @@ static void get_first_subsys(const struct cgroup *cgrp,
 	*subsys_id = test_ss->subsys_id;
 }

-/*
- * Attach task 'tsk' to cgroup 'cgrp'
+/**
+ * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
+ * @cgrp: the cgroup the task is attaching to
+ * @tsk: the task to be attached
  *
  * Call holding cgroup_mutex. May take task_lock of
- * the task 'pid' during call.
+ * the task 'tsk' during call.
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
@@ -1293,7 +1294,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
 }

 /* The various types of files and directories in a cgroup file system */
-
 enum cgroup_filetype {
 	FILE_ROOT,
 	FILE_DIR,
@@ -1584,12 +1584,11 @@ static int cgroup_create_file(struct dentry *dentry, int mode,
 }

 /*
  * cgroup_create_dir - create a directory for an object.
- * cgrp: the cgroup we create the directory for.
- * It must have a valid ->parent field
- * And we are going to fill its ->dentry field.
- * dentry: dentry of the new cgroup
- * mode: mode to set on new directory.
+ * @cgrp: the cgroup we create the directory for. It must have a valid
+ *	->parent field. And we are going to fill its ->dentry field.
+ * @dentry: dentry of the new cgroup
+ * @mode: mode to set on new directory.
  */
 static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
 			     int mode)
@@ -1651,8 +1650,12 @@ int cgroup_add_files(struct cgroup *cgrp,
 	return 0;
 }

-/* Count the number of tasks in a cgroup. */
-
+/**
+ * cgroup_task_count - count the number of tasks in a cgroup.
+ * @cgrp: the cgroup in question
+ *
+ * Return the number of tasks in the cgroup.
+ */
 int cgroup_task_count(const struct cgroup *cgrp)
 {
 	int count = 0;
@@ -1962,12 +1965,13 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
 }

 /**
- * Build and fill cgroupstats so that taskstats can export it to user
- * space.
- *
+ * cgroupstats_build - build and fill cgroupstats
  * @stats: cgroupstats to fill information into
  * @dentry: A dentry entry belonging to the cgroup for which stats have
  * been requested.
+ *
+ * Build and fill cgroupstats so that taskstats can export it to user
+ * space.
  */
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 {
@@ -2199,14 +2203,13 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 }

 /*
  * cgroup_create - create a cgroup
- * parent: cgroup that will be parent of the new cgroup.
- * name: name of the new cgroup. Will be strcpy'ed.
- * mode: mode to set on new inode
+ * @parent: cgroup that will be parent of the new cgroup
+ * @dentry: dentry of the new cgroup
+ * @mode: mode to set on new inode
  *
  * Must be called with the mutex on the parent inode held
  */
-
 static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			  int mode)
 {
@@ -2229,7 +2232,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,

 	mutex_lock(&cgroup_mutex);

-	cgrp->flags = 0;
 	INIT_LIST_HEAD(&cgrp->sibling);
 	INIT_LIST_HEAD(&cgrp->children);
 	INIT_LIST_HEAD(&cgrp->css_sets);
@@ -2239,6 +2241,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	cgrp->root = parent->root;
 	cgrp->top_cgroup = parent->top_cgroup;

+	if (notify_on_release(parent))
+		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+
 	for_each_subsys(root, ss) {
 		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
 		if (IS_ERR(css)) {
@@ -2349,13 +2354,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	parent = cgrp->parent;
 	root = cgrp->root;
 	sb = root->sb;
+
 	/*
-	 * Call pre_destroy handlers of subsys
+	 * Call pre_destroy handlers of subsys. Notify subsystems
+	 * that rmdir() request comes.
 	 */
 	cgroup_call_pre_destroy(cgrp);
-	/*
-	 * Notify subsyses that rmdir() request comes.
-	 */

 	if (cgroup_has_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
@@ -2431,8 +2435,10 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss)
 }

 /**
- * cgroup_init_early - initialize cgroups at system boot, and
- * initialize any subsystems that request early init.
+ * cgroup_init_early - cgroup initialization at system boot
+ *
+ * Initialize cgroups at system boot, and initialize any
+ * subsystems that request early init.
  */
 int __init cgroup_init_early(void)
 {
@@ -2474,8 +2480,10 @@ int __init cgroup_init_early(void)
 }

 /**
- * cgroup_init - register cgroup filesystem and /proc file, and
- * initialize any subsystems that didn't request early init.
+ * cgroup_init - cgroup initialization
+ *
+ * Register cgroup filesystem and /proc file, and initialize
+ * any subsystems that didn't request early init.
  */
 int __init cgroup_init(void)
 {
@@ -2618,7 +2626,7 @@ static struct file_operations proc_cgroupstats_operations = {

 /**
  * cgroup_fork - attach newly forked task to its parent's cgroup.
- * @tsk: pointer to task_struct of forking parent process.
+ * @child: pointer to task_struct of forking parent process.
  *
  * Description: A task inherits its parent's cgroup at fork().
  *
@@ -2642,9 +2650,12 @@ void cgroup_fork(struct task_struct *child)
 }

 /**
- * cgroup_fork_callbacks - called on a new task very soon before
- * adding it to the tasklist. No need to take any locks since no-one
- * can be operating on this task
+ * cgroup_fork_callbacks - run fork callbacks
+ * @child: the new task
+ *
+ * Called on a new task very soon before adding it to the
+ * tasklist. No need to take any locks since no-one can
+ * be operating on this task.
  */
 void cgroup_fork_callbacks(struct task_struct *child)
 {
@@ -2659,11 +2670,14 @@ void cgroup_fork_callbacks(struct task_struct *child)
 }

 /**
- * cgroup_post_fork - called on a new task after adding it to the
- * task list. Adds the task to the list running through its css_set
- * if necessary. Has to be after the task is visible on the task list
- * in case we race with the first call to cgroup_iter_start() - to
- * guarantee that the new task ends up on its list. */
+ * cgroup_post_fork - called on a new task after adding it to the task list
+ * @child: the task in question
+ *
+ * Adds the task to the list running through its css_set if necessary.
+ * Has to be after the task is visible on the task list in case we race
+ * with the first call to cgroup_iter_start() - to guarantee that the
+ * new task ends up on its list.
+ */
 void cgroup_post_fork(struct task_struct *child)
 {
 	if (use_task_css_set_links) {
@@ -2676,6 +2690,7 @@ void cgroup_post_fork(struct task_struct *child)
 /**
  * cgroup_exit - detach cgroup from exiting task
  * @tsk: pointer to task_struct of exiting process
+ * @run_callbacks: run exit callbacks?
  *
  * Description: Detach cgroup from @tsk and release it.
  *
@@ -2706,7 +2721,6 @@ void cgroup_post_fork(struct task_struct *child)
  * top_cgroup isn't going away, and either task has PF_EXITING set,
  * which wards off any cgroup_attach_task() attempts, or task is a failed
  * fork, never visible to cgroup_attach_task.
- *
  */
 void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 {
@@ -2743,9 +2757,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 }

 /**
- * cgroup_clone - duplicate the current cgroup in the hierarchy
- * that the given subsystem is attached to, and move this task into
- * the new child
+ * cgroup_clone - clone the cgroup the given subsystem is attached to
+ * @tsk: the task to be moved
+ * @subsys: the given subsystem
+ *
+ * Duplicate the current cgroup in the hierarchy that the given
+ * subsystem is attached to, and move this task into the new
+ * child.
  */
 int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 {
@@ -2858,9 +2876,12 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	return ret;
 }

-/*
- * See if "cgrp" is a descendant of the current task's cgroup in
- * the appropriate hierarchy
+/**
+ * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp
+ * @cgrp: the cgroup in question
+ *
+ * See if @cgrp is a descendant of the current task's cgroup in
+ * the appropriate hierarchy.
  *
  * If we are sending in dummytop, then presumably we are creating
  * the top cgroup in the subsystem.
@@ -2939,9 +2960,7 @@ void __css_put(struct cgroup_subsys_state *css)
  * release agent task. We don't bother to wait because the caller of
  * this routine has no use for the exit status of the release agent
  * task, so no sense holding our caller up for that.
- *
  */
-
 static void cgroup_release_agent(struct work_struct *work)
 {
 	BUG_ON(work != &release_agent_work);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e296ed81d4d..a1b61f414228 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -322,8 +322,8 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Call without callback_mutex or task_lock() held. May be
  * called with or without cgroup_mutex held. Thanks in part to
  * 'the_top_cpuset_hack', the task's cpuset pointer will never
- * be NULL. This routine also might acquire callback_mutex and
- * current->mm->mmap_sem during call.
+ * be NULL. This routine also might acquire callback_mutex during
+ * call.
  *
  * Reading current->cpuset->mems_generation doesn't need task_lock
  * to guard the current->cpuset dereference, because it is guarded
diff --git a/kernel/exit.c b/kernel/exit.c
index 506a957b665a..53872bf993fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -214,20 +214,19 @@ struct pid *session_of_pgrp(struct pid *pgrp)
 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
 {
 	struct task_struct *p;
-	int ret = 1;

 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
-		if (p == ignored_task
-		    || p->exit_state
-		    || is_global_init(p->real_parent))
+		if ((p == ignored_task) ||
+		    (p->exit_state && thread_group_empty(p)) ||
+		    is_global_init(p->real_parent))
 			continue;
+
 		if (task_pgrp(p->real_parent) != pgrp &&
-		    task_session(p->real_parent) == task_session(p)) {
-			ret = 0;
-			break;
-		}
+		    task_session(p->real_parent) == task_session(p))
+			return 0;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
-	return ret; /* (sighing) "Often!" */
+
+	return 1;
 }

 int is_current_pgrp_orphaned(void)
@@ -255,6 +254,37 @@ static int has_stopped_jobs(struct pid *pgrp)
 	return retval;
 }

+/*
+ * Check to see if any process groups have become orphaned as
+ * a result of our exiting, and if they have any stopped jobs,
+ * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ */
+static void
+kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
+{
+	struct pid *pgrp = task_pgrp(tsk);
+	struct task_struct *ignored_task = tsk;
+
+	if (!parent)
+		/* exit: our father is in a different pgrp than
+		 * we are and we were the only connection outside.
+		 */
+		parent = tsk->real_parent;
+	else
+		/* reparent: our child is in a different pgrp than
+		 * we are, and it was the only connection outside.
+		 */
+		ignored_task = NULL;
+
+	if (task_pgrp(parent) != pgrp &&
+	    task_session(parent) == task_session(tsk) &&
+	    will_become_orphaned_pgrp(pgrp, ignored_task) &&
+	    has_stopped_jobs(pgrp)) {
+		__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
+		__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
+	}
+}
+
 /**
  * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
  *
@@ -635,22 +665,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	    p->exit_signal != -1 && thread_group_empty(p))
 		do_notify_parent(p, p->exit_signal);

-	/*
-	 * process group orphan check
-	 * Case ii: Our child is in a different pgrp
-	 * than we are, and it was the only connection
-	 * outside, so the child pgrp is now orphaned.
-	 */
-	if ((task_pgrp(p) != task_pgrp(father)) &&
-	    (task_session(p) == task_session(father))) {
-		struct pid *pgrp = task_pgrp(p);
-
-		if (will_become_orphaned_pgrp(pgrp, NULL) &&
-		    has_stopped_jobs(pgrp)) {
-			__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
-			__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
-		}
-	}
+	kill_orphaned_pgrp(p, father);
 }

 /*
655 670
656/* 671/*
@@ -735,11 +750,9 @@ static void forget_original_parent(struct task_struct *father)
  * Send signals to all our closest relatives so that they know
  * to properly mourn us..
  */
-static void exit_notify(struct task_struct *tsk)
+static void exit_notify(struct task_struct *tsk, int group_dead)
 {
 	int state;
-	struct task_struct *t;
-	struct pid *pgrp;

 	/*
 	 * This does two things:
@@ -753,25 +766,8 @@ static void exit_notify(struct task_struct *tsk)
 	exit_task_namespaces(tsk);

 	write_lock_irq(&tasklist_lock);
-	/*
-	 * Check to see if any process groups have become orphaned
-	 * as a result of our exiting, and if they have any stopped
-	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
-	 *
-	 * Case i: Our father is in a different pgrp than we are
-	 * and we were the only connection outside, so our pgrp
-	 * is about to become orphaned.
-	 */
-	t = tsk->real_parent;
-
-	pgrp = task_pgrp(tsk);
-	if ((task_pgrp(t) != pgrp) &&
-	    (task_session(t) == task_session(tsk)) &&
-	    will_become_orphaned_pgrp(pgrp, tsk) &&
-	    has_stopped_jobs(pgrp)) {
-		__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
-		__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
-	}
+	if (group_dead)
+		kill_orphaned_pgrp(tsk->group_leader, NULL);

 	/* Let father know we died
 	 *
@@ -788,8 +784,8 @@ static void exit_notify(struct task_struct *tsk)
 	 * the same after a fork.
 	 */
 	if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
-	    ( tsk->parent_exec_id != t->self_exec_id ||
+	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
 	     tsk->self_exec_id != tsk->parent_exec_id)
 	    && !capable(CAP_KILL))
 		tsk->exit_signal = SIGCHLD;

@@ -986,7 +982,7 @@ NORET_TYPE void do_exit(long code)
 		module_put(tsk->binfmt->module);

 	proc_exit_connector(tsk);
-	exit_notify(tsk);
+	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
@@ -1382,7 +1378,7 @@ unlock_sig:
 	if (!retval && infop)
 		retval = put_user(0, &infop->si_errno);
 	if (!retval && infop)
-		retval = put_user(why, &infop->si_code);
+		retval = put_user((short)why, &infop->si_code);
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
diff --git a/kernel/futex.c b/kernel/futex.c
index 221f2128a437..06968cd79200 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,8 @@

 #include "rtmutex_common.h"

+int __read_mostly futex_cmpxchg_enabled;
+
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

 /*
@@ -469,6 +471,8 @@ void exit_pi_state_list(struct task_struct *curr)
 	struct futex_hash_bucket *hb;
 	union futex_key key;

+	if (!futex_cmpxchg_enabled)
+		return;
 	/*
 	 * We are a ZOMBIE and nobody can enqueue itself on
 	 * pi_state_list anymore, but we have to be careful
@@ -1870,6 +1874,8 @@ asmlinkage long
 sys_set_robust_list(struct robust_list_head __user *head,
 		    size_t len)
 {
+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
 	/*
 	 * The kernel knows only one size for now:
 	 */
@@ -1894,6 +1900,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
 	struct robust_list_head __user *head;
 	unsigned long ret;

+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+
 	if (!pid)
 		head = current->robust_list;
 	else {
@@ -1997,6 +2006,9 @@ void exit_robust_list(struct task_struct *curr)
 	unsigned long futex_offset;
 	int rc;

+	if (!futex_cmpxchg_enabled)
+		return;
+
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
@@ -2051,7 +2063,7 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3)
 {
-	int ret;
+	int ret = -ENOSYS;
 	int cmd = op & FUTEX_CMD_MASK;
 	struct rw_semaphore *fshared = NULL;

@@ -2083,13 +2095,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
 		break;
 	case FUTEX_LOCK_PI:
-		ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
+		if (futex_cmpxchg_enabled)
+			ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
 		break;
 	case FUTEX_UNLOCK_PI:
-		ret = futex_unlock_pi(uaddr, fshared);
+		if (futex_cmpxchg_enabled)
+			ret = futex_unlock_pi(uaddr, fshared);
 		break;
 	case FUTEX_TRYLOCK_PI:
-		ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
+		if (futex_cmpxchg_enabled)
+			ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
 		break;
 	default:
 		ret = -ENOSYS;
@@ -2145,8 +2160,29 @@ static struct file_system_type futex_fs_type = {

 static int __init init(void)
 {
-	int i = register_filesystem(&futex_fs_type);
+	u32 curval;
+	int i;
+
+	/*
+	 * This will fail and we want it. Some arch implementations do
+	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
+	 * functionality. We want to know that before we call in any
+	 * of the complex code paths. Also we want to prevent
+	 * registration of robust lists in that case. NULL is
+	 * guaranteed to fault and we get -EFAULT on functional
+	 * implementation, the non functional ones will return
+	 * -ENOSYS.
+	 */
+	curval = cmpxchg_futex_value_locked(NULL, 0, 0);
+	if (curval == -EFAULT)
+		futex_cmpxchg_enabled = 1;

+	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
+		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
+		spin_lock_init(&futex_queues[i].lock);
+	}
+
+	i = register_filesystem(&futex_fs_type);
 	if (i)
 		return i;

@@ -2156,10 +2192,6 @@ static int __init init(void)
 		return PTR_ERR(futex_mnt);
 	}

-	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
-		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
-		spin_lock_init(&futex_queues[i].lock);
-	}
 	return 0;
 }
 __initcall(init);
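
The init() rework above probes once at boot for a working futex_atomic_cmpxchg_inatomic() and then gates every PI and robust-list entry point on the result. A user-space sketch of that detect-then-gate pattern (hypothetical probe function; the kernel's real probe is the deliberately faulting cmpxchg on NULL):

    #include <errno.h>
    #include <stdio.h>

    static int cmpxchg_enabled;     /* mirrors futex_cmpxchg_enabled */

    /* Hypothetical probe: -EFAULT means the primitive exists and really
     * touched the faulting address; -ENOSYS means the arch stubs it out. */
    static int probe_cmpxchg(void)
    {
        return -EFAULT;
    }

    static int robust_list_op(void)
    {
        if (!cmpxchg_enabled)
            return -ENOSYS;         /* gate every dependent entry point */
        return 0;
    }

    int main(void)
    {
        if (probe_cmpxchg() == -EFAULT) /* the fault proves a working implementation */
            cmpxchg_enabled = 1;
        printf("robust list op -> %d\n", robust_list_op());
        return 0;
    }
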
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 7d5e4b016f39..ff90f049f8f6 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task_struct *curr)
 	compat_long_t futex_offset;
 	int rc;

+	if (!futex_cmpxchg_enabled)
+		return;
+
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
@@ -115,6 +118,9 @@ asmlinkage long
 compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
 			   compat_size_t len)
 {
+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+
 	if (unlikely(len != sizeof(*head)))
 		return -EINVAL;

@@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 	struct compat_robust_list_head __user *head;
 	unsigned long ret;

+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+
 	if (!pid)
 		head = current->compat_robust_list;
 	else {
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index cc54c6276356..fdb3fbe2b0c4 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -246,6 +246,17 @@ static unsigned int default_startup(unsigned int irq)
 }

 /*
+ * default shutdown function
+ */
+static void default_shutdown(unsigned int irq)
+{
+	struct irq_desc *desc = irq_desc + irq;
+
+	desc->chip->mask(irq);
+	desc->status |= IRQ_MASKED;
+}
+
+/*
  * Fixup enable/disable function pointers
  */
 void irq_chip_set_defaults(struct irq_chip *chip)
@@ -256,8 +267,15 @@ void irq_chip_set_defaults(struct irq_chip *chip)
 		chip->disable = default_disable;
 	if (!chip->startup)
 		chip->startup = default_startup;
+	/*
+	 * We use chip->disable, when the user provided its own. When
+	 * we have default_disable set for chip->disable, then we need
+	 * to use default_shutdown, otherwise the irq line is not
+	 * disabled on free_irq():
+	 */
 	if (!chip->shutdown)
-		chip->shutdown = chip->disable;
+		chip->shutdown = chip->disable != default_disable ?
+			chip->disable : default_shutdown;
 	if (!chip->name)
 		chip->name = chip->typename;
 	if (!chip->end)
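
The chip.c change stops aliasing a missing ->shutdown to ->disable when ->disable is itself the lazy no-op default, since that combination left the line enabled after free_irq(). A simplified sketch of the defaulting rule (toy structs, not the kernel's irq_chip):

    #include <stdio.h>

    struct chip {
        void (*disable)(int irq);
        void (*shutdown)(int irq);
    };

    static void default_disable(int irq) { (void)irq; /* lazy-disable: no-op */ }
    static void default_shutdown(int irq) { printf("irq %d masked\n", irq); }

    static void set_defaults(struct chip *c)
    {
        if (!c->disable)
            c->disable = default_disable;
        /* Reuse ->disable as ->shutdown only when the driver supplied a
         * real one; otherwise free_irq() would leave the line enabled. */
        if (!c->shutdown)
            c->shutdown = c->disable != default_disable ?
                c->disable : default_shutdown;
    }

    int main(void)
    {
        struct chip c = { 0 };
        set_defaults(&c);
        c.shutdown(9);      /* falls through to default_shutdown() */
        return 0;
    }
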
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index a6b2bc831dd0..088dabbf2d6a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -6,6 +6,7 @@
  * This file contains spurious interrupt handling.
  */

+#include <linux/jiffies.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
@@ -179,7 +180,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 	 * otherwise the counter becomes a doomsday timer for otherwise
 	 * working systems
 	 */
-	if (jiffies - desc->last_unhandled > HZ/10)
+	if (time_after(jiffies, desc->last_unhandled + HZ/10))
 		desc->irqs_unhandled = 1;
 	else
 		desc->irqs_unhandled++;
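
The spurious.c hunk swaps an open-coded jiffies comparison for time_after(), the signed-difference idiom from <linux/jiffies.h> that stays correct when the counter wraps. A user-space sketch of the idiom (the macro body mirrors the kernel definition):

    #include <limits.h>
    #include <stdio.h>

    /* Same idiom as time_after(a, b) in <linux/jiffies.h>. */
    #define time_after(a, b) ((long)((b) - (a)) < 0)

    int main(void)
    {
        unsigned long deadline = ULONG_MAX - 5; /* last_unhandled + HZ/10, pre-wrap */
        unsigned long now = 10;                 /* jiffies, shortly after wrapping */

        printf("naive:      %d\n", now > deadline);            /* 0: wrong order */
        printf("time_after: %d\n", time_after(now, deadline)); /* 1: correct */
        return 0;
    }
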
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7a86e6432338..fcfb580c3afc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -498,27 +498,36 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
 	return 0;
 }

+/*
+ * If we have a symbol_name argument, look it up and add the offset field
+ * to it. This way, we can specify a relative address to a symbol.
+ */
+static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
+{
+	kprobe_opcode_t *addr = p->addr;
+	if (p->symbol_name) {
+		if (addr)
+			return NULL;
+		kprobe_lookup_name(p->symbol_name, addr);
+	}
+
+	if (!addr)
+		return NULL;
+	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
+}
+
 static int __kprobes __register_kprobe(struct kprobe *p,
 				       unsigned long called_from)
 {
 	int ret = 0;
 	struct kprobe *old_p;
 	struct module *probed_mod;
+	kprobe_opcode_t *addr;

-	/*
-	 * If we have a symbol_name argument look it up,
-	 * and add it to the address. That way the addr
-	 * field can either be global or relative to a symbol.
-	 */
-	if (p->symbol_name) {
-		if (p->addr)
-			return -EINVAL;
-		kprobe_lookup_name(p->symbol_name, p->addr);
-	}
-
-	if (!p->addr)
+	addr = kprobe_addr(p);
+	if (!addr)
 		return -EINVAL;
-	p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
+	p->addr = addr;

 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr))
@@ -678,8 +687,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
 	unregister_kprobe(&jp->kp);
 }

-#ifdef ARCH_SUPPORTS_KRETPROBES
-
+#ifdef CONFIG_KRETPROBES
 /*
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
@@ -722,12 +730,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	int ret = 0;
 	struct kretprobe_instance *inst;
 	int i;
-	void *addr = rp->kp.addr;
+	void *addr;

 	if (kretprobe_blacklist_size) {
-		if (addr == NULL)
-			kprobe_lookup_name(rp->kp.symbol_name, addr);
-		addr += rp->kp.offset;
+		addr = kprobe_addr(&rp->kp);
+		if (!addr)
+			return -EINVAL;

 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
 			if (kretprobe_blacklist[i].addr == addr)
@@ -769,8 +777,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	return ret;
 }

-#else /* ARCH_SUPPORTS_KRETPROBES */
-
+#else /* CONFIG_KRETPROBES */
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
@@ -781,8 +788,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 {
 	return 0;
 }
-
-#endif /* ARCH_SUPPORTS_KRETPROBES */
+#endif /* CONFIG_KRETPROBES */

 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
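
The new kprobe_addr() helper centralizes the symbol-or-address rule used by both register paths: exactly one of symbol_name and addr may be given, and offset is applied to whichever base results. A sketch of the same resolution logic outside the kernel (hypothetical lookup table in place of kprobe_lookup_name()):

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical symbol table standing in for kprobe_lookup_name(). */
    static char text[64];   /* pretend this is a function's code */

    static void *lookup(const char *name)
    {
        return strcmp(name, "do_fork") == 0 ? text : NULL;
    }

    /* Exactly one of (symbol, base) may be set; offset applies to the result. */
    static void *resolve(const char *symbol, void *base, unsigned long offset)
    {
        if (symbol) {
            if (base)
                return NULL;    /* ambiguous: caller gave both */
            base = lookup(symbol);
        }
        if (!base)
            return NULL;        /* unknown symbol or missing address */
        return (char *)base + offset;
    }

    int main(void)
    {
        printf("do_fork+0x10 -> %p\n", resolve("do_fork", NULL, 0x10));
        printf("both set     -> %p\n", resolve("do_fork", text, 0));
        return 0;
    }
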
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3574379f4d62..81a4e4a3f087 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -779,6 +779,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	 * parallel walking of the hash-list safe:
 	 */
 	list_add_tail_rcu(&class->hash_entry, hash_head);
+	/*
+	 * Add it to the global list of classes:
+	 */
+	list_add_tail_rcu(&class->lock_entry, &all_lock_classes);

 	if (verbose(class)) {
 		graph_unlock();
@@ -2282,10 +2286,6 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 			return 0;
 		break;
 	case LOCK_USED:
-		/*
-		 * Add it to the global list of classes:
-		 */
-		list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
 		debug_atomic_dec(&nr_unused_locks);
 		break;
 	default:
diff --git a/kernel/marker.c b/kernel/marker.c
index c4c2cd8b61f5..48a4ea5afffd 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -61,8 +61,8 @@ struct marker_entry {
 	int refcount;	/* Number of times armed. 0 if disarmed. */
 	struct rcu_head rcu;
 	void *oldptr;
-	char rcu_pending:1;
-	char ptype:1;
+	unsigned char rcu_pending:1;
+	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };

@@ -698,14 +698,12 @@ int marker_probe_unregister(const char *name,
 {
 	struct marker_entry *entry;
 	struct marker_probe_closure *old;
-	int ret = 0;
+	int ret = -ENOENT;

 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
-	if (!entry) {
-		ret = -ENOENT;
+	if (!entry)
 		goto end;
-	}
 	if (entry->rcu_pending)
 		rcu_barrier();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
@@ -713,12 +711,15 @@ int marker_probe_unregister(const char *name,
 	marker_update_probes();		/* may update entry */
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
+	if (!entry)
+		goto end;
 	entry->oldptr = old;
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
 	smp_wmb();
 	call_rcu(&entry->rcu, free_old_closure);
 	remove_marker(name);	/* Ignore busy error message */
+	ret = 0;
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
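
The marker.c fix matters because marker_probe_unregister() drops markers_mutex to run marker_update_probes() and must re-look-up the entry once the lock is reacquired; returning -ENOENT on either failed lookup closes the race. A compact pthread sketch of that revalidate-after-relock pattern (hypothetical registry):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
    static int entry_present = 1;   /* the registry "entry" */

    static void update_probes(void)
    {
        /* may sleep; runs without the lock, so the entry can vanish here */
    }

    static int unregister_entry(void)
    {
        int ret = -ENOENT;          /* pessimistic default, as in the fix */

        pthread_mutex_lock(&registry_lock);
        if (!entry_present)
            goto end;

        pthread_mutex_unlock(&registry_lock);
        update_probes();
        pthread_mutex_lock(&registry_lock);

        if (!entry_present)         /* revalidate after reacquiring the lock */
            goto end;
        entry_present = 0;
        ret = 0;                    /* only now is success certain */
    end:
        pthread_mutex_unlock(&registry_lock);
        return ret;
    }

    int main(void)
    {
        printf("first unregister  -> %d\n", unregister_entry());
        printf("second unregister -> %d\n", unregister_entry());
        return 0;
    }
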
diff --git a/kernel/module.c b/kernel/module.c
index 92595bad3812..be4807fb90e4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -987,12 +987,11 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
 	return ret;
 }

-
 /*
  * /sys/module/foo/sections stuff
  * J. Corbet <corbet@lwn.net>
  */
-#ifdef CONFIG_KALLSYMS
+#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
 static ssize_t module_sect_show(struct module_attribute *mattr,
 				struct module *mod, char *buf)
 {
@@ -1188,7 +1187,7 @@ static inline void add_notes_attrs(struct module *mod, unsigned int nsect,
 static inline void remove_notes_attrs(struct module *mod)
 {
 }
-#endif /* CONFIG_KALLSYMS */
+#endif

 #ifdef CONFIG_SYSFS
 int module_add_modinfo_attrs(struct module *mod)
@@ -1231,9 +1230,7 @@ void module_remove_modinfo_attrs(struct module *mod)
 	}
 	kfree(mod->modinfo_attrs);
 }
-#endif

-#ifdef CONFIG_SYSFS
 int mod_sysfs_init(struct module *mod)
 {
 	int err;
@@ -1936,8 +1933,15 @@ static struct module *load_module(void __user *umod,
 	/* Set up license info based on the info section */
 	set_license(mod, get_modinfo(sechdrs, infoindex, "license"));

+	/*
+	 * ndiswrapper is under GPL by itself, but loads proprietary modules.
+	 * Don't use add_taint_module(), as it would prevent ndiswrapper from
+	 * using GPL-only symbols it needs.
+	 */
 	if (strcmp(mod->name, "ndiswrapper") == 0)
-		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+		add_taint(TAINT_PROPRIETARY_MODULE);
+
+	/* driverloader was caught wrongly pretending to be under GPL */
 	if (strcmp(mod->name, "driverloader") == 0)
 		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 859a8e59773a..14a656cdc652 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -391,7 +391,7 @@ int hibernation_platform_enter(void)
 		goto Close;

 	suspend_console();
-	error = device_suspend(PMSG_SUSPEND);
+	error = device_suspend(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_console;

@@ -404,7 +404,7 @@ int hibernation_platform_enter(void)
 		goto Finish;

 	local_irq_disable();
-	error = device_power_down(PMSG_SUSPEND);
+	error = device_power_down(PMSG_HIBERNATE);
 	if (!error) {
 		hibernation_ops->enter();
 		/* We should never get here */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7c2118f9597f..f1d0b345c9ba 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -75,22 +75,15 @@ void refrigerator(void)
 	__set_current_state(save);
 }

-static void fake_signal_wake_up(struct task_struct *p, int resume)
+static void fake_signal_wake_up(struct task_struct *p)
 {
 	unsigned long flags;

 	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, resume);
+	signal_wake_up(p, 0);
 	spin_unlock_irqrestore(&p->sighand->siglock, flags);
 }

-static void send_fake_signal(struct task_struct *p)
-{
-	if (task_is_stopped(p))
-		force_sig_specific(SIGSTOP, p);
-	fake_signal_wake_up(p, task_is_stopped(p));
-}
-
 static int has_mm(struct task_struct *p)
 {
 	return (p->mm && !(p->flags & PF_BORROWED_MM));
@@ -121,7 +114,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only)
 	if (freezing(p)) {
 		if (has_mm(p)) {
 			if (!signal_pending(p))
-				fake_signal_wake_up(p, 0);
+				fake_signal_wake_up(p);
 		} else {
 			if (with_mm_only)
 				ret = 0;
@@ -135,7 +128,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only)
 	} else {
 		if (has_mm(p)) {
 			set_freeze_flag(p);
-			send_fake_signal(p);
+			fake_signal_wake_up(p);
 		} else {
 			if (with_mm_only) {
 				ret = 0;
@@ -182,15 +175,17 @@ static int try_to_freeze_tasks(int freeze_user_space)
 		if (frozen(p) || !freezeable(p))
 			continue;

-		if (task_is_traced(p) && frozen(p->parent)) {
-			cancel_freezing(p);
-			continue;
-		}
-
 		if (!freeze_task(p, freeze_user_space))
 			continue;

-		if (!freezer_should_skip(p))
+		/*
+		 * Now that we've done set_freeze_flag, don't
+		 * perturb a task in TASK_STOPPED or TASK_TRACED.
+		 * It is "frozen enough". If the task does wake
+		 * up, it will immediately call try_to_freeze.
+		 */
+		if (!task_is_stopped_or_traced(p) &&
+		    !freezer_should_skip(p))
 			todo++;
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 95250d7c8d91..72a020cabb4c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -875,8 +875,8 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
875#endif /* CONFIG_HIGHMEM */ 875#endif /* CONFIG_HIGHMEM */
876 876
877/** 877/**
878 * saveable - Determine whether a non-highmem page should be included in 878 * saveable_page - Determine whether a non-highmem page should be included
879 * the suspend image. 879 * in the suspend image.
880 * 880 *
881 * We should save the page if it isn't Nosave, and is not in the range 881 * We should save the page if it isn't Nosave, and is not in the range
882 * of pages statically defined as 'unsaveable', and it isn't a part of 882 * of pages statically defined as 'unsaveable', and it isn't a part of
@@ -897,7 +897,8 @@ static struct page *saveable_page(unsigned long pfn)
897 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 897 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
898 return NULL; 898 return NULL;
899 899
900 if (PageReserved(page) && pfn_is_nosave(pfn)) 900 if (PageReserved(page)
901 && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
901 return NULL; 902 return NULL;
902 903
903 return page; 904 return page;
@@ -938,6 +939,25 @@ static inline void do_copy_page(long *dst, long *src)
938 *dst++ = *src++; 939 *dst++ = *src++;
939} 940}
940 941
942
943/**
944 * safe_copy_page - check if the page we are going to copy is marked as
945 * present in the kernel page tables (this always is the case if
946 * CONFIG_DEBUG_PAGEALLOC is not set and in that case
947 * kernel_page_present() always returns 'true').
948 */
949static void safe_copy_page(void *dst, struct page *s_page)
950{
951 if (kernel_page_present(s_page)) {
952 do_copy_page(dst, page_address(s_page));
953 } else {
954 kernel_map_pages(s_page, 1, 1);
955 do_copy_page(dst, page_address(s_page));
956 kernel_map_pages(s_page, 1, 0);
957 }
958}
959
960
941#ifdef CONFIG_HIGHMEM 961#ifdef CONFIG_HIGHMEM
942static inline struct page * 962static inline struct page *
943page_is_saveable(struct zone *zone, unsigned long pfn) 963page_is_saveable(struct zone *zone, unsigned long pfn)
@@ -946,8 +966,7 @@ page_is_saveable(struct zone *zone, unsigned long pfn)
946 saveable_highmem_page(pfn) : saveable_page(pfn); 966 saveable_highmem_page(pfn) : saveable_page(pfn);
947} 967}
948 968
949static inline void 969static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
950copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
951{ 970{
952 struct page *s_page, *d_page; 971 struct page *s_page, *d_page;
953 void *src, *dst; 972 void *src, *dst;
@@ -961,29 +980,26 @@ copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
961 kunmap_atomic(src, KM_USER0); 980 kunmap_atomic(src, KM_USER0);
962 kunmap_atomic(dst, KM_USER1); 981 kunmap_atomic(dst, KM_USER1);
963 } else { 982 } else {
964 src = page_address(s_page);
965 if (PageHighMem(d_page)) { 983 if (PageHighMem(d_page)) {
966 /* Page pointed to by src may contain some kernel 984 /* Page pointed to by src may contain some kernel
967 * data modified by kmap_atomic() 985 * data modified by kmap_atomic()
968 */ 986 */
969 do_copy_page(buffer, src); 987 safe_copy_page(buffer, s_page);
970 dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); 988 dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
971 memcpy(dst, buffer, PAGE_SIZE); 989 memcpy(dst, buffer, PAGE_SIZE);
972 kunmap_atomic(dst, KM_USER0); 990 kunmap_atomic(dst, KM_USER0);
973 } else { 991 } else {
974 dst = page_address(d_page); 992 safe_copy_page(page_address(d_page), s_page);
975 do_copy_page(dst, src);
976 } 993 }
977 } 994 }
978} 995}
979#else 996#else
980#define page_is_saveable(zone, pfn) saveable_page(pfn) 997#define page_is_saveable(zone, pfn) saveable_page(pfn)
981 998
982static inline void 999static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
983copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
984{ 1000{
985 do_copy_page(page_address(pfn_to_page(dst_pfn)), 1001 safe_copy_page(page_address(pfn_to_page(dst_pfn)),
986 page_address(pfn_to_page(src_pfn))); 1002 pfn_to_page(src_pfn));
987} 1003}
988#endif /* CONFIG_HIGHMEM */ 1004#endif /* CONFIG_HIGHMEM */
989 1005
diff --git a/kernel/printk.c b/kernel/printk.c
index bee36100f110..9adc2a473e6e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -666,7 +666,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
666 } 666 }
667 /* Emit the output into the temporary buffer */ 667 /* Emit the output into the temporary buffer */
668 printed_len += vscnprintf(printk_buf + printed_len, 668 printed_len += vscnprintf(printk_buf + printed_len,
669 sizeof(printk_buf), fmt, args); 669 sizeof(printk_buf) - printed_len, fmt, args);
670 670
671 /* 671 /*
672 * Copy the output into log_buf. If the caller didn't provide 672 * Copy the output into log_buf. If the caller didn't provide
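The vprintk() change above fixes a classic append-into-buffer overrun: the second vscnprintf() writes at printk_buf + printed_len but was still told the buffer held the full sizeof(printk_buf) bytes. A minimal user-space sketch of the same pattern (ordinary snprintf(); buf and append() are hypothetical names, not kernel code) shows why the remaining space has to be passed instead:

#include <stdio.h>
#include <string.h>

static char buf[32];
static size_t used;

/*
 * Append at buf + used. Passing sizeof(buf) here, as the old vprintk()
 * code effectively did, would permit writes past the end of buf; the
 * fix is to pass only the space that is actually left.
 */
static void append(const char *s)
{
        used += snprintf(buf + used, sizeof(buf) - used, "%s", s);
        if (used > sizeof(buf) - 1)
                used = sizeof(buf) - 1; /* snprintf() returns the would-be length */
}

int main(void)
{
        append("<6>");  /* a log-level prefix is already in the buffer */
        append("a message long enough to need truncation, not overflow");
        printf("%zu bytes kept: \"%s\"\n", strlen(buf), buf);
        return 0;
}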
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 987cfb7ade89..e9517014b57c 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -23,6 +23,10 @@
23 * to Suparna Bhattacharya for pushing me completely away 23 * to Suparna Bhattacharya for pushing me completely away
24 * from atomic instructions on the read side. 24 * from atomic instructions on the read side.
25 * 25 *
26 * - Added handling of Dynamic Ticks
27 * Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
28 * - Steven Rostedt <srostedt@redhat.com>
29 *
26 * Papers: http://www.rdrop.com/users/paulmck/RCU 30 * Papers: http://www.rdrop.com/users/paulmck/RCU
27 * 31 *
28 * Design Document: http://lwn.net/Articles/253651/ 32 * Design Document: http://lwn.net/Articles/253651/
@@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
409 } 413 }
410} 414}
411 415
416#ifdef CONFIG_NO_HZ
417
418DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
419static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
420static DEFINE_PER_CPU(int, rcu_update_flag);
421
422/**
423 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
424 *
425 * If the CPU was idle with dynamic ticks active, this updates the
426 * dynticks_progress_counter to let the RCU handling know that the
427 * CPU is active.
428 */
429void rcu_irq_enter(void)
430{
431 int cpu = smp_processor_id();
432
433 if (per_cpu(rcu_update_flag, cpu))
434 per_cpu(rcu_update_flag, cpu)++;
435
436 /*
437 * Only update if we are coming from a stopped ticks mode
438 * (dynticks_progress_counter is even).
439 */
440 if (!in_interrupt() &&
441 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
442 /*
443 * The following might seem like we could have a race
444 * with NMI/SMIs. But this really isn't a problem.
445 * Here we do a read/modify/write, and the race happens
446 * when an NMI/SMI comes in after the read and before
447 * the write. But NMI/SMIs will increment this counter
448 * twice before returning, so the zero bit will not
449 * be corrupted by the NMI/SMI, which is the most important
450 * part.
451 *
452 * The only thing is that we would bring back the counter
453 * to a position that it was in during the NMI/SMI.
454 * But the zero bit would be set, so the rest of the
455 * counter would again be ignored.
456 *
457 * On return from the IRQ, the counter may have the zero
458 * bit be 0 and the counter the same as the return from
459 * the NMI/SMI. If the state machine was so unlucky to
460 * see that, it still doesn't matter, since all
461 * RCU read-side critical sections on this CPU would
462 * have already completed.
463 */
464 per_cpu(dynticks_progress_counter, cpu)++;
465 /*
466 * The following memory barrier ensures that any
467 * rcu_read_lock() primitives in the irq handler
468 * are seen by other CPUs to follow the above
469 * increment to dynticks_progress_counter. This is
470 * required in order for other CPUs to correctly
471 * determine when it is safe to advance the RCU
472 * grace-period state machine.
473 */
474 smp_mb(); /* see above block comment. */
475 /*
476 * Since we can't determine the dynamic tick mode from
477 * the dynticks_progress_counter after this routine,
478 * we use a second flag to acknowledge that we came
479 * from an idle state with ticks stopped.
480 */
481 per_cpu(rcu_update_flag, cpu)++;
482 /*
483 * If we take an NMI/SMI now, they will also increment
484 * the rcu_update_flag, and will not update the
485 * dynticks_progress_counter on exit. That is for
486 * this IRQ to do.
487 */
488 }
489}
490
491/**
492 * rcu_irq_exit - Called from exiting Hard irq context.
493 *
494 * If the CPU was idle with dynamic ticks active, update the
495 * dynticks_progress_counter to let the RCU handling be
496 * aware that the CPU is going back to idle with no ticks.
497 */
498void rcu_irq_exit(void)
499{
500 int cpu = smp_processor_id();
501
502 /*
503 * rcu_update_flag is set if we interrupted the CPU
504 * when it was idle with ticks stopped.
505 * Once this occurs, we keep track of interrupt nesting
506 * because a NMI/SMI could also come in, and we still
507 * only want the IRQ that started the increment of the
508 * dynticks_progress_counter to be the one that modifies
509 * it on exit.
510 */
511 if (per_cpu(rcu_update_flag, cpu)) {
512 if (--per_cpu(rcu_update_flag, cpu))
513 return;
514
515 /* This must match the interrupt nesting */
516 WARN_ON(in_interrupt());
517
518 /*
519 * If an NMI/SMI happens now we are still
520 * protected by the dynticks_progress_counter being odd.
521 */
522
523 /*
524 * The following memory barrier ensures that any
525 * rcu_read_unlock() primitives in the irq handler
526 * are seen by other CPUs to precede the following
527 * increment to dynticks_progress_counter. This
528 * is required in order for other CPUs to determine
529 * when it is safe to advance the RCU grace-period
530 * state machine.
531 */
532 smp_mb(); /* see above block comment. */
533 per_cpu(dynticks_progress_counter, cpu)++;
534 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
535 }
536}
537
538static void dyntick_save_progress_counter(int cpu)
539{
540 per_cpu(rcu_dyntick_snapshot, cpu) =
541 per_cpu(dynticks_progress_counter, cpu);
542}
543
544static inline int
545rcu_try_flip_waitack_needed(int cpu)
546{
547 long curr;
548 long snap;
549
550 curr = per_cpu(dynticks_progress_counter, cpu);
551 snap = per_cpu(rcu_dyntick_snapshot, cpu);
552 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
553
554 /*
555 * If the CPU remained in dynticks mode for the entire time
556 * and didn't take any interrupts, NMIs, SMIs, or whatever,
557 * then it cannot be in the middle of an rcu_read_lock(), so
558 * the next rcu_read_lock() it executes must use the new value
559 * of the counter. So we can safely pretend that this CPU
560 * already acknowledged the counter.
561 */
562
563 if ((curr == snap) && ((curr & 0x1) == 0))
564 return 0;
565
566 /*
567 * If the CPU passed through or entered a dynticks idle phase with
568 * no active irq handlers, then, as above, we can safely pretend
569 * that this CPU already acknowledged the counter.
570 */
571
572 if ((curr - snap) > 2 || (snap & 0x1) == 0)
573 return 0;
574
575 /* We need this CPU to explicitly acknowledge the counter flip. */
576
577 return 1;
578}
579
580static inline int
581rcu_try_flip_waitmb_needed(int cpu)
582{
583 long curr;
584 long snap;
585
586 curr = per_cpu(dynticks_progress_counter, cpu);
587 snap = per_cpu(rcu_dyntick_snapshot, cpu);
588 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
589
590 /*
591 * If the CPU remained in dynticks mode for the entire time
592 * and didn't take any interrupts, NMIs, SMIs, or whatever,
593 * then it cannot have executed an RCU read-side critical section
594 * during that time, so there is no need for it to execute a
595 * memory barrier.
596 */
597
598 if ((curr == snap) && ((curr & 0x1) == 0))
599 return 0;
600
601 /*
602 * If the CPU either entered or exited an outermost interrupt,
603 * SMI, NMI, or whatever handler, then we know that it executed
604 * a memory barrier when doing so. So we don't need another one.
605 */
606 if (curr != snap)
607 return 0;
608
609 /* We need the CPU to execute a memory barrier. */
610
611 return 1;
612}
613
614#else /* !CONFIG_NO_HZ */
615
616# define dyntick_save_progress_counter(cpu) do { } while (0)
617# define rcu_try_flip_waitack_needed(cpu) (1)
618# define rcu_try_flip_waitmb_needed(cpu) (1)
619
620#endif /* CONFIG_NO_HZ */
621
412/* 622/*
413 * Get here when RCU is idle. Decide whether we need to 623 * Get here when RCU is idle. Decide whether we need to
414 * move out of idle state, and return non-zero if so. 624 * move out of idle state, and return non-zero if so.
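Both new predicates reduce to two facts about the per-cpu counter: an even value means the CPU has its ticks stopped (dynticks idle), and any movement since the snapshot means it went through an interrupt entry/exit, and therefore through a memory barrier. Lifting the acknowledgement test into a standalone program and feeding it a few (current, snapshot) pairs makes the case analysis easy to eyeball (waitack_needed() is a hypothetical stand-in for rcu_try_flip_waitack_needed(); values are illustrative):

#include <stdio.h>

/* Copied logic: does this CPU still need to acknowledge the flip? */
static int waitack_needed(long curr, long snap)
{
        /* Idle the whole time (even, unchanged): no ack needed. */
        if (curr == snap && (curr & 0x1) == 0)
                return 0;
        /* Passed through or entered dynticks idle: no ack needed. */
        if ((curr - snap) > 2 || (snap & 0x1) == 0)
                return 0;
        /* Stayed busy with no idle transition: must ack explicitly. */
        return 1;
}

int main(void)
{
        struct { long curr, snap; } c[] = {
                { 4, 4 },       /* idle throughout             -> 0 */
                { 3, 3 },       /* busy, never went idle       -> 1 */
                { 6, 3 },       /* took irqs, re-entered idle  -> 0 */
                { 5, 4 },       /* was idle at snapshot time   -> 0 */
        };
        for (unsigned int i = 0; i < sizeof(c) / sizeof(c[0]); i++)
                printf("curr=%ld snap=%ld -> needed=%d\n", c[i].curr,
                       c[i].snap, waitack_needed(c[i].curr, c[i].snap));
        return 0;
}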
@@ -447,8 +657,10 @@ rcu_try_flip_idle(void)
447 657
448 /* Now ask each CPU for acknowledgement of the flip. */ 658 /* Now ask each CPU for acknowledgement of the flip. */
449 659
450 for_each_cpu_mask(cpu, rcu_cpu_online_map) 660 for_each_cpu_mask(cpu, rcu_cpu_online_map) {
451 per_cpu(rcu_flip_flag, cpu) = rcu_flipped; 661 per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
662 dyntick_save_progress_counter(cpu);
663 }
452 664
453 return 1; 665 return 1;
454} 666}
@@ -464,7 +676,8 @@ rcu_try_flip_waitack(void)
464 676
465 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); 677 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
466 for_each_cpu_mask(cpu, rcu_cpu_online_map) 678 for_each_cpu_mask(cpu, rcu_cpu_online_map)
467 if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { 679 if (rcu_try_flip_waitack_needed(cpu) &&
680 per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
468 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); 681 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
469 return 0; 682 return 0;
470 } 683 }
@@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void)
509 smp_mb(); /* ^^^^^^^^^^^^ */ 722 smp_mb(); /* ^^^^^^^^^^^^ */
510 723
511 /* Call for a memory barrier from each CPU. */ 724 /* Call for a memory barrier from each CPU. */
512 for_each_cpu_mask(cpu, rcu_cpu_online_map) 725 for_each_cpu_mask(cpu, rcu_cpu_online_map) {
513 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; 726 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
727 dyntick_save_progress_counter(cpu);
728 }
514 729
515 RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); 730 RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
516 return 1; 731 return 1;
@@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void)
528 743
529 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); 744 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
530 for_each_cpu_mask(cpu, rcu_cpu_online_map) 745 for_each_cpu_mask(cpu, rcu_cpu_online_map)
531 if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { 746 if (rcu_try_flip_waitmb_needed(cpu) &&
747 per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
532 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); 748 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
533 return 0; 749 return 0;
534 } 750 }
@@ -702,8 +918,9 @@ void rcu_offline_cpu(int cpu)
702 * fix. 918 * fix.
703 */ 919 */
704 920
921 local_irq_save(flags);
705 rdp = RCU_DATA_ME(); 922 rdp = RCU_DATA_ME();
706 spin_lock_irqsave(&rdp->lock, flags); 923 spin_lock(&rdp->lock);
707 *rdp->nexttail = list; 924 *rdp->nexttail = list;
708 if (list) 925 if (list)
709 rdp->nexttail = tail; 926 rdp->nexttail = tail;
@@ -735,9 +952,11 @@ static void rcu_process_callbacks(struct softirq_action *unused)
735{ 952{
736 unsigned long flags; 953 unsigned long flags;
737 struct rcu_head *next, *list; 954 struct rcu_head *next, *list;
738 struct rcu_data *rdp = RCU_DATA_ME(); 955 struct rcu_data *rdp;
739 956
740 spin_lock_irqsave(&rdp->lock, flags); 957 local_irq_save(flags);
958 rdp = RCU_DATA_ME();
959 spin_lock(&rdp->lock);
741 list = rdp->donelist; 960 list = rdp->donelist;
742 if (list == NULL) { 961 if (list == NULL) {
743 spin_unlock_irqrestore(&rdp->lock, flags); 962 spin_unlock_irqrestore(&rdp->lock, flags);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 16cbec2d5d60..efbfc0fc232f 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -113,6 +113,7 @@ ssize_t res_counter_write(struct res_counter *counter, int member,
113 113
114 ret = -EINVAL; 114 ret = -EINVAL;
115 115
116 strstrip(buf);
116 if (write_strategy) { 117 if (write_strategy) {
117 if (write_strategy(buf, &tmp)) { 118 if (write_strategy(buf, &tmp)) {
118 goto out_free; 119 goto out_free;
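One subtlety in the res_counter hunk: the kernel's strstrip() chops trailing whitespace in place but handles leading whitespace only through its return value, and the new call discards that return. A small stand-in with the same contract (trim() is a hypothetical helper, plain C) shows what the discarded-return form does and does not strip:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Same contract as the kernel's strstrip(): truncate trailing
 * whitespace in place, return a pointer past any leading whitespace.
 */
static char *trim(char *s)
{
        size_t len = strlen(s);

        while (len && isspace((unsigned char)s[len - 1]))
                s[--len] = '\0';
        while (isspace((unsigned char)*s))
                s++;
        return s;
}

int main(void)
{
        char buf[] = "  42\n";

        trim(buf);      /* return value ignored, as in the hunk above */
        printf("ignored: \"%s\"\n", buf);        /* "  42": leading spaces stay */
        printf("used:    \"%s\"\n", trim(buf));  /* "42" */
        return 0;
}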
diff --git a/kernel/sched.c b/kernel/sched.c
index f28f19e65b59..b02e4fc25645 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -174,41 +174,6 @@ struct task_group {
174 struct sched_entity **se; 174 struct sched_entity **se;
175 /* runqueue "owned" by this group on each cpu */ 175 /* runqueue "owned" by this group on each cpu */
176 struct cfs_rq **cfs_rq; 176 struct cfs_rq **cfs_rq;
177
178 /*
179 * shares assigned to a task group governs how much of cpu bandwidth
180 * is allocated to the group. The more shares a group has, the more is
181 * the cpu bandwidth allocated to it.
182 *
183 * For example, let's say that there are three task groups, A, B and C which
184 * have been assigned shares 1000, 2000 and 3000 respectively. Then,
185 * cpu bandwidth allocated by the scheduler to task groups A, B and C
186 * should be:
187 *
188 * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
189 * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
190 * Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
191 *
192 * The weight assigned to a task group's schedulable entities on every
193 * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
194 * group's shares. For example, let's say that task group A has been
195 * assigned shares of 1000 and there are two CPUs in a system. Then,
196 *
197 * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
198 *
199 * Note: It's not necessary that each of a task's group schedulable
200 * entity have the same weight on all CPUs. If the group
201 * has 2 of its tasks on CPU0 and 1 task on CPU1, then a
202 * better distribution of weight could be:
203 *
204 * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
205 * tg_A->se[1]->load.weight = 1/2 * 2000 = 667
206 *
207 * rebalance_shares() is responsible for distributing the shares of a
208 * task groups like this among the group's schedulable entities across
209 * cpus.
210 *
211 */
212 unsigned long shares; 177 unsigned long shares;
213#endif 178#endif
214 179
@@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock);
250static DEFINE_MUTEX(doms_cur_mutex); 215static DEFINE_MUTEX(doms_cur_mutex);
251 216
252#ifdef CONFIG_FAIR_GROUP_SCHED 217#ifdef CONFIG_FAIR_GROUP_SCHED
253#ifdef CONFIG_SMP
254/* kernel thread that runs rebalance_shares() periodically */
255static struct task_struct *lb_monitor_task;
256static int load_balance_monitor(void *unused);
257#endif
258
259static void set_se_shares(struct sched_entity *se, unsigned long shares);
260
261#ifdef CONFIG_USER_SCHED 218#ifdef CONFIG_USER_SCHED
262# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) 219# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
263#else 220#else
264# define INIT_TASK_GROUP_LOAD NICE_0_LOAD 221# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
265#endif 222#endif
266 223
267#define MIN_GROUP_SHARES 2
268
269static int init_task_group_load = INIT_TASK_GROUP_LOAD; 224static int init_task_group_load = INIT_TASK_GROUP_LOAD;
270#endif 225#endif
271 226
@@ -668,6 +623,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
668 */ 623 */
669unsigned int sysctl_sched_rt_period = 1000000; 624unsigned int sysctl_sched_rt_period = 1000000;
670 625
626static __read_mostly int scheduler_running;
627
671/* 628/*
672 * part of the period that we allow rt tasks to run in us. 629 * part of the period that we allow rt tasks to run in us.
673 * default: 0.95s 630 * default: 0.95s
@@ -689,14 +646,16 @@ unsigned long long cpu_clock(int cpu)
689 unsigned long flags; 646 unsigned long flags;
690 struct rq *rq; 647 struct rq *rq;
691 648
692 local_irq_save(flags);
693 rq = cpu_rq(cpu);
694 /* 649 /*
695 * Only call sched_clock() if the scheduler has already been 650 * Only call sched_clock() if the scheduler has already been
696 * initialized (some code might call cpu_clock() very early): 651 * initialized (some code might call cpu_clock() very early):
697 */ 652 */
698 if (rq->idle) 653 if (unlikely(!scheduler_running))
699 update_rq_clock(rq); 654 return 0;
655
656 local_irq_save(flags);
657 rq = cpu_rq(cpu);
658 update_rq_clock(rq);
700 now = rq->clock; 659 now = rq->clock;
701 local_irq_restore(flags); 660 local_irq_restore(flags);
702 661
@@ -1241,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
1241static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} 1200static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
1242#endif 1201#endif
1243 1202
1244static inline void inc_cpu_load(struct rq *rq, unsigned long load)
1245{
1246 update_load_add(&rq->load, load);
1247}
1248
1249static inline void dec_cpu_load(struct rq *rq, unsigned long load)
1250{
1251 update_load_sub(&rq->load, load);
1252}
1253
1254#ifdef CONFIG_SMP 1203#ifdef CONFIG_SMP
1255static unsigned long source_load(int cpu, int type); 1204static unsigned long source_load(int cpu, int type);
1256static unsigned long target_load(int cpu, int type); 1205static unsigned long target_load(int cpu, int type);
@@ -1268,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
1268 1217
1269#define sched_class_highest (&rt_sched_class) 1218#define sched_class_highest (&rt_sched_class)
1270 1219
1271static void inc_nr_running(struct rq *rq) 1220static inline void inc_load(struct rq *rq, const struct task_struct *p)
1221{
1222 update_load_add(&rq->load, p->se.load.weight);
1223}
1224
1225static inline void dec_load(struct rq *rq, const struct task_struct *p)
1226{
1227 update_load_sub(&rq->load, p->se.load.weight);
1228}
1229
1230static void inc_nr_running(struct task_struct *p, struct rq *rq)
1272{ 1231{
1273 rq->nr_running++; 1232 rq->nr_running++;
1233 inc_load(rq, p);
1274} 1234}
1275 1235
1276static void dec_nr_running(struct rq *rq) 1236static void dec_nr_running(struct task_struct *p, struct rq *rq)
1277{ 1237{
1278 rq->nr_running--; 1238 rq->nr_running--;
1239 dec_load(rq, p);
1279} 1240}
1280 1241
1281static void set_load_weight(struct task_struct *p) 1242static void set_load_weight(struct task_struct *p)
@@ -1367,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
1367 rq->nr_uninterruptible--; 1328 rq->nr_uninterruptible--;
1368 1329
1369 enqueue_task(rq, p, wakeup); 1330 enqueue_task(rq, p, wakeup);
1370 inc_nr_running(rq); 1331 inc_nr_running(p, rq);
1371} 1332}
1372 1333
1373/* 1334/*
@@ -1379,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
1379 rq->nr_uninterruptible++; 1340 rq->nr_uninterruptible++;
1380 1341
1381 dequeue_task(rq, p, sleep); 1342 dequeue_task(rq, p, sleep);
1382 dec_nr_running(rq); 1343 dec_nr_running(p, rq);
1383} 1344}
1384 1345
1385/** 1346/**
@@ -1831,6 +1792,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1831 long old_state; 1792 long old_state;
1832 struct rq *rq; 1793 struct rq *rq;
1833 1794
1795 smp_wmb();
1834 rq = task_rq_lock(p, &flags); 1796 rq = task_rq_lock(p, &flags);
1835 old_state = p->state; 1797 old_state = p->state;
1836 if (!(old_state & state)) 1798 if (!(old_state & state))
@@ -2018,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2018 * management (if any): 1980 * management (if any):
2019 */ 1981 */
2020 p->sched_class->task_new(rq, p); 1982 p->sched_class->task_new(rq, p);
2021 inc_nr_running(rq); 1983 inc_nr_running(p, rq);
2022 } 1984 }
2023 check_preempt_curr(rq, p); 1985 check_preempt_curr(rq, p);
2024#ifdef CONFIG_SMP 1986#ifdef CONFIG_SMP
@@ -3766,7 +3728,7 @@ void scheduler_tick(void)
3766 3728
3767#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 3729#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
3768 3730
3769void add_preempt_count(int val) 3731void __kprobes add_preempt_count(int val)
3770{ 3732{
3771 /* 3733 /*
3772 * Underflow? 3734 * Underflow?
@@ -3782,7 +3744,7 @@ void add_preempt_count(int val)
3782} 3744}
3783EXPORT_SYMBOL(add_preempt_count); 3745EXPORT_SYMBOL(add_preempt_count);
3784 3746
3785void sub_preempt_count(int val) 3747void __kprobes sub_preempt_count(int val)
3786{ 3748{
3787 /* 3749 /*
3788 * Underflow? 3750 * Underflow?
@@ -3884,7 +3846,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
3884asmlinkage void __sched schedule(void) 3846asmlinkage void __sched schedule(void)
3885{ 3847{
3886 struct task_struct *prev, *next; 3848 struct task_struct *prev, *next;
3887 long *switch_count; 3849 unsigned long *switch_count;
3888 struct rq *rq; 3850 struct rq *rq;
3889 int cpu; 3851 int cpu;
3890 3852
@@ -4357,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice)
4357 goto out_unlock; 4319 goto out_unlock;
4358 } 4320 }
4359 on_rq = p->se.on_rq; 4321 on_rq = p->se.on_rq;
4360 if (on_rq) 4322 if (on_rq) {
4361 dequeue_task(rq, p, 0); 4323 dequeue_task(rq, p, 0);
4324 dec_load(rq, p);
4325 }
4362 4326
4363 p->static_prio = NICE_TO_PRIO(nice); 4327 p->static_prio = NICE_TO_PRIO(nice);
4364 set_load_weight(p); 4328 set_load_weight(p);
@@ -4368,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice)
4368 4332
4369 if (on_rq) { 4333 if (on_rq) {
4370 enqueue_task(rq, p, 0); 4334 enqueue_task(rq, p, 0);
4335 inc_load(rq, p);
4371 /* 4336 /*
4372 * If the task increased its priority or is running and 4337 * If the task increased its priority or is running and
4373 * lowered its priority, then reschedule its CPU: 4338 * lowered its priority, then reschedule its CPU:
@@ -4457,7 +4422,7 @@ int task_nice(const struct task_struct *p)
4457{ 4422{
4458 return TASK_NICE(p); 4423 return TASK_NICE(p);
4459} 4424}
4460EXPORT_SYMBOL_GPL(task_nice); 4425EXPORT_SYMBOL(task_nice);
4461 4426
4462/** 4427/**
4463 * idle_cpu - is a given cpu idle currently? 4428 * idle_cpu - is a given cpu idle currently?
@@ -5135,7 +5100,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
5135 time_slice = 0; 5100 time_slice = 0;
5136 if (p->policy == SCHED_RR) { 5101 if (p->policy == SCHED_RR) {
5137 time_slice = DEF_TIMESLICE; 5102 time_slice = DEF_TIMESLICE;
5138 } else { 5103 } else if (p->policy != SCHED_FIFO) {
5139 struct sched_entity *se = &p->se; 5104 struct sched_entity *se = &p->se;
5140 unsigned long flags; 5105 unsigned long flags;
5141 struct rq *rq; 5106 struct rq *rq;
@@ -5848,6 +5813,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5848 /* Must be high prio: stop_machine expects to yield to it. */ 5813 /* Must be high prio: stop_machine expects to yield to it. */
5849 rq = task_rq_lock(p, &flags); 5814 rq = task_rq_lock(p, &flags);
5850 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); 5815 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
5816
5817 /* Update our root-domain */
5818 if (rq->rd) {
5819 BUG_ON(!cpu_isset(cpu, rq->rd->span));
5820 cpu_set(cpu, rq->rd->online);
5821 }
5822
5851 task_rq_unlock(rq, &flags); 5823 task_rq_unlock(rq, &flags);
5852 cpu_rq(cpu)->migration_thread = p; 5824 cpu_rq(cpu)->migration_thread = p;
5853 break; 5825 break;
@@ -5856,15 +5828,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5856 case CPU_ONLINE_FROZEN: 5828 case CPU_ONLINE_FROZEN:
5857 /* Strictly unnecessary, as first user will wake it. */ 5829 /* Strictly unnecessary, as first user will wake it. */
5858 wake_up_process(cpu_rq(cpu)->migration_thread); 5830 wake_up_process(cpu_rq(cpu)->migration_thread);
5859
5860 /* Update our root-domain */
5861 rq = cpu_rq(cpu);
5862 spin_lock_irqsave(&rq->lock, flags);
5863 if (rq->rd) {
5864 BUG_ON(!cpu_isset(cpu, rq->rd->span));
5865 cpu_set(cpu, rq->rd->online);
5866 }
5867 spin_unlock_irqrestore(&rq->lock, flags);
5868 break; 5831 break;
5869 5832
5870#ifdef CONFIG_HOTPLUG_CPU 5833#ifdef CONFIG_HOTPLUG_CPU
@@ -6140,8 +6103,6 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6140 rq->rd = rd; 6103 rq->rd = rd;
6141 6104
6142 cpu_set(rq->cpu, rd->span); 6105 cpu_set(rq->cpu, rd->span);
6143 if (cpu_isset(rq->cpu, cpu_online_map))
6144 cpu_set(rq->cpu, rd->online);
6145 6106
6146 for (class = sched_class_highest; class; class = class->next) { 6107 for (class = sched_class_highest; class; class = class->next) {
6147 if (class->join_domain) 6108 if (class->join_domain)
@@ -7082,21 +7043,6 @@ void __init sched_init_smp(void)
7082 if (set_cpus_allowed(current, non_isolated_cpus) < 0) 7043 if (set_cpus_allowed(current, non_isolated_cpus) < 0)
7083 BUG(); 7044 BUG();
7084 sched_init_granularity(); 7045 sched_init_granularity();
7085
7086#ifdef CONFIG_FAIR_GROUP_SCHED
7087 if (nr_cpu_ids == 1)
7088 return;
7089
7090 lb_monitor_task = kthread_create(load_balance_monitor, NULL,
7091 "group_balance");
7092 if (!IS_ERR(lb_monitor_task)) {
7093 lb_monitor_task->flags |= PF_NOFREEZE;
7094 wake_up_process(lb_monitor_task);
7095 } else {
7096 printk(KERN_ERR "Could not create load balance monitor thread"
7097 "(error = %ld) \n", PTR_ERR(lb_monitor_task));
7098 }
7099#endif
7100} 7046}
7101#else 7047#else
7102void __init sched_init_smp(void) 7048void __init sched_init_smp(void)
@@ -7283,6 +7229,8 @@ void __init sched_init(void)
7283 * During early bootup we pretend to be a normal task: 7229 * During early bootup we pretend to be a normal task:
7284 */ 7230 */
7285 current->sched_class = &fair_sched_class; 7231 current->sched_class = &fair_sched_class;
7232
7233 scheduler_running = 1;
7286} 7234}
7287 7235
7288#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 7236#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -7417,157 +7365,6 @@ void set_curr_task(int cpu, struct task_struct *p)
7417 7365
7418#ifdef CONFIG_GROUP_SCHED 7366#ifdef CONFIG_GROUP_SCHED
7419 7367
7420#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
7421/*
7422 * distribute shares of all task groups among their schedulable entities,
7423 * to reflect load distribution across cpus.
7424 */
7425static int rebalance_shares(struct sched_domain *sd, int this_cpu)
7426{
7427 struct cfs_rq *cfs_rq;
7428 struct rq *rq = cpu_rq(this_cpu);
7429 cpumask_t sdspan = sd->span;
7430 int balanced = 1;
7431
7432 /* Walk through all the task groups that we have */
7433 for_each_leaf_cfs_rq(rq, cfs_rq) {
7434 int i;
7435 unsigned long total_load = 0, total_shares;
7436 struct task_group *tg = cfs_rq->tg;
7437
7438 /* Gather total task load of this group across cpus */
7439 for_each_cpu_mask(i, sdspan)
7440 total_load += tg->cfs_rq[i]->load.weight;
7441
7442 /* Nothing to do if this group has no load */
7443 if (!total_load)
7444 continue;
7445
7446 /*
7447 * tg->shares represents the number of cpu shares the task group
7448 * is eligible to hold on a single cpu. On N cpus, it is
7449 * eligible to hold (N * tg->shares) cpu shares.
7450 */
7451 total_shares = tg->shares * cpus_weight(sdspan);
7452
7453 /*
7454 * redistribute total_shares across cpus as per the task load
7455 * distribution.
7456 */
7457 for_each_cpu_mask(i, sdspan) {
7458 unsigned long local_load, local_shares;
7459
7460 local_load = tg->cfs_rq[i]->load.weight;
7461 local_shares = (local_load * total_shares) / total_load;
7462 if (!local_shares)
7463 local_shares = MIN_GROUP_SHARES;
7464 if (local_shares == tg->se[i]->load.weight)
7465 continue;
7466
7467 spin_lock_irq(&cpu_rq(i)->lock);
7468 set_se_shares(tg->se[i], local_shares);
7469 spin_unlock_irq(&cpu_rq(i)->lock);
7470 balanced = 0;
7471 }
7472 }
7473
7474 return balanced;
7475}
7476
7477/*
7478 * How frequently should we rebalance_shares() across cpus?
7479 *
7480 * The more frequently we rebalance shares, the more accurate is the fairness
7481 * of cpu bandwidth distribution between task groups. However higher frequency
7482 * also implies increased scheduling overhead.
7483 *
7484 * sysctl_sched_min_bal_int_shares represents the minimum interval between
7485 * consecutive calls to rebalance_shares() in the same sched domain.
7486 *
7487 * sysctl_sched_max_bal_int_shares represents the maximum interval between
7488 * consecutive calls to rebalance_shares() in the same sched domain.
7489 *
7490 * These settings allow for the appropriate trade-off between accuracy of
7491 * fairness and the associated overhead.
7492 *
7493 */
7494
7495/* default: 8ms, units: milliseconds */
7496const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
7497
7498/* default: 128ms, units: milliseconds */
7499const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
7500
7501/* kernel thread that runs rebalance_shares() periodically */
7502static int load_balance_monitor(void *unused)
7503{
7504 unsigned int timeout = sysctl_sched_min_bal_int_shares;
7505 struct sched_param schedparm;
7506 int ret;
7507
7508 /*
7509 * We don't want this thread's execution to be limited by the shares
7510 * assigned to default group (init_task_group). Hence make it run
7511 * as a SCHED_RR RT task at the lowest priority.
7512 */
7513 schedparm.sched_priority = 1;
7514 ret = sched_setscheduler(current, SCHED_RR, &schedparm);
7515 if (ret)
7516 printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
7517 " monitor thread (error = %d) \n", ret);
7518
7519 while (!kthread_should_stop()) {
7520 int i, cpu, balanced = 1;
7521
7522 /* Prevent cpus going down or coming up */
7523 get_online_cpus();
7524 /* lockout changes to doms_cur[] array */
7525 lock_doms_cur();
7526 /*
7527 * Enter a rcu read-side critical section to safely walk rq->sd
7528 * chain on various cpus and to walk task group list
7529 * (rq->leaf_cfs_rq_list) in rebalance_shares().
7530 */
7531 rcu_read_lock();
7532
7533 for (i = 0; i < ndoms_cur; i++) {
7534 cpumask_t cpumap = doms_cur[i];
7535 struct sched_domain *sd = NULL, *sd_prev = NULL;
7536
7537 cpu = first_cpu(cpumap);
7538
7539 /* Find the highest domain at which to balance shares */
7540 for_each_domain(cpu, sd) {
7541 if (!(sd->flags & SD_LOAD_BALANCE))
7542 continue;
7543 sd_prev = sd;
7544 }
7545
7546 sd = sd_prev;
7547 /* sd == NULL? No load balance required in this domain */
7548 if (!sd)
7549 continue;
7550
7551 balanced &= rebalance_shares(sd, cpu);
7552 }
7553
7554 rcu_read_unlock();
7555
7556 unlock_doms_cur();
7557 put_online_cpus();
7558
7559 if (!balanced)
7560 timeout = sysctl_sched_min_bal_int_shares;
7561 else if (timeout < sysctl_sched_max_bal_int_shares)
7562 timeout *= 2;
7563
7564 msleep_interruptible(timeout);
7565 }
7566
7567 return 0;
7568}
7569#endif /* CONFIG_SMP */
7570
7571#ifdef CONFIG_FAIR_GROUP_SCHED 7368#ifdef CONFIG_FAIR_GROUP_SCHED
7572static void free_fair_sched_group(struct task_group *tg) 7369static void free_fair_sched_group(struct task_group *tg)
7573{ 7370{
@@ -7824,6 +7621,11 @@ void sched_move_task(struct task_struct *tsk)
7824 7621
7825 set_task_rq(tsk, task_cpu(tsk)); 7622 set_task_rq(tsk, task_cpu(tsk));
7826 7623
7624#ifdef CONFIG_FAIR_GROUP_SCHED
7625 if (tsk->sched_class->moved_group)
7626 tsk->sched_class->moved_group(tsk);
7627#endif
7628
7827 if (on_rq) { 7629 if (on_rq) {
7828 if (unlikely(running)) 7630 if (unlikely(running))
7829 tsk->sched_class->set_curr_task(rq); 7631 tsk->sched_class->set_curr_task(rq);
@@ -7834,29 +7636,25 @@ void sched_move_task(struct task_struct *tsk)
7834} 7636}
7835 7637
7836#ifdef CONFIG_FAIR_GROUP_SCHED 7638#ifdef CONFIG_FAIR_GROUP_SCHED
7837/* rq->lock to be locked by caller */
7838static void set_se_shares(struct sched_entity *se, unsigned long shares) 7639static void set_se_shares(struct sched_entity *se, unsigned long shares)
7839{ 7640{
7840 struct cfs_rq *cfs_rq = se->cfs_rq; 7641 struct cfs_rq *cfs_rq = se->cfs_rq;
7841 struct rq *rq = cfs_rq->rq; 7642 struct rq *rq = cfs_rq->rq;
7842 int on_rq; 7643 int on_rq;
7843 7644
7844 if (!shares) 7645 spin_lock_irq(&rq->lock);
7845 shares = MIN_GROUP_SHARES;
7846 7646
7847 on_rq = se->on_rq; 7647 on_rq = se->on_rq;
7848 if (on_rq) { 7648 if (on_rq)
7849 dequeue_entity(cfs_rq, se, 0); 7649 dequeue_entity(cfs_rq, se, 0);
7850 dec_cpu_load(rq, se->load.weight);
7851 }
7852 7650
7853 se->load.weight = shares; 7651 se->load.weight = shares;
7854 se->load.inv_weight = div64_64((1ULL<<32), shares); 7652 se->load.inv_weight = div64_64((1ULL<<32), shares);
7855 7653
7856 if (on_rq) { 7654 if (on_rq)
7857 enqueue_entity(cfs_rq, se, 0); 7655 enqueue_entity(cfs_rq, se, 0);
7858 inc_cpu_load(rq, se->load.weight); 7656
7859 } 7657 spin_unlock_irq(&rq->lock);
7860} 7658}
7861 7659
7862static DEFINE_MUTEX(shares_mutex); 7660static DEFINE_MUTEX(shares_mutex);
@@ -7866,18 +7664,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
7866 int i; 7664 int i;
7867 unsigned long flags; 7665 unsigned long flags;
7868 7666
7667 /*
7668 * A weight of 0 or 1 can cause arithmetic problems.
7669 * (The default weight is 1024 - so there's no practical
7670 * limitation from this.)
7671 */
7672 if (shares < 2)
7673 shares = 2;
7674
7869 mutex_lock(&shares_mutex); 7675 mutex_lock(&shares_mutex);
7870 if (tg->shares == shares) 7676 if (tg->shares == shares)
7871 goto done; 7677 goto done;
7872 7678
7873 if (shares < MIN_GROUP_SHARES)
7874 shares = MIN_GROUP_SHARES;
7875
7876 /*
7877 * Prevent any load balance activity (rebalance_shares,
7878 * load_balance_fair) from referring to this group first,
7879 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
7880 */
7881 spin_lock_irqsave(&task_group_lock, flags); 7679 spin_lock_irqsave(&task_group_lock, flags);
7882 for_each_possible_cpu(i) 7680 for_each_possible_cpu(i)
7883 unregister_fair_sched_group(tg, i); 7681 unregister_fair_sched_group(tg, i);
@@ -7891,11 +7689,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
7891 * w/o tripping rebalance_share or load_balance_fair. 7689 * w/o tripping rebalance_share or load_balance_fair.
7892 */ 7690 */
7893 tg->shares = shares; 7691 tg->shares = shares;
7894 for_each_possible_cpu(i) { 7692 for_each_possible_cpu(i)
7895 spin_lock_irq(&cpu_rq(i)->lock);
7896 set_se_shares(tg->se[i], shares); 7693 set_se_shares(tg->se[i], shares);
7897 spin_unlock_irq(&cpu_rq(i)->lock);
7898 }
7899 7694
7900 /* 7695 /*
7901 * Enable load balance activity on this group, by inserting it back on 7696 * Enable load balance activity on this group, by inserting it back on
@@ -7927,9 +7722,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
7927 if (runtime == RUNTIME_INF) 7722 if (runtime == RUNTIME_INF)
7928 return 1ULL << 16; 7723 return 1ULL << 16;
7929 7724
7930 runtime *= (1ULL << 16); 7725 return div64_64(runtime << 16, period);
7931 div64_64(runtime, period);
7932 return runtime;
7933} 7726}
7934 7727
7935static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) 7728static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
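The to_ratio() rewrite fixes a discarded-return-value bug: div64_64() yields the quotient rather than dividing its argument in place, so the old body returned runtime << 16 unchanged. A standalone before/after sketch (with a trivial stand-in for div64_64() and an illustrative 0.95 runtime/period pair):

#include <inttypes.h>
#include <stdio.h>

/* Stand-in for the kernel's div64_64(): returns the quotient,
 * does NOT modify its arguments. */
static uint64_t div64_64(uint64_t dividend, uint64_t divisor)
{
        return dividend / divisor;
}

static uint64_t to_ratio_buggy(uint64_t period, uint64_t runtime)
{
        runtime *= 1ULL << 16;
        div64_64(runtime, period);      /* quotient silently dropped */
        return runtime;                 /* returns runtime << 16     */
}

static uint64_t to_ratio_fixed(uint64_t period, uint64_t runtime)
{
        return div64_64(runtime << 16, period);
}

int main(void)
{
        uint64_t period = 1000000, runtime = 950000;    /* 0.95 of the period */

        printf("buggy: %" PRIu64 "\n", to_ratio_buggy(period, runtime));
        printf("fixed: %" PRIu64 "\n", to_ratio_fixed(period, runtime)); /* ~62259 */
        return 0;
}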
@@ -7953,25 +7746,40 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
7953 return total + to_ratio(period, runtime) < global_ratio; 7746 return total + to_ratio(period, runtime) < global_ratio;
7954} 7747}
7955 7748
7749/* Must be called with tasklist_lock held */
7750static inline int tg_has_rt_tasks(struct task_group *tg)
7751{
7752 struct task_struct *g, *p;
7753 do_each_thread(g, p) {
7754 if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
7755 return 1;
7756 } while_each_thread(g, p);
7757 return 0;
7758}
7759
7956int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) 7760int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
7957{ 7761{
7958 u64 rt_runtime, rt_period; 7762 u64 rt_runtime, rt_period;
7959 int err = 0; 7763 int err = 0;
7960 7764
7961 rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; 7765 rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
7962 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; 7766 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
7963 if (rt_runtime_us == -1) 7767 if (rt_runtime_us == -1)
7964 rt_runtime = rt_period; 7768 rt_runtime = RUNTIME_INF;
7965 7769
7966 mutex_lock(&rt_constraints_mutex); 7770 mutex_lock(&rt_constraints_mutex);
7771 read_lock(&tasklist_lock);
7772 if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) {
7773 err = -EBUSY;
7774 goto unlock;
7775 }
7967 if (!__rt_schedulable(tg, rt_period, rt_runtime)) { 7776 if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
7968 err = -EINVAL; 7777 err = -EINVAL;
7969 goto unlock; 7778 goto unlock;
7970 } 7779 }
7971 if (rt_runtime_us == -1)
7972 rt_runtime = RUNTIME_INF;
7973 tg->rt_runtime = rt_runtime; 7780 tg->rt_runtime = rt_runtime;
7974 unlock: 7781 unlock:
7782 read_unlock(&tasklist_lock);
7975 mutex_unlock(&rt_constraints_mutex); 7783 mutex_unlock(&rt_constraints_mutex);
7976 7784
7977 return err; 7785 return err;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6c091d6e159d..e2a530515619 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -202,17 +202,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
202 202
203static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) 203static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
204{ 204{
205 struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; 205 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
206 struct sched_entity *se = NULL;
207 struct rb_node *parent;
208 206
209 while (*link) { 207 if (!last)
210 parent = *link; 208 return NULL;
211 se = rb_entry(parent, struct sched_entity, run_node);
212 link = &parent->rb_right;
213 }
214 209
215 return se; 210 return rb_entry(last, struct sched_entity, run_node);
216} 211}
217 212
218/************************************************************** 213/**************************************************************
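The __pick_last_entity() cleanup is behavior-preserving: chasing rb_right links until they run out is exactly what rb_last() does. For reference, the same rightmost-node walk on a toy binary search tree (hypothetical node type, not the kernel rbtree API):

#include <stdio.h>
#include <stdlib.h>

struct node {
        long key;
        struct node *left, *right;
};

/* Equivalent of rb_last(): follow right links to the maximum key. */
static struct node *tree_last(struct node *n)
{
        if (!n)
                return NULL;
        while (n->right)
                n = n->right;
        return n;
}

static struct node *insert(struct node *n, long key)
{
        if (!n) {
                n = calloc(1, sizeof(*n));
                if (!n)
                        exit(1);        /* sketch: bail on allocation failure */
                n->key = key;
        } else if (key < n->key) {
                n->left = insert(n->left, key);
        } else {
                n->right = insert(n->right, key);
        }
        return n;
}

int main(void)
{
        struct node *root = NULL;
        long keys[] = { 40, 10, 70, 60, 90 };

        for (unsigned int i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
                root = insert(root, keys[i]);
        printf("last = %ld\n", tree_last(root)->key);   /* 90 */
        return 0;
}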
@@ -732,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
732 return se->parent; 727 return se->parent;
733} 728}
734 729
735#define GROUP_IMBALANCE_PCT 20
736
737#else /* CONFIG_FAIR_GROUP_SCHED */ 730#else /* CONFIG_FAIR_GROUP_SCHED */
738 731
739#define for_each_sched_entity(se) \ 732#define for_each_sched_entity(se) \
@@ -824,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
824static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) 817static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
825{ 818{
826 struct cfs_rq *cfs_rq; 819 struct cfs_rq *cfs_rq;
827 struct sched_entity *se = &p->se, 820 struct sched_entity *se = &p->se;
828 *topse = NULL; /* Highest schedulable entity */
829 int incload = 1;
830 821
831 for_each_sched_entity(se) { 822 for_each_sched_entity(se) {
832 topse = se; 823 if (se->on_rq)
833 if (se->on_rq) {
834 incload = 0;
835 break; 824 break;
836 }
837 cfs_rq = cfs_rq_of(se); 825 cfs_rq = cfs_rq_of(se);
838 enqueue_entity(cfs_rq, se, wakeup); 826 enqueue_entity(cfs_rq, se, wakeup);
839 wakeup = 1; 827 wakeup = 1;
840 } 828 }
841 /* Increment cpu load if we just enqueued the first task of a group on
842 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
843 * at the highest grouping level.
844 */
845 if (incload)
846 inc_cpu_load(rq, topse->load.weight);
847 829
848 hrtick_start_fair(rq, rq->curr); 830 hrtick_start_fair(rq, rq->curr);
849} 831}
@@ -856,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
856static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) 838static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
857{ 839{
858 struct cfs_rq *cfs_rq; 840 struct cfs_rq *cfs_rq;
859 struct sched_entity *se = &p->se, 841 struct sched_entity *se = &p->se;
860 *topse = NULL; /* Highest schedulable entity */
861 int decload = 1;
862 842
863 for_each_sched_entity(se) { 843 for_each_sched_entity(se) {
864 topse = se;
865 cfs_rq = cfs_rq_of(se); 844 cfs_rq = cfs_rq_of(se);
866 dequeue_entity(cfs_rq, se, sleep); 845 dequeue_entity(cfs_rq, se, sleep);
867 /* Don't dequeue parent if it has other entities besides us */ 846 /* Don't dequeue parent if it has other entities besides us */
868 if (cfs_rq->load.weight) { 847 if (cfs_rq->load.weight)
869 if (parent_entity(se))
870 decload = 0;
871 break; 848 break;
872 }
873 sleep = 1; 849 sleep = 1;
874 } 850 }
875 /* Decrement cpu load if we just dequeued the last task of a group on
876 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
877 * at the highest grouping level.
878 */
879 if (decload)
880 dec_cpu_load(rq, topse->load.weight);
881 851
882 hrtick_start_fair(rq, rq->curr); 852 hrtick_start_fair(rq, rq->curr);
883} 853}
@@ -1191,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg)
1191 return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); 1161 return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
1192} 1162}
1193 1163
1164#ifdef CONFIG_FAIR_GROUP_SCHED
1165static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
1166{
1167 struct sched_entity *curr;
1168 struct task_struct *p;
1169
1170 if (!cfs_rq->nr_running || !first_fair(cfs_rq))
1171 return MAX_PRIO;
1172
1173 curr = cfs_rq->curr;
1174 if (!curr)
1175 curr = __pick_next_entity(cfs_rq);
1176
1177 p = task_of(curr);
1178
1179 return p->prio;
1180}
1181#endif
1182
1194static unsigned long 1183static unsigned long
1195load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, 1184load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1196 unsigned long max_load_move, 1185 unsigned long max_load_move,
@@ -1200,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1200 struct cfs_rq *busy_cfs_rq; 1189 struct cfs_rq *busy_cfs_rq;
1201 long rem_load_move = max_load_move; 1190 long rem_load_move = max_load_move;
1202 struct rq_iterator cfs_rq_iterator; 1191 struct rq_iterator cfs_rq_iterator;
1203 unsigned long load_moved;
1204 1192
1205 cfs_rq_iterator.start = load_balance_start_fair; 1193 cfs_rq_iterator.start = load_balance_start_fair;
1206 cfs_rq_iterator.next = load_balance_next_fair; 1194 cfs_rq_iterator.next = load_balance_next_fair;
1207 1195
1208 for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { 1196 for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
1209#ifdef CONFIG_FAIR_GROUP_SCHED 1197#ifdef CONFIG_FAIR_GROUP_SCHED
1210 struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu]; 1198 struct cfs_rq *this_cfs_rq;
1211 unsigned long maxload, task_load, group_weight; 1199 long imbalance;
1212 unsigned long thisload, per_task_load; 1200 unsigned long maxload;
1213 struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
1214
1215 task_load = busy_cfs_rq->load.weight;
1216 group_weight = se->load.weight;
1217 1201
1218 /* 1202 this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
1219 * 'group_weight' is contributed by tasks of total weight
1220 * 'task_load'. To move 'rem_load_move' worth of weight only,
1221 * we need to move a maximum task load of:
1222 *
1223 * maxload = (remload / group_weight) * task_load;
1224 */
1225 maxload = (rem_load_move * task_load) / group_weight;
1226 1203
1227 if (!maxload || !task_load) 1204 imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
1205 /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
1206 if (imbalance <= 0)
1228 continue; 1207 continue;
1229 1208
1230 per_task_load = task_load / busy_cfs_rq->nr_running; 1209 /* Don't pull more than imbalance/2 */
1231 /* 1210 imbalance /= 2;
1232 * balance_tasks will try to forcibly move at least one task if
1233 * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
1234 * maxload is less than GROUP_IMBALANCE_PCT% of the per_task_load.
1235 */
1236 if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
1237 continue;
1238 1212
1239 /* Disable priority-based load balance */ 1213 *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
1240 *this_best_prio = 0;
1241 thisload = this_cfs_rq->load.weight;
1242#else 1214#else
1243# define maxload rem_load_move 1215# define maxload rem_load_move
1244#endif 1216#endif
@@ -1247,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1247 * load_balance_[start|next]_fair iterators 1219 * load_balance_[start|next]_fair iterators
1248 */ 1220 */
1249 cfs_rq_iterator.arg = busy_cfs_rq; 1221 cfs_rq_iterator.arg = busy_cfs_rq;
1250 load_moved = balance_tasks(this_rq, this_cpu, busiest, 1222 rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
1251 maxload, sd, idle, all_pinned, 1223 maxload, sd, idle, all_pinned,
1252 this_best_prio, 1224 this_best_prio,
1253 &cfs_rq_iterator); 1225 &cfs_rq_iterator);
1254 1226
1255#ifdef CONFIG_FAIR_GROUP_SCHED
1256 /*
1257 * load_moved holds the task load that was moved. The
1258 * effective (group) weight moved would be:
1259 * load_moved_eff = load_moved/task_load * group_weight;
1260 */
1261 load_moved = (group_weight * load_moved) / task_load;
1262
1263 /* Adjust shares on both cpus to reflect load_moved */
1264 group_weight -= load_moved;
1265 set_se_shares(se, group_weight);
1266
1267 se = busy_cfs_rq->tg->se[this_cpu];
1268 if (!thisload)
1269 group_weight = load_moved;
1270 else
1271 group_weight = se->load.weight + load_moved;
1272 set_se_shares(se, group_weight);
1273#endif
1274
1275 rem_load_move -= load_moved;
1276
1277 if (rem_load_move <= 0) 1227 if (rem_load_move <= 0)
1278 break; 1228 break;
1279 } 1229 }
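The replacement heuristic in load_balance_fair() is easy to check by hand: pull from a group only if the busiest cpu's cfs_rq carries more weight than ours, move at most half the difference, and never exceed what the caller still wants moved. A quick sketch of just that arithmetic (maxload_to_pull() is a hypothetical name; weights are illustrative):

#include <stdio.h>

/* Mirror of the new hunk: how much weight may we pull for one group? */
static long maxload_to_pull(long busy_weight, long this_weight,
                            long rem_load_move)
{
        long imbalance = busy_weight - this_weight;

        if (imbalance <= 0)     /* we already carry at least as much */
                return 0;
        imbalance /= 2;         /* don't overshoot the balance point */
        return imbalance < rem_load_move ? imbalance : rem_load_move;
}

int main(void)
{
        /* busy=3072, this=1024: imbalance 2048, pull at most 1024 */
        printf("%ld\n", maxload_to_pull(3072, 1024, 4096));     /* 1024 */
        /* capped by the remaining load the caller asked to move */
        printf("%ld\n", maxload_to_pull(3072, 1024, 600));      /* 600  */
        /* this side already heavier: pull nothing               */
        printf("%ld\n", maxload_to_pull(1024, 3072, 4096));     /* 0    */
        return 0;
}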
@@ -1403,6 +1353,16 @@ static void set_curr_task_fair(struct rq *rq)
1403 set_next_entity(cfs_rq_of(se), se); 1353 set_next_entity(cfs_rq_of(se), se);
1404} 1354}
1405 1355
1356#ifdef CONFIG_FAIR_GROUP_SCHED
1357static void moved_group_fair(struct task_struct *p)
1358{
1359 struct cfs_rq *cfs_rq = task_cfs_rq(p);
1360
1361 update_curr(cfs_rq);
1362 place_entity(cfs_rq, &p->se, 1);
1363}
1364#endif
1365
1406/* 1366/*
1407 * All the scheduling class methods: 1367 * All the scheduling class methods:
1408 */ 1368 */
@@ -1431,6 +1391,10 @@ static const struct sched_class fair_sched_class = {
1431 1391
1432 .prio_changed = prio_changed_fair, 1392 .prio_changed = prio_changed_fair,
1433 .switched_to = switched_to_fair, 1393 .switched_to = switched_to_fair,
1394
1395#ifdef CONFIG_FAIR_GROUP_SCHED
1396 .moved_group = moved_group_fair,
1397#endif
1434}; 1398};
1435 1399
1436#ifdef CONFIG_SCHED_DEBUG 1400#ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f54792b175b2..0a6d2e516420 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -393,8 +393,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
393 */ 393 */
394 for_each_sched_rt_entity(rt_se) 394 for_each_sched_rt_entity(rt_se)
395 enqueue_rt_entity(rt_se); 395 enqueue_rt_entity(rt_se);
396
397 inc_cpu_load(rq, p->se.load.weight);
398} 396}
399 397
400static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) 398static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -414,8 +412,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
414 if (rt_rq && rt_rq->rt_nr_running) 412 if (rt_rq && rt_rq->rt_nr_running)
415 enqueue_rt_entity(rt_se); 413 enqueue_rt_entity(rt_se);
416 } 414 }
417
418 dec_cpu_load(rq, p->se.load.weight);
419} 415}
420 416
421/* 417/*
@@ -1111,9 +1107,11 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p,
1111 pull_rt_task(rq); 1107 pull_rt_task(rq);
1112 /* 1108 /*
1113 * If there's a higher priority task waiting to run 1109 * If there's a higher priority task waiting to run
1114 * then reschedule. 1110 * then reschedule. Note, the above pull_rt_task
1111 * can release the rq lock and p could migrate.
1112 * Only reschedule if p is still on the same runqueue.
1115 */ 1113 */
1116 if (p->prio > rq->rt.highest_prio) 1114 if (p->prio > rq->rt.highest_prio && rq->curr == p)
1117 resched_task(p); 1115 resched_task(p);
1118#else 1116#else
1119 /* For UP simply resched on drop of prio */ 1117 /* For UP simply resched on drop of prio */
diff --git a/kernel/signal.c b/kernel/signal.c
index 84917fe507f7..6af1210092c3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1623,7 +1623,6 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1623 /* Let the debugger run. */ 1623 /* Let the debugger run. */
1624 __set_current_state(TASK_TRACED); 1624 __set_current_state(TASK_TRACED);
1625 spin_unlock_irq(&current->sighand->siglock); 1625 spin_unlock_irq(&current->sighand->siglock);
1626 try_to_freeze();
1627 read_lock(&tasklist_lock); 1626 read_lock(&tasklist_lock);
1628 if (!unlikely(killed) && may_ptrace_stop()) { 1627 if (!unlikely(killed) && may_ptrace_stop()) {
1629 do_notify_parent_cldstop(current, CLD_TRAPPED); 1628 do_notify_parent_cldstop(current, CLD_TRAPPED);
@@ -1641,6 +1640,13 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1641 } 1640 }
1642 1641
1643 /* 1642 /*
1643 * While in TASK_TRACED, we were considered "frozen enough".
1644 * Now that we woke up, it's crucial that, if we're supposed
1645 * to be frozen, we freeze now before running anything substantial.
1646 */
1647 try_to_freeze();
1648
1649 /*
1644 * We are back. Now reacquire the siglock before touching 1650 * We are back. Now reacquire the siglock before touching
1645 * last_siginfo, so that we are sure to have synchronized with 1651 * last_siginfo, so that we are sure to have synchronized with
1646 * any signal-sending on another CPU that wants to examine it. 1652 * any signal-sending on another CPU that wants to examine it.
@@ -1757,9 +1763,15 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
1757 sigset_t *mask = &current->blocked; 1763 sigset_t *mask = &current->blocked;
1758 int signr = 0; 1764 int signr = 0;
1759 1765
1766relock:
1767 /*
1768 * We'll jump back here after any time we were stopped in TASK_STOPPED.
1769 * While in TASK_STOPPED, we were considered "frozen enough".
1770 * Now that we woke up, it's crucial if we're supposed to be
1771 * frozen that we freeze now before running anything substantial.
1772 */
1760 try_to_freeze(); 1773 try_to_freeze();
1761 1774
1762relock:
1763 spin_lock_irq(&current->sighand->siglock); 1775 spin_lock_irq(&current->sighand->siglock);
1764 for (;;) { 1776 for (;;) {
1765 struct k_sigaction *ka; 1777 struct k_sigaction *ka;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5b3aea5f471e..31e9f2a47928 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -313,6 +313,7 @@ void irq_exit(void)
313 /* Make sure that timer wheel updates are propagated */ 313 /* Make sure that timer wheel updates are propagated */
314 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) 314 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315 tick_nohz_stop_sched_tick(); 315 tick_nohz_stop_sched_tick();
316 rcu_irq_exit();
316#endif 317#endif
317 preempt_enable_no_resched(); 318 preempt_enable_no_resched();
318} 319}
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 7c2da88db4ed..01b6522fd92b 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -216,26 +216,27 @@ static int watchdog(void *__bind_cpu)
216 /* initialize timestamp */ 216 /* initialize timestamp */
217 touch_softlockup_watchdog(); 217 touch_softlockup_watchdog();
218 218
219 set_current_state(TASK_INTERRUPTIBLE);
219 /* 220 /*
220 * Run briefly once per second to reset the softlockup timestamp. 221 * Run briefly once per second to reset the softlockup timestamp.
221 * If this gets delayed for more than 60 seconds then the 222 * If this gets delayed for more than 60 seconds then the
222 * debug-printout triggers in softlockup_tick(). 223 * debug-printout triggers in softlockup_tick().
223 */ 224 */
224 while (!kthread_should_stop()) { 225 while (!kthread_should_stop()) {
225 set_current_state(TASK_INTERRUPTIBLE);
226 touch_softlockup_watchdog(); 226 touch_softlockup_watchdog();
227 schedule(); 227 schedule();
228 228
229 if (kthread_should_stop()) 229 if (kthread_should_stop())
230 break; 230 break;
231 231
232 if (this_cpu != check_cpu) 232 if (this_cpu == check_cpu) {
233 continue; 233 if (sysctl_hung_task_timeout_secs)
234 234 check_hung_uninterruptible_tasks(this_cpu);
235 if (sysctl_hung_task_timeout_secs) 235 }
236 check_hung_uninterruptible_tasks(this_cpu);
237 236
237 set_current_state(TASK_INTERRUPTIBLE);
238 } 238 }
239 __set_current_state(TASK_RUNNING);
239 240
240 return 0; 241 return 0;
241} 242}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8b7e95411795..b2a2d6889bab 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,24 +311,6 @@ static struct ctl_table kern_table[] = {
311 .mode = 0644, 311 .mode = 0644,
312 .proc_handler = &proc_dointvec, 312 .proc_handler = &proc_dointvec,
313 }, 313 },
314#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
315 {
316 .ctl_name = CTL_UNNUMBERED,
317 .procname = "sched_min_bal_int_shares",
318 .data = &sysctl_sched_min_bal_int_shares,
319 .maxlen = sizeof(unsigned int),
320 .mode = 0644,
321 .proc_handler = &proc_dointvec,
322 },
323 {
324 .ctl_name = CTL_UNNUMBERED,
325 .procname = "sched_max_bal_int_shares",
326 .data = &sysctl_sched_max_bal_int_shares,
327 .maxlen = sizeof(unsigned int),
328 .mode = 0644,
329 .proc_handler = &proc_dointvec,
330 },
331#endif
332#endif 314#endif
333 { 315 {
334 .ctl_name = CTL_UNNUMBERED, 316 .ctl_name = CTL_UNNUMBERED,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fa9bb73dbdb4..2968298f8f36 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void)
282 ts->idle_tick = ts->sched_timer.expires; 282 ts->idle_tick = ts->sched_timer.expires;
283 ts->tick_stopped = 1; 283 ts->tick_stopped = 1;
284 ts->idle_jiffies = last_jiffies; 284 ts->idle_jiffies = last_jiffies;
285 rcu_enter_nohz();
285 } 286 }
286 287
287 /* 288 /*
@@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void)
375 return; 376 return;
376 } 377 }
377 378
379 rcu_exit_nohz();
380
378 /* Update jiffies first */ 381 /* Update jiffies first */
379 select_nohz_load_balancer(0); 382 select_nohz_load_balancer(0);
380 now = ktime_get(); 383 now = ktime_get();
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index d3d94c1a0fd2..67fe8fc21fb1 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -65,9 +65,9 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now)
65 SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); 65 SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
66#endif 66#endif
67 SEQ_printf(m, "\n"); 67 SEQ_printf(m, "\n");
68 SEQ_printf(m, " # expires at %Lu nsecs [in %Lu nsecs]\n", 68 SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n",
69 (unsigned long long)ktime_to_ns(timer->expires), 69 (unsigned long long)ktime_to_ns(timer->expires),
70 (unsigned long long)(ktime_to_ns(timer->expires) - now)); 70 (long long)(ktime_to_ns(timer->expires) - now));
71} 71}
72 72
73static void 73static void
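The timer_list fix matters as soon as a timer's expiry is already in the past: ktime_to_ns(timer->expires) - now is then negative, and the old unsigned conversion rendered it as an 18-digit value. A two-line demonstration (standard C, so %lld rather than the kernel's %Ld):

#include <stdio.h>

int main(void)
{
        long long expires = 1000, now = 1500;   /* timer already expired */
        long long delta = expires - now;        /* -500 */

        printf("as %%llu: %llu\n", (unsigned long long)delta);
        printf("as %%lld: %lld\n", delta);      /* -500, the useful answer */
        return 0;
}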