Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.preempt        |    3
-rw-r--r--  kernel/auditsc.c              |    1
-rw-r--r--  kernel/capability.c           |    4
-rw-r--r--  kernel/cpuset.c               |  185
-rw-r--r--  kernel/exit.c                 |  116
-rw-r--r--  kernel/fork.c                 |   18
-rw-r--r--  kernel/futex.c                |    3
-rw-r--r--  kernel/irq/chip.c             |    3
-rw-r--r--  kernel/irq/manage.c           |   31
-rw-r--r--  kernel/kexec.c                |  113
-rw-r--r--  kernel/kprobes.c              |   62
-rw-r--r--  kernel/ksysfs.c               |   10
-rw-r--r--  kernel/module.c               |  130
-rw-r--r--  kernel/nsproxy.c              |    3
-rw-r--r--  kernel/params.c               |   17
-rw-r--r--  kernel/posix-timers.c         |    3
-rw-r--r--  kernel/printk.c               |  111
-rw-r--r--  kernel/profile.c              |    6
-rw-r--r--  kernel/ptrace.c               |   13
-rw-r--r--  kernel/rcupdate.c             |    1
-rw-r--r--  kernel/rcutorture.c           |   10
-rw-r--r--  kernel/resource.c             |   26
-rw-r--r--  kernel/rtmutex-debug.c        |    7
-rw-r--r--  kernel/sched.c                |  101
-rw-r--r--  kernel/sched_fair.c           |    6
-rw-r--r--  kernel/signal.c               |   24
-rw-r--r--  kernel/softlockup.c           |   54
-rw-r--r--  kernel/sys_ni.c               |    4
-rw-r--r--  kernel/sysctl.c               |   52
-rw-r--r--  kernel/taskstats.c            |    1
-rw-r--r--  kernel/time.c                 |    7
-rw-r--r--  kernel/time/tick-broadcast.c  |   35
-rw-r--r--  kernel/time/tick-sched.c      |    2
-rw-r--r--  kernel/time/timekeeping.c     |   12
-rw-r--r--  kernel/user.c                 |   35
35 files changed, 730 insertions(+), 479 deletions(-)
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 6b066632e4..c64ce9c142 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,6 +63,3 @@ config PREEMPT_BKL
63 Say Y here if you are building a kernel for a desktop system. 63 Say Y here if you are building a kernel for a desktop system.
64 Say N if you are unsure. 64 Say N if you are unsure.
65 65
66config PREEMPT_NOTIFIERS
67 bool
68
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 0ae703c157..938e60a618 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -45,7 +45,6 @@
45#include <linux/init.h> 45#include <linux/init.h>
46#include <asm/types.h> 46#include <asm/types.h>
47#include <asm/atomic.h> 47#include <asm/atomic.h>
48#include <asm/types.h>
49#include <linux/fs.h> 48#include <linux/fs.h>
50#include <linux/namei.h> 49#include <linux/namei.h>
51#include <linux/mm.h> 50#include <linux/mm.h>
diff --git a/kernel/capability.c b/kernel/capability.c
index c8d3c77620..4e350a36ed 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -17,9 +17,6 @@
17unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ 17unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
18kernel_cap_t cap_bset = CAP_INIT_EFF_SET; 18kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
19 19
20EXPORT_SYMBOL(securebits);
21EXPORT_SYMBOL(cap_bset);
22
23/* 20/*
24 * This lock protects task->cap_* for all tasks including current. 21 * This lock protects task->cap_* for all tasks including current.
25 * Locking rule: acquire this prior to tasklist_lock. 22 * Locking rule: acquire this prior to tasklist_lock.
@@ -244,7 +241,6 @@ int __capable(struct task_struct *t, int cap)
244 } 241 }
245 return 0; 242 return 0;
246} 243}
247EXPORT_SYMBOL(__capable);
248 244
249int capable(int cap) 245int capable(int cap)
250{ 246{
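[Editorial note] The unexporting above only removes module access to securebits, cap_bset and __capable(); the normal in-kernel capability check is unchanged. A minimal illustration of that check, with a hypothetical caller name:

#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>

static int foo_do_privileged_op(void)		/* hypothetical caller */
{
	if (!capable(CAP_SYS_ADMIN))		/* check the current task's capabilities */
		return -EPERM;
	/* ... privileged work ... */
	return 0;
}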
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b17..2eb2e50db0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
581 581
582/* 582/*
583 * Return in *pmask the portion of a cpusets's mems_allowed that 583 * Return in *pmask the portion of a cpusets's mems_allowed that
584 * are online. If none are online, walk up the cpuset hierarchy 584 * are online, with memory. If none are online with memory, walk
585 * until we find one that does have some online mems. If we get 585 * up the cpuset hierarchy until we find one that does have some
586 * all the way to the top and still haven't found any online mems, 586 * online mems. If we get all the way to the top and still haven't
587 * return node_online_map. 587 * found any online mems, return node_states[N_HIGH_MEMORY].
588 * 588 *
589 * One way or another, we guarantee to return some non-empty subset 589 * One way or another, we guarantee to return some non-empty subset
590 * of node_online_map. 590 * of node_states[N_HIGH_MEMORY].
591 * 591 *
592 * Call with callback_mutex held. 592 * Call with callback_mutex held.
593 */ 593 */
594 594
595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) 595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
596{ 596{
597 while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) 597 while (cs && !nodes_intersects(cs->mems_allowed,
598 node_states[N_HIGH_MEMORY]))
598 cs = cs->parent; 599 cs = cs->parent;
599 if (cs) 600 if (cs)
600 nodes_and(*pmask, cs->mems_allowed, node_online_map); 601 nodes_and(*pmask, cs->mems_allowed,
602 node_states[N_HIGH_MEMORY]);
601 else 603 else
602 *pmask = node_online_map; 604 *pmask = node_states[N_HIGH_MEMORY];
603 BUG_ON(!nodes_intersects(*pmask, node_online_map)); 605 BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
604} 606}
605 607
606/** 608/**
@@ -753,68 +755,13 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
753} 755}
754 756
755/* 757/*
756 * For a given cpuset cur, partition the system as follows
757 * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
758 * exclusive child cpusets
759 * b. All cpus in the current cpuset's cpus_allowed that are not part of any
760 * exclusive child cpusets
761 * Build these two partitions by calling partition_sched_domains
762 *
763 * Call with manage_mutex held. May nest a call to the
764 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
765 * Must not be called holding callback_mutex, because we must
766 * not call lock_cpu_hotplug() while holding callback_mutex.
767 */
768
769static void update_cpu_domains(struct cpuset *cur)
770{
771 struct cpuset *c, *par = cur->parent;
772 cpumask_t pspan, cspan;
773
774 if (par == NULL || cpus_empty(cur->cpus_allowed))
775 return;
776
777 /*
778 * Get all cpus from parent's cpus_allowed not part of exclusive
779 * children
780 */
781 pspan = par->cpus_allowed;
782 list_for_each_entry(c, &par->children, sibling) {
783 if (is_cpu_exclusive(c))
784 cpus_andnot(pspan, pspan, c->cpus_allowed);
785 }
786 if (!is_cpu_exclusive(cur)) {
787 cpus_or(pspan, pspan, cur->cpus_allowed);
788 if (cpus_equal(pspan, cur->cpus_allowed))
789 return;
790 cspan = CPU_MASK_NONE;
791 } else {
792 if (cpus_empty(pspan))
793 return;
794 cspan = cur->cpus_allowed;
795 /*
796 * Get all cpus from current cpuset's cpus_allowed not part
797 * of exclusive children
798 */
799 list_for_each_entry(c, &cur->children, sibling) {
800 if (is_cpu_exclusive(c))
801 cpus_andnot(cspan, cspan, c->cpus_allowed);
802 }
803 }
804
805 lock_cpu_hotplug();
806 partition_sched_domains(&pspan, &cspan);
807 unlock_cpu_hotplug();
808}
809
810/*
811 * Call with manage_mutex held. May take callback_mutex during call. 758 * Call with manage_mutex held. May take callback_mutex during call.
812 */ 759 */
813 760
814static int update_cpumask(struct cpuset *cs, char *buf) 761static int update_cpumask(struct cpuset *cs, char *buf)
815{ 762{
816 struct cpuset trialcs; 763 struct cpuset trialcs;
817 int retval, cpus_unchanged; 764 int retval;
818 765
819 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ 766 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
820 if (cs == &top_cpuset) 767 if (cs == &top_cpuset)
@@ -841,12 +788,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
841 retval = validate_change(cs, &trialcs); 788 retval = validate_change(cs, &trialcs);
842 if (retval < 0) 789 if (retval < 0)
843 return retval; 790 return retval;
844 cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
845 mutex_lock(&callback_mutex); 791 mutex_lock(&callback_mutex);
846 cs->cpus_allowed = trialcs.cpus_allowed; 792 cs->cpus_allowed = trialcs.cpus_allowed;
847 mutex_unlock(&callback_mutex); 793 mutex_unlock(&callback_mutex);
848 if (is_cpu_exclusive(cs) && !cpus_unchanged)
849 update_cpu_domains(cs);
850 return 0; 794 return 0;
851} 795}
852 796
@@ -924,7 +868,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
924 int fudge; 868 int fudge;
925 int retval; 869 int retval;
926 870
927 /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ 871 /*
 872 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
873 * it's read-only
874 */
928 if (cs == &top_cpuset) 875 if (cs == &top_cpuset)
929 return -EACCES; 876 return -EACCES;
930 877
@@ -941,8 +888,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
941 retval = nodelist_parse(buf, trialcs.mems_allowed); 888 retval = nodelist_parse(buf, trialcs.mems_allowed);
942 if (retval < 0) 889 if (retval < 0)
943 goto done; 890 goto done;
891 if (!nodes_intersects(trialcs.mems_allowed,
892 node_states[N_HIGH_MEMORY])) {
893 /*
894 * error if only memoryless nodes specified.
895 */
896 retval = -ENOSPC;
897 goto done;
898 }
944 } 899 }
945 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); 900 /*
901 * Exclude memoryless nodes. We know that trialcs.mems_allowed
902 * contains at least one node with memory.
903 */
904 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
905 node_states[N_HIGH_MEMORY]);
946 oldmem = cs->mems_allowed; 906 oldmem = cs->mems_allowed;
947 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 907 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
948 retval = 0; /* Too easy - nothing to do */ 908 retval = 0; /* Too easy - nothing to do */
@@ -1067,7 +1027,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1067{ 1027{
1068 int turning_on; 1028 int turning_on;
1069 struct cpuset trialcs; 1029 struct cpuset trialcs;
1070 int err, cpu_exclusive_changed; 1030 int err;
1071 1031
1072 turning_on = (simple_strtoul(buf, NULL, 10) != 0); 1032 turning_on = (simple_strtoul(buf, NULL, 10) != 0);
1073 1033
@@ -1080,14 +1040,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1080 err = validate_change(cs, &trialcs); 1040 err = validate_change(cs, &trialcs);
1081 if (err < 0) 1041 if (err < 0)
1082 return err; 1042 return err;
1083 cpu_exclusive_changed =
1084 (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
1085 mutex_lock(&callback_mutex); 1043 mutex_lock(&callback_mutex);
1086 cs->flags = trialcs.flags; 1044 cs->flags = trialcs.flags;
1087 mutex_unlock(&callback_mutex); 1045 mutex_unlock(&callback_mutex);
1088 1046
1089 if (cpu_exclusive_changed)
1090 update_cpu_domains(cs);
1091 return 0; 1047 return 0;
1092} 1048}
1093 1049
@@ -1445,7 +1401,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1445 ssize_t retval = 0; 1401 ssize_t retval = 0;
1446 char *s; 1402 char *s;
1447 1403
1448 if (!(page = (char *)__get_free_page(GFP_KERNEL))) 1404 if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
1449 return -ENOMEM; 1405 return -ENOMEM;
1450 1406
1451 s = page; 1407 s = page;
@@ -1947,17 +1903,6 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1947 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); 1903 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
1948} 1904}
1949 1905
1950/*
1951 * Locking note on the strange update_flag() call below:
1952 *
1953 * If the cpuset being removed is marked cpu_exclusive, then simulate
1954 * turning cpu_exclusive off, which will call update_cpu_domains().
1955 * The lock_cpu_hotplug() call in update_cpu_domains() must not be
1956 * made while holding callback_mutex. Elsewhere the kernel nests
1957 * callback_mutex inside lock_cpu_hotplug() calls. So the reverse
1958 * nesting would risk an ABBA deadlock.
1959 */
1960
1961static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) 1906static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1962{ 1907{
1963 struct cpuset *cs = dentry->d_fsdata; 1908 struct cpuset *cs = dentry->d_fsdata;
@@ -1977,13 +1922,6 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1977 mutex_unlock(&manage_mutex); 1922 mutex_unlock(&manage_mutex);
1978 return -EBUSY; 1923 return -EBUSY;
1979 } 1924 }
1980 if (is_cpu_exclusive(cs)) {
1981 int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
1982 if (retval < 0) {
1983 mutex_unlock(&manage_mutex);
1984 return retval;
1985 }
1986 }
1987 parent = cs->parent; 1925 parent = cs->parent;
1988 mutex_lock(&callback_mutex); 1926 mutex_lock(&callback_mutex);
1989 set_bit(CS_REMOVED, &cs->flags); 1927 set_bit(CS_REMOVED, &cs->flags);
@@ -2098,8 +2036,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2098 2036
2099/* 2037/*
2100 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track 2038 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
2101 * cpu_online_map and node_online_map. Force the top cpuset to track 2039 * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
2102 * whats online after any CPU or memory node hotplug or unplug event. 2040 * track what's online after any CPU or memory node hotplug or unplug
2041 * event.
2103 * 2042 *
2104 * To ensure that we don't remove a CPU or node from the top cpuset 2043 * To ensure that we don't remove a CPU or node from the top cpuset
2105 * that is currently in use by a child cpuset (which would violate 2044 * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2058,7 @@ static void common_cpu_mem_hotplug_unplug(void)
2119 2058
2120 guarantee_online_cpus_mems_in_subtree(&top_cpuset); 2059 guarantee_online_cpus_mems_in_subtree(&top_cpuset);
2121 top_cpuset.cpus_allowed = cpu_online_map; 2060 top_cpuset.cpus_allowed = cpu_online_map;
2122 top_cpuset.mems_allowed = node_online_map; 2061 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2123 2062
2124 mutex_unlock(&callback_mutex); 2063 mutex_unlock(&callback_mutex);
2125 mutex_unlock(&manage_mutex); 2064 mutex_unlock(&manage_mutex);
@@ -2147,8 +2086,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
2147 2086
2148#ifdef CONFIG_MEMORY_HOTPLUG 2087#ifdef CONFIG_MEMORY_HOTPLUG
2149/* 2088/*
2150 * Keep top_cpuset.mems_allowed tracking node_online_map. 2089 * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
2151 * Call this routine anytime after you change node_online_map. 2090 * Call this routine anytime after you change
2091 * node_states[N_HIGH_MEMORY].
2152 * See also the previous routine cpuset_handle_cpuhp(). 2092 * See also the previous routine cpuset_handle_cpuhp().
2153 */ 2093 */
2154 2094
@@ -2167,7 +2107,7 @@ void cpuset_track_online_nodes(void)
2167void __init cpuset_init_smp(void) 2107void __init cpuset_init_smp(void)
2168{ 2108{
2169 top_cpuset.cpus_allowed = cpu_online_map; 2109 top_cpuset.cpus_allowed = cpu_online_map;
2170 top_cpuset.mems_allowed = node_online_map; 2110 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2171 2111
2172 hotcpu_notifier(cpuset_handle_cpuhp, 0); 2112 hotcpu_notifier(cpuset_handle_cpuhp, 0);
2173} 2113}
@@ -2309,7 +2249,7 @@ void cpuset_init_current_mems_allowed(void)
2309 * 2249 *
2310 * Description: Returns the nodemask_t mems_allowed of the cpuset 2250 * Description: Returns the nodemask_t mems_allowed of the cpuset
2311 * attached to the specified @tsk. Guaranteed to return some non-empty 2251 * attached to the specified @tsk. Guaranteed to return some non-empty
2312 * subset of node_online_map, even if this means going outside the 2252 * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
2313 * tasks cpuset. 2253 * tasks cpuset.
2314 **/ 2254 **/
2315 2255
@@ -2566,41 +2506,20 @@ int cpuset_mem_spread_node(void)
2566EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); 2506EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2567 2507
2568/** 2508/**
2569 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? 2509 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
2570 * @p: pointer to task_struct of some other task. 2510 * @tsk1: pointer to task_struct of some task.
2571 * 2511 * @tsk2: pointer to task_struct of some other task.
2572 * Description: Return true if the nearest mem_exclusive ancestor 2512 *
2573 * cpusets of tasks @p and current overlap. Used by oom killer to 2513 * Description: Return true if @tsk1's mems_allowed intersects the
2574 * determine if task @p's memory usage might impact the memory 2514 * mems_allowed of @tsk2. Used by the OOM killer to determine if
2575 * available to the current task. 2515 * one of the task's memory usage might impact the memory available
2576 * 2516 * to the other.
2577 * Call while holding callback_mutex.
2578 **/ 2517 **/
2579 2518
2580int cpuset_excl_nodes_overlap(const struct task_struct *p) 2519int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2520 const struct task_struct *tsk2)
2581{ 2521{
2582 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ 2522 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2583 int overlap = 1; /* do cpusets overlap? */
2584
2585 task_lock(current);
2586 if (current->flags & PF_EXITING) {
2587 task_unlock(current);
2588 goto done;
2589 }
2590 cs1 = nearest_exclusive_ancestor(current->cpuset);
2591 task_unlock(current);
2592
2593 task_lock((struct task_struct *)p);
2594 if (p->flags & PF_EXITING) {
2595 task_unlock((struct task_struct *)p);
2596 goto done;
2597 }
2598 cs2 = nearest_exclusive_ancestor(p->cpuset);
2599 task_unlock((struct task_struct *)p);
2600
2601 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
2602done:
2603 return overlap;
2604} 2523}
2605 2524
2606/* 2525/*
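[Editorial note] Two themes run through the cpuset changes above: memory-node masks are now filtered against node_states[N_HIGH_MEMORY] so memoryless nodes are never handed out, and the old mem_exclusive-ancestor overlap test collapses into a plain mems_allowed intersection. A minimal sketch of the nodemask operations involved, using an illustrative helper that is not part of the patch:

#include <linux/nodemask.h>
#include <linux/errno.h>

static int foo_filter_mems(nodemask_t *out)	/* illustrative helper */
{
	nodemask_t wanted = NODE_MASK_NONE;

	node_set(2, wanted);			/* pretend nodes 2 and 3 were requested */
	node_set(3, wanted);

	/* reject a request made up purely of memoryless nodes... */
	if (!nodes_intersects(wanted, node_states[N_HIGH_MEMORY]))
		return -ENOSPC;

	/* ...and drop any memoryless nodes from what we keep */
	nodes_and(*out, wanted, node_states[N_HIGH_MEMORY]);
	return 0;
}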
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f7959de4a..2c704c86ed 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -44,7 +44,6 @@
44#include <linux/resource.h> 44#include <linux/resource.h>
45#include <linux/blkdev.h> 45#include <linux/blkdev.h>
46#include <linux/task_io_accounting_ops.h> 46#include <linux/task_io_accounting_ops.h>
47#include <linux/freezer.h>
48 47
49#include <asm/uaccess.h> 48#include <asm/uaccess.h>
50#include <asm/unistd.h> 49#include <asm/unistd.h>
@@ -93,10 +92,9 @@ static void __exit_signal(struct task_struct *tsk)
93 * If there is any task waiting for the group exit 92 * If there is any task waiting for the group exit
94 * then notify it: 93 * then notify it:
95 */ 94 */
96 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { 95 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
97 wake_up_process(sig->group_exit_task); 96 wake_up_process(sig->group_exit_task);
98 sig->group_exit_task = NULL; 97
99 }
100 if (tsk == sig->curr_target) 98 if (tsk == sig->curr_target)
101 sig->curr_target = next_thread(tsk); 99 sig->curr_target = next_thread(tsk);
102 /* 100 /*
@@ -593,17 +591,6 @@ static void exit_mm(struct task_struct * tsk)
593 mmput(mm); 591 mmput(mm);
594} 592}
595 593
596static inline void
597choose_new_parent(struct task_struct *p, struct task_struct *reaper)
598{
599 /*
600 * Make sure we're not reparenting to ourselves and that
601 * the parent is not a zombie.
602 */
603 BUG_ON(p == reaper || reaper->exit_state);
604 p->real_parent = reaper;
605}
606
607static void 594static void
608reparent_thread(struct task_struct *p, struct task_struct *father, int traced) 595reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
609{ 596{
@@ -711,7 +698,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
711 698
712 if (father == p->real_parent) { 699 if (father == p->real_parent) {
713 /* reparent with a reaper, real father it's us */ 700 /* reparent with a reaper, real father it's us */
714 choose_new_parent(p, reaper); 701 p->real_parent = reaper;
715 reparent_thread(p, father, 0); 702 reparent_thread(p, father, 0);
716 } else { 703 } else {
717 /* reparent ptraced task to its real parent */ 704 /* reparent ptraced task to its real parent */
@@ -732,7 +719,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
732 } 719 }
733 list_for_each_safe(_p, _n, &father->ptrace_children) { 720 list_for_each_safe(_p, _n, &father->ptrace_children) {
734 p = list_entry(_p, struct task_struct, ptrace_list); 721 p = list_entry(_p, struct task_struct, ptrace_list);
735 choose_new_parent(p, reaper); 722 p->real_parent = reaper;
736 reparent_thread(p, father, 1); 723 reparent_thread(p, father, 1);
737 } 724 }
738} 725}
@@ -759,13 +746,11 @@ static void exit_notify(struct task_struct *tsk)
759 * Now we'll wake all the threads in the group just to make 746 * Now we'll wake all the threads in the group just to make
760 * sure someone gets all the pending signals. 747 * sure someone gets all the pending signals.
761 */ 748 */
762 read_lock(&tasklist_lock);
763 spin_lock_irq(&tsk->sighand->siglock); 749 spin_lock_irq(&tsk->sighand->siglock);
764 for (t = next_thread(tsk); t != tsk; t = next_thread(t)) 750 for (t = next_thread(tsk); t != tsk; t = next_thread(t))
765 if (!signal_pending(t) && !(t->flags & PF_EXITING)) 751 if (!signal_pending(t) && !(t->flags & PF_EXITING))
766 recalc_sigpending_and_wake(t); 752 recalc_sigpending_and_wake(t);
767 spin_unlock_irq(&tsk->sighand->siglock); 753 spin_unlock_irq(&tsk->sighand->siglock);
768 read_unlock(&tasklist_lock);
769 } 754 }
770 755
771 write_lock_irq(&tasklist_lock); 756 write_lock_irq(&tasklist_lock);
@@ -793,9 +778,8 @@ static void exit_notify(struct task_struct *tsk)
793 * and we were the only connection outside, so our pgrp 778 * and we were the only connection outside, so our pgrp
794 * is about to become orphaned. 779 * is about to become orphaned.
795 */ 780 */
796
797 t = tsk->real_parent; 781 t = tsk->real_parent;
798 782
799 pgrp = task_pgrp(tsk); 783 pgrp = task_pgrp(tsk);
800 if ((task_pgrp(t) != pgrp) && 784 if ((task_pgrp(t) != pgrp) &&
801 (task_session(t) == task_session(tsk)) && 785 (task_session(t) == task_session(tsk)) &&
@@ -842,6 +826,11 @@ static void exit_notify(struct task_struct *tsk)
842 state = EXIT_DEAD; 826 state = EXIT_DEAD;
843 tsk->exit_state = state; 827 tsk->exit_state = state;
844 828
829 if (thread_group_leader(tsk) &&
830 tsk->signal->notify_count < 0 &&
831 tsk->signal->group_exit_task)
832 wake_up_process(tsk->signal->group_exit_task);
833
845 write_unlock_irq(&tasklist_lock); 834 write_unlock_irq(&tasklist_lock);
846 835
847 list_for_each_safe(_p, _n, &ptrace_dead) { 836 list_for_each_safe(_p, _n, &ptrace_dead) {
@@ -883,6 +872,14 @@ static void check_stack_usage(void)
883static inline void check_stack_usage(void) {} 872static inline void check_stack_usage(void) {}
884#endif 873#endif
885 874
875static inline void exit_child_reaper(struct task_struct *tsk)
876{
877 if (likely(tsk->group_leader != child_reaper(tsk)))
878 return;
879
880 panic("Attempted to kill init!");
881}
882
886fastcall NORET_TYPE void do_exit(long code) 883fastcall NORET_TYPE void do_exit(long code)
887{ 884{
888 struct task_struct *tsk = current; 885 struct task_struct *tsk = current;
@@ -896,13 +893,6 @@ fastcall NORET_TYPE void do_exit(long code)
896 panic("Aiee, killing interrupt handler!"); 893 panic("Aiee, killing interrupt handler!");
897 if (unlikely(!tsk->pid)) 894 if (unlikely(!tsk->pid))
898 panic("Attempted to kill the idle task!"); 895 panic("Attempted to kill the idle task!");
899 if (unlikely(tsk == child_reaper(tsk))) {
900 if (tsk->nsproxy->pid_ns != &init_pid_ns)
901 tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper;
902 else
903 panic("Attempted to kill init!");
904 }
905
906 896
907 if (unlikely(current->ptrace & PT_TRACE_EXIT)) { 897 if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
908 current->ptrace_message = code; 898 current->ptrace_message = code;
@@ -932,13 +922,13 @@ fastcall NORET_TYPE void do_exit(long code)
932 schedule(); 922 schedule();
933 } 923 }
934 924
925 tsk->flags |= PF_EXITING;
935 /* 926 /*
936 * tsk->flags are checked in the futex code to protect against 927 * tsk->flags are checked in the futex code to protect against
937 * an exiting task cleaning up the robust pi futexes. 928 * an exiting task cleaning up the robust pi futexes.
938 */ 929 */
939 spin_lock_irq(&tsk->pi_lock); 930 smp_mb();
940 tsk->flags |= PF_EXITING; 931 spin_unlock_wait(&tsk->pi_lock);
941 spin_unlock_irq(&tsk->pi_lock);
942 932
943 if (unlikely(in_atomic())) 933 if (unlikely(in_atomic()))
944 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 934 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -952,16 +942,19 @@ fastcall NORET_TYPE void do_exit(long code)
952 } 942 }
953 group_dead = atomic_dec_and_test(&tsk->signal->live); 943 group_dead = atomic_dec_and_test(&tsk->signal->live);
954 if (group_dead) { 944 if (group_dead) {
945 exit_child_reaper(tsk);
955 hrtimer_cancel(&tsk->signal->real_timer); 946 hrtimer_cancel(&tsk->signal->real_timer);
956 exit_itimers(tsk->signal); 947 exit_itimers(tsk->signal);
957 } 948 }
958 acct_collect(code, group_dead); 949 acct_collect(code, group_dead);
950#ifdef CONFIG_FUTEX
959 if (unlikely(tsk->robust_list)) 951 if (unlikely(tsk->robust_list))
960 exit_robust_list(tsk); 952 exit_robust_list(tsk);
961#if defined(CONFIG_FUTEX) && defined(CONFIG_COMPAT) 953#ifdef CONFIG_COMPAT
962 if (unlikely(tsk->compat_robust_list)) 954 if (unlikely(tsk->compat_robust_list))
963 compat_exit_robust_list(tsk); 955 compat_exit_robust_list(tsk);
964#endif 956#endif
957#endif
965 if (group_dead) 958 if (group_dead)
966 tty_audit_exit(); 959 tty_audit_exit();
967 if (unlikely(tsk->audit_context)) 960 if (unlikely(tsk->audit_context))
@@ -996,6 +989,7 @@ fastcall NORET_TYPE void do_exit(long code)
996 mpol_free(tsk->mempolicy); 989 mpol_free(tsk->mempolicy);
997 tsk->mempolicy = NULL; 990 tsk->mempolicy = NULL;
998#endif 991#endif
992#ifdef CONFIG_FUTEX
999 /* 993 /*
1000 * This must happen late, after the PID is not 994 * This must happen late, after the PID is not
1001 * hashed anymore: 995 * hashed anymore:
@@ -1004,6 +998,7 @@ fastcall NORET_TYPE void do_exit(long code)
1004 exit_pi_state_list(tsk); 998 exit_pi_state_list(tsk);
1005 if (unlikely(current->pi_state_cache)) 999 if (unlikely(current->pi_state_cache))
1006 kfree(current->pi_state_cache); 1000 kfree(current->pi_state_cache);
1001#endif
1007 /* 1002 /*
1008 * Make sure we are holding no locks: 1003 * Make sure we are holding no locks:
1009 */ 1004 */
@@ -1168,8 +1163,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1168 int __user *stat_addr, struct rusage __user *ru) 1163 int __user *stat_addr, struct rusage __user *ru)
1169{ 1164{
1170 unsigned long state; 1165 unsigned long state;
1171 int retval; 1166 int retval, status, traced;
1172 int status;
1173 1167
1174 if (unlikely(noreap)) { 1168 if (unlikely(noreap)) {
1175 pid_t pid = p->pid; 1169 pid_t pid = p->pid;
@@ -1203,15 +1197,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1203 BUG_ON(state != EXIT_DEAD); 1197 BUG_ON(state != EXIT_DEAD);
1204 return 0; 1198 return 0;
1205 } 1199 }
1206 if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
1207 /*
1208 * This can only happen in a race with a ptraced thread
1209 * dying on another processor.
1210 */
1211 return 0;
1212 }
1213 1200
1214 if (likely(p->real_parent == p->parent) && likely(p->signal)) { 1201 /* traced means p->ptrace, but not vice versa */
1202 traced = (p->real_parent != p->parent);
1203
1204 if (likely(!traced)) {
1215 struct signal_struct *psig; 1205 struct signal_struct *psig;
1216 struct signal_struct *sig; 1206 struct signal_struct *sig;
1217 1207
@@ -1298,35 +1288,30 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1298 retval = put_user(p->pid, &infop->si_pid); 1288 retval = put_user(p->pid, &infop->si_pid);
1299 if (!retval && infop) 1289 if (!retval && infop)
1300 retval = put_user(p->uid, &infop->si_uid); 1290 retval = put_user(p->uid, &infop->si_uid);
1301 if (retval) { 1291 if (!retval)
1302 // TODO: is this safe? 1292 retval = p->pid;
1303 p->exit_state = EXIT_ZOMBIE; 1293
1304 return retval; 1294 if (traced) {
1305 }
1306 retval = p->pid;
1307 if (p->real_parent != p->parent) {
1308 write_lock_irq(&tasklist_lock); 1295 write_lock_irq(&tasklist_lock);
1309 /* Double-check with lock held. */ 1296 /* We dropped tasklist, ptracer could die and untrace */
1310 if (p->real_parent != p->parent) { 1297 ptrace_unlink(p);
1311 __ptrace_unlink(p); 1298 /*
1312 // TODO: is this safe? 1299 * If this is not a detached task, notify the parent.
1313 p->exit_state = EXIT_ZOMBIE; 1300 * If it's still not detached after that, don't release
1314 /* 1301 * it now.
1315 * If this is not a detached task, notify the parent. 1302 */
1316 * If it's still not detached after that, don't release 1303 if (p->exit_signal != -1) {
1317 * it now. 1304 do_notify_parent(p, p->exit_signal);
1318 */
1319 if (p->exit_signal != -1) { 1305 if (p->exit_signal != -1) {
1320 do_notify_parent(p, p->exit_signal); 1306 p->exit_state = EXIT_ZOMBIE;
1321 if (p->exit_signal != -1) 1307 p = NULL;
1322 p = NULL;
1323 } 1308 }
1324 } 1309 }
1325 write_unlock_irq(&tasklist_lock); 1310 write_unlock_irq(&tasklist_lock);
1326 } 1311 }
1327 if (p != NULL) 1312 if (p != NULL)
1328 release_task(p); 1313 release_task(p);
1329 BUG_ON(!retval); 1314
1330 return retval; 1315 return retval;
1331} 1316}
1332 1317
@@ -1345,7 +1330,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1345 if (!p->exit_code) 1330 if (!p->exit_code)
1346 return 0; 1331 return 0;
1347 if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && 1332 if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
1348 p->signal && p->signal->group_stop_count > 0) 1333 p->signal->group_stop_count > 0)
1349 /* 1334 /*
1350 * A group stop is in progress and this is the group leader. 1335 * A group stop is in progress and this is the group leader.
1351 * We won't report until all threads have stopped. 1336 * We won't report until all threads have stopped.
@@ -1459,9 +1444,6 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1459 pid_t pid; 1444 pid_t pid;
1460 uid_t uid; 1445 uid_t uid;
1461 1446
1462 if (unlikely(!p->signal))
1463 return 0;
1464
1465 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1447 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
1466 return 0; 1448 return 0;
1467 1449
diff --git a/kernel/fork.c b/kernel/fork.c
index 3fc3c13839..490495a39c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -107,6 +107,7 @@ static struct kmem_cache *mm_cachep;
107 107
108void free_task(struct task_struct *tsk) 108void free_task(struct task_struct *tsk)
109{ 109{
110 prop_local_destroy_single(&tsk->dirties);
110 free_thread_info(tsk->stack); 111 free_thread_info(tsk->stack);
111 rt_mutex_debug_task_free(tsk); 112 rt_mutex_debug_task_free(tsk);
112 free_task_struct(tsk); 113 free_task_struct(tsk);
@@ -163,6 +164,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
163{ 164{
164 struct task_struct *tsk; 165 struct task_struct *tsk;
165 struct thread_info *ti; 166 struct thread_info *ti;
167 int err;
166 168
167 prepare_to_copy(orig); 169 prepare_to_copy(orig);
168 170
@@ -178,6 +180,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
178 180
179 *tsk = *orig; 181 *tsk = *orig;
180 tsk->stack = ti; 182 tsk->stack = ti;
183
184 err = prop_local_init_single(&tsk->dirties);
185 if (err) {
186 free_thread_info(ti);
187 free_task_struct(tsk);
188 return NULL;
189 }
190
181 setup_thread_stack(tsk, orig); 191 setup_thread_stack(tsk, orig);
182 192
183#ifdef CONFIG_CC_STACKPROTECTOR 193#ifdef CONFIG_CC_STACKPROTECTOR
@@ -1069,7 +1079,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1069 do_posix_clock_monotonic_gettime(&p->start_time); 1079 do_posix_clock_monotonic_gettime(&p->start_time);
1070 p->real_start_time = p->start_time; 1080 p->real_start_time = p->start_time;
1071 monotonic_to_bootbased(&p->real_start_time); 1081 monotonic_to_bootbased(&p->real_start_time);
1082#ifdef CONFIG_SECURITY
1072 p->security = NULL; 1083 p->security = NULL;
1084#endif
1073 p->io_context = NULL; 1085 p->io_context = NULL;
1074 p->io_wait = NULL; 1086 p->io_wait = NULL;
1075 p->audit_context = NULL; 1087 p->audit_context = NULL;
@@ -1146,13 +1158,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1146 * Clear TID on mm_release()? 1158 * Clear TID on mm_release()?
1147 */ 1159 */
1148 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1160 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1161#ifdef CONFIG_FUTEX
1149 p->robust_list = NULL; 1162 p->robust_list = NULL;
1150#ifdef CONFIG_COMPAT 1163#ifdef CONFIG_COMPAT
1151 p->compat_robust_list = NULL; 1164 p->compat_robust_list = NULL;
1152#endif 1165#endif
1153 INIT_LIST_HEAD(&p->pi_state_list); 1166 INIT_LIST_HEAD(&p->pi_state_list);
1154 p->pi_state_cache = NULL; 1167 p->pi_state_cache = NULL;
1155 1168#endif
1156 /* 1169 /*
1157 * sigaltstack should be cleared when sharing the same VM 1170 * sigaltstack should be cleared when sharing the same VM
1158 */ 1171 */
@@ -1435,8 +1448,7 @@ long do_fork(unsigned long clone_flags,
1435#define ARCH_MIN_MMSTRUCT_ALIGN 0 1448#define ARCH_MIN_MMSTRUCT_ALIGN 0
1436#endif 1449#endif
1437 1450
1438static void sighand_ctor(void *data, struct kmem_cache *cachep, 1451static void sighand_ctor(struct kmem_cache *cachep, void *data)
1439 unsigned long flags)
1440{ 1452{
1441 struct sighand_struct *sighand = data; 1453 struct sighand_struct *sighand = data;
1442 1454
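[Editorial note] dup_task_struct() now has a second setup step that can fail (prop_local_init_single() for the per-task dirty-page proportion), so it gained explicit unwinding; free_task() tears the counter down again. A compact sketch of the same init/unwind ordering, with hypothetical names:

#include <linux/proportions.h>
#include <linux/slab.h>

struct foo {				/* hypothetical container */
	struct prop_local_single dirties;
	/* ... other fields ... */
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;
	if (prop_local_init_single(&f->dirties)) {	/* check the error code, as above */
		kfree(f);				/* unwind in reverse order */
		return NULL;
	}
	return f;
}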
diff --git a/kernel/futex.c b/kernel/futex.c
index fcc94e7b40..d725676d84 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -52,6 +52,7 @@
52#include <linux/syscalls.h> 52#include <linux/syscalls.h>
53#include <linux/signal.h> 53#include <linux/signal.h>
54#include <linux/module.h> 54#include <linux/module.h>
55#include <linux/magic.h>
55#include <asm/futex.h> 56#include <asm/futex.h>
56 57
57#include "rtmutex_common.h" 58#include "rtmutex_common.h"
@@ -2080,7 +2081,7 @@ static int futexfs_get_sb(struct file_system_type *fs_type,
2080 int flags, const char *dev_name, void *data, 2081 int flags, const char *dev_name, void *data,
2081 struct vfsmount *mnt) 2082 struct vfsmount *mnt)
2082{ 2083{
2083 return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt); 2084 return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt);
2084} 2085}
2085 2086
2086static struct file_system_type futex_fs_type = { 2087static struct file_system_type futex_fs_type = {
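[Editorial note] The superblock magic itself is unchanged; it only gains a name alongside the other filesystem magics. Per the new <linux/magic.h> include and the literal it replaces, the definition is simply:

/* include/linux/magic.h */
#define FUTEXFS_SUPER_MAGIC	0xBAD1DEA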
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f1a73f0b54..9b5dff6b3f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -503,7 +503,6 @@ out_unlock:
503 spin_unlock(&desc->lock); 503 spin_unlock(&desc->lock);
504} 504}
505 505
506#ifdef CONFIG_SMP
507/** 506/**
508 * handle_percpu_IRQ - Per CPU local irq handler 507 * handle_percpu_IRQ - Per CPU local irq handler
509 * @irq: the interrupt number 508 * @irq: the interrupt number
@@ -529,8 +528,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
529 desc->chip->eoi(irq); 528 desc->chip->eoi(irq);
530} 529}
531 530
532#endif /* CONFIG_SMP */
533
534void 531void
535__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 532__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
536 const char *name) 533 const char *name)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7230d914ea..80eab7a042 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -405,7 +405,6 @@ void free_irq(unsigned int irq, void *dev_id)
405 struct irq_desc *desc; 405 struct irq_desc *desc;
406 struct irqaction **p; 406 struct irqaction **p;
407 unsigned long flags; 407 unsigned long flags;
408 irqreturn_t (*handler)(int, void *) = NULL;
409 408
410 WARN_ON(in_interrupt()); 409 WARN_ON(in_interrupt());
411 if (irq >= NR_IRQS) 410 if (irq >= NR_IRQS)
@@ -445,8 +444,21 @@ void free_irq(unsigned int irq, void *dev_id)
445 444
446 /* Make sure it's not being used on another CPU */ 445 /* Make sure it's not being used on another CPU */
447 synchronize_irq(irq); 446 synchronize_irq(irq);
448 if (action->flags & IRQF_SHARED) 447#ifdef CONFIG_DEBUG_SHIRQ
449 handler = action->handler; 448 /*
449 * It's a shared IRQ -- the driver ought to be
450 * prepared for it to happen even now it's
451 * being freed, so let's make sure.... We do
452 * this after actually deregistering it, to
453 * make sure that a 'real' IRQ doesn't run in
454 * parallel with our fake
455 */
456 if (action->flags & IRQF_SHARED) {
457 local_irq_save(flags);
458 action->handler(irq, dev_id);
459 local_irq_restore(flags);
460 }
461#endif
450 kfree(action); 462 kfree(action);
451 return; 463 return;
452 } 464 }
@@ -454,19 +466,6 @@ void free_irq(unsigned int irq, void *dev_id)
454 spin_unlock_irqrestore(&desc->lock, flags); 466 spin_unlock_irqrestore(&desc->lock, flags);
455 return; 467 return;
456 } 468 }
457#ifdef CONFIG_DEBUG_SHIRQ
458 if (handler) {
459 /*
460 * It's a shared IRQ -- the driver ought to be prepared for it
461 * to happen even now it's being freed, so let's make sure....
462 * We do this after actually deregistering it, to make sure that
463 * a 'real' IRQ doesn't run in parallel with our fake
464 */
465 local_irq_save(flags);
466 handler(irq, dev_id);
467 local_irq_restore(flags);
468 }
469#endif
470} 469}
471EXPORT_SYMBOL(free_irq); 470EXPORT_SYMBOL(free_irq);
472 471
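[Editorial note] With CONFIG_DEBUG_SHIRQ, free_irq() above now fires one final fake invocation of a shared handler right after unhooking it, so shared-IRQ drivers must cope with being called when their device has nothing pending. A sketch of a handler written that way; the device structure, register offsets and names are all hypothetical:

#include <linux/interrupt.h>
#include <linux/io.h>

#define FOO_INT_STATUS	0x00		/* hypothetical registers */
#define FOO_INT_ACK	0x04

struct foo_dev {			/* hypothetical per-device state */
	void __iomem *mmio;
};

static irqreturn_t foo_interrupt(int irq, void *dev_id)
{
	struct foo_dev *fd = dev_id;
	u32 status = readl(fd->mmio + FOO_INT_STATUS);

	if (!status)			/* not ours, or device already quiesced */
		return IRQ_NONE;	/* this also covers the DEBUG_SHIRQ fake call */

	writel(status, fd->mmio + FOO_INT_ACK);
	/* ... handle the events indicated by status ... */
	return IRQ_HANDLED;
}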
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 25db14b89e..7885269b0d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -17,21 +17,30 @@
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
19#include <linux/reboot.h> 19#include <linux/reboot.h>
20#include <linux/syscalls.h>
21#include <linux/ioport.h> 20#include <linux/ioport.h>
22#include <linux/hardirq.h> 21#include <linux/hardirq.h>
23#include <linux/elf.h> 22#include <linux/elf.h>
24#include <linux/elfcore.h> 23#include <linux/elfcore.h>
24#include <linux/utsrelease.h>
25#include <linux/utsname.h>
26#include <linux/numa.h>
25 27
26#include <asm/page.h> 28#include <asm/page.h>
27#include <asm/uaccess.h> 29#include <asm/uaccess.h>
28#include <asm/io.h> 30#include <asm/io.h>
29#include <asm/system.h> 31#include <asm/system.h>
30#include <asm/semaphore.h> 32#include <asm/semaphore.h>
33#include <asm/sections.h>
31 34
32/* Per cpu memory for storing cpu states in case of system crash. */ 35/* Per cpu memory for storing cpu states in case of system crash. */
33note_buf_t* crash_notes; 36note_buf_t* crash_notes;
34 37
38/* vmcoreinfo stuff */
39unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
40u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
41size_t vmcoreinfo_size;
42size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
43
35/* Location of the reserved area for the crash kernel */ 44/* Location of the reserved area for the crash kernel */
36struct resource crashk_res = { 45struct resource crashk_res = {
37 .name = "Crash kernel", 46 .name = "Crash kernel",
@@ -1061,6 +1070,7 @@ void crash_kexec(struct pt_regs *regs)
1061 if (kexec_crash_image) { 1070 if (kexec_crash_image) {
1062 struct pt_regs fixed_regs; 1071 struct pt_regs fixed_regs;
1063 crash_setup_regs(&fixed_regs, regs); 1072 crash_setup_regs(&fixed_regs, regs);
1073 crash_save_vmcoreinfo();
1064 machine_crash_shutdown(&fixed_regs); 1074 machine_crash_shutdown(&fixed_regs);
1065 machine_kexec(kexec_crash_image); 1075 machine_kexec(kexec_crash_image);
1066 } 1076 }
@@ -1135,3 +1145,104 @@ static int __init crash_notes_memory_init(void)
1135 return 0; 1145 return 0;
1136} 1146}
1137module_init(crash_notes_memory_init) 1147module_init(crash_notes_memory_init)
1148
1149void crash_save_vmcoreinfo(void)
1150{
1151 u32 *buf;
1152
1153 if (!vmcoreinfo_size)
1154 return;
1155
1156 vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
1157
1158 buf = (u32 *)vmcoreinfo_note;
1159
1160 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1161 vmcoreinfo_size);
1162
1163 final_note(buf);
1164}
1165
1166void vmcoreinfo_append_str(const char *fmt, ...)
1167{
1168 va_list args;
1169 char buf[0x50];
1170 int r;
1171
1172 va_start(args, fmt);
1173 r = vsnprintf(buf, sizeof(buf), fmt, args);
1174 va_end(args);
1175
1176 if (r + vmcoreinfo_size > vmcoreinfo_max_size)
1177 r = vmcoreinfo_max_size - vmcoreinfo_size;
1178
1179 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1180
1181 vmcoreinfo_size += r;
1182}
1183
1184/*
1185 * provide an empty default implementation here -- architecture
1186 * code may override this
1187 */
1188void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
1189{}
1190
1191unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
1192{
1193 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1194}
1195
1196static int __init crash_save_vmcoreinfo_init(void)
1197{
1198 vmcoreinfo_append_str("OSRELEASE=%s\n", init_uts_ns.name.release);
1199 vmcoreinfo_append_str("PAGESIZE=%ld\n", PAGE_SIZE);
1200
1201 VMCOREINFO_SYMBOL(init_uts_ns);
1202 VMCOREINFO_SYMBOL(node_online_map);
1203 VMCOREINFO_SYMBOL(swapper_pg_dir);
1204 VMCOREINFO_SYMBOL(_stext);
1205
1206#ifndef CONFIG_NEED_MULTIPLE_NODES
1207 VMCOREINFO_SYMBOL(mem_map);
1208 VMCOREINFO_SYMBOL(contig_page_data);
1209#endif
1210#ifdef CONFIG_SPARSEMEM
1211 VMCOREINFO_SYMBOL(mem_section);
1212 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1213 VMCOREINFO_SIZE(mem_section);
1214 VMCOREINFO_OFFSET(mem_section, section_mem_map);
1215#endif
1216 VMCOREINFO_SIZE(page);
1217 VMCOREINFO_SIZE(pglist_data);
1218 VMCOREINFO_SIZE(zone);
1219 VMCOREINFO_SIZE(free_area);
1220 VMCOREINFO_SIZE(list_head);
1221 VMCOREINFO_TYPEDEF_SIZE(nodemask_t);
1222 VMCOREINFO_OFFSET(page, flags);
1223 VMCOREINFO_OFFSET(page, _count);
1224 VMCOREINFO_OFFSET(page, mapping);
1225 VMCOREINFO_OFFSET(page, lru);
1226 VMCOREINFO_OFFSET(pglist_data, node_zones);
1227 VMCOREINFO_OFFSET(pglist_data, nr_zones);
1228#ifdef CONFIG_FLAT_NODE_MEM_MAP
1229 VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1230#endif
1231 VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1232 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1233 VMCOREINFO_OFFSET(pglist_data, node_id);
1234 VMCOREINFO_OFFSET(zone, free_area);
1235 VMCOREINFO_OFFSET(zone, vm_stat);
1236 VMCOREINFO_OFFSET(zone, spanned_pages);
1237 VMCOREINFO_OFFSET(free_area, free_list);
1238 VMCOREINFO_OFFSET(list_head, next);
1239 VMCOREINFO_OFFSET(list_head, prev);
1240 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1241 VMCOREINFO_NUMBER(NR_FREE_PAGES);
1242
1243 arch_crash_save_vmcoreinfo();
1244
1245 return 0;
1246}
1247
1248module_init(crash_save_vmcoreinfo_init)
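[Editorial note] crash_save_vmcoreinfo_init() above records the symbols, sizes and offsets a dump parser needs, and arch_crash_save_vmcoreinfo() is left as a weak hook for architecture additions. A sketch of what such an override could add; the extra symbol and key are made up for illustration:

#include <linux/kexec.h>

extern unsigned long foo_arch_pgd_base;		/* made-up arch symbol */

void arch_crash_save_vmcoreinfo(void)
{
	VMCOREINFO_SYMBOL(foo_arch_pgd_base);
	vmcoreinfo_append_str("CONFIG_FOO=y\n");	/* any extra key=value the parser wants */
}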
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4b8a4493c5..e3a5d817ac 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -64,7 +64,6 @@
64 64
65static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 65static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
66static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 66static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
67static atomic_t kprobe_count;
68 67
69/* NOTE: change this value only with kprobe_mutex held */ 68/* NOTE: change this value only with kprobe_mutex held */
70static bool kprobe_enabled; 69static bool kprobe_enabled;
@@ -73,11 +72,6 @@ DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
73DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ 72DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
74static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
75 74
76static struct notifier_block kprobe_page_fault_nb = {
77 .notifier_call = kprobe_exceptions_notify,
78 .priority = 0x7fffffff /* we need to notified first */
79};
80
81#ifdef __ARCH_WANT_KPROBES_INSN_SLOT 75#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
82/* 76/*
83 * kprobe->ainsn.insn points to the copy of the instruction to be 77 * kprobe->ainsn.insn points to the copy of the instruction to be
@@ -556,8 +550,6 @@ static int __kprobes __register_kprobe(struct kprobe *p,
556 old_p = get_kprobe(p->addr); 550 old_p = get_kprobe(p->addr);
557 if (old_p) { 551 if (old_p) {
558 ret = register_aggr_kprobe(old_p, p); 552 ret = register_aggr_kprobe(old_p, p);
559 if (!ret)
560 atomic_inc(&kprobe_count);
561 goto out; 553 goto out;
562 } 554 }
563 555
@@ -569,13 +561,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
569 hlist_add_head_rcu(&p->hlist, 561 hlist_add_head_rcu(&p->hlist,
570 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 562 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
571 563
572 if (kprobe_enabled) { 564 if (kprobe_enabled)
573 if (atomic_add_return(1, &kprobe_count) == \
574 (ARCH_INACTIVE_KPROBE_COUNT + 1))
575 register_page_fault_notifier(&kprobe_page_fault_nb);
576
577 arch_arm_kprobe(p); 565 arch_arm_kprobe(p);
578 } 566
579out: 567out:
580 mutex_unlock(&kprobe_mutex); 568 mutex_unlock(&kprobe_mutex);
581 569
@@ -658,16 +646,6 @@ valid_p:
658 } 646 }
659 mutex_unlock(&kprobe_mutex); 647 mutex_unlock(&kprobe_mutex);
660 } 648 }
661
662 /* Call unregister_page_fault_notifier()
663 * if no probes are active
664 */
665 mutex_lock(&kprobe_mutex);
666 if (atomic_add_return(-1, &kprobe_count) == \
667 ARCH_INACTIVE_KPROBE_COUNT)
668 unregister_page_fault_notifier(&kprobe_page_fault_nb);
669 mutex_unlock(&kprobe_mutex);
670 return;
671} 649}
672 650
673static struct notifier_block kprobe_exceptions_nb = { 651static struct notifier_block kprobe_exceptions_nb = {
@@ -738,6 +716,18 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
738 int ret = 0; 716 int ret = 0;
739 struct kretprobe_instance *inst; 717 struct kretprobe_instance *inst;
740 int i; 718 int i;
719 void *addr = rp->kp.addr;
720
721 if (kretprobe_blacklist_size) {
722 if (addr == NULL)
723 kprobe_lookup_name(rp->kp.symbol_name, addr);
724 addr += rp->kp.offset;
725
726 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
727 if (kretprobe_blacklist[i].addr == addr)
728 return -EINVAL;
729 }
730 }
741 731
742 rp->kp.pre_handler = pre_handler_kretprobe; 732 rp->kp.pre_handler = pre_handler_kretprobe;
743 rp->kp.post_handler = NULL; 733 rp->kp.post_handler = NULL;
@@ -815,7 +805,17 @@ static int __init init_kprobes(void)
815 INIT_HLIST_HEAD(&kprobe_table[i]); 805 INIT_HLIST_HEAD(&kprobe_table[i]);
816 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 806 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
817 } 807 }
818 atomic_set(&kprobe_count, 0); 808
809 if (kretprobe_blacklist_size) {
810 /* lookup the function address from its name */
811 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
812 kprobe_lookup_name(kretprobe_blacklist[i].name,
813 kretprobe_blacklist[i].addr);
814 if (!kretprobe_blacklist[i].addr)
815 printk("kretprobe: lookup failed: %s\n",
816 kretprobe_blacklist[i].name);
817 }
818 }
819 819
820 /* By default, kprobes are enabled */ 820 /* By default, kprobes are enabled */
821 kprobe_enabled = true; 821 kprobe_enabled = true;
@@ -921,13 +921,6 @@ static void __kprobes enable_all_kprobes(void)
921 if (kprobe_enabled) 921 if (kprobe_enabled)
922 goto already_enabled; 922 goto already_enabled;
923 923
924 /*
925 * Re-register the page fault notifier only if there are any
926 * active probes at the time of enabling kprobes globally
927 */
928 if (atomic_read(&kprobe_count) > ARCH_INACTIVE_KPROBE_COUNT)
929 register_page_fault_notifier(&kprobe_page_fault_nb);
930
931 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 924 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
932 head = &kprobe_table[i]; 925 head = &kprobe_table[i];
933 hlist_for_each_entry_rcu(p, node, head, hlist) 926 hlist_for_each_entry_rcu(p, node, head, hlist)
@@ -968,10 +961,7 @@ static void __kprobes disable_all_kprobes(void)
968 mutex_unlock(&kprobe_mutex); 961 mutex_unlock(&kprobe_mutex);
969 /* Allow all currently running kprobes to complete */ 962 /* Allow all currently running kprobes to complete */
970 synchronize_sched(); 963 synchronize_sched();
971 964 return;
972 mutex_lock(&kprobe_mutex);
973 /* Unconditionally unregister the page_fault notifier */
974 unregister_page_fault_notifier(&kprobe_page_fault_nb);
975 965
976already_disabled: 966already_disabled:
977 mutex_unlock(&kprobe_mutex); 967 mutex_unlock(&kprobe_mutex);
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6046939d08..65daa5373c 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -61,6 +61,15 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page)
61 return sprintf(page, "%d\n", !!kexec_crash_image); 61 return sprintf(page, "%d\n", !!kexec_crash_image);
62} 62}
63KERNEL_ATTR_RO(kexec_crash_loaded); 63KERNEL_ATTR_RO(kexec_crash_loaded);
64
65static ssize_t vmcoreinfo_show(struct kset *kset, char *page)
66{
67 return sprintf(page, "%lx %x\n",
68 paddr_vmcoreinfo_note(),
69 (unsigned int)vmcoreinfo_max_size);
70}
71KERNEL_ATTR_RO(vmcoreinfo);
72
64#endif /* CONFIG_KEXEC */ 73#endif /* CONFIG_KEXEC */
65 74
66/* 75/*
@@ -96,6 +105,7 @@ static struct attribute * kernel_attrs[] = {
96#ifdef CONFIG_KEXEC 105#ifdef CONFIG_KEXEC
97 &kexec_loaded_attr.attr, 106 &kexec_loaded_attr.attr,
98 &kexec_crash_loaded_attr.attr, 107 &kexec_crash_loaded_attr.attr,
108 &vmcoreinfo_attr.attr,
99#endif 109#endif
100 NULL 110 NULL
101}; 111};
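[Editorial note] The new /sys/kernel/vmcoreinfo file prints the note's physical address and maximum size in hex, which kexec/kdump userspace passes on to the dump kernel. A small userspace sketch of parsing it, purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long long paddr;
	unsigned int size;
	FILE *f = fopen("/sys/kernel/vmcoreinfo", "r");

	if (!f || fscanf(f, "%llx %x", &paddr, &size) != 2)
		return 1;
	printf("vmcoreinfo note at %#llx, up to %u bytes\n", paddr, size);
	return 0;
}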
diff --git a/kernel/module.c b/kernel/module.c
index db0ead0363..a389b423c2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -20,6 +20,7 @@
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/kallsyms.h> 22#include <linux/kallsyms.h>
23#include <linux/sysfs.h>
23#include <linux/kernel.h> 24#include <linux/kernel.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
25#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
@@ -692,8 +693,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
692 } 693 }
693 694
694 /* If it has an init func, it must have an exit func to unload */ 695 /* If it has an init func, it must have an exit func to unload */
695 if ((mod->init != NULL && mod->exit == NULL) 696 if (mod->init && !mod->exit) {
696 || mod->unsafe) {
697 forced = try_force_unload(flags); 697 forced = try_force_unload(flags);
698 if (!forced) { 698 if (!forced) {
699 /* This module can't be removed */ 699 /* This module can't be removed */
@@ -741,11 +741,6 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
741 seq_printf(m, "%s,", use->module_which_uses->name); 741 seq_printf(m, "%s,", use->module_which_uses->name);
742 } 742 }
743 743
744 if (mod->unsafe) {
745 printed_something = 1;
746 seq_printf(m, "[unsafe],");
747 }
748
749 if (mod->init != NULL && mod->exit == NULL) { 744 if (mod->init != NULL && mod->exit == NULL) {
750 printed_something = 1; 745 printed_something = 1;
751 seq_printf(m, "[permanent],"); 746 seq_printf(m, "[permanent],");
@@ -1053,6 +1048,100 @@ static void remove_sect_attrs(struct module *mod)
1053 } 1048 }
1054} 1049}
1055 1050
1051/*
1052 * /sys/module/foo/notes/.section.name gives contents of SHT_NOTE sections.
1053 */
1054
1055struct module_notes_attrs {
1056 struct kobject *dir;
1057 unsigned int notes;
1058 struct bin_attribute attrs[0];
1059};
1060
1061static ssize_t module_notes_read(struct kobject *kobj,
1062 struct bin_attribute *bin_attr,
1063 char *buf, loff_t pos, size_t count)
1064{
1065 /*
1066 * The caller checked the pos and count against our size.
1067 */
1068 memcpy(buf, bin_attr->private + pos, count);
1069 return count;
1070}
1071
1072static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
1073 unsigned int i)
1074{
1075 if (notes_attrs->dir) {
1076 while (i-- > 0)
1077 sysfs_remove_bin_file(notes_attrs->dir,
1078 &notes_attrs->attrs[i]);
1079 kobject_del(notes_attrs->dir);
1080 }
1081 kfree(notes_attrs);
1082}
1083
1084static void add_notes_attrs(struct module *mod, unsigned int nsect,
1085 char *secstrings, Elf_Shdr *sechdrs)
1086{
1087 unsigned int notes, loaded, i;
1088 struct module_notes_attrs *notes_attrs;
1089 struct bin_attribute *nattr;
1090
1091 /* Count notes sections and allocate structures. */
1092 notes = 0;
1093 for (i = 0; i < nsect; i++)
1094 if ((sechdrs[i].sh_flags & SHF_ALLOC) &&
1095 (sechdrs[i].sh_type == SHT_NOTE))
1096 ++notes;
1097
1098 if (notes == 0)
1099 return;
1100
1101 notes_attrs = kzalloc(sizeof(*notes_attrs)
1102 + notes * sizeof(notes_attrs->attrs[0]),
1103 GFP_KERNEL);
1104 if (notes_attrs == NULL)
1105 return;
1106
1107 notes_attrs->notes = notes;
1108 nattr = &notes_attrs->attrs[0];
1109 for (loaded = i = 0; i < nsect; ++i) {
1110 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
1111 continue;
1112 if (sechdrs[i].sh_type == SHT_NOTE) {
1113 nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
1114 nattr->attr.mode = S_IRUGO;
1115 nattr->size = sechdrs[i].sh_size;
1116 nattr->private = (void *) sechdrs[i].sh_addr;
1117 nattr->read = module_notes_read;
1118 ++nattr;
1119 }
1120 ++loaded;
1121 }
1122
1123 notes_attrs->dir = kobject_add_dir(&mod->mkobj.kobj, "notes");
1124 if (!notes_attrs->dir)
1125 goto out;
1126
1127 for (i = 0; i < notes; ++i)
1128 if (sysfs_create_bin_file(notes_attrs->dir,
1129 &notes_attrs->attrs[i]))
1130 goto out;
1131
1132 mod->notes_attrs = notes_attrs;
1133 return;
1134
1135 out:
1136 free_notes_attrs(notes_attrs, i);
1137}
1138
1139static void remove_notes_attrs(struct module *mod)
1140{
1141 if (mod->notes_attrs)
1142 free_notes_attrs(mod->notes_attrs, mod->notes_attrs->notes);
1143}
1144
1056#else 1145#else
1057 1146
1058static inline void add_sect_attrs(struct module *mod, unsigned int nsect, 1147static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
@@ -1063,6 +1152,15 @@ static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
1063static inline void remove_sect_attrs(struct module *mod) 1152static inline void remove_sect_attrs(struct module *mod)
1064{ 1153{
1065} 1154}
1155
1156static inline void add_notes_attrs(struct module *mod, unsigned int nsect,
1157 char *sectstrings, Elf_Shdr *sechdrs)
1158{
1159}
1160
1161static inline void remove_notes_attrs(struct module *mod)
1162{
1163}
1066#endif /* CONFIG_KALLSYMS */ 1164#endif /* CONFIG_KALLSYMS */
1067 1165
1068#ifdef CONFIG_SYSFS 1166#ifdef CONFIG_SYSFS
@@ -1197,6 +1295,7 @@ static void free_module(struct module *mod)
1197{ 1295{
1198 /* Delete from various lists */ 1296 /* Delete from various lists */
1199 stop_machine_run(__unlink_module, mod, NR_CPUS); 1297 stop_machine_run(__unlink_module, mod, NR_CPUS);
1298 remove_notes_attrs(mod);
1200 remove_sect_attrs(mod); 1299 remove_sect_attrs(mod);
1201 mod_kobject_remove(mod); 1300 mod_kobject_remove(mod);
1202 1301
@@ -1782,7 +1881,8 @@ static struct module *load_module(void __user *umod,
1782 module_unload_init(mod); 1881 module_unload_init(mod);
1783 1882
1784 /* Initialize kobject, so we can reference it. */ 1883 /* Initialize kobject, so we can reference it. */
1785 if (mod_sysfs_init(mod) != 0) 1884 err = mod_sysfs_init(mod);
1885 if (err)
1786 goto cleanup; 1886 goto cleanup;
1787 1887
1788 /* Set up license info based on the info section */ 1888 /* Set up license info based on the info section */
@@ -1924,6 +2024,7 @@ static struct module *load_module(void __user *umod,
1924 if (err < 0) 2024 if (err < 0)
1925 goto arch_cleanup; 2025 goto arch_cleanup;
1926 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2026 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2027 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
1927 2028
1928 /* Size of section 0 is 0, so this works well if no unwind info. */ 2029 /* Size of section 0 is 0, so this works well if no unwind info. */
1929 mod->unwind_info = unwind_add_table(mod, 2030 mod->unwind_info = unwind_add_table(mod,
@@ -2011,15 +2112,10 @@ sys_init_module(void __user *umod,
2011 buggy refcounters. */ 2112 buggy refcounters. */
2012 mod->state = MODULE_STATE_GOING; 2113 mod->state = MODULE_STATE_GOING;
2013 synchronize_sched(); 2114 synchronize_sched();
2014 if (mod->unsafe) 2115 module_put(mod);
2015 printk(KERN_ERR "%s: module is now stuck!\n", 2116 mutex_lock(&module_mutex);
2016 mod->name); 2117 free_module(mod);
2017 else { 2118 mutex_unlock(&module_mutex);
2018 module_put(mod);
2019 mutex_lock(&module_mutex);
2020 free_module(mod);
2021 mutex_unlock(&module_mutex);
2022 }
2023 return ret; 2119 return ret;
2024 } 2120 }
2025 2121
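[Editorial note] add_notes_attrs() above exposes every SHT_NOTE section of a loaded module read-only under /sys/module/<name>/notes/<section-name>. A hedged userspace sketch that dumps one such file; the module and section names are examples and depend on how the module was built:

#include <stdio.h>

int main(void)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen("/sys/module/usbcore/notes/.note.gnu.build-id", "rb");

	if (!f)
		return 1;
	n = fread(buf, 1, sizeof(buf), f);
	fwrite(buf, 1, n, stdout);	/* raw ELF note payload */
	fclose(f);
	return 0;
}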
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f1decd21a5..049e7c0ac5 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -203,8 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
203 203
204static int __init nsproxy_cache_init(void) 204static int __init nsproxy_cache_init(void)
205{ 205{
206 nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), 206 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
207 0, SLAB_PANIC, NULL);
208 return 0; 207 return 0;
209} 208}
210 209
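The nsproxy hunk above swaps an open-coded kmem_cache_create() for the KMEM_CACHE() helper. For comparison, a minimal sketch of roughly what the macro expands to, assuming the approximate <linux/slab.h> definition of this era (the macro derives the cache name and alignment from the structure itself):

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>

static struct kmem_cache *example_cachep;

static int __init example_cache_init(void)
{
	/*
	 * KMEM_CACHE(nsproxy, SLAB_PANIC) expands to roughly this call:
	 * the stringified struct name, its size, its natural alignment,
	 * the given flags, and no constructor.
	 */
	example_cachep = kmem_cache_create("nsproxy",
					   sizeof(struct nsproxy),
					   __alignof__(struct nsproxy),
					   SLAB_PANIC, NULL);
	return 0;
}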
diff --git a/kernel/params.c b/kernel/params.c
index 4e57732fcf..1d6aca288c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -252,8 +252,9 @@ int param_get_bool(char *buffer, struct kernel_param *kp)
252int param_set_invbool(const char *val, struct kernel_param *kp) 252int param_set_invbool(const char *val, struct kernel_param *kp)
253{ 253{
254 int boolval, ret; 254 int boolval, ret;
255 struct kernel_param dummy = { .arg = &boolval }; 255 struct kernel_param dummy;
256 256
257 dummy.arg = &boolval;
257 ret = param_set_bool(val, &dummy); 258 ret = param_set_bool(val, &dummy);
258 if (ret == 0) 259 if (ret == 0)
259 *(int *)kp->arg = !boolval; 260 *(int *)kp->arg = !boolval;
@@ -262,11 +263,7 @@ int param_set_invbool(const char *val, struct kernel_param *kp)
262 263
263int param_get_invbool(char *buffer, struct kernel_param *kp) 264int param_get_invbool(char *buffer, struct kernel_param *kp)
264{ 265{
265 int val; 266 return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
266 struct kernel_param dummy = { .arg = &val };
267
268 val = !*(int *)kp->arg;
269 return param_get_bool(buffer, &dummy);
270} 267}
271 268
272/* We break the rule and mangle the string. */ 269/* We break the rule and mangle the string. */
@@ -325,7 +322,7 @@ static int param_array(const char *name,
325 322
326int param_array_set(const char *val, struct kernel_param *kp) 323int param_array_set(const char *val, struct kernel_param *kp)
327{ 324{
328 struct kparam_array *arr = kp->arg; 325 const struct kparam_array *arr = kp->arr;
329 unsigned int temp_num; 326 unsigned int temp_num;
330 327
331 return param_array(kp->name, val, 1, arr->max, arr->elem, 328 return param_array(kp->name, val, 1, arr->max, arr->elem,
@@ -335,7 +332,7 @@ int param_array_set(const char *val, struct kernel_param *kp)
335int param_array_get(char *buffer, struct kernel_param *kp) 332int param_array_get(char *buffer, struct kernel_param *kp)
336{ 333{
337 int i, off, ret; 334 int i, off, ret;
338 struct kparam_array *arr = kp->arg; 335 const struct kparam_array *arr = kp->arr;
339 struct kernel_param p; 336 struct kernel_param p;
340 337
341 p = *kp; 338 p = *kp;
@@ -354,7 +351,7 @@ int param_array_get(char *buffer, struct kernel_param *kp)
354 351
355int param_set_copystring(const char *val, struct kernel_param *kp) 352int param_set_copystring(const char *val, struct kernel_param *kp)
356{ 353{
357 struct kparam_string *kps = kp->arg; 354 const struct kparam_string *kps = kp->str;
358 355
359 if (!val) { 356 if (!val) {
360 printk(KERN_ERR "%s: missing param set value\n", kp->name); 357 printk(KERN_ERR "%s: missing param set value\n", kp->name);
@@ -371,7 +368,7 @@ int param_set_copystring(const char *val, struct kernel_param *kp)
371 368
372int param_get_string(char *buffer, struct kernel_param *kp) 369int param_get_string(char *buffer, struct kernel_param *kp)
373{ 370{
374 struct kparam_string *kps = kp->arg; 371 const struct kparam_string *kps = kp->str;
375 return strlcpy(buffer, kps->string, kps->maxlen); 372 return strlcpy(buffer, kps->string, kps->maxlen);
376} 373}
377 374
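For context on the invbool helpers above, a hedged sketch of how an inverted-sense module parameter is declared with the module_param() conventions of this era. The variable and parameter names are invented for illustration; with the new param_get_invbool(), reading the parameter back reports the inverse of the stored integer as a single 'Y' or 'N'.

#include <linux/module.h>
#include <linux/moduleparam.h>

/*
 * Hypothetical feature flag: the parameter has the opposite sense of
 * the variable.  Passing "disable_foo=Y" makes param_set_invbool()
 * store 0 in foo_enabled; reading the parameter back then prints 'Y'
 * again, i.e. the inverse of the stored value.
 */
static int foo_enabled = 1;
module_param_named(disable_foo, foo_enabled, invbool, 0644);
MODULE_PARM_DESC(disable_foo, "Disable the (hypothetical) foo feature");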
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 57efe0400b..d71ed09fe1 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -241,7 +241,8 @@ static __init int init_posix_timers(void)
241 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 241 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
242 242
243 posix_timers_cache = kmem_cache_create("posix_timers_cache", 243 posix_timers_cache = kmem_cache_create("posix_timers_cache",
244 sizeof (struct k_itimer), 0, 0, NULL); 244 sizeof (struct k_itimer), 0, SLAB_PANIC,
245 NULL);
245 idr_init(&posix_timers_id); 246 idr_init(&posix_timers_id);
246 return 0; 247 return 0;
247} 248}
diff --git a/kernel/printk.c b/kernel/printk.c
index 8451dfc31d..52493474f0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -22,6 +22,8 @@
22#include <linux/tty_driver.h> 22#include <linux/tty_driver.h>
23#include <linux/console.h> 23#include <linux/console.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/jiffies.h>
26#include <linux/nmi.h>
25#include <linux/module.h> 27#include <linux/module.h>
26#include <linux/moduleparam.h> 28#include <linux/moduleparam.h>
27#include <linux/interrupt.h> /* For in_interrupt() */ 29#include <linux/interrupt.h> /* For in_interrupt() */
@@ -162,6 +164,113 @@ out:
162 164
163__setup("log_buf_len=", log_buf_len_setup); 165__setup("log_buf_len=", log_buf_len_setup);
164 166
167#ifdef CONFIG_BOOT_PRINTK_DELAY
168
169static unsigned int boot_delay; /* msecs delay after each printk during bootup */
170static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */
171
172static int __init boot_delay_setup(char *str)
173{
174 unsigned long lpj;
175 unsigned long long loops_per_msec;
176
177 lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */
178 loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
179
180 get_option(&str, &boot_delay);
181 if (boot_delay > 10 * 1000)
182 boot_delay = 0;
183
184 printk_delay_msec = loops_per_msec;
185 printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
186 "HZ: %d, printk_delay_msec: %llu\n",
187 boot_delay, preset_lpj, lpj, HZ, printk_delay_msec);
188 return 1;
189}
190__setup("boot_delay=", boot_delay_setup);
191
192static void boot_delay_msec(void)
193{
194 unsigned long long k;
195 unsigned long timeout;
196
197 if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
198 return;
199
200 k = (unsigned long long)printk_delay_msec * boot_delay;
201
202 timeout = jiffies + msecs_to_jiffies(boot_delay);
203 while (k) {
204 k--;
205 cpu_relax();
206 /*
207 * use (volatile) jiffies to prevent
208 * compiler reduction; loop termination via jiffies
209 * is secondary and may or may not happen.
210 */
211 if (time_after(jiffies, timeout))
212 break;
213 touch_nmi_watchdog();
214 }
215}
216#else
217static inline void boot_delay_msec(void)
218{
219}
220#endif
221
222/*
223 * Return the number of unread characters in the log buffer.
224 */
225int log_buf_get_len(void)
226{
227 return logged_chars;
228}
229
230/*
231 * Copy a range of characters from the log buffer.
232 */
233int log_buf_copy(char *dest, int idx, int len)
234{
235 int ret, max;
236 bool took_lock = false;
237
238 if (!oops_in_progress) {
239 spin_lock_irq(&logbuf_lock);
240 took_lock = true;
241 }
242
243 max = log_buf_get_len();
244 if (idx < 0 || idx >= max) {
245 ret = -1;
246 } else {
247 if (len > max)
248 len = max;
249 ret = len;
250 idx += (log_end - max);
251 while (len-- > 0)
252 dest[len] = LOG_BUF(idx + len);
253 }
254
255 if (took_lock)
256 spin_unlock_irq(&logbuf_lock);
257
258 return ret;
259}
260
261/*
262 * Extract a single character from the log buffer.
263 */
264int log_buf_read(int idx)
265{
266 char ret;
267
268 if (log_buf_copy(&ret, idx, 1) == 1)
269 return ret;
270 else
271 return -1;
272}
273
165/* 274/*
166 * Commands to do_syslog: 275 * Commands to do_syslog:
167 * 276 *
@@ -527,6 +636,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
527 static char printk_buf[1024]; 636 static char printk_buf[1024];
528 static int log_level_unknown = 1; 637 static int log_level_unknown = 1;
529 638
639 boot_delay_msec();
640
530 preempt_disable(); 641 preempt_disable();
531 if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id()) 642 if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
532 /* If a crash is occurring during printk() on this CPU, 643 /* If a crash is occurring during printk() on this CPU,
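The new log_buf_get_len()/log_buf_copy()/log_buf_read() accessors above give other kernel code a safe way to read the printk ring buffer. A minimal sketch of a consumer that copies out the newest characters; the helper name is invented, and the extern prototypes simply restate the functions introduced in this patch.

extern int log_buf_get_len(void);
extern int log_buf_copy(char *dest, int idx, int len);

/* Hypothetical helper: copy up to 'size' of the most recent log text. */
static int dump_recent_log(char *buf, int size)
{
	int len = log_buf_get_len();	/* unread characters in the buffer */
	int idx = 0;
	int copied;

	if (len > size)
		idx = len - size;	/* start so only the tail is copied */

	copied = log_buf_copy(buf, idx, size);
	if (copied < 0)
		return 0;		/* empty buffer or stale index */
	return copied;			/* characters actually copied */
}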
diff --git a/kernel/profile.c b/kernel/profile.c
index cb1e37d2da..631b75c25d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,7 +37,7 @@ struct profile_hit {
37#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) 37#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
38 38
39/* Oprofile timer tick hook */ 39/* Oprofile timer tick hook */
40int (*timer_hook)(struct pt_regs *) __read_mostly; 40static int (*timer_hook)(struct pt_regs *) __read_mostly;
41 41
42static atomic_t *prof_buffer; 42static atomic_t *prof_buffer;
43static unsigned long prof_len, prof_shift; 43static unsigned long prof_len, prof_shift;
@@ -346,7 +346,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
346 per_cpu(cpu_profile_flip, cpu) = 0; 346 per_cpu(cpu_profile_flip, cpu) = 0;
347 if (!per_cpu(cpu_profile_hits, cpu)[1]) { 347 if (!per_cpu(cpu_profile_hits, cpu)[1]) {
348 page = alloc_pages_node(node, 348 page = alloc_pages_node(node,
349 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 349 GFP_KERNEL | __GFP_ZERO,
350 0); 350 0);
351 if (!page) 351 if (!page)
352 return NOTIFY_BAD; 352 return NOTIFY_BAD;
@@ -354,7 +354,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
354 } 354 }
355 if (!per_cpu(cpu_profile_hits, cpu)[0]) { 355 if (!per_cpu(cpu_profile_hits, cpu)[0]) {
356 page = alloc_pages_node(node, 356 page = alloc_pages_node(node,
357 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 357 GFP_KERNEL | __GFP_ZERO,
358 0); 358 0);
359 if (!page) 359 if (!page)
360 goto out_free; 360 goto out_free;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 3eca7a55f2..a73ebd3b9d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -386,6 +386,9 @@ int ptrace_request(struct task_struct *child, long request,
386 case PTRACE_SETSIGINFO: 386 case PTRACE_SETSIGINFO:
387 ret = ptrace_setsiginfo(child, (siginfo_t __user *) data); 387 ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
388 break; 388 break;
389 case PTRACE_DETACH: /* detach a process that was attached. */
390 ret = ptrace_detach(child, data);
391 break;
389 default: 392 default:
390 break; 393 break;
391 } 394 }
@@ -450,6 +453,10 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
450 return child; 453 return child;
451} 454}
452 455
456#ifndef arch_ptrace_attach
457#define arch_ptrace_attach(child) do { } while (0)
458#endif
459
453#ifndef __ARCH_SYS_PTRACE 460#ifndef __ARCH_SYS_PTRACE
454asmlinkage long sys_ptrace(long request, long pid, long addr, long data) 461asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
455{ 462{
@@ -473,6 +480,12 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
473 480
474 if (request == PTRACE_ATTACH) { 481 if (request == PTRACE_ATTACH) {
475 ret = ptrace_attach(child); 482 ret = ptrace_attach(child);
483 /*
484 * Some architectures need to do book-keeping after
485 * a ptrace attach.
486 */
487 if (!ret)
488 arch_ptrace_attach(child);
476 goto out_put_task_struct; 489 goto out_put_task_struct;
477 } 490 }
478 491
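The arch_ptrace_attach() hook added above defaults to a no-op; an architecture that needs attach-time bookkeeping overrides it from its own headers. A hedged sketch of what such an override could look like; the helper called here is hypothetical.

/*
 * Sketch of an architecture-side override (it would live in that
 * architecture's <asm/ptrace.h>).  The helper name is made up; the
 * real work would be whatever per-thread state the architecture must
 * synchronize before the tracer starts inspecting the child.
 */
#define arch_ptrace_attach(child)	arch_sync_trace_state(child)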
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 130214f3d2..a66d4d1615 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,7 +45,6 @@
45#include <linux/moduleparam.h> 45#include <linux/moduleparam.h>
46#include <linux/percpu.h> 46#include <linux/percpu.h>
47#include <linux/notifier.h> 47#include <linux/notifier.h>
48#include <linux/rcupdate.h>
49#include <linux/cpu.h> 48#include <linux/cpu.h>
50#include <linux/mutex.h> 49#include <linux/mutex.h>
51 50
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index ddff332477..c3e165c231 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -35,14 +35,12 @@
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <asm/atomic.h> 36#include <asm/atomic.h>
37#include <linux/bitops.h> 37#include <linux/bitops.h>
38#include <linux/module.h>
39#include <linux/completion.h> 38#include <linux/completion.h>
40#include <linux/moduleparam.h> 39#include <linux/moduleparam.h>
41#include <linux/percpu.h> 40#include <linux/percpu.h>
42#include <linux/notifier.h> 41#include <linux/notifier.h>
43#include <linux/freezer.h> 42#include <linux/freezer.h>
44#include <linux/cpu.h> 43#include <linux/cpu.h>
45#include <linux/random.h>
46#include <linux/delay.h> 44#include <linux/delay.h>
47#include <linux/byteorder/swabb.h> 45#include <linux/byteorder/swabb.h>
48#include <linux/stat.h> 46#include <linux/stat.h>
@@ -166,16 +164,14 @@ struct rcu_random_state {
166 164
167/* 165/*
168 * Crude but fast random-number generator. Uses a linear congruential 166 * Crude but fast random-number generator. Uses a linear congruential
169 * generator, with occasional help from get_random_bytes(). 167 * generator, with occasional help from cpu_clock().
170 */ 168 */
171static unsigned long 169static unsigned long
172rcu_random(struct rcu_random_state *rrsp) 170rcu_random(struct rcu_random_state *rrsp)
173{ 171{
174 long refresh;
175
176 if (--rrsp->rrs_count < 0) { 172 if (--rrsp->rrs_count < 0) {
177 get_random_bytes(&refresh, sizeof(refresh)); 173 rrsp->rrs_state +=
178 rrsp->rrs_state += refresh; 174 (unsigned long)cpu_clock(raw_smp_processor_id());
179 rrsp->rrs_count = RCU_RANDOM_REFRESH; 175 rrsp->rrs_count = RCU_RANDOM_REFRESH;
180 } 176 }
181 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD; 177 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/resource.c b/kernel/resource.c
index 9bd14fd3e6..a358142ff4 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL(release_resource);
234 * the caller must specify res->start, res->end, res->flags. 234 * the caller must specify res->start, res->end, res->flags.
235 * If found, returns 0, res is overwritten, if not found, returns -1. 235 * If found, returns 0, res is overwritten, if not found, returns -1.
236 */ 236 */
237int find_next_system_ram(struct resource *res) 237static int find_next_system_ram(struct resource *res)
238{ 238{
239 resource_size_t start, end; 239 resource_size_t start, end;
240 struct resource *p; 240 struct resource *p;
@@ -267,6 +267,30 @@ int find_next_system_ram(struct resource *res)
267 res->end = p->end; 267 res->end = p->end;
268 return 0; 268 return 0;
269} 269}
270int
271walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
272 int (*func)(unsigned long, unsigned long, void *))
273{
274 struct resource res;
275 unsigned long pfn, len;
276 u64 orig_end;
277 int ret = -1;
278 res.start = (u64) start_pfn << PAGE_SHIFT;
279 res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
280 res.flags = IORESOURCE_MEM;
281 orig_end = res.end;
282 while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
283 pfn = (unsigned long)(res.start >> PAGE_SHIFT);
284 len = (unsigned long)((res.end + 1 - res.start) >> PAGE_SHIFT);
285 ret = (*func)(pfn, len, arg);
286 if (ret)
287 break;
288 res.start = res.end + 1;
289 res.end = orig_end;
290 }
291 return ret;
292}
293
270#endif 294#endif
271 295
272/* 296/*
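walk_memory_resource(), added above, calls the supplied function once per contiguous chunk of "System RAM" inside the given pfn range and stops on the first non-zero return. A minimal sketch of a caller, with an invented callback that just tallies pages; the extern prototype restates the signature from this patch.

extern int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages,
				void *arg,
				int (*func)(unsigned long, unsigned long, void *));

/* Hypothetical callback: accumulate the number of System RAM pages. */
static int count_ram_pages(unsigned long start_pfn, unsigned long nr_pages,
			   void *arg)
{
	unsigned long *total = arg;

	*total += nr_pages;
	return 0;		/* non-zero would stop the walk early */
}

static unsigned long ram_pages_in_range(unsigned long start_pfn,
					unsigned long nr_pages)
{
	unsigned long total = 0;

	/* Visits each contiguous "System RAM" chunk inside the range. */
	walk_memory_resource(start_pfn, nr_pages, &total, count_ram_pages);
	return total;
}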
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5aedbee014..6b0703db15 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -82,12 +82,7 @@ do { \
82 * into the tracing code when doing error printk or 82 * into the tracing code when doing error printk or
83 * executing a BUG(): 83 * executing a BUG():
84 */ 84 */
85int rt_trace_on = 1; 85static int rt_trace_on = 1;
86
87void deadlock_trace_off(void)
88{
89 rt_trace_on = 0;
90}
91 86
92static void printk_task(struct task_struct *p) 87static void printk_task(struct task_struct *p)
93{ 88{
diff --git a/kernel/sched.c b/kernel/sched.c
index bba57adb95..92721d1534 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1712,7 +1712,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1712 1712
1713 p->prio = effective_prio(p); 1713 p->prio = effective_prio(p);
1714 1714
1715 if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) { 1715 if (!p->sched_class->task_new || !current->se.on_rq) {
1716 activate_task(rq, p, 0); 1716 activate_task(rq, p, 0);
1717 } else { 1717 } else {
1718 /* 1718 /*
@@ -2336,7 +2336,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2336 unsigned long max_pull; 2336 unsigned long max_pull;
2337 unsigned long busiest_load_per_task, busiest_nr_running; 2337 unsigned long busiest_load_per_task, busiest_nr_running;
2338 unsigned long this_load_per_task, this_nr_running; 2338 unsigned long this_load_per_task, this_nr_running;
2339 int load_idx; 2339 int load_idx, group_imb = 0;
2340#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2340#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
2341 int power_savings_balance = 1; 2341 int power_savings_balance = 1;
2342 unsigned long leader_nr_running = 0, min_load_per_task = 0; 2342 unsigned long leader_nr_running = 0, min_load_per_task = 0;
@@ -2355,9 +2355,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2355 load_idx = sd->idle_idx; 2355 load_idx = sd->idle_idx;
2356 2356
2357 do { 2357 do {
2358 unsigned long load, group_capacity; 2358 unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
2359 int local_group; 2359 int local_group;
2360 int i; 2360 int i;
2361 int __group_imb = 0;
2361 unsigned int balance_cpu = -1, first_idle_cpu = 0; 2362 unsigned int balance_cpu = -1, first_idle_cpu = 0;
2362 unsigned long sum_nr_running, sum_weighted_load; 2363 unsigned long sum_nr_running, sum_weighted_load;
2363 2364
@@ -2368,6 +2369,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2368 2369
2369 /* Tally up the load of all CPUs in the group */ 2370 /* Tally up the load of all CPUs in the group */
2370 sum_weighted_load = sum_nr_running = avg_load = 0; 2371 sum_weighted_load = sum_nr_running = avg_load = 0;
2372 max_cpu_load = 0;
2373 min_cpu_load = ~0UL;
2371 2374
2372 for_each_cpu_mask(i, group->cpumask) { 2375 for_each_cpu_mask(i, group->cpumask) {
2373 struct rq *rq; 2376 struct rq *rq;
@@ -2388,8 +2391,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2388 } 2391 }
2389 2392
2390 load = target_load(i, load_idx); 2393 load = target_load(i, load_idx);
2391 } else 2394 } else {
2392 load = source_load(i, load_idx); 2395 load = source_load(i, load_idx);
2396 if (load > max_cpu_load)
2397 max_cpu_load = load;
2398 if (min_cpu_load > load)
2399 min_cpu_load = load;
2400 }
2393 2401
2394 avg_load += load; 2402 avg_load += load;
2395 sum_nr_running += rq->nr_running; 2403 sum_nr_running += rq->nr_running;
@@ -2415,6 +2423,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2415 avg_load = sg_div_cpu_power(group, 2423 avg_load = sg_div_cpu_power(group,
2416 avg_load * SCHED_LOAD_SCALE); 2424 avg_load * SCHED_LOAD_SCALE);
2417 2425
2426 if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE)
2427 __group_imb = 1;
2428
2418 group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; 2429 group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
2419 2430
2420 if (local_group) { 2431 if (local_group) {
@@ -2423,11 +2434,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2423 this_nr_running = sum_nr_running; 2434 this_nr_running = sum_nr_running;
2424 this_load_per_task = sum_weighted_load; 2435 this_load_per_task = sum_weighted_load;
2425 } else if (avg_load > max_load && 2436 } else if (avg_load > max_load &&
2426 sum_nr_running > group_capacity) { 2437 (sum_nr_running > group_capacity || __group_imb)) {
2427 max_load = avg_load; 2438 max_load = avg_load;
2428 busiest = group; 2439 busiest = group;
2429 busiest_nr_running = sum_nr_running; 2440 busiest_nr_running = sum_nr_running;
2430 busiest_load_per_task = sum_weighted_load; 2441 busiest_load_per_task = sum_weighted_load;
2442 group_imb = __group_imb;
2431 } 2443 }
2432 2444
2433#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2445#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2499,6 +2511,9 @@ group_next:
2499 goto out_balanced; 2511 goto out_balanced;
2500 2512
2501 busiest_load_per_task /= busiest_nr_running; 2513 busiest_load_per_task /= busiest_nr_running;
2514 if (group_imb)
2515 busiest_load_per_task = min(busiest_load_per_task, avg_load);
2516
2502 /* 2517 /*
2503 * We're trying to get all the cpus to the average_load, so we don't 2518 * We're trying to get all the cpus to the average_load, so we don't
2504 * want to push ourselves above the average load, nor do we wish to 2519 * want to push ourselves above the average load, nor do we wish to
@@ -5060,6 +5075,17 @@ wait_to_die:
5060} 5075}
5061 5076
5062#ifdef CONFIG_HOTPLUG_CPU 5077#ifdef CONFIG_HOTPLUG_CPU
5078
5079static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
5080{
5081 int ret;
5082
5083 local_irq_disable();
5084 ret = __migrate_task(p, src_cpu, dest_cpu);
5085 local_irq_enable();
5086 return ret;
5087}
5088
5063/* 5089/*
5064 * Figure out where task on dead CPU should go, use force if neccessary. 5090 * Figure out where task on dead CPU should go, use force if neccessary.
5065 * NOTE: interrupts should be disabled by the caller 5091 * NOTE: interrupts should be disabled by the caller
@@ -5098,7 +5124,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5098 "longer affine to cpu%d\n", 5124 "longer affine to cpu%d\n",
5099 p->pid, p->comm, dead_cpu); 5125 p->pid, p->comm, dead_cpu);
5100 } 5126 }
5101 } while (!__migrate_task(p, dead_cpu, dest_cpu)); 5127 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
5102} 5128}
5103 5129
5104/* 5130/*
@@ -5126,7 +5152,7 @@ static void migrate_live_tasks(int src_cpu)
5126{ 5152{
5127 struct task_struct *p, *t; 5153 struct task_struct *p, *t;
5128 5154
5129 write_lock_irq(&tasklist_lock); 5155 read_lock(&tasklist_lock);
5130 5156
5131 do_each_thread(t, p) { 5157 do_each_thread(t, p) {
5132 if (p == current) 5158 if (p == current)
@@ -5136,7 +5162,7 @@ static void migrate_live_tasks(int src_cpu)
5136 move_task_off_dead_cpu(src_cpu, p); 5162 move_task_off_dead_cpu(src_cpu, p);
5137 } while_each_thread(t, p); 5163 } while_each_thread(t, p);
5138 5164
5139 write_unlock_irq(&tasklist_lock); 5165 read_unlock(&tasklist_lock);
5140} 5166}
5141 5167
5142/* 5168/*
@@ -5214,11 +5240,10 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5214 * Drop lock around migration; if someone else moves it, 5240 * Drop lock around migration; if someone else moves it,
5215 * that's OK. No task can be added to this CPU, so iteration is 5241 * that's OK. No task can be added to this CPU, so iteration is
5216 * fine. 5242 * fine.
5217 * NOTE: interrupts should be left disabled --dev@
5218 */ 5243 */
5219 spin_unlock(&rq->lock); 5244 spin_unlock_irq(&rq->lock);
5220 move_task_off_dead_cpu(dead_cpu, p); 5245 move_task_off_dead_cpu(dead_cpu, p);
5221 spin_lock(&rq->lock); 5246 spin_lock_irq(&rq->lock);
5222 5247
5223 put_task_struct(p); 5248 put_task_struct(p);
5224} 5249}
@@ -5272,11 +5297,20 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
5272 5297
5273static void sd_free_ctl_entry(struct ctl_table **tablep) 5298static void sd_free_ctl_entry(struct ctl_table **tablep)
5274{ 5299{
5275 struct ctl_table *entry = *tablep; 5300 struct ctl_table *entry;
5276 5301
5277 for (entry = *tablep; entry->procname; entry++) 5302 /*
5303 * In the intermediate directories, both the child directory and
5304 * procname are dynamically allocated and could fail but the mode
5305 * will always be set. In the lowest directory the names are
5306 * static strings and all have proc handlers.
5307 */
5308 for (entry = *tablep; entry->mode; entry++) {
5278 if (entry->child) 5309 if (entry->child)
5279 sd_free_ctl_entry(&entry->child); 5310 sd_free_ctl_entry(&entry->child);
5311 if (entry->proc_handler == NULL)
5312 kfree(entry->procname);
5313 }
5280 5314
5281 kfree(*tablep); 5315 kfree(*tablep);
5282 *tablep = NULL; 5316 *tablep = NULL;
@@ -5447,14 +5481,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5447 kthread_stop(rq->migration_thread); 5481 kthread_stop(rq->migration_thread);
5448 rq->migration_thread = NULL; 5482 rq->migration_thread = NULL;
5449 /* Idle task back to normal (off runqueue, low prio) */ 5483 /* Idle task back to normal (off runqueue, low prio) */
5450 rq = task_rq_lock(rq->idle, &flags); 5484 spin_lock_irq(&rq->lock);
5451 update_rq_clock(rq); 5485 update_rq_clock(rq);
5452 deactivate_task(rq, rq->idle, 0); 5486 deactivate_task(rq, rq->idle, 0);
5453 rq->idle->static_prio = MAX_PRIO; 5487 rq->idle->static_prio = MAX_PRIO;
5454 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 5488 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
5455 rq->idle->sched_class = &idle_sched_class; 5489 rq->idle->sched_class = &idle_sched_class;
5456 migrate_dead_tasks(cpu); 5490 migrate_dead_tasks(cpu);
5457 task_rq_unlock(rq, &flags); 5491 spin_unlock_irq(&rq->lock);
5458 migrate_nr_uninterruptible(rq); 5492 migrate_nr_uninterruptible(rq);
5459 BUG_ON(rq->nr_running != 0); 5493 BUG_ON(rq->nr_running != 0);
5460 5494
@@ -5869,7 +5903,7 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
5869 struct sched_group **sg) 5903 struct sched_group **sg)
5870{ 5904{
5871 int group; 5905 int group;
5872 cpumask_t mask = cpu_sibling_map[cpu]; 5906 cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
5873 cpus_and(mask, mask, *cpu_map); 5907 cpus_and(mask, mask, *cpu_map);
5874 group = first_cpu(mask); 5908 group = first_cpu(mask);
5875 if (sg) 5909 if (sg)
@@ -5898,7 +5932,7 @@ static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map,
5898 cpus_and(mask, mask, *cpu_map); 5932 cpus_and(mask, mask, *cpu_map);
5899 group = first_cpu(mask); 5933 group = first_cpu(mask);
5900#elif defined(CONFIG_SCHED_SMT) 5934#elif defined(CONFIG_SCHED_SMT)
5901 cpumask_t mask = cpu_sibling_map[cpu]; 5935 cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
5902 cpus_and(mask, mask, *cpu_map); 5936 cpus_and(mask, mask, *cpu_map);
5903 group = first_cpu(mask); 5937 group = first_cpu(mask);
5904#else 5938#else
@@ -6132,7 +6166,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6132 p = sd; 6166 p = sd;
6133 sd = &per_cpu(cpu_domains, i); 6167 sd = &per_cpu(cpu_domains, i);
6134 *sd = SD_SIBLING_INIT; 6168 *sd = SD_SIBLING_INIT;
6135 sd->span = cpu_sibling_map[i]; 6169 sd->span = per_cpu(cpu_sibling_map, i);
6136 cpus_and(sd->span, sd->span, *cpu_map); 6170 cpus_and(sd->span, sd->span, *cpu_map);
6137 sd->parent = p; 6171 sd->parent = p;
6138 p->child = sd; 6172 p->child = sd;
@@ -6143,7 +6177,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6143#ifdef CONFIG_SCHED_SMT 6177#ifdef CONFIG_SCHED_SMT
6144 /* Set up CPU (sibling) groups */ 6178 /* Set up CPU (sibling) groups */
6145 for_each_cpu_mask(i, *cpu_map) { 6179 for_each_cpu_mask(i, *cpu_map) {
6146 cpumask_t this_sibling_map = cpu_sibling_map[i]; 6180 cpumask_t this_sibling_map = per_cpu(cpu_sibling_map, i);
6147 cpus_and(this_sibling_map, this_sibling_map, *cpu_map); 6181 cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
6148 if (i != first_cpu(this_sibling_map)) 6182 if (i != first_cpu(this_sibling_map))
6149 continue; 6183 continue;
@@ -6348,35 +6382,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
6348 arch_destroy_sched_domains(cpu_map); 6382 arch_destroy_sched_domains(cpu_map);
6349} 6383}
6350 6384
6351/*
6352 * Partition sched domains as specified by the cpumasks below.
6353 * This attaches all cpus from the cpumasks to the NULL domain,
6354 * waits for a RCU quiescent period, recalculates sched
6355 * domain information and then attaches them back to the
6356 * correct sched domains
6357 * Call with hotplug lock held
6358 */
6359int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
6360{
6361 cpumask_t change_map;
6362 int err = 0;
6363
6364 cpus_and(*partition1, *partition1, cpu_online_map);
6365 cpus_and(*partition2, *partition2, cpu_online_map);
6366 cpus_or(change_map, *partition1, *partition2);
6367
6368 /* Detach sched domains from all of the affected cpus */
6369 detach_destroy_domains(&change_map);
6370 if (!cpus_empty(*partition1))
6371 err = build_sched_domains(partition1);
6372 if (!err && !cpus_empty(*partition2))
6373 err = build_sched_domains(partition2);
6374
6375 register_sched_domain_sysctl();
6376
6377 return err;
6378}
6379
6380#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 6385#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
6381static int arch_reinit_sched_domains(void) 6386static int arch_reinit_sched_domains(void)
6382{ 6387{
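On the group-imbalance logic above: a group is flagged imbalanced when the spread between its most and least loaded CPU exceeds SCHED_LOAD_SCALE, and busiest_load_per_task is then capped at the group's average load. A small illustrative sketch of the spread test in isolation; the constant below is a stand-in rather than the scheduler's own definition.

/* Illustrative only: the spread test that sets __group_imb above. */
#define EXAMPLE_LOAD_SCALE	1024UL	/* stand-in for SCHED_LOAD_SCALE */

static int group_is_imbalanced(unsigned long max_cpu_load,
			       unsigned long min_cpu_load)
{
	/* e.g. loads of 3072 and 512: spread 2560 > 1024, so imbalanced */
	return (max_cpu_load - min_cpu_load) > EXAMPLE_LOAD_SCALE;
}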
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a17b785d70..166ed6db60 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1031,12 +1031,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
1031 swap(curr->vruntime, se->vruntime); 1031 swap(curr->vruntime, se->vruntime);
1032 } 1032 }
1033 1033
1034 update_stats_enqueue(cfs_rq, se);
1035 check_spread(cfs_rq, se);
1036 check_spread(cfs_rq, curr);
1037 __enqueue_entity(cfs_rq, se);
1038 account_entity_enqueue(cfs_rq, se);
1039 se->peer_preempt = 0; 1034 se->peer_preempt = 0;
1035 enqueue_task_fair(rq, p, 0);
1040 resched_task(rq->curr); 1036 resched_task(rq->curr);
1041} 1037}
1042 1038
diff --git a/kernel/signal.c b/kernel/signal.c
index 7929523810..2124ffadcf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,8 +909,7 @@ __group_complete_signal(int sig, struct task_struct *p)
909 do { 909 do {
910 sigaddset(&t->pending.signal, SIGKILL); 910 sigaddset(&t->pending.signal, SIGKILL);
911 signal_wake_up(t, 1); 911 signal_wake_up(t, 1);
912 t = next_thread(t); 912 } while_each_thread(p, t);
913 } while (t != p);
914 return; 913 return;
915 } 914 }
916 915
@@ -928,13 +927,11 @@ __group_complete_signal(int sig, struct task_struct *p)
928 rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); 927 rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
929 p->signal->group_stop_count = 0; 928 p->signal->group_stop_count = 0;
930 p->signal->group_exit_task = t; 929 p->signal->group_exit_task = t;
931 t = p; 930 p = t;
932 do { 931 do {
933 p->signal->group_stop_count++; 932 p->signal->group_stop_count++;
934 signal_wake_up(t, 0); 933 signal_wake_up(t, t == p);
935 t = next_thread(t); 934 } while_each_thread(p, t);
936 } while (t != p);
937 wake_up_process(p->signal->group_exit_task);
938 return; 935 return;
939 } 936 }
940 937
@@ -985,9 +982,6 @@ void zap_other_threads(struct task_struct *p)
985 p->signal->flags = SIGNAL_GROUP_EXIT; 982 p->signal->flags = SIGNAL_GROUP_EXIT;
986 p->signal->group_stop_count = 0; 983 p->signal->group_stop_count = 0;
987 984
988 if (thread_group_empty(p))
989 return;
990
991 for (t = next_thread(p); t != p; t = next_thread(t)) { 985 for (t = next_thread(p); t != p; t = next_thread(t)) {
992 /* 986 /*
993 * Don't bother with already dead threads 987 * Don't bother with already dead threads
@@ -2300,15 +2294,6 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2300 k = &current->sighand->action[sig-1]; 2294 k = &current->sighand->action[sig-1];
2301 2295
2302 spin_lock_irq(&current->sighand->siglock); 2296 spin_lock_irq(&current->sighand->siglock);
2303 if (signal_pending(current)) {
2304 /*
2305 * If there might be a fatal signal pending on multiple
2306 * threads, make sure we take it before changing the action.
2307 */
2308 spin_unlock_irq(&current->sighand->siglock);
2309 return -ERESTARTNOINTR;
2310 }
2311
2312 if (oact) 2297 if (oact)
2313 *oact = *k; 2298 *oact = *k;
2314 2299
@@ -2335,7 +2320,6 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2335 rm_from_queue_full(&mask, &t->signal->shared_pending); 2320 rm_from_queue_full(&mask, &t->signal->shared_pending);
2336 do { 2321 do {
2337 rm_from_queue_full(&mask, &t->pending); 2322 rm_from_queue_full(&mask, &t->pending);
2338 recalc_sigpending_and_wake(t);
2339 t = next_thread(t); 2323 t = next_thread(t);
2340 } while (t != current); 2324 } while (t != current);
2341 } 2325 }
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 708d4882c0..edeeef3a6a 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -15,13 +15,16 @@
15#include <linux/notifier.h> 15#include <linux/notifier.h>
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include <asm/irq_regs.h>
19
18static DEFINE_SPINLOCK(print_lock); 20static DEFINE_SPINLOCK(print_lock);
19 21
20static DEFINE_PER_CPU(unsigned long, touch_timestamp); 22static DEFINE_PER_CPU(unsigned long, touch_timestamp);
21static DEFINE_PER_CPU(unsigned long, print_timestamp); 23static DEFINE_PER_CPU(unsigned long, print_timestamp);
22static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 24static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
23 25
24static int did_panic = 0; 26static int did_panic;
27int softlockup_thresh = 10;
25 28
26static int 29static int
27softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) 30softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -40,14 +43,16 @@ static struct notifier_block panic_block = {
40 * resolution, and we don't need to waste time with a big divide when 43 * resolution, and we don't need to waste time with a big divide when
41 * 2^30ns == 1.074s. 44 * 2^30ns == 1.074s.
42 */ 45 */
43static unsigned long get_timestamp(void) 46static unsigned long get_timestamp(int this_cpu)
44{ 47{
45 return sched_clock() >> 30; /* 2^30 ~= 10^9 */ 48 return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */
46} 49}
47 50
48void touch_softlockup_watchdog(void) 51void touch_softlockup_watchdog(void)
49{ 52{
50 __raw_get_cpu_var(touch_timestamp) = get_timestamp(); 53 int this_cpu = raw_smp_processor_id();
54
55 __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
51} 56}
52EXPORT_SYMBOL(touch_softlockup_watchdog); 57EXPORT_SYMBOL(touch_softlockup_watchdog);
53 58
@@ -70,6 +75,7 @@ void softlockup_tick(void)
70 int this_cpu = smp_processor_id(); 75 int this_cpu = smp_processor_id();
71 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); 76 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
72 unsigned long print_timestamp; 77 unsigned long print_timestamp;
78 struct pt_regs *regs = get_irq_regs();
73 unsigned long now; 79 unsigned long now;
74 80
75 if (touch_timestamp == 0) { 81 if (touch_timestamp == 0) {
@@ -80,10 +86,11 @@ void softlockup_tick(void)
80 print_timestamp = per_cpu(print_timestamp, this_cpu); 86 print_timestamp = per_cpu(print_timestamp, this_cpu);
81 87
82 /* report at most once a second */ 88 /* report at most once a second */
83 if (print_timestamp < (touch_timestamp + 1) || 89 if ((print_timestamp >= touch_timestamp &&
84 did_panic || 90 print_timestamp < (touch_timestamp + 1)) ||
85 !per_cpu(watchdog_task, this_cpu)) 91 did_panic || !per_cpu(watchdog_task, this_cpu)) {
86 return; 92 return;
93 }
87 94
88 /* do not print during early bootup: */ 95 /* do not print during early bootup: */
89 if (unlikely(system_state != SYSTEM_RUNNING)) { 96 if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -91,28 +98,33 @@ void softlockup_tick(void)
91 return; 98 return;
92 } 99 }
93 100
94 now = get_timestamp(); 101 now = get_timestamp(this_cpu);
95 102
96 /* Wake up the high-prio watchdog task every second: */ 103 /* Wake up the high-prio watchdog task every second: */
97 if (now > (touch_timestamp + 1)) 104 if (now > (touch_timestamp + 1))
98 wake_up_process(per_cpu(watchdog_task, this_cpu)); 105 wake_up_process(per_cpu(watchdog_task, this_cpu));
99 106
100 /* Warn about unreasonable 10+ seconds delays: */ 107 /* Warn about unreasonable 10+ seconds delays: */
101 if (now > (touch_timestamp + 10)) { 108 if (now <= (touch_timestamp + softlockup_thresh))
102 per_cpu(print_timestamp, this_cpu) = touch_timestamp; 109 return;
103 110
104 spin_lock(&print_lock); 111 per_cpu(print_timestamp, this_cpu) = touch_timestamp;
105 printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", 112
106 this_cpu); 113 spin_lock(&print_lock);
114 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
115 this_cpu, now - touch_timestamp,
116 current->comm, current->pid);
117 if (regs)
118 show_regs(regs);
119 else
107 dump_stack(); 120 dump_stack();
108 spin_unlock(&print_lock); 121 spin_unlock(&print_lock);
109 }
110} 122}
111 123
112/* 124/*
113 * The watchdog thread - runs every second and touches the timestamp. 125 * The watchdog thread - runs every second and touches the timestamp.
114 */ 126 */
115static int watchdog(void * __bind_cpu) 127static int watchdog(void *__bind_cpu)
116{ 128{
117 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 129 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
118 130
@@ -150,13 +162,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
150 BUG_ON(per_cpu(watchdog_task, hotcpu)); 162 BUG_ON(per_cpu(watchdog_task, hotcpu));
151 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); 163 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
152 if (IS_ERR(p)) { 164 if (IS_ERR(p)) {
153 printk("watchdog for %i failed\n", hotcpu); 165 printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
154 return NOTIFY_BAD; 166 return NOTIFY_BAD;
155 } 167 }
156 per_cpu(touch_timestamp, hotcpu) = 0; 168 per_cpu(touch_timestamp, hotcpu) = 0;
157 per_cpu(watchdog_task, hotcpu) = p; 169 per_cpu(watchdog_task, hotcpu) = p;
158 kthread_bind(p, hotcpu); 170 kthread_bind(p, hotcpu);
159 break; 171 break;
160 case CPU_ONLINE: 172 case CPU_ONLINE:
161 case CPU_ONLINE_FROZEN: 173 case CPU_ONLINE_FROZEN:
162 wake_up_process(per_cpu(watchdog_task, hotcpu)); 174 wake_up_process(per_cpu(watchdog_task, hotcpu));
@@ -176,7 +188,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
176 kthread_stop(p); 188 kthread_stop(p);
177 break; 189 break;
178#endif /* CONFIG_HOTPLUG_CPU */ 190#endif /* CONFIG_HOTPLUG_CPU */
179 } 191 }
180 return NOTIFY_OK; 192 return NOTIFY_OK;
181} 193}
182 194
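On the >> 30 timestamps above: cpu_clock() returns nanoseconds, and shifting right by 30 bits treats 2^30 ns (about 1.074 s) as one "second", avoiding a divide in the tick path. A minimal sketch of that conversion, so the threshold comparison reads as roughly softlockup_thresh seconds of silence before the warning fires.

/* Sketch of the unit used by the watchdog timestamps above. */
static unsigned long ns_to_watchdog_units(unsigned long long ns)
{
	return (unsigned long)(ns >> 30);	/* 2^30 ns ~= 1.074 s */
}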
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index b0ec498a18..52c7a151e2 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -4,6 +4,10 @@
4 4
5#include <asm/unistd.h> 5#include <asm/unistd.h>
6 6
7/* we can't #include <linux/syscalls.h> here,
8 but tell gcc to not warn with -Wmissing-prototypes */
9asmlinkage long sys_ni_syscall(void);
10
7/* 11/*
8 * Non-implemented system calls get redirected here. 12 * Non-implemented system calls get redirected here.
9 */ 13 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ec14aa8ac5..dde3d53e8a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -63,6 +63,7 @@ extern int print_fatal_signals;
63extern int sysctl_overcommit_memory; 63extern int sysctl_overcommit_memory;
64extern int sysctl_overcommit_ratio; 64extern int sysctl_overcommit_ratio;
65extern int sysctl_panic_on_oom; 65extern int sysctl_panic_on_oom;
66extern int sysctl_oom_kill_allocating_task;
66extern int max_threads; 67extern int max_threads;
67extern int core_uses_pid; 68extern int core_uses_pid;
68extern int suid_dumpable; 69extern int suid_dumpable;
@@ -79,6 +80,19 @@ extern int maps_protect;
79extern int sysctl_stat_interval; 80extern int sysctl_stat_interval;
80extern int audit_argv_kb; 81extern int audit_argv_kb;
81 82
83/* Constants used for minimum and maximum */
84#ifdef CONFIG_DETECT_SOFTLOCKUP
85static int one = 1;
86static int sixty = 60;
87#endif
88
89#ifdef CONFIG_MMU
90static int two = 2;
91#endif
92
93static int zero;
94static int one_hundred = 100;
95
82/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 96/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
83static int maxolduid = 65535; 97static int maxolduid = 65535;
84static int minolduid; 98static int minolduid;
@@ -710,6 +724,19 @@ static ctl_table kern_table[] = {
710 .proc_handler = &proc_dointvec, 724 .proc_handler = &proc_dointvec,
711 }, 725 },
712#endif 726#endif
727#ifdef CONFIG_DETECT_SOFTLOCKUP
728 {
729 .ctl_name = CTL_UNNUMBERED,
730 .procname = "softlockup_thresh",
731 .data = &softlockup_thresh,
732 .maxlen = sizeof(int),
733 .mode = 0644,
734 .proc_handler = &proc_dointvec_minmax,
735 .strategy = &sysctl_intvec,
736 .extra1 = &one,
737 .extra2 = &sixty,
738 },
739#endif
713#ifdef CONFIG_COMPAT 740#ifdef CONFIG_COMPAT
714 { 741 {
715 .ctl_name = KERN_COMPAT_LOG, 742 .ctl_name = KERN_COMPAT_LOG,
@@ -756,13 +783,6 @@ static ctl_table kern_table[] = {
756 { .ctl_name = 0 } 783 { .ctl_name = 0 }
757}; 784};
758 785
759/* Constants for minimum and maximum testing in vm_table.
760 We use these as one-element integer vectors. */
761static int zero;
762static int two = 2;
763static int one_hundred = 100;
764
765
766static ctl_table vm_table[] = { 786static ctl_table vm_table[] = {
767 { 787 {
768 .ctl_name = VM_OVERCOMMIT_MEMORY, 788 .ctl_name = VM_OVERCOMMIT_MEMORY,
@@ -781,6 +801,14 @@ static ctl_table vm_table[] = {
781 .proc_handler = &proc_dointvec, 801 .proc_handler = &proc_dointvec,
782 }, 802 },
783 { 803 {
804 .ctl_name = CTL_UNNUMBERED,
805 .procname = "oom_kill_allocating_task",
806 .data = &sysctl_oom_kill_allocating_task,
807 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
808 .mode = 0644,
809 .proc_handler = &proc_dointvec,
810 },
811 {
784 .ctl_name = VM_OVERCOMMIT_RATIO, 812 .ctl_name = VM_OVERCOMMIT_RATIO,
785 .procname = "overcommit_ratio", 813 .procname = "overcommit_ratio",
786 .data = &sysctl_overcommit_ratio, 814 .data = &sysctl_overcommit_ratio,
@@ -813,7 +841,7 @@ static ctl_table vm_table[] = {
813 .data = &vm_dirty_ratio, 841 .data = &vm_dirty_ratio,
814 .maxlen = sizeof(vm_dirty_ratio), 842 .maxlen = sizeof(vm_dirty_ratio),
815 .mode = 0644, 843 .mode = 0644,
816 .proc_handler = &proc_dointvec_minmax, 844 .proc_handler = &dirty_ratio_handler,
817 .strategy = &sysctl_intvec, 845 .strategy = &sysctl_intvec,
818 .extra1 = &zero, 846 .extra1 = &zero,
819 .extra2 = &one_hundred, 847 .extra2 = &one_hundred,
@@ -880,6 +908,14 @@ static ctl_table vm_table[] = {
880 .mode = 0644, 908 .mode = 0644,
881 .proc_handler = &hugetlb_treat_movable_handler, 909 .proc_handler = &hugetlb_treat_movable_handler,
882 }, 910 },
911 {
912 .ctl_name = CTL_UNNUMBERED,
913 .procname = "hugetlb_dynamic_pool",
914 .data = &hugetlb_dynamic_pool,
915 .maxlen = sizeof(hugetlb_dynamic_pool),
916 .mode = 0644,
917 .proc_handler = &proc_dointvec,
918 },
883#endif 919#endif
884 { 920 {
885 .ctl_name = VM_LOWMEM_RESERVE_RATIO, 921 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
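The new entries above follow one pattern: a CTL_UNNUMBERED, name-only sysctl, using proc_dointvec_minmax with extra1/extra2 when the value must be clamped (softlockup_thresh is held to 1..60). A hedged sketch of that pattern with invented names, in the ctl_table style of this era; such a table would typically be hooked in via register_sysctl_table().

#include <linux/sysctl.h>

/* Hypothetical tunable clamped to [1, 60], mirroring the entries above. */
static int example_thresh = 10;
static int example_min = 1;
static int example_max = 60;

static ctl_table example_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,	/* no binary sysctl number */
		.procname	= "example_thresh",
		.data		= &example_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &example_min,		/* lower bound */
		.extra2		= &example_max,		/* upper bound */
	},
	{ .ctl_name = 0 }
};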
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 059431ed67..7d4d7f9c1b 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -20,7 +20,6 @@
20#include <linux/taskstats_kern.h> 20#include <linux/taskstats_kern.h>
21#include <linux/tsacct_kern.h> 21#include <linux/tsacct_kern.h>
22#include <linux/delayacct.h> 22#include <linux/delayacct.h>
23#include <linux/tsacct_kern.h>
24#include <linux/cpumask.h> 23#include <linux/cpumask.h>
25#include <linux/percpu.h> 24#include <linux/percpu.h>
26#include <net/genetlink.h> 25#include <net/genetlink.h>
diff --git a/kernel/time.c b/kernel/time.c
index 2289a8d683..2d5b6a6821 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -34,7 +34,6 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/module.h>
38 37
39#include <asm/uaccess.h> 38#include <asm/uaccess.h>
40#include <asm/unistd.h> 39#include <asm/unistd.h>
@@ -57,11 +56,7 @@ EXPORT_SYMBOL(sys_tz);
57 */ 56 */
58asmlinkage long sys_time(time_t __user * tloc) 57asmlinkage long sys_time(time_t __user * tloc)
59{ 58{
60 time_t i; 59 time_t i = get_seconds();
61 struct timespec tv;
62
63 getnstimeofday(&tv);
64 i = tv.tv_sec;
65 60
66 if (tloc) { 61 if (tloc) {
67 if (put_user(i,tloc)) 62 if (put_user(i,tloc))
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index fc3fc79b3d..8cfb8b2ce7 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -222,20 +222,8 @@ static void tick_do_broadcast_on_off(void *why)
222 if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) 222 if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
223 goto out; 223 goto out;
224 224
225 /* 225 if (!tick_device_is_functional(dev))
226 * Defect device ? 226 goto out;
227 */
228 if (!tick_device_is_functional(dev)) {
229 /*
230 * AMD C1E wreckage fixup:
231 *
232 * Device was registered functional in the first
233 * place. Now the secondary CPU detected the C1E
234 * misfeature and notifies us to fix it up
235 */
236 if (*reason != CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
237 goto out;
238 }
239 227
240 switch (*reason) { 228 switch (*reason) {
241 case CLOCK_EVT_NOTIFY_BROADCAST_ON: 229 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
@@ -246,6 +234,8 @@ static void tick_do_broadcast_on_off(void *why)
246 clockevents_set_mode(dev, 234 clockevents_set_mode(dev,
247 CLOCK_EVT_MODE_SHUTDOWN); 235 CLOCK_EVT_MODE_SHUTDOWN);
248 } 236 }
237 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
238 dev->features |= CLOCK_EVT_FEAT_DUMMY;
249 break; 239 break;
250 case CLOCK_EVT_NOTIFY_BROADCAST_OFF: 240 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
251 if (cpu_isset(cpu, tick_broadcast_mask)) { 241 if (cpu_isset(cpu, tick_broadcast_mask)) {
@@ -274,21 +264,12 @@ out:
274 */ 264 */
275void tick_broadcast_on_off(unsigned long reason, int *oncpu) 265void tick_broadcast_on_off(unsigned long reason, int *oncpu)
276{ 266{
277 int cpu = get_cpu(); 267 if (!cpu_isset(*oncpu, cpu_online_map))
278
279 if (!cpu_isset(*oncpu, cpu_online_map)) {
280 printk(KERN_ERR "tick-braodcast: ignoring broadcast for " 268 printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
281 "offline CPU #%d\n", *oncpu); 269 "offline CPU #%d\n", *oncpu);
282 } else { 270 else
283 271 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
284 if (cpu == *oncpu) 272 &reason, 1, 1);
285 tick_do_broadcast_on_off(&reason);
286 else
287 smp_call_function_single(*oncpu,
288 tick_do_broadcast_on_off,
289 &reason, 1, 1);
290 }
291 put_cpu();
292} 273}
293 274
294/* 275/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8c3fef1db0..ce89ffb474 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -570,7 +570,7 @@ void tick_setup_sched_timer(void)
570 /* Get the next period (per cpu) */ 570 /* Get the next period (per cpu) */
571 ts->sched_timer.expires = tick_init_jiffy_update(); 571 ts->sched_timer.expires = tick_init_jiffy_update();
572 offset = ktime_to_ns(tick_period) >> 1; 572 offset = ktime_to_ns(tick_period) >> 1;
573 do_div(offset, NR_CPUS); 573 do_div(offset, num_possible_cpus());
574 offset *= smp_processor_id(); 574 offset *= smp_processor_id();
575 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); 575 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
576 576
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4ad79f6bde..e5e466b275 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -24,9 +24,7 @@
24 * This read-write spinlock protects us from races in SMP while 24 * This read-write spinlock protects us from races in SMP while
25 * playing with xtime and avenrun. 25 * playing with xtime and avenrun.
26 */ 26 */
27__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); 27__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
28
29EXPORT_SYMBOL(xtime_lock);
30 28
31 29
32/* 30/*
@@ -47,21 +45,13 @@ EXPORT_SYMBOL(xtime_lock);
47struct timespec xtime __attribute__ ((aligned (16))); 45struct timespec xtime __attribute__ ((aligned (16)));
48struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 46struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
49static unsigned long total_sleep_time; /* seconds */ 47static unsigned long total_sleep_time; /* seconds */
50EXPORT_SYMBOL(xtime);
51
52 48
53#ifdef CONFIG_NO_HZ
54static struct timespec xtime_cache __attribute__ ((aligned (16))); 49static struct timespec xtime_cache __attribute__ ((aligned (16)));
55static inline void update_xtime_cache(u64 nsec) 50static inline void update_xtime_cache(u64 nsec)
56{ 51{
57 xtime_cache = xtime; 52 xtime_cache = xtime;
58 timespec_add_ns(&xtime_cache, nsec); 53 timespec_add_ns(&xtime_cache, nsec);
59} 54}
60#else
61#define xtime_cache xtime
62/* We do *not* want to evaluate the argument for this case */
63#define update_xtime_cache(n) do { } while (0)
64#endif
65 55
66static struct clocksource *clock; /* pointer to current clocksource */ 56static struct clocksource *clock; /* pointer to current clocksource */
67 57
diff --git a/kernel/user.c b/kernel/user.c
index f0e561e6d0..e91331c457 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -44,7 +44,6 @@ struct user_struct root_user = {
44 .processes = ATOMIC_INIT(1), 44 .processes = ATOMIC_INIT(1),
45 .files = ATOMIC_INIT(0), 45 .files = ATOMIC_INIT(0),
46 .sigpending = ATOMIC_INIT(0), 46 .sigpending = ATOMIC_INIT(0),
47 .mq_bytes = 0,
48 .locked_shm = 0, 47 .locked_shm = 0,
49#ifdef CONFIG_KEYS 48#ifdef CONFIG_KEYS
50 .uid_keyring = &root_user_keyring, 49 .uid_keyring = &root_user_keyring,
@@ -58,19 +57,17 @@ struct user_struct root_user = {
58/* 57/*
59 * These routines must be called with the uidhash spinlock held! 58 * These routines must be called with the uidhash spinlock held!
60 */ 59 */
61static inline void uid_hash_insert(struct user_struct *up, 60static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
62 struct hlist_head *hashent)
63{ 61{
64 hlist_add_head(&up->uidhash_node, hashent); 62 hlist_add_head(&up->uidhash_node, hashent);
65} 63}
66 64
67static inline void uid_hash_remove(struct user_struct *up) 65static void uid_hash_remove(struct user_struct *up)
68{ 66{
69 hlist_del_init(&up->uidhash_node); 67 hlist_del_init(&up->uidhash_node);
70} 68}
71 69
72static inline struct user_struct *uid_hash_find(uid_t uid, 70static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
73 struct hlist_head *hashent)
74{ 71{
75 struct user_struct *user; 72 struct user_struct *user;
76 struct hlist_node *h; 73 struct hlist_node *h;
@@ -87,9 +84,6 @@ static inline struct user_struct *uid_hash_find(uid_t uid,
87 84
88#ifdef CONFIG_FAIR_USER_SCHED 85#ifdef CONFIG_FAIR_USER_SCHED
89 86
90static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
91static DEFINE_MUTEX(uids_mutex);
92
93static void sched_destroy_user(struct user_struct *up) 87static void sched_destroy_user(struct user_struct *up)
94{ 88{
95 sched_destroy_group(up->tg); 89 sched_destroy_group(up->tg);
@@ -111,6 +105,19 @@ static void sched_switch_user(struct task_struct *p)
111 sched_move_task(p); 105 sched_move_task(p);
112} 106}
113 107
108#else /* CONFIG_FAIR_USER_SCHED */
109
110static void sched_destroy_user(struct user_struct *up) { }
111static int sched_create_user(struct user_struct *up) { return 0; }
112static void sched_switch_user(struct task_struct *p) { }
113
114#endif /* CONFIG_FAIR_USER_SCHED */
115
116#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
117
118static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
119static DEFINE_MUTEX(uids_mutex);
120
114static inline void uids_mutex_lock(void) 121static inline void uids_mutex_lock(void)
115{ 122{
116 mutex_lock(&uids_mutex); 123 mutex_lock(&uids_mutex);
@@ -257,11 +264,8 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
257 schedule_work(&up->work); 264 schedule_work(&up->work);
258} 265}
259 266
260#else /* CONFIG_FAIR_USER_SCHED */ 267#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
261 268
262static void sched_destroy_user(struct user_struct *up) { }
263static int sched_create_user(struct user_struct *up) { return 0; }
264static void sched_switch_user(struct task_struct *p) { }
265static inline int user_kobject_create(struct user_struct *up) { return 0; } 269static inline int user_kobject_create(struct user_struct *up) { return 0; }
266static inline void uids_mutex_lock(void) { } 270static inline void uids_mutex_lock(void) { }
267static inline void uids_mutex_unlock(void) { } 271static inline void uids_mutex_unlock(void) { }
@@ -280,7 +284,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
280 kmem_cache_free(uid_cachep, up); 284 kmem_cache_free(uid_cachep, up);
281} 285}
282 286
283#endif /* CONFIG_FAIR_USER_SCHED */ 287#endif
284 288
285/* 289/*
286 * Locate the user_struct for the passed UID. If found, take a ref on it. The 290 * Locate the user_struct for the passed UID. If found, take a ref on it. The
@@ -343,8 +347,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
343 atomic_set(&new->inotify_watches, 0); 347 atomic_set(&new->inotify_watches, 0);
344 atomic_set(&new->inotify_devs, 0); 348 atomic_set(&new->inotify_devs, 0);
345#endif 349#endif
346 350#ifdef CONFIG_POSIX_MQUEUE
347 new->mq_bytes = 0; 351 new->mq_bytes = 0;
352#endif
348 new->locked_shm = 0; 353 new->locked_shm = 0;
349 354
350 if (alloc_uid_keyring(new, current) < 0) { 355 if (alloc_uid_keyring(new, current) < 0) {