Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile            |   5
-rw-r--r--  kernel/cgroup.c            |  20
-rw-r--r--  kernel/cpu.c               |  44
-rw-r--r--  kernel/cpu_pm.c            |  16
-rw-r--r--  kernel/cred.c              |   9
-rw-r--r--  kernel/exit.c              |  13
-rw-r--r--  kernel/fork.c              |  28
-rw-r--r--  kernel/irq/manage.c        |  80
-rw-r--r--  kernel/kallsyms.c          |  32
-rw-r--r--  kernel/kcmp.c              | 196
-rw-r--r--  kernel/kmod.c              |  30
-rw-r--r--  kernel/pid_namespace.c     |  13
-rw-r--r--  kernel/res_counter.c       |  10
-rw-r--r--  kernel/resource.c          |   4
-rw-r--r--  kernel/signal.c            |  11
-rw-r--r--  kernel/sys.c               | 213
-rw-r--r--  kernel/sys_ni.c            |   3
-rw-r--r--  kernel/task_work.c         |  84
-rw-r--r--  kernel/trace/ring_buffer.c |   5
19 files changed, 636 insertions, 180 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 296132c19a57..c0cc67ad764c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
 obj-y     = fork.o exec_domain.o panic.o printk.o \
	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o workqueue.o pid.o \
+	    signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
	    rcupdate.o extable.o params.o posix-timers.o \
	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
@@ -25,6 +25,9 @@ endif
 obj-y += sched/
 obj-y += power/
 
+ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
+obj-$(CONFIG_X86) += kcmp.o
+endif
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a0c6af34d500..0f3527d6184a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5132,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth);
  * @root: the css supporsed to be an ancestor of the child.
  *
  * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
- * this function reads css->id, this use rcu_dereference() and rcu_read_lock().
+ * this function reads css->id, the caller must hold rcu_read_lock().
  * But, considering usual usage, the csses should be valid objects after test.
  * Assuming that the caller will do some action to the child if this returns
  * returns true, the caller must take "child";s reference count.
@@ -5144,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
 {
 	struct css_id *child_id;
 	struct css_id *root_id;
-	bool ret = true;
 
-	rcu_read_lock();
 	child_id = rcu_dereference(child->id);
+	if (!child_id)
+		return false;
 	root_id = rcu_dereference(root->id);
-	if (!child_id
-	    || !root_id
-	    || (child_id->depth < root_id->depth)
-	    || (child_id->stack[root_id->depth] != root_id->id))
-		ret = false;
-	rcu_read_unlock();
-	return ret;
+	if (!root_id)
+		return false;
+	if (child_id->depth < root_id->depth)
+		return false;
+	if (child_id->stack[root_id->depth] != root_id->id)
+		return false;
+	return true;
 }
 
 void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..a4eb5227a19e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,7 +10,10 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+/**
+ * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
+ * @cpu: a CPU id
+ *
+ * This function walks all processes, finds a valid mm struct for each one and
+ * then clears a corresponding bit in mm's cpumask. While this all sounds
+ * trivial, there are various non-obvious corner cases, which this function
+ * tries to solve in a safe manner.
+ *
+ * Also note that the function uses a somewhat relaxed locking scheme, so it may
+ * be called only for an already offlined CPU.
+ */
+void clear_tasks_mm_cpumask(int cpu)
+{
+	struct task_struct *p;
+
+	/*
+	 * This function is called after the cpu is taken down and marked
+	 * offline, so its not like new tasks will ever get this cpu set in
+	 * their mm mask. -- Peter Zijlstra
+	 * Thus, we may use rcu_read_lock() here, instead of grabbing
+	 * full-fledged tasklist_lock.
+	 */
+	WARN_ON(cpu_online(cpu));
+	rcu_read_lock();
+	for_each_process(p) {
+		struct task_struct *t;
+
+		/*
+		 * Main thread might exit, but other threads may still have
+		 * a valid mm. Find one.
+		 */
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+		task_unlock(t);
+	}
+	rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
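For context, the new clear_tasks_mm_cpumask() helper is meant to be called from an architecture's CPU-teardown path once the dying CPU has been marked offline. A minimal sketch of such a caller follows; the function and file names are hypothetical, and the declaration is assumed to land in <linux/cpu.h>:

	/* Hypothetical arch hotplug teardown using the new helper (illustrative only). */
	#include <linux/cpu.h>
	#include <linux/cpumask.h>

	static void example_cpu_teardown(unsigned int cpu)
	{
		/* The CPU must already be offline, as the WARN_ON above insists. */
		WARN_ON(cpu_online(cpu));

		/* Drop the dead CPU from every task's mm_cpumask. */
		clear_tasks_mm_cpumask(cpu);

		/* Architecture-specific cleanup would follow here. */
	}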
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 249152e15308..9656a3c36503 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
 /**
- * cpm_pm_enter - CPU low power entry notifier
+ * cpu_pm_enter - CPU low power entry notifier
  *
  * Notifies listeners that a single CPU is entering a low power state that may
  * cause some blocks in the same power domain as the cpu to reset.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  * Must be called on the affected CPU with interrupts disabled. Platform is
  * responsible for ensuring that cpu_pm_enter is not called twice on the same
  * CPU before cpu_pm_exit is called. Notified drivers can include VFP
- * co-processor, interrupt controller and it's PM extensions, local CPU
+ * co-processor, interrupt controller and its PM extensions, local CPU
  * timers context save/restore which shouldn't be interrupted. Hence it
  * must be called with interrupts disabled.
  *
@@ -115,13 +115,13 @@ int cpu_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_pm_enter);
 
 /**
- * cpm_pm_exit - CPU low power exit notifier
+ * cpu_pm_exit - CPU low power exit notifier
  *
  * Notifies listeners that a single CPU is exiting a low power state that may
  * have caused some blocks in the same power domain as the cpu to reset.
  *
  * Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
@@ -139,7 +139,7 @@ int cpu_pm_exit(void)
 EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
 /**
- * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ * cpu_cluster_pm_enter - CPU cluster low power entry notifier
  *
  * Notifies listeners that all cpus in a power domain are entering a low power
  * state that may cause some blocks in the same power domain to reset.
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit);
  * Must be called after cpu_pm_enter has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Must be called with interrupts disabled.
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
 
 /**
- * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ * cpu_cluster_pm_exit - CPU cluster low power exit notifier
  *
  * Notifies listeners that all cpus in a power domain are exiting form a
  * low power state that may have caused some blocks in the same power domain
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  * Must be called after cpu_pm_exit has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
diff --git a/kernel/cred.c b/kernel/cred.c
index 430557ea488f..de728ac50d82 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -207,13 +207,6 @@ void exit_creds(struct task_struct *tsk)
 	validate_creds(cred);
 	alter_cred_subscribers(cred, -1);
 	put_cred(cred);
-
-	cred = (struct cred *) tsk->replacement_session_keyring;
-	if (cred) {
-		tsk->replacement_session_keyring = NULL;
-		validate_creds(cred);
-		put_cred(cred);
-	}
 }
 
 /**
@@ -396,8 +389,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
-	p->replacement_session_keyring = NULL;
-
 	if (
 #ifdef CONFIG_KEYS
 		!p->cred->thread_keyring &&
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..34867cc5b42a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -884,9 +884,9 @@ static void check_stack_usage(void)
 
 	spin_lock(&low_water_lock);
 	if (free < lowest_to_date) {
-		printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
-				"left\n",
-				current->comm, free);
+		printk(KERN_WARNING "%s (%d) used greatest stack depth: "
+				"%lu bytes left\n",
+				current->comm, task_pid_nr(current), free);
 		lowest_to_date = free;
 	}
 	spin_unlock(&low_water_lock);
@@ -946,12 +946,13 @@ void do_exit(long code)
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
-	 * an exiting task cleaning up the robust pi futexes.
+	 * an exiting task cleaning up the robust pi futexes, and in
+	 * task_work_add() to avoid the race with exit_task_work().
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 
-	exit_irq_thread();
+	exit_task_work(tsk);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -1214,7 +1215,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
-	uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
+	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
 	struct siginfo __user *infop;
 
 	if (!likely(wo->wo_flags & WEXITED))
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..ab5211b9e622 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		}
 		charge = 0;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
-			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+			unsigned long len;
+			len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
 				goto fail_nomem;
 			charge = len;
@@ -614,7 +615,6 @@ void mmput(struct mm_struct *mm)
 		list_del(&mm->mmlist);
 		spin_unlock(&mmlist_lock);
 	}
-	put_swap_token(mm);
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
 	mmdrop(mm);
@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	if (tsk->vfork_done)
-		complete_vfork_done(tsk);
-
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested. We leave this alone when dying by signal, to leave
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		}
 		tsk->clear_child_tid = NULL;
 	}
+
+	/*
+	 * All done, finally we can wake up parent and return this mm to him.
+	 * Also kthread_stop() uses this completion for synchronization.
+	 */
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 }
 
 /*
@@ -831,10 +835,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	memcpy(mm, oldmm, sizeof(*mm));
 	mm_init_cpumask(mm);
 
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
 #endif
@@ -913,10 +913,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 		goto fail_nomem;
 
 good_mm:
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
@@ -984,9 +980,8 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 	 * Share io context with parent, if CLONE_IO is set
 	 */
 	if (clone_flags & CLONE_IO) {
-		tsk->io_context = ioc_task_link(ioc);
-		if (unlikely(!tsk->io_context))
-			return -ENOMEM;
+		ioc_task_link(ioc);
+		tsk->io_context = ioc;
 	} else if (ioprio_valid(ioc->ioprio)) {
 		new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
 		if (unlikely(!new_ioc))
@@ -1420,6 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
+	INIT_HLIST_HEAD(&p->task_works);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bb32326afe87..ea0c6c2ae6f7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -7,6 +7,8 @@
  * This file contains driver APIs to the irq subsystem.
  */
 
+#define pr_fmt(fmt) "genirq: " fmt
+
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
@@ -14,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/task_work.h>
 
 #include "internals.h"
 
@@ -565,7 +568,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 	 * IRQF_TRIGGER_* but the PIC does not support multiple
 	 * flow-types?
 	 */
-	pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq,
+	pr_debug("No set_type function for IRQ %d (%s)\n", irq,
 		 chip ? (chip->name ? : "unknown") : "unknown");
 	return 0;
 }
@@ -600,7 +603,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		ret = 0;
 		break;
 	default:
-		pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n",
+		pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
 		       flags, irq, chip->irq_set_type);
 	}
 	if (unmask)
@@ -773,11 +776,39 @@ static void wake_threads_waitq(struct irq_desc *desc)
 	wake_up(&desc->wait_for_threads);
 }
 
+static void irq_thread_dtor(struct task_work *unused)
+{
+	struct task_struct *tsk = current;
+	struct irq_desc *desc;
+	struct irqaction *action;
+
+	if (WARN_ON_ONCE(!(current->flags & PF_EXITING)))
+		return;
+
+	action = kthread_data(tsk);
+
+	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+
+
+	desc = irq_to_desc(action->irq);
+	/*
+	 * If IRQTF_RUNTHREAD is set, we need to decrement
+	 * desc->threads_active and wake possible waiters.
+	 */
+	if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+		wake_threads_waitq(desc);
+
+	/* Prevent a stale desc->threads_oneshot */
+	irq_finalize_oneshot(desc, action);
+}
+
 /*
  * Interrupt handler thread
  */
 static int irq_thread(void *data)
 {
+	struct task_work on_exit_work;
 	static const struct sched_param param = {
 		.sched_priority = MAX_USER_RT_PRIO/2,
 	};
@@ -793,7 +824,9 @@ static int irq_thread(void *data)
 		handler_fn = irq_thread_fn;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
-	current->irq_thread = 1;
+
+	init_task_work(&on_exit_work, irq_thread_dtor, NULL);
+	task_work_add(current, &on_exit_work, false);
 
 	while (!irq_wait_for_interrupt(action)) {
 		irqreturn_t action_ret;
@@ -815,44 +848,11 @@ static int irq_thread(void *data)
 	 * cannot touch the oneshot mask at this point anymore as
 	 * __setup_irq() might have given out currents thread_mask
 	 * again.
-	 *
-	 * Clear irq_thread. Otherwise exit_irq_thread() would make
-	 * fuzz about an active irq thread going into nirvana.
 	 */
-	current->irq_thread = 0;
+	task_work_cancel(current, irq_thread_dtor);
 	return 0;
 }
 
-/*
- * Called from do_exit()
- */
-void exit_irq_thread(void)
-{
-	struct task_struct *tsk = current;
-	struct irq_desc *desc;
-	struct irqaction *action;
-
-	if (!tsk->irq_thread)
-		return;
-
-	action = kthread_data(tsk);
-
-	pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
-
-	desc = irq_to_desc(action->irq);
-
-	/*
-	 * If IRQTF_RUNTHREAD is set, we need to decrement
-	 * desc->threads_active and wake possible waiters.
-	 */
-	if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
-		wake_threads_waitq(desc);
-
-	/* Prevent a stale desc->threads_oneshot */
-	irq_finalize_oneshot(desc, action);
-}
-
 static void irq_setup_forced_threading(struct irqaction *new)
 {
 	if (!force_irqthreads)
@@ -1044,7 +1044,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 * has. The type flags are unreliable as the
 		 * underlying chip implementation can override them.
 		 */
-		pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
+		pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
 		       irq);
 		ret = -EINVAL;
 		goto out_mask;
@@ -1095,7 +1095,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 		if (nmsk != omsk)
 			/* hope the handler works with current trigger mode */
-			pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n",
+			pr_warning("irq %d uses trigger mode %u; requested %u\n",
 				   irq, nmsk, omsk);
 	}
 
@@ -1133,7 +1133,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 mismatch:
 	if (!(new->flags & IRQF_PROBE_SHARED)) {
-		pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
+		pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
 		       irq, new->flags, new->name, old->flags, old->name);
 #ifdef CONFIG_DEBUG_SHIRQ
 		dump_stack();
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 079f1d39a8b8..2169feeba529 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 
 /* Look up a kernel symbol and return it in a text buffer. */
 static int __sprint_symbol(char *buffer, unsigned long address,
-			int symbol_offset)
+			int symbol_offset, int add_offset)
 {
 	char *modname;
 	const char *name;
@@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address,
 	if (name != buffer)
 		strcpy(buffer, name);
 	len = strlen(buffer);
-	buffer += len;
 	offset -= symbol_offset;
 
+	if (add_offset)
+		len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
+
 	if (modname)
-		len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
-	else
-		len += sprintf(buffer, "+%#lx/%#lx", offset, size);
+		len += sprintf(buffer + len, " [%s]", modname);
 
 	return len;
 }
@@ -382,12 +382,28 @@ static int __sprint_symbol(char *buffer, unsigned long address,
  */
 int sprint_symbol(char *buffer, unsigned long address)
 {
-	return __sprint_symbol(buffer, address, 0);
+	return __sprint_symbol(buffer, address, 0, 1);
 }
-
 EXPORT_SYMBOL_GPL(sprint_symbol);
 
 /**
+ * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function looks up a kernel symbol with @address and stores its name
+ * and module name to @buffer if possible. If no symbol was found, just saves
+ * its @address as is.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_symbol_no_offset(char *buffer, unsigned long address)
+{
+	return __sprint_symbol(buffer, address, 0, 0);
+}
+EXPORT_SYMBOL_GPL(sprint_symbol_no_offset);
+
+/**
  * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
  * @buffer: buffer to be stored
  * @address: address to lookup
@@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol);
  */
 int sprint_backtrace(char *buffer, unsigned long address)
 {
-	return __sprint_symbol(buffer, address, -1);
+	return __sprint_symbol(buffer, address, -1, 1);
 }
 
 /* Look up a kernel symbol and print it to the kernel messages. */
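The only difference between the two exported helpers is the "+offset/size" suffix. A small illustrative sketch (not part of the patch; the example output strings in the comments are made up):

	#include <linux/kallsyms.h>
	#include <linux/printk.h>

	static void example_print_symbol(unsigned long addr)
	{
		char buf[KSYM_SYMBOL_LEN];

		sprint_symbol(buf, addr);
		pr_info("with offset:    %s\n", buf);	/* e.g. "worker_thread+0x10/0x2a0" */

		sprint_symbol_no_offset(buf, addr);
		pr_info("without offset: %s\n", buf);	/* e.g. "worker_thread" */
	}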
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointers values and compare the production instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+	return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+	long ret;
+
+	ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+
+	return (ret < 0) | ((ret > 0) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	task_lock(task);
+	rcu_read_lock();
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+	if (likely(m2 != m1))
+		mutex_unlock(m2);
+	mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+	int err;
+
+	if (m2 > m1)
+		swap(m1, m2);
+
+	err = mutex_lock_killable(m1);
+	if (!err && likely(m1 != m2)) {
+		err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+		if (err)
+			mutex_unlock(m1);
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1, *task2;
+	int ret;
+
+	rcu_read_lock();
+
+	/*
+	 * Tasks are looked up in caller's PID namespace only.
+	 */
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2)
+		goto err_no_task;
+
+	get_task_struct(task1);
+	get_task_struct(task2);
+
+	rcu_read_unlock();
+
+	/*
+	 * One should have enough rights to inspect task details.
+	 */
+	ret = kcmp_lock(&task1->signal->cred_guard_mutex,
+			&task2->signal->cred_guard_mutex);
+	if (ret)
+		goto err;
+	if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+	    !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+		ret = -EPERM;
+		goto err_unlock;
+	}
+
+	switch (type) {
+	case KCMP_FILE: {
+		struct file *filp1, *filp2;
+
+		filp1 = get_file_raw_ptr(task1, idx1);
+		filp2 = get_file_raw_ptr(task2, idx2);
+
+		if (filp1 && filp2)
+			ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
+		else
+			ret = -EBADF;
+		break;
+	}
+	case KCMP_VM:
+		ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+		break;
+	case KCMP_FILES:
+		ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+		break;
+	case KCMP_FS:
+		ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+		break;
+	case KCMP_SIGHAND:
+		ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+		break;
+	case KCMP_IO:
+		ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+		break;
+	case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+		ret = kcmp_ptr(task1->sysvsem.undo_list,
+			       task2->sysvsem.undo_list,
+			       KCMP_SYSVSEM);
+#else
+		ret = -EOPNOTSUPP;
+#endif
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+err_unlock:
+	kcmp_unlock(&task1->signal->cred_guard_mutex,
+		    &task2->signal->cred_guard_mutex);
+err:
+	put_task_struct(task1);
+	put_task_struct(task2);
+
+	return ret;
+
+err_no_task:
+	rcu_read_unlock();
+	return -ESRCH;
+}
+
+static __init int kcmp_cookies_init(void)
+{
+	int i;
+
+	get_random_bytes(cookies, sizeof(cookies));
+
+	for (i = 0; i < KCMP_TYPES; i++)
+		cookies[i][1] |= (~(~0UL >> 1) | 1);
+
+	return 0;
+}
+arch_initcall(kcmp_cookies_init);
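For reference, a userspace sketch of calling the new syscall to check whether two descriptors of the same process point at the same struct file. This is illustrative only: there is no libc wrapper, so the raw syscall number is used, and __NR_kcmp plus the KCMP_* constants are assumed to be visible through the freshly added <linux/kcmp.h> and the architecture's unistd.h:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/kcmp.h>

	int main(void)
	{
		pid_t me = getpid();

		/* 0 means "same struct file", 1/2 give an ordering, 3 is reserved. */
		long ret = syscall(__NR_kcmp, me, me, KCMP_FILE, 0UL, 1UL);

		printf("fd 0 vs fd 1: %s (ret=%ld)\n",
		       ret == 0 ? "same file" : "different or error", ret);
		return 0;
	}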
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 05698a7415fe..ff2c7cb86d77 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -221,13 +221,12 @@ fail:
 	return 0;
 }
 
-void call_usermodehelper_freeinfo(struct subprocess_info *info)
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
 	if (info->cleanup)
 		(*info->cleanup)(info);
 	kfree(info);
 }
-EXPORT_SYMBOL(call_usermodehelper_freeinfo);
 
 static void umh_complete(struct subprocess_info *sub_info)
 {
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
 
 /**
  * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
- * depth: New value to assign to usermodehelper_disabled.
+ * @depth: New value to assign to usermodehelper_disabled.
  *
  * Change the value of usermodehelper_disabled (under umhelper_sem locked for
  * writing) and wakeup tasks waiting for it to change.
@@ -479,6 +478,7 @@ static void helper_unlock(void)
 * structure. This should be passed to call_usermodehelper_exec to
 * exec the process and free the structure.
 */
+static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 						  char **envp, gfp_t gfp_mask)
 {
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
   out:
 	return sub_info;
 }
-EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_setfns - set a cleanup/init function
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
 * Function must be runnable in either a process context or the
 * context in which call_usermodehelper_exec is called.
 */
+static
 void call_usermodehelper_setfns(struct subprocess_info *info,
 		    int (*init)(struct subprocess_info *info, struct cred *new),
 		    void (*cleanup)(struct subprocess_info *info),
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
 	info->init = init;
 	info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setfns);
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
 * asynchronously if wait is not set, and runs as a child of keventd.
 * (ie. it runs with full root capabilities).
 */
+static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
@@ -576,7 +576,25 @@ unlock:
 	helper_unlock();
 	return retval;
 }
-EXPORT_SYMBOL(call_usermodehelper_exec);
+
+int call_usermodehelper_fns(
+	char *path, char **argv, char **envp, int wait,
+	int (*init)(struct subprocess_info *info, struct cred *new),
+	void (*cleanup)(struct subprocess_info *), void *data)
+{
+	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+	if (info == NULL)
+		return -ENOMEM;
+
+	call_usermodehelper_setfns(info, init, cleanup, data);
+
+	return call_usermodehelper_exec(info, wait);
+}
+EXPORT_SYMBOL(call_usermodehelper_fns);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
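With the setup/setfns/exec trio now static, out-of-file users are expected to go through the new call_usermodehelper_fns() — the orderly_poweroff() conversion in kernel/sys.c below is the in-tree example. A minimal sketch, with a made-up helper path and assuming the declaration is exported via <linux/kmod.h>:

	#include <linux/kmod.h>

	static int example_run_helper(void)
	{
		char *argv[] = { "/sbin/example-helper", "--oneshot", NULL };
		char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

		/* init/cleanup callbacks and their data are optional; pass NULL when unused. */
		return call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_PROC,
					       NULL, NULL, NULL);
	}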
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 57bc1fd35b3c..16b20e38c4a1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
 	int nr;
 	int rc;
-	struct task_struct *task;
+	struct task_struct *task, *me = current;
+
+	/* Ignore SIGCHLD causing any terminated children to autoreap */
+	spin_lock_irq(&me->sighand->siglock);
+	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
+	spin_unlock_irq(&me->sighand->siglock);
 
 	/*
 	 * The last thread in the cgroup-init thread group is terminating.
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	return;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = {
 	},
 	{ }
 };
-
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+#endif	/* CONFIG_CHECKPOINT_RESTORE */
 
 int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 {
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
 	register_sysctl_paths(kern_path, pid_ns_ctl_table);
+#endif
 	return 0;
 }
 
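The SIG_IGN trick above leans on the usual POSIX rule that children of a process which ignores SIGCHLD are reaped automatically. The same behaviour can be observed from userspace; a small illustration, unrelated to the patch code itself:

	#include <signal.h>
	#include <stdio.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		signal(SIGCHLD, SIG_IGN);	/* terminated children are autoreaped */

		if (fork() == 0)
			_exit(0);		/* child exits immediately */

		sleep(1);			/* give the child time to go away */

		if (wait(NULL) < 0)
			perror("wait");		/* expected: "No child processes" */
		return 0;
	}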
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bebe2b170d49..ad581aa2369a 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -94,13 +94,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge_until(struct res_counter *counter,
+				struct res_counter *top,
+				unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
 
 	local_irq_save(flags);
-	for (c = counter; c != NULL; c = c->parent) {
+	for (c = counter; c != top; c = c->parent) {
 		spin_lock(&c->lock);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
@@ -108,6 +110,10 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 	local_irq_restore(flags);
 }
 
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+{
+	res_counter_uncharge_until(counter, NULL, val);
+}
 
 static inline unsigned long long *
 res_counter_member(struct res_counter *counter, int member)
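res_counter_uncharge_until() walks the parent chain but stops before the given ancestor, so a charge can be handed back to an intermediate level instead of the whole hierarchy. A sketch, assuming the declaration is also added to <linux/res_counter.h>; "child" and "ancestor" are hypothetical counters wired up elsewhere:

	#include <linux/res_counter.h>

	static void example_partial_uncharge(struct res_counter *child,
					     struct res_counter *ancestor,
					     unsigned long val)
	{
		/* Uncharges child, child->parent, ... and stops when it reaches "ancestor". */
		res_counter_uncharge_until(child, ancestor, val);

		/* A NULL top keeps the old behaviour: uncharge the entire chain. */
	}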
diff --git a/kernel/resource.c b/kernel/resource.c
index 7e8ea66a8c01..e1d2b8ee76d5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -515,8 +515,8 @@ out:
  * @root: root resource descriptor
  * @new: resource descriptor desired by caller
  * @size: requested resource region size
- * @min: minimum size to allocate
- * @max: maximum size to allocate
+ * @min: minimum boundary to allocate
+ * @max: maximum boundary to allocate
  * @align: alignment requested, in bytes
  * @alignf: alignment function, optional, called if not NULL
  * @alignf_data: arbitrary data to pass to the @alignf function
diff --git a/kernel/signal.c b/kernel/signal.c
index f7b418217633..08dfbd748cd2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	/*
-	 * we are under tasklist_lock here so our parent is tied to
-	 * us and cannot exit and release its namespace.
+	 * We are under tasklist_lock here so our parent is tied to
+	 * us and cannot change.
 	 *
-	 * the only it can is to switch its nsproxy with sys_unshare,
-	 * bu uncharing pid namespaces is not allowed, so we'll always
-	 * see relevant namespace
+	 * task_active_pid_ns will always return the same pid namespace
+	 * until a task passes through release_task.
 	 *
 	 * write_lock() currently calls preempt_disable() which is the
 	 * same as rcu_read_lock(), but according to Oleg, this is not
 	 * correct to rely on this
 	 */
 	rcu_read_lock();
-	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent));
 	info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns),
 				       task_uid(tsk));
 	rcu_read_unlock();
diff --git a/kernel/sys.c b/kernel/sys.c
index 6df42624e454..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/fs_struct.h>
+#include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
 		memcpy(u->nodename, tmp, len);
 		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
 		errno = 0;
+		uts_proc_notify(UTS_PROC_HOSTNAME);
 	}
-	uts_proc_notify(UTS_PROC_HOSTNAME);
 	up_write(&uts_sem);
 	return errno;
 }
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
 		memcpy(u->domainname, tmp, len);
 		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
 		errno = 0;
+		uts_proc_notify(UTS_PROC_DOMAINNAME);
 	}
-	uts_proc_notify(UTS_PROC_DOMAINNAME);
 	up_write(&uts_sem);
 	return errno;
 }
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
+static bool vma_flags_mismatch(struct vm_area_struct *vma,
+			       unsigned long required,
+			       unsigned long banned)
+{
+	return (vma->vm_flags & required) != required ||
+		(vma->vm_flags & banned);
+}
+
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
+{
+	struct file *exe_file;
+	struct dentry *dentry;
+	int err;
+
+	/*
+	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
+	 * remain. So perform a quick test first.
+	 */
+	if (mm->num_exe_file_vmas)
+		return -EBUSY;
+
+	exe_file = fget(fd);
+	if (!exe_file)
+		return -EBADF;
+
+	dentry = exe_file->f_path.dentry;
+
+	/*
+	 * Because the original mm->exe_file points to executable file, make
+	 * sure that this one is executable as well, to avoid breaking an
+	 * overall picture.
+	 */
+	err = -EACCES;
+	if (!S_ISREG(dentry->d_inode->i_mode) ||
+	    exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+		goto exit;
+
+	err = inode_permission(dentry->d_inode, MAY_EXEC);
+	if (err)
+		goto exit;
+
+	/*
+	 * The symlink can be changed only once, just to disallow arbitrary
+	 * transitions malicious software might bring in. This means one
+	 * could make a snapshot over all processes running and monitor
+	 * /proc/pid/exe changes to notice unusual activity if needed.
+	 */
+	down_write(&mm->mmap_sem);
+	if (likely(!mm->exe_file))
+		set_mm_exe_file(mm, exe_file);
+	else
+		err = -EBUSY;
+	up_write(&mm->mmap_sem);
+
+exit:
+	fput(exe_file);
+	return err;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	unsigned long rlim = rlimit(RLIMIT_DATA);
-	unsigned long vm_req_flags;
-	unsigned long vm_bad_flags;
-	struct vm_area_struct *vma;
-	int error = 0;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int error;
 
-	if (arg4 | arg5)
+	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
 		return -EINVAL;
 
 	if (!capable(CAP_SYS_RESOURCE))
 		return -EPERM;
 
+	if (opt == PR_SET_MM_EXE_FILE)
+		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+
 	if (addr >= TASK_SIZE)
 		return -EINVAL;
 
+	error = -EINVAL;
+
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, addr);
 
-	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
-		/* It must be existing VMA */
-		if (!vma || vma->vm_start > addr)
-			goto out;
-	}
-
-	error = -EINVAL;
 	switch (opt) {
 	case PR_SET_MM_START_CODE:
+		mm->start_code = addr;
+		break;
 	case PR_SET_MM_END_CODE:
-		vm_req_flags = VM_READ | VM_EXEC;
-		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_CODE)
-			mm->start_code = addr;
-		else
-			mm->end_code = addr;
+		mm->end_code = addr;
 		break;
-
 	case PR_SET_MM_START_DATA:
-	case PR_SET_MM_END_DATA:
-		vm_req_flags = VM_READ | VM_WRITE;
-		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_DATA)
-			mm->start_data = addr;
-		else
-			mm->end_data = addr;
+		mm->start_data = addr;
 		break;
-
-	case PR_SET_MM_START_STACK:
-
-#ifdef CONFIG_STACK_GROWSUP
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
-#else
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
-#endif
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
-			goto out;
-
-		mm->start_stack = addr;
+	case PR_SET_MM_END_DATA:
+		mm->end_data = addr;
 		break;
 
 	case PR_SET_MM_START_BRK:
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
 		mm->brk = addr;
 		break;
 
+	/*
+	 * If command line arguments and environment
+	 * are placed somewhere else on stack, we can
+	 * set them up here, ARG_START/END to setup
+	 * command line argumets and ENV_START/END
+	 * for environment.
+	 */
+	case PR_SET_MM_START_STACK:
+	case PR_SET_MM_ARG_START:
+	case PR_SET_MM_ARG_END:
+	case PR_SET_MM_ENV_START:
+	case PR_SET_MM_ENV_END:
+		if (!vma) {
+			error = -EFAULT;
+			goto out;
+		}
+#ifdef CONFIG_STACK_GROWSUP
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
+#else
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
+#endif
+			goto out;
+		if (opt == PR_SET_MM_START_STACK)
+			mm->start_stack = addr;
+		else if (opt == PR_SET_MM_ARG_START)
+			mm->arg_start = addr;
+		else if (opt == PR_SET_MM_ARG_END)
+			mm->arg_end = addr;
+		else if (opt == PR_SET_MM_ENV_START)
+			mm->env_start = addr;
+		else if (opt == PR_SET_MM_ENV_END)
+			mm->env_end = addr;
+		break;
+
+	/*
+	 * This doesn't move auxiliary vector itself
+	 * since it's pinned to mm_struct, but allow
+	 * to fill vector with new values. It's up
+	 * to a caller to provide sane values here
+	 * otherwise user space tools which use this
+	 * vector might be unhappy.
+	 */
+	case PR_SET_MM_AUXV: {
+		unsigned long user_auxv[AT_VECTOR_SIZE];
+
+		if (arg4 > sizeof(user_auxv))
+			goto out;
+		up_read(&mm->mmap_sem);
+
+		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
+			return -EFAULT;
+
+		/* Make sure the last entry is always AT_NULL */
+		user_auxv[AT_VECTOR_SIZE - 2] = 0;
+		user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+		task_lock(current);
+		memcpy(mm->saved_auxv, user_auxv, arg4);
+		task_unlock(current);
+
+		return 0;
+	}
 	default:
-		error = -EINVAL;
 		goto out;
 	}
 
 	error = 0;
-
 out:
 	up_read(&mm->mmap_sem);
-
 	return error;
 }
 #else /* CONFIG_CHECKPOINT_RESTORE */
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force)
 		NULL
 	};
 	int ret = -ENOMEM;
-	struct subprocess_info *info;
 
 	if (argv == NULL) {
 		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
@@ -2122,18 +2209,16 @@ int orderly_poweroff(bool force)
 		goto out;
 	}
 
-	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
-	if (info == NULL) {
-		argv_free(argv);
-		goto out;
-	}
-
-	call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
+	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
+				      NULL, argv_cleanup, NULL);
+out:
+	if (likely(!ret))
+		return 0;
 
-	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
+	if (ret == -ENOMEM)
+		argv_free(argv);
 
- out:
-	if (ret && force) {
+	if (force) {
 		printk(KERN_WARNING "Failed to start orderly shutdown: "
 		       "forcing the issue\n");
 
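From userspace these new options are reached through prctl(PR_SET_MM, ...). A hedged sketch of how a checkpoint/restore tool might move the recorded argv window of its own mm; the addresses are placeholders (they must fall inside a writable stack-like VMA or the kernel returns an error), and the PR_SET_MM_* constants are assumed to come from the updated <linux/prctl.h>:

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int main(void)
	{
		unsigned long arg_start = 0x7f0000000000UL;	/* placeholder address */
		unsigned long arg_end   = 0x7f0000001000UL;	/* placeholder address */

		/* Requires CAP_SYS_RESOURCE and CONFIG_CHECKPOINT_RESTORE=y. */
		if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, arg_start, 0, 0))
			perror("PR_SET_MM_ARG_START");
		if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, arg_end, 0, 0))
			perror("PR_SET_MM_ARG_END");
		return 0;
	}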
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 47bfa16430d7..dbff751e4086 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
 cond_syscall(sys_name_to_handle_at);
 cond_syscall(sys_open_by_handle_at);
 cond_syscall(compat_sys_open_by_handle_at);
+
+/* compare kernel pointers */
+cond_syscall(sys_kcmp);
diff --git a/kernel/task_work.c b/kernel/task_work.c
new file mode 100644
index 000000000000..82d1c794066d
--- /dev/null
+++ b/kernel/task_work.c
@@ -0,0 +1,84 @@
+#include <linux/spinlock.h>
+#include <linux/task_work.h>
+#include <linux/tracehook.h>
+
+int
+task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
+{
+	unsigned long flags;
+	int err = -ESRCH;
+
+#ifndef TIF_NOTIFY_RESUME
+	if (notify)
+		return -ENOTSUPP;
+#endif
+	/*
+	 * We must not insert the new work if the task has already passed
+	 * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
+	 * and check PF_EXITING under pi_lock.
+	 */
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	if (likely(!(task->flags & PF_EXITING))) {
+		hlist_add_head(&twork->hlist, &task->task_works);
+		err = 0;
+	}
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
+	if (likely(!err) && notify)
+		set_notify_resume(task);
+	return err;
+}
+
+struct task_work *
+task_work_cancel(struct task_struct *task, task_work_func_t func)
+{
+	unsigned long flags;
+	struct task_work *twork;
+	struct hlist_node *pos;
+
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
+		if (twork->func == func) {
+			hlist_del(&twork->hlist);
+			goto found;
+		}
+	}
+	twork = NULL;
+ found:
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	return twork;
+}
+
+void task_work_run(void)
+{
+	struct task_struct *task = current;
+	struct hlist_head task_works;
+	struct hlist_node *pos;
+
+	raw_spin_lock_irq(&task->pi_lock);
+	hlist_move_list(&task->task_works, &task_works);
+	raw_spin_unlock_irq(&task->pi_lock);
+
+	if (unlikely(hlist_empty(&task_works)))
+		return;
+	/*
+	 * We use hlist to save the space in task_struct, but we want fifo.
+	 * Find the last entry, the list should be short, then process them
+	 * in reverse order.
+	 */
+	for (pos = task_works.first; pos->next; pos = pos->next)
+		;
+
+	for (;;) {
+		struct hlist_node **pprev = pos->pprev;
+		struct task_work *twork = container_of(pos, struct task_work,
+							hlist);
+		twork->func(twork);
+
+		if (pprev == &task_works.first)
+			break;
+		pos = container_of(pprev, struct hlist_node, next);
+	}
+}
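The intended usage pattern is the one the irq_thread() conversion above follows: embed a struct task_work somewhere with a suitable lifetime, queue it with task_work_add(), and either let it run (via task_work_run() or at exit) or pull it back with task_work_cancel(). A compressed kernel-side sketch, assuming the declarations from the new <linux/task_work.h>:

	#include <linux/sched.h>
	#include <linux/printk.h>
	#include <linux/task_work.h>

	static void example_work_fn(struct task_work *twork)
	{
		pr_info("task %d is running its queued work\n", current->pid);
	}

	static int example_queue_work(void)
	{
		static struct task_work work;	/* must outlive the queued callback */

		init_task_work(&work, example_work_fn, NULL);

		/* false: just queue it, don't request TIF_NOTIFY_RESUME. */
		return task_work_add(current, &work, false);
	}

	static void example_cancel_work(void)
	{
		/* Returns the dequeued work, or NULL if it already ran. */
		task_work_cancel(current, example_work_fn);
	}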
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6420cda62336..1d0f6a8a0e5e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1486,6 +1486,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 	if (!buffer)
 		return size;
 
+	/* Make sure the requested buffer exists */
+	if (cpu_id != RING_BUFFER_ALL_CPUS &&
+	    !cpumask_test_cpu(cpu_id, buffer->cpumask))
+		return size;
+
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
 
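A short usage note: the cpu_id argument selects either one CPU's buffer or, with RING_BUFFER_ALL_CPUS, every per-CPU buffer; with the added check, a cpu_id that is not in buffer->cpumask now simply returns without resizing anything. Illustrative sketch only, assuming the constants from <linux/ring_buffer.h>:

	#include <linux/ring_buffer.h>

	static void example_resize(struct ring_buffer *buffer)
	{
		/* Resize only CPU 0's buffer to 1 MiB... */
		ring_buffer_resize(buffer, 1024 * 1024, 0);

		/* ...or resize every per-CPU buffer at once. */
		ring_buffer_resize(buffer, 1024 * 1024, RING_BUFFER_ALL_CPUS);
	}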