Diffstat (limited to 'kernel')
31 files changed, 238 insertions, 182 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 7c2893602d06..47845c57eb19 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) | |||
643 | if ((task_active_pid_ns(current) != &init_pid_ns)) | 643 | if ((task_active_pid_ns(current) != &init_pid_ns)) |
644 | return -EPERM; | 644 | return -EPERM; |
645 | 645 | ||
646 | if (!capable(CAP_AUDIT_CONTROL)) | 646 | if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) |
647 | err = -EPERM; | 647 | err = -EPERM; |
648 | break; | 648 | break; |
649 | case AUDIT_USER: | 649 | case AUDIT_USER: |
650 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: | 650 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: |
651 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: | 651 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: |
652 | if (!capable(CAP_AUDIT_WRITE)) | 652 | if (!netlink_capable(skb, CAP_AUDIT_WRITE)) |
653 | err = -EPERM; | 653 | err = -EPERM; |
654 | break; | 654 | break; |
655 | default: /* bad msg */ | 655 | default: /* bad msg */ |
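
Both audit.c hunks change the permission test from capable(), which looks at whatever task happens to be processing the netlink message, to netlink_capable(), which ties the check to the socket that sent the request. A minimal sketch of the resulting shape, condensed from audit_netlink_ok() above (only two of the real switch's cases are shown):

    /* sketch only: condensed from audit_netlink_ok(), not the full switch */
    static int example_audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
    {
            int err = 0;

            switch (msg_type) {
            case AUDIT_SET:                 /* control operation */
                    if (!netlink_capable(skb, CAP_AUDIT_CONTROL))
                            err = -EPERM;
                    break;
            case AUDIT_USER:                /* userspace audit record */
                    if (!netlink_capable(skb, CAP_AUDIT_WRITE))
                            err = -EPERM;
                    break;
            default:                        /* bad msg */
                    err = -EINVAL;
            }
            return err;
    }

Passing the skb is what lets the capability be evaluated against the sending socket rather than against current.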
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9fcdaa705b6c..3f1ca934a237 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -348,7 +348,7 @@ struct cgrp_cset_link { | |||
348 | * reference-counted, to improve performance when child cgroups | 348 | * reference-counted, to improve performance when child cgroups |
349 | * haven't been created. | 349 | * haven't been created. |
350 | */ | 350 | */ |
351 | static struct css_set init_css_set = { | 351 | struct css_set init_css_set = { |
352 | .refcount = ATOMIC_INIT(1), | 352 | .refcount = ATOMIC_INIT(1), |
353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), | 353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), |
354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), | 354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), |
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1495 | */ | 1495 | */ |
1496 | if (!use_task_css_set_links) | 1496 | if (!use_task_css_set_links) |
1497 | cgroup_enable_task_cg_lists(); | 1497 | cgroup_enable_task_cg_lists(); |
1498 | retry: | 1498 | |
1499 | mutex_lock(&cgroup_tree_mutex); | 1499 | mutex_lock(&cgroup_tree_mutex); |
1500 | mutex_lock(&cgroup_mutex); | 1500 | mutex_lock(&cgroup_mutex); |
1501 | 1501 | ||
@@ -1503,7 +1503,7 @@ retry: | |||
1503 | ret = parse_cgroupfs_options(data, &opts); | 1503 | ret = parse_cgroupfs_options(data, &opts); |
1504 | if (ret) | 1504 | if (ret) |
1505 | goto out_unlock; | 1505 | goto out_unlock; |
1506 | 1506 | retry: | |
1507 | /* look for a matching existing root */ | 1507 | /* look for a matching existing root */ |
1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { | 1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { |
1509 | cgrp_dfl_root_visible = true; | 1509 | cgrp_dfl_root_visible = true; |
@@ -1562,9 +1562,9 @@ retry: | |||
1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { | 1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { |
1563 | mutex_unlock(&cgroup_mutex); | 1563 | mutex_unlock(&cgroup_mutex); |
1564 | mutex_unlock(&cgroup_tree_mutex); | 1564 | mutex_unlock(&cgroup_tree_mutex); |
1565 | kfree(opts.release_agent); | ||
1566 | kfree(opts.name); | ||
1567 | msleep(10); | 1565 | msleep(10); |
1566 | mutex_lock(&cgroup_tree_mutex); | ||
1567 | mutex_lock(&cgroup_mutex); | ||
1568 | goto retry; | 1568 | goto retry; |
1569 | } | 1569 | } |
1570 | 1570 | ||
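
The cgroup_mount() hunks move the retry: label below option parsing and make the retry path re-acquire both mutexes, rather than freeing the parsed options and looping back with the locks already dropped. Roughly, the retry shape after this change is (error handling and the root lookup elided):

    	mutex_lock(&cgroup_tree_mutex);
    	mutex_lock(&cgroup_mutex);

    	ret = parse_cgroupfs_options(data, &opts);
    	if (ret)
    		goto out_unlock;
    retry:
    	/* ... look for a matching existing root ... */
    	if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
    		/* root is being torn down: wait and retry, opts stays intact */
    		mutex_unlock(&cgroup_mutex);
    		mutex_unlock(&cgroup_tree_mutex);
    		msleep(10);
    		mutex_lock(&cgroup_tree_mutex);
    		mutex_lock(&cgroup_mutex);
    		goto retry;
    	}

Because opts is no longer rebuilt on every pass, the old kfree(opts.release_agent)/kfree(opts.name) calls in the retry path are dropped.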
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2bc4a2256444..345628c78b5b 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | #include <linux/mutex.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is |
@@ -42,9 +43,10 @@ enum freezer_state_flags { | |||
42 | struct freezer { | 43 | struct freezer { |
43 | struct cgroup_subsys_state css; | 44 | struct cgroup_subsys_state css; |
44 | unsigned int state; | 45 | unsigned int state; |
45 | spinlock_t lock; | ||
46 | }; | 46 | }; |
47 | 47 | ||
48 | static DEFINE_MUTEX(freezer_mutex); | ||
49 | |||
48 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
49 | { | 51 | { |
50 | return css ? container_of(css, struct freezer, css) : NULL; | 52 | return css ? container_of(css, struct freezer, css) : NULL; |
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css) | |||
93 | if (!freezer) | 95 | if (!freezer) |
94 | return ERR_PTR(-ENOMEM); | 96 | return ERR_PTR(-ENOMEM); |
95 | 97 | ||
96 | spin_lock_init(&freezer->lock); | ||
97 | return &freezer->css; | 98 | return &freezer->css; |
98 | } | 99 | } |
99 | 100 | ||
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
110 | struct freezer *freezer = css_freezer(css); | 111 | struct freezer *freezer = css_freezer(css); |
111 | struct freezer *parent = parent_freezer(freezer); | 112 | struct freezer *parent = parent_freezer(freezer); |
112 | 113 | ||
113 | /* | 114 | mutex_lock(&freezer_mutex); |
114 | * The following double locking and freezing state inheritance | ||
115 | * guarantee that @cgroup can never escape ancestors' freezing | ||
116 | * states. See css_for_each_descendant_pre() for details. | ||
117 | */ | ||
118 | if (parent) | ||
119 | spin_lock_irq(&parent->lock); | ||
120 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
121 | 115 | ||
122 | freezer->state |= CGROUP_FREEZER_ONLINE; | 116 | freezer->state |= CGROUP_FREEZER_ONLINE; |
123 | 117 | ||
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
126 | atomic_inc(&system_freezing_cnt); | 120 | atomic_inc(&system_freezing_cnt); |
127 | } | 121 | } |
128 | 122 | ||
129 | spin_unlock(&freezer->lock); | 123 | mutex_unlock(&freezer_mutex); |
130 | if (parent) | ||
131 | spin_unlock_irq(&parent->lock); | ||
132 | |||
133 | return 0; | 124 | return 0; |
134 | } | 125 | } |
135 | 126 | ||
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) | |||
144 | { | 135 | { |
145 | struct freezer *freezer = css_freezer(css); | 136 | struct freezer *freezer = css_freezer(css); |
146 | 137 | ||
147 | spin_lock_irq(&freezer->lock); | 138 | mutex_lock(&freezer_mutex); |
148 | 139 | ||
149 | if (freezer->state & CGROUP_FREEZING) | 140 | if (freezer->state & CGROUP_FREEZING) |
150 | atomic_dec(&system_freezing_cnt); | 141 | atomic_dec(&system_freezing_cnt); |
151 | 142 | ||
152 | freezer->state = 0; | 143 | freezer->state = 0; |
153 | 144 | ||
154 | spin_unlock_irq(&freezer->lock); | 145 | mutex_unlock(&freezer_mutex); |
155 | } | 146 | } |
156 | 147 | ||
157 | static void freezer_css_free(struct cgroup_subsys_state *css) | 148 | static void freezer_css_free(struct cgroup_subsys_state *css) |
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
175 | struct task_struct *task; | 166 | struct task_struct *task; |
176 | bool clear_frozen = false; | 167 | bool clear_frozen = false; |
177 | 168 | ||
178 | spin_lock_irq(&freezer->lock); | 169 | mutex_lock(&freezer_mutex); |
179 | 170 | ||
180 | /* | 171 | /* |
181 | * Make the new tasks conform to the current state of @new_css. | 172 | * Make the new tasks conform to the current state of @new_css. |
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
197 | } | 188 | } |
198 | } | 189 | } |
199 | 190 | ||
200 | spin_unlock_irq(&freezer->lock); | 191 | /* propagate FROZEN clearing upwards */ |
201 | |||
202 | /* | ||
203 | * Propagate FROZEN clearing upwards. We may race with | ||
204 | * update_if_frozen(), but as long as both work bottom-up, either | ||
205 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
206 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
207 | * left FROZEN. | ||
208 | */ | ||
209 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | 192 | while (clear_frozen && (freezer = parent_freezer(freezer))) { |
210 | spin_lock_irq(&freezer->lock); | ||
211 | freezer->state &= ~CGROUP_FROZEN; | 193 | freezer->state &= ~CGROUP_FROZEN; |
212 | clear_frozen = freezer->state & CGROUP_FREEZING; | 194 | clear_frozen = freezer->state & CGROUP_FREEZING; |
213 | spin_unlock_irq(&freezer->lock); | ||
214 | } | 195 | } |
196 | |||
197 | mutex_unlock(&freezer_mutex); | ||
215 | } | 198 | } |
216 | 199 | ||
217 | /** | 200 | /** |
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task) | |||
228 | { | 211 | { |
229 | struct freezer *freezer; | 212 | struct freezer *freezer; |
230 | 213 | ||
231 | rcu_read_lock(); | ||
232 | freezer = task_freezer(task); | ||
233 | |||
234 | /* | 214 | /* |
235 | * The root cgroup is non-freezable, so we can skip locking the | 215 | * The root cgroup is non-freezable, so we can skip locking the |
236 | * freezer. This is safe regardless of race with task migration. | 216 | * freezer. This is safe regardless of race with task migration. |
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task) | |||
238 | * to do. If we lost and root is the new cgroup, noop is still the | 218 | * to do. If we lost and root is the new cgroup, noop is still the |
239 | * right thing to do. | 219 | * right thing to do. |
240 | */ | 220 | */ |
241 | if (!parent_freezer(freezer)) | 221 | if (task_css_is_root(task, freezer_cgrp_id)) |
242 | goto out; | 222 | return; |
243 | 223 | ||
244 | /* | 224 | mutex_lock(&freezer_mutex); |
245 | * Grab @freezer->lock and freeze @task after verifying @task still | 225 | rcu_read_lock(); |
246 | * belongs to @freezer and it's freezing. The former is for the | 226 | |
247 | * case where we have raced against task migration and lost and | 227 | freezer = task_freezer(task); |
248 | * @task is already in a different cgroup which may not be frozen. | 228 | if (freezer->state & CGROUP_FREEZING) |
249 | * This isn't strictly necessary as freeze_task() is allowed to be | ||
250 | * called spuriously but let's do it anyway for, if nothing else, | ||
251 | * documentation. | ||
252 | */ | ||
253 | spin_lock_irq(&freezer->lock); | ||
254 | if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) | ||
255 | freeze_task(task); | 229 | freeze_task(task); |
256 | spin_unlock_irq(&freezer->lock); | 230 | |
257 | out: | ||
258 | rcu_read_unlock(); | 231 | rcu_read_unlock(); |
232 | mutex_unlock(&freezer_mutex); | ||
259 | } | 233 | } |
260 | 234 | ||
261 | /** | 235 | /** |
@@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
281 | struct css_task_iter it; | 255 | struct css_task_iter it; |
282 | struct task_struct *task; | 256 | struct task_struct *task; |
283 | 257 | ||
284 | WARN_ON_ONCE(!rcu_read_lock_held()); | 258 | lockdep_assert_held(&freezer_mutex); |
285 | |||
286 | spin_lock_irq(&freezer->lock); | ||
287 | 259 | ||
288 | if (!(freezer->state & CGROUP_FREEZING) || | 260 | if (!(freezer->state & CGROUP_FREEZING) || |
289 | (freezer->state & CGROUP_FROZEN)) | 261 | (freezer->state & CGROUP_FROZEN)) |
290 | goto out_unlock; | 262 | return; |
291 | 263 | ||
292 | /* are all (live) children frozen? */ | 264 | /* are all (live) children frozen? */ |
265 | rcu_read_lock(); | ||
293 | css_for_each_child(pos, css) { | 266 | css_for_each_child(pos, css) { |
294 | struct freezer *child = css_freezer(pos); | 267 | struct freezer *child = css_freezer(pos); |
295 | 268 | ||
296 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 269 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
297 | !(child->state & CGROUP_FROZEN)) | 270 | !(child->state & CGROUP_FROZEN)) { |
298 | goto out_unlock; | 271 | rcu_read_unlock(); |
272 | return; | ||
273 | } | ||
299 | } | 274 | } |
275 | rcu_read_unlock(); | ||
300 | 276 | ||
301 | /* are all tasks frozen? */ | 277 | /* are all tasks frozen? */ |
302 | css_task_iter_start(css, &it); | 278 | css_task_iter_start(css, &it); |
@@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
317 | freezer->state |= CGROUP_FROZEN; | 293 | freezer->state |= CGROUP_FROZEN; |
318 | out_iter_end: | 294 | out_iter_end: |
319 | css_task_iter_end(&it); | 295 | css_task_iter_end(&it); |
320 | out_unlock: | ||
321 | spin_unlock_irq(&freezer->lock); | ||
322 | } | 296 | } |
323 | 297 | ||
324 | static int freezer_read(struct seq_file *m, void *v) | 298 | static int freezer_read(struct seq_file *m, void *v) |
325 | { | 299 | { |
326 | struct cgroup_subsys_state *css = seq_css(m), *pos; | 300 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
327 | 301 | ||
302 | mutex_lock(&freezer_mutex); | ||
328 | rcu_read_lock(); | 303 | rcu_read_lock(); |
329 | 304 | ||
330 | /* update states bottom-up */ | 305 | /* update states bottom-up */ |
331 | css_for_each_descendant_post(pos, css) | 306 | css_for_each_descendant_post(pos, css) { |
307 | if (!css_tryget(pos)) | ||
308 | continue; | ||
309 | rcu_read_unlock(); | ||
310 | |||
332 | update_if_frozen(pos); | 311 | update_if_frozen(pos); |
333 | 312 | ||
313 | rcu_read_lock(); | ||
314 | css_put(pos); | ||
315 | } | ||
316 | |||
334 | rcu_read_unlock(); | 317 | rcu_read_unlock(); |
318 | mutex_unlock(&freezer_mutex); | ||
335 | 319 | ||
336 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | 320 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); |
337 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
@@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
373 | unsigned int state) | 357 | unsigned int state) |
374 | { | 358 | { |
375 | /* also synchronizes against task migration, see freezer_attach() */ | 359 | /* also synchronizes against task migration, see freezer_attach() */ |
376 | lockdep_assert_held(&freezer->lock); | 360 | lockdep_assert_held(&freezer_mutex); |
377 | 361 | ||
378 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | 362 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) |
379 | return; | 363 | return; |
@@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) | |||
414 | * descendant will try to inherit its parent's FREEZING state as | 398 | * descendant will try to inherit its parent's FREEZING state as |
415 | * CGROUP_FREEZING_PARENT. | 399 | * CGROUP_FREEZING_PARENT. |
416 | */ | 400 | */ |
401 | mutex_lock(&freezer_mutex); | ||
417 | rcu_read_lock(); | 402 | rcu_read_lock(); |
418 | css_for_each_descendant_pre(pos, &freezer->css) { | 403 | css_for_each_descendant_pre(pos, &freezer->css) { |
419 | struct freezer *pos_f = css_freezer(pos); | 404 | struct freezer *pos_f = css_freezer(pos); |
420 | struct freezer *parent = parent_freezer(pos_f); | 405 | struct freezer *parent = parent_freezer(pos_f); |
421 | 406 | ||
422 | spin_lock_irq(&pos_f->lock); | 407 | if (!css_tryget(pos)) |
408 | continue; | ||
409 | rcu_read_unlock(); | ||
423 | 410 | ||
424 | if (pos_f == freezer) { | 411 | if (pos_f == freezer) |
425 | freezer_apply_state(pos_f, freeze, | 412 | freezer_apply_state(pos_f, freeze, |
426 | CGROUP_FREEZING_SELF); | 413 | CGROUP_FREEZING_SELF); |
427 | } else { | 414 | else |
428 | /* | ||
429 | * Our update to @parent->state is already visible | ||
430 | * which is all we need. No need to lock @parent. | ||
431 | * For more info on synchronization, see | ||
432 | * freezer_post_create(). | ||
433 | */ | ||
434 | freezer_apply_state(pos_f, | 415 | freezer_apply_state(pos_f, |
435 | parent->state & CGROUP_FREEZING, | 416 | parent->state & CGROUP_FREEZING, |
436 | CGROUP_FREEZING_PARENT); | 417 | CGROUP_FREEZING_PARENT); |
437 | } | ||
438 | 418 | ||
439 | spin_unlock_irq(&pos_f->lock); | 419 | rcu_read_lock(); |
420 | css_put(pos); | ||
440 | } | 421 | } |
441 | rcu_read_unlock(); | 422 | rcu_read_unlock(); |
423 | mutex_unlock(&freezer_mutex); | ||
442 | } | 424 | } |
443 | 425 | ||
444 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, | 426 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, |
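
Taken together, the cgroup_freezer.c hunks drop the per-freezer spinlock in favour of a single freezer_mutex, and the descendant walks now pin each css with css_tryget() so the RCU read lock can be released while the freezer work runs under the mutex alone. A minimal sketch of that walk pattern as used in freezer_read()/freezer_change_state() above, where do_work() stands in for update_if_frozen() or freezer_apply_state():

    	mutex_lock(&freezer_mutex);
    	rcu_read_lock();
    	css_for_each_descendant_pre(pos, &freezer->css) {
    		if (!css_tryget(pos))		/* skip css'es already going away */
    			continue;
    		rcu_read_unlock();

    		do_work(css_freezer(pos));	/* protected by freezer_mutex only */

    		rcu_read_lock();
    		css_put(pos);
    	}
    	rcu_read_unlock();
    	mutex_unlock(&freezer_mutex);

With one global mutex there is no parent/child lock nesting left to get right, which is what the removed SINGLE_DEPTH_NESTING dance in freezer_css_online() existed for.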
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6cb20d2e7ee0..019d45008448 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -120,7 +120,7 @@ void context_tracking_user_enter(void) | |||
120 | * instead of preempt_schedule() to exit user context if needed before | 120 | * instead of preempt_schedule() to exit user context if needed before |
121 | * calling the scheduler. | 121 | * calling the scheduler. |
122 | */ | 122 | */ |
123 | asmlinkage void __sched notrace preempt_schedule_context(void) | 123 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) |
124 | { | 124 | { |
125 | enum ctx_state prev_ctx; | 125 | enum ctx_state prev_ctx; |
126 | 126 | ||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d55092ceee29..e0501fe7140d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -234,6 +234,11 @@ again: | |||
234 | goto again; | 234 | goto again; |
235 | } | 235 | } |
236 | timer->base = new_base; | 236 | timer->base = new_base; |
237 | } else { | ||
238 | if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { | ||
239 | cpu = this_cpu; | ||
240 | goto again; | ||
241 | } | ||
237 | } | 242 | } |
238 | return new_base; | 243 | return new_base; |
239 | } | 244 | } |
@@ -569,6 +574,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) | |||
569 | 574 | ||
570 | cpu_base->expires_next.tv64 = expires_next.tv64; | 575 | cpu_base->expires_next.tv64 = expires_next.tv64; |
571 | 576 | ||
577 | /* | ||
578 | * If a hang was detected in the last timer interrupt then we | ||
579 | * leave the hang delay active in the hardware. We want the | ||
580 | * system to make progress. That also prevents the following | ||
581 | * scenario: | ||
582 | * T1 expires 50ms from now | ||
583 | * T2 expires 5s from now | ||
584 | * | ||
585 | * T1 is removed, so this code is called and would reprogram | ||
586 | * the hardware to 5s from now. Any hrtimer_start after that | ||
587 | * will not reprogram the hardware due to hang_detected being | ||
588 | * set. So we'd effectivly block all timers until the T2 event | ||
589 | * fires. | ||
590 | */ | ||
591 | if (cpu_base->hang_detected) | ||
592 | return; | ||
593 | |||
572 | if (cpu_base->expires_next.tv64 != KTIME_MAX) | 594 | if (cpu_base->expires_next.tv64 != KTIME_MAX) |
573 | tick_program_event(cpu_base->expires_next, 1); | 595 | tick_program_event(cpu_base->expires_next, 1); |
574 | } | 596 | } |
@@ -968,11 +990,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
968 | /* Remove an active timer from the queue: */ | 990 | /* Remove an active timer from the queue: */ |
969 | ret = remove_hrtimer(timer, base); | 991 | ret = remove_hrtimer(timer, base); |
970 | 992 | ||
971 | /* Switch the timer base, if necessary: */ | ||
972 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
973 | |||
974 | if (mode & HRTIMER_MODE_REL) { | 993 | if (mode & HRTIMER_MODE_REL) { |
975 | tim = ktime_add_safe(tim, new_base->get_time()); | 994 | tim = ktime_add_safe(tim, base->get_time()); |
976 | /* | 995 | /* |
977 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 996 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
978 | * to signal that they simply return xtime in | 997 | * to signal that they simply return xtime in |
@@ -987,6 +1006,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
987 | 1006 | ||
988 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); | 1007 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); |
989 | 1008 | ||
1009 | /* Switch the timer base, if necessary: */ | ||
1010 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
1011 | |||
990 | timer_stats_hrtimer_set_start_info(timer); | 1012 | timer_stats_hrtimer_set_start_info(timer); |
991 | 1013 | ||
992 | leftmost = enqueue_hrtimer(timer, new_base); | 1014 | leftmost = enqueue_hrtimer(timer, new_base); |
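
Two of the hrtimer.c hunks are connected: base switching relies on hrtimer_check_target(), which compares the timer's expiry against the target CPU's next scheduled event, so __hrtimer_start_range_ns() has to have the expiry set before it calls switch_hrtimer_base(). Condensed, and as a sketch only, the start path after these hunks reads roughly:

    	ret = remove_hrtimer(timer, base);

    	if (mode & HRTIMER_MODE_REL)	/* relative time added on the current base */
    		tim = ktime_add_safe(tim, base->get_time());

    	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

    	/* only now pick (and possibly migrate to) the target base */
    	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);

    	leftmost = enqueue_hrtimer(timer, new_base);

The hang_detected hunk is independent: it keeps the hang-recovery delay programmed instead of reprogramming the hardware to a far-away event that would then block all earlier timers.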
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a7174617616b..bb07f2928f4b 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, | |||
363 | if (from > irq) | 363 | if (from > irq) |
364 | return -EINVAL; | 364 | return -EINVAL; |
365 | from = irq; | 365 | from = irq; |
366 | } else { | ||
367 | /* | ||
368 | * For interrupts which are freely allocated the | ||
369 | * architecture can force a lower bound to the @from | ||
370 | * argument. x86 uses this to exclude the GSI space. | ||
371 | */ | ||
372 | from = arch_dynirq_lower_bound(from); | ||
366 | } | 373 | } |
367 | 374 | ||
368 | mutex_lock(&sparse_irq_lock); | 375 | mutex_lock(&sparse_irq_lock); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2486a4c1a710..d34131ca372b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -180,7 +180,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
180 | struct irq_chip *chip = irq_data_get_irq_chip(data); | 180 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
181 | int ret; | 181 | int ret; |
182 | 182 | ||
183 | ret = chip->irq_set_affinity(data, mask, false); | 183 | ret = chip->irq_set_affinity(data, mask, force); |
184 | switch (ret) { | 184 | switch (ret) { |
185 | case IRQ_SET_MASK_OK: | 185 | case IRQ_SET_MASK_OK: |
186 | cpumask_copy(data->affinity, mask); | 186 | cpumask_copy(data->affinity, mask); |
@@ -192,7 +192,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
192 | return ret; | 192 | return ret; |
193 | } | 193 | } |
194 | 194 | ||
195 | int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | 195 | int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, |
196 | bool force) | ||
196 | { | 197 | { |
197 | struct irq_chip *chip = irq_data_get_irq_chip(data); | 198 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
198 | struct irq_desc *desc = irq_data_to_desc(data); | 199 | struct irq_desc *desc = irq_data_to_desc(data); |
@@ -202,7 +203,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | |||
202 | return -EINVAL; | 203 | return -EINVAL; |
203 | 204 | ||
204 | if (irq_can_move_pcntxt(data)) { | 205 | if (irq_can_move_pcntxt(data)) { |
205 | ret = irq_do_set_affinity(data, mask, false); | 206 | ret = irq_do_set_affinity(data, mask, force); |
206 | } else { | 207 | } else { |
207 | irqd_set_move_pending(data); | 208 | irqd_set_move_pending(data); |
208 | irq_copy_pending(desc, mask); | 209 | irq_copy_pending(desc, mask); |
@@ -217,13 +218,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | |||
217 | return ret; | 218 | return ret; |
218 | } | 219 | } |
219 | 220 | ||
220 | /** | 221 | int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) |
221 | * irq_set_affinity - Set the irq affinity of a given irq | ||
222 | * @irq: Interrupt to set affinity | ||
223 | * @mask: cpumask | ||
224 | * | ||
225 | */ | ||
226 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
227 | { | 222 | { |
228 | struct irq_desc *desc = irq_to_desc(irq); | 223 | struct irq_desc *desc = irq_to_desc(irq); |
229 | unsigned long flags; | 224 | unsigned long flags; |
@@ -233,7 +228,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
233 | return -EINVAL; | 228 | return -EINVAL; |
234 | 229 | ||
235 | raw_spin_lock_irqsave(&desc->lock, flags); | 230 | raw_spin_lock_irqsave(&desc->lock, flags); |
236 | ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); | 231 | ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); |
237 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 232 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
238 | return ret; | 233 | return ret; |
239 | } | 234 | } |
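
These hunks plumb the force argument through the affinity path instead of hard-coding false, so irq_do_set_affinity() hands the caller's value to the irq chip, and the renamed irq_set_affinity_locked() and __irq_set_affinity() both take it explicitly. The kernel-doc block for irq_set_affinity() disappears here because that name presumably becomes a thin wrapper in the interrupt header, which is outside this kernel/-only diff; under that assumption the callers would look along these lines:

    	/* assumed wrappers in <linux/interrupt.h>; not shown in this diffstat */
    	static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
    	{
    		return __irq_set_affinity(irq, m, false);	/* previous behaviour */
    	}

    	static inline int irq_force_affinity(unsigned int irq, const struct cpumask *m)
    	{
    		return __irq_set_affinity(irq, m, true);	/* pass force=true to the chip */
    	}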
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b0e9467922e1..d24e4339b46d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) | |||
4188 | } | 4188 | } |
4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
4190 | 4190 | ||
4191 | asmlinkage void lockdep_sys_exit(void) | 4191 | asmlinkage __visible void lockdep_sys_exit(void) |
4192 | { | 4192 | { |
4193 | struct task_struct *curr = current; | 4193 | struct task_struct *curr = current; |
4194 | 4194 | ||
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index e1191c996c59..5cf6731b98e9 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -71,18 +71,17 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
71 | 71 | ||
72 | void debug_mutex_unlock(struct mutex *lock) | 72 | void debug_mutex_unlock(struct mutex *lock) |
73 | { | 73 | { |
74 | if (unlikely(!debug_locks)) | 74 | if (likely(debug_locks)) { |
75 | return; | 75 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); |
76 | 76 | ||
77 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | 77 | if (!lock->owner) |
78 | DEBUG_LOCKS_WARN_ON(!lock->owner); | ||
79 | else | ||
80 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
78 | 81 | ||
79 | if (!lock->owner) | 82 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
80 | DEBUG_LOCKS_WARN_ON(!lock->owner); | 83 | mutex_clear_owner(lock); |
81 | else | 84 | } |
82 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
83 | |||
84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | ||
85 | mutex_clear_owner(lock); | ||
86 | 85 | ||
87 | /* | 86 | /* |
88 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug | 87 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug |
diff --git a/kernel/module.c b/kernel/module.c
index 11869408f79b..079c4615607d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
815 | return -EFAULT; | 815 | return -EFAULT; |
816 | name[MODULE_NAME_LEN-1] = '\0'; | 816 | name[MODULE_NAME_LEN-1] = '\0'; |
817 | 817 | ||
818 | if (!(flags & O_NONBLOCK)) | ||
819 | pr_warn("waiting module removal not supported: please upgrade\n"); | ||
820 | |||
821 | if (mutex_lock_interruptible(&module_mutex) != 0) | 818 | if (mutex_lock_interruptible(&module_mutex) != 0) |
822 | return -EINTR; | 819 | return -EINTR; |
823 | 820 | ||
@@ -3271,6 +3268,9 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3271 | 3268 | ||
3272 | dynamic_debug_setup(info->debug, info->num_debug); | 3269 | dynamic_debug_setup(info->debug, info->num_debug); |
3273 | 3270 | ||
3271 | /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ | ||
3272 | ftrace_module_init(mod); | ||
3273 | |||
3274 | /* Finally it's fully formed, ready to start executing. */ | 3274 | /* Finally it's fully formed, ready to start executing. */ |
3275 | err = complete_formation(mod, info); | 3275 | err = complete_formation(mod, info); |
3276 | if (err) | 3276 | if (err) |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 18fb7a2fb14b..1ea328aafdc9 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | |||
1586 | return -ENOMEM; | 1586 | return -ENOMEM; |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | asmlinkage int swsusp_save(void) | 1589 | asmlinkage __visible int swsusp_save(void) |
1590 | { | 1590 | { |
1591 | unsigned int nr_pages, nr_highmem; | 1591 | unsigned int nr_pages, nr_highmem; |
1592 | 1592 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c3ad9cafe930..8233cd4047d7 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/console.h> | 15 | #include <linux/console.h> |
16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
17 | #include <linux/cpuidle.h> | ||
17 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
18 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
19 | #include <linux/io.h> | 20 | #include <linux/io.h> |
@@ -53,7 +54,9 @@ static void freeze_begin(void) | |||
53 | 54 | ||
54 | static void freeze_enter(void) | 55 | static void freeze_enter(void) |
55 | { | 56 | { |
57 | cpuidle_resume(); | ||
56 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); | 58 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); |
59 | cpuidle_pause(); | ||
57 | } | 60 | } |
58 | 61 | ||
59 | void freeze_wake(void) | 62 | void freeze_wake(void) |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a45b50962295..7228258b85ec 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit); | |||
1674 | * | 1674 | * |
1675 | * See the vsnprintf() documentation for format string extensions over C99. | 1675 | * See the vsnprintf() documentation for format string extensions over C99. |
1676 | */ | 1676 | */ |
1677 | asmlinkage int printk(const char *fmt, ...) | 1677 | asmlinkage __visible int printk(const char *fmt, ...) |
1678 | { | 1678 | { |
1679 | va_list args; | 1679 | va_list args; |
1680 | int r; | 1680 | int r; |
@@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap) | |||
1737 | } | 1737 | } |
1738 | } | 1738 | } |
1739 | 1739 | ||
1740 | asmlinkage void early_printk(const char *fmt, ...) | 1740 | asmlinkage __visible void early_printk(const char *fmt, ...) |
1741 | { | 1741 | { |
1742 | va_list ap; | 1742 | va_list ap; |
1743 | 1743 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..d9d8ece46a15 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq) | |||
2192 | * schedule_tail - first thing a freshly forked thread must call. | 2192 | * schedule_tail - first thing a freshly forked thread must call. |
2193 | * @prev: the thread we just switched away from. | 2193 | * @prev: the thread we just switched away from. |
2194 | */ | 2194 | */ |
2195 | asmlinkage void schedule_tail(struct task_struct *prev) | 2195 | asmlinkage __visible void schedule_tail(struct task_struct *prev) |
2196 | __releases(rq->lock) | 2196 | __releases(rq->lock) |
2197 | { | 2197 | { |
2198 | struct rq *rq = this_rq(); | 2198 | struct rq *rq = this_rq(); |
@@ -2741,7 +2741,7 @@ static inline void sched_submit_work(struct task_struct *tsk) | |||
2741 | blk_schedule_flush_plug(tsk); | 2741 | blk_schedule_flush_plug(tsk); |
2742 | } | 2742 | } |
2743 | 2743 | ||
2744 | asmlinkage void __sched schedule(void) | 2744 | asmlinkage __visible void __sched schedule(void) |
2745 | { | 2745 | { |
2746 | struct task_struct *tsk = current; | 2746 | struct task_struct *tsk = current; |
2747 | 2747 | ||
@@ -2751,7 +2751,7 @@ asmlinkage void __sched schedule(void) | |||
2751 | EXPORT_SYMBOL(schedule); | 2751 | EXPORT_SYMBOL(schedule); |
2752 | 2752 | ||
2753 | #ifdef CONFIG_CONTEXT_TRACKING | 2753 | #ifdef CONFIG_CONTEXT_TRACKING |
2754 | asmlinkage void __sched schedule_user(void) | 2754 | asmlinkage __visible void __sched schedule_user(void) |
2755 | { | 2755 | { |
2756 | /* | 2756 | /* |
2757 | * If we come here after a random call to set_need_resched(), | 2757 | * If we come here after a random call to set_need_resched(), |
@@ -2783,7 +2783,7 @@ void __sched schedule_preempt_disabled(void) | |||
2783 | * off of preempt_enable. Kernel preemptions off return from interrupt | 2783 | * off of preempt_enable. Kernel preemptions off return from interrupt |
2784 | * occur there and call schedule directly. | 2784 | * occur there and call schedule directly. |
2785 | */ | 2785 | */ |
2786 | asmlinkage void __sched notrace preempt_schedule(void) | 2786 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
2787 | { | 2787 | { |
2788 | /* | 2788 | /* |
2789 | * If there is a non-zero preempt_count or interrupts are disabled, | 2789 | * If there is a non-zero preempt_count or interrupts are disabled, |
@@ -2813,7 +2813,7 @@ EXPORT_SYMBOL(preempt_schedule); | |||
2813 | * Note, that this is called and return with irqs disabled. This will | 2813 | * Note, that this is called and return with irqs disabled. This will |
2814 | * protect us against recursive calling from irq. | 2814 | * protect us against recursive calling from irq. |
2815 | */ | 2815 | */ |
2816 | asmlinkage void __sched preempt_schedule_irq(void) | 2816 | asmlinkage __visible void __sched preempt_schedule_irq(void) |
2817 | { | 2817 | { |
2818 | enum ctx_state prev_state; | 2818 | enum ctx_state prev_state; |
2819 | 2819 | ||
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 27ef40925525..b08095786cb8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1021,8 +1021,17 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) | |||
1021 | 1021 | ||
1022 | dl_rq = &rq->dl; | 1022 | dl_rq = &rq->dl; |
1023 | 1023 | ||
1024 | if (need_pull_dl_task(rq, prev)) | 1024 | if (need_pull_dl_task(rq, prev)) { |
1025 | pull_dl_task(rq); | 1025 | pull_dl_task(rq); |
1026 | /* | ||
1027 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | ||
1028 | * means a stop task can slip in, in which case we need to | ||
1029 | * re-start task selection. | ||
1030 | */ | ||
1031 | if (rq->stop && rq->stop->on_rq) | ||
1032 | return RETRY_TASK; | ||
1033 | } | ||
1034 | |||
1026 | /* | 1035 | /* |
1027 | * When prev is DL, we may throttle it in put_prev_task(). | 1036 | * When prev is DL, we may throttle it in put_prev_task(). |
1028 | * So, we update time before we check for dl_nr_running. | 1037 | * So, we update time before we check for dl_nr_running. |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7e9bd0b1fa9e..7570dd969c28 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1497,7 +1497,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1497 | /* If the task is part of a group prevent parallel updates to group stats */ | 1497 | /* If the task is part of a group prevent parallel updates to group stats */ |
1498 | if (p->numa_group) { | 1498 | if (p->numa_group) { |
1499 | group_lock = &p->numa_group->lock; | 1499 | group_lock = &p->numa_group->lock; |
1500 | spin_lock(group_lock); | 1500 | spin_lock_irq(group_lock); |
1501 | } | 1501 | } |
1502 | 1502 | ||
1503 | /* Find the node with the highest number of faults */ | 1503 | /* Find the node with the highest number of faults */ |
@@ -1572,7 +1572,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1572 | } | 1572 | } |
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | spin_unlock(group_lock); | 1575 | spin_unlock_irq(group_lock); |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | /* Preferred node as the node with the most faults */ | 1578 | /* Preferred node as the node with the most faults */ |
@@ -1677,7 +1677,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1677 | if (!join) | 1677 | if (!join) |
1678 | return; | 1678 | return; |
1679 | 1679 | ||
1680 | double_lock(&my_grp->lock, &grp->lock); | 1680 | BUG_ON(irqs_disabled()); |
1681 | double_lock_irq(&my_grp->lock, &grp->lock); | ||
1681 | 1682 | ||
1682 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { | 1683 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { |
1683 | my_grp->faults[i] -= p->numa_faults_memory[i]; | 1684 | my_grp->faults[i] -= p->numa_faults_memory[i]; |
@@ -1691,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1691 | grp->nr_tasks++; | 1692 | grp->nr_tasks++; |
1692 | 1693 | ||
1693 | spin_unlock(&my_grp->lock); | 1694 | spin_unlock(&my_grp->lock); |
1694 | spin_unlock(&grp->lock); | 1695 | spin_unlock_irq(&grp->lock); |
1695 | 1696 | ||
1696 | rcu_assign_pointer(p->numa_group, grp); | 1697 | rcu_assign_pointer(p->numa_group, grp); |
1697 | 1698 | ||
@@ -1710,14 +1711,14 @@ void task_numa_free(struct task_struct *p) | |||
1710 | void *numa_faults = p->numa_faults_memory; | 1711 | void *numa_faults = p->numa_faults_memory; |
1711 | 1712 | ||
1712 | if (grp) { | 1713 | if (grp) { |
1713 | spin_lock(&grp->lock); | 1714 | spin_lock_irq(&grp->lock); |
1714 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 1715 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1715 | grp->faults[i] -= p->numa_faults_memory[i]; | 1716 | grp->faults[i] -= p->numa_faults_memory[i]; |
1716 | grp->total_faults -= p->total_numa_faults; | 1717 | grp->total_faults -= p->total_numa_faults; |
1717 | 1718 | ||
1718 | list_del(&p->numa_entry); | 1719 | list_del(&p->numa_entry); |
1719 | grp->nr_tasks--; | 1720 | grp->nr_tasks--; |
1720 | spin_unlock(&grp->lock); | 1721 | spin_unlock_irq(&grp->lock); |
1721 | rcu_assign_pointer(p->numa_group, NULL); | 1722 | rcu_assign_pointer(p->numa_group, NULL); |
1722 | put_numa_group(grp); | 1723 | put_numa_group(grp); |
1723 | } | 1724 | } |
@@ -6727,7 +6728,8 @@ static int idle_balance(struct rq *this_rq) | |||
6727 | out: | 6728 | out: |
6728 | /* Is there a task of a high priority class? */ | 6729 | /* Is there a task of a high priority class? */ |
6729 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && | 6730 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && |
6730 | (this_rq->dl.dl_nr_running || | 6731 | ((this_rq->stop && this_rq->stop->on_rq) || |
6732 | this_rq->dl.dl_nr_running || | ||
6731 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) | 6733 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) |
6732 | pulled_task = -1; | 6734 | pulled_task = -1; |
6733 | 6735 | ||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d8cdf1618551..bd2267ad404f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1362,10 +1362,11 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) | |||
1362 | pull_rt_task(rq); | 1362 | pull_rt_task(rq); |
1363 | /* | 1363 | /* |
1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | 1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this |
1365 | * means a dl task can slip in, in which case we need to | 1365 | * means a dl or stop task can slip in, in which case we need |
1366 | * re-start task selection. | 1366 | * to re-start task selection. |
1367 | */ | 1367 | */ |
1368 | if (unlikely(rq->dl.dl_nr_running)) | 1368 | if (unlikely((rq->stop && rq->stop->on_rq) || |
1369 | rq->dl.dl_nr_running)) | ||
1369 | return RETRY_TASK; | 1370 | return RETRY_TASK; |
1370 | } | 1371 | } |
1371 | 1372 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c9007f28d3a2..456e492a3dca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1385,6 +1385,15 @@ static inline void double_lock(spinlock_t *l1, spinlock_t *l2) | |||
1385 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); | 1385 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); |
1386 | } | 1386 | } |
1387 | 1387 | ||
1388 | static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) | ||
1389 | { | ||
1390 | if (l1 > l2) | ||
1391 | swap(l1, l2); | ||
1392 | |||
1393 | spin_lock_irq(l1); | ||
1394 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); | ||
1395 | } | ||
1396 | |||
1388 | static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) | 1397 | static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) |
1389 | { | 1398 | { |
1390 | if (l1 > l2) | 1399 | if (l1 > l2) |
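
double_lock_irq() follows the same convention as double_lock() above it: order the two locks by address so any two callers locking the same pair always nest them the same way (avoiding ABBA deadlocks), and disable interrupts while taking the first one. The task_numa_group() hunk in fair.c is the intended user; schematically:

    	BUG_ON(irqs_disabled());		/* caller expects to be the one disabling irqs */
    	double_lock_irq(&my_grp->lock, &grp->lock);

    	/* ... move the NUMA fault counts from my_grp to grp ... */

    	spin_unlock(&my_grp->lock);
    	spin_unlock_irq(&grp->lock);		/* the last unlock re-enables interrupts */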
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8d046c0726a..b35c21503a36 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -69,18 +69,17 @@ static void populate_seccomp_data(struct seccomp_data *sd) | |||
69 | { | 69 | { |
70 | struct task_struct *task = current; | 70 | struct task_struct *task = current; |
71 | struct pt_regs *regs = task_pt_regs(task); | 71 | struct pt_regs *regs = task_pt_regs(task); |
72 | unsigned long args[6]; | ||
72 | 73 | ||
73 | sd->nr = syscall_get_nr(task, regs); | 74 | sd->nr = syscall_get_nr(task, regs); |
74 | sd->arch = syscall_get_arch(); | 75 | sd->arch = syscall_get_arch(); |
75 | 76 | syscall_get_arguments(task, regs, 0, 6, args); | |
76 | /* Unroll syscall_get_args to help gcc on arm. */ | 77 | sd->args[0] = args[0]; |
77 | syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); | 78 | sd->args[1] = args[1]; |
78 | syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); | 79 | sd->args[2] = args[2]; |
79 | syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); | 80 | sd->args[3] = args[3]; |
80 | syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); | 81 | sd->args[4] = args[4]; |
81 | syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); | 82 | sd->args[5] = args[5]; |
82 | syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); | ||
83 | |||
84 | sd->instruction_pointer = KSTK_EIP(task); | 83 | sd->instruction_pointer = KSTK_EIP(task); |
85 | } | 84 | } |
86 | 85 | ||
@@ -256,6 +255,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) | |||
256 | goto free_prog; | 255 | goto free_prog; |
257 | 256 | ||
258 | /* Allocate a new seccomp_filter */ | 257 | /* Allocate a new seccomp_filter */ |
258 | ret = -ENOMEM; | ||
259 | filter = kzalloc(sizeof(struct seccomp_filter) + | 259 | filter = kzalloc(sizeof(struct seccomp_filter) + |
260 | sizeof(struct sock_filter_int) * new_len, | 260 | sizeof(struct sock_filter_int) * new_len, |
261 | GFP_KERNEL|__GFP_NOWARN); | 261 | GFP_KERNEL|__GFP_NOWARN); |
@@ -265,6 +265,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) | |||
265 | ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); | 265 | ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); |
266 | if (ret) | 266 | if (ret) |
267 | goto free_filter; | 267 | goto free_filter; |
268 | kfree(fp); | ||
268 | 269 | ||
269 | atomic_set(&filter->usage, 1); | 270 | atomic_set(&filter->usage, 1); |
270 | filter->len = new_len; | 271 | filter->len = new_len; |
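
The populate_seccomp_data() hunk fetches all six syscall arguments with one syscall_get_arguments() call into a correctly typed unsigned long array and then assigns them into the u64 args[] of struct seccomp_data; the old unrolled version cast &sd->args[i] to unsigned long *, which stores the wrong width when unsigned long is 32-bit but args[] is 64-bit. The copy is equivalent to this sketch (a loop in place of the unrolled assignments above):

    	unsigned long args[6];
    	int i;

    	syscall_get_arguments(task, regs, 0, 6, args);
    	for (i = 0; i < 6; i++)
    		sd->args[i] = args[i];	/* each unsigned long is widened into a u64 slot */

The two seccomp_attach_filter() hunks are separate fixes: ret is set to -ENOMEM before the kzalloc() so an allocation failure returns an error rather than the previous ret value, and kfree(fp) releases the copied classic filter once sk_convert_filter() has produced the internal program.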
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b50990a5bea0..92f24f5e8d52 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; } | |||
223 | static inline void lockdep_softirq_end(bool in_hardirq) { } | 223 | static inline void lockdep_softirq_end(bool in_hardirq) { } |
224 | #endif | 224 | #endif |
225 | 225 | ||
226 | asmlinkage void __do_softirq(void) | 226 | asmlinkage __visible void __do_softirq(void) |
227 | { | 227 | { |
228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | 228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
229 | unsigned long old_flags = current->flags; | 229 | unsigned long old_flags = current->flags; |
@@ -299,7 +299,7 @@ restart: | |||
299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | 299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); |
300 | } | 300 | } |
301 | 301 | ||
302 | asmlinkage void do_softirq(void) | 302 | asmlinkage __visible void do_softirq(void) |
303 | { | 303 | { |
304 | __u32 pending; | 304 | __u32 pending; |
305 | unsigned long flags; | 305 | unsigned long flags; |
@@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) | |||
779 | { | 779 | { |
780 | return 0; | 780 | return 0; |
781 | } | 781 | } |
782 | |||
783 | unsigned int __weak arch_dynirq_lower_bound(unsigned int from) | ||
784 | { | ||
785 | return from; | ||
786 | } | ||
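
The new __weak arch_dynirq_lower_bound() simply echoes its argument, so nothing changes unless an architecture overrides it; __irq_alloc_descs() (irqdesc.c hunk above) consults it only for freely allocated descriptors. A purely hypothetical override, for an architecture that wants to keep its first reserved_irq_count numbers out of the dynamic pool (illustrative only, not the real x86 implementation):

    	unsigned int reserved_irq_count;	/* hypothetical: end of the fixed IRQ range */

    	unsigned int arch_dynirq_lower_bound(unsigned int from)
    	{
    		/* never hand out dynamically allocated IRQ numbers below the reserved range */
    		return max(from, reserved_irq_count);
    	}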
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 015661279b68..0a0608edeb26 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -276,7 +276,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev, | |||
276 | bool tick_check_replacement(struct clock_event_device *curdev, | 276 | bool tick_check_replacement(struct clock_event_device *curdev, |
277 | struct clock_event_device *newdev) | 277 | struct clock_event_device *newdev) |
278 | { | 278 | { |
279 | if (tick_check_percpu(curdev, newdev, smp_processor_id())) | 279 | if (!tick_check_percpu(curdev, newdev, smp_processor_id())) |
280 | return false; | 280 | return false; |
281 | 281 | ||
282 | return tick_check_preferred(curdev, newdev); | 282 | return tick_check_preferred(curdev, newdev); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9f8af69c67ec..6558b7ac112d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -84,6 +84,9 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
84 | 84 | ||
85 | /* Keep the tick_next_period variable up to date */ | 85 | /* Keep the tick_next_period variable up to date */ |
86 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 86 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
87 | } else { | ||
88 | write_sequnlock(&jiffies_lock); | ||
89 | return; | ||
87 | } | 90 | } |
88 | write_sequnlock(&jiffies_lock); | 91 | write_sequnlock(&jiffies_lock); |
89 | update_wall_time(); | 92 | update_wall_time(); |
@@ -967,7 +970,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
967 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 970 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
968 | ktime_t next; | 971 | ktime_t next; |
969 | 972 | ||
970 | if (!tick_nohz_active) | 973 | if (!tick_nohz_enabled) |
971 | return; | 974 | return; |
972 | 975 | ||
973 | local_irq_disable(); | 976 | local_irq_disable(); |
diff --git a/kernel/timer.c b/kernel/timer.c
index 87bd529879c2..3bb01a323b2a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | |||
838 | 838 | ||
839 | bit = find_last_bit(&mask, BITS_PER_LONG); | 839 | bit = find_last_bit(&mask, BITS_PER_LONG); |
840 | 840 | ||
841 | mask = (1 << bit) - 1; | 841 | mask = (1UL << bit) - 1; |
842 | 842 | ||
843 | expires_limit = expires_limit & ~(mask); | 843 | expires_limit = expires_limit & ~(mask); |
844 | 844 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1fd4b9479210..4a54a25afa2f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, | |||
4330 | ftrace_process_locs(mod, start, end); | 4330 | ftrace_process_locs(mod, start, end); |
4331 | } | 4331 | } |
4332 | 4332 | ||
4333 | static int ftrace_module_notify_enter(struct notifier_block *self, | 4333 | void ftrace_module_init(struct module *mod) |
4334 | unsigned long val, void *data) | ||
4335 | { | 4334 | { |
4336 | struct module *mod = data; | 4335 | ftrace_init_module(mod, mod->ftrace_callsites, |
4337 | 4336 | mod->ftrace_callsites + | |
4338 | if (val == MODULE_STATE_COMING) | 4337 | mod->num_ftrace_callsites); |
4339 | ftrace_init_module(mod, mod->ftrace_callsites, | ||
4340 | mod->ftrace_callsites + | ||
4341 | mod->num_ftrace_callsites); | ||
4342 | return 0; | ||
4343 | } | 4338 | } |
4344 | 4339 | ||
4345 | static int ftrace_module_notify_exit(struct notifier_block *self, | 4340 | static int ftrace_module_notify_exit(struct notifier_block *self, |
@@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, | |||
4353 | return 0; | 4348 | return 0; |
4354 | } | 4349 | } |
4355 | #else | 4350 | #else |
4356 | static int ftrace_module_notify_enter(struct notifier_block *self, | ||
4357 | unsigned long val, void *data) | ||
4358 | { | ||
4359 | return 0; | ||
4360 | } | ||
4361 | static int ftrace_module_notify_exit(struct notifier_block *self, | 4351 | static int ftrace_module_notify_exit(struct notifier_block *self, |
4362 | unsigned long val, void *data) | 4352 | unsigned long val, void *data) |
4363 | { | 4353 | { |
@@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, | |||
4365 | } | 4355 | } |
4366 | #endif /* CONFIG_MODULES */ | 4356 | #endif /* CONFIG_MODULES */ |
4367 | 4357 | ||
4368 | struct notifier_block ftrace_module_enter_nb = { | ||
4369 | .notifier_call = ftrace_module_notify_enter, | ||
4370 | .priority = INT_MAX, /* Run before anything that can use kprobes */ | ||
4371 | }; | ||
4372 | |||
4373 | struct notifier_block ftrace_module_exit_nb = { | 4358 | struct notifier_block ftrace_module_exit_nb = { |
4374 | .notifier_call = ftrace_module_notify_exit, | 4359 | .notifier_call = ftrace_module_notify_exit, |
4375 | .priority = INT_MIN, /* Run after anything that can remove kprobes */ | 4360 | .priority = INT_MIN, /* Run after anything that can remove kprobes */ |
@@ -4403,10 +4388,6 @@ void __init ftrace_init(void) | |||
4403 | __start_mcount_loc, | 4388 | __start_mcount_loc, |
4404 | __stop_mcount_loc); | 4389 | __stop_mcount_loc); |
4405 | 4390 | ||
4406 | ret = register_module_notifier(&ftrace_module_enter_nb); | ||
4407 | if (ret) | ||
4408 | pr_warning("Failed to register trace ftrace module enter notifier\n"); | ||
4409 | |||
4410 | ret = register_module_notifier(&ftrace_module_exit_nb); | 4391 | ret = register_module_notifier(&ftrace_module_exit_nb); |
4411 | if (ret) | 4392 | if (ret) |
4412 | pr_warning("Failed to register trace ftrace module exit notifier\n"); | 4393 | pr_warning("Failed to register trace ftrace module exit notifier\n"); |
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 925f537f07d1..4747b476a030 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) | |||
77 | data->ops->func(data); | 77 | data->ops->func(data); |
78 | continue; | 78 | continue; |
79 | } | 79 | } |
80 | filter = rcu_dereference(data->filter); | 80 | filter = rcu_dereference_sched(data->filter); |
81 | if (filter && !filter_match_preds(filter, rec)) | 81 | if (filter && !filter_match_preds(filter, rec)) |
82 | continue; | 82 | continue; |
83 | if (data->cmd_ops->post_trigger) { | 83 | if (data->cmd_ops->post_trigger) { |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5b781d2be383..ffd56351b521 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -58,12 +58,16 @@ int ftrace_create_function_files(struct trace_array *tr, | |||
58 | { | 58 | { |
59 | int ret; | 59 | int ret; |
60 | 60 | ||
61 | /* The top level array uses the "global_ops". */ | 61 | /* |
62 | if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { | 62 | * The top level array uses the "global_ops", and the files are |
63 | ret = allocate_ftrace_ops(tr); | 63 | * created on boot up. |
64 | if (ret) | 64 | */ |
65 | return ret; | 65 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) |
66 | } | 66 | return 0; |
67 | |||
68 | ret = allocate_ftrace_ops(tr); | ||
69 | if (ret) | ||
70 | return ret; | ||
67 | 71 | ||
68 | ftrace_create_filter_files(tr->ops, parent); | 72 | ftrace_create_filter_files(tr->ops, parent); |
69 | 73 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 930e51462dc8..c082a7441345 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -732,9 +732,15 @@ static int uprobe_buffer_enable(void) | |||
732 | 732 | ||
733 | static void uprobe_buffer_disable(void) | 733 | static void uprobe_buffer_disable(void) |
734 | { | 734 | { |
735 | int cpu; | ||
736 | |||
735 | BUG_ON(!mutex_is_locked(&event_mutex)); | 737 | BUG_ON(!mutex_is_locked(&event_mutex)); |
736 | 738 | ||
737 | if (--uprobe_buffer_refcnt == 0) { | 739 | if (--uprobe_buffer_refcnt == 0) { |
740 | for_each_possible_cpu(cpu) | ||
741 | free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, | ||
742 | cpu)->buf); | ||
743 | |||
738 | free_percpu(uprobe_cpu_buffer); | 744 | free_percpu(uprobe_cpu_buffer); |
739 | uprobe_cpu_buffer = NULL; | 745 | uprobe_cpu_buffer = NULL; |
740 | } | 746 | } |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index ac5b23cf7212..6620e5837ce2 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
188 | WARN_ON_ONCE(1); | 188 | WARN_ON_ONCE(1); |
189 | return PTR_ERR(old); | 189 | return PTR_ERR(old); |
190 | } | 190 | } |
191 | release_probes(old); | ||
192 | 191 | ||
193 | /* | 192 | /* |
194 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | 193 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new |
@@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
200 | rcu_assign_pointer(tp->funcs, tp_funcs); | 199 | rcu_assign_pointer(tp->funcs, tp_funcs); |
201 | if (!static_key_enabled(&tp->key)) | 200 | if (!static_key_enabled(&tp->key)) |
202 | static_key_slow_inc(&tp->key); | 201 | static_key_slow_inc(&tp->key); |
202 | release_probes(old); | ||
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
@@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
221 | WARN_ON_ONCE(1); | 221 | WARN_ON_ONCE(1); |
222 | return PTR_ERR(old); | 222 | return PTR_ERR(old); |
223 | } | 223 | } |
224 | release_probes(old); | ||
225 | 224 | ||
226 | if (!tp_funcs) { | 225 | if (!tp_funcs) { |
227 | /* Removed last function */ | 226 | /* Removed last function */ |
@@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
232 | static_key_slow_dec(&tp->key); | 231 | static_key_slow_dec(&tp->key); |
233 | } | 232 | } |
234 | rcu_assign_pointer(tp->funcs, tp_funcs); | 233 | rcu_assign_pointer(tp->funcs, tp_funcs); |
234 | release_probes(old); | ||
235 | return 0; | 235 | return 0; |
236 | } | 236 | } |
237 | 237 | ||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0d8f6023fd8d..bf71b4b2d632 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) | |||
152 | 152 | ||
153 | /* Find the matching extent */ | 153 | /* Find the matching extent */ |
154 | extents = map->nr_extents; | 154 | extents = map->nr_extents; |
155 | smp_read_barrier_depends(); | 155 | smp_rmb(); |
156 | for (idx = 0; idx < extents; idx++) { | 156 | for (idx = 0; idx < extents; idx++) { |
157 | first = map->extent[idx].first; | 157 | first = map->extent[idx].first; |
158 | last = first + map->extent[idx].count - 1; | 158 | last = first + map->extent[idx].count - 1; |
@@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) | |||
176 | 176 | ||
177 | /* Find the matching extent */ | 177 | /* Find the matching extent */ |
178 | extents = map->nr_extents; | 178 | extents = map->nr_extents; |
179 | smp_read_barrier_depends(); | 179 | smp_rmb(); |
180 | for (idx = 0; idx < extents; idx++) { | 180 | for (idx = 0; idx < extents; idx++) { |
181 | first = map->extent[idx].first; | 181 | first = map->extent[idx].first; |
182 | last = first + map->extent[idx].count - 1; | 182 | last = first + map->extent[idx].count - 1; |
@@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) | |||
199 | 199 | ||
200 | /* Find the matching extent */ | 200 | /* Find the matching extent */ |
201 | extents = map->nr_extents; | 201 | extents = map->nr_extents; |
202 | smp_read_barrier_depends(); | 202 | smp_rmb(); |
203 | for (idx = 0; idx < extents; idx++) { | 203 | for (idx = 0; idx < extents; idx++) { |
204 | first = map->extent[idx].lower_first; | 204 | first = map->extent[idx].lower_first; |
205 | last = first + map->extent[idx].count - 1; | 205 | last = first + map->extent[idx].count - 1; |
@@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, | |||
615 | * were written before the count of the extents. | 615 | * were written before the count of the extents. |
616 | * | 616 | * |
617 | * To achieve this smp_wmb() is used on guarantee the write | 617 | * To achieve this smp_wmb() is used on guarantee the write |
618 | * order and smp_read_barrier_depends() is guaranteed that we | 618 | * order and smp_rmb() is guaranteed that we don't have crazy |
619 | * don't have crazy architectures returning stale data. | 619 | * architectures returning stale data. |
620 | * | ||
621 | */ | 620 | */ |
622 | mutex_lock(&id_map_mutex); | 621 | mutex_lock(&id_map_mutex); |
623 | 622 | ||
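
The user_namespace.c hunks swap smp_read_barrier_depends() for smp_rmb(): the readers load map->nr_extents and then read the extent array itself, and because the second access is not a pointer dereference that depends on the first value, a dependency barrier orders nothing here; a real read barrier is needed to pair with the writer's smp_wmb(). Schematically, paraphrasing map_write() and map_id_down() above:

    	/* writer, under id_map_mutex: publish the extents before the count */
    	map->extent[i] = new_extent;
    	smp_wmb();
    	map->nr_extents = new_nr_extents;

    	/* reader: observe the count, then the extents it covers */
    	extents = map->nr_extents;
    	smp_rmb();
    	for (idx = 0; idx < extents; idx++)
    		/* map->extent[idx] is fully initialised here */ ;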
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index e90089fd78e0..516203e665fc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -138,7 +138,11 @@ static void __touch_watchdog(void) | |||
138 | 138 | ||
139 | void touch_softlockup_watchdog(void) | 139 | void touch_softlockup_watchdog(void) |
140 | { | 140 | { |
141 | __this_cpu_write(watchdog_touch_ts, 0); | 141 | /* |
142 | * Preemption can be enabled. It doesn't matter which CPU's timestamp | ||
143 | * gets zeroed here, so use the raw_ operation. | ||
144 | */ | ||
145 | raw_cpu_write(watchdog_touch_ts, 0); | ||
142 | } | 146 | } |
143 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 147 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
144 | 148 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0ee63af30bd1..8edc87185427 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1916,6 +1916,12 @@ static void send_mayday(struct work_struct *work) | |||
1916 | 1916 | ||
1917 | /* mayday mayday mayday */ | 1917 | /* mayday mayday mayday */ |
1918 | if (list_empty(&pwq->mayday_node)) { | 1918 | if (list_empty(&pwq->mayday_node)) { |
1919 | /* | ||
1920 | * If @pwq is for an unbound wq, its base ref may be put at | ||
1921 | * any time due to an attribute change. Pin @pwq until the | ||
1922 | * rescuer is done with it. | ||
1923 | */ | ||
1924 | get_pwq(pwq); | ||
1919 | list_add_tail(&pwq->mayday_node, &wq->maydays); | 1925 | list_add_tail(&pwq->mayday_node, &wq->maydays); |
1920 | wake_up_process(wq->rescuer->task); | 1926 | wake_up_process(wq->rescuer->task); |
1921 | } | 1927 | } |
@@ -2398,6 +2404,7 @@ static int rescuer_thread(void *__rescuer) | |||
2398 | struct worker *rescuer = __rescuer; | 2404 | struct worker *rescuer = __rescuer; |
2399 | struct workqueue_struct *wq = rescuer->rescue_wq; | 2405 | struct workqueue_struct *wq = rescuer->rescue_wq; |
2400 | struct list_head *scheduled = &rescuer->scheduled; | 2406 | struct list_head *scheduled = &rescuer->scheduled; |
2407 | bool should_stop; | ||
2401 | 2408 | ||
2402 | set_user_nice(current, RESCUER_NICE_LEVEL); | 2409 | set_user_nice(current, RESCUER_NICE_LEVEL); |
2403 | 2410 | ||
@@ -2409,11 +2416,15 @@ static int rescuer_thread(void *__rescuer) | |||
2409 | repeat: | 2416 | repeat: |
2410 | set_current_state(TASK_INTERRUPTIBLE); | 2417 | set_current_state(TASK_INTERRUPTIBLE); |
2411 | 2418 | ||
2412 | if (kthread_should_stop()) { | 2419 | /* |
2413 | __set_current_state(TASK_RUNNING); | 2420 | * By the time the rescuer is requested to stop, the workqueue |
2414 | rescuer->task->flags &= ~PF_WQ_WORKER; | 2421 | * shouldn't have any work pending, but @wq->maydays may still have |
2415 | return 0; | 2422 | * pwq(s) queued. This can happen by non-rescuer workers consuming |
2416 | } | 2423 | * all the work items before the rescuer got to them. Go through |
2424 | * @wq->maydays processing before acting on should_stop so that the | ||
2425 | * list is always empty on exit. | ||
2426 | */ | ||
2427 | should_stop = kthread_should_stop(); | ||
2417 | 2428 | ||
2418 | /* see whether any pwq is asking for help */ | 2429 | /* see whether any pwq is asking for help */ |
2419 | spin_lock_irq(&wq_mayday_lock); | 2430 | spin_lock_irq(&wq_mayday_lock); |
@@ -2445,6 +2456,12 @@ repeat: | |||
2445 | process_scheduled_works(rescuer); | 2456 | process_scheduled_works(rescuer); |
2446 | 2457 | ||
2447 | /* | 2458 | /* |
2459 | * Put the reference grabbed by send_mayday(). @pool won't | ||
2460 | * go away while we're holding its lock. | ||
2461 | */ | ||
2462 | put_pwq(pwq); | ||
2463 | |||
2464 | /* | ||
2448 | * Leave this pool. If keep_working() is %true, notify a | 2465 | * Leave this pool. If keep_working() is %true, notify a |
2449 | * regular worker; otherwise, we end up with 0 concurrency | 2466 | * regular worker; otherwise, we end up with 0 concurrency |
2450 | * and stalling the execution. | 2467 | * and stalling the execution. |
@@ -2459,6 +2476,12 @@ repeat: | |||
2459 | 2476 | ||
2460 | spin_unlock_irq(&wq_mayday_lock); | 2477 | spin_unlock_irq(&wq_mayday_lock); |
2461 | 2478 | ||
2479 | if (should_stop) { | ||
2480 | __set_current_state(TASK_RUNNING); | ||
2481 | rescuer->task->flags &= ~PF_WQ_WORKER; | ||
2482 | return 0; | ||
2483 | } | ||
2484 | |||
2462 | /* rescuers should never participate in concurrency management */ | 2485 | /* rescuers should never participate in concurrency management */ |
2463 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); | 2486 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); |
2464 | schedule(); | 2487 | schedule(); |
@@ -4100,7 +4123,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, | |||
4100 | if (!pwq) { | 4123 | if (!pwq) { |
4101 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", | 4124 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", |
4102 | wq->name); | 4125 | wq->name); |
4103 | goto out_unlock; | 4126 | mutex_lock(&wq->mutex); |
4127 | goto use_dfl_pwq; | ||
4104 | } | 4128 | } |
4105 | 4129 | ||
4106 | /* | 4130 | /* |