21 files changed, 332 insertions, 317 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a2e62..2a202a84675 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
        default 1000 if HZ_1000
 config SCHED_HRTICK
-        def_bool HIGH_RES_TIMERS && X86
+        def_bool HIGH_RES_TIMERS
diff --git a/kernel/Makefile b/kernel/Makefile
index 985ddb7da4d..15ab63ffe64 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,8 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
            hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
            notifier.o ksysfs.o pm_qos_params.o sched_clock.o
+CFLAGS_REMOVE_sched.o = -mno-spe
 ifdef CONFIG_FTRACE
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aae04d..4699950e65b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
        struct audit_context *context = tsk->audit_context;
        enum audit_state     state;
-        BUG_ON(!context);
+        if (unlikely(!context))
+                return;
        /*
         * This happens only on certain architectures that make system
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43ab80..2cc409ce0a8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
        cpu_hotplug.refcount = 0;
 }
+cpumask_t cpu_active_map;
 #ifdef CONFIG_HOTPLUG_CPU
 void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
        int err = 0;
        cpu_maps_update_begin();
-        if (cpu_hotplug_disabled)
+        if (cpu_hotplug_disabled) {
                err = -EBUSY;
-        else
+                goto out;
-                err = _cpu_down(cpu, 0);
+        }
+        cpu_clear(cpu, cpu_active_map);
+        /*
+         * Make sure that all cpus did the reschedule and are not
+         * using a stale version of the cpu_active_map.
+         * This is not strictly necessary because stop_machine()
+         * that we run down the line already provides the required
+         * synchronization. But it's really a side effect and we do not
+         * want to depend on the innards of the stop_machine here.
+         */
+        synchronize_sched();
+        err = _cpu_down(cpu, 0);
+        if (cpu_online(cpu))
+                cpu_set(cpu, cpu_active_map);
+out:
        cpu_maps_update_done();
        return err;
 }
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
        }
        cpu_maps_update_begin();
-        if (cpu_hotplug_disabled)
+        if (cpu_hotplug_disabled) {
                err = -EBUSY;
-        else
+                goto out;
-                err = _cpu_up(cpu, 0);
+        }
+        err = _cpu_up(cpu, 0);
+        if (cpu_online(cpu))
+                cpu_set(cpu, cpu_active_map);
+out:
        cpu_maps_update_done();
        return err;
 }
@@ -413,7 +441,7 @@ void __ref enable_nonboot_cpus(void)
                goto out;
        printk("Enabling non-boot CPUs ...\n");
-        for_each_cpu_mask(cpu, frozen_cpus) {
+        for_each_cpu_mask_nr(cpu, frozen_cpus) {
                error = _cpu_up(cpu, 1);
                if (!error) {
                        printk("CPU%d is up\n", cpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d2cc67dac8b..d5738910c34 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
 *      partition_sched_domains().
 */
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
 {
        struct kfifo *q;        /* queue of cpusets to be scanned */
        struct cpuset *cp;      /* scans q */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be3601..3cfc0fefb5e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -217,6 +217,17 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
+int set_irq_wake_real(unsigned int irq, unsigned int on)
+{
+        struct irq_desc *desc = irq_desc + irq;
+        int ret = -ENXIO;
+        if (desc->chip->set_wake)
+                ret = desc->chip->set_wake(irq, on);
+        return ret;
+}
 /**
 *      set_irq_wake - control irq power management wakeup
 *      @irq:   interrupt to control
@@ -233,30 +244,34 @@ int set_irq_wake(unsigned int irq, unsigned int on)
 {
        struct irq_desc *desc = irq_desc + irq;
        unsigned long flags;
-        int ret = -ENXIO;
+        int ret = 0;
-        int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
        /* wakeup-capable irqs can be shared between drivers that
         * don't need to have the same sleep mode behaviors.
         */
        spin_lock_irqsave(&desc->lock, flags);
        if (on) {
-                if (desc->wake_depth++ == 0)
+                if (desc->wake_depth++ == 0) {
-                        desc->status |= IRQ_WAKEUP;
+                        ret = set_irq_wake_real(irq, on);
-                else
+                        if (ret)
-                        set_wake = NULL;
+                                desc->wake_depth = 0;
+                        else
+                                desc->status |= IRQ_WAKEUP;
+                }
        } else {
                if (desc->wake_depth == 0) {
                        printk(KERN_WARNING "Unbalanced IRQ %d "
                                        "wake disable\n", irq);
                        WARN_ON(1);
-                } else if (--desc->wake_depth == 0)
+                } else if (--desc->wake_depth == 0) {
-                        desc->status &= ~IRQ_WAKEUP;
+                        ret = set_irq_wake_real(irq, on);
-                else
+                        if (ret)
-                        set_wake = NULL;
+                                desc->wake_depth = 1;
+                        else
+                                desc->status &= ~IRQ_WAKEUP;
+                }
        }
-        if (set_wake)
-                ret = desc->chip->set_wake(irq, on);
        spin_unlock_irqrestore(&desc->lock, flags);
        return ret;
 }
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 16eeeaa9d61..6f8696c502f 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -106,7 +106,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
                 */
                cpus_and(cpumask, rcp->cpumask, cpu_online_map);
                cpu_clear(rdp->cpu, cpumask);
-                for_each_cpu_mask(cpu, cpumask)
+                for_each_cpu_mask_nr(cpu, cpumask)
                        smp_send_reschedule(cpu);
        }
 }
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 6f62b77d93c..27827931ca0 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -756,7 +756,7 @@ rcu_try_flip_idle(void)
        /* Now ask each CPU for acknowledgement of the flip. */
-        for_each_cpu_mask(cpu, rcu_cpu_online_map) {
+        for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
                per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
                dyntick_save_progress_counter(cpu);
        }
@@ -774,7 +774,7 @@ rcu_try_flip_waitack(void)
        int cpu;
        RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
-        for_each_cpu_mask(cpu, rcu_cpu_online_map)
+        for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
                if (rcu_try_flip_waitack_needed(cpu) &&
                    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
                        RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
@@ -806,7 +806,7 @@ rcu_try_flip_waitzero(void)
        /* Check to see if the sum of the "last" counters is zero. */
        RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
-        for_each_cpu_mask(cpu, rcu_cpu_online_map)
+        for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
                sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
        if (sum != 0) {
                RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -821,7 +821,7 @@ rcu_try_flip_waitzero(void)
        smp_mb();  /*  ^^^^^^^^^^^^ */
        /* Call for a memory barrier from each CPU. */
-        for_each_cpu_mask(cpu, rcu_cpu_online_map) {
+        for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
                per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
                dyntick_save_progress_counter(cpu);
        }
@@ -841,7 +841,7 @@ rcu_try_flip_waitmb(void)
        int cpu;
        RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
-        for_each_cpu_mask(cpu, rcu_cpu_online_map)
+        for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
                if (rcu_try_flip_waitmb_needed(cpu) &&
                    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
                        RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
diff --git a/kernel/sched.c b/kernel/sched.c
index b1104ea5d25..6acf749d333 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -571,8 +571,10 @@ struct rq {
 #endif
 #ifdef CONFIG_SCHED_HRTICK
-        unsigned long hrtick_flags;
+#ifdef CONFIG_SMP
-        ktime_t hrtick_expire;
+        int hrtick_csd_pending;
+        struct call_single_data hrtick_csd;
+#endif
        struct hrtimer hrtick_timer;
 #endif
@@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
        return rq;
 }
-static void __resched_task(struct task_struct *p, int tif_bit);
-static inline void resched_task(struct task_struct *p)
-{
-        __resched_task(p, TIF_NEED_RESCHED);
-}
 #ifdef CONFIG_SCHED_HRTICK
 /*
 * Use HR-timers to deliver accurate preemption points.
@@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
 * When we get rescheduled we reprogram the hrtick_timer outside of the
 * rq->lock.
 */
-static inline void resched_hrt(struct task_struct *p)
-{
-        __resched_task(p, TIF_HRTICK_RESCHED);
-}
-static inline void resched_rq(struct rq *rq)
-{
-        unsigned long flags;
-        spin_lock_irqsave(&rq->lock, flags);
-        resched_task(rq->curr);
-        spin_unlock_irqrestore(&rq->lock, flags);
-}
-enum {
-        HRTICK_SET,             /* re-programm hrtick_timer */
-        HRTICK_RESET,           /* not a new slice */
-        HRTICK_BLOCK,           /* stop hrtick operations */
-};
 /*
 * Use hrtick when:
@@ -1030,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq)
 {
        if (!sched_feat(HRTICK))
                return 0;
-        if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
+        if (!cpu_active(cpu_of(rq)))
                return 0;
        return hrtimer_is_hres_active(&rq->hrtick_timer);
 }
-/*
- * Called to set the hrtick timer state.
- *
- * called with rq->lock held and irqs disabled
- */
-static void hrtick_start(struct rq *rq, u64 delay, int reset)
-{
-        assert_spin_locked(&rq->lock);
-        /*
-         * preempt at: now + delay
-         */
-        rq->hrtick_expire =
-                ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
-        /*
-         * indicate we need to program the timer
-         */
-        __set_bit(HRTICK_SET, &rq->hrtick_flags);
-        if (reset)
-                __set_bit(HRTICK_RESET, &rq->hrtick_flags);
-        /*
-         * New slices are called from the schedule path and don't need a
-         * forced reschedule.
-         */
-        if (reset)
-                resched_hrt(rq->curr);
-}
 static void hrtick_clear(struct rq *rq)
 {
        if (hrtimer_active(&rq->hrtick_timer))
@@ -1071,32 +1018,6 @@ static void hrtick_clear(struct rq *rq)
 }
 /*
- * Update the timer from the possible pending state.
- */
-static void hrtick_set(struct rq *rq)
-{
-        ktime_t time;
-        int set, reset;
-        unsigned long flags;
-        WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-        spin_lock_irqsave(&rq->lock, flags);
-        set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
-        reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
-        time = rq->hrtick_expire;
-        clear_thread_flag(TIF_HRTICK_RESCHED);
-        spin_unlock_irqrestore(&rq->lock, flags);
-        if (set) {
-                hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
-                if (reset && !hrtimer_active(&rq->hrtick_timer))
-                        resched_rq(rq);
-        } else
-                hrtick_clear(rq);
-}
-/*
 * High-resolution timer tick.
 * Runs from hardirq context with interrupts disabled.
 */
@@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 }
 #ifdef CONFIG_SMP
-static void hotplug_hrtick_disable(int cpu)
+/*
+ * called from hardirq (IPI) context
+ */
+static void __hrtick_start(void *arg)
 {
-        struct rq *rq = cpu_rq(cpu);
+        struct rq *rq = arg;
-        unsigned long flags;
-        spin_lock_irqsave(&rq->lock, flags);
-        rq->hrtick_flags = 0;
-        __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-        spin_unlock_irqrestore(&rq->lock, flags);
-        hrtick_clear(rq);
+        spin_lock(&rq->lock);
+        hrtimer_restart(&rq->hrtick_timer);
+        rq->hrtick_csd_pending = 0;
+        spin_unlock(&rq->lock);
 }
-static void hotplug_hrtick_enable(int cpu)
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay)
 {
-        struct rq *rq = cpu_rq(cpu);
+        struct hrtimer *timer = &rq->hrtick_timer;
-        unsigned long flags;
+        ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
-        spin_lock_irqsave(&rq->lock, flags);
+        timer->expires = time;
-        __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-        spin_unlock_irqrestore(&rq->lock, flags);
+        if (rq == this_rq()) {
+                hrtimer_restart(timer);
+        } else if (!rq->hrtick_csd_pending) {
+                __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+                rq->hrtick_csd_pending = 1;
+        }
 }
 static int
@@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
        case CPU_DOWN_PREPARE_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-                hotplug_hrtick_disable(cpu);
+                hrtick_clear(cpu_rq(cpu));
-                return NOTIFY_OK;
-        case CPU_UP_PREPARE:
-        case CPU_UP_PREPARE_FROZEN:
-        case CPU_DOWN_FAILED:
-        case CPU_DOWN_FAILED_FROZEN:
-        case CPU_ONLINE:
-        case CPU_ONLINE_FROZEN:
-                hotplug_hrtick_enable(cpu);
                return NOTIFY_OK;
        }
@@ -1170,46 +1092,45 @@ static void init_hrtick(void)
 {
        hotcpu_notifier(hotplug_hrtick, 0);
 }
-#endif /* CONFIG_SMP */
+#else
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay)
+{
+        hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+}
-static void init_rq_hrtick(struct rq *rq)
+static void init_hrtick(void)
 {
-        rq->hrtick_flags = 0;
-        hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-        rq->hrtick_timer.function = hrtick;
-        rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
+#endif /* CONFIG_SMP */
-void hrtick_resched(void)
+static void init_rq_hrtick(struct rq *rq)
 {
-        struct rq *rq;
+#ifdef CONFIG_SMP
-        unsigned long flags;
+        rq->hrtick_csd_pending = 0;
-        if (!test_thread_flag(TIF_HRTICK_RESCHED))
+        rq->hrtick_csd.flags = 0;
-                return;
+        rq->hrtick_csd.func = __hrtick_start;
+        rq->hrtick_csd.info = rq;
+#endif
-        local_irq_save(flags);
+        hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-        rq = cpu_rq(smp_processor_id());
+        rq->hrtick_timer.function = hrtick;
-        hrtick_set(rq);
+        rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
-        local_irq_restore(flags);
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
 {
 }
-static inline void hrtick_set(struct rq *rq)
-{
-}
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
-void hrtick_resched(void)
-{
-}
 static inline void init_hrtick(void)
 {
 }
@@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
-static void __resched_task(struct task_struct *p, int tif_bit)
+static void resched_task(struct task_struct *p)
 {
        int cpu;
        assert_spin_locked(&task_rq(p)->lock);
-        if (unlikely(test_tsk_thread_flag(p, tif_bit)))
+        if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
                return;
-        set_tsk_thread_flag(p, tif_bit);
+        set_tsk_thread_flag(p, TIF_NEED_RESCHED);
        cpu = task_cpu(p);
        if (cpu == smp_processor_id())
@@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
 #endif /* CONFIG_NO_HZ */
 #else /* !CONFIG_SMP */
-static void __resched_task(struct task_struct *p, int tif_bit)
+static void resched_task(struct task_struct *p)
 {
        assert_spin_locked(&task_rq(p)->lock);
-        set_tsk_thread_flag(p, tif_bit);
+        set_tsk_need_resched(p);
 }
 #endif /* CONFIG_SMP */
@@ -2108,7 +2029,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
                /* Tally up the load of all CPUs in the group */
                avg_load = 0;
-                for_each_cpu_mask(i, group->cpumask) {
+                for_each_cpu_mask_nr(i, group->cpumask) {
                        /* Bias balancing toward cpus of our domain */
                        if (local_group)
                                load = source_load(i, load_idx);
@@ -2150,7 +2071,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
        /* Traverse only the allowed CPUs */
        cpus_and(*tmp, group->cpumask, p->cpus_allowed);
-        for_each_cpu_mask(i, *tmp) {
+        for_each_cpu_mask_nr(i, *tmp) {
                load = weighted_cpuload(i);
                if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2881,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
        rq = task_rq_lock(p, &flags);
        if (!cpu_isset(dest_cpu, p->cpus_allowed)
-            || unlikely(cpu_is_offline(dest_cpu)))
+            || unlikely(!cpu_active(dest_cpu)))
                goto out;
        /* force the process onto the specified CPU */
@@ -3168,7 +3089,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                max_cpu_load = 0;
                min_cpu_load = ~0UL;
-                for_each_cpu_mask(i, group->cpumask) {
+                for_each_cpu_mask_nr(i, group->cpumask) {
                        struct rq *rq;
                        if (!cpu_isset(i, *cpus))
@@ -3447,7 +3368,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
        unsigned long max_load = 0;
        int i;
-        for_each_cpu_mask(i, group->cpumask) {
+        for_each_cpu_mask_nr(i, group->cpumask) {
                unsigned long wl;
                if (!cpu_isset(i, *cpus))
@@ -3849,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
                /*
                 * If we are going offline and still the leader, give up!
                 */
-                if (cpu_is_offline(cpu) &&
+                if (!cpu_active(cpu) &&
                    atomic_read(&nohz.load_balancer) == cpu) {
                        if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                BUG();
@@ -3989,7 +3910,7 @@ static void run_rebalance_domains(struct softirq_action *h)
                int balance_cpu;
                cpu_clear(this_cpu, cpus);
-                for_each_cpu_mask(balance_cpu, cpus) {
+                for_each_cpu_mask_nr(balance_cpu, cpus) {
                        /*
                         * If this cpu gets work to do, stop the load balancing
                         * work being done for other cpus. Next load
@@ -4395,7 +4316,7 @@ asmlinkage void __sched schedule(void)
        struct task_struct *prev, *next;
        unsigned long *switch_count;
        struct rq *rq;
-        int cpu, hrtick = sched_feat(HRTICK);
+        int cpu;
 need_resched:
        preempt_disable();
@@ -4410,7 +4331,7 @@ need_resched_nonpreemptible:
        schedule_debug(prev);
-        if (hrtick)
+        if (sched_feat(HRTICK))
                hrtick_clear(rq);
        /*
@@ -4457,9 +4378,6 @@ need_resched_nonpreemptible:
        } else
                spin_unlock_irq(&rq->lock);
-        if (hrtick)
-                hrtick_set(rq);
        if (unlikely(reacquire_kernel_lock(current) < 0))
                goto need_resched_nonpreemptible;
@@ -5876,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
        struct rq *rq_dest, *rq_src;
        int ret = 0, on_rq;
-        if (unlikely(cpu_is_offline(dest_cpu)))
+        if (unlikely(!cpu_active(dest_cpu)))
                return ret;
        rq_src = cpu_rq(src_cpu);
@@ -6768,7 +6686,8 @@ static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
-        int ints[NR_CPUS], i;
+        static int __initdata ints[NR_CPUS];
+        int i;
        str = get_options(str, ARRAY_SIZE(ints), ints);
        cpus_clear(cpu_isolated_map);
@@ -6802,7 +6721,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
        cpus_clear(*covered);
-        for_each_cpu_mask(i, *span) {
+        for_each_cpu_mask_nr(i, *span) {
                struct sched_group *sg;
                int group = group_fn(i, cpu_map, &sg, tmpmask);
                int j;
@@ -6813,7 +6732,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
                cpus_clear(sg->cpumask);
                sg->__cpu_power = 0;
-                for_each_cpu_mask(j, *span) {
+                for_each_cpu_mask_nr(j, *span) {
                        if (group_fn(j, cpu_map, NULL, tmpmask) != group)
                                continue;
@@ -7013,7 +6932,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
        if (!sg)
                return;
        do {
-                for_each_cpu_mask(j, sg->cpumask) {
+                for_each_cpu_mask_nr(j, sg->cpumask) {
                        struct sched_domain *sd;
                        sd = &per_cpu(phys_domains, j);
@@ -7038,7 +6957,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 {
        int cpu, i;
-        for_each_cpu_mask(cpu, *cpu_map) {
+        for_each_cpu_mask_nr(cpu, *cpu_map) {
                struct sched_group **sched_group_nodes
                        = sched_group_nodes_bycpu[cpu];
@@ -7277,7 +7196,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
        /*
         * Set up domains for cpus specified by the cpu_map.
         */
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                struct sched_domain *sd = NULL, *p;
                SCHED_CPUMASK_VAR(nodemask, allmasks);
@@ -7344,7 +7263,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_SMT
        /* Set up CPU (sibling) groups */
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
                SCHED_CPUMASK_VAR(send_covered, allmasks);
@@ -7361,7 +7280,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_MC
        /* Set up multi-core groups */
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                SCHED_CPUMASK_VAR(this_core_map, allmasks);
                SCHED_CPUMASK_VAR(send_covered, allmasks);
@@ -7428,7 +7347,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                        goto error;
                }
                sched_group_nodes[i] = sg;
-                for_each_cpu_mask(j, *nodemask) {
+                for_each_cpu_mask_nr(j, *nodemask) {
                        struct sched_domain *sd;
                        sd = &per_cpu(node_domains, j);
@@ -7474,21 +7393,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
        /* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                struct sched_domain *sd = &per_cpu(cpu_domains, i);
                init_sched_groups_power(i, sd);
        }
 #endif
 #ifdef CONFIG_SCHED_MC
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                struct sched_domain *sd = &per_cpu(core_domains, i);
                init_sched_groups_power(i, sd);
        }
 #endif
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                struct sched_domain *sd = &per_cpu(phys_domains, i);
                init_sched_groups_power(i, sd);
@@ -7508,7 +7427,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
        /* Attach the domains */
-        for_each_cpu_mask(i, *cpu_map) {
+        for_each_cpu_mask_nr(i, *cpu_map) {
                struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
@@ -7553,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
 }
 /*
- * Free current domain masks.
- * Called after all cpus are attached to NULL domain.
- */
-static void free_sched_domains(void)
-{
-        ndoms_cur = 0;
-        if (doms_cur != &fallback_doms)
-                kfree(doms_cur);
-        doms_cur = &fallback_doms;
-}
-/*
 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
 * For now this just excludes isolated cpus, but could be used to
 * exclude other special cases in the future.
@@ -7603,7 +7510,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
        unregister_sched_domain_sysctl();
-        for_each_cpu_mask(i, *cpu_map)
+        for_each_cpu_mask_nr(i, *cpu_map)
                cpu_attach_domain(NULL, &def_root_domain, i);
        synchronize_sched();
        arch_destroy_sched_domains(cpu_map, &tmpmask);
@@ -7642,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
 * ownership of it and will kfree it when done with it. If the caller
 * failed the kmalloc call, then it can pass in doms_new == NULL,
 * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms'.
+ * 'fallback_doms'; it also forces the domains to be rebuilt.
 *
 * Call with hotplug lock held
 */
@@ -7656,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
        /* always unregister in case we don't destroy any domains */
        unregister_sched_domain_sysctl();
-        if (doms_new == NULL) {
+        if (doms_new == NULL)
-                ndoms_new = 1;
+                ndoms_new = 0;
-                doms_new = &fallback_doms;
-                cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-                dattr_new = NULL;
-        }
        /* Destroy deleted domains */
        for (i = 0; i < ndoms_cur; i++) {
@@ -7676,6 +7579,14 @@ match1:
                ;
        }
+        if (doms_new == NULL) {
+                ndoms_cur = 0;
+                ndoms_new = 1;
+                doms_new = &fallback_doms;
+                cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+                dattr_new = NULL;
+        }
        /* Build new domains */
        for (i = 0; i < ndoms_new; i++) {
                for (j = 0; j < ndoms_cur; j++) {
@@ -7706,17 +7617,10 @@ match2:
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 int arch_reinit_sched_domains(void)
 {
-        int err;
        get_online_cpus();
-        mutex_lock(&sched_domains_mutex);
+        rebuild_sched_domains();
-        detach_destroy_domains(&cpu_online_map);
-        free_sched_domains();
-        err = arch_init_sched_domains(&cpu_online_map);
-        mutex_unlock(&sched_domains_mutex);
        put_online_cpus();
+        return 0;
-        return err;
 }
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7786,59 +7690,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#ifndef CONFIG_CPUSETS
 /*
- * Force a reinitialization of the sched domains hierarchy. The domains
+ * Add online and remove offline CPUs from the scheduler domains.
- * and groups cannot be updated in place without racing with the balancing
+ * When cpusets are enabled they take over this function.
- * code, so we temporarily attach all running cpus to the NULL domain
- * which will prevent rebalancing while the sched domains are recalculated.
 */
 static int update_sched_domains(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
 {
+        switch (action) {
+        case CPU_ONLINE:
+        case CPU_ONLINE_FROZEN:
+        case CPU_DEAD:
+        case CPU_DEAD_FROZEN:
+                partition_sched_domains(0, NULL, NULL);
+                return NOTIFY_OK;
+        default:
+                return NOTIFY_DONE;
+        }
+}
+#endif
+static int update_runtime(struct notifier_block *nfb,
+                                unsigned long action, void *hcpu)
+{
        int cpu = (int)(long)hcpu;
        switch (action) {
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                disable_runtime(cpu_rq(cpu));
-                /* fall-through */
-        case CPU_UP_PREPARE:
-        case CPU_UP_PREPARE_FROZEN:
-                detach_destroy_domains(&cpu_online_map);
-                free_sched_domains();
                return NOTIFY_OK;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                enable_runtime(cpu_rq(cpu));
-                /* fall-through */
+                return NOTIFY_OK;
-        case CPU_UP_CANCELED:
-        case CPU_UP_CANCELED_FROZEN:
-        case CPU_DEAD:
-        case CPU_DEAD_FROZEN:
-                /*
-                 * Fall through and re-initialise the domains.
-                 */
-                break;
        default:
                return NOTIFY_DONE;
        }
-#ifndef CONFIG_CPUSETS
-        /*
-         * Create default domain partitioning if cpusets are disabled.
-         * Otherwise we let cpusets rebuild the domains based on the
-         * current setup.
-         */
-        /* The hotplug lock is already held by cpu_up/cpu_down */
-        arch_init_sched_domains(&cpu_online_map);
-#endif
-        return NOTIFY_OK;
 }
 void __init sched_init_smp(void)
@@ -7858,8 +7752,15 @@ void __init sched_init_smp(void)
                cpu_set(smp_processor_id(), non_isolated_cpus);
        mutex_unlock(&sched_domains_mutex);
        put_online_cpus();
+#ifndef CONFIG_CPUSETS
        /* XXX: Theoretical race here - CPU may be hotplugged now */
        hotcpu_notifier(update_sched_domains, 0);
+#endif
+        /* RT runtime code needs to handle some hotplug events */
+        hotcpu_notifier(update_runtime, 0);
        init_hrtick();
        /* Move init over to a non-isolated CPU */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2aa987027d..cf2cd6ce4cb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -878,7 +878,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 #ifdef CONFIG_SCHED_HRTICK
 static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
-        int requeue = rq->curr == p;
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
@@ -899,10 +898,10 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
                 * Don't schedule slices shorter than 10000ns, that just
                 * doesn't make sense. Rely on vruntime for fairness.
                 */
-                if (!requeue)
+                if (rq->curr != p)
                        delta = max(10000LL, delta);
-                hrtick_start(rq, delta, requeue);
+                hrtick_start(rq, delta);
        }
 }
 #else /* !CONFIG_SCHED_HRTICK */
@@ -1004,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
 * not idle and an idle cpu is available.  The span of cpus to
 * search starts with cpus closest then further out as needed,
 * so we always favor a closer, idle cpu.
+ * Domains may include CPUs that are not usable for migration,
+ * hence we need to mask them out (cpu_active_map)
 *
 * Returns the CPU we should wake onto.
 */
@@ -1031,7 +1032,8 @@ static int wake_idle(int cpu, struct task_struct *p)
                    || ((sd->flags & SD_WAKE_IDLE_FAR)
                        && !task_hot(p, task_rq(p)->clock, sd))) {
                        cpus_and(tmp, sd->span, p->cpus_allowed);
-                        for_each_cpu_mask(i, tmp) {
+                        cpus_and(tmp, tmp, cpu_active_map);
+                        for_each_cpu_mask_nr(i, tmp) {
                                if (idle_cpu(i)) {
                                        if (i != task_cpu(p)) {
                                                schedstat_inc(p,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 47ceac9e855..f85a76363ee 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -240,7 +240,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
        spin_lock(&rt_b->rt_runtime_lock);
        rt_period = ktime_to_ns(rt_b->rt_period);
-        for_each_cpu_mask(i, rd->span) {
+        for_each_cpu_mask_nr(i, rd->span) {
                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                s64 diff;
@@ -505,7 +505,9 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
        rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
        if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+#ifdef CONFIG_SMP
                struct rq *rq = rq_of_rt_rq(rt_rq);
+#endif
                rt_rq->highest_prio = rt_se_prio(rt_se);
 #ifdef CONFIG_SMP
@@ -599,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
        if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                return;
-        if (rt_se->nr_cpus_allowed == 1)
+        list_add_tail(&rt_se->run_list, queue);
-                list_add(&rt_se->run_list, queue);
-        else
-                list_add_tail(&rt_se->run_list, queue);
        __set_bit(rt_se_prio(rt_se), array->bitmap);
        inc_rt_tasks(rt_se, rt_rq);
@@ -688,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
-static
+static void
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 {
-        struct rt_prio_array *array = &rt_rq->active;
        if (on_rt_rq(rt_se)) {
-                list_del_init(&rt_se->run_list);
+                struct rt_prio_array *array = &rt_rq->active;
-                list_add_tail(&rt_se->run_list,
+                struct list_head *queue = array->queue + rt_se_prio(rt_se);
-                              array->queue + rt_se_prio(rt_se));
+                if (head)
+                        list_move(&rt_se->run_list, queue);
+                else
+                        list_move_tail(&rt_se->run_list, queue);
        }
 }
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
 {
        struct sched_rt_entity *rt_se = &p->rt;
        struct rt_rq *rt_rq;
        for_each_sched_rt_entity(rt_se) {
                rt_rq = rt_rq_of_se(rt_se);
-                requeue_rt_entity(rt_rq, rt_se);
+                requeue_rt_entity(rt_rq, rt_se, head);
        }
 }
 static void yield_task_rt(struct rq *rq)
 {
-        requeue_task_rt(rq, rq->curr);
+        requeue_task_rt(rq, rq->curr, 0);
 }
 #ifdef CONFIG_SMP
@@ -753,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
         */
        return task_cpu(p);
 }
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+        cpumask_t mask;
+        if (rq->curr->rt.nr_cpus_allowed == 1)
+                return;
+        if (p->rt.nr_cpus_allowed != 1
+            && cpupri_find(&rq->rd->cpupri, p, &mask))
+                return;
+        if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+                return;
+        /*
+         * There appear to be other CPUs that can accept
+         * current, and none that can run 'p', so let's reschedule
+         * to try and push current away:
+         */
+        requeue_task_rt(rq, p, 1);
+        resched_task(rq->curr);
+}
 #endif /* CONFIG_SMP */
 /*
@@ -778,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
         * to move current somewhere else, making room for our non-migratable
         * task.
         */
-        if((p->prio == rq->curr->prio)
+        if (p->prio == rq->curr->prio && !need_resched())
-           && p->rt.nr_cpus_allowed == 1
+                check_preempt_equal_prio(rq, p);
-           && rq->curr->rt.nr_cpus_allowed != 1) {
-                cpumask_t mask;
-                if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-                        /*
-                         * There appears to be other cpus that can accept
-                         * current, so lets reschedule to try and push it away
-                         */
-                        resched_task(rq->curr);
-        }
 #endif
 }
@@ -922,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
                return -1; /* No targets found */
        /*
+         * Only consider CPUs that are usable for migration.
+         * I guess we might want to change cpupri_find() to ignore those
+         * in the first place.
+         */
+        cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+        /*
         * At this point we have built a mask of cpus representing the
         * lowest priority tasks in the system.  Now we want to elect
         * the best one based on our affinity and topology.
@@ -1107,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq)
        next = pick_next_task_rt(this_rq);
-        for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
+        for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;
@@ -1415,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
         * on the queue:
         */
        if (p->rt.run_list.prev != p->rt.run_list.next) {
-                requeue_task_rt(rq, p);
+                requeue_task_rt(rq, p, 0);
                set_tsk_need_resched(p);
        }
 }
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78185e..7bd8d1aadd5 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
+#include <linux/lockdep.h>
 #include <linux/notifier.h>
 #include <linux/module.h>
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 static int __read_mostly did_panic;
-unsigned long __read_mostly softlockup_thresh = 60;
+int __read_mostly softlockup_thresh = 60;
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * soft-lockup occurs:
+ */
+unsigned int __read_mostly softlockup_panic =
+                                CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+static int __init softlockup_panic_setup(char *str)
+{
+        softlockup_panic = simple_strtoul(str, NULL, 0);
+        return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@ void softlockup_tick(void)
        struct pt_regs *regs = get_irq_regs();
        unsigned long now;
+        /* Is detection switched off? */
+        if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
+                /* Avoid a false trigger if detection is switched back on */
+                if (touch_timestamp)
+                        per_cpu(touch_timestamp, this_cpu) = 0;
+                return;
+        }
        if (touch_timestamp == 0) {
                __touch_softlockup_watchdog();
                return;
@@ -92,11 +116,8 @@ void softlockup_tick(void)
        print_timestamp = per_cpu(print_timestamp, this_cpu);
        /* report at most once a second */
-        if ((print_timestamp >= touch_timestamp &&
+        if (print_timestamp == touch_timestamp || did_panic)
-                        print_timestamp < (touch_timestamp + 1)) ||
-                        did_panic || !per_cpu(watchdog_task, this_cpu)) {
                return;
-        }
        /* do not print during early bootup: */
        if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@ void softlockup_tick(void)
        now = get_timestamp(this_cpu);
-        /* Wake up the high-prio watchdog task every second: */
+        /*
-        if (now > (touch_timestamp + 1))
+         * Wake up the high-prio watchdog task twice per
+         * threshold timespan.
+         */
+        if (now > touch_timestamp + softlockup_thresh/2)
                wake_up_process(per_cpu(watchdog_task, this_cpu));
        /* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@ void softlockup_tick(void)
                        this_cpu, now - touch_timestamp,
                        current->comm, task_pid_nr(current));
        print_modules();
+        print_irqtrace_events(current);
        if (regs)
                show_regs(regs);
        else
                dump_stack();
        spin_unlock(&print_lock);
+        if (softlockup_panic)
+                panic("softlockup: hung tasks");
 }
 /*
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
        t->last_switch_timestamp = now;
        touch_nmi_watchdog();
+        if (softlockup_panic)
+                panic("softlockup: blocked tasks");
 }
 /*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba9b2054ecb..738b411ff2d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,8 +33,9 @@ static int stopmachine(void *cpu)
 {
        int irqs_disabled = 0;
        int prepared = 0;
+        cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
-        set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
+        set_cpus_allowed_ptr(current, cpumask);
        /* Ack: we are alive */
        smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b859e6b5a76..2a7b9d88706 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 /* Constants used for minimum and  maximum */
-#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
 static int one = 1;
 #endif
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 static int sixty = 60;
+static int neg_one = -1;
 #endif
 #ifdef CONFIG_MMU
@@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = {
 #ifdef CONFIG_DETECT_SOFTLOCKUP
        {
                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "softlockup_panic",
+                .data           = &softlockup_panic,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec_minmax,
+                .strategy       = &sysctl_intvec,
+                .extra1         = &zero,
+                .extra2         = &one,
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "softlockup_thresh",
                .data           = &softlockup_thresh,
-                .maxlen         = sizeof(unsigned long),
+                .maxlen         = sizeof(int),
                .mode           = 0644,
-                .proc_handler   = &proc_doulongvec_minmax,
+                .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
-                .extra1         = &one,
+                .extra1         = &neg_one,
                .extra2         = &sixty,
        },
        {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4a23517169a..06b17547f4e 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -301,7 +301,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
                return -EINVAL;
        if (isadd == REGISTER) {
-                for_each_cpu_mask(cpu, mask) {
+                for_each_cpu_mask_nr(cpu, mask) {
                        s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
                                         cpu_to_node(cpu));
                        if (!s)
@@ -320,7 +320,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
        /* Deregister or cleanup */
 cleanup:
-        for_each_cpu_mask(cpu, mask) {
+        for_each_cpu_mask_nr(cpu, mask) {
                listeners = &per_cpu(listener_array, cpu);
                down_write(&listeners->sem);
                list_for_each_entry_safe(s, tmp, &listeners->list, list) {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b1c2da81b05..093d4acf993 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -145,9 +145,9 @@ static void clocksource_watchdog(unsigned long data)
                 * Cycle through CPUs to check if the CPUs stay
                 * synchronized to each other.
                 */
-                int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
+                int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
-                if (next_cpu >= NR_CPUS)
+                if (next_cpu >= nr_cpu_ids)
                        next_cpu = first_cpu(cpu_online_map);
                watchdog_timer.expires += WATCHDOG_INTERVAL;
                add_timer_on(&watchdog_timer, next_cpu);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f48d0f09d32..31463d370b9 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -399,8 +399,7 @@ again:
        mask = CPU_MASK_NONE;
        now = ktime_get();
        /* Find all expired events */
-        for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
+        for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) {
-             cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event.tv64 <= now.tv64)
                        cpu_set(cpu, mask);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 4f3886562b8..bf43284d685 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -135,7 +135,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 */
 static void tick_setup_device(struct tick_device *td,
                              struct clock_event_device *newdev, int cpu,
-                              cpumask_t cpumask)
+                              const cpumask_t *cpumask)
 {
        ktime_t next_event;
        void (*handler)(struct clock_event_device *) = NULL;
@@ -169,8 +169,8 @@ static void tick_setup_device(struct tick_device *td,
         * When the device is not per cpu, pin the interrupt to the
         * current cpu:
         */
-        if (!cpus_equal(newdev->cpumask, cpumask))
+        if (!cpus_equal(newdev->cpumask, *cpumask))
-                irq_set_affinity(newdev->irq, cpumask);
+                irq_set_affinity(newdev->irq, *cpumask);
        /*
         * When global broadcasting is active, check if the current
@@ -196,20 +196,20 @@ static int tick_check_new_device(struct clock_event_device *newdev)
        struct tick_device *td;
        int cpu, ret = NOTIFY_OK;
        unsigned long flags;
-        cpumask_t cpumask;
+        cpumask_of_cpu_ptr_declare(cpumask);
        spin_lock_irqsave(&tick_device_lock, flags);
        cpu = smp_processor_id();
+        cpumask_of_cpu_ptr_next(cpumask, cpu);
        if (!cpu_isset(cpu, newdev->cpumask))
                goto out_bc;
        td = &per_cpu(tick_cpu_device, cpu);
        curdev = td->evtdev;
-        cpumask = cpumask_of_cpu(cpu);
        /* cpu local device ? */
-        if (!cpus_equal(newdev->cpumask, cpumask)) {
+        if (!cpus_equal(newdev->cpumask, *cpumask)) {
                /*
                 * If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
                 * If we have a cpu local device already, do not replace it
                 * by a non cpu local device
                 */
-                if (curdev && cpus_equal(curdev->cpumask, cpumask))
+                if (curdev && cpus_equal(curdev->cpumask, *cpumask))
                        goto out_bc;
        }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7ccdf84..942fc7c8528 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
        if (!ts->tick_stopped)
                return;
-        touch_softlockup_watchdog();
        cpu_clear(cpu, nohz_cpu_mask);
        now = ktime_get();
        ts->idle_waketime = now;
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
        local_irq_save(flags);
        tick_do_update_jiffies64(now);
        local_irq_restore(flags);
+        touch_softlockup_watchdog();
 }
 void tick_nohz_stop_idle(int cpu)
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 2301e1e7c60..63528086337 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,7 +213,9 @@ static void start_stack_timers(void)
        int cpu;
        for_each_online_cpu(cpu) {
-                set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+                cpumask_of_cpu_ptr(new_mask, cpu);
+                set_cpus_allowed_ptr(current, new_mask);
                start_stack_timer(cpu);
        }
        set_cpus_allowed_ptr(current, &saved_mask);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ce7799540c9..a6d36346d10 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -397,7 +397,7 @@ void flush_workqueue(struct workqueue_struct *wq)
        might_sleep();
        lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
        lock_release(&wq->lockdep_map, 1, _THIS_IP_);
-        for_each_cpu_mask(cpu, *cpu_map)
+        for_each_cpu_mask_nr(cpu, *cpu_map)
                flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
@@ -477,7 +477,7 @@ static void wait_on_work(struct work_struct *work)
        wq = cwq->wq;
        cpu_map = wq_cpu_map(wq);
-        for_each_cpu_mask(cpu, *cpu_map)
+        for_each_cpu_mask_nr(cpu, *cpu_map)
                wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 }
@@ -813,7 +813,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
        list_del(&wq->list);
        spin_unlock(&workqueue_lock);
-        for_each_cpu_mask(cpu, *cpu_map)
+        for_each_cpu_mask_nr(cpu, *cpu_map)
                cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
        put_online_cpus();