Diffstat (limited to 'kernel')
-rw-r--r--	kernel/acct.c	3
-rw-r--r--	kernel/audit_tree.c	13
-rw-r--r--	kernel/bounds.c	2
-rw-r--r--	kernel/cpu.c	26
-rw-r--r--	kernel/cpuset.c	18
-rw-r--r--	kernel/exit.c	40
-rw-r--r--	kernel/fork.c	15
-rw-r--r--	kernel/futex.c	60
-rw-r--r--	kernel/hrtimer.c	171
-rw-r--r--	kernel/hw_breakpoint.c	146
-rw-r--r--	kernel/irq/autoprobe.c	20
-rw-r--r--	kernel/irq/chip.c	86
-rw-r--r--	kernel/irq/handle.c	22
-rw-r--r--	kernel/irq/internals.h	2
-rw-r--r--	kernel/irq/manage.c	50
-rw-r--r--	kernel/irq/migration.c	2
-rw-r--r--	kernel/irq/numa_migrate.c	8
-rw-r--r--	kernel/irq/pm.c	8
-rw-r--r--	kernel/irq/proc.c	4
-rw-r--r--	kernel/irq/spurious.c	14
-rw-r--r--	kernel/kexec.c	61
-rw-r--r--	kernel/kgdb.c	56
-rw-r--r--	kernel/ksysfs.c	21
-rw-r--r--	kernel/kthread.c	23
-rw-r--r--	kernel/lockdep.c	47
-rw-r--r--	kernel/module.c	191
-rw-r--r--	kernel/mutex-debug.h	12
-rw-r--r--	kernel/panic.c	3
-rw-r--r--	kernel/params.c	8
-rw-r--r--	kernel/perf_event.c	198
-rw-r--r--	kernel/pid.c	12
-rw-r--r--	kernel/power/console.c	7
-rw-r--r--	kernel/printk.c	119
-rw-r--r--	kernel/rcutorture.c	8
-rw-r--r--	kernel/relay.c	2
-rw-r--r--	kernel/resource.c	26
-rw-r--r--	kernel/rtmutex-debug.c	4
-rw-r--r--	kernel/rtmutex.c	106
-rw-r--r--	kernel/sched.c	836
-rw-r--r--	kernel/sched_clock.c	23
-rw-r--r--	kernel/sched_cpupri.c	10
-rw-r--r--	kernel/sched_cpupri.h	2
-rw-r--r--	kernel/sched_debug.c	17
-rw-r--r--	kernel/sched_fair.c	208
-rw-r--r--	kernel/sched_features.h	5
-rw-r--r--	kernel/sched_idletask.c	8
-rw-r--r--	kernel/sched_rt.c	66
-rw-r--r--	kernel/signal.c	63
-rw-r--r--	kernel/smp.c	35
-rw-r--r--	kernel/softirq.c	4
-rw-r--r--	kernel/softlockup.c	54
-rw-r--r--	kernel/spinlock.c	306
-rw-r--r--	kernel/sys.c	10
-rw-r--r--	kernel/sysctl.c	50
-rw-r--r--	kernel/sysctl_binary.c	7
-rw-r--r--	kernel/time/clockevents.c	32
-rw-r--r--	kernel/time/tick-broadcast.c	42
-rw-r--r--	kernel/time/tick-common.c	20
-rw-r--r--	kernel/time/tick-internal.h	1
-rw-r--r--	kernel/time/timecompare.c	2
-rw-r--r--	kernel/time/timer_list.c	15
-rw-r--r--	kernel/time/timer_stats.c	18
-rw-r--r--	kernel/timer.c	2
-rw-r--r--	kernel/trace/ftrace.c	30
-rw-r--r--	kernel/trace/power-traces.c	2
-rw-r--r--	kernel/trace/ring_buffer.c	45
-rw-r--r--	kernel/trace/trace.c	307
-rw-r--r--	kernel/trace/trace.h	27
-rw-r--r--	kernel/trace/trace_clock.c	8
-rw-r--r--	kernel/trace/trace_event_profile.c	6
-rw-r--r--	kernel/trace/trace_events.c	41
-rw-r--r--	kernel/trace/trace_export.c	4
-rw-r--r--	kernel/trace/trace_functions_graph.c	169
-rw-r--r--	kernel/trace/trace_hw_branches.c	51
-rw-r--r--	kernel/trace/trace_irqsoff.c	2
-rw-r--r--	kernel/trace/trace_kprobe.c	46
-rw-r--r--	kernel/trace/trace_ksym.c	61
-rw-r--r--	kernel/trace/trace_output.c	75
-rw-r--r--	kernel/trace/trace_sched_wakeup.c	16
-rw-r--r--	kernel/trace/trace_selftest.c	4
-rw-r--r--	kernel/trace/trace_stack.c	16
-rw-r--r--	kernel/trace/trace_syscalls.c	18
82 files changed, 2446 insertions, 1932 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 9a4715a2f6bf..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -536,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 	do_div(elapsed, AHZ);
 	ac.ac_btime = get_seconds() - elapsed;
 	/* we really need to bite the bullet and change layout */
-	current_uid_gid(&ac.ac_uid, &ac.ac_gid);
+	ac.ac_uid = orig_cred->uid;
+	ac.ac_gid = orig_cred->gid;
 #if ACCT_VERSION==2
 	ac.ac_ahz = AHZ;
 #endif
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
 		owner->root = NULL;
 	}
 
-	for (i = j = 0; i < size; i++, j++) {
+	for (i = j = 0; j <= size; i++, j++) {
 		struct audit_tree *s;
 		if (&chunk->owners[j] == p) {
 			list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
 		if (!s) /* result of earlier fallback */
 			continue;
 		get_tree(s);
-		list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+		list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
 	}
 
 	list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	for (n = 0; n < old->count; n++) {
 		if (old->owners[n].owner == tree) {
 			spin_unlock(&hash_lock);
-			put_inotify_watch(watch);
+			put_inotify_watch(&old->watch);
 			return 0;
 		}
 	}
 	spin_unlock(&hash_lock);
 
 	chunk = alloc_chunk(old->count + 1);
-	if (!chunk)
+	if (!chunk) {
+		put_inotify_watch(&old->watch);
 		return -ENOMEM;
+	}
 
 	mutex_lock(&inode->inotify_mutex);
 	if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	inotify_evict_watch(&old->watch);
 	mutex_unlock(&inode->inotify_mutex);
-	put_inotify_watch(&old->watch);
+	put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
+	put_inotify_watch(&old->watch); /* and kill it */
 	return 0;
 }
 
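
The tag_chunk() hunks above plug a reference leak: the earlier inotify_find_watch() takes a reference on old->watch, so every early return — including the new -ENOMEM path — must drop it, and the success path now drops two references (one pairing the find, one killing the watch). A runnable sketch of that pairing discipline, with stand-in types rather than the real inotify API:

    #include <stdio.h>

    struct watch { int refs; };

    static void get_watch(struct watch *w) { w->refs++; }
    static void put_watch(struct watch *w) { w->refs--; }

    /* Modeled on tag_chunk(): the lookup takes a reference that every
     * return path, including the allocation-failure one, must drop. */
    static int tag_chunk_sketch(struct watch *old, int alloc_fails)
    {
        get_watch(old);              /* as inotify_find_watch() would */
        if (alloc_fails) {
            put_watch(old);          /* the drop the patch adds on -ENOMEM */
            return -12;              /* stand-in for -ENOMEM */
        }
        put_watch(old);              /* normal path pairs it as well */
        return 0;
    }

    int main(void)
    {
        struct watch w = { 1 };
        printf("ret=%d refs=%d\n", tag_chunk_sketch(&w, 1), w.refs);
        return 0;
    }
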
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@
 
 void foo(void)
 {
-	/* The enum constants to put into include/linux/bounds.h */
+	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
 	/* End of constants */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 7c4e2713df0a..1c8ddd6ee940 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -209,9 +209,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		return -ENOMEM;
 
 	cpu_hotplug_begin();
+	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
 	if (err == NOTIFY_BAD) {
+		set_cpu_active(cpu, true);
+
 		nr_calls--;
 		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					hcpu, nr_calls, NULL);
@@ -223,11 +226,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	/* Ensure that we are not runnable on dying cpu */
 	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current,
-			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+	set_cpus_allowed_ptr(current, cpu_active_mask);
 
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
+		set_cpu_active(cpu, true);
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
@@ -278,23 +281,8 @@ int __ref cpu_down(unsigned int cpu)
 		goto out;
 	}
 
-	set_cpu_active(cpu, false);
-
-	/*
-	 * Make sure the all cpus did the reschedule and are not
-	 * using stale version of the cpu_active_mask.
-	 * This is not strictly necessary becuase stop_machine()
-	 * that we run down the line already provides the required
-	 * synchronization. But it's really a side effect and we do not
-	 * want to depend on the innards of the stop_machine here.
-	 */
-	synchronize_sched();
-
 	err = _cpu_down(cpu, 0);
 
-	if (cpu_online(cpu))
-		set_cpu_active(cpu, true);
-
 out:
 	cpu_maps_update_done();
 	stop_machine_destroy();
@@ -383,10 +371,12 @@ int disable_nonboot_cpus(void)
 		return error;
 	cpu_maps_update_begin();
 	first_cpu = cpumask_first(cpu_online_mask);
-	/* We take down all of the non-boot CPUs in one shot to avoid races
+	/*
+	 * We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
 	cpumask_clear(frozen_cpus);
+
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
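
A pattern worth noting in the _cpu_down() hunks: the patch clears the active bit before CPU_DOWN_PREPARE runs and sets it back on every failure path, instead of fixing it up after the fact in cpu_down(). A minimal user-space sketch of that "flip early, restore on failure" shape (the names here are illustrative, not kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    static bool cpu_active = true;

    static int notify_down_prepare(void) { return -1; /* pretend a veto */ }

    static int take_cpu_down_sketch(void)
    {
        cpu_active = false;          /* flip state before the work starts */
        if (notify_down_prepare()) {
            cpu_active = true;       /* every failure path restores it */
            return -1;
        }
        /* ... the actual teardown would run here ... */
        return 0;
    }

    int main(void)
    {
        printf("ret=%d active=%d\n", take_cpu_down_sketch(), cpu_active);
        return 0;
    }
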
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3cf2183b472d..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 }
 
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	*domains = NULL;
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
 			return -EINVAL;
 	}
 	retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		}
 
 		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_online_mask);
+			    cpu_active_mask);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
 			  node_states[N_HIGH_MEMORY]);
 		mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 	switch (phase) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		break;
 
 	default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
 	scan_for_empty_cpusets(&top_cpuset);
 	ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
diff --git a/kernel/exit.c b/kernel/exit.c
index 1143012951e9..546774a31a66 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,10 +68,10 @@ static void __unhash_process(struct task_struct *p)
 		detach_pid(p, PIDTYPE_SID);
 
 		list_del_rcu(&p->tasks);
+		list_del_init(&p->sibling);
 		__get_cpu_var(process_counts)--;
 	}
 	list_del_rcu(&p->thread_group);
-	list_del_init(&p->sibling);
 }
 
 /*
@@ -736,12 +736,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 /*
  * Any that need to be release_task'd are put on the @dead list.
  */
-static void reparent_thread(struct task_struct *father, struct task_struct *p,
+static void reparent_leader(struct task_struct *father, struct task_struct *p,
 				struct list_head *dead)
 {
-	if (p->pdeath_signal)
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
 	list_move_tail(&p->sibling, &p->real_parent->children);
 
 	if (task_detached(p))
@@ -780,12 +777,18 @@ static void forget_original_parent(struct task_struct *father)
 	reaper = find_new_reaper(father);
 
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
-		p->real_parent = reaper;
-		if (p->parent == father) {
-			BUG_ON(task_ptrace(p));
-			p->parent = p->real_parent;
-		}
-		reparent_thread(father, p, &dead_children);
+		struct task_struct *t = p;
+		do {
+			t->real_parent = reaper;
+			if (t->parent == father) {
+				BUG_ON(task_ptrace(t));
+				t->parent = t->real_parent;
+			}
+			if (t->pdeath_signal)
+				group_send_sig_info(t->pdeath_signal,
+						    SEND_SIG_NOINFO, t);
+		} while_each_thread(p, t);
+		reparent_leader(father, p, &dead_children);
 	}
 	write_unlock_irq(&tasklist_lock);
 
@@ -933,7 +936,7 @@ NORET_TYPE void do_exit(long code)
 	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
-	spin_unlock_wait(&tsk->pi_lock);
+	raw_spin_unlock_wait(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -971,7 +974,7 @@ NORET_TYPE void do_exit(long code)
 	exit_thread();
 	cgroup_exit(tsk, 1);
 
-	if (group_dead && tsk->signal->leader)
+	if (group_dead)
 		disassociate_ctty(1);
 
 	module_put(task_thread_info(tsk)->exec_domain->module);
@@ -1551,14 +1554,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 	struct task_struct *p;
 
 	list_for_each_entry(p, &tsk->children, sibling) {
-		/*
-		 * Do not consider detached threads.
-		 */
-		if (!task_detached(p)) {
-			int ret = wait_consider_task(wo, 0, p);
-			if (ret)
-				return ret;
-		}
+		int ret = wait_consider_task(wo, 0, p);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
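
The forget_original_parent() hunk replaces a per-leader walk with a do/while over every thread in the group, so pdeath_signal is now delivered to each thread rather than only to the leader. Roughly, while_each_thread(p, t) expands to a traversal of a circular thread list; a runnable sketch with stand-in types:

    #include <stdio.h>

    struct task { int pid; struct task *next_thread; };

    /* Roughly what the do { ... } while_each_thread(p, t) loop does:
     * visit every thread in the circular group list exactly once. */
    static void visit_threads(struct task *leader)
    {
        struct task *t = leader;
        do {
            printf("thread %d\n", t->pid); /* e.g. send pdeath_signal */
            t = t->next_thread;
        } while (t != leader);
    }

    int main(void)
    {
        struct task c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        c.next_thread = &a;              /* close the circular list */
        visit_threads(&a);
        return 0;
    }
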
diff --git a/kernel/fork.c b/kernel/fork.c
index 1415dc4598ae..5b2959b3ffc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -939,9 +939,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-	spin_lock_init(&p->pi_lock);
+	raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init(&p->pi_waiters, &p->pi_lock);
+	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
 }
@@ -1127,6 +1127,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	p->memcg_batch.do_batch = 0;
+	p->memcg_batch.memcg = NULL;
+#endif
 
 	p->bts = NULL;
 
@@ -1206,9 +1210,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->sas_ss_sp = p->sas_ss_size = 0;
 
 	/*
-	 * Syscall tracing should be turned off in the child regardless
-	 * of CLONE_PTRACE.
+	 * Syscall tracing and stepping should be turned off in the
+	 * child regardless of CLONE_PTRACE.
 	 */
+	user_disable_single_step(p);
 	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1286,7 +1291,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		list_add_tail(&p->sibling, &p->real_parent->children);
 		tracehook_finish_clone(p, clone_flags, trace);
 
 		if (thread_group_leader(p)) {
@@ -1298,6 +1302,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 				p->signal->tty = tty_kref_get(current->signal->tty);
 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			attach_pid(p, PIDTYPE_SID, task_session(current));
+			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
 		}
diff --git a/kernel/futex.c b/kernel/futex.c
index fb65e822fc41..8e3c3ffe1b9a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -304,8 +304,14 @@ void put_futex_key(int fshared, union futex_key *key)
  */
 static int fault_in_user_writeable(u32 __user *uaddr)
 {
-	int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
-				 1, 1, 0, NULL, NULL);
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(current, mm, (unsigned long)uaddr,
+			     1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+
 	return ret < 0 ? ret : 0;
 }
 
@@ -397,9 +403,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
 	 * and has cleaned up the pi_state already
 	 */
 	if (pi_state->owner) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
 		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
 	}
@@ -464,18 +470,18 @@ void exit_pi_state_list(struct task_struct *curr)
 	 * pi_state_list anymore, but we have to be careful
 	 * versus waiters unqueueing themselves:
 	 */
-	spin_lock_irq(&curr->pi_lock);
+	raw_spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
 
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
 		hb = hash_futex(&key);
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		spin_lock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 		/*
 		 * We dropped the pi-lock, so re-check whether this
 		 * task still owns the PI-state:
@@ -489,15 +495,15 @@ void exit_pi_state_list(struct task_struct *curr)
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		rt_mutex_unlock(&pi_state->pi_mutex);
 
 		spin_unlock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 	}
-	spin_unlock_irq(&curr->pi_lock);
+	raw_spin_unlock_irq(&curr->pi_lock);
 }
 
 static int
@@ -552,7 +558,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	 * change of the task flags, we do this protected by
 	 * p->pi_lock:
 	 */
-	spin_lock_irq(&p->pi_lock);
+	raw_spin_lock_irq(&p->pi_lock);
 	if (unlikely(p->flags & PF_EXITING)) {
 		/*
 		 * The task is on the way out. When PF_EXITPIDONE is
@@ -561,7 +567,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 		 */
 		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
 
-		spin_unlock_irq(&p->pi_lock);
+		raw_spin_unlock_irq(&p->pi_lock);
 		put_task_struct(p);
 		return ret;
 	}
@@ -580,7 +586,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
-	spin_unlock_irq(&p->pi_lock);
+	raw_spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
 
@@ -754,7 +760,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
-	spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -783,23 +789,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		else if (curval != uval)
 			ret = -EINVAL;
 		if (ret) {
-			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			return ret;
 		}
 	}
 
-	spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock_irq(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock_irq(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock_irq(&new_owner->pi_lock);
 
-	spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -1004,7 +1010,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.lock = &hb2->lock;
+		q->list.plist.spinlock = &hb2->lock;
 #endif
 	}
 	get_futex_key_refs(key2);
@@ -1040,7 +1046,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
 	q->lock_ptr = &hb->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 
 	wake_up_state(q->task, TASK_NORMAL);
@@ -1388,7 +1394,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 
 	plist_node_init(&q->list, prio);
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
@@ -1523,18 +1529,18 @@ retry:
 	 * itself.
 	 */
 	if (pi_state->owner != NULL) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 	}
 
 	pi_state->owner = newowner;
 
-	spin_lock_irq(&newowner->pi_lock);
+	raw_spin_lock_irq(&newowner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &newowner->pi_state_list);
-	spin_unlock_irq(&newowner->pi_lock);
+	raw_spin_unlock_irq(&newowner->pi_lock);
 	return 0;
 
 	/*
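
Besides the pi_lock conversions to the raw_spin_* API, the first futex.c hunk fixes fault_in_user_writeable() to take mm->mmap_sem for read around get_user_pages(), which expects its caller to hold that lock. A user-space analogue of the same "take the lock the callee assumes is held" pattern, with illustrative names:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Documented as "call with map_lock held for read", the way
     * get_user_pages() documents mmap_sem. */
    static int fault_in_page(unsigned long addr)
    {
        return addr ? 1 : -14;           /* pretend -EFAULT on NULL */
    }

    static int fault_in_writeable(unsigned long addr)
    {
        int ret;

        pthread_rwlock_rdlock(&map_lock);   /* like down_read(&mm->mmap_sem) */
        ret = fault_in_page(addr);
        pthread_rwlock_unlock(&map_lock);   /* like up_read(&mm->mmap_sem) */

        return ret < 0 ? ret : 0;
    }

    int main(void)
    {
        printf("ret=%d\n", fault_in_writeable(0x1000));
        return 0;
    }
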
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ede527708123..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	for (;;) {
 		base = timer->base;
 		if (likely(base != NULL)) {
-			spin_lock_irqsave(&base->cpu_base->lock, *flags);
+			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
 			/* The timer has migrated to another CPU: */
-			spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 		}
 		cpu_relax();
 	}
@@ -208,13 +208,13 @@ again:
 
 		/* See the comment in lock_timer_base() */
 		timer->base = NULL;
-		spin_unlock(&base->cpu_base->lock);
-		spin_lock(&new_base->cpu_base->lock);
+		raw_spin_unlock(&base->cpu_base->lock);
+		raw_spin_lock(&new_base->cpu_base->lock);
 
 		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
 			cpu = this_cpu;
-			spin_unlock(&new_base->cpu_base->lock);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_unlock(&new_base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 			timer->base = base;
 			goto again;
 		}
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	spin_lock_irqsave(&base->cpu_base->lock, *flags);
+	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 
 	return base;
 }
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
@@ -619,12 +628,12 @@ static void retrigger_next_event(void *arg)
 	base = &__get_cpu_var(hrtimer_bases);
 
 	/* Adjust CLOCK_REALTIME offset */
-	spin_lock(&base->lock);
+	raw_spin_lock(&base->lock);
 	base->clock_base[CLOCK_REALTIME].offset =
 		timespec_to_ktime(realtime_offset);
 
 	hrtimer_force_reprogram(base, 0);
-	spin_unlock(&base->lock);
+	raw_spin_unlock(&base->lock);
 }
 
 /*
@@ -685,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 		if (wakeup) {
-			spin_unlock(&base->cpu_base->lock);
+			raw_spin_unlock(&base->cpu_base->lock);
 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 		} else
 			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
 }
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+				 timer->function, timer->start_comm, 0);
 #endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
@@ -765,7 +790,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 static inline
 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 }
 
 /**
@@ -1098,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&cpu_base->lock, flags);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!hrtimer_hres_active()) {
 		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1115,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&cpu_base->lock, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	if (mindelta.tv64 < 0)
 		mindelta.tv64 = 0;
@@ -1197,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
 	 */
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1217,30 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-			ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-		"forcing clock min delta to %llu ns\n",
-		(unsigned long long) dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1249,24 +1250,18 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
- retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1322,13 +1317,51 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+		    ktime_to_ns(delta));
 }
 
 /*
@@ -1424,7 +1457,7 @@ void hrtimer_run_queues(void)
 			gettime = 0;
 		}
 
-		spin_lock(&cpu_base->lock);
+		raw_spin_lock(&cpu_base->lock);
 
 		while ((node = base->first)) {
 			struct hrtimer *timer;
@@ -1436,7 +1469,7 @@ void hrtimer_run_queues(void)
 
 			__run_hrtimer(timer, &base->softirq_time);
 		}
-		spin_unlock(&cpu_base->lock);
+		raw_spin_unlock(&cpu_base->lock);
 	}
 }
 
@@ -1592,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	spin_lock_init(&cpu_base->lock);
+	raw_spin_lock_init(&cpu_base->lock);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1650,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	spin_lock(&new_base->lock);
-	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&new_base->lock);
+	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
 				     &new_base->clock_base[i]);
 	}
 
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
+	raw_spin_unlock(&new_base->lock);
 
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
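
The new hrtimer_interrupt() hang logic boils down to: tolerate up to three passes over the expired timers, and once a hang is declared, program the next event as far in the future as the time already spent, capped at 100ms. A standalone sketch of that arithmetic with a simulated clock (nothing here is kernel API):

    #include <stdio.h>

    #define NSEC_PER_MSEC 1000000LL

    static long long clock_ns;                  /* simulated clock */
    static long long ktime_get_ns(void) { return clock_ns; }

    /* Pretend each pass over the expired timers burns 60ms and always
     * finds more work, the way a hanging interrupt would. */
    static int expire_timers(void) { clock_ns += 60 * NSEC_PER_MSEC; return 1; }

    int main(void)
    {
        long long entry_time = ktime_get_ns(), delta, next_delay;
        int retries = 0;

        while (expire_timers()) {
            if (++retries < 3)              /* tolerate a few extra passes */
                continue;
            delta = ktime_get_ns() - entry_time;
            next_delay = delta > 100 * NSEC_PER_MSEC
                    ? 100 * NSEC_PER_MSEC : delta;   /* cap at 100ms */
            printf("hang after %lld ns, next event in %lld ns\n",
                   delta, next_delay);
            break;
        }
        return 0;
    }
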
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..dbcbf6a33a08 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@
 static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
 static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
 static unsigned int max_task_bp_pinned(int cpu)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 
 	for (i = HBP_NUM -1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
+static int task_bp_pinned(struct task_struct *tsk)
+{
+	struct perf_event_context *ctx = tsk->perf_event_ctxp;
+	struct list_head *list;
+	struct perf_event *bp;
+	unsigned long flags;
+	int count = 0;
+
+	if (WARN_ONCE(!ctx, "No perf context for this task"))
+		return 0;
+
+	list = &ctx->event_list;
+
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * The current breakpoint counter is not included in the list
+	 * at the open() callback time
+	 */
+	list_for_each_entry(bp, list, event_entry) {
+		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+			count++;
+	}
+
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+	return count;
+}
+
 /*
  * Report the number of pinned/un-pinned breakpoints we have in
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 {
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
 	if (cpu >= 0) {
 		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
-		slots->pinned += max_task_bp_pinned(cpu);
+		if (!tsk)
+			slots->pinned += max_task_bp_pinned(cpu);
+		else
+			slots->pinned += task_bp_pinned(tsk);
 		slots->flexible = per_cpu(nr_bp_flexible, cpu);
 
 		return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 		unsigned int nr;
 
 		nr = per_cpu(nr_cpu_bp_pinned, cpu);
-		nr += max_task_bp_pinned(cpu);
+		if (!tsk)
+			nr += max_task_bp_pinned(cpu);
+		else
+			nr += task_bp_pinned(tsk);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 */
 static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
 {
-	int count = 0;
-	struct perf_event *bp;
-	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	unsigned int *tsk_pinned;
-	struct list_head *list;
-	unsigned long flags;
-
-	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return;
-
-	list = &ctx->event_list;
-
-	spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * The current breakpoint counter is not included in the list
-	 * at the open() callback time
-	 */
-	list_for_each_entry(bp, list, event_entry) {
-		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
-	}
-
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	int count = 0;
 
-	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
-		return;
+	count = task_bp_pinned(tsk);
 
-	tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 	if (enable) {
 		tsk_pinned[count]++;
 		if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *   - If attached to a single cpu, check:
  *
  *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
- *            + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 *   -> If there are already non-pinned counters in this cpu, it means
 *      there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
- *            + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *   -> This is roughly the same, except we check the number of per cpu
 *      bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
- *            + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *   -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *      one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
- *            + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
 int reserve_bp_slot(struct perf_event *bp)
 {
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
 
 	mutex_lock(&nr_bp_mutex);
 
-	fetch_bp_busy_slots(&slots, bp->cpu);
+	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
 	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
 }
 
 
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
 
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
 	 * This is a quick hack that will be removed soon, once we remove
 	 * the tmp breakpoints from ptrace
 	 */
-	if (!bp->attr.disabled || bp->callback == perf_bp_event)
+	if (!bp->attr.disabled || !bp->overflow_handler)
 		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	return ret;
 }
 
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
-	bp->callback = perf_bp_event;
-
-	return __register_perf_hw_breakpoint(bp);
-}
-
 /**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 */
 struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 {
 	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	/*
-	 * FIXME: do it without unregistering
-	 * - We don't want to lose our slot
-	 * - If the new bp is incorrect, don't lose the older one
-	 */
-	unregister_hw_breakpoint(bp);
+	u64 old_addr = bp->attr.bp_addr;
+	int old_type = bp->attr.bp_type;
+	int old_len = bp->attr.bp_len;
+	int err = 0;
+
+	perf_event_disable(bp);
+
+	bp->attr.bp_addr = attr->bp_addr;
+	bp->attr.bp_type = attr->bp_type;
+	bp->attr.bp_len = attr->bp_len;
+
+	if (attr->disabled)
+		goto end;
 
-	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	if (!err)
+		perf_event_enable(bp);
+
+	if (err) {
+		bp->attr.bp_addr = old_addr;
+		bp->attr.bp_type = old_type;
+		bp->attr.bp_len = old_len;
+		if (!bp->attr.disabled)
+			perf_event_enable(bp);
+
+		return err;
+	}
+
+end:
+	bp->attr.disabled = attr->disabled;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 */
 struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)
+			    perf_overflow_handler_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
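
modify_user_hw_breakpoint() no longer unregisters and re-registers the breakpoint; it saves the old attributes, applies the new ones, validates, and rolls back on error, so the slot is never lost and a bad request never destroys a working breakpoint. A compact sketch of that save/validate/rollback shape (fields and validator are stand-ins):

    #include <stdio.h>

    struct bp_attr { unsigned long addr; int type; int len; };

    static int validate(const struct bp_attr *a) { return a->len <= 8 ? 0 : -22; }

    static int modify_bp(struct bp_attr *bp, const struct bp_attr *attr)
    {
        struct bp_attr old = *bp;    /* remember the known-good settings */
        int err;

        *bp = *attr;                 /* try the new ones in place */
        err = validate(bp);
        if (err)
            *bp = old;               /* roll back instead of losing the slot */
        return err;
    }

    int main(void)
    {
        struct bp_attr bp = { 0x1000, 0, 4 }, bad = { 0x2000, 0, 16 };
        printf("err=%d len=%d\n", modify_bp(&bp, &bad), bp.len);
        return 0;
    }
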
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 1de9700f416e..2295a31ef110 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -45,7 +45,7 @@ unsigned long probe_irq_on(void)
45 * flush such a longstanding irq before considering it as spurious. 45 * flush such a longstanding irq before considering it as spurious.
46 */ 46 */
47 for_each_irq_desc_reverse(i, desc) { 47 for_each_irq_desc_reverse(i, desc) {
48 spin_lock_irq(&desc->lock); 48 raw_spin_lock_irq(&desc->lock);
49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
50 /* 50 /*
51 * An old-style architecture might still have 51 * An old-style architecture might still have
@@ -61,7 +61,7 @@ unsigned long probe_irq_on(void)
61 desc->chip->set_type(i, IRQ_TYPE_PROBE); 61 desc->chip->set_type(i, IRQ_TYPE_PROBE);
62 desc->chip->startup(i); 62 desc->chip->startup(i);
63 } 63 }
64 spin_unlock_irq(&desc->lock); 64 raw_spin_unlock_irq(&desc->lock);
65 } 65 }
66 66
67 /* Wait for longstanding interrupts to trigger. */ 67 /* Wait for longstanding interrupts to trigger. */
@@ -73,13 +73,13 @@ unsigned long probe_irq_on(void)
73 * happened in the previous stage, it may have masked itself) 73 * happened in the previous stage, it may have masked itself)
74 */ 74 */
75 for_each_irq_desc_reverse(i, desc) { 75 for_each_irq_desc_reverse(i, desc) {
76 spin_lock_irq(&desc->lock); 76 raw_spin_lock_irq(&desc->lock);
77 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 77 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
78 desc->status |= IRQ_AUTODETECT | IRQ_WAITING; 78 desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
79 if (desc->chip->startup(i)) 79 if (desc->chip->startup(i))
80 desc->status |= IRQ_PENDING; 80 desc->status |= IRQ_PENDING;
81 } 81 }
82 spin_unlock_irq(&desc->lock); 82 raw_spin_unlock_irq(&desc->lock);
83 } 83 }
84 84
85 /* 85 /*
@@ -91,7 +91,7 @@ unsigned long probe_irq_on(void)
91 * Now filter out any obviously spurious interrupts 91 * Now filter out any obviously spurious interrupts
92 */ 92 */
93 for_each_irq_desc(i, desc) { 93 for_each_irq_desc(i, desc) {
94 spin_lock_irq(&desc->lock); 94 raw_spin_lock_irq(&desc->lock);
95 status = desc->status; 95 status = desc->status;
96 96
97 if (status & IRQ_AUTODETECT) { 97 if (status & IRQ_AUTODETECT) {
@@ -103,7 +103,7 @@ unsigned long probe_irq_on(void)
103 if (i < 32) 103 if (i < 32)
104 mask |= 1 << i; 104 mask |= 1 << i;
105 } 105 }
106 spin_unlock_irq(&desc->lock); 106 raw_spin_unlock_irq(&desc->lock);
107 } 107 }
108 108
109 return mask; 109 return mask;
@@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val)
129 int i; 129 int i;
130 130
131 for_each_irq_desc(i, desc) { 131 for_each_irq_desc(i, desc) {
132 spin_lock_irq(&desc->lock); 132 raw_spin_lock_irq(&desc->lock);
133 status = desc->status; 133 status = desc->status;
134 134
135 if (status & IRQ_AUTODETECT) { 135 if (status & IRQ_AUTODETECT) {
@@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val)
139 desc->status = status & ~IRQ_AUTODETECT; 139 desc->status = status & ~IRQ_AUTODETECT;
140 desc->chip->shutdown(i); 140 desc->chip->shutdown(i);
141 } 141 }
142 spin_unlock_irq(&desc->lock); 142 raw_spin_unlock_irq(&desc->lock);
143 } 143 }
144 mutex_unlock(&probing_active); 144 mutex_unlock(&probing_active);
145 145
@@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val)
171 unsigned int status; 171 unsigned int status;
172 172
173 for_each_irq_desc(i, desc) { 173 for_each_irq_desc(i, desc) {
174 spin_lock_irq(&desc->lock); 174 raw_spin_lock_irq(&desc->lock);
175 status = desc->status; 175 status = desc->status;
176 176
177 if (status & IRQ_AUTODETECT) { 177 if (status & IRQ_AUTODETECT) {
@@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val)
183 desc->status = status & ~IRQ_AUTODETECT; 183 desc->status = status & ~IRQ_AUTODETECT;
184 desc->chip->shutdown(i); 184 desc->chip->shutdown(i);
185 } 185 }
186 spin_unlock_irq(&desc->lock); 186 raw_spin_unlock_irq(&desc->lock);
187 } 187 }
188 mutex_unlock(&probing_active); 188 mutex_unlock(&probing_active);
189 189
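
From here on, every spin_lock*() call on an irq descriptor's lock becomes its raw_spin_lock*() counterpart, and the same conversion repeats verbatim through chip.c, handle.c, manage.c, migration.c, numa_migrate.c, pm.c, proc.c and spurious.c below. The motivation is PREEMPT_RT, where an ordinary spinlock_t becomes a sleeping lock: the low-level interrupt path must stay truly atomic, so irq_desc->lock (and sparse_irq_lock) move to raw_spinlock_t, which always busy-waits. Statically embedded locks are initialized with __RAW_SPIN_LOCK_UNLOCKED(name) rather than __SPIN_LOCK_UNLOCKED(name), as the handle.c hunks show. A minimal kernel-style sketch of the raw API; the lock and function names are illustrative:

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(my_lock);   /* stays a spinning lock even on RT */

    static void touch_hard_atomic_state(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&my_lock, flags);
            /* critical section: interrupts off, no sleeping allowed */
            raw_spin_unlock_irqrestore(&my_lock, flags);
    }
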
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ba566c261adc..ecc3fa28f666 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq)
34 } 34 }
35 35
36 /* Ensure we don't have left over values from a previous use of this irq */ 36 /* Ensure we don't have left over values from a previous use of this irq */
37 spin_lock_irqsave(&desc->lock, flags); 37 raw_spin_lock_irqsave(&desc->lock, flags);
38 desc->status = IRQ_DISABLED; 38 desc->status = IRQ_DISABLED;
39 desc->chip = &no_irq_chip; 39 desc->chip = &no_irq_chip;
40 desc->handle_irq = handle_bad_irq; 40 desc->handle_irq = handle_bad_irq;
@@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq)
51 cpumask_clear(desc->pending_mask); 51 cpumask_clear(desc->pending_mask);
52#endif 52#endif
53#endif 53#endif
54 spin_unlock_irqrestore(&desc->lock, flags); 54 raw_spin_unlock_irqrestore(&desc->lock, flags);
55} 55}
56 56
57/** 57/**
@@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq)
68 return; 68 return;
69 } 69 }
70 70
71 spin_lock_irqsave(&desc->lock, flags); 71 raw_spin_lock_irqsave(&desc->lock, flags);
72 if (desc->action) { 72 if (desc->action) {
73 spin_unlock_irqrestore(&desc->lock, flags); 73 raw_spin_unlock_irqrestore(&desc->lock, flags);
74 WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", 74 WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n",
75 irq); 75 irq);
76 return; 76 return;
@@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq)
82 desc->chip = &no_irq_chip; 82 desc->chip = &no_irq_chip;
83 desc->name = NULL; 83 desc->name = NULL;
84 clear_kstat_irqs(desc); 84 clear_kstat_irqs(desc);
85 spin_unlock_irqrestore(&desc->lock, flags); 85 raw_spin_unlock_irqrestore(&desc->lock, flags);
86} 86}
87 87
88 88
@@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
104 if (!chip) 104 if (!chip)
105 chip = &no_irq_chip; 105 chip = &no_irq_chip;
106 106
107 spin_lock_irqsave(&desc->lock, flags); 107 raw_spin_lock_irqsave(&desc->lock, flags);
108 irq_chip_set_defaults(chip); 108 irq_chip_set_defaults(chip);
109 desc->chip = chip; 109 desc->chip = chip;
110 spin_unlock_irqrestore(&desc->lock, flags); 110 raw_spin_unlock_irqrestore(&desc->lock, flags);
111 111
112 return 0; 112 return 0;
113} 113}
@@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type)
133 if (type == IRQ_TYPE_NONE) 133 if (type == IRQ_TYPE_NONE)
134 return 0; 134 return 0;
135 135
136 spin_lock_irqsave(&desc->lock, flags); 136 raw_spin_lock_irqsave(&desc->lock, flags);
137 ret = __irq_set_trigger(desc, irq, type); 137 ret = __irq_set_trigger(desc, irq, type);
138 spin_unlock_irqrestore(&desc->lock, flags); 138 raw_spin_unlock_irqrestore(&desc->lock, flags);
139 return ret; 139 return ret;
140} 140}
141EXPORT_SYMBOL(set_irq_type); 141EXPORT_SYMBOL(set_irq_type);
@@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data)
158 return -EINVAL; 158 return -EINVAL;
159 } 159 }
160 160
161 spin_lock_irqsave(&desc->lock, flags); 161 raw_spin_lock_irqsave(&desc->lock, flags);
162 desc->handler_data = data; 162 desc->handler_data = data;
163 spin_unlock_irqrestore(&desc->lock, flags); 163 raw_spin_unlock_irqrestore(&desc->lock, flags);
164 return 0; 164 return 0;
165} 165}
166EXPORT_SYMBOL(set_irq_data); 166EXPORT_SYMBOL(set_irq_data);
@@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
183 return -EINVAL; 183 return -EINVAL;
184 } 184 }
185 185
186 spin_lock_irqsave(&desc->lock, flags); 186 raw_spin_lock_irqsave(&desc->lock, flags);
187 desc->msi_desc = entry; 187 desc->msi_desc = entry;
188 if (entry) 188 if (entry)
189 entry->irq = irq; 189 entry->irq = irq;
190 spin_unlock_irqrestore(&desc->lock, flags); 190 raw_spin_unlock_irqrestore(&desc->lock, flags);
191 return 0; 191 return 0;
192} 192}
193 193
@@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data)
214 return -EINVAL; 214 return -EINVAL;
215 } 215 }
216 216
217 spin_lock_irqsave(&desc->lock, flags); 217 raw_spin_lock_irqsave(&desc->lock, flags);
218 desc->chip_data = data; 218 desc->chip_data = data;
219 spin_unlock_irqrestore(&desc->lock, flags); 219 raw_spin_unlock_irqrestore(&desc->lock, flags);
220 220
221 return 0; 221 return 0;
222} 222}
@@ -241,12 +241,12 @@ void set_irq_nested_thread(unsigned int irq, int nest)
241 if (!desc) 241 if (!desc)
242 return; 242 return;
243 243
244 spin_lock_irqsave(&desc->lock, flags); 244 raw_spin_lock_irqsave(&desc->lock, flags);
245 if (nest) 245 if (nest)
246 desc->status |= IRQ_NESTED_THREAD; 246 desc->status |= IRQ_NESTED_THREAD;
247 else 247 else
248 desc->status &= ~IRQ_NESTED_THREAD; 248 desc->status &= ~IRQ_NESTED_THREAD;
249 spin_unlock_irqrestore(&desc->lock, flags); 249 raw_spin_unlock_irqrestore(&desc->lock, flags);
250} 250}
251EXPORT_SYMBOL_GPL(set_irq_nested_thread); 251EXPORT_SYMBOL_GPL(set_irq_nested_thread);
252 252
@@ -343,7 +343,7 @@ void handle_nested_irq(unsigned int irq)
343 343
344 might_sleep(); 344 might_sleep();
345 345
346 spin_lock_irq(&desc->lock); 346 raw_spin_lock_irq(&desc->lock);
347 347
348 kstat_incr_irqs_this_cpu(irq, desc); 348 kstat_incr_irqs_this_cpu(irq, desc);
349 349
@@ -352,17 +352,17 @@ void handle_nested_irq(unsigned int irq)
352 goto out_unlock; 352 goto out_unlock;
353 353
354 desc->status |= IRQ_INPROGRESS; 354 desc->status |= IRQ_INPROGRESS;
355 spin_unlock_irq(&desc->lock); 355 raw_spin_unlock_irq(&desc->lock);
356 356
357 action_ret = action->thread_fn(action->irq, action->dev_id); 357 action_ret = action->thread_fn(action->irq, action->dev_id);
358 if (!noirqdebug) 358 if (!noirqdebug)
359 note_interrupt(irq, desc, action_ret); 359 note_interrupt(irq, desc, action_ret);
360 360
361 spin_lock_irq(&desc->lock); 361 raw_spin_lock_irq(&desc->lock);
362 desc->status &= ~IRQ_INPROGRESS; 362 desc->status &= ~IRQ_INPROGRESS;
363 363
364out_unlock: 364out_unlock:
365 spin_unlock_irq(&desc->lock); 365 raw_spin_unlock_irq(&desc->lock);
366} 366}
367EXPORT_SYMBOL_GPL(handle_nested_irq); 367EXPORT_SYMBOL_GPL(handle_nested_irq);
368 368
@@ -384,7 +384,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
384 struct irqaction *action; 384 struct irqaction *action;
385 irqreturn_t action_ret; 385 irqreturn_t action_ret;
386 386
387 spin_lock(&desc->lock); 387 raw_spin_lock(&desc->lock);
388 388
389 if (unlikely(desc->status & IRQ_INPROGRESS)) 389 if (unlikely(desc->status & IRQ_INPROGRESS))
390 goto out_unlock; 390 goto out_unlock;
@@ -396,16 +396,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
396 goto out_unlock; 396 goto out_unlock;
397 397
398 desc->status |= IRQ_INPROGRESS; 398 desc->status |= IRQ_INPROGRESS;
399 spin_unlock(&desc->lock); 399 raw_spin_unlock(&desc->lock);
400 400
401 action_ret = handle_IRQ_event(irq, action); 401 action_ret = handle_IRQ_event(irq, action);
402 if (!noirqdebug) 402 if (!noirqdebug)
403 note_interrupt(irq, desc, action_ret); 403 note_interrupt(irq, desc, action_ret);
404 404
405 spin_lock(&desc->lock); 405 raw_spin_lock(&desc->lock);
406 desc->status &= ~IRQ_INPROGRESS; 406 desc->status &= ~IRQ_INPROGRESS;
407out_unlock: 407out_unlock:
408 spin_unlock(&desc->lock); 408 raw_spin_unlock(&desc->lock);
409} 409}
410 410
411/** 411/**
@@ -424,7 +424,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
424 struct irqaction *action; 424 struct irqaction *action;
425 irqreturn_t action_ret; 425 irqreturn_t action_ret;
426 426
427 spin_lock(&desc->lock); 427 raw_spin_lock(&desc->lock);
428 mask_ack_irq(desc, irq); 428 mask_ack_irq(desc, irq);
429 429
430 if (unlikely(desc->status & IRQ_INPROGRESS)) 430 if (unlikely(desc->status & IRQ_INPROGRESS))
@@ -441,13 +441,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
441 goto out_unlock; 441 goto out_unlock;
442 442
443 desc->status |= IRQ_INPROGRESS; 443 desc->status |= IRQ_INPROGRESS;
444 spin_unlock(&desc->lock); 444 raw_spin_unlock(&desc->lock);
445 445
446 action_ret = handle_IRQ_event(irq, action); 446 action_ret = handle_IRQ_event(irq, action);
447 if (!noirqdebug) 447 if (!noirqdebug)
448 note_interrupt(irq, desc, action_ret); 448 note_interrupt(irq, desc, action_ret);
449 449
450 spin_lock(&desc->lock); 450 raw_spin_lock(&desc->lock);
451 desc->status &= ~IRQ_INPROGRESS; 451 desc->status &= ~IRQ_INPROGRESS;
452 452
453 if (unlikely(desc->status & IRQ_ONESHOT)) 453 if (unlikely(desc->status & IRQ_ONESHOT))
@@ -455,7 +455,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
455 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) 455 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
456 desc->chip->unmask(irq); 456 desc->chip->unmask(irq);
457out_unlock: 457out_unlock:
458 spin_unlock(&desc->lock); 458 raw_spin_unlock(&desc->lock);
459} 459}
460EXPORT_SYMBOL_GPL(handle_level_irq); 460EXPORT_SYMBOL_GPL(handle_level_irq);
461 461
@@ -475,7 +475,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
475 struct irqaction *action; 475 struct irqaction *action;
476 irqreturn_t action_ret; 476 irqreturn_t action_ret;
477 477
478 spin_lock(&desc->lock); 478 raw_spin_lock(&desc->lock);
479 479
480 if (unlikely(desc->status & IRQ_INPROGRESS)) 480 if (unlikely(desc->status & IRQ_INPROGRESS))
481 goto out; 481 goto out;
@@ -497,18 +497,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
497 497
498 desc->status |= IRQ_INPROGRESS; 498 desc->status |= IRQ_INPROGRESS;
499 desc->status &= ~IRQ_PENDING; 499 desc->status &= ~IRQ_PENDING;
500 spin_unlock(&desc->lock); 500 raw_spin_unlock(&desc->lock);
501 501
502 action_ret = handle_IRQ_event(irq, action); 502 action_ret = handle_IRQ_event(irq, action);
503 if (!noirqdebug) 503 if (!noirqdebug)
504 note_interrupt(irq, desc, action_ret); 504 note_interrupt(irq, desc, action_ret);
505 505
506 spin_lock(&desc->lock); 506 raw_spin_lock(&desc->lock);
507 desc->status &= ~IRQ_INPROGRESS; 507 desc->status &= ~IRQ_INPROGRESS;
508out: 508out:
509 desc->chip->eoi(irq); 509 desc->chip->eoi(irq);
510 510
511 spin_unlock(&desc->lock); 511 raw_spin_unlock(&desc->lock);
512} 512}
513 513
514/** 514/**
@@ -530,7 +530,7 @@ out:
530void 530void
531handle_edge_irq(unsigned int irq, struct irq_desc *desc) 531handle_edge_irq(unsigned int irq, struct irq_desc *desc)
532{ 532{
533 spin_lock(&desc->lock); 533 raw_spin_lock(&desc->lock);
534 534
535 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 535 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
536 536
@@ -576,17 +576,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
576 } 576 }
577 577
578 desc->status &= ~IRQ_PENDING; 578 desc->status &= ~IRQ_PENDING;
579 spin_unlock(&desc->lock); 579 raw_spin_unlock(&desc->lock);
580 action_ret = handle_IRQ_event(irq, action); 580 action_ret = handle_IRQ_event(irq, action);
581 if (!noirqdebug) 581 if (!noirqdebug)
582 note_interrupt(irq, desc, action_ret); 582 note_interrupt(irq, desc, action_ret);
583 spin_lock(&desc->lock); 583 raw_spin_lock(&desc->lock);
584 584
585 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); 585 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
586 586
587 desc->status &= ~IRQ_INPROGRESS; 587 desc->status &= ~IRQ_INPROGRESS;
588out_unlock: 588out_unlock:
589 spin_unlock(&desc->lock); 589 raw_spin_unlock(&desc->lock);
590} 590}
591 591
592/** 592/**
@@ -643,7 +643,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
643 } 643 }
644 644
645 chip_bus_lock(irq, desc); 645 chip_bus_lock(irq, desc);
646 spin_lock_irqsave(&desc->lock, flags); 646 raw_spin_lock_irqsave(&desc->lock, flags);
647 647
648 /* Uninstall? */ 648 /* Uninstall? */
649 if (handle == handle_bad_irq) { 649 if (handle == handle_bad_irq) {
@@ -661,7 +661,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
661 desc->depth = 0; 661 desc->depth = 0;
662 desc->chip->startup(irq); 662 desc->chip->startup(irq);
663 } 663 }
664 spin_unlock_irqrestore(&desc->lock, flags); 664 raw_spin_unlock_irqrestore(&desc->lock, flags);
665 chip_bus_sync_unlock(irq, desc); 665 chip_bus_sync_unlock(irq, desc);
666} 666}
667EXPORT_SYMBOL_GPL(__set_irq_handler); 667EXPORT_SYMBOL_GPL(__set_irq_handler);
@@ -692,9 +692,9 @@ void __init set_irq_noprobe(unsigned int irq)
692 return; 692 return;
693 } 693 }
694 694
695 spin_lock_irqsave(&desc->lock, flags); 695 raw_spin_lock_irqsave(&desc->lock, flags);
696 desc->status |= IRQ_NOPROBE; 696 desc->status |= IRQ_NOPROBE;
697 spin_unlock_irqrestore(&desc->lock, flags); 697 raw_spin_unlock_irqrestore(&desc->lock, flags);
698} 698}
699 699
700void __init set_irq_probe(unsigned int irq) 700void __init set_irq_probe(unsigned int irq)
@@ -707,7 +707,7 @@ void __init set_irq_probe(unsigned int irq)
707 return; 707 return;
708 } 708 }
709 709
710 spin_lock_irqsave(&desc->lock, flags); 710 raw_spin_lock_irqsave(&desc->lock, flags);
711 desc->status &= ~IRQ_NOPROBE; 711 desc->status &= ~IRQ_NOPROBE;
712 spin_unlock_irqrestore(&desc->lock, flags); 712 raw_spin_unlock_irqrestore(&desc->lock, flags);
713} 713}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 17c71bb565c6..814940e7f485 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -80,7 +80,7 @@ static struct irq_desc irq_desc_init = {
80 .chip = &no_irq_chip, 80 .chip = &no_irq_chip,
81 .handle_irq = handle_bad_irq, 81 .handle_irq = handle_bad_irq,
82 .depth = 1, 82 .depth = 1,
83 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 83 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
84}; 84};
85 85
86void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) 86void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
@@ -108,7 +108,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
108{ 108{
109 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); 109 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
110 110
111 spin_lock_init(&desc->lock); 111 raw_spin_lock_init(&desc->lock);
112 desc->irq = irq; 112 desc->irq = irq;
113#ifdef CONFIG_SMP 113#ifdef CONFIG_SMP
114 desc->node = node; 114 desc->node = node;
@@ -130,7 +130,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
130/* 130/*
131 * Protect the sparse_irqs: 131 * Protect the sparse_irqs:
132 */ 132 */
133DEFINE_SPINLOCK(sparse_irq_lock); 133DEFINE_RAW_SPINLOCK(sparse_irq_lock);
134 134
135struct irq_desc **irq_desc_ptrs __read_mostly; 135struct irq_desc **irq_desc_ptrs __read_mostly;
136 136
@@ -141,7 +141,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
141 .chip = &no_irq_chip, 141 .chip = &no_irq_chip,
142 .handle_irq = handle_bad_irq, 142 .handle_irq = handle_bad_irq,
143 .depth = 1, 143 .depth = 1,
144 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 144 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
145 } 145 }
146}; 146};
147 147
@@ -212,7 +212,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
212 if (desc) 212 if (desc)
213 return desc; 213 return desc;
214 214
215 spin_lock_irqsave(&sparse_irq_lock, flags); 215 raw_spin_lock_irqsave(&sparse_irq_lock, flags);
216 216
217 /* We have to check it to avoid races with another CPU */ 217 /* We have to check it to avoid races with another CPU */
218 desc = irq_desc_ptrs[irq]; 218 desc = irq_desc_ptrs[irq];
@@ -234,7 +234,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
234 irq_desc_ptrs[irq] = desc; 234 irq_desc_ptrs[irq] = desc;
235 235
236out_unlock: 236out_unlock:
237 spin_unlock_irqrestore(&sparse_irq_lock, flags); 237 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
238 238
239 return desc; 239 return desc;
240} 240}
@@ -247,7 +247,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
247 .chip = &no_irq_chip, 247 .chip = &no_irq_chip,
248 .handle_irq = handle_bad_irq, 248 .handle_irq = handle_bad_irq,
249 .depth = 1, 249 .depth = 1,
250 .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), 250 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
251 } 251 }
252}; 252};
253 253
@@ -473,7 +473,7 @@ unsigned int __do_IRQ(unsigned int irq)
473 return 1; 473 return 1;
474 } 474 }
475 475
476 spin_lock(&desc->lock); 476 raw_spin_lock(&desc->lock);
477 if (desc->chip->ack) 477 if (desc->chip->ack)
478 desc->chip->ack(irq); 478 desc->chip->ack(irq);
479 /* 479 /*
@@ -517,13 +517,13 @@ unsigned int __do_IRQ(unsigned int irq)
517 for (;;) { 517 for (;;) {
518 irqreturn_t action_ret; 518 irqreturn_t action_ret;
519 519
520 spin_unlock(&desc->lock); 520 raw_spin_unlock(&desc->lock);
521 521
522 action_ret = handle_IRQ_event(irq, action); 522 action_ret = handle_IRQ_event(irq, action);
523 if (!noirqdebug) 523 if (!noirqdebug)
524 note_interrupt(irq, desc, action_ret); 524 note_interrupt(irq, desc, action_ret);
525 525
526 spin_lock(&desc->lock); 526 raw_spin_lock(&desc->lock);
527 if (likely(!(desc->status & IRQ_PENDING))) 527 if (likely(!(desc->status & IRQ_PENDING)))
528 break; 528 break;
529 desc->status &= ~IRQ_PENDING; 529 desc->status &= ~IRQ_PENDING;
@@ -536,7 +536,7 @@ out:
536 * disabled while the handler was running. 536 * disabled while the handler was running.
537 */ 537 */
538 desc->chip->end(irq); 538 desc->chip->end(irq);
539 spin_unlock(&desc->lock); 539 raw_spin_unlock(&desc->lock);
540 540
541 return 1; 541 return 1;
542} 542}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 1b5d742c6a77..b2821f070a3d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
18extern struct lock_class_key irq_desc_lock_class; 18extern struct lock_class_key irq_desc_lock_class;
19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); 19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
20extern void clear_kstat_irqs(struct irq_desc *desc); 20extern void clear_kstat_irqs(struct irq_desc *desc);
21extern spinlock_t sparse_irq_lock; 21extern raw_spinlock_t sparse_irq_lock;
22 22
23#ifdef CONFIG_SPARSE_IRQ 23#ifdef CONFIG_SPARSE_IRQ
24/* irq_desc_ptrs allocated at boot time */ 24/* irq_desc_ptrs allocated at boot time */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7305b297d1eb..eb6078ca60c7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq)
46 cpu_relax(); 46 cpu_relax();
47 47
48 /* Ok, that indicated we're done: double-check carefully. */ 48 /* Ok, that indicated we're done: double-check carefully. */
49 spin_lock_irqsave(&desc->lock, flags); 49 raw_spin_lock_irqsave(&desc->lock, flags);
50 status = desc->status; 50 status = desc->status;
51 spin_unlock_irqrestore(&desc->lock, flags); 51 raw_spin_unlock_irqrestore(&desc->lock, flags);
52 52
53 /* Oops, that failed? */ 53 /* Oops, that failed? */
54 } while (status & IRQ_INPROGRESS); 54 } while (status & IRQ_INPROGRESS);
@@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
114 if (!desc->chip->set_affinity) 114 if (!desc->chip->set_affinity)
115 return -EINVAL; 115 return -EINVAL;
116 116
117 spin_lock_irqsave(&desc->lock, flags); 117 raw_spin_lock_irqsave(&desc->lock, flags);
118 118
119#ifdef CONFIG_GENERIC_PENDING_IRQ 119#ifdef CONFIG_GENERIC_PENDING_IRQ
120 if (desc->status & IRQ_MOVE_PCNTXT) { 120 if (desc->status & IRQ_MOVE_PCNTXT) {
@@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
134 } 134 }
135#endif 135#endif
136 desc->status |= IRQ_AFFINITY_SET; 136 desc->status |= IRQ_AFFINITY_SET;
137 spin_unlock_irqrestore(&desc->lock, flags); 137 raw_spin_unlock_irqrestore(&desc->lock, flags);
138 return 0; 138 return 0;
139} 139}
140 140
@@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq)
181 unsigned long flags; 181 unsigned long flags;
182 int ret; 182 int ret;
183 183
184 spin_lock_irqsave(&desc->lock, flags); 184 raw_spin_lock_irqsave(&desc->lock, flags);
185 ret = setup_affinity(irq, desc); 185 ret = setup_affinity(irq, desc);
186 if (!ret) 186 if (!ret)
187 irq_set_thread_affinity(desc); 187 irq_set_thread_affinity(desc);
188 spin_unlock_irqrestore(&desc->lock, flags); 188 raw_spin_unlock_irqrestore(&desc->lock, flags);
189 189
190 return ret; 190 return ret;
191} 191}
@@ -231,9 +231,9 @@ void disable_irq_nosync(unsigned int irq)
231 return; 231 return;
232 232
233 chip_bus_lock(irq, desc); 233 chip_bus_lock(irq, desc);
234 spin_lock_irqsave(&desc->lock, flags); 234 raw_spin_lock_irqsave(&desc->lock, flags);
235 __disable_irq(desc, irq, false); 235 __disable_irq(desc, irq, false);
236 spin_unlock_irqrestore(&desc->lock, flags); 236 raw_spin_unlock_irqrestore(&desc->lock, flags);
237 chip_bus_sync_unlock(irq, desc); 237 chip_bus_sync_unlock(irq, desc);
238} 238}
239EXPORT_SYMBOL(disable_irq_nosync); 239EXPORT_SYMBOL(disable_irq_nosync);
@@ -308,9 +308,9 @@ void enable_irq(unsigned int irq)
308 return; 308 return;
309 309
310 chip_bus_lock(irq, desc); 310 chip_bus_lock(irq, desc);
311 spin_lock_irqsave(&desc->lock, flags); 311 raw_spin_lock_irqsave(&desc->lock, flags);
312 __enable_irq(desc, irq, false); 312 __enable_irq(desc, irq, false);
313 spin_unlock_irqrestore(&desc->lock, flags); 313 raw_spin_unlock_irqrestore(&desc->lock, flags);
314 chip_bus_sync_unlock(irq, desc); 314 chip_bus_sync_unlock(irq, desc);
315} 315}
316EXPORT_SYMBOL(enable_irq); 316EXPORT_SYMBOL(enable_irq);
@@ -347,7 +347,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
347 /* wakeup-capable irqs can be shared between drivers that 347 /* wakeup-capable irqs can be shared between drivers that
348 * don't need to have the same sleep mode behaviors. 348 * don't need to have the same sleep mode behaviors.
349 */ 349 */
350 spin_lock_irqsave(&desc->lock, flags); 350 raw_spin_lock_irqsave(&desc->lock, flags);
351 if (on) { 351 if (on) {
352 if (desc->wake_depth++ == 0) { 352 if (desc->wake_depth++ == 0) {
353 ret = set_irq_wake_real(irq, on); 353 ret = set_irq_wake_real(irq, on);
@@ -368,7 +368,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
368 } 368 }
369 } 369 }
370 370
371 spin_unlock_irqrestore(&desc->lock, flags); 371 raw_spin_unlock_irqrestore(&desc->lock, flags);
372 return ret; 372 return ret;
373} 373}
374EXPORT_SYMBOL(set_irq_wake); 374EXPORT_SYMBOL(set_irq_wake);
@@ -484,12 +484,12 @@ static int irq_wait_for_interrupt(struct irqaction *action)
484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) 484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
485{ 485{
486 chip_bus_lock(irq, desc); 486 chip_bus_lock(irq, desc);
487 spin_lock_irq(&desc->lock); 487 raw_spin_lock_irq(&desc->lock);
488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { 488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
489 desc->status &= ~IRQ_MASKED; 489 desc->status &= ~IRQ_MASKED;
490 desc->chip->unmask(irq); 490 desc->chip->unmask(irq);
491 } 491 }
492 spin_unlock_irq(&desc->lock); 492 raw_spin_unlock_irq(&desc->lock);
493 chip_bus_sync_unlock(irq, desc); 493 chip_bus_sync_unlock(irq, desc);
494} 494}
495 495
@@ -514,9 +514,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
514 return; 514 return;
515 } 515 }
516 516
517 spin_lock_irq(&desc->lock); 517 raw_spin_lock_irq(&desc->lock);
518 cpumask_copy(mask, desc->affinity); 518 cpumask_copy(mask, desc->affinity);
519 spin_unlock_irq(&desc->lock); 519 raw_spin_unlock_irq(&desc->lock);
520 520
521 set_cpus_allowed_ptr(current, mask); 521 set_cpus_allowed_ptr(current, mask);
522 free_cpumask_var(mask); 522 free_cpumask_var(mask);
@@ -545,7 +545,7 @@ static int irq_thread(void *data)
545 545
546 atomic_inc(&desc->threads_active); 546 atomic_inc(&desc->threads_active);
547 547
548 spin_lock_irq(&desc->lock); 548 raw_spin_lock_irq(&desc->lock);
549 if (unlikely(desc->status & IRQ_DISABLED)) { 549 if (unlikely(desc->status & IRQ_DISABLED)) {
550 /* 550 /*
551 * CHECKME: We might need a dedicated 551 * CHECKME: We might need a dedicated
@@ -555,9 +555,9 @@ static int irq_thread(void *data)
555 * retriggers the interrupt itself --- tglx 555 * retriggers the interrupt itself --- tglx
556 */ 556 */
557 desc->status |= IRQ_PENDING; 557 desc->status |= IRQ_PENDING;
558 spin_unlock_irq(&desc->lock); 558 raw_spin_unlock_irq(&desc->lock);
559 } else { 559 } else {
560 spin_unlock_irq(&desc->lock); 560 raw_spin_unlock_irq(&desc->lock);
561 561
562 action->thread_fn(action->irq, action->dev_id); 562 action->thread_fn(action->irq, action->dev_id);
563 563
@@ -679,7 +679,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
679 /* 679 /*
680 * The following block of code has to be executed atomically 680 * The following block of code has to be executed atomically
681 */ 681 */
682 spin_lock_irqsave(&desc->lock, flags); 682 raw_spin_lock_irqsave(&desc->lock, flags);
683 old_ptr = &desc->action; 683 old_ptr = &desc->action;
684 old = *old_ptr; 684 old = *old_ptr;
685 if (old) { 685 if (old) {
@@ -775,7 +775,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
775 __enable_irq(desc, irq, false); 775 __enable_irq(desc, irq, false);
776 } 776 }
777 777
778 spin_unlock_irqrestore(&desc->lock, flags); 778 raw_spin_unlock_irqrestore(&desc->lock, flags);
779 779
780 /* 780 /*
781 * Strictly no need to wake it up, but hung_task complains 781 * Strictly no need to wake it up, but hung_task complains
@@ -802,7 +802,7 @@ mismatch:
802 ret = -EBUSY; 802 ret = -EBUSY;
803 803
804out_thread: 804out_thread:
805 spin_unlock_irqrestore(&desc->lock, flags); 805 raw_spin_unlock_irqrestore(&desc->lock, flags);
806 if (new->thread) { 806 if (new->thread) {
807 struct task_struct *t = new->thread; 807 struct task_struct *t = new->thread;
808 808
@@ -844,7 +844,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
844 if (!desc) 844 if (!desc)
845 return NULL; 845 return NULL;
846 846
847 spin_lock_irqsave(&desc->lock, flags); 847 raw_spin_lock_irqsave(&desc->lock, flags);
848 848
849 /* 849 /*
850 * There can be multiple actions per IRQ descriptor, find the right 850 * There can be multiple actions per IRQ descriptor, find the right
@@ -856,7 +856,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
856 856
857 if (!action) { 857 if (!action) {
858 WARN(1, "Trying to free already-free IRQ %d\n", irq); 858 WARN(1, "Trying to free already-free IRQ %d\n", irq);
859 spin_unlock_irqrestore(&desc->lock, flags); 859 raw_spin_unlock_irqrestore(&desc->lock, flags);
860 860
861 return NULL; 861 return NULL;
862 } 862 }
@@ -884,7 +884,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
884 desc->chip->disable(irq); 884 desc->chip->disable(irq);
885 } 885 }
886 886
887 spin_unlock_irqrestore(&desc->lock, flags); 887 raw_spin_unlock_irqrestore(&desc->lock, flags);
888 888
889 unregister_handler_proc(irq, action); 889 unregister_handler_proc(irq, action);
890 890
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index fcb6c96f2627..241962280836 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -27,7 +27,7 @@ void move_masked_irq(int irq)
27 if (!desc->chip->set_affinity) 27 if (!desc->chip->set_affinity)
28 return; 28 return;
29 29
30 assert_spin_locked(&desc->lock); 30 assert_raw_spin_locked(&desc->lock);
31 31
32 /* 32 /*
33 * If there was a valid mask to work with, please 33 * If there was a valid mask to work with, please
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 3fd30197da2e..26bac9d8f860 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
42 "for migration.\n", irq); 42 "for migration.\n", irq);
43 return false; 43 return false;
44 } 44 }
45 spin_lock_init(&desc->lock); 45 raw_spin_lock_init(&desc->lock);
46 desc->node = node; 46 desc->node = node;
47 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 47 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); 48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
@@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
67 67
68 irq = old_desc->irq; 68 irq = old_desc->irq;
69 69
70 spin_lock_irqsave(&sparse_irq_lock, flags); 70 raw_spin_lock_irqsave(&sparse_irq_lock, flags);
71 71
72 /* We have to check it to avoid races with another CPU */ 72 /* We have to check it to avoid races with another CPU */
73 desc = irq_desc_ptrs[irq]; 73 desc = irq_desc_ptrs[irq];
@@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
91 } 91 }
92 92
93 irq_desc_ptrs[irq] = desc; 93 irq_desc_ptrs[irq] = desc;
94 spin_unlock_irqrestore(&sparse_irq_lock, flags); 94 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
95 95
96 /* free the old one */ 96 /* free the old one */
97 free_one_irq_desc(old_desc, desc); 97 free_one_irq_desc(old_desc, desc);
@@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
100 return desc; 100 return desc;
101 101
102out_unlock: 102out_unlock:
103 spin_unlock_irqrestore(&sparse_irq_lock, flags); 103 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
104 104
105 return desc; 105 return desc;
106} 106}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index a0bb09e79867..0d4005d85b03 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -28,9 +28,9 @@ void suspend_device_irqs(void)
28 for_each_irq_desc(irq, desc) { 28 for_each_irq_desc(irq, desc) {
29 unsigned long flags; 29 unsigned long flags;
30 30
31 spin_lock_irqsave(&desc->lock, flags); 31 raw_spin_lock_irqsave(&desc->lock, flags);
32 __disable_irq(desc, irq, true); 32 __disable_irq(desc, irq, true);
33 spin_unlock_irqrestore(&desc->lock, flags); 33 raw_spin_unlock_irqrestore(&desc->lock, flags);
34 } 34 }
35 35
36 for_each_irq_desc(irq, desc) 36 for_each_irq_desc(irq, desc)
@@ -56,9 +56,9 @@ void resume_device_irqs(void)
56 if (!(desc->status & IRQ_SUSPENDED)) 56 if (!(desc->status & IRQ_SUSPENDED))
57 continue; 57 continue;
58 58
59 spin_lock_irqsave(&desc->lock, flags); 59 raw_spin_lock_irqsave(&desc->lock, flags);
60 __enable_irq(desc, irq, true); 60 __enable_irq(desc, irq, true);
61 spin_unlock_irqrestore(&desc->lock, flags); 61 raw_spin_unlock_irqrestore(&desc->lock, flags);
62 } 62 }
63} 63}
64EXPORT_SYMBOL_GPL(resume_device_irqs); 64EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145fea97..6f50eccc79c0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -179,7 +179,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
179 unsigned long flags; 179 unsigned long flags;
180 int ret = 1; 180 int ret = 1;
181 181
182 spin_lock_irqsave(&desc->lock, flags); 182 raw_spin_lock_irqsave(&desc->lock, flags);
183 for (action = desc->action ; action; action = action->next) { 183 for (action = desc->action ; action; action = action->next) {
184 if ((action != new_action) && action->name && 184 if ((action != new_action) && action->name &&
185 !strcmp(new_action->name, action->name)) { 185 !strcmp(new_action->name, action->name)) {
@@ -187,7 +187,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
187 break; 187 break;
188 } 188 }
189 } 189 }
190 spin_unlock_irqrestore(&desc->lock, flags); 190 raw_spin_unlock_irqrestore(&desc->lock, flags);
191 return ret; 191 return ret;
192} 192}
193 193
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e49ea1c5232d..89fb90ae534f 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
28 struct irqaction *action; 28 struct irqaction *action;
29 int ok = 0, work = 0; 29 int ok = 0, work = 0;
30 30
31 spin_lock(&desc->lock); 31 raw_spin_lock(&desc->lock);
32 /* Already running on another processor */ 32 /* Already running on another processor */
33 if (desc->status & IRQ_INPROGRESS) { 33 if (desc->status & IRQ_INPROGRESS) {
34 /* 34 /*
@@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc)
37 */ 37 */
38 if (desc->action && (desc->action->flags & IRQF_SHARED)) 38 if (desc->action && (desc->action->flags & IRQF_SHARED))
39 desc->status |= IRQ_PENDING; 39 desc->status |= IRQ_PENDING;
40 spin_unlock(&desc->lock); 40 raw_spin_unlock(&desc->lock);
41 return ok; 41 return ok;
42 } 42 }
43 /* Honour the normal IRQ locking */ 43 /* Honour the normal IRQ locking */
44 desc->status |= IRQ_INPROGRESS; 44 desc->status |= IRQ_INPROGRESS;
45 action = desc->action; 45 action = desc->action;
46 spin_unlock(&desc->lock); 46 raw_spin_unlock(&desc->lock);
47 47
48 while (action) { 48 while (action) {
49 /* Only shared IRQ handlers are safe to call */ 49 /* Only shared IRQ handlers are safe to call */
@@ -56,7 +56,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
56 } 56 }
57 local_irq_disable(); 57 local_irq_disable();
58 /* Now clean up the flags */ 58 /* Now clean up the flags */
59 spin_lock(&desc->lock); 59 raw_spin_lock(&desc->lock);
60 action = desc->action; 60 action = desc->action;
61 61
62 /* 62 /*
@@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc)
68 * Perform real IRQ processing for the IRQ we deferred 68 * Perform real IRQ processing for the IRQ we deferred
69 */ 69 */
70 work = 1; 70 work = 1;
71 spin_unlock(&desc->lock); 71 raw_spin_unlock(&desc->lock);
72 handle_IRQ_event(irq, action); 72 handle_IRQ_event(irq, action);
73 spin_lock(&desc->lock); 73 raw_spin_lock(&desc->lock);
74 desc->status &= ~IRQ_PENDING; 74 desc->status &= ~IRQ_PENDING;
75 } 75 }
76 desc->status &= ~IRQ_INPROGRESS; 76 desc->status &= ~IRQ_INPROGRESS;
@@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
80 */ 80 */
81 if (work && desc->chip && desc->chip->end) 81 if (work && desc->chip && desc->chip->end)
82 desc->chip->end(irq); 82 desc->chip->end(irq);
83 spin_unlock(&desc->lock); 83 raw_spin_unlock(&desc->lock);
84 84
85 return ok; 85 return ok;
86} 86}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..a9a93d9ee7a7 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@
21#include <linux/hardirq.h> 21#include <linux/hardirq.h>
22#include <linux/elf.h> 22#include <linux/elf.h>
23#include <linux/elfcore.h> 23#include <linux/elfcore.h>
24#include <linux/utsrelease.h> 24#include <generated/utsrelease.h>
25#include <linux/utsname.h> 25#include <linux/utsname.h>
26#include <linux/numa.h> 26#include <linux/numa.h>
27#include <linux/suspend.h> 27#include <linux/suspend.h>
@@ -31,6 +31,7 @@
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/console.h> 32#include <linux/console.h>
33#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34#include <linux/swap.h>
34 35
35#include <asm/page.h> 36#include <asm/page.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
@@ -1082,6 +1083,64 @@ void crash_kexec(struct pt_regs *regs)
1082 } 1083 }
1083} 1084}
1084 1085
1086size_t crash_get_memory_size(void)
1087{
1088 size_t size;
1089 mutex_lock(&kexec_mutex);
1090 size = crashk_res.end - crashk_res.start + 1;
1091 mutex_unlock(&kexec_mutex);
1092 return size;
1093}
1094
1095static void free_reserved_phys_range(unsigned long begin, unsigned long end)
1096{
1097 unsigned long addr;
1098
1099 for (addr = begin; addr < end; addr += PAGE_SIZE) {
1100 ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
1101 init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
1102 free_page((unsigned long)__va(addr));
1103 totalram_pages++;
1104 }
1105}
1106
1107int crash_shrink_memory(unsigned long new_size)
1108{
1109 int ret = 0;
1110 unsigned long start, end;
1111
1112 mutex_lock(&kexec_mutex);
1113
1114 if (kexec_crash_image) {
1115 ret = -ENOENT;
1116 goto unlock;
1117 }
1118 start = crashk_res.start;
1119 end = crashk_res.end;
1120
1121 if (new_size >= end - start + 1) {
1122 ret = -EINVAL;
1123 if (new_size == end - start + 1)
1124 ret = 0;
1125 goto unlock;
1126 }
1127
1128 start = roundup(start, PAGE_SIZE);
1129 end = roundup(start + new_size, PAGE_SIZE);
1130
1131 free_reserved_phys_range(end, crashk_res.end);
1132
1133 if (start == end) {
1134 crashk_res.end = end;
1135 release_resource(&crashk_res);
1136 } else
1137 crashk_res.end = end - 1;
1138
1139unlock:
1140 mutex_unlock(&kexec_mutex);
1141 return ret;
1142}
1143
1085static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 1144static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1086 size_t data_len) 1145 size_t data_len)
1087{ 1146{
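
crash_shrink_memory() above frees the tail of the crashkernel reservation back to the page allocator: each page has PG_reserved cleared and its refcount reset before free_page(), and totalram_pages is bumped. Because only whole pages can be returned, the new boundary is first rounded up to PAGE_SIZE. A user-space sketch of that rounding arithmetic, with a simplified stand-in for the kernel's roundup() macro and illustrative numbers:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    /* simplified stand-in for the kernel's roundup() */
    #define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
            unsigned long start = 0x1000000UL;      /* illustrative base */
            unsigned long new_size = 10000;         /* requested size in bytes */
            unsigned long end = roundup(start + new_size, PAGE_SIZE);

            /* Only the whole pages past 'end' are freed back to the system. */
            printf("kept [%#lx, %#lx): %lu bytes (%lu pages)\n",
                   start, end, end - start, (end - start) / PAGE_SIZE);
            return 0;
    }
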
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 7d7014634022..2eb517e23514 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread;
129struct task_struct *kgdb_contthread; 129struct task_struct *kgdb_contthread;
130 130
131int kgdb_single_step; 131int kgdb_single_step;
132pid_t kgdb_sstep_pid;
132 133
133/* Our I/O buffers. */ 134/* Our I/O buffers. */
134static char remcom_in_buffer[BUFMAX]; 135static char remcom_in_buffer[BUFMAX];
@@ -541,12 +542,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
541 */ 542 */
542 if (tid == 0 || tid == -1) 543 if (tid == 0 || tid == -1)
543 tid = -atomic_read(&kgdb_active) - 2; 544 tid = -atomic_read(&kgdb_active) - 2;
544 if (tid < 0) { 545 if (tid < -1 && tid > -NR_CPUS - 2) {
545 if (kgdb_info[-tid - 2].task) 546 if (kgdb_info[-tid - 2].task)
546 return kgdb_info[-tid - 2].task; 547 return kgdb_info[-tid - 2].task;
547 else 548 else
548 return idle_task(-tid - 2); 549 return idle_task(-tid - 2);
549 } 550 }
551 if (tid <= 0) {
552 printk(KERN_ERR "KGDB: Internal thread select error\n");
553 dump_stack();
554 return NULL;
555 }
550 556
551 /* 557 /*
552 * find_task_by_pid_ns() does not take the tasklist lock anymore 558 * find_task_by_pid_ns() does not take the tasklist lock anymore
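
The getthread() rework above tightens the decoding of gdb's negative "shadow" thread ids, which kgdb maps onto CPUs (tid -2 is CPU 0, tid -3 is CPU 1, and so on), and turns anything outside the valid range into a logged internal error instead of an out-of-bounds kgdb_info[] access. An illustrative helper for the mapping, not a kgdb function:

    /* Decode kgdb's shadow thread ids; valid for -NR_CPUS - 1 <= tid <= -2. */
    static inline int shadow_tid_to_cpu(int tid)
    {
            return -tid - 2;
    }
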
@@ -619,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
619static int kgdb_activate_sw_breakpoints(void) 625static int kgdb_activate_sw_breakpoints(void)
620{ 626{
621 unsigned long addr; 627 unsigned long addr;
622 int error = 0; 628 int error;
629 int ret = 0;
623 int i; 630 int i;
624 631
625 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { 632 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -629,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void)
629 addr = kgdb_break[i].bpt_addr; 636 addr = kgdb_break[i].bpt_addr;
630 error = kgdb_arch_set_breakpoint(addr, 637 error = kgdb_arch_set_breakpoint(addr,
631 kgdb_break[i].saved_instr); 638 kgdb_break[i].saved_instr);
632 if (error) 639 if (error) {
633 return error; 640 ret = error;
641 printk(KERN_INFO "KGDB: BP install failed: %lx\n", addr);
642 continue;
643 }
634 644
635 kgdb_flush_swbreak_addr(addr); 645 kgdb_flush_swbreak_addr(addr);
636 kgdb_break[i].state = BP_ACTIVE; 646 kgdb_break[i].state = BP_ACTIVE;
637 } 647 }
638 return 0; 648 return ret;
639} 649}
640 650
641static int kgdb_set_sw_break(unsigned long addr) 651static int kgdb_set_sw_break(unsigned long addr)
@@ -682,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr)
682static int kgdb_deactivate_sw_breakpoints(void) 692static int kgdb_deactivate_sw_breakpoints(void)
683{ 693{
684 unsigned long addr; 694 unsigned long addr;
685 int error = 0; 695 int error;
696 int ret = 0;
686 int i; 697 int i;
687 698
688 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { 699 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -691,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void)
691 addr = kgdb_break[i].bpt_addr; 702 addr = kgdb_break[i].bpt_addr;
692 error = kgdb_arch_remove_breakpoint(addr, 703 error = kgdb_arch_remove_breakpoint(addr,
693 kgdb_break[i].saved_instr); 704 kgdb_break[i].saved_instr);
694 if (error) 705 if (error) {
695 return error; 706 printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
707 ret = error;
708 }
696 709
697 kgdb_flush_swbreak_addr(addr); 710 kgdb_flush_swbreak_addr(addr);
698 kgdb_break[i].state = BP_SET; 711 kgdb_break[i].state = BP_SET;
699 } 712 }
700 return 0; 713 return ret;
701} 714}
702 715
703static int kgdb_remove_sw_break(unsigned long addr) 716static int kgdb_remove_sw_break(unsigned long addr)
@@ -1204,8 +1217,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks)
1204 return 1; 1217 return 1;
1205 1218
1206 } else { 1219 } else {
1207 error_packet(remcom_out_buffer, -EINVAL); 1220 kgdb_msg_write("KGDB only knows signal 9 (pass)"
1208 return 0; 1221 " and 15 (pass and disconnect)\n"
1222 "Executing a continue without signal passing\n", 0);
1223 remcom_in_buffer[0] = 'c';
1209 } 1224 }
1210 1225
1211 /* Indicate fall through */ 1226 /* Indicate fall through */
@@ -1395,6 +1410,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
1395 struct kgdb_state kgdb_var; 1410 struct kgdb_state kgdb_var;
1396 struct kgdb_state *ks = &kgdb_var; 1411 struct kgdb_state *ks = &kgdb_var;
1397 unsigned long flags; 1412 unsigned long flags;
1413 int sstep_tries = 100;
1398 int error = 0; 1414 int error = 0;
1399 int i, cpu; 1415 int i, cpu;
1400 1416
@@ -1425,13 +1441,14 @@ acquirelock:
1425 cpu_relax(); 1441 cpu_relax();
1426 1442
1427 /* 1443 /*
1428 * Do not start the debugger connection on this CPU if the last 1444 * For single stepping, try to only enter on the processor
1429 * instance of the exception handler wanted to come into the 1445 * that was single stepping. To guard against a deadlock, the
1430 * debugger on a different CPU via a single step 1446 * kernel will retry only sstep_tries times before
1447 * giving up and continuing on.
1431 */ 1448 */
1432 if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && 1449 if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
1433 atomic_read(&kgdb_cpu_doing_single_step) != cpu) { 1450 (kgdb_info[cpu].task &&
1434 1451 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
1435 atomic_set(&kgdb_active, -1); 1452 atomic_set(&kgdb_active, -1);
1436 touch_softlockup_watchdog(); 1453 touch_softlockup_watchdog();
1437 clocksource_touch_watchdog(); 1454 clocksource_touch_watchdog();
@@ -1524,6 +1541,13 @@ acquirelock:
1524 } 1541 }
1525 1542
1526kgdb_restore: 1543kgdb_restore:
1544 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
1545 int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
1546 if (kgdb_info[sstep_cpu].task)
1547 kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
1548 else
1549 kgdb_sstep_pid = 0;
1550 }
1527 /* Free kgdb_active */ 1551 /* Free kgdb_active */
1528 atomic_set(&kgdb_active, -1); 1552 atomic_set(&kgdb_active, -1);
1529 touch_softlockup_watchdog(); 1553 touch_softlockup_watchdog();
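
Both breakpoint sweeps above change from fail-fast to best-effort: rather than returning on the first kgdb_arch_set_breakpoint()/kgdb_arch_remove_breakpoint() failure and leaving the remaining breakpoints untouched, they log the failure, remember the error, and keep going, returning the last error only after the loop completes. A user-space sketch of that pattern; struct item, apply_all() and fail_odd() are illustrative:

    #include <stdio.h>

    struct item { int id; int active; };

    static int apply_all(struct item *items, int n, int (*op)(struct item *))
    {
            int i, err, ret = 0;

            for (i = 0; i < n; i++) {
                    err = op(&items[i]);
                    if (err) {
                            ret = err;      /* remember, but keep going */
                            fprintf(stderr, "item %d failed: %d\n",
                                    items[i].id, err);
                            continue;
                    }
                    items[i].active = 1;
            }
            return ret;     /* 0 only if every item succeeded */
    }

    static int fail_odd(struct item *it)
    {
            return it->id & 1 ? -1 : 0;
    }

    int main(void)
    {
            struct item v[] = { {0, 0}, {1, 0}, {2, 0} };

            return apply_all(v, 3, fail_odd) ? 1 : 0;
    }
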
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 528dd78e7e7e..3feaf5a74514 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -100,6 +100,26 @@ static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
100} 100}
101KERNEL_ATTR_RO(kexec_crash_loaded); 101KERNEL_ATTR_RO(kexec_crash_loaded);
102 102
103static ssize_t kexec_crash_size_show(struct kobject *kobj,
104 struct kobj_attribute *attr, char *buf)
105{
106 return sprintf(buf, "%zu\n", crash_get_memory_size());
107}
108static ssize_t kexec_crash_size_store(struct kobject *kobj,
109 struct kobj_attribute *attr,
110 const char *buf, size_t count)
111{
112 unsigned long cnt;
113 int ret;
114
115 if (strict_strtoul(buf, 0, &cnt))
116 return -EINVAL;
117
118 ret = crash_shrink_memory(cnt);
119 return ret < 0 ? ret : count;
120}
121KERNEL_ATTR_RW(kexec_crash_size);
122
103static ssize_t vmcoreinfo_show(struct kobject *kobj, 123static ssize_t vmcoreinfo_show(struct kobject *kobj,
104 struct kobj_attribute *attr, char *buf) 124 struct kobj_attribute *attr, char *buf)
105{ 125{
@@ -147,6 +167,7 @@ static struct attribute * kernel_attrs[] = {
147#ifdef CONFIG_KEXEC 167#ifdef CONFIG_KEXEC
148 &kexec_loaded_attr.attr, 168 &kexec_loaded_attr.attr,
149 &kexec_crash_loaded_attr.attr, 169 &kexec_crash_loaded_attr.attr,
170 &kexec_crash_size_attr.attr,
150 &vmcoreinfo_attr.attr, 171 &vmcoreinfo_attr.attr,
151#endif 172#endif
152 NULL 173 NULL
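
This hooks the new crash_get_memory_size()/crash_shrink_memory() pair up as /sys/kernel/kexec_crash_size: reading reports the reservation in bytes, and writing a smaller value shrinks it in place. A user-space sketch that reads the knob (shrinking is left as a comment, since it irreversibly frees the memory):

    #include <stdio.h>

    int main(void)
    {
            unsigned long size;
            FILE *f = fopen("/sys/kernel/kexec_crash_size", "r");

            if (!f || fscanf(f, "%lu", &size) != 1)
                    return 1;
            printf("crashkernel reservation: %lu bytes\n", size);
            fclose(f);
            /* Shrinking would be a write of a smaller byte count to the
             * same file; the kernel rejects a larger one with EINVAL and
             * refuses entirely (ENOENT) once a crash image is loaded. */
            return 0;
    }
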
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ab7ae57773e1..fbb6222fe7e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
150EXPORT_SYMBOL(kthread_create); 150EXPORT_SYMBOL(kthread_create);
151 151
152/** 152/**
153 * kthread_bind - bind a just-created kthread to a cpu.
154 * @p: thread created by kthread_create().
155 * @cpu: cpu (might not be online, must be possible) for @p to run on.
156 *
157 * Description: This function is equivalent to set_cpus_allowed(),
158 * except that @cpu doesn't need to be online, and the thread must be
159 * stopped (i.e., just returned from kthread_create()).
160 */
161void kthread_bind(struct task_struct *p, unsigned int cpu)
162{
163 /* Must have done schedule() in kthread() before we set_task_cpu */
164 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
165 WARN_ON(1);
166 return;
167 }
168
169 p->cpus_allowed = cpumask_of_cpu(cpu);
170 p->rt.nr_cpus_allowed = 1;
171 p->flags |= PF_THREAD_BOUND;
172}
173EXPORT_SYMBOL(kthread_bind);
174
175/**
153 * kthread_stop - stop a thread created by kthread_create(). 176 * kthread_stop - stop a thread created by kthread_create().
154 * @k: thread created by kthread_create(). 177 * @k: thread created by kthread_create().
155 * 178 *
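
kthread_bind() is added here as an exported helper: because the thread returned by kthread_create() has never run, its cpumask and PF_THREAD_BOUND flag can be set directly once wait_task_inactive() confirms it is parked. The usual calling pattern is create, bind while still inactive, then wake. A kernel-style usage sketch; worker_fn(), start_pinned_worker() and the thread name are illustrative:

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/err.h>

    static int worker_fn(void *data)
    {
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();     /* sleep until woken; real work goes here */
            }
            return 0;
    }

    static struct task_struct *start_pinned_worker(unsigned int cpu)
    {
            struct task_struct *p = kthread_create(worker_fn, NULL,
                                                   "pinned/%u", cpu);

            if (!IS_ERR(p)) {
                    kthread_bind(p, cpu);   /* legal only before first wakeup */
                    wake_up_process(p);
            }
            return p;
    }
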
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f5dcd36d3151..5feaddcdbe49 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -73,11 +73,11 @@ module_param(lock_stat, int, 0644);
73 * to use a raw spinlock - we really don't want the spinlock 73 * to use a raw spinlock - we really don't want the spinlock
74 * code to recurse back into the lockdep code... 74 * code to recurse back into the lockdep code...
75 */ 75 */
76static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 76static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
77 77
78static int graph_lock(void) 78static int graph_lock(void)
79{ 79{
80 __raw_spin_lock(&lockdep_lock); 80 arch_spin_lock(&lockdep_lock);
81 /* 81 /*
82 * Make sure that if another CPU detected a bug while 82 * Make sure that if another CPU detected a bug while
83 * walking the graph we dont change it (while the other 83 * walking the graph we dont change it (while the other
@@ -85,7 +85,7 @@ static int graph_lock(void)
85 * dropped already) 85 * dropped already)
86 */ 86 */
87 if (!debug_locks) { 87 if (!debug_locks) {
88 __raw_spin_unlock(&lockdep_lock); 88 arch_spin_unlock(&lockdep_lock);
89 return 0; 89 return 0;
90 } 90 }
91 /* prevent any recursions within lockdep from causing deadlocks */ 91 /* prevent any recursions within lockdep from causing deadlocks */
@@ -95,11 +95,11 @@ static int graph_lock(void)
95 95
96static inline int graph_unlock(void) 96static inline int graph_unlock(void)
97{ 97{
98 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 98 if (debug_locks && !arch_spin_is_locked(&lockdep_lock))
99 return DEBUG_LOCKS_WARN_ON(1); 99 return DEBUG_LOCKS_WARN_ON(1);
100 100
101 current->lockdep_recursion--; 101 current->lockdep_recursion--;
102 __raw_spin_unlock(&lockdep_lock); 102 arch_spin_unlock(&lockdep_lock);
103 return 0; 103 return 0;
104} 104}
105 105
@@ -111,7 +111,7 @@ static inline int debug_locks_off_graph_unlock(void)
111{ 111{
112 int ret = debug_locks_off(); 112 int ret = debug_locks_off();
113 113
114 __raw_spin_unlock(&lockdep_lock); 114 arch_spin_unlock(&lockdep_lock);
115 115
116 return ret; 116 return ret;
117} 117}
@@ -140,7 +140,8 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
140} 140}
141 141
142#ifdef CONFIG_LOCK_STAT 142#ifdef CONFIG_LOCK_STAT
143static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 143static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
144 cpu_lock_stats);
144 145
145static inline u64 lockstat_clock(void) 146static inline u64 lockstat_clock(void)
146{ 147{
@@ -168,7 +169,7 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
168 if (time > lt->max) 169 if (time > lt->max)
169 lt->max = time; 170 lt->max = time;
170 171
171 if (time < lt->min || !lt->min) 172 if (time < lt->min || !lt->nr)
172 lt->min = time; 173 lt->min = time;
173 174
174 lt->total += time; 175 lt->total += time;
@@ -177,8 +178,15 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
177 178
178static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) 179static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
179{ 180{
180 dst->min += src->min; 181 if (!src->nr)
181 dst->max += src->max; 182 return;
183
184 if (src->max > dst->max)
185 dst->max = src->max;
186
187 if (src->min < dst->min || !dst->nr)
188 dst->min = src->min;
189
182 dst->total += src->total; 190 dst->total += src->total;
183 dst->nr += src->nr; 191 dst->nr += src->nr;
184} 192}
@@ -191,7 +199,7 @@ struct lock_class_stats lock_stats(struct lock_class *class)
191 memset(&stats, 0, sizeof(struct lock_class_stats)); 199 memset(&stats, 0, sizeof(struct lock_class_stats));
192 for_each_possible_cpu(cpu) { 200 for_each_possible_cpu(cpu) {
193 struct lock_class_stats *pcs = 201 struct lock_class_stats *pcs =
194 &per_cpu(lock_stats, cpu)[class - lock_classes]; 202 &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
195 203
196 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) 204 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
197 stats.contention_point[i] += pcs->contention_point[i]; 205 stats.contention_point[i] += pcs->contention_point[i];
@@ -218,7 +226,7 @@ void clear_lock_stats(struct lock_class *class)
218 226
219 for_each_possible_cpu(cpu) { 227 for_each_possible_cpu(cpu) {
220 struct lock_class_stats *cpu_stats = 228 struct lock_class_stats *cpu_stats =
221 &per_cpu(lock_stats, cpu)[class - lock_classes]; 229 &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
222 230
223 memset(cpu_stats, 0, sizeof(struct lock_class_stats)); 231 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
224 } 232 }
@@ -228,12 +236,12 @@ void clear_lock_stats(struct lock_class *class)
228 236
229static struct lock_class_stats *get_lock_stats(struct lock_class *class) 237static struct lock_class_stats *get_lock_stats(struct lock_class *class)
230{ 238{
231 return &get_cpu_var(lock_stats)[class - lock_classes]; 239 return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
232} 240}
233 241
234static void put_lock_stats(struct lock_class_stats *stats) 242static void put_lock_stats(struct lock_class_stats *stats)
235{ 243{
236 put_cpu_var(lock_stats); 244 put_cpu_var(cpu_lock_stats);
237} 245}
238 246
239static void lock_release_holdtime(struct held_lock *hlock) 247static void lock_release_holdtime(struct held_lock *hlock)
@@ -379,7 +387,8 @@ static int save_trace(struct stack_trace *trace)
379 * complete trace that maxes out the entries provided will be reported 387 * complete trace that maxes out the entries provided will be reported
380 * as incomplete, friggin useless </rant> 388 * as incomplete, friggin useless </rant>
381 */ 389 */
382 if (trace->entries[trace->nr_entries-1] == ULONG_MAX) 390 if (trace->nr_entries != 0 &&
391 trace->entries[trace->nr_entries-1] == ULONG_MAX)
383 trace->nr_entries--; 392 trace->nr_entries--;
384 393
385 trace->max_entries = trace->nr_entries; 394 trace->max_entries = trace->nr_entries;
@@ -1161,9 +1170,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
1161 this.class = class; 1170 this.class = class;
1162 1171
1163 local_irq_save(flags); 1172 local_irq_save(flags);
1164 __raw_spin_lock(&lockdep_lock); 1173 arch_spin_lock(&lockdep_lock);
1165 ret = __lockdep_count_forward_deps(&this); 1174 ret = __lockdep_count_forward_deps(&this);
1166 __raw_spin_unlock(&lockdep_lock); 1175 arch_spin_unlock(&lockdep_lock);
1167 local_irq_restore(flags); 1176 local_irq_restore(flags);
1168 1177
1169 return ret; 1178 return ret;
@@ -1188,9 +1197,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
1188 this.class = class; 1197 this.class = class;
1189 1198
1190 local_irq_save(flags); 1199 local_irq_save(flags);
1191 __raw_spin_lock(&lockdep_lock); 1200 arch_spin_lock(&lockdep_lock);
1192 ret = __lockdep_count_backward_deps(&this); 1201 ret = __lockdep_count_backward_deps(&this);
1193 __raw_spin_unlock(&lockdep_lock); 1202 arch_spin_unlock(&lockdep_lock);
1194 local_irq_restore(flags); 1203 local_irq_restore(flags);
1195 1204
1196 return ret; 1205 return ret;
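
The lock_time_add() rewrite above fixes a statistics bug: when folding per-CPU hold-time records together, the old code summed the per-CPU min and max, which is meaningless for order statistics. The merge now takes the smallest min and the largest max, and skips empty sources; nr doubles as an "initialized" flag, since min == 0 cannot distinguish "no samples yet" from "a zero-length hold" (lock_time_inc() above gets the matching !lt->nr test). Restated as a self-contained sketch:

    struct lock_time { unsigned long long min, max, total, nr; };

    static void lock_time_merge(const struct lock_time *src,
                                struct lock_time *dst)
    {
            if (!src->nr)           /* empty source: nothing to merge */
                    return;

            if (src->max > dst->max)
                    dst->max = src->max;

            if (src->min < dst->min || !dst->nr)    /* !dst->nr: dst empty */
                    dst->min = src->min;

            dst->total += src->total;
            dst->nr += src->nr;
    }
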
diff --git a/kernel/module.c b/kernel/module.c
index 5842a71cf052..e96b8ed1cb6a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,8 +370,6 @@ EXPORT_SYMBOL_GPL(find_module);
370 370
371#ifdef CONFIG_SMP 371#ifdef CONFIG_SMP
372 372
373#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
374
375static void *percpu_modalloc(unsigned long size, unsigned long align, 373static void *percpu_modalloc(unsigned long size, unsigned long align,
376 const char *name) 374 const char *name)
377{ 375{
@@ -395,154 +393,6 @@ static void percpu_modfree(void *freeme)
395 free_percpu(freeme); 393 free_percpu(freeme);
396} 394}
397 395
398#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
399
400/* Number of blocks used and allocated. */
401static unsigned int pcpu_num_used, pcpu_num_allocated;
402/* Size of each block. -ve means used. */
403static int *pcpu_size;
404
405static int split_block(unsigned int i, unsigned short size)
406{
407 /* Reallocation required? */
408 if (pcpu_num_used + 1 > pcpu_num_allocated) {
409 int *new;
410
411 new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
412 GFP_KERNEL);
413 if (!new)
414 return 0;
415
416 pcpu_num_allocated *= 2;
417 pcpu_size = new;
418 }
419
420 /* Insert a new subblock */
421 memmove(&pcpu_size[i+1], &pcpu_size[i],
422 sizeof(pcpu_size[0]) * (pcpu_num_used - i));
423 pcpu_num_used++;
424
425 pcpu_size[i+1] -= size;
426 pcpu_size[i] = size;
427 return 1;
428}
429
430static inline unsigned int block_size(int val)
431{
432 if (val < 0)
433 return -val;
434 return val;
435}
436
437static void *percpu_modalloc(unsigned long size, unsigned long align,
438 const char *name)
439{
440 unsigned long extra;
441 unsigned int i;
442 void *ptr;
443 int cpu;
444
445 if (align > PAGE_SIZE) {
446 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
447 name, align, PAGE_SIZE);
448 align = PAGE_SIZE;
449 }
450
451 ptr = __per_cpu_start;
452 for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
453 /* Extra for alignment requirement. */
454 extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
455 BUG_ON(i == 0 && extra != 0);
456
457 if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
458 continue;
459
460 /* Transfer extra to previous block. */
461 if (pcpu_size[i-1] < 0)
462 pcpu_size[i-1] -= extra;
463 else
464 pcpu_size[i-1] += extra;
465 pcpu_size[i] -= extra;
466 ptr += extra;
467
468 /* Split block if warranted */
469 if (pcpu_size[i] - size > sizeof(unsigned long))
470 if (!split_block(i, size))
471 return NULL;
472
473 /* add the per-cpu scanning areas */
474 for_each_possible_cpu(cpu)
475 kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
476 GFP_KERNEL);
477
478 /* Mark allocated */
479 pcpu_size[i] = -pcpu_size[i];
480 return ptr;
481 }
482
483 printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
484 size);
485 return NULL;
486}
487
488static void percpu_modfree(void *freeme)
489{
490 unsigned int i;
491 void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
492 int cpu;
493
494 /* First entry is core kernel percpu data. */
495 for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
496 if (ptr == freeme) {
497 pcpu_size[i] = -pcpu_size[i];
498 goto free;
499 }
500 }
501 BUG();
502
503 free:
504 /* remove the per-cpu scanning areas */
505 for_each_possible_cpu(cpu)
506 kmemleak_free(freeme + per_cpu_offset(cpu));
507
508 /* Merge with previous? */
509 if (pcpu_size[i-1] >= 0) {
510 pcpu_size[i-1] += pcpu_size[i];
511 pcpu_num_used--;
512 memmove(&pcpu_size[i], &pcpu_size[i+1],
513 (pcpu_num_used - i) * sizeof(pcpu_size[0]));
514 i--;
515 }
516 /* Merge with next? */
517 if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
518 pcpu_size[i] += pcpu_size[i+1];
519 pcpu_num_used--;
520 memmove(&pcpu_size[i+1], &pcpu_size[i+2],
521 (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
522 }
523}
524
525static int percpu_modinit(void)
526{
527 pcpu_num_used = 2;
528 pcpu_num_allocated = 2;
529 pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
530 GFP_KERNEL);
531 /* Static in-kernel percpu data (used). */
532 pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
533 /* Free room. */
534 pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
535 if (pcpu_size[1] < 0) {
536 printk(KERN_ERR "No per-cpu room for modules.\n");
537 pcpu_num_used = 1;
538 }
539
540 return 0;
541}
542__initcall(percpu_modinit);
543
544#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
545
546static unsigned int find_pcpusec(Elf_Ehdr *hdr, 396static unsigned int find_pcpusec(Elf_Ehdr *hdr,
547 Elf_Shdr *sechdrs, 397 Elf_Shdr *sechdrs,
548 const char *secstrings) 398 const char *secstrings)
@@ -1030,11 +880,23 @@ static int try_to_force_load(struct module *mod, const char *reason)
1030} 880}
1031 881
1032#ifdef CONFIG_MODVERSIONS 882#ifdef CONFIG_MODVERSIONS
 883/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply them. */
884static unsigned long maybe_relocated(unsigned long crc,
885 const struct module *crc_owner)
886{
887#ifdef ARCH_RELOCATES_KCRCTAB
888 if (crc_owner == NULL)
889 return crc - (unsigned long)reloc_start;
890#endif
891 return crc;
892}
893
1033static int check_version(Elf_Shdr *sechdrs, 894static int check_version(Elf_Shdr *sechdrs,
1034 unsigned int versindex, 895 unsigned int versindex,
1035 const char *symname, 896 const char *symname,
1036 struct module *mod, 897 struct module *mod,
1037 const unsigned long *crc) 898 const unsigned long *crc,
899 const struct module *crc_owner)
1038{ 900{
1039 unsigned int i, num_versions; 901 unsigned int i, num_versions;
1040 struct modversion_info *versions; 902 struct modversion_info *versions;
@@ -1055,10 +917,10 @@ static int check_version(Elf_Shdr *sechdrs,
1055 if (strcmp(versions[i].name, symname) != 0) 917 if (strcmp(versions[i].name, symname) != 0)
1056 continue; 918 continue;
1057 919
1058 if (versions[i].crc == *crc) 920 if (versions[i].crc == maybe_relocated(*crc, crc_owner))
1059 return 1; 921 return 1;
1060 DEBUGP("Found checksum %lX vs module %lX\n", 922 DEBUGP("Found checksum %lX vs module %lX\n",
1061 *crc, versions[i].crc); 923 maybe_relocated(*crc, crc_owner), versions[i].crc);
1062 goto bad_version; 924 goto bad_version;
1063 } 925 }
1064 926
@@ -1081,7 +943,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
1081 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, 943 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
1082 &crc, true, false)) 944 &crc, true, false))
1083 BUG(); 945 BUG();
1084 return check_version(sechdrs, versindex, "module_layout", mod, crc); 946 return check_version(sechdrs, versindex, "module_layout", mod, crc,
947 NULL);
1085} 948}
1086 949
1087/* First part is kernel version, which we ignore if module has crcs. */ 950/* First part is kernel version, which we ignore if module has crcs. */
@@ -1099,7 +962,8 @@ static inline int check_version(Elf_Shdr *sechdrs,
1099 unsigned int versindex, 962 unsigned int versindex,
1100 const char *symname, 963 const char *symname,
1101 struct module *mod, 964 struct module *mod,
1102 const unsigned long *crc) 965 const unsigned long *crc,
966 const struct module *crc_owner)
1103{ 967{
1104 return 1; 968 return 1;
1105} 969}
@@ -1134,8 +998,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
1134 /* use_module can fail due to OOM, 998 /* use_module can fail due to OOM,
1135 or module initialization or unloading */ 999 or module initialization or unloading */
1136 if (sym) { 1000 if (sym) {
1137 if (!check_version(sechdrs, versindex, name, mod, crc) || 1001 if (!check_version(sechdrs, versindex, name, mod, crc, owner)
1138 !use_module(mod, owner)) 1002 || !use_module(mod, owner))
1139 sym = NULL; 1003 sym = NULL;
1140 } 1004 }
1141 return sym; 1005 return sym;
@@ -2046,9 +1910,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
2046 unsigned int i; 1910 unsigned int i;
2047 1911
2048 /* only scan the sections containing data */ 1912 /* only scan the sections containing data */
2049 kmemleak_scan_area(mod->module_core, (unsigned long)mod - 1913 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
2050 (unsigned long)mod->module_core,
2051 sizeof(struct module), GFP_KERNEL);
2052 1914
2053 for (i = 1; i < hdr->e_shnum; i++) { 1915 for (i = 1; i < hdr->e_shnum; i++) {
2054 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 1916 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -2057,8 +1919,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
2057 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) 1919 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
2058 continue; 1920 continue;
2059 1921
2060 kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - 1922 kmemleak_scan_area((void *)sechdrs[i].sh_addr,
2061 (unsigned long)mod->module_core,
2062 sechdrs[i].sh_size, GFP_KERNEL); 1923 sechdrs[i].sh_size, GFP_KERNEL);
2063 } 1924 }
2064} 1925}
@@ -2386,6 +2247,12 @@ static noinline struct module *load_module(void __user *umod,
2386 "_ftrace_events", 2247 "_ftrace_events",
2387 sizeof(*mod->trace_events), 2248 sizeof(*mod->trace_events),
2388 &mod->num_trace_events); 2249 &mod->num_trace_events);
2250 /*
2251 * This section contains pointers to allocated objects in the trace
2252 * code and not scanning it leads to false positives.
2253 */
2254 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2255 mod->num_trace_events, GFP_KERNEL);
2389#endif 2256#endif
2390#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2257#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2391 /* sechdrs[0].sh_size is always zero */ 2258 /* sechdrs[0].sh_size is always zero */
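With the CONFIG_HAVE_LEGACY_PER_CPU_AREA block allocator deleted above, module per-cpu memory always comes from the generic percpu allocator. A hedged sketch of that allocator's calling convention (demo names, not module code):

#include <linux/percpu.h>
#include <linux/errno.h>

static int demo_percpu(void)
{
	int cpu;
	long *counters = __alloc_percpu(sizeof(long), __alignof__(long));

	if (!counters)
		return -ENOMEM;

	/* per_cpu_ptr() resolves the percpu cookie to one CPU's copy */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(counters, cpu) = 0;

	free_percpu(counters);
	return 0;
}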
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 6b2d735846a5..57d527a16f9d 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock)
43 \ 43 \
44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \ 44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
45 local_irq_save(flags); \ 45 local_irq_save(flags); \
46 __raw_spin_lock(&(lock)->raw_lock); \ 46 arch_spin_lock(&(lock)->rlock.raw_lock);\
47 DEBUG_LOCKS_WARN_ON(l->magic != l); \ 47 DEBUG_LOCKS_WARN_ON(l->magic != l); \
48 } while (0) 48 } while (0)
49 49
50#define spin_unlock_mutex(lock, flags) \ 50#define spin_unlock_mutex(lock, flags) \
51 do { \ 51 do { \
52 __raw_spin_unlock(&(lock)->raw_lock); \ 52 arch_spin_unlock(&(lock)->rlock.raw_lock); \
53 local_irq_restore(flags); \ 53 local_irq_restore(flags); \
54 preempt_check_resched(); \ 54 preempt_check_resched(); \
55 } while (0) 55 } while (0)
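The new (lock)->rlock.raw_lock expression reflects the lock-type layering introduced by this series: spinlock_t embeds a struct raw_spinlock named rlock, which in turn embeds the arch-level lock. A rough sketch of the nesting (not the real header definitions, details elided):

typedef struct {
	volatile unsigned int slock;	/* arch-specific lock word */
} arch_spinlock_t;

typedef struct raw_spinlock {
	arch_spinlock_t raw_lock;
	/* debug/lockdep fields elided */
} raw_spinlock_t;

typedef struct spinlock {
	struct raw_spinlock rlock;	/* hence (lock)->rlock.raw_lock above */
} spinlock_t;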
diff --git a/kernel/panic.c b/kernel/panic.c
index 96b45d0b4ba5..5827f7b97254 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,6 +10,7 @@
10 */ 10 */
11#include <linux/debug_locks.h> 11#include <linux/debug_locks.h>
12#include <linux/interrupt.h> 12#include <linux/interrupt.h>
13#include <linux/kmsg_dump.h>
13#include <linux/kallsyms.h> 14#include <linux/kallsyms.h>
14#include <linux/notifier.h> 15#include <linux/notifier.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -74,6 +75,7 @@ NORET_TYPE void panic(const char * fmt, ...)
74 dump_stack(); 75 dump_stack();
75#endif 76#endif
76 77
78 kmsg_dump(KMSG_DUMP_PANIC);
77 /* 79 /*
78 * If we have crashed and we have a crash kernel loaded let it handle 80 * If we have crashed and we have a crash kernel loaded let it handle
79 * everything else. 81 * everything else.
@@ -339,6 +341,7 @@ void oops_exit(void)
339{ 341{
340 do_oops_enter_exit(); 342 do_oops_enter_exit();
341 print_oops_end_marker(); 343 print_oops_end_marker();
344 kmsg_dump(KMSG_DUMP_OOPS);
342} 345}
343 346
344#ifdef WANT_WARN_ON_SLOWPATH 347#ifdef WANT_WARN_ON_SLOWPATH
diff --git a/kernel/params.c b/kernel/params.c
index d656c276508d..cf1b69183127 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,6 +24,7 @@
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/ctype.h> 26#include <linux/ctype.h>
27#include <linux/string.h>
27 28
28#if 0 29#if 0
29#define DEBUGP printk 30#define DEBUGP printk
@@ -122,9 +123,7 @@ static char *next_arg(char *args, char **param, char **val)
122 next = args + i; 123 next = args + i;
123 124
124 /* Chew up trailing spaces. */ 125 /* Chew up trailing spaces. */
125 while (isspace(*next)) 126 return skip_spaces(next);
126 next++;
127 return next;
128} 127}
129 128
130/* Args looks like "foo=bar,bar2 baz=fuz wiz". */ 129/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
@@ -139,8 +138,7 @@ int parse_args(const char *name,
139 DEBUGP("Parsing ARGS: %s\n", args); 138 DEBUGP("Parsing ARGS: %s\n", args);
140 139
141 /* Chew leading spaces */ 140 /* Chew leading spaces */
142 while (isspace(*args)) 141 args = skip_spaces(args);
143 args++;
144 142
145 while (*args) { 143 while (*args) {
146 int ret; 144 int ret;
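skip_spaces() from lib/string.c is a drop-in replacement for the open-coded loops removed above. A small sketch of the equivalence (demo names assumed):

#include <linux/ctype.h>
#include <linux/string.h>

/* The pattern the patch removes: */
static char *demo_skip_open_coded(char *s)
{
	while (isspace(*s))
		s++;
	return s;
}

/* Equivalent form using the lib helper the patch switches to: */
static char *demo_skip_helper(char *s)
{
	return skip_spaces(s);
}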
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 40a996ec39fa..97d1a3dd7a59 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -36,7 +36,7 @@
36/* 36/*
37 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
38 */ 38 */
39DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
40 40
41int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
42static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -203,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
203 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
204 * can't get swapped on us any more. 204 * can't get swapped on us any more.
205 */ 205 */
206 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
207 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
208 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
209 goto retry; 209 goto retry;
210 } 210 }
211 211
212 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
213 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
214 ctx = NULL; 214 ctx = NULL;
215 } 215 }
216 } 216 }
@@ -231,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
231 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
232 if (ctx) { 232 if (ctx) {
233 ++ctx->pin_count; 233 ++ctx->pin_count;
234 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
235 } 235 }
236 return ctx; 236 return ctx;
237} 237}
@@ -240,9 +240,9 @@ static void perf_unpin_context(struct perf_event_context *ctx)
240{ 240{
241 unsigned long flags; 241 unsigned long flags;
242 242
243 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
244 --ctx->pin_count; 244 --ctx->pin_count;
245 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
246 put_ctx(ctx); 246 put_ctx(ctx);
247} 247}
248 248
@@ -427,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
427 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
428 return; 428 return;
429 429
430 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
431 /* 431 /*
432 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
433 * events on a global level. 433 * events on a global level.
@@ -449,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
449 } 449 }
450 450
451 perf_enable(); 451 perf_enable();
452 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
453} 453}
454 454
455 455
@@ -488,12 +488,12 @@ retry:
488 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
489 event); 489 event);
490 490
491 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
492 /* 492 /*
493 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
494 */ 494 */
495 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
496 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
497 goto retry; 497 goto retry;
498 } 498 }
499 499
@@ -504,7 +504,7 @@ retry:
504 */ 504 */
505 if (!list_empty(&event->group_entry)) 505 if (!list_empty(&event->group_entry))
506 list_del_event(event, ctx); 506 list_del_event(event, ctx);
507 spin_unlock_irq(&ctx->lock); 507 raw_spin_unlock_irq(&ctx->lock);
508} 508}
509 509
510/* 510/*
@@ -535,7 +535,7 @@ static void __perf_event_disable(void *info)
535 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
536 return; 536 return;
537 537
538 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
539 539
540 /* 540 /*
541 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -551,7 +551,7 @@ static void __perf_event_disable(void *info)
551 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
552 } 552 }
553 553
554 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
555} 555}
556 556
557/* 557/*
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
567 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
568 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
569 */ 569 */
570static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
571{ 571{
572 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
573 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -584,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
584 retry: 584 retry:
585 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
586 586
587 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
588 /* 588 /*
589 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
590 */ 590 */
591 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
592 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
593 goto retry; 593 goto retry;
594 } 594 }
595 595
@@ -602,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
602 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
603 } 603 }
604 604
605 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
606} 606}
607 607
608static int 608static int
@@ -770,7 +770,7 @@ static void __perf_install_in_context(void *info)
770 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
771 } 771 }
772 772
773 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
774 ctx->is_active = 1; 774 ctx->is_active = 1;
775 update_context_time(ctx); 775 update_context_time(ctx);
776 776
@@ -782,6 +782,9 @@ static void __perf_install_in_context(void *info)
782 782
783 add_event_to_ctx(event, ctx); 783 add_event_to_ctx(event, ctx);
784 784
785 if (event->cpu != -1 && event->cpu != smp_processor_id())
786 goto unlock;
787
785 /* 788 /*
786 * Don't put the event on if it is disabled or if 789 * Don't put the event on if it is disabled or if
787 * it is in a group and the group isn't on. 790 * it is in a group and the group isn't on.
@@ -820,7 +823,7 @@ static void __perf_install_in_context(void *info)
820 unlock: 823 unlock:
821 perf_enable(); 824 perf_enable();
822 825
823 spin_unlock(&ctx->lock); 826 raw_spin_unlock(&ctx->lock);
824} 827}
825 828
826/* 829/*
@@ -856,12 +859,12 @@ retry:
856 task_oncpu_function_call(task, __perf_install_in_context, 859 task_oncpu_function_call(task, __perf_install_in_context,
857 event); 860 event);
858 861
859 spin_lock_irq(&ctx->lock); 862 raw_spin_lock_irq(&ctx->lock);
860 /* 863 /*
861 * we need to retry the smp call. 864 * we need to retry the smp call.
862 */ 865 */
863 if (ctx->is_active && list_empty(&event->group_entry)) { 866 if (ctx->is_active && list_empty(&event->group_entry)) {
864 spin_unlock_irq(&ctx->lock); 867 raw_spin_unlock_irq(&ctx->lock);
865 goto retry; 868 goto retry;
866 } 869 }
867 870
@@ -872,7 +875,7 @@ retry:
872 */ 875 */
873 if (list_empty(&event->group_entry)) 876 if (list_empty(&event->group_entry))
874 add_event_to_ctx(event, ctx); 877 add_event_to_ctx(event, ctx);
875 spin_unlock_irq(&ctx->lock); 878 raw_spin_unlock_irq(&ctx->lock);
876} 879}
877 880
878/* 881/*
@@ -917,7 +920,7 @@ static void __perf_event_enable(void *info)
917 cpuctx->task_ctx = ctx; 920 cpuctx->task_ctx = ctx;
918 } 921 }
919 922
920 spin_lock(&ctx->lock); 923 raw_spin_lock(&ctx->lock);
921 ctx->is_active = 1; 924 ctx->is_active = 1;
922 update_context_time(ctx); 925 update_context_time(ctx);
923 926
@@ -925,6 +928,9 @@ static void __perf_event_enable(void *info)
925 goto unlock; 928 goto unlock;
926 __perf_event_mark_enabled(event, ctx); 929 __perf_event_mark_enabled(event, ctx);
927 930
931 if (event->cpu != -1 && event->cpu != smp_processor_id())
932 goto unlock;
933
928 /* 934 /*
929 * If the event is in a group and isn't the group leader, 935 * If the event is in a group and isn't the group leader,
930 * then don't put it on unless the group is on. 936 * then don't put it on unless the group is on.
@@ -959,7 +965,7 @@ static void __perf_event_enable(void *info)
959 } 965 }
960 966
961 unlock: 967 unlock:
962 spin_unlock(&ctx->lock); 968 raw_spin_unlock(&ctx->lock);
963} 969}
964 970
965/* 971/*
@@ -971,7 +977,7 @@ static void __perf_event_enable(void *info)
971 * perf_event_for_each_child or perf_event_for_each as described 977 * perf_event_for_each_child or perf_event_for_each as described
972 * for perf_event_disable. 978 * for perf_event_disable.
973 */ 979 */
974static void perf_event_enable(struct perf_event *event) 980void perf_event_enable(struct perf_event *event)
975{ 981{
976 struct perf_event_context *ctx = event->ctx; 982 struct perf_event_context *ctx = event->ctx;
977 struct task_struct *task = ctx->task; 983 struct task_struct *task = ctx->task;
@@ -985,7 +991,7 @@ static void perf_event_enable(struct perf_event *event)
985 return; 991 return;
986 } 992 }
987 993
988 spin_lock_irq(&ctx->lock); 994 raw_spin_lock_irq(&ctx->lock);
989 if (event->state >= PERF_EVENT_STATE_INACTIVE) 995 if (event->state >= PERF_EVENT_STATE_INACTIVE)
990 goto out; 996 goto out;
991 997
@@ -1000,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event)
1000 event->state = PERF_EVENT_STATE_OFF; 1006 event->state = PERF_EVENT_STATE_OFF;
1001 1007
1002 retry: 1008 retry:
1003 spin_unlock_irq(&ctx->lock); 1009 raw_spin_unlock_irq(&ctx->lock);
1004 task_oncpu_function_call(task, __perf_event_enable, event); 1010 task_oncpu_function_call(task, __perf_event_enable, event);
1005 1011
1006 spin_lock_irq(&ctx->lock); 1012 raw_spin_lock_irq(&ctx->lock);
1007 1013
1008 /* 1014 /*
1009 * If the context is active and the event is still off, 1015 * If the context is active and the event is still off,
@@ -1020,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event)
1020 __perf_event_mark_enabled(event, ctx); 1026 __perf_event_mark_enabled(event, ctx);
1021 1027
1022 out: 1028 out:
1023 spin_unlock_irq(&ctx->lock); 1029 raw_spin_unlock_irq(&ctx->lock);
1024} 1030}
1025 1031
1026static int perf_event_refresh(struct perf_event *event, int refresh) 1032static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1042,7 +1048,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1042{ 1048{
1043 struct perf_event *event; 1049 struct perf_event *event;
1044 1050
1045 spin_lock(&ctx->lock); 1051 raw_spin_lock(&ctx->lock);
1046 ctx->is_active = 0; 1052 ctx->is_active = 0;
1047 if (likely(!ctx->nr_events)) 1053 if (likely(!ctx->nr_events))
1048 goto out; 1054 goto out;
@@ -1055,7 +1061,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1055 } 1061 }
1056 perf_enable(); 1062 perf_enable();
1057 out: 1063 out:
1058 spin_unlock(&ctx->lock); 1064 raw_spin_unlock(&ctx->lock);
1059} 1065}
1060 1066
1061/* 1067/*
@@ -1193,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1193 * order we take the locks because no other cpu could 1199 * order we take the locks because no other cpu could
1194 * be trying to lock both of these tasks. 1200 * be trying to lock both of these tasks.
1195 */ 1201 */
1196 spin_lock(&ctx->lock); 1202 raw_spin_lock(&ctx->lock);
1197 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1203 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1198 if (context_equiv(ctx, next_ctx)) { 1204 if (context_equiv(ctx, next_ctx)) {
1199 /* 1205 /*
1200 * XXX do we need a memory barrier of sorts 1206 * XXX do we need a memory barrier of sorts
@@ -1208,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1208 1214
1209 perf_event_sync_stat(ctx, next_ctx); 1215 perf_event_sync_stat(ctx, next_ctx);
1210 } 1216 }
1211 spin_unlock(&next_ctx->lock); 1217 raw_spin_unlock(&next_ctx->lock);
1212 spin_unlock(&ctx->lock); 1218 raw_spin_unlock(&ctx->lock);
1213 } 1219 }
1214 rcu_read_unlock(); 1220 rcu_read_unlock();
1215 1221
@@ -1251,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1251 struct perf_event *event; 1257 struct perf_event *event;
1252 int can_add_hw = 1; 1258 int can_add_hw = 1;
1253 1259
1254 spin_lock(&ctx->lock); 1260 raw_spin_lock(&ctx->lock);
1255 ctx->is_active = 1; 1261 ctx->is_active = 1;
1256 if (likely(!ctx->nr_events)) 1262 if (likely(!ctx->nr_events))
1257 goto out; 1263 goto out;
@@ -1306,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1306 } 1312 }
1307 perf_enable(); 1313 perf_enable();
1308 out: 1314 out:
1309 spin_unlock(&ctx->lock); 1315 raw_spin_unlock(&ctx->lock);
1310} 1316}
1311 1317
1312/* 1318/*
@@ -1370,7 +1376,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1370 struct hw_perf_event *hwc; 1376 struct hw_perf_event *hwc;
1371 u64 interrupts, freq; 1377 u64 interrupts, freq;
1372 1378
1373 spin_lock(&ctx->lock); 1379 raw_spin_lock(&ctx->lock);
1374 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1375 if (event->state != PERF_EVENT_STATE_ACTIVE) 1381 if (event->state != PERF_EVENT_STATE_ACTIVE)
1376 continue; 1382 continue;
@@ -1425,7 +1431,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1425 perf_enable(); 1431 perf_enable();
1426 } 1432 }
1427 } 1433 }
1428 spin_unlock(&ctx->lock); 1434 raw_spin_unlock(&ctx->lock);
1429} 1435}
1430 1436
1431/* 1437/*
@@ -1438,7 +1444,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1438 if (!ctx->nr_events) 1444 if (!ctx->nr_events)
1439 return; 1445 return;
1440 1446
1441 spin_lock(&ctx->lock); 1447 raw_spin_lock(&ctx->lock);
1442 /* 1448 /*
1443 * Rotate the first entry last (works just fine for group events too): 1449 * Rotate the first entry last (works just fine for group events too):
1444 */ 1450 */
@@ -1449,7 +1455,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1449 } 1455 }
1450 perf_enable(); 1456 perf_enable();
1451 1457
1452 spin_unlock(&ctx->lock); 1458 raw_spin_unlock(&ctx->lock);
1453} 1459}
1454 1460
1455void perf_event_task_tick(struct task_struct *curr, int cpu) 1461void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1498,7 +1504,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1498 1504
1499 __perf_event_task_sched_out(ctx); 1505 __perf_event_task_sched_out(ctx);
1500 1506
1501 spin_lock(&ctx->lock); 1507 raw_spin_lock(&ctx->lock);
1502 1508
1503 list_for_each_entry(event, &ctx->group_list, group_entry) { 1509 list_for_each_entry(event, &ctx->group_list, group_entry) {
1504 if (!event->attr.enable_on_exec) 1510 if (!event->attr.enable_on_exec)
@@ -1516,7 +1522,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1516 if (enabled) 1522 if (enabled)
1517 unclone_ctx(ctx); 1523 unclone_ctx(ctx);
1518 1524
1519 spin_unlock(&ctx->lock); 1525 raw_spin_unlock(&ctx->lock);
1520 1526
1521 perf_event_task_sched_in(task, smp_processor_id()); 1527 perf_event_task_sched_in(task, smp_processor_id());
1522 out: 1528 out:
@@ -1542,10 +1548,10 @@ static void __perf_event_read(void *info)
1542 if (ctx->task && cpuctx->task_ctx != ctx) 1548 if (ctx->task && cpuctx->task_ctx != ctx)
1543 return; 1549 return;
1544 1550
1545 spin_lock(&ctx->lock); 1551 raw_spin_lock(&ctx->lock);
1546 update_context_time(ctx); 1552 update_context_time(ctx);
1547 update_event_times(event); 1553 update_event_times(event);
1548 spin_unlock(&ctx->lock); 1554 raw_spin_unlock(&ctx->lock);
1549 1555
1550 event->pmu->read(event); 1556 event->pmu->read(event);
1551} 1557}
@@ -1563,10 +1569,10 @@ static u64 perf_event_read(struct perf_event *event)
1563 struct perf_event_context *ctx = event->ctx; 1569 struct perf_event_context *ctx = event->ctx;
1564 unsigned long flags; 1570 unsigned long flags;
1565 1571
1566 spin_lock_irqsave(&ctx->lock, flags); 1572 raw_spin_lock_irqsave(&ctx->lock, flags);
1567 update_context_time(ctx); 1573 update_context_time(ctx);
1568 update_event_times(event); 1574 update_event_times(event);
1569 spin_unlock_irqrestore(&ctx->lock, flags); 1575 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1570 } 1576 }
1571 1577
1572 return atomic64_read(&event->count); 1578 return atomic64_read(&event->count);
@@ -1579,8 +1585,7 @@ static void
1579__perf_event_init_context(struct perf_event_context *ctx, 1585__perf_event_init_context(struct perf_event_context *ctx,
1580 struct task_struct *task) 1586 struct task_struct *task)
1581{ 1587{
1582 memset(ctx, 0, sizeof(*ctx)); 1588 raw_spin_lock_init(&ctx->lock);
1583 spin_lock_init(&ctx->lock);
1584 mutex_init(&ctx->mutex); 1589 mutex_init(&ctx->mutex);
1585 INIT_LIST_HEAD(&ctx->group_list); 1590 INIT_LIST_HEAD(&ctx->group_list);
1586 INIT_LIST_HEAD(&ctx->event_list); 1591 INIT_LIST_HEAD(&ctx->event_list);
@@ -1596,15 +1601,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1596 unsigned long flags; 1601 unsigned long flags;
1597 int err; 1602 int err;
1598 1603
1599 /* 1604 if (pid == -1 && cpu != -1) {
1600 * If cpu is not a wildcard then this is a percpu event:
1601 */
1602 if (cpu != -1) {
1603 /* Must be root to operate on a CPU event: */ 1605 /* Must be root to operate on a CPU event: */
1604 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 1606 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1605 return ERR_PTR(-EACCES); 1607 return ERR_PTR(-EACCES);
1606 1608
1607 if (cpu < 0 || cpu > num_possible_cpus()) 1609 if (cpu < 0 || cpu >= nr_cpumask_bits)
1608 return ERR_PTR(-EINVAL); 1610 return ERR_PTR(-EINVAL);
1609 1611
1610 /* 1612 /*
@@ -1612,7 +1614,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1612 * offline CPU and activate it when the CPU comes up, but 1614 * offline CPU and activate it when the CPU comes up, but
1613 * that's for later. 1615 * that's for later.
1614 */ 1616 */
1615 if (!cpu_isset(cpu, cpu_online_map)) 1617 if (!cpu_online(cpu))
1616 return ERR_PTR(-ENODEV); 1618 return ERR_PTR(-ENODEV);
1617 1619
1618 cpuctx = &per_cpu(perf_cpu_context, cpu); 1620 cpuctx = &per_cpu(perf_cpu_context, cpu);
@@ -1650,11 +1652,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1650 ctx = perf_lock_task_context(task, &flags); 1652 ctx = perf_lock_task_context(task, &flags);
1651 if (ctx) { 1653 if (ctx) {
1652 unclone_ctx(ctx); 1654 unclone_ctx(ctx);
1653 spin_unlock_irqrestore(&ctx->lock, flags); 1655 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1654 } 1656 }
1655 1657
1656 if (!ctx) { 1658 if (!ctx) {
1657 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1659 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1658 err = -ENOMEM; 1660 err = -ENOMEM;
1659 if (!ctx) 1661 if (!ctx)
1660 goto errout; 1662 goto errout;
@@ -1988,7 +1990,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1988 if (!value) 1990 if (!value)
1989 return -EINVAL; 1991 return -EINVAL;
1990 1992
1991 spin_lock_irq(&ctx->lock); 1993 raw_spin_lock_irq(&ctx->lock);
1992 if (event->attr.freq) { 1994 if (event->attr.freq) {
1993 if (value > sysctl_perf_event_sample_rate) { 1995 if (value > sysctl_perf_event_sample_rate) {
1994 ret = -EINVAL; 1996 ret = -EINVAL;
@@ -2001,7 +2003,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
2001 event->hw.sample_period = value; 2003 event->hw.sample_period = value;
2002 } 2004 }
2003unlock: 2005unlock:
2004 spin_unlock_irq(&ctx->lock); 2006 raw_spin_unlock_irq(&ctx->lock);
2005 2007
2006 return ret; 2008 return ret;
2007} 2009}
@@ -4011,6 +4013,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4011 event->pmu->read(event); 4013 event->pmu->read(event);
4012 4014
4013 data.addr = 0; 4015 data.addr = 0;
4016 data.raw = NULL;
4014 data.period = event->hw.last_period; 4017 data.period = event->hw.last_period;
4015 regs = get_irq_regs(); 4018 regs = get_irq_regs();
4016 /* 4019 /*
@@ -4080,8 +4083,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4080 u64 now; 4083 u64 now;
4081 4084
4082 now = cpu_clock(cpu); 4085 now = cpu_clock(cpu);
4083 prev = atomic64_read(&event->hw.prev_count); 4086 prev = atomic64_xchg(&event->hw.prev_count, now);
4084 atomic64_set(&event->hw.prev_count, now);
4085 atomic64_add(now - prev, &event->count); 4087 atomic64_add(now - prev, &event->count);
4086} 4088}
4087 4089
@@ -4286,15 +4288,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
4286static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4288static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4287{ 4289{
4288 int err; 4290 int err;
4289 /* 4291
4290 * The breakpoint is already filled if we haven't created the counter 4292 err = register_perf_hw_breakpoint(bp);
4291 * through perf syscall
4292 * FIXME: manage to get trigerred to NULL if it comes from syscalls
4293 */
4294 if (!bp->callback)
4295 err = register_perf_hw_breakpoint(bp);
4296 else
4297 err = __register_perf_hw_breakpoint(bp);
4298 if (err) 4293 if (err)
4299 return ERR_PTR(err); 4294 return ERR_PTR(err);
4300 4295
@@ -4308,6 +4303,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
4308 struct perf_sample_data sample; 4303 struct perf_sample_data sample;
4309 struct pt_regs *regs = data; 4304 struct pt_regs *regs = data;
4310 4305
4306 sample.raw = NULL;
4311 sample.addr = bp->attr.bp_addr; 4307 sample.addr = bp->attr.bp_addr;
4312 4308
4313 if (!perf_exclude_event(bp, regs)) 4309 if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4386,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4390 struct perf_event_context *ctx, 4386 struct perf_event_context *ctx,
4391 struct perf_event *group_leader, 4387 struct perf_event *group_leader,
4392 struct perf_event *parent_event, 4388 struct perf_event *parent_event,
4393 perf_callback_t callback, 4389 perf_overflow_handler_t overflow_handler,
4394 gfp_t gfpflags) 4390 gfp_t gfpflags)
4395{ 4391{
4396 const struct pmu *pmu; 4392 const struct pmu *pmu;
@@ -4433,10 +4429,10 @@ perf_event_alloc(struct perf_event_attr *attr,
4433 4429
4434 event->state = PERF_EVENT_STATE_INACTIVE; 4430 event->state = PERF_EVENT_STATE_INACTIVE;
4435 4431
4436 if (!callback && parent_event) 4432 if (!overflow_handler && parent_event)
4437 callback = parent_event->callback; 4433 overflow_handler = parent_event->overflow_handler;
4438 4434
4439 event->callback = callback; 4435 event->overflow_handler = overflow_handler;
4440 4436
4441 if (attr->disabled) 4437 if (attr->disabled)
4442 event->state = PERF_EVENT_STATE_OFF; 4438 event->state = PERF_EVENT_STATE_OFF;
@@ -4571,7 +4567,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4571 if (attr->type >= PERF_TYPE_MAX) 4567 if (attr->type >= PERF_TYPE_MAX)
4572 return -EINVAL; 4568 return -EINVAL;
4573 4569
4574 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 4570 if (attr->__reserved_1 || attr->__reserved_2)
4575 return -EINVAL; 4571 return -EINVAL;
4576 4572
4577 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4573 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4776,7 +4772,8 @@ err_put_context:
4776 */ 4772 */
4777struct perf_event * 4773struct perf_event *
4778perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 4774perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4779 pid_t pid, perf_callback_t callback) 4775 pid_t pid,
4776 perf_overflow_handler_t overflow_handler)
4780{ 4777{
4781 struct perf_event *event; 4778 struct perf_event *event;
4782 struct perf_event_context *ctx; 4779 struct perf_event_context *ctx;
@@ -4793,7 +4790,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4793 } 4790 }
4794 4791
4795 event = perf_event_alloc(attr, cpu, ctx, NULL, 4792 event = perf_event_alloc(attr, cpu, ctx, NULL,
4796 NULL, callback, GFP_KERNEL); 4793 NULL, overflow_handler, GFP_KERNEL);
4797 if (IS_ERR(event)) { 4794 if (IS_ERR(event)) {
4798 err = PTR_ERR(event); 4795 err = PTR_ERR(event);
4799 goto err_put_context; 4796 goto err_put_context;
@@ -4998,7 +4995,7 @@ void perf_event_exit_task(struct task_struct *child)
4998 * reading child->perf_event_ctxp, we wait until it has 4995 * reading child->perf_event_ctxp, we wait until it has
4999 * incremented the context's refcount before we do put_ctx below. 4996 * incremented the context's refcount before we do put_ctx below.
5000 */ 4997 */
5001 spin_lock(&child_ctx->lock); 4998 raw_spin_lock(&child_ctx->lock);
5002 child->perf_event_ctxp = NULL; 4999 child->perf_event_ctxp = NULL;
5003 /* 5000 /*
5004 * If this context is a clone; unclone it so it can't get 5001 * If this context is a clone; unclone it so it can't get
@@ -5007,7 +5004,7 @@ void perf_event_exit_task(struct task_struct *child)
5007 */ 5004 */
5008 unclone_ctx(child_ctx); 5005 unclone_ctx(child_ctx);
5009 update_context_time(child_ctx); 5006 update_context_time(child_ctx);
5010 spin_unlock_irqrestore(&child_ctx->lock, flags); 5007 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
5011 5008
5012 /* 5009 /*
5013 * Report the task dead after unscheduling the events so that we 5010 * Report the task dead after unscheduling the events so that we
@@ -5090,7 +5087,7 @@ again:
5090 */ 5087 */
5091int perf_event_init_task(struct task_struct *child) 5088int perf_event_init_task(struct task_struct *child)
5092{ 5089{
5093 struct perf_event_context *child_ctx, *parent_ctx; 5090 struct perf_event_context *child_ctx = NULL, *parent_ctx;
5094 struct perf_event_context *cloned_ctx; 5091 struct perf_event_context *cloned_ctx;
5095 struct perf_event *event; 5092 struct perf_event *event;
5096 struct task_struct *parent = current; 5093 struct task_struct *parent = current;
@@ -5106,20 +5103,6 @@ int perf_event_init_task(struct task_struct *child)
5106 return 0; 5103 return 0;
5107 5104
5108 /* 5105 /*
5109 * This is executed from the parent task context, so inherit
5110 * events that have been marked for cloning.
5111 * First allocate and initialize a context for the child.
5112 */
5113
5114 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
5115 if (!child_ctx)
5116 return -ENOMEM;
5117
5118 __perf_event_init_context(child_ctx, child);
5119 child->perf_event_ctxp = child_ctx;
5120 get_task_struct(child);
5121
5122 /*
5123 * If the parent's context is a clone, pin it so it won't get 5106 * If the parent's context is a clone, pin it so it won't get
5124 * swapped under us. 5107 * swapped under us.
5125 */ 5108 */
@@ -5149,6 +5132,26 @@ int perf_event_init_task(struct task_struct *child)
5149 continue; 5132 continue;
5150 } 5133 }
5151 5134
5135 if (!child->perf_event_ctxp) {
5136 /*
5137 * This is executed from the parent task context, so
5138 * inherit events that have been marked for cloning.
5139 * First allocate and initialize a context for the
5140 * child.
5141 */
5142
5143 child_ctx = kzalloc(sizeof(struct perf_event_context),
5144 GFP_KERNEL);
5145 if (!child_ctx) {
5146 ret = -ENOMEM;
5147 goto exit;
5148 }
5149
5150 __perf_event_init_context(child_ctx, child);
5151 child->perf_event_ctxp = child_ctx;
5152 get_task_struct(child);
5153 }
5154
5152 ret = inherit_group(event, parent, parent_ctx, 5155 ret = inherit_group(event, parent, parent_ctx,
5153 child, child_ctx); 5156 child, child_ctx);
5154 if (ret) { 5157 if (ret) {
@@ -5177,6 +5180,7 @@ int perf_event_init_task(struct task_struct *child)
5177 get_ctx(child_ctx->parent_ctx); 5180 get_ctx(child_ctx->parent_ctx);
5178 } 5181 }
5179 5182
5183exit:
5180 mutex_unlock(&parent_ctx->mutex); 5184 mutex_unlock(&parent_ctx->mutex);
5181 5185
5182 perf_unpin_context(parent_ctx); 5186 perf_unpin_context(parent_ctx);
@@ -5291,11 +5295,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5291 perf_reserved_percpu = val; 5295 perf_reserved_percpu = val;
5292 for_each_online_cpu(cpu) { 5296 for_each_online_cpu(cpu) {
5293 cpuctx = &per_cpu(perf_cpu_context, cpu); 5297 cpuctx = &per_cpu(perf_cpu_context, cpu);
5294 spin_lock_irq(&cpuctx->ctx.lock); 5298 raw_spin_lock_irq(&cpuctx->ctx.lock);
5295 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5299 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5296 perf_max_events - perf_reserved_percpu); 5300 perf_max_events - perf_reserved_percpu);
5297 cpuctx->max_pertask = mpt; 5301 cpuctx->max_pertask = mpt;
5298 spin_unlock_irq(&cpuctx->ctx.lock); 5302 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5299 } 5303 }
5300 spin_unlock(&perf_resource_lock); 5304 spin_unlock(&perf_resource_lock);
5301 5305
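The perf hunks convert ctx->lock from spinlock_t to raw_spinlock_t, so it stays a true busy-waiting lock even on configurations where spinlock_t may become sleepable (e.g. PREEMPT_RT). A hedged sketch of the converted pattern, with hypothetical names:

#include <linux/spinlock.h>

struct demo_ctx {
	raw_spinlock_t lock;
};

static void demo_ctx_init(struct demo_ctx *ctx)
{
	raw_spin_lock_init(&ctx->lock);
}

static void demo_ctx_touch(struct demo_ctx *ctx)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&ctx->lock, flags);
	/* ... update context state ... */
	raw_spin_unlock_irqrestore(&ctx->lock, flags);
}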
diff --git a/kernel/pid.c b/kernel/pid.c
index d3f722d20f9c..2e17c9c92cbe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -141,11 +141,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
141 * installing it: 141 * installing it:
142 */ 142 */
143 spin_lock_irq(&pidmap_lock); 143 spin_lock_irq(&pidmap_lock);
144 if (map->page) 144 if (!map->page) {
145 kfree(page);
146 else
147 map->page = page; 145 map->page = page;
146 page = NULL;
147 }
148 spin_unlock_irq(&pidmap_lock); 148 spin_unlock_irq(&pidmap_lock);
149 kfree(page);
149 if (unlikely(!map->page)) 150 if (unlikely(!map->page))
150 break; 151 break;
151 } 152 }
@@ -268,12 +269,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
268 for (type = 0; type < PIDTYPE_MAX; ++type) 269 for (type = 0; type < PIDTYPE_MAX; ++type)
269 INIT_HLIST_HEAD(&pid->tasks[type]); 270 INIT_HLIST_HEAD(&pid->tasks[type]);
270 271
272 upid = pid->numbers + ns->level;
271 spin_lock_irq(&pidmap_lock); 273 spin_lock_irq(&pidmap_lock);
272 for (i = ns->level; i >= 0; i--) { 274 for ( ; upid >= pid->numbers; --upid)
273 upid = &pid->numbers[i];
274 hlist_add_head_rcu(&upid->pid_chain, 275 hlist_add_head_rcu(&upid->pid_chain,
275 &pid_hash[pid_hashfn(upid->nr, upid->ns)]); 276 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
276 }
277 spin_unlock_irq(&pidmap_lock); 277 spin_unlock_irq(&pidmap_lock);
278 278
279out: 279out:
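The alloc_pidmap() change above follows a common publish-under-lock idiom: allocate outside the lock, install only if nobody beat us to it, then free the loser unconditionally after unlocking (kfree(NULL) is a no-op). A hedged sketch with hypothetical names:

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

static DEFINE_SPINLOCK(demo_lock);
static void *demo_slot;

static int demo_install(void)
{
	void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (!page)
		return -ENOMEM;

	spin_lock_irq(&demo_lock);
	if (!demo_slot) {
		demo_slot = page;	/* won the race: publish */
		page = NULL;		/* ownership transferred */
	}
	spin_unlock_irq(&demo_lock);

	kfree(page);			/* frees only the losing copy */
	return 0;
}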
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 5187136fe1de..218e5af90156 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -6,7 +6,7 @@
6 6
7#include <linux/vt_kern.h> 7#include <linux/vt_kern.h>
8#include <linux/kbd_kern.h> 8#include <linux/kbd_kern.h>
9#include <linux/console.h> 9#include <linux/vt.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include "power.h" 11#include "power.h"
12 12
@@ -21,8 +21,7 @@ int pm_prepare_console(void)
21 if (orig_fgconsole < 0) 21 if (orig_fgconsole < 0)
22 return 1; 22 return 1;
23 23
24 orig_kmsg = kmsg_redirect; 24 orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
25 kmsg_redirect = SUSPEND_CONSOLE;
26 return 0; 25 return 0;
27} 26}
28 27
@@ -30,7 +29,7 @@ void pm_restore_console(void)
30{ 29{
31 if (orig_fgconsole >= 0) { 30 if (orig_fgconsole >= 0) {
32 vt_move_to_console(orig_fgconsole, 0); 31 vt_move_to_console(orig_fgconsole, 0);
33 kmsg_redirect = orig_kmsg; 32 vt_kmsg_redirect(orig_kmsg);
34 } 33 }
35} 34}
36#endif 35#endif
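vt_kmsg_redirect() both installs a new redirect target and returns the previous one, which is what lets the suspend path above save and restore with one call each. A hedged sketch of that pairing (SUSPEND_CONSOLE is the suspend code's own constant; declaration assumed to come via <linux/vt.h> as in the hunk above):

#include <linux/vt.h>

static int demo_saved_kmsg;

static void demo_suspend_console(void)
{
	/* install the new target and remember the old one */
	demo_saved_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
}

static void demo_resume_console(void)
{
	vt_kmsg_redirect(demo_saved_kmsg);	/* put it back */
}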
diff --git a/kernel/printk.c b/kernel/printk.c
index b5ac4d99c667..17463ca2e229 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -34,6 +34,7 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/kexec.h> 35#include <linux/kexec.h>
36#include <linux/ratelimit.h> 36#include <linux/ratelimit.h>
37#include <linux/kmsg_dump.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -1405,4 +1406,122 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
1405 return false; 1406 return false;
1406} 1407}
1407EXPORT_SYMBOL(printk_timed_ratelimit); 1408EXPORT_SYMBOL(printk_timed_ratelimit);
1409
1410static DEFINE_SPINLOCK(dump_list_lock);
1411static LIST_HEAD(dump_list);
1412
1413/**
1414 * kmsg_dump_register - register a kernel log dumper.
1415 * @dumper: pointer to the kmsg_dumper structure
1416 *
1417 * Adds a kernel log dumper to the system. The dump callback in the
1418 * structure must be set; it is called when the kernel oopses or
1419 * panics. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
1420 */
1421int kmsg_dump_register(struct kmsg_dumper *dumper)
1422{
1423 unsigned long flags;
1424 int err = -EBUSY;
1425
1426 /* The dump callback needs to be set */
1427 if (!dumper->dump)
1428 return -EINVAL;
1429
1430 spin_lock_irqsave(&dump_list_lock, flags);
1431 /* Don't allow registering multiple times */
1432 if (!dumper->registered) {
1433 dumper->registered = 1;
1434 list_add_tail(&dumper->list, &dump_list);
1435 err = 0;
1436 }
1437 spin_unlock_irqrestore(&dump_list_lock, flags);
1438
1439 return err;
1440}
1441EXPORT_SYMBOL_GPL(kmsg_dump_register);
1442
1443/**
1444 * kmsg_dump_unregister - unregister a kmsg dumper.
1445 * @dumper: pointer to the kmsg_dumper structure
1446 *
1447 * Removes a dump device from the system. Returns zero on success and
1448 * %-EINVAL otherwise.
1449 */
1450int kmsg_dump_unregister(struct kmsg_dumper *dumper)
1451{
1452 unsigned long flags;
1453 int err = -EINVAL;
1454
1455 spin_lock_irqsave(&dump_list_lock, flags);
1456 if (dumper->registered) {
1457 dumper->registered = 0;
1458 list_del(&dumper->list);
1459 err = 0;
1460 }
1461 spin_unlock_irqrestore(&dump_list_lock, flags);
1462
1463 return err;
1464}
1465EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
1466
1467static const char * const kmsg_reasons[] = {
1468 [KMSG_DUMP_OOPS] = "oops",
1469 [KMSG_DUMP_PANIC] = "panic",
1470};
1471
1472static const char *kmsg_to_str(enum kmsg_dump_reason reason)
1473{
1474 if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
1475 return "unknown";
1476
1477 return kmsg_reasons[reason];
1478}
1479
1480/**
1481 * kmsg_dump - dump kernel log to kernel message dumpers.
1482 * @reason: the reason (oops, panic etc) for dumping
1483 *
1484 * Iterate through each of the dump devices and call the oops/panic
1485 * callbacks with the log buffer.
1486 */
1487void kmsg_dump(enum kmsg_dump_reason reason)
1488{
1489 unsigned long end;
1490 unsigned chars;
1491 struct kmsg_dumper *dumper;
1492 const char *s1, *s2;
1493 unsigned long l1, l2;
1494 unsigned long flags;
1495
1496	/* Theoretically, the log could move on after we do this, but
1497	 * there's not a lot we can do about that. The new messages
1498	 * will overwrite the start of what we dump. */
1499 spin_lock_irqsave(&logbuf_lock, flags);
1500 end = log_end & LOG_BUF_MASK;
1501 chars = logged_chars;
1502 spin_unlock_irqrestore(&logbuf_lock, flags);
1503
1504	if (chars > end) {
1505		s1 = log_buf + log_buf_len - chars + end;
1506		l1 = chars - end;
1507
1508 s2 = log_buf;
1509 l2 = end;
1510 } else {
1511 s1 = "";
1512 l1 = 0;
1513
1514		s2 = log_buf + end - chars;
1515		l2 = chars;
1516 }
1517
1518 if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
1519		printk(KERN_ERR "kmsg_dump: dump list lock is held during %s, skipping dump\n",
1520 kmsg_to_str(reason));
1521 return;
1522 }
1523 list_for_each_entry(dumper, &dump_list, list)
1524 dumper->dump(dumper, reason, s1, l1, s2, l2);
1525 spin_unlock_irqrestore(&dump_list_lock, flags);
1526}
1408#endif 1527#endif
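The kernel-doc above describes the new dumper API end to end. A minimal sketch of a client module (names hypothetical, callback signature taken from the kmsg_dump() loop above):

#include <linux/kmsg_dump.h>
#include <linux/module.h>

/* Hypothetical client: stash the tail of the log somewhere persistent. */
static void demo_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
		      const char *s1, unsigned long l1,
		      const char *s2, unsigned long l2)
{
	/* s1/l1 is the older chunk of the ring buffer, s2/l2 the newer;
	 * a real dumper would write both to its backing store here. */
}

static struct kmsg_dumper demo_dumper = {
	.dump = demo_dump,
};

static int __init demo_init(void)
{
	/* fails with -EINVAL if .dump is unset, -EBUSY if already registered */
	return kmsg_dump_register(&demo_dumper);
}

static void __exit demo_exit(void)
{
	kmsg_dump_unregister(&demo_dumper);
}

module_init(demo_init);
module_exit(demo_exit);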
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a621a67ef4e3..9bb52177af02 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -763,13 +763,13 @@ static void rcu_torture_timer(unsigned long unused)
763 /* Should not happen, but... */ 763 /* Should not happen, but... */
764 pipe_count = RCU_TORTURE_PIPE_LEN; 764 pipe_count = RCU_TORTURE_PIPE_LEN;
765 } 765 }
766 ++__get_cpu_var(rcu_torture_count)[pipe_count]; 766 __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
767 completed = cur_ops->completed() - completed; 767 completed = cur_ops->completed() - completed;
768 if (completed > RCU_TORTURE_PIPE_LEN) { 768 if (completed > RCU_TORTURE_PIPE_LEN) {
769 /* Should not happen, but... */ 769 /* Should not happen, but... */
770 completed = RCU_TORTURE_PIPE_LEN; 770 completed = RCU_TORTURE_PIPE_LEN;
771 } 771 }
772 ++__get_cpu_var(rcu_torture_batch)[completed]; 772 __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
773 preempt_enable(); 773 preempt_enable();
774 cur_ops->readunlock(idx); 774 cur_ops->readunlock(idx);
775} 775}
@@ -818,13 +818,13 @@ rcu_torture_reader(void *arg)
818 /* Should not happen, but... */ 818 /* Should not happen, but... */
819 pipe_count = RCU_TORTURE_PIPE_LEN; 819 pipe_count = RCU_TORTURE_PIPE_LEN;
820 } 820 }
821 ++__get_cpu_var(rcu_torture_count)[pipe_count]; 821 __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
822 completed = cur_ops->completed() - completed; 822 completed = cur_ops->completed() - completed;
823 if (completed > RCU_TORTURE_PIPE_LEN) { 823 if (completed > RCU_TORTURE_PIPE_LEN) {
824 /* Should not happen, but... */ 824 /* Should not happen, but... */
825 completed = RCU_TORTURE_PIPE_LEN; 825 completed = RCU_TORTURE_PIPE_LEN;
826 } 826 }
827 ++__get_cpu_var(rcu_torture_batch)[completed]; 827 __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
828 preempt_enable(); 828 preempt_enable();
829 cur_ops->readunlock(idx); 829 cur_ops->readunlock(idx);
830 schedule(); 830 schedule();
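__this_cpu_inc() replaces the ++__get_cpu_var() form above; where the architecture has an interrupt-safe increment instruction, it compiles down to a single op on this CPU's copy. A hedged sketch (per_cpu_var() is this era's per-cpu name-mangling helper):

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_count);

static void demo_hit(void)
{
	/* same effect as ++__get_cpu_var(demo_count), but lets the
	 * arch use one RMW instruction on the local copy */
	__this_cpu_inc(per_cpu_var(demo_count));
}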
diff --git a/kernel/relay.c b/kernel/relay.c
index 760c26209a3c..c705a41b4ba3 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1198,7 +1198,7 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1198 relay_consume_bytes(rbuf, buf->private); 1198 relay_consume_bytes(rbuf, buf->private);
1199} 1199}
1200 1200
1201static struct pipe_buf_operations relay_pipe_buf_ops = { 1201static const struct pipe_buf_operations relay_pipe_buf_ops = {
1202 .can_merge = 0, 1202 .can_merge = 0,
1203 .map = generic_pipe_buf_map, 1203 .map = generic_pipe_buf_map,
1204 .unmap = generic_pipe_buf_unmap, 1204 .unmap = generic_pipe_buf_unmap,
diff --git a/kernel/resource.c b/kernel/resource.c
index fb11a58b9594..dc15686b7a77 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new,
308 void *alignf_data) 308 void *alignf_data)
309{ 309{
310 struct resource *this = root->child; 310 struct resource *this = root->child;
311 resource_size_t start, end;
311 312
312 new->start = root->start; 313 start = root->start;
313 /* 314 /*
314 * Skip past an allocated resource that starts at 0, since the assignment 315 * Skip past an allocated resource that starts at 0, since the assignment
315 * of this->start - 1 to new->end below would cause an underflow. 316 * of this->start - 1 to new->end below would cause an underflow.
316 */ 317 */
317 if (this && this->start == 0) { 318 if (this && this->start == 0) {
318 new->start = this->end + 1; 319 start = this->end + 1;
319 this = this->sibling; 320 this = this->sibling;
320 } 321 }
321 for(;;) { 322 for(;;) {
322 if (this) 323 if (this)
323 new->end = this->start - 1; 324 end = this->start - 1;
324 else 325 else
325 new->end = root->end; 326 end = root->end;
326 if (new->start < min) 327 if (start < min)
327 new->start = min; 328 start = min;
328 if (new->end > max) 329 if (end > max)
329 new->end = max; 330 end = max;
330 new->start = ALIGN(new->start, align); 331 start = ALIGN(start, align);
331 if (alignf) 332 if (alignf)
332 alignf(alignf_data, new, size, align); 333 alignf(alignf_data, new, size, align);
333 if (new->start < new->end && new->end - new->start >= size - 1) { 334 if (start < end && end - start >= size - 1) {
334 new->end = new->start + size - 1; 335 new->start = start;
336 new->end = start + size - 1;
335 return 0; 337 return 0;
336 } 338 }
337 if (!this) 339 if (!this)
338 break; 340 break;
339 new->start = this->end + 1; 341 start = this->end + 1;
340 this = this->sibling; 342 this = this->sibling;
341 } 343 }
342 return -EBUSY; 344 return -EBUSY;
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5fcb4fe645e2..ddabb54bb5c8 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -37,8 +37,8 @@ do { \
37 if (rt_trace_on) { \ 37 if (rt_trace_on) { \
38 rt_trace_on = 0; \ 38 rt_trace_on = 0; \
39 console_verbose(); \ 39 console_verbose(); \
40 if (spin_is_locked(&current->pi_lock)) \ 40 if (raw_spin_is_locked(&current->pi_lock)) \
41 spin_unlock(&current->pi_lock); \ 41 raw_spin_unlock(&current->pi_lock); \
42 } \ 42 } \
43} while (0) 43} while (0)
44 44
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 29bd4baf9e75..a9604815786a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -138,9 +138,9 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
138{ 138{
139 unsigned long flags; 139 unsigned long flags;
140 140
141 spin_lock_irqsave(&task->pi_lock, flags); 141 raw_spin_lock_irqsave(&task->pi_lock, flags);
142 __rt_mutex_adjust_prio(task); 142 __rt_mutex_adjust_prio(task);
143 spin_unlock_irqrestore(&task->pi_lock, flags); 143 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
144} 144}
145 145
146/* 146/*
@@ -195,7 +195,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
195 /* 195 /*
196 * Task can not go away as we did a get_task() before ! 196 * Task can not go away as we did a get_task() before !
197 */ 197 */
198 spin_lock_irqsave(&task->pi_lock, flags); 198 raw_spin_lock_irqsave(&task->pi_lock, flags);
199 199
200 waiter = task->pi_blocked_on; 200 waiter = task->pi_blocked_on;
201 /* 201 /*
@@ -231,8 +231,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
231 goto out_unlock_pi; 231 goto out_unlock_pi;
232 232
233 lock = waiter->lock; 233 lock = waiter->lock;
234 if (!spin_trylock(&lock->wait_lock)) { 234 if (!raw_spin_trylock(&lock->wait_lock)) {
235 spin_unlock_irqrestore(&task->pi_lock, flags); 235 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
236 cpu_relax(); 236 cpu_relax();
237 goto retry; 237 goto retry;
238 } 238 }
@@ -240,7 +240,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
240 /* Deadlock detection */ 240 /* Deadlock detection */
241 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 241 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
242 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 242 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
243 spin_unlock(&lock->wait_lock); 243 raw_spin_unlock(&lock->wait_lock);
244 ret = deadlock_detect ? -EDEADLK : 0; 244 ret = deadlock_detect ? -EDEADLK : 0;
245 goto out_unlock_pi; 245 goto out_unlock_pi;
246 } 246 }
@@ -253,13 +253,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
253 plist_add(&waiter->list_entry, &lock->wait_list); 253 plist_add(&waiter->list_entry, &lock->wait_list);
254 254
255 /* Release the task */ 255 /* Release the task */
256 spin_unlock_irqrestore(&task->pi_lock, flags); 256 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
257 put_task_struct(task); 257 put_task_struct(task);
258 258
259 /* Grab the next task */ 259 /* Grab the next task */
260 task = rt_mutex_owner(lock); 260 task = rt_mutex_owner(lock);
261 get_task_struct(task); 261 get_task_struct(task);
262 spin_lock_irqsave(&task->pi_lock, flags); 262 raw_spin_lock_irqsave(&task->pi_lock, flags);
263 263
264 if (waiter == rt_mutex_top_waiter(lock)) { 264 if (waiter == rt_mutex_top_waiter(lock)) {
265 /* Boost the owner */ 265 /* Boost the owner */
@@ -277,10 +277,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
277 __rt_mutex_adjust_prio(task); 277 __rt_mutex_adjust_prio(task);
278 } 278 }
279 279
280 spin_unlock_irqrestore(&task->pi_lock, flags); 280 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
281 281
282 top_waiter = rt_mutex_top_waiter(lock); 282 top_waiter = rt_mutex_top_waiter(lock);
283 spin_unlock(&lock->wait_lock); 283 raw_spin_unlock(&lock->wait_lock);
284 284
285 if (!detect_deadlock && waiter != top_waiter) 285 if (!detect_deadlock && waiter != top_waiter)
286 goto out_put_task; 286 goto out_put_task;
@@ -288,7 +288,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
288 goto again; 288 goto again;
289 289
290 out_unlock_pi: 290 out_unlock_pi:
291 spin_unlock_irqrestore(&task->pi_lock, flags); 291 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
292 out_put_task: 292 out_put_task:
293 put_task_struct(task); 293 put_task_struct(task);
294 294
@@ -313,9 +313,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
313 if (pendowner == task) 313 if (pendowner == task)
314 return 1; 314 return 1;
315 315
316 spin_lock_irqsave(&pendowner->pi_lock, flags); 316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) { 317 if (task->prio >= pendowner->prio) {
318 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0; 319 return 0;
320 } 320 }
321 321
@@ -325,7 +325,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
325 * priority. 325 * priority.
326 */ 326 */
327 if (likely(!rt_mutex_has_waiters(lock))) { 327 if (likely(!rt_mutex_has_waiters(lock))) {
328 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1; 329 return 1;
330 } 330 }
331 331
@@ -333,7 +333,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
333 next = rt_mutex_top_waiter(lock); 333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters); 334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner); 335 __rt_mutex_adjust_prio(pendowner);
336 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337 337
338 /* 338 /*
339 * We are going to steal the lock and a waiter was 339 * We are going to steal the lock and a waiter was
@@ -350,10 +350,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
350 * might be task: 350 * might be task:
351 */ 351 */
352 if (likely(next->task != task)) { 352 if (likely(next->task != task)) {
353 spin_lock_irqsave(&task->pi_lock, flags); 353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters); 354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task); 355 __rt_mutex_adjust_prio(task);
356 spin_unlock_irqrestore(&task->pi_lock, flags); 356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 } 357 }
358 return 1; 358 return 1;
359} 359}
@@ -420,7 +420,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
420 unsigned long flags; 420 unsigned long flags;
421 int chain_walk = 0, res; 421 int chain_walk = 0, res;
422 422
423 spin_lock_irqsave(&task->pi_lock, flags); 423 raw_spin_lock_irqsave(&task->pi_lock, flags);
424 __rt_mutex_adjust_prio(task); 424 __rt_mutex_adjust_prio(task);
425 waiter->task = task; 425 waiter->task = task;
426 waiter->lock = lock; 426 waiter->lock = lock;
@@ -434,17 +434,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
434 434
435 task->pi_blocked_on = waiter; 435 task->pi_blocked_on = waiter;
436 436
437 spin_unlock_irqrestore(&task->pi_lock, flags); 437 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 438
439 if (waiter == rt_mutex_top_waiter(lock)) { 439 if (waiter == rt_mutex_top_waiter(lock)) {
440 spin_lock_irqsave(&owner->pi_lock, flags); 440 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters); 442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
443 443
444 __rt_mutex_adjust_prio(owner); 444 __rt_mutex_adjust_prio(owner);
445 if (owner->pi_blocked_on) 445 if (owner->pi_blocked_on)
446 chain_walk = 1; 446 chain_walk = 1;
447 spin_unlock_irqrestore(&owner->pi_lock, flags); 447 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
448 } 448 }
449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) 449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
450 chain_walk = 1; 450 chain_walk = 1;
@@ -459,12 +459,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
459 */ 459 */
460 get_task_struct(owner); 460 get_task_struct(owner);
461 461
462 spin_unlock(&lock->wait_lock); 462 raw_spin_unlock(&lock->wait_lock);
463 463
464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
465 task); 465 task);
466 466
467 spin_lock(&lock->wait_lock); 467 raw_spin_lock(&lock->wait_lock);
468 468
469 return res; 469 return res;
470} 470}
@@ -483,7 +483,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
483 struct task_struct *pendowner; 483 struct task_struct *pendowner;
484 unsigned long flags; 484 unsigned long flags;
485 485
486 spin_lock_irqsave(&current->pi_lock, flags); 486 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 487
488 waiter = rt_mutex_top_waiter(lock); 488 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list); 489 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -500,7 +500,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
500 500
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
502 502
503 spin_unlock_irqrestore(&current->pi_lock, flags); 503 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 504
505 /* 505 /*
506 * Clear the pi_blocked_on variable and enqueue a possible 506 * Clear the pi_blocked_on variable and enqueue a possible
@@ -509,7 +509,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
509 * waiter with higher priority than pending-owner->normal_prio 509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner. 510 * is blocked on the unboosted (pending) owner.
511 */ 511 */
512 spin_lock_irqsave(&pendowner->pi_lock, flags); 512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513 513
514 WARN_ON(!pendowner->pi_blocked_on); 514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter); 515 WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -523,7 +523,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
523 next = rt_mutex_top_waiter(lock); 523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters); 524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 } 525 }
526 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527 527
528 wake_up_process(pendowner); 528 wake_up_process(pendowner);
529} 529}
@@ -541,15 +541,15 @@ static void remove_waiter(struct rt_mutex *lock,
541 unsigned long flags; 541 unsigned long flags;
542 int chain_walk = 0; 542 int chain_walk = 0;
543 543
544 spin_lock_irqsave(&current->pi_lock, flags); 544 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 545 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL; 546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 547 current->pi_blocked_on = NULL;
548 spin_unlock_irqrestore(&current->pi_lock, flags); 548 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 549
550 if (first && owner != current) { 550 if (first && owner != current) {
551 551
552 spin_lock_irqsave(&owner->pi_lock, flags); 552 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 553
554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters); 554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
555 555
@@ -564,7 +564,7 @@ static void remove_waiter(struct rt_mutex *lock,
564 if (owner->pi_blocked_on) 564 if (owner->pi_blocked_on)
565 chain_walk = 1; 565 chain_walk = 1;
566 566
567 spin_unlock_irqrestore(&owner->pi_lock, flags); 567 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
568 } 568 }
569 569
570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
@@ -575,11 +575,11 @@ static void remove_waiter(struct rt_mutex *lock,
575 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 575 /* gets dropped in rt_mutex_adjust_prio_chain()! */
576 get_task_struct(owner); 576 get_task_struct(owner);
577 577
578 spin_unlock(&lock->wait_lock); 578 raw_spin_unlock(&lock->wait_lock);
579 579
580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
581 581
582 spin_lock(&lock->wait_lock); 582 raw_spin_lock(&lock->wait_lock);
583} 583}
584 584
585/* 585/*
@@ -592,15 +592,15 @@ void rt_mutex_adjust_pi(struct task_struct *task)
592 struct rt_mutex_waiter *waiter; 592 struct rt_mutex_waiter *waiter;
593 unsigned long flags; 593 unsigned long flags;
594 594
595 spin_lock_irqsave(&task->pi_lock, flags); 595 raw_spin_lock_irqsave(&task->pi_lock, flags);
596 596
597 waiter = task->pi_blocked_on; 597 waiter = task->pi_blocked_on;
598 if (!waiter || waiter->list_entry.prio == task->prio) { 598 if (!waiter || waiter->list_entry.prio == task->prio) {
599 spin_unlock_irqrestore(&task->pi_lock, flags); 599 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
600 return; 600 return;
601 } 601 }
602 602
603 spin_unlock_irqrestore(&task->pi_lock, flags); 603 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
604 604
605 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 605 /* gets dropped in rt_mutex_adjust_prio_chain()! */
606 get_task_struct(task); 606 get_task_struct(task);
@@ -672,14 +672,14 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
672 break; 672 break;
673 } 673 }
674 674
675 spin_unlock(&lock->wait_lock); 675 raw_spin_unlock(&lock->wait_lock);
676 676
677 debug_rt_mutex_print_deadlock(waiter); 677 debug_rt_mutex_print_deadlock(waiter);
678 678
679 if (waiter->task) 679 if (waiter->task)
680 schedule_rt_mutex(lock); 680 schedule_rt_mutex(lock);
681 681
682 spin_lock(&lock->wait_lock); 682 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 683 set_current_state(state);
684 } 684 }
685 685
@@ -700,11 +700,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
700 debug_rt_mutex_init_waiter(&waiter); 700 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL; 701 waiter.task = NULL;
702 702
703 spin_lock(&lock->wait_lock); 703 raw_spin_lock(&lock->wait_lock);
704 704
705 /* Try to acquire the lock again: */ 705 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 706 if (try_to_take_rt_mutex(lock)) {
707 spin_unlock(&lock->wait_lock); 707 raw_spin_unlock(&lock->wait_lock);
708 return 0; 708 return 0;
709 } 709 }
710 710
@@ -731,7 +731,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
731 */ 731 */
732 fixup_rt_mutex_waiters(lock); 732 fixup_rt_mutex_waiters(lock);
733 733
734 spin_unlock(&lock->wait_lock); 734 raw_spin_unlock(&lock->wait_lock);
735 735
736 /* Remove pending timer: */ 736 /* Remove pending timer: */
737 if (unlikely(timeout)) 737 if (unlikely(timeout))
@@ -758,7 +758,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
758{ 758{
759 int ret = 0; 759 int ret = 0;
760 760
761 spin_lock(&lock->wait_lock); 761 raw_spin_lock(&lock->wait_lock);
762 762
763 if (likely(rt_mutex_owner(lock) != current)) { 763 if (likely(rt_mutex_owner(lock) != current)) {
764 764
@@ -770,7 +770,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
770 fixup_rt_mutex_waiters(lock); 770 fixup_rt_mutex_waiters(lock);
771 } 771 }
772 772
773 spin_unlock(&lock->wait_lock); 773 raw_spin_unlock(&lock->wait_lock);
774 774
775 return ret; 775 return ret;
776} 776}
@@ -781,7 +781,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
781static void __sched 781static void __sched
782rt_mutex_slowunlock(struct rt_mutex *lock) 782rt_mutex_slowunlock(struct rt_mutex *lock)
783{ 783{
784 spin_lock(&lock->wait_lock); 784 raw_spin_lock(&lock->wait_lock);
785 785
786 debug_rt_mutex_unlock(lock); 786 debug_rt_mutex_unlock(lock);
787 787
@@ -789,13 +789,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
789 789
790 if (!rt_mutex_has_waiters(lock)) { 790 if (!rt_mutex_has_waiters(lock)) {
791 lock->owner = NULL; 791 lock->owner = NULL;
792 spin_unlock(&lock->wait_lock); 792 raw_spin_unlock(&lock->wait_lock);
793 return; 793 return;
794 } 794 }
795 795
796 wakeup_next_waiter(lock); 796 wakeup_next_waiter(lock);
797 797
798 spin_unlock(&lock->wait_lock); 798 raw_spin_unlock(&lock->wait_lock);
799 799
800 /* Undo pi boosting if necessary: */ 800 /* Undo pi boosting if necessary: */
801 rt_mutex_adjust_prio(current); 801 rt_mutex_adjust_prio(current);
@@ -970,8 +970,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
970void __rt_mutex_init(struct rt_mutex *lock, const char *name) 970void __rt_mutex_init(struct rt_mutex *lock, const char *name)
971{ 971{
972 lock->owner = NULL; 972 lock->owner = NULL;
973 spin_lock_init(&lock->wait_lock); 973 raw_spin_lock_init(&lock->wait_lock);
974 plist_head_init(&lock->wait_list, &lock->wait_lock); 974 plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
975 975
976 debug_rt_mutex_init(lock, name); 976 debug_rt_mutex_init(lock, name);
977} 977}
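
Because wait_lock is now raw, the embedded priority list must be initialized against a raw lock as well; plist_head_init_raw() is the raw-lock counterpart of plist_head_init() that accompanies this conversion (treat the pairing as an assumption from this series). Open-coded, initialization now reads:

	static void rt_mutex_init_sketch(struct rt_mutex *lock)
	{
		lock->owner = NULL;
		raw_spin_lock_init(&lock->wait_lock);
		plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
	}
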
@@ -1032,7 +1032,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1032{ 1032{
1033 int ret; 1033 int ret;
1034 1034
1035 spin_lock(&lock->wait_lock); 1035 raw_spin_lock(&lock->wait_lock);
1036 1036
1037 mark_rt_mutex_waiters(lock); 1037 mark_rt_mutex_waiters(lock);
1038 1038
@@ -1040,7 +1040,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1040 /* We got the lock for task. */ 1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0); 1042 rt_mutex_set_owner(lock, task, 0);
1043 spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task); 1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 1045 return 1;
1046 } 1046 }
@@ -1056,7 +1056,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 1056 */
1057 ret = 0; 1057 ret = 0;
1058 } 1058 }
1059 spin_unlock(&lock->wait_lock); 1059 raw_spin_unlock(&lock->wait_lock);
1060 1060
1061 debug_rt_mutex_print_deadlock(waiter); 1061 debug_rt_mutex_print_deadlock(waiter);
1062 1062
@@ -1106,7 +1106,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1106{ 1106{
1107 int ret; 1107 int ret;
1108 1108
1109 spin_lock(&lock->wait_lock); 1109 raw_spin_lock(&lock->wait_lock);
1110 1110
1111 set_current_state(TASK_INTERRUPTIBLE); 1111 set_current_state(TASK_INTERRUPTIBLE);
1112 1112
@@ -1124,7 +1124,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1124 */ 1124 */
1125 fixup_rt_mutex_waiters(lock); 1125 fixup_rt_mutex_waiters(lock);
1126 1126
1127 spin_unlock(&lock->wait_lock); 1127 raw_spin_unlock(&lock->wait_lock);
1128 1128
1129 /* 1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been 1130 * Readjust priority, when we did not get the lock. We might have been
diff --git a/kernel/sched.c b/kernel/sched.c
index e7f2cfa6a257..720df108a2d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -26,6 +26,8 @@
26 * Thomas Gleixner, Mike Kravetz 26 * Thomas Gleixner, Mike Kravetz
27 */ 27 */
28 28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
29#include <linux/mm.h> 31#include <linux/mm.h>
30#include <linux/module.h> 32#include <linux/module.h>
31#include <linux/nmi.h> 33#include <linux/nmi.h>
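
The new pr_fmt define, placed before any include, makes every pr_*() call in sched.c prefix its message with the object name; this is what lets a later hunk shorten printk(KERN_ERR ...) to pr_err(...). Expansion sketch (KBUILD_MODNAME resolves to "sched" for sched.o):

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	/* pr_err("BUG: scheduling while atomic\n") now expands to          */
	/* printk(KERN_ERR "sched" ": " "BUG: scheduling while atomic\n")   */
	/* and appears in the log as:  sched: BUG: scheduling while atomic  */
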
@@ -141,7 +143,7 @@ struct rt_prio_array {
141 143
142struct rt_bandwidth { 144struct rt_bandwidth {
143 /* nests inside the rq lock: */ 145 /* nests inside the rq lock: */
144 spinlock_t rt_runtime_lock; 146 raw_spinlock_t rt_runtime_lock;
145 ktime_t rt_period; 147 ktime_t rt_period;
146 u64 rt_runtime; 148 u64 rt_runtime;
147 struct hrtimer rt_period_timer; 149 struct hrtimer rt_period_timer;
@@ -178,7 +180,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
178 rt_b->rt_period = ns_to_ktime(period); 180 rt_b->rt_period = ns_to_ktime(period);
179 rt_b->rt_runtime = runtime; 181 rt_b->rt_runtime = runtime;
180 182
181 spin_lock_init(&rt_b->rt_runtime_lock); 183 raw_spin_lock_init(&rt_b->rt_runtime_lock);
182 184
183 hrtimer_init(&rt_b->rt_period_timer, 185 hrtimer_init(&rt_b->rt_period_timer,
184 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 186 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -200,7 +202,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
200 if (hrtimer_active(&rt_b->rt_period_timer)) 202 if (hrtimer_active(&rt_b->rt_period_timer))
201 return; 203 return;
202 204
203 spin_lock(&rt_b->rt_runtime_lock); 205 raw_spin_lock(&rt_b->rt_runtime_lock);
204 for (;;) { 206 for (;;) {
205 unsigned long delta; 207 unsigned long delta;
206 ktime_t soft, hard; 208 ktime_t soft, hard;
@@ -217,7 +219,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
217 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, 219 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
218 HRTIMER_MODE_ABS_PINNED, 0); 220 HRTIMER_MODE_ABS_PINNED, 0);
219 } 221 }
220 spin_unlock(&rt_b->rt_runtime_lock); 222 raw_spin_unlock(&rt_b->rt_runtime_lock);
221} 223}
222 224
223#ifdef CONFIG_RT_GROUP_SCHED 225#ifdef CONFIG_RT_GROUP_SCHED
@@ -298,7 +300,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
298 300
299#ifdef CONFIG_RT_GROUP_SCHED 301#ifdef CONFIG_RT_GROUP_SCHED
300static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); 302static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
301static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); 303static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
302#endif /* CONFIG_RT_GROUP_SCHED */ 304#endif /* CONFIG_RT_GROUP_SCHED */
303#else /* !CONFIG_USER_SCHED */ 305#else /* !CONFIG_USER_SCHED */
304#define root_task_group init_task_group 306#define root_task_group init_task_group
@@ -470,7 +472,7 @@ struct rt_rq {
470 u64 rt_time; 472 u64 rt_time;
471 u64 rt_runtime; 473 u64 rt_runtime;
472 /* Nests inside the rq lock: */ 474 /* Nests inside the rq lock: */
473 spinlock_t rt_runtime_lock; 475 raw_spinlock_t rt_runtime_lock;
474 476
475#ifdef CONFIG_RT_GROUP_SCHED 477#ifdef CONFIG_RT_GROUP_SCHED
476 unsigned long rt_nr_boosted; 478 unsigned long rt_nr_boosted;
@@ -525,7 +527,7 @@ static struct root_domain def_root_domain;
525 */ 527 */
526struct rq { 528struct rq {
527 /* runqueue lock: */ 529 /* runqueue lock: */
528 spinlock_t lock; 530 raw_spinlock_t lock;
529 531
530 /* 532 /*
531 * nr_running and cpu_load should be in the same cacheline because 533 * nr_running and cpu_load should be in the same cacheline because
@@ -685,7 +687,7 @@ inline void update_rq_clock(struct rq *rq)
685 */ 687 */
686int runqueue_is_locked(int cpu) 688int runqueue_is_locked(int cpu)
687{ 689{
688 return spin_is_locked(&cpu_rq(cpu)->lock); 690 return raw_spin_is_locked(&cpu_rq(cpu)->lock);
689} 691}
690 692
691/* 693/*
@@ -814,6 +816,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
814 * default: 0.25ms 816 * default: 0.25ms
815 */ 817 */
816unsigned int sysctl_sched_shares_ratelimit = 250000; 818unsigned int sysctl_sched_shares_ratelimit = 250000;
819unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
817 820
818/* 821/*
 819 * Inject some fuzziness into changing the per-cpu group shares 822 * Inject some fuzziness into changing the per-cpu group shares
@@ -892,7 +895,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
892 */ 895 */
893 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); 896 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
894 897
895 spin_unlock_irq(&rq->lock); 898 raw_spin_unlock_irq(&rq->lock);
896} 899}
897 900
898#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 901#else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -916,9 +919,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
916 next->oncpu = 1; 919 next->oncpu = 1;
917#endif 920#endif
918#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 921#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
919 spin_unlock_irq(&rq->lock); 922 raw_spin_unlock_irq(&rq->lock);
920#else 923#else
921 spin_unlock(&rq->lock); 924 raw_spin_unlock(&rq->lock);
922#endif 925#endif
923} 926}
924 927
@@ -948,10 +951,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
948{ 951{
949 for (;;) { 952 for (;;) {
950 struct rq *rq = task_rq(p); 953 struct rq *rq = task_rq(p);
951 spin_lock(&rq->lock); 954 raw_spin_lock(&rq->lock);
952 if (likely(rq == task_rq(p))) 955 if (likely(rq == task_rq(p)))
953 return rq; 956 return rq;
954 spin_unlock(&rq->lock); 957 raw_spin_unlock(&rq->lock);
955 } 958 }
956} 959}
957 960
@@ -968,10 +971,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
968 for (;;) { 971 for (;;) {
969 local_irq_save(*flags); 972 local_irq_save(*flags);
970 rq = task_rq(p); 973 rq = task_rq(p);
971 spin_lock(&rq->lock); 974 raw_spin_lock(&rq->lock);
972 if (likely(rq == task_rq(p))) 975 if (likely(rq == task_rq(p)))
973 return rq; 976 return rq;
974 spin_unlock_irqrestore(&rq->lock, *flags); 977 raw_spin_unlock_irqrestore(&rq->lock, *flags);
975 } 978 }
976} 979}
977 980
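
task_rq_lock() and __task_rq_lock() cannot trust a single read of task_rq(p): the task may migrate between reading the rq pointer and acquiring its lock, so the pointer is re-validated under the lock and the loop retried. The idiom, annotated (sketch):

	static struct rq *lock_task_rq_sketch(struct task_struct *p)
	{
		for (;;) {
			struct rq *rq = task_rq(p);	/* unlocked read, may go stale */

			raw_spin_lock(&rq->lock);
			if (likely(rq == task_rq(p)))	/* still this task's rq?       */
				return rq;		/* yes: return with lock held  */
			raw_spin_unlock(&rq->lock);	/* no: raced with migration    */
		}
	}
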
@@ -980,19 +983,19 @@ void task_rq_unlock_wait(struct task_struct *p)
980 struct rq *rq = task_rq(p); 983 struct rq *rq = task_rq(p);
981 984
982 smp_mb(); /* spin-unlock-wait is not a full memory barrier */ 985 smp_mb(); /* spin-unlock-wait is not a full memory barrier */
983 spin_unlock_wait(&rq->lock); 986 raw_spin_unlock_wait(&rq->lock);
984} 987}
985 988
986static void __task_rq_unlock(struct rq *rq) 989static void __task_rq_unlock(struct rq *rq)
987 __releases(rq->lock) 990 __releases(rq->lock)
988{ 991{
989 spin_unlock(&rq->lock); 992 raw_spin_unlock(&rq->lock);
990} 993}
991 994
992static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) 995static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
993 __releases(rq->lock) 996 __releases(rq->lock)
994{ 997{
995 spin_unlock_irqrestore(&rq->lock, *flags); 998 raw_spin_unlock_irqrestore(&rq->lock, *flags);
996} 999}
997 1000
998/* 1001/*
@@ -1005,7 +1008,7 @@ static struct rq *this_rq_lock(void)
1005 1008
1006 local_irq_disable(); 1009 local_irq_disable();
1007 rq = this_rq(); 1010 rq = this_rq();
1008 spin_lock(&rq->lock); 1011 raw_spin_lock(&rq->lock);
1009 1012
1010 return rq; 1013 return rq;
1011} 1014}
@@ -1052,10 +1055,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1052 1055
1053 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); 1056 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1054 1057
1055 spin_lock(&rq->lock); 1058 raw_spin_lock(&rq->lock);
1056 update_rq_clock(rq); 1059 update_rq_clock(rq);
1057 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1060 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1058 spin_unlock(&rq->lock); 1061 raw_spin_unlock(&rq->lock);
1059 1062
1060 return HRTIMER_NORESTART; 1063 return HRTIMER_NORESTART;
1061} 1064}
@@ -1068,10 +1071,10 @@ static void __hrtick_start(void *arg)
1068{ 1071{
1069 struct rq *rq = arg; 1072 struct rq *rq = arg;
1070 1073
1071 spin_lock(&rq->lock); 1074 raw_spin_lock(&rq->lock);
1072 hrtimer_restart(&rq->hrtick_timer); 1075 hrtimer_restart(&rq->hrtick_timer);
1073 rq->hrtick_csd_pending = 0; 1076 rq->hrtick_csd_pending = 0;
1074 spin_unlock(&rq->lock); 1077 raw_spin_unlock(&rq->lock);
1075} 1078}
1076 1079
1077/* 1080/*
@@ -1178,7 +1181,7 @@ static void resched_task(struct task_struct *p)
1178{ 1181{
1179 int cpu; 1182 int cpu;
1180 1183
1181 assert_spin_locked(&task_rq(p)->lock); 1184 assert_raw_spin_locked(&task_rq(p)->lock);
1182 1185
1183 if (test_tsk_need_resched(p)) 1186 if (test_tsk_need_resched(p))
1184 return; 1187 return;
@@ -1200,10 +1203,10 @@ static void resched_cpu(int cpu)
1200 struct rq *rq = cpu_rq(cpu); 1203 struct rq *rq = cpu_rq(cpu);
1201 unsigned long flags; 1204 unsigned long flags;
1202 1205
1203 if (!spin_trylock_irqsave(&rq->lock, flags)) 1206 if (!raw_spin_trylock_irqsave(&rq->lock, flags))
1204 return; 1207 return;
1205 resched_task(cpu_curr(cpu)); 1208 resched_task(cpu_curr(cpu));
1206 spin_unlock_irqrestore(&rq->lock, flags); 1209 raw_spin_unlock_irqrestore(&rq->lock, flags);
1207} 1210}
1208 1211
1209#ifdef CONFIG_NO_HZ 1212#ifdef CONFIG_NO_HZ
@@ -1272,7 +1275,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1272#else /* !CONFIG_SMP */ 1275#else /* !CONFIG_SMP */
1273static void resched_task(struct task_struct *p) 1276static void resched_task(struct task_struct *p)
1274{ 1277{
1275 assert_spin_locked(&task_rq(p)->lock); 1278 assert_raw_spin_locked(&task_rq(p)->lock);
1276 set_tsk_need_resched(p); 1279 set_tsk_need_resched(p);
1277} 1280}
1278 1281
@@ -1599,11 +1602,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1599 struct rq *rq = cpu_rq(cpu); 1602 struct rq *rq = cpu_rq(cpu);
1600 unsigned long flags; 1603 unsigned long flags;
1601 1604
1602 spin_lock_irqsave(&rq->lock, flags); 1605 raw_spin_lock_irqsave(&rq->lock, flags);
1603 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; 1606 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
1604 tg->cfs_rq[cpu]->shares = boost ? 0 : shares; 1607 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1605 __set_se_shares(tg->se[cpu], shares); 1608 __set_se_shares(tg->se[cpu], shares);
1606 spin_unlock_irqrestore(&rq->lock, flags); 1609 raw_spin_unlock_irqrestore(&rq->lock, flags);
1607 } 1610 }
1608} 1611}
1609 1612
@@ -1614,7 +1617,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1614 */ 1617 */
1615static int tg_shares_up(struct task_group *tg, void *data) 1618static int tg_shares_up(struct task_group *tg, void *data)
1616{ 1619{
1617 unsigned long weight, rq_weight = 0, shares = 0; 1620 unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
1618 unsigned long *usd_rq_weight; 1621 unsigned long *usd_rq_weight;
1619 struct sched_domain *sd = data; 1622 struct sched_domain *sd = data;
1620 unsigned long flags; 1623 unsigned long flags;
@@ -1630,6 +1633,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1630 weight = tg->cfs_rq[i]->load.weight; 1633 weight = tg->cfs_rq[i]->load.weight;
1631 usd_rq_weight[i] = weight; 1634 usd_rq_weight[i] = weight;
1632 1635
1636 rq_weight += weight;
1633 /* 1637 /*
1634 * If there are currently no tasks on the cpu pretend there 1638 * If there are currently no tasks on the cpu pretend there
1635 * is one of average load so that when a new task gets to 1639 * is one of average load so that when a new task gets to
@@ -1638,10 +1642,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
1638 if (!weight) 1642 if (!weight)
1639 weight = NICE_0_LOAD; 1643 weight = NICE_0_LOAD;
1640 1644
1641 rq_weight += weight; 1645 sum_weight += weight;
1642 shares += tg->cfs_rq[i]->shares; 1646 shares += tg->cfs_rq[i]->shares;
1643 } 1647 }
1644 1648
1649 if (!rq_weight)
1650 rq_weight = sum_weight;
1651
1645 if ((!shares && rq_weight) || shares > tg->shares) 1652 if ((!shares && rq_weight) || shares > tg->shares)
1646 shares = tg->shares; 1653 shares = tg->shares;
1647 1654
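
The fix keeps two totals: rq_weight sums the weight actually queued, while sum_weight additionally pads idle CPUs up to NICE_0_LOAD; shares are distributed against the real total, with the padded one used only when the whole group is idle. Per-CPU shares then come out roughly as follows (a simplified model of what update_group_shares_cpu() computes, ignoring boosting and clamping):

	/* shares_i ~ tg->shares * weight_i / rq_weight (sketch) */
	static unsigned long cpu_shares_sketch(unsigned long tg_shares,
					       unsigned long cpu_weight,
					       unsigned long rq_weight)
	{
		if (!rq_weight)
			return 0;
		return tg_shares * cpu_weight / rq_weight;
	}
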
@@ -1701,9 +1708,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
1701 if (root_task_group_empty()) 1708 if (root_task_group_empty())
1702 return; 1709 return;
1703 1710
1704 spin_unlock(&rq->lock); 1711 raw_spin_unlock(&rq->lock);
1705 update_shares(sd); 1712 update_shares(sd);
1706 spin_lock(&rq->lock); 1713 raw_spin_lock(&rq->lock);
1707} 1714}
1708 1715
1709static void update_h_load(long cpu) 1716static void update_h_load(long cpu)
@@ -1743,7 +1750,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1743 __acquires(busiest->lock) 1750 __acquires(busiest->lock)
1744 __acquires(this_rq->lock) 1751 __acquires(this_rq->lock)
1745{ 1752{
1746 spin_unlock(&this_rq->lock); 1753 raw_spin_unlock(&this_rq->lock);
1747 double_rq_lock(this_rq, busiest); 1754 double_rq_lock(this_rq, busiest);
1748 1755
1749 return 1; 1756 return 1;
@@ -1764,14 +1771,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1764{ 1771{
1765 int ret = 0; 1772 int ret = 0;
1766 1773
1767 if (unlikely(!spin_trylock(&busiest->lock))) { 1774 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
1768 if (busiest < this_rq) { 1775 if (busiest < this_rq) {
1769 spin_unlock(&this_rq->lock); 1776 raw_spin_unlock(&this_rq->lock);
1770 spin_lock(&busiest->lock); 1777 raw_spin_lock(&busiest->lock);
1771 spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); 1778 raw_spin_lock_nested(&this_rq->lock,
1779 SINGLE_DEPTH_NESTING);
1772 ret = 1; 1780 ret = 1;
1773 } else 1781 } else
1774 spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); 1782 raw_spin_lock_nested(&busiest->lock,
1783 SINGLE_DEPTH_NESTING);
1775 } 1784 }
1776 return ret; 1785 return ret;
1777} 1786}
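
When the trylock fails, the fallback imposes a global order: whichever runqueue has the lower address is locked first, so two CPUs taking the same pair in opposite directions can never deadlock; the _nested annotation merely tells lockdep that holding two rq locks is intentional. The invariant in isolation (sketch; assumes the two runqueues differ, as double_rq_lock() handles the equal case separately):

	static void lock_two_rqs_sketch(struct rq *a, struct rq *b)
	{
		if (a < b) {			/* lower address always first */
			raw_spin_lock(&a->lock);
			raw_spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
		} else {
			raw_spin_lock(&b->lock);
			raw_spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
		}
	}
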
@@ -1785,7 +1794,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1785{ 1794{
1786 if (unlikely(!irqs_disabled())) { 1795 if (unlikely(!irqs_disabled())) {
 1787 /* printk() doesn't work well under rq->lock */ 1796 /* printk() doesn't work well under rq->lock */
1788 spin_unlock(&this_rq->lock); 1797 raw_spin_unlock(&this_rq->lock);
1789 BUG_ON(1); 1798 BUG_ON(1);
1790 } 1799 }
1791 1800
@@ -1795,7 +1804,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1795static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) 1804static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
1796 __releases(busiest->lock) 1805 __releases(busiest->lock)
1797{ 1806{
1798 spin_unlock(&busiest->lock); 1807 raw_spin_unlock(&busiest->lock);
1799 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); 1808 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
1800} 1809}
1801#endif 1810#endif
@@ -1810,6 +1819,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1810#endif 1819#endif
1811 1820
1812static void calc_load_account_active(struct rq *this_rq); 1821static void calc_load_account_active(struct rq *this_rq);
1822static void update_sysctl(void);
1823static int get_update_sysctl_factor(void);
1824
1825static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1826{
1827 set_task_rq(p, cpu);
1828#ifdef CONFIG_SMP
1829 /*
1830 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 1831 * successfully executed on another CPU. We must ensure that updates of
1832 * per-task data have been completed by this moment.
1833 */
1834 smp_wmb();
1835 task_thread_info(p)->cpu = cpu;
1836#endif
1837}
1813 1838
1814#include "sched_stats.h" 1839#include "sched_stats.h"
1815#include "sched_idletask.c" 1840#include "sched_idletask.c"
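
__set_task_cpu() itself is unchanged by the move up the file; the detail worth noting is the barrier: the group/runqueue linkage from set_task_rq() must become visible to other CPUs before ->cpu does, because remote code reaches the task through ->cpu (the task_rq_lock() retry loop shown earlier is the read side). Annotated (sketch):

	static inline void publish_task_cpu(struct task_struct *p, unsigned int cpu)
	{
		set_task_rq(p, cpu);		/* 1: update per-class rq links      */
		smp_wmb();			/* 2: order 1 before 3 for remote    */
						/*    readers of ->cpu               */
		task_thread_info(p)->cpu = cpu;	/* 3: publish; task_cpu(p) == cpu    */
	}
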
@@ -1967,20 +1992,6 @@ inline int task_curr(const struct task_struct *p)
1967 return cpu_curr(task_cpu(p)) == p; 1992 return cpu_curr(task_cpu(p)) == p;
1968} 1993}
1969 1994
1970static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1971{
1972 set_task_rq(p, cpu);
1973#ifdef CONFIG_SMP
1974 /*
1975 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 1976 * successfully executed on another CPU. We must ensure that updates of
1977 * per-task data have been completed by this moment.
1978 */
1979 smp_wmb();
1980 task_thread_info(p)->cpu = cpu;
1981#endif
1982}
1983
1984static inline void check_class_changed(struct rq *rq, struct task_struct *p, 1995static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1985 const struct sched_class *prev_class, 1996 const struct sched_class *prev_class,
1986 int oldprio, int running) 1997 int oldprio, int running)
@@ -1993,39 +2004,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1993 p->sched_class->prio_changed(rq, p, oldprio, running); 2004 p->sched_class->prio_changed(rq, p, oldprio, running);
1994} 2005}
1995 2006
1996/**
1997 * kthread_bind - bind a just-created kthread to a cpu.
1998 * @p: thread created by kthread_create().
1999 * @cpu: cpu (might not be online, must be possible) for @k to run on.
2000 *
2001 * Description: This function is equivalent to set_cpus_allowed(),
2002 * except that @cpu doesn't need to be online, and the thread must be
2003 * stopped (i.e., just returned from kthread_create()).
2004 *
2005 * Function lives here instead of kthread.c because it messes with
2006 * scheduler internals which require locking.
2007 */
2008void kthread_bind(struct task_struct *p, unsigned int cpu)
2009{
2010 struct rq *rq = cpu_rq(cpu);
2011 unsigned long flags;
2012
2013 /* Must have done schedule() in kthread() before we set_task_cpu */
2014 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
2015 WARN_ON(1);
2016 return;
2017 }
2018
2019 spin_lock_irqsave(&rq->lock, flags);
2020 update_rq_clock(rq);
2021 set_task_cpu(p, cpu);
2022 p->cpus_allowed = cpumask_of_cpu(cpu);
2023 p->rt.nr_cpus_allowed = 1;
2024 p->flags |= PF_THREAD_BOUND;
2025 spin_unlock_irqrestore(&rq->lock, flags);
2026}
2027EXPORT_SYMBOL(kthread_bind);
2028
2029#ifdef CONFIG_SMP 2007#ifdef CONFIG_SMP
2030/* 2008/*
2031 * Is this task likely cache-hot: 2009 * Is this task likely cache-hot:
@@ -2035,6 +2013,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2035{ 2013{
2036 s64 delta; 2014 s64 delta;
2037 2015
2016 if (p->sched_class != &fair_sched_class)
2017 return 0;
2018
2038 /* 2019 /*
2039 * Buddy candidates are cache hot: 2020 * Buddy candidates are cache hot:
2040 */ 2021 */
@@ -2043,9 +2024,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2043 &p->se == cfs_rq_of(&p->se)->last)) 2024 &p->se == cfs_rq_of(&p->se)->last))
2044 return 1; 2025 return 1;
2045 2026
2046 if (p->sched_class != &fair_sched_class)
2047 return 0;
2048
2049 if (sysctl_sched_migration_cost == -1) 2027 if (sysctl_sched_migration_cost == -1)
2050 return 1; 2028 return 1;
2051 if (sysctl_sched_migration_cost == 0) 2029 if (sysctl_sched_migration_cost == 0)
@@ -2056,38 +2034,24 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2056 return delta < (s64)sysctl_sched_migration_cost; 2034 return delta < (s64)sysctl_sched_migration_cost;
2057} 2035}
2058 2036
2059
2060void set_task_cpu(struct task_struct *p, unsigned int new_cpu) 2037void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2061{ 2038{
2062 int old_cpu = task_cpu(p); 2039#ifdef CONFIG_SCHED_DEBUG
2063 struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); 2040 /*
2064 struct cfs_rq *old_cfsrq = task_cfs_rq(p), 2041 * We should never call set_task_cpu() on a blocked task,
2065 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); 2042 * ttwu() will sort out the placement.
2066 u64 clock_offset; 2043 */
2067 2044 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
2068 clock_offset = old_rq->clock - new_rq->clock; 2045 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
2046#endif
2069 2047
2070 trace_sched_migrate_task(p, new_cpu); 2048 trace_sched_migrate_task(p, new_cpu);
2071 2049
2072#ifdef CONFIG_SCHEDSTATS 2050 if (task_cpu(p) == new_cpu)
2073 if (p->se.wait_start) 2051 return;
2074 p->se.wait_start -= clock_offset; 2052
2075 if (p->se.sleep_start) 2053 p->se.nr_migrations++;
2076 p->se.sleep_start -= clock_offset; 2054 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
2077 if (p->se.block_start)
2078 p->se.block_start -= clock_offset;
2079#endif
2080 if (old_cpu != new_cpu) {
2081 p->se.nr_migrations++;
2082#ifdef CONFIG_SCHEDSTATS
2083 if (task_hot(p, old_rq->clock, NULL))
2084 schedstat_inc(p, se.nr_forced2_migrations);
2085#endif
2086 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
2087 1, 1, NULL, 0);
2088 }
2089 p->se.vruntime -= old_cfsrq->min_vruntime -
2090 new_cfsrq->min_vruntime;
2091 2055
2092 __set_task_cpu(p, new_cpu); 2056 __set_task_cpu(p, new_cpu);
2093} 2057}
@@ -2112,13 +2076,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2112 2076
2113 /* 2077 /*
2114 * If the task is not on a runqueue (and not running), then 2078 * If the task is not on a runqueue (and not running), then
2115 * it is sufficient to simply update the task's cpu field. 2079 * the next wake-up will properly place the task.
2116 */ 2080 */
2117 if (!p->se.on_rq && !task_running(rq, p)) { 2081 if (!p->se.on_rq && !task_running(rq, p))
2118 update_rq_clock(rq);
2119 set_task_cpu(p, dest_cpu);
2120 return 0; 2082 return 0;
2121 }
2122 2083
2123 init_completion(&req->done); 2084 init_completion(&req->done);
2124 req->task = p; 2085 req->task = p;
@@ -2323,6 +2284,77 @@ void task_oncpu_function_call(struct task_struct *p,
2323 preempt_enable(); 2284 preempt_enable();
2324} 2285}
2325 2286
2287#ifdef CONFIG_SMP
2288static int select_fallback_rq(int cpu, struct task_struct *p)
2289{
2290 int dest_cpu;
2291 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
2292
2293 /* Look for allowed, online CPU in same node. */
2294 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
2295 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
2296 return dest_cpu;
2297
2298 /* Any allowed, online CPU? */
2299 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
2300 if (dest_cpu < nr_cpu_ids)
2301 return dest_cpu;
2302
2303 /* No more Mr. Nice Guy. */
2304 if (dest_cpu >= nr_cpu_ids) {
2305 rcu_read_lock();
2306 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
2307 rcu_read_unlock();
2308 dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
2309
2310 /*
2311 * Don't tell them about moving exiting tasks or
2312 * kernel threads (both mm NULL), since they never
 2313 * leave the kernel.
2314 */
2315 if (p->mm && printk_ratelimit()) {
2316 printk(KERN_INFO "process %d (%s) no "
2317 "longer affine to cpu%d\n",
2318 task_pid_nr(p), p->comm, cpu);
2319 }
2320 }
2321
2322 return dest_cpu;
2323}
2324
2325/*
2326 * Called from:
2327 *
2328 * - fork, @p is stable because it isn't on the tasklist yet
2329 *
2330 * - exec, @p is unstable, retry loop
2331 *
2332 * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
2333 * we should be good.
2334 */
2335static inline
2336int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2337{
2338 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2339
2340 /*
2341 * In order not to call set_task_cpu() on a blocking task we need
2342 * to rely on ttwu() to place the task on a valid ->cpus_allowed
2343 * cpu.
2344 *
2345 * Since this is common to all placement strategies, this lives here.
2346 *
 2347 * [ this allows ->select_task_rq() to simply return task_cpu(p) and
2348 * not worry about this generic constraint ]
2349 */
2350 if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
2351 !cpu_active(cpu)))
2352 cpu = select_fallback_rq(task_cpu(p), p);
2353
2354 return cpu;
2355}
2356#endif
2357
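
select_fallback_rq() degrades gracefully: first an allowed, active CPU in the task's own node, then any allowed active CPU, and only then does it let cpuset rewrite ->cpus_allowed, warning when a user task visibly loses affinity. The cascade in isolation (a sketch of the same three steps; the helper name is hypothetical):

	static int fallback_cpu_sketch(struct task_struct *p, int cpu)
	{
		int dest;

		/* 1) allowed and active, same NUMA node */
		for_each_cpu_and(dest, cpumask_of_node(cpu_to_node(cpu)),
				 cpu_active_mask)
			if (cpumask_test_cpu(dest, &p->cpus_allowed))
				return dest;

		/* 2) allowed and active, anywhere */
		dest = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
		if (dest < nr_cpu_ids)
			return dest;

		/* 3) mask is hopeless: let cpuset widen it, then pick again */
		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
		return cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
	}
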
2326/*** 2358/***
2327 * try_to_wake_up - wake up a thread 2359 * try_to_wake_up - wake up a thread
2328 * @p: the to-be-woken-up thread 2360 * @p: the to-be-woken-up thread
@@ -2374,17 +2406,18 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2374 if (task_contributes_to_load(p)) 2406 if (task_contributes_to_load(p))
2375 rq->nr_uninterruptible--; 2407 rq->nr_uninterruptible--;
2376 p->state = TASK_WAKING; 2408 p->state = TASK_WAKING;
2377 task_rq_unlock(rq, &flags);
2378 2409
2379 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2410 if (p->sched_class->task_waking)
2380 if (cpu != orig_cpu) { 2411 p->sched_class->task_waking(rq, p);
2381 local_irq_save(flags); 2412
2382 rq = cpu_rq(cpu); 2413 __task_rq_unlock(rq);
2383 update_rq_clock(rq); 2414
2415 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2416 if (cpu != orig_cpu)
2384 set_task_cpu(p, cpu); 2417 set_task_cpu(p, cpu);
2385 local_irq_restore(flags); 2418
2386 } 2419 rq = __task_rq_lock(p);
2387 rq = task_rq_lock(p, &flags); 2420 update_rq_clock(rq);
2388 2421
2389 WARN_ON(p->state != TASK_WAKING); 2422 WARN_ON(p->state != TASK_WAKING);
2390 cpu = task_cpu(p); 2423 cpu = task_cpu(p);
@@ -2440,8 +2473,8 @@ out_running:
2440 2473
2441 p->state = TASK_RUNNING; 2474 p->state = TASK_RUNNING;
2442#ifdef CONFIG_SMP 2475#ifdef CONFIG_SMP
2443 if (p->sched_class->task_wake_up) 2476 if (p->sched_class->task_woken)
2444 p->sched_class->task_wake_up(rq, p); 2477 p->sched_class->task_woken(rq, p);
2445 2478
2446 if (unlikely(rq->idle_stamp)) { 2479 if (unlikely(rq->idle_stamp)) {
2447 u64 delta = rq->clock - rq->idle_stamp; 2480 u64 delta = rq->clock - rq->idle_stamp;
@@ -2499,7 +2532,6 @@ static void __sched_fork(struct task_struct *p)
2499 p->se.avg_overlap = 0; 2532 p->se.avg_overlap = 0;
2500 p->se.start_runtime = 0; 2533 p->se.start_runtime = 0;
2501 p->se.avg_wakeup = sysctl_sched_wakeup_granularity; 2534 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2502 p->se.avg_running = 0;
2503 2535
2504#ifdef CONFIG_SCHEDSTATS 2536#ifdef CONFIG_SCHEDSTATS
2505 p->se.wait_start = 0; 2537 p->se.wait_start = 0;
@@ -2521,7 +2553,6 @@ static void __sched_fork(struct task_struct *p)
2521 p->se.nr_failed_migrations_running = 0; 2553 p->se.nr_failed_migrations_running = 0;
2522 p->se.nr_failed_migrations_hot = 0; 2554 p->se.nr_failed_migrations_hot = 0;
2523 p->se.nr_forced_migrations = 0; 2555 p->se.nr_forced_migrations = 0;
2524 p->se.nr_forced2_migrations = 0;
2525 2556
2526 p->se.nr_wakeups = 0; 2557 p->se.nr_wakeups = 0;
2527 p->se.nr_wakeups_sync = 0; 2558 p->se.nr_wakeups_sync = 0;
@@ -2542,14 +2573,6 @@ static void __sched_fork(struct task_struct *p)
2542#ifdef CONFIG_PREEMPT_NOTIFIERS 2573#ifdef CONFIG_PREEMPT_NOTIFIERS
2543 INIT_HLIST_HEAD(&p->preempt_notifiers); 2574 INIT_HLIST_HEAD(&p->preempt_notifiers);
2544#endif 2575#endif
2545
2546 /*
2547 * We mark the process as running here, but have not actually
2548 * inserted it onto the runqueue yet. This guarantees that
2549 * nobody will actually run it, and a signal or other external
2550 * event cannot wake it up and insert it on the runqueue either.
2551 */
2552 p->state = TASK_RUNNING;
2553} 2576}
2554 2577
2555/* 2578/*
@@ -2558,9 +2581,14 @@ static void __sched_fork(struct task_struct *p)
2558void sched_fork(struct task_struct *p, int clone_flags) 2581void sched_fork(struct task_struct *p, int clone_flags)
2559{ 2582{
2560 int cpu = get_cpu(); 2583 int cpu = get_cpu();
2561 unsigned long flags;
2562 2584
2563 __sched_fork(p); 2585 __sched_fork(p);
2586 /*
2587 * We mark the process as waking here. This guarantees that
2588 * nobody will actually run it, and a signal or other external
2589 * event cannot wake it up and insert it on the runqueue either.
2590 */
2591 p->state = TASK_WAKING;
2564 2592
2565 /* 2593 /*
2566 * Revert to default priority/policy on fork if requested. 2594 * Revert to default priority/policy on fork if requested.
@@ -2592,13 +2620,13 @@ void sched_fork(struct task_struct *p, int clone_flags)
2592 if (!rt_prio(p->prio)) 2620 if (!rt_prio(p->prio))
2593 p->sched_class = &fair_sched_class; 2621 p->sched_class = &fair_sched_class;
2594 2622
2623 if (p->sched_class->task_fork)
2624 p->sched_class->task_fork(p);
2625
2595#ifdef CONFIG_SMP 2626#ifdef CONFIG_SMP
2596 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); 2627 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2597#endif 2628#endif
2598 local_irq_save(flags);
2599 update_rq_clock(cpu_rq(cpu));
2600 set_task_cpu(p, cpu); 2629 set_task_cpu(p, cpu);
2601 local_irq_restore(flags);
2602 2630
2603#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2631#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2604 if (likely(sched_info_on())) 2632 if (likely(sched_info_on()))
@@ -2629,24 +2657,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2629 struct rq *rq; 2657 struct rq *rq;
2630 2658
2631 rq = task_rq_lock(p, &flags); 2659 rq = task_rq_lock(p, &flags);
2632 BUG_ON(p->state != TASK_RUNNING); 2660 BUG_ON(p->state != TASK_WAKING);
2661 p->state = TASK_RUNNING;
2633 update_rq_clock(rq); 2662 update_rq_clock(rq);
2634 2663 activate_task(rq, p, 0);
2635 if (!p->sched_class->task_new || !current->se.on_rq) {
2636 activate_task(rq, p, 0);
2637 } else {
2638 /*
2639 * Let the scheduling class do new task startup
2640 * management (if any):
2641 */
2642 p->sched_class->task_new(rq, p);
2643 inc_nr_running(rq);
2644 }
2645 trace_sched_wakeup_new(rq, p, 1); 2664 trace_sched_wakeup_new(rq, p, 1);
2646 check_preempt_curr(rq, p, WF_FORK); 2665 check_preempt_curr(rq, p, WF_FORK);
2647#ifdef CONFIG_SMP 2666#ifdef CONFIG_SMP
2648 if (p->sched_class->task_wake_up) 2667 if (p->sched_class->task_woken)
2649 p->sched_class->task_wake_up(rq, p); 2668 p->sched_class->task_woken(rq, p);
2650#endif 2669#endif
2651 task_rq_unlock(rq, &flags); 2670 task_rq_unlock(rq, &flags);
2652} 2671}
@@ -2798,10 +2817,10 @@ static inline void post_schedule(struct rq *rq)
2798 if (rq->post_schedule) { 2817 if (rq->post_schedule) {
2799 unsigned long flags; 2818 unsigned long flags;
2800 2819
2801 spin_lock_irqsave(&rq->lock, flags); 2820 raw_spin_lock_irqsave(&rq->lock, flags);
2802 if (rq->curr->sched_class->post_schedule) 2821 if (rq->curr->sched_class->post_schedule)
2803 rq->curr->sched_class->post_schedule(rq); 2822 rq->curr->sched_class->post_schedule(rq);
2804 spin_unlock_irqrestore(&rq->lock, flags); 2823 raw_spin_unlock_irqrestore(&rq->lock, flags);
2805 2824
2806 rq->post_schedule = 0; 2825 rq->post_schedule = 0;
2807 } 2826 }
@@ -3083,15 +3102,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
3083{ 3102{
3084 BUG_ON(!irqs_disabled()); 3103 BUG_ON(!irqs_disabled());
3085 if (rq1 == rq2) { 3104 if (rq1 == rq2) {
3086 spin_lock(&rq1->lock); 3105 raw_spin_lock(&rq1->lock);
3087 __acquire(rq2->lock); /* Fake it out ;) */ 3106 __acquire(rq2->lock); /* Fake it out ;) */
3088 } else { 3107 } else {
3089 if (rq1 < rq2) { 3108 if (rq1 < rq2) {
3090 spin_lock(&rq1->lock); 3109 raw_spin_lock(&rq1->lock);
3091 spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); 3110 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
3092 } else { 3111 } else {
3093 spin_lock(&rq2->lock); 3112 raw_spin_lock(&rq2->lock);
3094 spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); 3113 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
3095 } 3114 }
3096 } 3115 }
3097 update_rq_clock(rq1); 3116 update_rq_clock(rq1);
@@ -3108,29 +3127,44 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
3108 __releases(rq1->lock) 3127 __releases(rq1->lock)
3109 __releases(rq2->lock) 3128 __releases(rq2->lock)
3110{ 3129{
3111 spin_unlock(&rq1->lock); 3130 raw_spin_unlock(&rq1->lock);
3112 if (rq1 != rq2) 3131 if (rq1 != rq2)
3113 spin_unlock(&rq2->lock); 3132 raw_spin_unlock(&rq2->lock);
3114 else 3133 else
3115 __release(rq2->lock); 3134 __release(rq2->lock);
3116} 3135}
3117 3136
3118/* 3137/*
3119 * If dest_cpu is allowed for this process, migrate the task to it. 3138 * sched_exec - execve() is a valuable balancing opportunity, because at
3120 * This is accomplished by forcing the cpu_allowed mask to only 3139 * this point the task has the smallest effective memory and cache footprint.
3121 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
3122 * the cpu_allowed mask is restored.
3123 */ 3140 */
3124static void sched_migrate_task(struct task_struct *p, int dest_cpu) 3141void sched_exec(void)
3125{ 3142{
3143 struct task_struct *p = current;
3126 struct migration_req req; 3144 struct migration_req req;
3145 int dest_cpu, this_cpu;
3127 unsigned long flags; 3146 unsigned long flags;
3128 struct rq *rq; 3147 struct rq *rq;
3129 3148
3149again:
3150 this_cpu = get_cpu();
3151 dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
3152 if (dest_cpu == this_cpu) {
3153 put_cpu();
3154 return;
3155 }
3156
3130 rq = task_rq_lock(p, &flags); 3157 rq = task_rq_lock(p, &flags);
3158 put_cpu();
3159
3160 /*
3161 * select_task_rq() can race against ->cpus_allowed
3162 */
3131 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) 3163 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
3132 || unlikely(!cpu_active(dest_cpu))) 3164 || unlikely(!cpu_active(dest_cpu))) {
3133 goto out; 3165 task_rq_unlock(rq, &flags);
3166 goto again;
3167 }
3134 3168
3135 /* force the process onto the specified CPU */ 3169 /* force the process onto the specified CPU */
3136 if (migrate_task(p, dest_cpu, &req)) { 3170 if (migrate_task(p, dest_cpu, &req)) {
@@ -3145,24 +3179,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
3145 3179
3146 return; 3180 return;
3147 } 3181 }
3148out:
3149 task_rq_unlock(rq, &flags); 3182 task_rq_unlock(rq, &flags);
3150} 3183}
3151 3184
3152/* 3185/*
3153 * sched_exec - execve() is a valuable balancing opportunity, because at
3154 * this point the task has the smallest effective memory and cache footprint.
3155 */
3156void sched_exec(void)
3157{
3158 int new_cpu, this_cpu = get_cpu();
3159 new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
3160 put_cpu();
3161 if (new_cpu != this_cpu)
3162 sched_migrate_task(current, new_cpu);
3163}
3164
3165/*
3166 * pull_task - move a task from a remote runqueue to the local runqueue. 3186 * pull_task - move a task from a remote runqueue to the local runqueue.
3167 * Both runqueues must be locked. 3187 * Both runqueues must be locked.
3168 */ 3188 */
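
Folding sched_migrate_task() into sched_exec() above turns a silent failure into a retry: select_task_rq() runs unlocked, so once the rq lock is held the chosen CPU is re-checked against ->cpus_allowed and cpu_active(); if either changed in between, the whole selection is redone. The control-flow skeleton (sketch; the migration request is elided):

	static void exec_balance_sketch(struct task_struct *p)
	{
		unsigned long flags;
		struct rq *rq;
		int dest_cpu;

	again:
		dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
		rq = task_rq_lock(p, &flags);
		/* selection ran unlocked: revalidate under rq->lock */
		if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) ||
		    unlikely(!cpu_active(dest_cpu))) {
			task_rq_unlock(rq, &flags);
			goto again;	/* raced with an affinity change */
		}
		/* dest_cpu is valid under rq->lock: queue the migration here */
		task_rq_unlock(rq, &flags);
	}
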
@@ -3172,10 +3192,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
3172 deactivate_task(src_rq, p, 0); 3192 deactivate_task(src_rq, p, 0);
3173 set_task_cpu(p, this_cpu); 3193 set_task_cpu(p, this_cpu);
3174 activate_task(this_rq, p, 0); 3194 activate_task(this_rq, p, 0);
3175 /*
3176 * Note that idle threads have a prio of MAX_PRIO, for this test
3177 * to be always true for them.
3178 */
3179 check_preempt_curr(this_rq, p, 0); 3195 check_preempt_curr(this_rq, p, 0);
3180} 3196}
3181 3197
@@ -4134,7 +4150,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
4134 unsigned long flags; 4150 unsigned long flags;
4135 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4151 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4136 4152
4137 cpumask_copy(cpus, cpu_online_mask); 4153 cpumask_copy(cpus, cpu_active_mask);
4138 4154
4139 /* 4155 /*
4140 * When power savings policy is enabled for the parent domain, idle 4156 * When power savings policy is enabled for the parent domain, idle
@@ -4207,14 +4223,15 @@ redo:
4207 4223
4208 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { 4224 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
4209 4225
4210 spin_lock_irqsave(&busiest->lock, flags); 4226 raw_spin_lock_irqsave(&busiest->lock, flags);
4211 4227
4212 /* don't kick the migration_thread, if the curr 4228 /* don't kick the migration_thread, if the curr
4213 * task on busiest cpu can't be moved to this_cpu 4229 * task on busiest cpu can't be moved to this_cpu
4214 */ 4230 */
4215 if (!cpumask_test_cpu(this_cpu, 4231 if (!cpumask_test_cpu(this_cpu,
4216 &busiest->curr->cpus_allowed)) { 4232 &busiest->curr->cpus_allowed)) {
4217 spin_unlock_irqrestore(&busiest->lock, flags); 4233 raw_spin_unlock_irqrestore(&busiest->lock,
4234 flags);
4218 all_pinned = 1; 4235 all_pinned = 1;
4219 goto out_one_pinned; 4236 goto out_one_pinned;
4220 } 4237 }
@@ -4224,7 +4241,7 @@ redo:
4224 busiest->push_cpu = this_cpu; 4241 busiest->push_cpu = this_cpu;
4225 active_balance = 1; 4242 active_balance = 1;
4226 } 4243 }
4227 spin_unlock_irqrestore(&busiest->lock, flags); 4244 raw_spin_unlock_irqrestore(&busiest->lock, flags);
4228 if (active_balance) 4245 if (active_balance)
4229 wake_up_process(busiest->migration_thread); 4246 wake_up_process(busiest->migration_thread);
4230 4247
@@ -4297,7 +4314,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
4297 int all_pinned = 0; 4314 int all_pinned = 0;
4298 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4315 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4299 4316
4300 cpumask_copy(cpus, cpu_online_mask); 4317 cpumask_copy(cpus, cpu_active_mask);
4301 4318
4302 /* 4319 /*
4303 * When power savings policy is enabled for the parent domain, idle 4320 * When power savings policy is enabled for the parent domain, idle
@@ -4406,10 +4423,10 @@ redo:
4406 /* 4423 /*
4407 * Should not call ttwu while holding a rq->lock 4424 * Should not call ttwu while holding a rq->lock
4408 */ 4425 */
4409 spin_unlock(&this_rq->lock); 4426 raw_spin_unlock(&this_rq->lock);
4410 if (active_balance) 4427 if (active_balance)
4411 wake_up_process(busiest->migration_thread); 4428 wake_up_process(busiest->migration_thread);
4412 spin_lock(&this_rq->lock); 4429 raw_spin_lock(&this_rq->lock);
4413 4430
4414 } else 4431 } else
4415 sd->nr_balance_failed = 0; 4432 sd->nr_balance_failed = 0;
@@ -4694,7 +4711,7 @@ int select_nohz_load_balancer(int stop_tick)
4694 cpumask_set_cpu(cpu, nohz.cpu_mask); 4711 cpumask_set_cpu(cpu, nohz.cpu_mask);
4695 4712
4696 /* time for ilb owner also to sleep */ 4713 /* time for ilb owner also to sleep */
4697 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 4714 if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
4698 if (atomic_read(&nohz.load_balancer) == cpu) 4715 if (atomic_read(&nohz.load_balancer) == cpu)
4699 atomic_set(&nohz.load_balancer, -1); 4716 atomic_set(&nohz.load_balancer, -1);
4700 return 0; 4717 return 0;
@@ -5278,11 +5295,11 @@ void scheduler_tick(void)
5278 5295
5279 sched_clock_tick(); 5296 sched_clock_tick();
5280 5297
5281 spin_lock(&rq->lock); 5298 raw_spin_lock(&rq->lock);
5282 update_rq_clock(rq); 5299 update_rq_clock(rq);
5283 update_cpu_load(rq); 5300 update_cpu_load(rq);
5284 curr->sched_class->task_tick(rq, curr, 0); 5301 curr->sched_class->task_tick(rq, curr, 0);
5285 spin_unlock(&rq->lock); 5302 raw_spin_unlock(&rq->lock);
5286 5303
5287 perf_event_task_tick(curr, cpu); 5304 perf_event_task_tick(curr, cpu);
5288 5305
@@ -5358,8 +5375,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
5358{ 5375{
5359 struct pt_regs *regs = get_irq_regs(); 5376 struct pt_regs *regs = get_irq_regs();
5360 5377
5361 printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", 5378 pr_err("BUG: scheduling while atomic: %s/%d/0x%08x\n",
5362 prev->comm, prev->pid, preempt_count()); 5379 prev->comm, prev->pid, preempt_count());
5363 5380
5364 debug_show_held_locks(prev); 5381 debug_show_held_locks(prev);
5365 print_modules(); 5382 print_modules();
@@ -5396,13 +5413,14 @@ static inline void schedule_debug(struct task_struct *prev)
5396#endif 5413#endif
5397} 5414}
5398 5415
5399static void put_prev_task(struct rq *rq, struct task_struct *p) 5416static void put_prev_task(struct rq *rq, struct task_struct *prev)
5400{ 5417{
5401 u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; 5418 if (prev->state == TASK_RUNNING) {
5419 u64 runtime = prev->se.sum_exec_runtime;
5402 5420
5403 update_avg(&p->se.avg_running, runtime); 5421 runtime -= prev->se.prev_sum_exec_runtime;
5422 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
5404 5423
5405 if (p->state == TASK_RUNNING) {
5406 /* 5424 /*
5407 * In order to avoid avg_overlap growing stale when we are 5425 * In order to avoid avg_overlap growing stale when we are
5408 * indeed overlapping and hence not getting put to sleep, grow 5426 * indeed overlapping and hence not getting put to sleep, grow
@@ -5412,12 +5430,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
5412 * correlates to the amount of cache footprint a task can 5430 * correlates to the amount of cache footprint a task can
5413 * build up. 5431 * build up.
5414 */ 5432 */
5415 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); 5433 update_avg(&prev->se.avg_overlap, runtime);
5416 update_avg(&p->se.avg_overlap, runtime);
5417 } else {
5418 update_avg(&p->se.avg_running, 0);
5419 } 5434 }
5420 p->sched_class->put_prev_task(rq, p); 5435 prev->sched_class->put_prev_task(rq, prev);
5421} 5436}
5422 5437
5423/* 5438/*
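
With avg_running gone, put_prev_task() folds the clamped runtime of a still-runnable task straight into avg_overlap. update_avg() is, in this era of sched.c, a plain 1/8-weight exponential moving average (definition quoted from memory; treat the shift factor as an assumption):

	static void update_avg(u64 *avg, u64 sample)
	{
		s64 diff = sample - *avg;

		*avg += diff >> 3;	/* new = old + (sample - old) / 8 */
	}
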
@@ -5478,7 +5493,7 @@ need_resched_nonpreemptible:
5478 if (sched_feat(HRTICK)) 5493 if (sched_feat(HRTICK))
5479 hrtick_clear(rq); 5494 hrtick_clear(rq);
5480 5495
5481 spin_lock_irq(&rq->lock); 5496 raw_spin_lock_irq(&rq->lock);
5482 update_rq_clock(rq); 5497 update_rq_clock(rq);
5483 clear_tsk_need_resched(prev); 5498 clear_tsk_need_resched(prev);
5484 5499
@@ -5514,7 +5529,7 @@ need_resched_nonpreemptible:
5514 cpu = smp_processor_id(); 5529 cpu = smp_processor_id();
5515 rq = cpu_rq(cpu); 5530 rq = cpu_rq(cpu);
5516 } else 5531 } else
5517 spin_unlock_irq(&rq->lock); 5532 raw_spin_unlock_irq(&rq->lock);
5518 5533
5519 post_schedule(rq); 5534 post_schedule(rq);
5520 5535
@@ -5931,14 +5946,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
5931 */ 5946 */
5932bool try_wait_for_completion(struct completion *x) 5947bool try_wait_for_completion(struct completion *x)
5933{ 5948{
5949 unsigned long flags;
5934 int ret = 1; 5950 int ret = 1;
5935 5951
5936 spin_lock_irq(&x->wait.lock); 5952 spin_lock_irqsave(&x->wait.lock, flags);
5937 if (!x->done) 5953 if (!x->done)
5938 ret = 0; 5954 ret = 0;
5939 else 5955 else
5940 x->done--; 5956 x->done--;
5941 spin_unlock_irq(&x->wait.lock); 5957 spin_unlock_irqrestore(&x->wait.lock, flags);
5942 return ret; 5958 return ret;
5943} 5959}
5944EXPORT_SYMBOL(try_wait_for_completion); 5960EXPORT_SYMBOL(try_wait_for_completion);
@@ -5953,12 +5969,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
5953 */ 5969 */
5954bool completion_done(struct completion *x) 5970bool completion_done(struct completion *x)
5955{ 5971{
5972 unsigned long flags;
5956 int ret = 1; 5973 int ret = 1;
5957 5974
5958 spin_lock_irq(&x->wait.lock); 5975 spin_lock_irqsave(&x->wait.lock, flags);
5959 if (!x->done) 5976 if (!x->done)
5960 ret = 0; 5977 ret = 0;
5961 spin_unlock_irq(&x->wait.lock); 5978 spin_unlock_irqrestore(&x->wait.lock, flags);
5962 return ret; 5979 return ret;
5963} 5980}
5964EXPORT_SYMBOL(completion_done); 5981EXPORT_SYMBOL(completion_done);
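The irqsave conversion in these two helpers is not cosmetic: spin_unlock_irq() unconditionally re-enables interrupts, which would corrupt the state of a caller that entered with them disabled. A hedged sketch of such a caller (hypothetical driver handler, not part of this patch):

#include <linux/completion.h>
#include <linux/interrupt.h>

/* Hardirq handlers run with interrupts disabled; the irqsave variants
 * above make this non-blocking poll legal from here. */
static irqreturn_t demo_done_irq(int irq, void *data)
{
        struct completion *done = data;

        if (try_wait_for_completion(done))
                return IRQ_HANDLED;

        return IRQ_NONE;
}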
@@ -6343,7 +6360,7 @@ recheck:
6343 * make sure no PI-waiters arrive (or leave) while we are 6360 * make sure no PI-waiters arrive (or leave) while we are
6344 * changing the priority of the task: 6361 * changing the priority of the task:
6345 */ 6362 */
6346 spin_lock_irqsave(&p->pi_lock, flags); 6363 raw_spin_lock_irqsave(&p->pi_lock, flags);
6347 /* 6364 /*
6348 * To be able to change p->policy safely, the appropriate 6365
6349 * runqueue lock must be held. 6366 * runqueue lock must be held.
@@ -6353,7 +6370,7 @@ recheck:
6353 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 6370 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
6354 policy = oldpolicy = -1; 6371 policy = oldpolicy = -1;
6355 __task_rq_unlock(rq); 6372 __task_rq_unlock(rq);
6356 spin_unlock_irqrestore(&p->pi_lock, flags); 6373 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
6357 goto recheck; 6374 goto recheck;
6358 } 6375 }
6359 update_rq_clock(rq); 6376 update_rq_clock(rq);
@@ -6377,7 +6394,7 @@ recheck:
6377 check_class_changed(rq, p, prev_class, oldprio, running); 6394 check_class_changed(rq, p, prev_class, oldprio, running);
6378 } 6395 }
6379 __task_rq_unlock(rq); 6396 __task_rq_unlock(rq);
6380 spin_unlock_irqrestore(&p->pi_lock, flags); 6397 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
6381 6398
6382 rt_mutex_adjust_pi(p); 6399 rt_mutex_adjust_pi(p);
6383 6400
@@ -6477,7 +6494,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6477 return -EINVAL; 6494 return -EINVAL;
6478 6495
6479 retval = -ESRCH; 6496 retval = -ESRCH;
6480 read_lock(&tasklist_lock); 6497 rcu_read_lock();
6481 p = find_process_by_pid(pid); 6498 p = find_process_by_pid(pid);
6482 if (p) { 6499 if (p) {
6483 retval = security_task_getscheduler(p); 6500 retval = security_task_getscheduler(p);
@@ -6485,7 +6502,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6485 retval = p->policy 6502 retval = p->policy
6486 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); 6503 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
6487 } 6504 }
6488 read_unlock(&tasklist_lock); 6505 rcu_read_unlock();
6489 return retval; 6506 return retval;
6490} 6507}
6491 6508
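These syscalls swap tasklist_lock for RCU because the pid-to-task lookup only needs the task to stay valid for the duration of the read-side section. A sketch of the general pattern, with find_task_by_vpid() standing in for the file-local find_process_by_pid():

#include <linux/rcupdate.h>
#include <linux/sched.h>

static struct task_struct *demo_lookup(pid_t pid)
{
        struct task_struct *p;

        rcu_read_lock();
        p = find_task_by_vpid(pid);     /* only valid inside this section */
        if (p)
                get_task_struct(p);     /* pin for use after rcu_read_unlock() */
        rcu_read_unlock();

        return p;                       /* caller drops with put_task_struct() */
}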
@@ -6503,7 +6520,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6503 if (!param || pid < 0) 6520 if (!param || pid < 0)
6504 return -EINVAL; 6521 return -EINVAL;
6505 6522
6506 read_lock(&tasklist_lock); 6523 rcu_read_lock();
6507 p = find_process_by_pid(pid); 6524 p = find_process_by_pid(pid);
6508 retval = -ESRCH; 6525 retval = -ESRCH;
6509 if (!p) 6526 if (!p)
@@ -6514,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6514 goto out_unlock; 6531 goto out_unlock;
6515 6532
6516 lp.sched_priority = p->rt_priority; 6533 lp.sched_priority = p->rt_priority;
6517 read_unlock(&tasklist_lock); 6534 rcu_read_unlock();
6518 6535
6519 /* 6536 /*
6520 * This one might sleep, we cannot do it with a spinlock held ... 6537 * This one might sleep, we cannot do it with a spinlock held ...
@@ -6524,7 +6541,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6524 return retval; 6541 return retval;
6525 6542
6526out_unlock: 6543out_unlock:
6527 read_unlock(&tasklist_lock); 6544 rcu_read_unlock();
6528 return retval; 6545 return retval;
6529} 6546}
6530 6547
@@ -6535,22 +6552,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
6535 int retval; 6552 int retval;
6536 6553
6537 get_online_cpus(); 6554 get_online_cpus();
6538 read_lock(&tasklist_lock); 6555 rcu_read_lock();
6539 6556
6540 p = find_process_by_pid(pid); 6557 p = find_process_by_pid(pid);
6541 if (!p) { 6558 if (!p) {
6542 read_unlock(&tasklist_lock); 6559 rcu_read_unlock();
6543 put_online_cpus(); 6560 put_online_cpus();
6544 return -ESRCH; 6561 return -ESRCH;
6545 } 6562 }
6546 6563
6547 /* 6564 /* Prevent p from going away */
6548 * It is not safe to call set_cpus_allowed with the
6549 * tasklist_lock held. We will bump the task_struct's
6550 * usage count and then drop tasklist_lock.
6551 */
6552 get_task_struct(p); 6565 get_task_struct(p);
6553 read_unlock(&tasklist_lock); 6566 rcu_read_unlock();
6554 6567
6555 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { 6568 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
6556 retval = -ENOMEM; 6569 retval = -ENOMEM;
@@ -6631,10 +6644,12 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
6631long sched_getaffinity(pid_t pid, struct cpumask *mask) 6644long sched_getaffinity(pid_t pid, struct cpumask *mask)
6632{ 6645{
6633 struct task_struct *p; 6646 struct task_struct *p;
6647 unsigned long flags;
6648 struct rq *rq;
6634 int retval; 6649 int retval;
6635 6650
6636 get_online_cpus(); 6651 get_online_cpus();
6637 read_lock(&tasklist_lock); 6652 rcu_read_lock();
6638 6653
6639 retval = -ESRCH; 6654 retval = -ESRCH;
6640 p = find_process_by_pid(pid); 6655 p = find_process_by_pid(pid);
@@ -6645,10 +6660,12 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
6645 if (retval) 6660 if (retval)
6646 goto out_unlock; 6661 goto out_unlock;
6647 6662
6663 rq = task_rq_lock(p, &flags);
6648 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); 6664 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
6665 task_rq_unlock(rq, &flags);
6649 6666
6650out_unlock: 6667out_unlock:
6651 read_unlock(&tasklist_lock); 6668 rcu_read_unlock();
6652 put_online_cpus(); 6669 put_online_cpus();
6653 6670
6654 return retval; 6671 return retval;
@@ -6703,7 +6720,7 @@ SYSCALL_DEFINE0(sched_yield)
6703 */ 6720 */
6704 __release(rq->lock); 6721 __release(rq->lock);
6705 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 6722 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
6706 _raw_spin_unlock(&rq->lock); 6723 do_raw_spin_unlock(&rq->lock);
6707 preempt_enable_no_resched(); 6724 preempt_enable_no_resched();
6708 6725
6709 schedule(); 6726 schedule();
@@ -6883,6 +6900,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6883{ 6900{
6884 struct task_struct *p; 6901 struct task_struct *p;
6885 unsigned int time_slice; 6902 unsigned int time_slice;
6903 unsigned long flags;
6904 struct rq *rq;
6886 int retval; 6905 int retval;
6887 struct timespec t; 6906 struct timespec t;
6888 6907
@@ -6890,7 +6909,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6890 return -EINVAL; 6909 return -EINVAL;
6891 6910
6892 retval = -ESRCH; 6911 retval = -ESRCH;
6893 read_lock(&tasklist_lock); 6912 rcu_read_lock();
6894 p = find_process_by_pid(pid); 6913 p = find_process_by_pid(pid);
6895 if (!p) 6914 if (!p)
6896 goto out_unlock; 6915 goto out_unlock;
@@ -6899,15 +6918,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6899 if (retval) 6918 if (retval)
6900 goto out_unlock; 6919 goto out_unlock;
6901 6920
6902 time_slice = p->sched_class->get_rr_interval(p); 6921 rq = task_rq_lock(p, &flags);
6922 time_slice = p->sched_class->get_rr_interval(rq, p);
6923 task_rq_unlock(rq, &flags);
6903 6924
6904 read_unlock(&tasklist_lock); 6925 rcu_read_unlock();
6905 jiffies_to_timespec(time_slice, &t); 6926 jiffies_to_timespec(time_slice, &t);
6906 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; 6927 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
6907 return retval; 6928 return retval;
6908 6929
6909out_unlock: 6930out_unlock:
6910 read_unlock(&tasklist_lock); 6931 rcu_read_unlock();
6911 return retval; 6932 return retval;
6912} 6933}
6913 6934
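Since get_rr_interval() now takes the runqueue, the caller must pin it first; task_rq_lock() locks whichever rq the task currently sits on, with IRQs disabled. A minimal sketch of the pattern introduced above:

static unsigned int demo_read_timeslice(struct task_struct *p)
{
        unsigned long flags;
        unsigned int slice;
        struct rq *rq;

        rq = task_rq_lock(p, &flags);   /* p cannot migrate while held */
        slice = p->sched_class->get_rr_interval(rq, p);
        task_rq_unlock(rq, &flags);

        return slice;
}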
@@ -6919,23 +6940,23 @@ void sched_show_task(struct task_struct *p)
6919 unsigned state; 6940 unsigned state;
6920 6941
6921 state = p->state ? __ffs(p->state) + 1 : 0; 6942 state = p->state ? __ffs(p->state) + 1 : 0;
6922 printk(KERN_INFO "%-13.13s %c", p->comm, 6943 pr_info("%-13.13s %c", p->comm,
6923 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); 6944 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
6924#if BITS_PER_LONG == 32 6945#if BITS_PER_LONG == 32
6925 if (state == TASK_RUNNING) 6946 if (state == TASK_RUNNING)
6926 printk(KERN_CONT " running "); 6947 pr_cont(" running ");
6927 else 6948 else
6928 printk(KERN_CONT " %08lx ", thread_saved_pc(p)); 6949 pr_cont(" %08lx ", thread_saved_pc(p));
6929#else 6950#else
6930 if (state == TASK_RUNNING) 6951 if (state == TASK_RUNNING)
6931 printk(KERN_CONT " running task "); 6952 pr_cont(" running task ");
6932 else 6953 else
6933 printk(KERN_CONT " %016lx ", thread_saved_pc(p)); 6954 pr_cont(" %016lx ", thread_saved_pc(p));
6934#endif 6955#endif
6935#ifdef CONFIG_DEBUG_STACK_USAGE 6956#ifdef CONFIG_DEBUG_STACK_USAGE
6936 free = stack_not_used(p); 6957 free = stack_not_used(p);
6937#endif 6958#endif
6938 printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, 6959 pr_cont("%5lu %5d %6d 0x%08lx\n", free,
6939 task_pid_nr(p), task_pid_nr(p->real_parent), 6960 task_pid_nr(p), task_pid_nr(p->real_parent),
6940 (unsigned long)task_thread_info(p)->flags); 6961 (unsigned long)task_thread_info(p)->flags);
6941 6962
@@ -6947,11 +6968,9 @@ void show_state_filter(unsigned long state_filter)
6947 struct task_struct *g, *p; 6968 struct task_struct *g, *p;
6948 6969
6949#if BITS_PER_LONG == 32 6970#if BITS_PER_LONG == 32
6950 printk(KERN_INFO 6971 pr_info(" task PC stack pid father\n");
6951 " task PC stack pid father\n");
6952#else 6972#else
6953 printk(KERN_INFO 6973 pr_info(" task PC stack pid father\n");
6954 " task PC stack pid father\n");
6955#endif 6974#endif
6956 read_lock(&tasklist_lock); 6975 read_lock(&tasklist_lock);
6957 do_each_thread(g, p) { 6976 do_each_thread(g, p) {
@@ -6995,12 +7014,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
6995 struct rq *rq = cpu_rq(cpu); 7014 struct rq *rq = cpu_rq(cpu);
6996 unsigned long flags; 7015 unsigned long flags;
6997 7016
6998 spin_lock_irqsave(&rq->lock, flags); 7017 raw_spin_lock_irqsave(&rq->lock, flags);
6999 7018
7000 __sched_fork(idle); 7019 __sched_fork(idle);
7020 idle->state = TASK_RUNNING;
7001 idle->se.exec_start = sched_clock(); 7021 idle->se.exec_start = sched_clock();
7002 7022
7003 idle->prio = idle->normal_prio = MAX_PRIO;
7004 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 7023 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
7005 __set_task_cpu(idle, cpu); 7024 __set_task_cpu(idle, cpu);
7006 7025
@@ -7008,7 +7027,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
7008#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 7027#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
7009 idle->oncpu = 1; 7028 idle->oncpu = 1;
7010#endif 7029#endif
7011 spin_unlock_irqrestore(&rq->lock, flags); 7030 raw_spin_unlock_irqrestore(&rq->lock, flags);
7012 7031
7013 /* Set the preempt count _outside_ the spinlocks! */ 7032 /* Set the preempt count _outside_ the spinlocks! */
7014#if defined(CONFIG_PREEMPT) 7033#if defined(CONFIG_PREEMPT)
@@ -7041,22 +7060,43 @@ cpumask_var_t nohz_cpu_mask;
7041 * 7060 *
7042 * This idea comes from the SD scheduler of Con Kolivas: 7061 * This idea comes from the SD scheduler of Con Kolivas:
7043 */ 7062 */
7044static inline void sched_init_granularity(void) 7063static int get_update_sysctl_factor(void)
7045{ 7064{
7046 unsigned int factor = 1 + ilog2(num_online_cpus()); 7065 unsigned int cpus = min_t(int, num_online_cpus(), 8);
7047 const unsigned long limit = 200000000; 7066 unsigned int factor;
7067
7068 switch (sysctl_sched_tunable_scaling) {
7069 case SCHED_TUNABLESCALING_NONE:
7070 factor = 1;
7071 break;
7072 case SCHED_TUNABLESCALING_LINEAR:
7073 factor = cpus;
7074 break;
7075 case SCHED_TUNABLESCALING_LOG:
7076 default:
7077 factor = 1 + ilog2(cpus);
7078 break;
7079 }
7048 7080
7049 sysctl_sched_min_granularity *= factor; 7081 return factor;
7050 if (sysctl_sched_min_granularity > limit) 7082}
7051 sysctl_sched_min_granularity = limit;
7052 7083
7053 sysctl_sched_latency *= factor; 7084static void update_sysctl(void)
7054 if (sysctl_sched_latency > limit) 7085{
7055 sysctl_sched_latency = limit; 7086 unsigned int factor = get_update_sysctl_factor();
7056 7087
7057 sysctl_sched_wakeup_granularity *= factor; 7088#define SET_SYSCTL(name) \
7089 (sysctl_##name = (factor) * normalized_sysctl_##name)
7090 SET_SYSCTL(sched_min_granularity);
7091 SET_SYSCTL(sched_latency);
7092 SET_SYSCTL(sched_wakeup_granularity);
7093 SET_SYSCTL(sched_shares_ratelimit);
7094#undef SET_SYSCTL
7095}
7058 7096
7059 sysctl_sched_shares_ratelimit *= factor; 7097static inline void sched_init_granularity(void)
7098{
7099 update_sysctl();
7060} 7100}
7061 7101
7062#ifdef CONFIG_SMP 7102#ifdef CONFIG_SMP
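A standalone worked example of the factor computation, assuming a box with 16 online CPUs (capped at 8 by the min_t above): NONE yields 1, LINEAR yields 8, and the default LOG yields 1 + ilog2(8) = 4, so a normalized 5 ms latency becomes an effective 20 ms.

#include <stdio.h>

static unsigned int ilog2_u(unsigned int x)     /* floor(log2(x)) */
{
        unsigned int r = 0;

        while (x >>= 1)
                r++;
        return r;
}

int main(void)
{
        unsigned int cpus = 16 < 8 ? 16 : 8;            /* min(ncpus, 8) */
        unsigned int factor = 1 + ilog2_u(cpus);        /* LOG scaling */

        printf("factor = %u, latency = %u ns\n",
               factor, factor * 5000000U);              /* 4, 20000000 */
        return 0;
}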
@@ -7092,8 +7132,24 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7092 struct rq *rq; 7132 struct rq *rq;
7093 int ret = 0; 7133 int ret = 0;
7094 7134
7135 /*
7136 * Since we rely on wake-ups to migrate sleeping tasks, don't change
7137 * the ->cpus_allowed mask from under waking tasks, which would be
7138 * possible when we change rq->lock in ttwu(), so synchronize against
7139 * TASK_WAKING to avoid that.
7140 */
7141again:
7142 while (p->state == TASK_WAKING)
7143 cpu_relax();
7144
7095 rq = task_rq_lock(p, &flags); 7145 rq = task_rq_lock(p, &flags);
7096 if (!cpumask_intersects(new_mask, cpu_online_mask)) { 7146
7147 if (p->state == TASK_WAKING) {
7148 task_rq_unlock(rq, &flags);
7149 goto again;
7150 }
7151
7152 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7097 ret = -EINVAL; 7153 ret = -EINVAL;
7098 goto out; 7154 goto out;
7099 } 7155 }
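The comment above describes a cheap lock-free wait followed by a locked recheck; the recheck is required because the task can re-enter TASK_WAKING in the window between the spin and acquiring rq->lock. A generalized sketch of that pattern with a hypothetical busy flag (READ_ONCE() is used as the modern spelling of the era's ACCESS_ONCE()):

struct demo_obj {
        raw_spinlock_t lock;
        bool busy;                      /* analogous to p->state == TASK_WAKING */
};

static void demo_wait_idle(struct demo_obj *o)
{
again:
        while (READ_ONCE(o->busy))      /* wait without the lock */
                cpu_relax();

        raw_spin_lock(&o->lock);
        if (READ_ONCE(o->busy)) {       /* raced: flag re-set before we locked */
                raw_spin_unlock(&o->lock);
                goto again;
        }
        /* o->busy is stably false while the lock is held */
        raw_spin_unlock(&o->lock);
}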
@@ -7115,7 +7171,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7115 if (cpumask_test_cpu(task_cpu(p), new_mask)) 7171 if (cpumask_test_cpu(task_cpu(p), new_mask))
7116 goto out; 7172 goto out;
7117 7173
7118 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { 7174 if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
7119 /* Need help from migration thread: drop lock and wait. */ 7175 /* Need help from migration thread: drop lock and wait. */
7120 struct task_struct *mt = rq->migration_thread; 7176 struct task_struct *mt = rq->migration_thread;
7121 7177
@@ -7148,7 +7204,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
7148static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 7204static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7149{ 7205{
7150 struct rq *rq_dest, *rq_src; 7206 struct rq *rq_dest, *rq_src;
7151 int ret = 0, on_rq; 7207 int ret = 0;
7152 7208
7153 if (unlikely(!cpu_active(dest_cpu))) 7209 if (unlikely(!cpu_active(dest_cpu)))
7154 return ret; 7210 return ret;
@@ -7164,12 +7220,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7164 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 7220 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7165 goto fail; 7221 goto fail;
7166 7222
7167 on_rq = p->se.on_rq; 7223 /*
7168 if (on_rq) 7224 * If we're not on a rq, the next wake-up will ensure we're
7225 * placed properly.
7226 */
7227 if (p->se.on_rq) {
7169 deactivate_task(rq_src, p, 0); 7228 deactivate_task(rq_src, p, 0);
7170 7229 set_task_cpu(p, dest_cpu);
7171 set_task_cpu(p, dest_cpu);
7172 if (on_rq) {
7173 activate_task(rq_dest, p, 0); 7230 activate_task(rq_dest, p, 0);
7174 check_preempt_curr(rq_dest, p, 0); 7231 check_preempt_curr(rq_dest, p, 0);
7175 } 7232 }
@@ -7204,10 +7261,10 @@ static int migration_thread(void *data)
7204 struct migration_req *req; 7261 struct migration_req *req;
7205 struct list_head *head; 7262 struct list_head *head;
7206 7263
7207 spin_lock_irq(&rq->lock); 7264 raw_spin_lock_irq(&rq->lock);
7208 7265
7209 if (cpu_is_offline(cpu)) { 7266 if (cpu_is_offline(cpu)) {
7210 spin_unlock_irq(&rq->lock); 7267 raw_spin_unlock_irq(&rq->lock);
7211 break; 7268 break;
7212 } 7269 }
7213 7270
@@ -7219,7 +7276,7 @@ static int migration_thread(void *data)
7219 head = &rq->migration_queue; 7276 head = &rq->migration_queue;
7220 7277
7221 if (list_empty(head)) { 7278 if (list_empty(head)) {
7222 spin_unlock_irq(&rq->lock); 7279 raw_spin_unlock_irq(&rq->lock);
7223 schedule(); 7280 schedule();
7224 set_current_state(TASK_INTERRUPTIBLE); 7281 set_current_state(TASK_INTERRUPTIBLE);
7225 continue; 7282 continue;
@@ -7228,14 +7285,14 @@ static int migration_thread(void *data)
7228 list_del_init(head->next); 7285 list_del_init(head->next);
7229 7286
7230 if (req->task != NULL) { 7287 if (req->task != NULL) {
7231 spin_unlock(&rq->lock); 7288 raw_spin_unlock(&rq->lock);
7232 __migrate_task(req->task, cpu, req->dest_cpu); 7289 __migrate_task(req->task, cpu, req->dest_cpu);
7233 } else if (likely(cpu == (badcpu = smp_processor_id()))) { 7290 } else if (likely(cpu == (badcpu = smp_processor_id()))) {
7234 req->dest_cpu = RCU_MIGRATION_GOT_QS; 7291 req->dest_cpu = RCU_MIGRATION_GOT_QS;
7235 spin_unlock(&rq->lock); 7292 raw_spin_unlock(&rq->lock);
7236 } else { 7293 } else {
7237 req->dest_cpu = RCU_MIGRATION_MUST_SYNC; 7294 req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
7238 spin_unlock(&rq->lock); 7295 raw_spin_unlock(&rq->lock);
7239 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); 7296 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
7240 } 7297 }
7241 local_irq_enable(); 7298 local_irq_enable();
@@ -7265,37 +7322,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
7265static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 7322static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
7266{ 7323{
7267 int dest_cpu; 7324 int dest_cpu;
7268 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
7269 7325
7270again: 7326again:
7271 /* Look for allowed, online CPU in same node. */ 7327 dest_cpu = select_fallback_rq(dead_cpu, p);
7272 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
7273 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7274 goto move;
7275
7276 /* Any allowed, online CPU? */
7277 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
7278 if (dest_cpu < nr_cpu_ids)
7279 goto move;
7280 7328
7281 /* No more Mr. Nice Guy. */
7282 if (dest_cpu >= nr_cpu_ids) {
7283 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
7284 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
7285
7286 /*
7287 * Don't tell them about moving exiting tasks or
7288 * kernel threads (both mm NULL), since they never
7289 * leave kernel.
7290 */
7291 if (p->mm && printk_ratelimit()) {
7292 printk(KERN_INFO "process %d (%s) no "
7293 "longer affine to cpu%d\n",
7294 task_pid_nr(p), p->comm, dead_cpu);
7295 }
7296 }
7297
7298move:
7299 /* It can have affinity changed while we were choosing. */ 7329 /* It can have affinity changed while we were choosing. */
7300 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) 7330 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
7301 goto again; 7331 goto again;
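The fallback cascade deleted above is not gone; it moved into select_fallback_rq() so other callers can share it. A simplified sketch of the cascade, reconstructed from the removed lines (not the exact in-tree body):

static int demo_select_fallback_rq(int dead_cpu, struct task_struct *p)
{
        const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
        int dest_cpu;

        /* Allowed, online CPU on the same node: cheapest migration. */
        for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
                if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                        return dest_cpu;

        /* Any allowed, online CPU. */
        dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
        if (dest_cpu < nr_cpu_ids)
                return dest_cpu;

        /* No more Mr. Nice Guy: let cpuset widen the mask and pick again. */
        cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
        return cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
}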
@@ -7310,7 +7340,7 @@ move:
7310 */ 7340 */
7311static void migrate_nr_uninterruptible(struct rq *rq_src) 7341static void migrate_nr_uninterruptible(struct rq *rq_src)
7312{ 7342{
7313 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); 7343 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
7314 unsigned long flags; 7344 unsigned long flags;
7315 7345
7316 local_irq_save(flags); 7346 local_irq_save(flags);
@@ -7358,14 +7388,14 @@ void sched_idle_next(void)
7358 * Strictly not necessary since the rest of the CPUs are stopped by now 7388
7359 * and interrupts disabled on the current cpu. 7389 * and interrupts disabled on the current cpu.
7360 */ 7390 */
7361 spin_lock_irqsave(&rq->lock, flags); 7391 raw_spin_lock_irqsave(&rq->lock, flags);
7362 7392
7363 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); 7393 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
7364 7394
7365 update_rq_clock(rq); 7395 update_rq_clock(rq);
7366 activate_task(rq, p, 0); 7396 activate_task(rq, p, 0);
7367 7397
7368 spin_unlock_irqrestore(&rq->lock, flags); 7398 raw_spin_unlock_irqrestore(&rq->lock, flags);
7369} 7399}
7370 7400
7371/* 7401/*
@@ -7401,9 +7431,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
7401 * that's OK. No task can be added to this CPU, so iteration is 7431 * that's OK. No task can be added to this CPU, so iteration is
7402 * fine. 7432 * fine.
7403 */ 7433 */
7404 spin_unlock_irq(&rq->lock); 7434 raw_spin_unlock_irq(&rq->lock);
7405 move_task_off_dead_cpu(dead_cpu, p); 7435 move_task_off_dead_cpu(dead_cpu, p);
7406 spin_lock_irq(&rq->lock); 7436 raw_spin_lock_irq(&rq->lock);
7407 7437
7408 put_task_struct(p); 7438 put_task_struct(p);
7409} 7439}
@@ -7563,7 +7593,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
7563static struct ctl_table_header *sd_sysctl_header; 7593static struct ctl_table_header *sd_sysctl_header;
7564static void register_sched_domain_sysctl(void) 7594static void register_sched_domain_sysctl(void)
7565{ 7595{
7566 int i, cpu_num = num_online_cpus(); 7596 int i, cpu_num = num_possible_cpus();
7567 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); 7597 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
7568 char buf[32]; 7598 char buf[32];
7569 7599
@@ -7573,7 +7603,7 @@ static void register_sched_domain_sysctl(void)
7573 if (entry == NULL) 7603 if (entry == NULL)
7574 return; 7604 return;
7575 7605
7576 for_each_online_cpu(i) { 7606 for_each_possible_cpu(i) {
7577 snprintf(buf, 32, "cpu%d", i); 7607 snprintf(buf, 32, "cpu%d", i);
7578 entry->procname = kstrdup(buf, GFP_KERNEL); 7608 entry->procname = kstrdup(buf, GFP_KERNEL);
7579 entry->mode = 0555; 7609 entry->mode = 0555;
@@ -7669,13 +7699,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7669 7699
7670 /* Update our root-domain */ 7700 /* Update our root-domain */
7671 rq = cpu_rq(cpu); 7701 rq = cpu_rq(cpu);
7672 spin_lock_irqsave(&rq->lock, flags); 7702 raw_spin_lock_irqsave(&rq->lock, flags);
7673 if (rq->rd) { 7703 if (rq->rd) {
7674 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7704 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7675 7705
7676 set_rq_online(rq); 7706 set_rq_online(rq);
7677 } 7707 }
7678 spin_unlock_irqrestore(&rq->lock, flags); 7708 raw_spin_unlock_irqrestore(&rq->lock, flags);
7679 break; 7709 break;
7680 7710
7681#ifdef CONFIG_HOTPLUG_CPU 7711#ifdef CONFIG_HOTPLUG_CPU
@@ -7700,14 +7730,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7700 put_task_struct(rq->migration_thread); 7730 put_task_struct(rq->migration_thread);
7701 rq->migration_thread = NULL; 7731 rq->migration_thread = NULL;
7702 /* Idle task back to normal (off runqueue, low prio) */ 7732 /* Idle task back to normal (off runqueue, low prio) */
7703 spin_lock_irq(&rq->lock); 7733 raw_spin_lock_irq(&rq->lock);
7704 update_rq_clock(rq); 7734 update_rq_clock(rq);
7705 deactivate_task(rq, rq->idle, 0); 7735 deactivate_task(rq, rq->idle, 0);
7706 rq->idle->static_prio = MAX_PRIO;
7707 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 7736 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
7708 rq->idle->sched_class = &idle_sched_class; 7737 rq->idle->sched_class = &idle_sched_class;
7709 migrate_dead_tasks(cpu); 7738 migrate_dead_tasks(cpu);
7710 spin_unlock_irq(&rq->lock); 7739 raw_spin_unlock_irq(&rq->lock);
7711 cpuset_unlock(); 7740 cpuset_unlock();
7712 migrate_nr_uninterruptible(rq); 7741 migrate_nr_uninterruptible(rq);
7713 BUG_ON(rq->nr_running != 0); 7742 BUG_ON(rq->nr_running != 0);
@@ -7717,30 +7746,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7717 * they didn't take sched_hotcpu_mutex. Just wake up 7746 * they didn't take sched_hotcpu_mutex. Just wake up
7718 * the requestors. 7747 * the requestors.
7719 */ 7748 */
7720 spin_lock_irq(&rq->lock); 7749 raw_spin_lock_irq(&rq->lock);
7721 while (!list_empty(&rq->migration_queue)) { 7750 while (!list_empty(&rq->migration_queue)) {
7722 struct migration_req *req; 7751 struct migration_req *req;
7723 7752
7724 req = list_entry(rq->migration_queue.next, 7753 req = list_entry(rq->migration_queue.next,
7725 struct migration_req, list); 7754 struct migration_req, list);
7726 list_del_init(&req->list); 7755 list_del_init(&req->list);
7727 spin_unlock_irq(&rq->lock); 7756 raw_spin_unlock_irq(&rq->lock);
7728 complete(&req->done); 7757 complete(&req->done);
7729 spin_lock_irq(&rq->lock); 7758 raw_spin_lock_irq(&rq->lock);
7730 } 7759 }
7731 spin_unlock_irq(&rq->lock); 7760 raw_spin_unlock_irq(&rq->lock);
7732 break; 7761 break;
7733 7762
7734 case CPU_DYING: 7763 case CPU_DYING:
7735 case CPU_DYING_FROZEN: 7764 case CPU_DYING_FROZEN:
7736 /* Update our root-domain */ 7765 /* Update our root-domain */
7737 rq = cpu_rq(cpu); 7766 rq = cpu_rq(cpu);
7738 spin_lock_irqsave(&rq->lock, flags); 7767 raw_spin_lock_irqsave(&rq->lock, flags);
7739 if (rq->rd) { 7768 if (rq->rd) {
7740 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7769 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7741 set_rq_offline(rq); 7770 set_rq_offline(rq);
7742 } 7771 }
7743 spin_unlock_irqrestore(&rq->lock, flags); 7772 raw_spin_unlock_irqrestore(&rq->lock, flags);
7744 break; 7773 break;
7745#endif 7774#endif
7746 } 7775 }
@@ -7799,48 +7828,44 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7799 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 7828 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
7800 7829
7801 if (!(sd->flags & SD_LOAD_BALANCE)) { 7830 if (!(sd->flags & SD_LOAD_BALANCE)) {
7802 printk("does not load-balance\n"); 7831 pr_cont("does not load-balance\n");
7803 if (sd->parent) 7832 if (sd->parent)
7804 printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" 7833 pr_err("ERROR: !SD_LOAD_BALANCE domain has parent\n");
7805 " has parent");
7806 return -1; 7834 return -1;
7807 } 7835 }
7808 7836
7809 printk(KERN_CONT "span %s level %s\n", str, sd->name); 7837 pr_cont("span %s level %s\n", str, sd->name);
7810 7838
7811 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { 7839 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
7812 printk(KERN_ERR "ERROR: domain->span does not contain " 7840 pr_err("ERROR: domain->span does not contain CPU%d\n", cpu);
7813 "CPU%d\n", cpu);
7814 } 7841 }
7815 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { 7842 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
7816 printk(KERN_ERR "ERROR: domain->groups does not contain" 7843 pr_err("ERROR: domain->groups does not contain CPU%d\n", cpu);
7817 " CPU%d\n", cpu);
7818 } 7844 }
7819 7845
7820 printk(KERN_DEBUG "%*s groups:", level + 1, ""); 7846 printk(KERN_DEBUG "%*s groups:", level + 1, "");
7821 do { 7847 do {
7822 if (!group) { 7848 if (!group) {
7823 printk("\n"); 7849 pr_cont("\n");
7824 printk(KERN_ERR "ERROR: group is NULL\n"); 7850 pr_err("ERROR: group is NULL\n");
7825 break; 7851 break;
7826 } 7852 }
7827 7853
7828 if (!group->cpu_power) { 7854 if (!group->cpu_power) {
7829 printk(KERN_CONT "\n"); 7855 pr_cont("\n");
7830 printk(KERN_ERR "ERROR: domain->cpu_power not " 7856 pr_err("ERROR: domain->cpu_power not set\n");
7831 "set\n");
7832 break; 7857 break;
7833 } 7858 }
7834 7859
7835 if (!cpumask_weight(sched_group_cpus(group))) { 7860 if (!cpumask_weight(sched_group_cpus(group))) {
7836 printk(KERN_CONT "\n"); 7861 pr_cont("\n");
7837 printk(KERN_ERR "ERROR: empty group\n"); 7862 pr_err("ERROR: empty group\n");
7838 break; 7863 break;
7839 } 7864 }
7840 7865
7841 if (cpumask_intersects(groupmask, sched_group_cpus(group))) { 7866 if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
7842 printk(KERN_CONT "\n"); 7867 pr_cont("\n");
7843 printk(KERN_ERR "ERROR: repeated CPUs\n"); 7868 pr_err("ERROR: repeated CPUs\n");
7844 break; 7869 break;
7845 } 7870 }
7846 7871
@@ -7848,23 +7873,21 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7848 7873
7849 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 7874 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
7850 7875
7851 printk(KERN_CONT " %s", str); 7876 pr_cont(" %s", str);
7852 if (group->cpu_power != SCHED_LOAD_SCALE) { 7877 if (group->cpu_power != SCHED_LOAD_SCALE) {
7853 printk(KERN_CONT " (cpu_power = %d)", 7878 pr_cont(" (cpu_power = %d)", group->cpu_power);
7854 group->cpu_power);
7855 } 7879 }
7856 7880
7857 group = group->next; 7881 group = group->next;
7858 } while (group != sd->groups); 7882 } while (group != sd->groups);
7859 printk(KERN_CONT "\n"); 7883 pr_cont("\n");
7860 7884
7861 if (!cpumask_equal(sched_domain_span(sd), groupmask)) 7885 if (!cpumask_equal(sched_domain_span(sd), groupmask))
7862 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); 7886 pr_err("ERROR: groups don't span domain->span\n");
7863 7887
7864 if (sd->parent && 7888 if (sd->parent &&
7865 !cpumask_subset(groupmask, sched_domain_span(sd->parent))) 7889 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
7866 printk(KERN_ERR "ERROR: parent span is not a superset " 7890 pr_err("ERROR: parent span is not a superset of domain->span\n");
7867 "of domain->span\n");
7868 return 0; 7891 return 0;
7869} 7892}
7870 7893
@@ -7970,7 +7993,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
7970 struct root_domain *old_rd = NULL; 7993 struct root_domain *old_rd = NULL;
7971 unsigned long flags; 7994 unsigned long flags;
7972 7995
7973 spin_lock_irqsave(&rq->lock, flags); 7996 raw_spin_lock_irqsave(&rq->lock, flags);
7974 7997
7975 if (rq->rd) { 7998 if (rq->rd) {
7976 old_rd = rq->rd; 7999 old_rd = rq->rd;
@@ -7996,7 +8019,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
7996 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) 8019 if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
7997 set_rq_online(rq); 8020 set_rq_online(rq);
7998 8021
7999 spin_unlock_irqrestore(&rq->lock, flags); 8022 raw_spin_unlock_irqrestore(&rq->lock, flags);
8000 8023
8001 if (old_rd) 8024 if (old_rd)
8002 free_rootdomain(old_rd); 8025 free_rootdomain(old_rd);
@@ -8282,14 +8305,14 @@ enum s_alloc {
8282 */ 8305 */
8283#ifdef CONFIG_SCHED_SMT 8306#ifdef CONFIG_SCHED_SMT
8284static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); 8307static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
8285static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); 8308static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
8286 8309
8287static int 8310static int
8288cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, 8311cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
8289 struct sched_group **sg, struct cpumask *unused) 8312 struct sched_group **sg, struct cpumask *unused)
8290{ 8313{
8291 if (sg) 8314 if (sg)
8292 *sg = &per_cpu(sched_group_cpus, cpu).sg; 8315 *sg = &per_cpu(sched_groups, cpu).sg;
8293 return cpu; 8316 return cpu;
8294} 8317}
8295#endif /* CONFIG_SCHED_SMT */ 8318#endif /* CONFIG_SCHED_SMT */
@@ -8420,8 +8443,7 @@ static int build_numa_sched_groups(struct s_data *d,
8420 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), 8443 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
8421 GFP_KERNEL, num); 8444 GFP_KERNEL, num);
8422 if (!sg) { 8445 if (!sg) {
8423 printk(KERN_WARNING "Can not alloc domain group for node %d\n", 8446 pr_warning("Can not alloc domain group for node %d\n", num);
8424 num);
8425 return -ENOMEM; 8447 return -ENOMEM;
8426 } 8448 }
8427 d->sched_group_nodes[num] = sg; 8449 d->sched_group_nodes[num] = sg;
@@ -8450,8 +8472,8 @@ static int build_numa_sched_groups(struct s_data *d,
8450 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), 8472 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
8451 GFP_KERNEL, num); 8473 GFP_KERNEL, num);
8452 if (!sg) { 8474 if (!sg) {
8453 printk(KERN_WARNING 8475 pr_warning("Can not alloc domain group for node %d\n",
8454 "Can not alloc domain group for node %d\n", j); 8476 j);
8455 return -ENOMEM; 8477 return -ENOMEM;
8456 } 8478 }
8457 sg->cpu_power = 0; 8479 sg->cpu_power = 0;
@@ -8679,7 +8701,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
8679 d->sched_group_nodes = kcalloc(nr_node_ids, 8701 d->sched_group_nodes = kcalloc(nr_node_ids,
8680 sizeof(struct sched_group *), GFP_KERNEL); 8702 sizeof(struct sched_group *), GFP_KERNEL);
8681 if (!d->sched_group_nodes) { 8703 if (!d->sched_group_nodes) {
8682 printk(KERN_WARNING "Can not alloc sched group node list\n"); 8704 pr_warning("Can not alloc sched group node list\n");
8683 return sa_notcovered; 8705 return sa_notcovered;
8684 } 8706 }
8685 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes; 8707 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
@@ -8696,7 +8718,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
8696 return sa_send_covered; 8718 return sa_send_covered;
8697 d->rd = alloc_rootdomain(); 8719 d->rd = alloc_rootdomain();
8698 if (!d->rd) { 8720 if (!d->rd) {
8699 printk(KERN_WARNING "Cannot alloc root domain\n"); 8721 pr_warning("Cannot alloc root domain\n");
8700 return sa_tmpmask; 8722 return sa_tmpmask;
8701 } 8723 }
8702 return sa_rootdomain; 8724 return sa_rootdomain;
@@ -9099,7 +9121,7 @@ match1:
9099 if (doms_new == NULL) { 9121 if (doms_new == NULL) {
9100 ndoms_cur = 0; 9122 ndoms_cur = 0;
9101 doms_new = &fallback_doms; 9123 doms_new = &fallback_doms;
9102 cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); 9124 cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
9103 WARN_ON_ONCE(dattr_new); 9125 WARN_ON_ONCE(dattr_new);
9104 } 9126 }
9105 9127
@@ -9230,8 +9252,10 @@ static int update_sched_domains(struct notifier_block *nfb,
9230 switch (action) { 9252 switch (action) {
9231 case CPU_ONLINE: 9253 case CPU_ONLINE:
9232 case CPU_ONLINE_FROZEN: 9254 case CPU_ONLINE_FROZEN:
9233 case CPU_DEAD: 9255 case CPU_DOWN_PREPARE:
9234 case CPU_DEAD_FROZEN: 9256 case CPU_DOWN_PREPARE_FROZEN:
9257 case CPU_DOWN_FAILED:
9258 case CPU_DOWN_FAILED_FROZEN:
9235 partition_sched_domains(1, NULL, NULL); 9259 partition_sched_domains(1, NULL, NULL);
9236 return NOTIFY_OK; 9260 return NOTIFY_OK;
9237 9261
@@ -9278,7 +9302,7 @@ void __init sched_init_smp(void)
9278#endif 9302#endif
9279 get_online_cpus(); 9303 get_online_cpus();
9280 mutex_lock(&sched_domains_mutex); 9304 mutex_lock(&sched_domains_mutex);
9281 arch_init_sched_domains(cpu_online_mask); 9305 arch_init_sched_domains(cpu_active_mask);
9282 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); 9306 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
9283 if (cpumask_empty(non_isolated_cpus)) 9307 if (cpumask_empty(non_isolated_cpus))
9284 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); 9308 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -9351,13 +9375,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
9351#ifdef CONFIG_SMP 9375#ifdef CONFIG_SMP
9352 rt_rq->rt_nr_migratory = 0; 9376 rt_rq->rt_nr_migratory = 0;
9353 rt_rq->overloaded = 0; 9377 rt_rq->overloaded = 0;
9354 plist_head_init(&rt_rq->pushable_tasks, &rq->lock); 9378 plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
9355#endif 9379#endif
9356 9380
9357 rt_rq->rt_time = 0; 9381 rt_rq->rt_time = 0;
9358 rt_rq->rt_throttled = 0; 9382 rt_rq->rt_throttled = 0;
9359 rt_rq->rt_runtime = 0; 9383 rt_rq->rt_runtime = 0;
9360 spin_lock_init(&rt_rq->rt_runtime_lock); 9384 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
9361 9385
9362#ifdef CONFIG_RT_GROUP_SCHED 9386#ifdef CONFIG_RT_GROUP_SCHED
9363 rt_rq->rt_nr_boosted = 0; 9387 rt_rq->rt_nr_boosted = 0;
@@ -9517,7 +9541,7 @@ void __init sched_init(void)
9517 struct rq *rq; 9541 struct rq *rq;
9518 9542
9519 rq = cpu_rq(i); 9543 rq = cpu_rq(i);
9520 spin_lock_init(&rq->lock); 9544 raw_spin_lock_init(&rq->lock);
9521 rq->nr_running = 0; 9545 rq->nr_running = 0;
9522 rq->calc_load_active = 0; 9546 rq->calc_load_active = 0;
9523 rq->calc_load_update = jiffies + LOAD_FREQ; 9547 rq->calc_load_update = jiffies + LOAD_FREQ;
@@ -9577,7 +9601,7 @@ void __init sched_init(void)
9577#elif defined CONFIG_USER_SCHED 9601#elif defined CONFIG_USER_SCHED
9578 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); 9602 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
9579 init_tg_rt_entry(&init_task_group, 9603 init_tg_rt_entry(&init_task_group,
9580 &per_cpu(init_rt_rq, i), 9604 &per_cpu(init_rt_rq_var, i),
9581 &per_cpu(init_sched_rt_entity, i), i, 1, 9605 &per_cpu(init_sched_rt_entity, i), i, 1,
9582 root_task_group.rt_se[i]); 9606 root_task_group.rt_se[i]);
9583#endif 9607#endif
@@ -9615,7 +9639,7 @@ void __init sched_init(void)
9615#endif 9639#endif
9616 9640
9617#ifdef CONFIG_RT_MUTEXES 9641#ifdef CONFIG_RT_MUTEXES
9618 plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); 9642 plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
9619#endif 9643#endif
9620 9644
9621 /* 9645 /*
@@ -9659,7 +9683,7 @@ void __init sched_init(void)
9659#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 9683#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
9660static inline int preempt_count_equals(int preempt_offset) 9684static inline int preempt_count_equals(int preempt_offset)
9661{ 9685{
9662 int nested = preempt_count() & ~PREEMPT_ACTIVE; 9686 int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
9663 9687
9664 return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); 9688 return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
9665} 9689}
@@ -9676,13 +9700,11 @@ void __might_sleep(char *file, int line, int preempt_offset)
9676 return; 9700 return;
9677 prev_jiffy = jiffies; 9701 prev_jiffy = jiffies;
9678 9702
9679 printk(KERN_ERR 9703 pr_err("BUG: sleeping function called from invalid context at %s:%d\n",
9680 "BUG: sleeping function called from invalid context at %s:%d\n", 9704 file, line);
9681 file, line); 9705 pr_err("in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
9682 printk(KERN_ERR 9706 in_atomic(), irqs_disabled(),
9683 "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", 9707 current->pid, current->comm);
9684 in_atomic(), irqs_disabled(),
9685 current->pid, current->comm);
9686 9708
9687 debug_show_held_locks(current); 9709 debug_show_held_locks(current);
9688 if (irqs_disabled()) 9710 if (irqs_disabled())
@@ -9740,13 +9762,13 @@ void normalize_rt_tasks(void)
9740 continue; 9762 continue;
9741 } 9763 }
9742 9764
9743 spin_lock(&p->pi_lock); 9765 raw_spin_lock(&p->pi_lock);
9744 rq = __task_rq_lock(p); 9766 rq = __task_rq_lock(p);
9745 9767
9746 normalize_task(rq, p); 9768 normalize_task(rq, p);
9747 9769
9748 __task_rq_unlock(rq); 9770 __task_rq_unlock(rq);
9749 spin_unlock(&p->pi_lock); 9771 raw_spin_unlock(&p->pi_lock);
9750 } while_each_thread(g, p); 9772 } while_each_thread(g, p);
9751 9773
9752 read_unlock_irqrestore(&tasklist_lock, flags); 9774 read_unlock_irqrestore(&tasklist_lock, flags);
@@ -9842,13 +9864,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
9842 se = kzalloc_node(sizeof(struct sched_entity), 9864 se = kzalloc_node(sizeof(struct sched_entity),
9843 GFP_KERNEL, cpu_to_node(i)); 9865 GFP_KERNEL, cpu_to_node(i));
9844 if (!se) 9866 if (!se)
9845 goto err; 9867 goto err_free_rq;
9846 9868
9847 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); 9869 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
9848 } 9870 }
9849 9871
9850 return 1; 9872 return 1;
9851 9873
9874 err_free_rq:
9875 kfree(cfs_rq);
9852 err: 9876 err:
9853 return 0; 9877 return 0;
9854} 9878}
@@ -9930,13 +9954,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
9930 rt_se = kzalloc_node(sizeof(struct sched_rt_entity), 9954 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
9931 GFP_KERNEL, cpu_to_node(i)); 9955 GFP_KERNEL, cpu_to_node(i));
9932 if (!rt_se) 9956 if (!rt_se)
9933 goto err; 9957 goto err_free_rq;
9934 9958
9935 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); 9959 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
9936 } 9960 }
9937 9961
9938 return 1; 9962 return 1;
9939 9963
9964 err_free_rq:
9965 kfree(rt_rq);
9940 err: 9966 err:
9941 return 0; 9967 return 0;
9942} 9968}
@@ -10070,7 +10096,7 @@ void sched_move_task(struct task_struct *tsk)
10070 10096
10071#ifdef CONFIG_FAIR_GROUP_SCHED 10097#ifdef CONFIG_FAIR_GROUP_SCHED
10072 if (tsk->sched_class->moved_group) 10098 if (tsk->sched_class->moved_group)
10073 tsk->sched_class->moved_group(tsk); 10099 tsk->sched_class->moved_group(tsk, on_rq);
10074#endif 10100#endif
10075 10101
10076 if (unlikely(running)) 10102 if (unlikely(running))
@@ -10105,9 +10131,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
10105 struct rq *rq = cfs_rq->rq; 10131 struct rq *rq = cfs_rq->rq;
10106 unsigned long flags; 10132 unsigned long flags;
10107 10133
10108 spin_lock_irqsave(&rq->lock, flags); 10134 raw_spin_lock_irqsave(&rq->lock, flags);
10109 __set_se_shares(se, shares); 10135 __set_se_shares(se, shares);
10110 spin_unlock_irqrestore(&rq->lock, flags); 10136 raw_spin_unlock_irqrestore(&rq->lock, flags);
10111} 10137}
10112 10138
10113static DEFINE_MUTEX(shares_mutex); 10139static DEFINE_MUTEX(shares_mutex);
@@ -10292,18 +10318,18 @@ static int tg_set_bandwidth(struct task_group *tg,
10292 if (err) 10318 if (err)
10293 goto unlock; 10319 goto unlock;
10294 10320
10295 spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); 10321 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
10296 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); 10322 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
10297 tg->rt_bandwidth.rt_runtime = rt_runtime; 10323 tg->rt_bandwidth.rt_runtime = rt_runtime;
10298 10324
10299 for_each_possible_cpu(i) { 10325 for_each_possible_cpu(i) {
10300 struct rt_rq *rt_rq = tg->rt_rq[i]; 10326 struct rt_rq *rt_rq = tg->rt_rq[i];
10301 10327
10302 spin_lock(&rt_rq->rt_runtime_lock); 10328 raw_spin_lock(&rt_rq->rt_runtime_lock);
10303 rt_rq->rt_runtime = rt_runtime; 10329 rt_rq->rt_runtime = rt_runtime;
10304 spin_unlock(&rt_rq->rt_runtime_lock); 10330 raw_spin_unlock(&rt_rq->rt_runtime_lock);
10305 } 10331 }
10306 spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); 10332 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
10307 unlock: 10333 unlock:
10308 read_unlock(&tasklist_lock); 10334 read_unlock(&tasklist_lock);
10309 mutex_unlock(&rt_constraints_mutex); 10335 mutex_unlock(&rt_constraints_mutex);
@@ -10408,15 +10434,15 @@ static int sched_rt_global_constraints(void)
10408 if (sysctl_sched_rt_runtime == 0) 10434 if (sysctl_sched_rt_runtime == 0)
10409 return -EBUSY; 10435 return -EBUSY;
10410 10436
10411 spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); 10437 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
10412 for_each_possible_cpu(i) { 10438 for_each_possible_cpu(i) {
10413 struct rt_rq *rt_rq = &cpu_rq(i)->rt; 10439 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
10414 10440
10415 spin_lock(&rt_rq->rt_runtime_lock); 10441 raw_spin_lock(&rt_rq->rt_runtime_lock);
10416 rt_rq->rt_runtime = global_rt_runtime(); 10442 rt_rq->rt_runtime = global_rt_runtime();
10417 spin_unlock(&rt_rq->rt_runtime_lock); 10443 raw_spin_unlock(&rt_rq->rt_runtime_lock);
10418 } 10444 }
10419 spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); 10445 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
10420 10446
10421 return 0; 10447 return 0;
10422} 10448}
@@ -10707,9 +10733,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
10707 /* 10733 /*
10708 * Take rq->lock to make 64-bit read safe on 32-bit platforms. 10734 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
10709 */ 10735 */
10710 spin_lock_irq(&cpu_rq(cpu)->lock); 10736 raw_spin_lock_irq(&cpu_rq(cpu)->lock);
10711 data = *cpuusage; 10737 data = *cpuusage;
10712 spin_unlock_irq(&cpu_rq(cpu)->lock); 10738 raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
10713#else 10739#else
10714 data = *cpuusage; 10740 data = *cpuusage;
10715#endif 10741#endif
@@ -10725,9 +10751,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
10725 /* 10751 /*
10726 * Take rq->lock to make 64-bit write safe on 32-bit platforms. 10752 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
10727 */ 10753 */
10728 spin_lock_irq(&cpu_rq(cpu)->lock); 10754 raw_spin_lock_irq(&cpu_rq(cpu)->lock);
10729 *cpuusage = val; 10755 *cpuusage = val;
10730 spin_unlock_irq(&cpu_rq(cpu)->lock); 10756 raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
10731#else 10757#else
10732 *cpuusage = val; 10758 *cpuusage = val;
10733#endif 10759#endif
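The "64-bit safe on 32-bit platforms" comments refer to tearing: a u64 access compiles to two 32-bit memory operations there, so an unlocked reader can observe one stale and one fresh half. A sketch of the locked-accessor shape (hypothetical helper):

static u64 demo_read_u64(raw_spinlock_t *lock, u64 *val)
{
        unsigned long flags;
        u64 copy;

        raw_spin_lock_irqsave(lock, flags);
        copy = *val;    /* both 32-bit halves observed consistently */
        raw_spin_unlock_irqrestore(lock, flags);

        return copy;
}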
@@ -10961,9 +10987,9 @@ void synchronize_sched_expedited(void)
10961 init_completion(&req->done); 10987 init_completion(&req->done);
10962 req->task = NULL; 10988 req->task = NULL;
10963 req->dest_cpu = RCU_MIGRATION_NEED_QS; 10989 req->dest_cpu = RCU_MIGRATION_NEED_QS;
10964 spin_lock_irqsave(&rq->lock, flags); 10990 raw_spin_lock_irqsave(&rq->lock, flags);
10965 list_add(&req->list, &rq->migration_queue); 10991 list_add(&req->list, &rq->migration_queue);
10966 spin_unlock_irqrestore(&rq->lock, flags); 10992 raw_spin_unlock_irqrestore(&rq->lock, flags);
10967 wake_up_process(rq->migration_thread); 10993 wake_up_process(rq->migration_thread);
10968 } 10994 }
10969 for_each_online_cpu(cpu) { 10995 for_each_online_cpu(cpu) {
@@ -10971,11 +10997,11 @@ void synchronize_sched_expedited(void)
10971 req = &per_cpu(rcu_migration_req, cpu); 10997 req = &per_cpu(rcu_migration_req, cpu);
10972 rq = cpu_rq(cpu); 10998 rq = cpu_rq(cpu);
10973 wait_for_completion(&req->done); 10999 wait_for_completion(&req->done);
10974 spin_lock_irqsave(&rq->lock, flags); 11000 raw_spin_lock_irqsave(&rq->lock, flags);
10975 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) 11001 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
10976 need_full_sync = 1; 11002 need_full_sync = 1;
10977 req->dest_cpu = RCU_MIGRATION_IDLE; 11003 req->dest_cpu = RCU_MIGRATION_IDLE;
10978 spin_unlock_irqrestore(&rq->lock, flags); 11004 raw_spin_unlock_irqrestore(&rq->lock, flags);
10979 } 11005 }
10980 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; 11006 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
10981 synchronize_sched_expedited_count++; 11007 synchronize_sched_expedited_count++;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 479ce5682d7c..5b496132c28a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
236} 236}
237EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 237EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
238 238
239unsigned long long cpu_clock(int cpu)
240{
241 unsigned long long clock;
242 unsigned long flags;
243
244 local_irq_save(flags);
245 clock = sched_clock_cpu(cpu);
246 local_irq_restore(flags);
247
248 return clock;
249}
250
239#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 251#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
240 252
241void sched_clock_init(void) 253void sched_clock_init(void)
@@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu)
251 return sched_clock(); 263 return sched_clock();
252} 264}
253 265
254#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
255 266
256unsigned long long cpu_clock(int cpu) 267unsigned long long cpu_clock(int cpu)
257{ 268{
258 unsigned long long clock; 269 return sched_clock_cpu(cpu);
259 unsigned long flags; 270}
260 271
261 local_irq_save(flags); 272#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
262 clock = sched_clock_cpu(cpu);
263 local_irq_restore(flags);
264 273
265 return clock;
266}
267EXPORT_SYMBOL_GPL(cpu_clock); 274EXPORT_SYMBOL_GPL(cpu_clock);
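After the move, each configuration gets its own cpu_clock(): the unstable-clock variant disables IRQs so the sample cannot be torn by preemption and migration mid-read, while the stable variant collapses to a direct sched_clock_cpu() call. A hedged usage sketch (hypothetical timing code, assuming the caller disabled preemption so both reads hit one CPU):

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>

static void demo_time_section(void)
{
        u64 t0, t1;

        t0 = cpu_clock(raw_smp_processor_id());
        /* ... section being timed ... */
        t1 = cpu_clock(raw_smp_processor_id());

        pr_info("section took %llu ns\n", (unsigned long long)(t1 - t0));
}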
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 0f052fc674d5..597b33099dfa 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -135,26 +135,26 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
135 if (likely(newpri != CPUPRI_INVALID)) { 135 if (likely(newpri != CPUPRI_INVALID)) {
136 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; 136 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
137 137
138 spin_lock_irqsave(&vec->lock, flags); 138 raw_spin_lock_irqsave(&vec->lock, flags);
139 139
140 cpumask_set_cpu(cpu, vec->mask); 140 cpumask_set_cpu(cpu, vec->mask);
141 vec->count++; 141 vec->count++;
142 if (vec->count == 1) 142 if (vec->count == 1)
143 set_bit(newpri, cp->pri_active); 143 set_bit(newpri, cp->pri_active);
144 144
145 spin_unlock_irqrestore(&vec->lock, flags); 145 raw_spin_unlock_irqrestore(&vec->lock, flags);
146 } 146 }
147 if (likely(oldpri != CPUPRI_INVALID)) { 147 if (likely(oldpri != CPUPRI_INVALID)) {
148 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; 148 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
149 149
150 spin_lock_irqsave(&vec->lock, flags); 150 raw_spin_lock_irqsave(&vec->lock, flags);
151 151
152 vec->count--; 152 vec->count--;
153 if (!vec->count) 153 if (!vec->count)
154 clear_bit(oldpri, cp->pri_active); 154 clear_bit(oldpri, cp->pri_active);
155 cpumask_clear_cpu(cpu, vec->mask); 155 cpumask_clear_cpu(cpu, vec->mask);
156 156
157 spin_unlock_irqrestore(&vec->lock, flags); 157 raw_spin_unlock_irqrestore(&vec->lock, flags);
158 } 158 }
159 159
160 *currpri = newpri; 160 *currpri = newpri;
@@ -180,7 +180,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
180 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { 180 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
181 struct cpupri_vec *vec = &cp->pri_to_cpu[i]; 181 struct cpupri_vec *vec = &cp->pri_to_cpu[i];
182 182
183 spin_lock_init(&vec->lock); 183 raw_spin_lock_init(&vec->lock);
184 vec->count = 0; 184 vec->count = 0;
185 if (!zalloc_cpumask_var(&vec->mask, gfp)) 185 if (!zalloc_cpumask_var(&vec->mask, gfp))
186 goto cleanup; 186 goto cleanup;
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index 9a7e859b8fbf..7cb5bb6b95be 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -12,7 +12,7 @@
12/* values 2-101 are RT priorities 0-99 */ 12/* values 2-101 are RT priorities 0-99 */
13 13
14struct cpupri_vec { 14struct cpupri_vec {
15 spinlock_t lock; 15 raw_spinlock_t lock;
16 int count; 16 int count;
17 cpumask_var_t mask; 17 cpumask_var_t mask;
18}; 18};
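The spinlock_t to raw_spinlock_t conversions threaded through this whole patch prepare for PREEMPT_RT, where a plain spinlock_t can become a sleeping lock; paths that run with preemption and interrupts hard-disabled, like the scheduler core and this per-priority vector, need a lock that always spins. A sketch of the resulting idiom:

struct demo_vec {
        raw_spinlock_t lock;    /* always spins, even on PREEMPT_RT */
        int count;
};

static void demo_vec_inc(struct demo_vec *v)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&v->lock, flags);
        v->count++;
        raw_spin_unlock_irqrestore(&v->lock, flags);
}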
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6988cf08f705..67f95aada4b9 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -184,7 +184,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
184 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 184 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
185 SPLIT_NS(cfs_rq->exec_clock)); 185 SPLIT_NS(cfs_rq->exec_clock));
186 186
187 spin_lock_irqsave(&rq->lock, flags); 187 raw_spin_lock_irqsave(&rq->lock, flags);
188 if (cfs_rq->rb_leftmost) 188 if (cfs_rq->rb_leftmost)
189 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; 189 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
190 last = __pick_last_entity(cfs_rq); 190 last = __pick_last_entity(cfs_rq);
@@ -192,7 +192,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
192 max_vruntime = last->vruntime; 192 max_vruntime = last->vruntime;
193 min_vruntime = cfs_rq->min_vruntime; 193 min_vruntime = cfs_rq->min_vruntime;
194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; 194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
195 spin_unlock_irqrestore(&rq->lock, flags); 195 raw_spin_unlock_irqrestore(&rq->lock, flags);
196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", 196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
197 SPLIT_NS(MIN_vruntime)); 197 SPLIT_NS(MIN_vruntime));
198 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", 198 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu)
309 print_rq(m, rq, cpu); 309 print_rq(m, rq, cpu);
310} 310}
311 311
312static const char *sched_tunable_scaling_names[] = {
313 "none",
314 "logaritmic",
315 "linear"
316};
317
312static int sched_debug_show(struct seq_file *m, void *v) 318static int sched_debug_show(struct seq_file *m, void *v)
313{ 319{
314 u64 now = ktime_to_ns(ktime_get()); 320 u64 now = ktime_to_ns(ktime_get());
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v)
334#undef PN 340#undef PN
335#undef P 341#undef P
336 342
343 SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
344 sysctl_sched_tunable_scaling,
345 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
346
337 for_each_online_cpu(cpu) 347 for_each_online_cpu(cpu)
338 print_cpu(m, cpu); 348 print_cpu(m, cpu);
339 349
@@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
399 PN(se.sum_exec_runtime); 409 PN(se.sum_exec_runtime);
400 PN(se.avg_overlap); 410 PN(se.avg_overlap);
401 PN(se.avg_wakeup); 411 PN(se.avg_wakeup);
402 PN(se.avg_running);
403 412
404 nr_switches = p->nvcsw + p->nivcsw; 413 nr_switches = p->nvcsw + p->nivcsw;
405 414
@@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
423 P(se.nr_failed_migrations_running); 432 P(se.nr_failed_migrations_running);
424 P(se.nr_failed_migrations_hot); 433 P(se.nr_failed_migrations_hot);
425 P(se.nr_forced_migrations); 434 P(se.nr_forced_migrations);
426 P(se.nr_forced2_migrations);
427 P(se.nr_wakeups); 435 P(se.nr_wakeups);
428 P(se.nr_wakeups_sync); 436 P(se.nr_wakeups_sync);
429 P(se.nr_wakeups_migrate); 437 P(se.nr_wakeups_migrate);
@@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p)
499 p->se.nr_failed_migrations_running = 0; 507 p->se.nr_failed_migrations_running = 0;
500 p->se.nr_failed_migrations_hot = 0; 508 p->se.nr_failed_migrations_hot = 0;
501 p->se.nr_forced_migrations = 0; 509 p->se.nr_forced_migrations = 0;
502 p->se.nr_forced2_migrations = 0;
503 p->se.nr_wakeups = 0; 510 p->se.nr_wakeups = 0;
504 p->se.nr_wakeups_sync = 0; 511 p->se.nr_wakeups_sync = 0;
505 p->se.nr_wakeups_migrate = 0; 512 p->se.nr_wakeups_migrate = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f61837ad336d..42ac3c9f66f6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23#include <linux/latencytop.h> 23#include <linux/latencytop.h>
24#include <linux/sched.h>
24 25
25/* 26/*
26 * Targeted preemption latency for CPU-bound tasks: 27 * Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
35 * run vmstat and monitor the context-switches (cs) field) 36 * run vmstat and monitor the context-switches (cs) field)
36 */ 37 */
37unsigned int sysctl_sched_latency = 5000000ULL; 38unsigned int sysctl_sched_latency = 5000000ULL;
39unsigned int normalized_sysctl_sched_latency = 5000000ULL;
40
41/*
42 * The initial- and re-scaling of tunables is configurable
43 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
44 *
45 * Options are:
46 * SCHED_TUNABLESCALING_NONE - unscaled, always *1
47 * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
48 * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
49 */
50enum sched_tunable_scaling sysctl_sched_tunable_scaling
51 = SCHED_TUNABLESCALING_LOG;
38 52
39/* 53/*
40 * Minimal preemption granularity for CPU-bound tasks: 54 * Minimal preemption granularity for CPU-bound tasks:
41 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) 55 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
42 */ 56 */
43unsigned int sysctl_sched_min_granularity = 1000000ULL; 57unsigned int sysctl_sched_min_granularity = 1000000ULL;
58unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
44 59
45/* 60/*
46 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity 61 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
70 * have immediate wakeup/sleep latencies. 85 * have immediate wakeup/sleep latencies.
71 */ 86 */
72unsigned int sysctl_sched_wakeup_granularity = 1000000UL; 87unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
88unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
73 89
74const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 90const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
75 91
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
383 */ 399 */
384 400
385#ifdef CONFIG_SCHED_DEBUG 401#ifdef CONFIG_SCHED_DEBUG
386int sched_nr_latency_handler(struct ctl_table *table, int write, 402int sched_proc_update_handler(struct ctl_table *table, int write,
387 void __user *buffer, size_t *lenp, 403 void __user *buffer, size_t *lenp,
388 loff_t *ppos) 404 loff_t *ppos)
389{ 405{
390 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 406 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
407 int factor = get_update_sysctl_factor();
391 408
392 if (ret || !write) 409 if (ret || !write)
393 return ret; 410 return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
395 sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, 412 sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
396 sysctl_sched_min_granularity); 413 sysctl_sched_min_granularity);
397 414
415#define WRT_SYSCTL(name) \
416 (normalized_sysctl_##name = sysctl_##name / (factor))
417 WRT_SYSCTL(sched_min_granularity);
418 WRT_SYSCTL(sched_latency);
419 WRT_SYSCTL(sched_wakeup_granularity);
420 WRT_SYSCTL(sched_shares_ratelimit);
421#undef WRT_SYSCTL
422
398 return 0; 423 return 0;
399} 424}
400#endif 425#endif
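
The WRT_SYSCTL() helper keeps the normalized_* shadow in sync whenever an admin writes a tunable through /proc; for example WRT_SYSCTL(sched_latency) expands to:

	(normalized_sysctl_sched_latency = sysctl_sched_latency / (factor));

so the shadow always holds the unscaled base value, which can be re-scaled later when the online CPU count changes.
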
@@ -485,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
485 curr->sum_exec_runtime += delta_exec; 510 curr->sum_exec_runtime += delta_exec;
486 schedstat_add(cfs_rq, exec_clock, delta_exec); 511 schedstat_add(cfs_rq, exec_clock, delta_exec);
487 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 512 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
513
488 curr->vruntime += delta_exec_weighted; 514 curr->vruntime += delta_exec_weighted;
489 update_min_vruntime(cfs_rq); 515 update_min_vruntime(cfs_rq);
490} 516}
@@ -740,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
740 se->vruntime = vruntime; 766 se->vruntime = vruntime;
741} 767}
742 768
769#define ENQUEUE_WAKEUP 1
770#define ENQUEUE_MIGRATE 2
771
743static void 772static void
744enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) 773enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
745{ 774{
746 /* 775 /*
776 * Update the normalized vruntime before updating min_vruntime
 777 * through calling update_curr().
778 */
779 if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
780 se->vruntime += cfs_rq->min_vruntime;
781
782 /*
747 * Update run-time statistics of the 'current'. 783 * Update run-time statistics of the 'current'.
748 */ 784 */
749 update_curr(cfs_rq); 785 update_curr(cfs_rq);
750 account_entity_enqueue(cfs_rq, se); 786 account_entity_enqueue(cfs_rq, se);
751 787
752 if (wakeup) { 788 if (flags & ENQUEUE_WAKEUP) {
753 place_entity(cfs_rq, se, 0); 789 place_entity(cfs_rq, se, 0);
754 enqueue_sleeper(cfs_rq, se); 790 enqueue_sleeper(cfs_rq, se);
755 } 791 }
@@ -803,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
803 __dequeue_entity(cfs_rq, se); 839 __dequeue_entity(cfs_rq, se);
804 account_entity_dequeue(cfs_rq, se); 840 account_entity_dequeue(cfs_rq, se);
805 update_min_vruntime(cfs_rq); 841 update_min_vruntime(cfs_rq);
842
843 /*
844 * Normalize the entity after updating the min_vruntime because the
845 * update can refer to the ->curr item and we need to reflect this
846 * movement in our normalized position.
847 */
848 if (!sleep)
849 se->vruntime -= cfs_rq->min_vruntime;
806} 850}
807 851
808/* 852/*
@@ -1013,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
1013{ 1057{
1014 struct cfs_rq *cfs_rq; 1058 struct cfs_rq *cfs_rq;
1015 struct sched_entity *se = &p->se; 1059 struct sched_entity *se = &p->se;
1060 int flags = 0;
1061
1062 if (wakeup)
1063 flags |= ENQUEUE_WAKEUP;
1064 if (p->state == TASK_WAKING)
1065 flags |= ENQUEUE_MIGRATE;
1016 1066
1017 for_each_sched_entity(se) { 1067 for_each_sched_entity(se) {
1018 if (se->on_rq) 1068 if (se->on_rq)
1019 break; 1069 break;
1020 cfs_rq = cfs_rq_of(se); 1070 cfs_rq = cfs_rq_of(se);
1021 enqueue_entity(cfs_rq, se, wakeup); 1071 enqueue_entity(cfs_rq, se, flags);
1022 wakeup = 1; 1072 flags = ENQUEUE_WAKEUP;
1023 } 1073 }
1024 1074
1025 hrtick_update(rq); 1075 hrtick_update(rq);
@@ -1095,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
1095 1145
1096#ifdef CONFIG_SMP 1146#ifdef CONFIG_SMP
1097 1147
1148static void task_waking_fair(struct rq *rq, struct task_struct *p)
1149{
1150 struct sched_entity *se = &p->se;
1151 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1152
1153 se->vruntime -= cfs_rq->min_vruntime;
1154}
1155
1098#ifdef CONFIG_FAIR_GROUP_SCHED 1156#ifdef CONFIG_FAIR_GROUP_SCHED
1099/* 1157/*
1100 * effective_load() calculates the load change as seen from the root_task_group 1158 * effective_load() calculates the load change as seen from the root_task_group
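
Taken together, the enqueue_entity()/dequeue_entity() hunks above and the new task_waking_fair() establish one invariant: while a task is off a runqueue for a migration (dequeue with !sleep, or a wakeup-migration via task_waking_fair()), its vruntime is stored relative to the source cfs_rq's min_vruntime and re-based against the destination on enqueue, so the task's lag survives moving between runqueues whose virtual clocks differ. A toy, self-contained illustration (plain C, hypothetical values, not kernel code):

#include <stdio.h>

int main(void)
{
	long long src_min = 1000000, dst_min = 5000000;	/* hypothetical rq clocks */
	long long vruntime = 1200000;			/* 200us of lag on src */

	vruntime -= src_min;	/* task_waking_fair() on the source CPU */
	vruntime += dst_min;	/* enqueue_entity() with ENQUEUE_MIGRATE */

	printf("lag preserved: %lld\n", vruntime - dst_min);	/* still 200000 */
	return 0;
}
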
@@ -1403,8 +1461,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1403 new_cpu = prev_cpu; 1461 new_cpu = prev_cpu;
1404 } 1462 }
1405 1463
1406 rcu_read_lock();
1407 for_each_domain(cpu, tmp) { 1464 for_each_domain(cpu, tmp) {
1465 if (!(tmp->flags & SD_LOAD_BALANCE))
1466 continue;
1467
1408 /* 1468 /*
1409 * If power savings logic is enabled for a domain, see if we 1469 * If power savings logic is enabled for a domain, see if we
1410 * are not overloaded, if so, don't balance wider. 1470 * are not overloaded, if so, don't balance wider.
@@ -1484,10 +1544,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1484 update_shares(tmp); 1544 update_shares(tmp);
1485 } 1545 }
1486 1546
1487 if (affine_sd && wake_affine(affine_sd, p, sync)) { 1547 if (affine_sd && wake_affine(affine_sd, p, sync))
1488 new_cpu = cpu; 1548 return cpu;
1489 goto out;
1490 }
1491 1549
1492 while (sd) { 1550 while (sd) {
1493 int load_idx = sd->forkexec_idx; 1551 int load_idx = sd->forkexec_idx;
@@ -1528,8 +1586,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1528 /* while loop will break here if sd == NULL */ 1586 /* while loop will break here if sd == NULL */
1529 } 1587 }
1530 1588
1531out:
1532 rcu_read_unlock();
1533 return new_cpu; 1589 return new_cpu;
1534} 1590}
1535#endif /* CONFIG_SMP */ 1591#endif /* CONFIG_SMP */
@@ -1651,12 +1707,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1651 int sync = wake_flags & WF_SYNC; 1707 int sync = wake_flags & WF_SYNC;
1652 int scale = cfs_rq->nr_running >= sched_nr_latency; 1708 int scale = cfs_rq->nr_running >= sched_nr_latency;
1653 1709
1654 update_curr(cfs_rq); 1710 if (unlikely(rt_prio(p->prio)))
1655 1711 goto preempt;
1656 if (unlikely(rt_prio(p->prio))) {
1657 resched_task(curr);
1658 return;
1659 }
1660 1712
1661 if (unlikely(p->sched_class != &fair_sched_class)) 1713 if (unlikely(p->sched_class != &fair_sched_class))
1662 return; 1714 return;
@@ -1682,50 +1734,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1682 return; 1734 return;
1683 1735
1684 /* Idle tasks are by definition preempted by everybody. */ 1736 /* Idle tasks are by definition preempted by everybody. */
1685 if (unlikely(curr->policy == SCHED_IDLE)) { 1737 if (unlikely(curr->policy == SCHED_IDLE))
1686 resched_task(curr); 1738 goto preempt;
1687 return;
1688 }
1689 1739
1690 if ((sched_feat(WAKEUP_SYNC) && sync) || 1740 if (sched_feat(WAKEUP_SYNC) && sync)
1691 (sched_feat(WAKEUP_OVERLAP) && 1741 goto preempt;
1692 (se->avg_overlap < sysctl_sched_migration_cost &&
1693 pse->avg_overlap < sysctl_sched_migration_cost))) {
1694 resched_task(curr);
1695 return;
1696 }
1697 1742
1698 if (sched_feat(WAKEUP_RUNNING)) { 1743 if (sched_feat(WAKEUP_OVERLAP) &&
1699 if (pse->avg_running < se->avg_running) { 1744 se->avg_overlap < sysctl_sched_migration_cost &&
1700 set_next_buddy(pse); 1745 pse->avg_overlap < sysctl_sched_migration_cost)
1701 resched_task(curr); 1746 goto preempt;
1702 return;
1703 }
1704 }
1705 1747
1706 if (!sched_feat(WAKEUP_PREEMPT)) 1748 if (!sched_feat(WAKEUP_PREEMPT))
1707 return; 1749 return;
1708 1750
1751 update_curr(cfs_rq);
1709 find_matching_se(&se, &pse); 1752 find_matching_se(&se, &pse);
1710
1711 BUG_ON(!pse); 1753 BUG_ON(!pse);
1754 if (wakeup_preempt_entity(se, pse) == 1)
1755 goto preempt;
1712 1756
1713 if (wakeup_preempt_entity(se, pse) == 1) { 1757 return;
1714 resched_task(curr); 1758
1715 /* 1759preempt:
1716 * Only set the backward buddy when the current task is still 1760 resched_task(curr);
1717 * on the rq. This can happen when a wakeup gets interleaved 1761 /*
1718 * with schedule on the ->pre_schedule() or idle_balance() 1762 * Only set the backward buddy when the current task is still
1719 * point, either of which can drop the rq lock. 1763 * with schedule on the ->pre_schedule() or idle_balance()
1720 * 1764 * point, either of which can drop the rq lock.
1721 * Also, during early boot the idle thread is in the fair class, 1765 * point, either of which can * drop the rq lock.
1722 * for obvious reasons its a bad idea to schedule back to it. 1766 *
1723 */ 1767 * Also, during early boot the idle thread is in the fair class,
1724 if (unlikely(!se->on_rq || curr == rq->idle)) 1768 * for obvious reasons its a bad idea to schedule back to it.
1725 return; 1769 */
1726 if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) 1770 if (unlikely(!se->on_rq || curr == rq->idle))
1727 set_last_buddy(se); 1771 return;
1728 } 1772
1773 if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
1774 set_last_buddy(se);
1729} 1775}
1730 1776
1731static struct task_struct *pick_next_task_fair(struct rq *rq) 1777static struct task_struct *pick_next_task_fair(struct rq *rq)
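
For context, the wakeup_preempt_entity(se, pse) call that now feeds "goto preempt" compares the woken entity's vruntime lead against the wakeup granularity; roughly, from the sched_fair.c of this era (wakeup_gran() shown as an opaque helper):

/*
 * Returns -1 when the woken entity (second argument) is not ahead of curr,
 * 0 when it is ahead but within the granularity, and 1 when its lead
 * exceeds the granularity and curr should be preempted.
 */
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
	s64 gran, vdiff = curr->vruntime - se->vruntime;

	if (vdiff <= 0)
		return -1;

	gran = wakeup_gran(curr, se);	/* ~sysctl_sched_wakeup_granularity, weighted */
	if (vdiff > gran)
		return 1;

	return 0;
}
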
@@ -1905,6 +1951,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1905 1951
1906 return 0; 1952 return 0;
1907} 1953}
1954
1955static void rq_online_fair(struct rq *rq)
1956{
1957 update_sysctl();
1958}
1959
1960static void rq_offline_fair(struct rq *rq)
1961{
1962 update_sysctl();
1963}
1964
1908#endif /* CONFIG_SMP */ 1965#endif /* CONFIG_SMP */
1909 1966
1910/* 1967/*
@@ -1922,28 +1979,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1922} 1979}
1923 1980
1924/* 1981/*
1925 * Share the fairness runtime between parent and child, thus the 1982 * called on fork with the child task as argument from the parent's context
1926 * total amount of pressure for CPU stays equal - new tasks 1983 * - child not yet on the tasklist
1927 * get a chance to run but frequent forkers are not allowed to 1984 * - preemption disabled
1928 * monopolize the CPU. Note: the parent runqueue is locked,
1929 * the child is not running yet.
1930 */ 1985 */
1931static void task_new_fair(struct rq *rq, struct task_struct *p) 1986static void task_fork_fair(struct task_struct *p)
1932{ 1987{
1933 struct cfs_rq *cfs_rq = task_cfs_rq(p); 1988 struct cfs_rq *cfs_rq = task_cfs_rq(current);
1934 struct sched_entity *se = &p->se, *curr = cfs_rq->curr; 1989 struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
1935 int this_cpu = smp_processor_id(); 1990 int this_cpu = smp_processor_id();
1991 struct rq *rq = this_rq();
1992 unsigned long flags;
1936 1993
1937 sched_info_queued(p); 1994 raw_spin_lock_irqsave(&rq->lock, flags);
1995
1996 if (unlikely(task_cpu(p) != this_cpu))
1997 __set_task_cpu(p, this_cpu);
1938 1998
1939 update_curr(cfs_rq); 1999 update_curr(cfs_rq);
2000
1940 if (curr) 2001 if (curr)
1941 se->vruntime = curr->vruntime; 2002 se->vruntime = curr->vruntime;
1942 place_entity(cfs_rq, se, 1); 2003 place_entity(cfs_rq, se, 1);
1943 2004
1944 /* 'curr' will be NULL if the child belongs to a different group */ 2005 if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
1945 if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
1946 curr && entity_before(curr, se)) {
1947 /* 2006 /*
1948 * Upon rescheduling, sched_class::put_prev_task() will place 2007 * Upon rescheduling, sched_class::put_prev_task() will place
1949 * 'current' within the tree based on its new key value. 2008 * 'current' within the tree based on its new key value.
@@ -1952,7 +2011,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
1952 resched_task(rq->curr); 2011 resched_task(rq->curr);
1953 } 2012 }
1954 2013
1955 enqueue_task_fair(rq, p, 0); 2014 se->vruntime -= cfs_rq->min_vruntime;
2015
2016 raw_spin_unlock_irqrestore(&rq->lock, flags);
1956} 2017}
1957 2018
1958/* 2019/*
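
The final `se->vruntime -= cfs_rq->min_vruntime` in task_fork_fair() above is the fork-side half of the same re-basing protocol: unlike the old task_new_fair(), the child is no longer enqueued here, so its vruntime is left relative until wake_up_new_task() enqueues it and the enqueue path re-adds the destination runqueue's min_vruntime. A hypothetical timeline, names as above:

/*
 * task_fork_fair(p):   p->se.vruntime = place_entity(...) - src->min_vruntime
 * (the child may be placed on another CPU before its first wakeup)
 * wake_up_new_task(p): enqueue_entity(): p->se.vruntime += dst->min_vruntime
 */
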
@@ -2005,30 +2066,27 @@ static void set_curr_task_fair(struct rq *rq)
2005} 2066}
2006 2067
2007#ifdef CONFIG_FAIR_GROUP_SCHED 2068#ifdef CONFIG_FAIR_GROUP_SCHED
2008static void moved_group_fair(struct task_struct *p) 2069static void moved_group_fair(struct task_struct *p, int on_rq)
2009{ 2070{
2010 struct cfs_rq *cfs_rq = task_cfs_rq(p); 2071 struct cfs_rq *cfs_rq = task_cfs_rq(p);
2011 2072
2012 update_curr(cfs_rq); 2073 update_curr(cfs_rq);
2013 place_entity(cfs_rq, &p->se, 1); 2074 if (!on_rq)
2075 place_entity(cfs_rq, &p->se, 1);
2014} 2076}
2015#endif 2077#endif
2016 2078
2017unsigned int get_rr_interval_fair(struct task_struct *task) 2079unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
2018{ 2080{
2019 struct sched_entity *se = &task->se; 2081 struct sched_entity *se = &task->se;
2020 unsigned long flags;
2021 struct rq *rq;
2022 unsigned int rr_interval = 0; 2082 unsigned int rr_interval = 0;
2023 2083
2024 /* 2084 /*
2025 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise 2085 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
2026 * idle runqueue: 2086 * idle runqueue:
2027 */ 2087 */
2028 rq = task_rq_lock(task, &flags);
2029 if (rq->cfs.load.weight) 2088 if (rq->cfs.load.weight)
2030 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); 2089 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
2031 task_rq_unlock(rq, &flags);
2032 2090
2033 return rr_interval; 2091 return rr_interval;
2034} 2092}
@@ -2052,11 +2110,15 @@ static const struct sched_class fair_sched_class = {
2052 2110
2053 .load_balance = load_balance_fair, 2111 .load_balance = load_balance_fair,
2054 .move_one_task = move_one_task_fair, 2112 .move_one_task = move_one_task_fair,
2113 .rq_online = rq_online_fair,
2114 .rq_offline = rq_offline_fair,
2115
2116 .task_waking = task_waking_fair,
2055#endif 2117#endif
2056 2118
2057 .set_curr_task = set_curr_task_fair, 2119 .set_curr_task = set_curr_task_fair,
2058 .task_tick = task_tick_fair, 2120 .task_tick = task_tick_fair,
2059 .task_new = task_new_fair, 2121 .task_fork = task_fork_fair,
2060 2122
2061 .prio_changed = prio_changed_fair, 2123 .prio_changed = prio_changed_fair,
2062 .switched_to = switched_to_fair, 2124 .switched_to = switched_to_fair,
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 0d94083582c7..d5059fd761d9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0)
54SCHED_FEAT(WAKEUP_OVERLAP, 0) 54SCHED_FEAT(WAKEUP_OVERLAP, 0)
55 55
56/* 56/*
57 * Wakeup preemption towards tasks that run short
58 */
59SCHED_FEAT(WAKEUP_RUNNING, 0)
60
61/*
62 * Use the SYNC wakeup hint, pipes and the likes use this to indicate 57 * Use the SYNC wakeup hint, pipes and the likes use this to indicate
63 * the remote end is likely to consume the data we just wrote, and 58 * the remote end is likely to consume the data we just wrote, and
64 * therefore has cache benefit from being placed on the same cpu, see 59 * therefore has cache benefit from being placed on the same cpu, see
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index b133a28fcde3..21b969a28725 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -34,10 +34,10 @@ static struct task_struct *pick_next_task_idle(struct rq *rq)
34static void 34static void
35dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) 35dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
36{ 36{
37 spin_unlock_irq(&rq->lock); 37 raw_spin_unlock_irq(&rq->lock);
38 printk(KERN_ERR "bad: scheduling from the idle thread!\n"); 38 pr_err("bad: scheduling from the idle thread!\n");
39 dump_stack(); 39 dump_stack();
40 spin_lock_irq(&rq->lock); 40 raw_spin_lock_irq(&rq->lock);
41} 41}
42 42
43static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) 43static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
97 check_preempt_curr(rq, p, 0); 97 check_preempt_curr(rq, p, 0);
98} 98}
99 99
100unsigned int get_rr_interval_idle(struct task_struct *task) 100unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
101{ 101{
102 return 0; 102 return 0;
103} 103}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5c5fef378415..f48328ac216f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -327,7 +327,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
327 327
328 weight = cpumask_weight(rd->span); 328 weight = cpumask_weight(rd->span);
329 329
330 spin_lock(&rt_b->rt_runtime_lock); 330 raw_spin_lock(&rt_b->rt_runtime_lock);
331 rt_period = ktime_to_ns(rt_b->rt_period); 331 rt_period = ktime_to_ns(rt_b->rt_period);
332 for_each_cpu(i, rd->span) { 332 for_each_cpu(i, rd->span) {
333 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 333 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
@@ -336,7 +336,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
336 if (iter == rt_rq) 336 if (iter == rt_rq)
337 continue; 337 continue;
338 338
339 spin_lock(&iter->rt_runtime_lock); 339 raw_spin_lock(&iter->rt_runtime_lock);
340 /* 340 /*
341 * Either all rqs have inf runtime and there's nothing to steal 341 * Either all rqs have inf runtime and there's nothing to steal
342 * or __disable_runtime() below sets a specific rq to inf to 342 * or __disable_runtime() below sets a specific rq to inf to
@@ -358,14 +358,14 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
358 rt_rq->rt_runtime += diff; 358 rt_rq->rt_runtime += diff;
359 more = 1; 359 more = 1;
360 if (rt_rq->rt_runtime == rt_period) { 360 if (rt_rq->rt_runtime == rt_period) {
361 spin_unlock(&iter->rt_runtime_lock); 361 raw_spin_unlock(&iter->rt_runtime_lock);
362 break; 362 break;
363 } 363 }
364 } 364 }
365next: 365next:
366 spin_unlock(&iter->rt_runtime_lock); 366 raw_spin_unlock(&iter->rt_runtime_lock);
367 } 367 }
368 spin_unlock(&rt_b->rt_runtime_lock); 368 raw_spin_unlock(&rt_b->rt_runtime_lock);
369 369
370 return more; 370 return more;
371} 371}
@@ -386,8 +386,8 @@ static void __disable_runtime(struct rq *rq)
386 s64 want; 386 s64 want;
387 int i; 387 int i;
388 388
389 spin_lock(&rt_b->rt_runtime_lock); 389 raw_spin_lock(&rt_b->rt_runtime_lock);
390 spin_lock(&rt_rq->rt_runtime_lock); 390 raw_spin_lock(&rt_rq->rt_runtime_lock);
391 /* 391 /*
392 * Either we're all inf and nobody needs to borrow, or we're 392 * Either we're all inf and nobody needs to borrow, or we're
393 * already disabled and thus have nothing to do, or we have 393 * already disabled and thus have nothing to do, or we have
@@ -396,7 +396,7 @@ static void __disable_runtime(struct rq *rq)
396 if (rt_rq->rt_runtime == RUNTIME_INF || 396 if (rt_rq->rt_runtime == RUNTIME_INF ||
397 rt_rq->rt_runtime == rt_b->rt_runtime) 397 rt_rq->rt_runtime == rt_b->rt_runtime)
398 goto balanced; 398 goto balanced;
399 spin_unlock(&rt_rq->rt_runtime_lock); 399 raw_spin_unlock(&rt_rq->rt_runtime_lock);
400 400
401 /* 401 /*
402 * Calculate the difference between what we started out with 402 * Calculate the difference between what we started out with
@@ -418,7 +418,7 @@ static void __disable_runtime(struct rq *rq)
418 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) 418 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
419 continue; 419 continue;
420 420
421 spin_lock(&iter->rt_runtime_lock); 421 raw_spin_lock(&iter->rt_runtime_lock);
422 if (want > 0) { 422 if (want > 0) {
423 diff = min_t(s64, iter->rt_runtime, want); 423 diff = min_t(s64, iter->rt_runtime, want);
424 iter->rt_runtime -= diff; 424 iter->rt_runtime -= diff;
@@ -427,13 +427,13 @@ static void __disable_runtime(struct rq *rq)
427 iter->rt_runtime -= want; 427 iter->rt_runtime -= want;
428 want -= want; 428 want -= want;
429 } 429 }
430 spin_unlock(&iter->rt_runtime_lock); 430 raw_spin_unlock(&iter->rt_runtime_lock);
431 431
432 if (!want) 432 if (!want)
433 break; 433 break;
434 } 434 }
435 435
436 spin_lock(&rt_rq->rt_runtime_lock); 436 raw_spin_lock(&rt_rq->rt_runtime_lock);
437 /* 437 /*
438 * We cannot be left wanting - that would mean some runtime 438 * We cannot be left wanting - that would mean some runtime
439 * leaked out of the system. 439 * leaked out of the system.
@@ -445,8 +445,8 @@ balanced:
445 * runtime - in which case borrowing doesn't make sense. 445 * runtime - in which case borrowing doesn't make sense.
446 */ 446 */
447 rt_rq->rt_runtime = RUNTIME_INF; 447 rt_rq->rt_runtime = RUNTIME_INF;
448 spin_unlock(&rt_rq->rt_runtime_lock); 448 raw_spin_unlock(&rt_rq->rt_runtime_lock);
449 spin_unlock(&rt_b->rt_runtime_lock); 449 raw_spin_unlock(&rt_b->rt_runtime_lock);
450 } 450 }
451} 451}
452 452
@@ -454,9 +454,9 @@ static void disable_runtime(struct rq *rq)
454{ 454{
455 unsigned long flags; 455 unsigned long flags;
456 456
457 spin_lock_irqsave(&rq->lock, flags); 457 raw_spin_lock_irqsave(&rq->lock, flags);
458 __disable_runtime(rq); 458 __disable_runtime(rq);
459 spin_unlock_irqrestore(&rq->lock, flags); 459 raw_spin_unlock_irqrestore(&rq->lock, flags);
460} 460}
461 461
462static void __enable_runtime(struct rq *rq) 462static void __enable_runtime(struct rq *rq)
@@ -472,13 +472,13 @@ static void __enable_runtime(struct rq *rq)
472 for_each_leaf_rt_rq(rt_rq, rq) { 472 for_each_leaf_rt_rq(rt_rq, rq) {
473 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 473 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
474 474
475 spin_lock(&rt_b->rt_runtime_lock); 475 raw_spin_lock(&rt_b->rt_runtime_lock);
476 spin_lock(&rt_rq->rt_runtime_lock); 476 raw_spin_lock(&rt_rq->rt_runtime_lock);
477 rt_rq->rt_runtime = rt_b->rt_runtime; 477 rt_rq->rt_runtime = rt_b->rt_runtime;
478 rt_rq->rt_time = 0; 478 rt_rq->rt_time = 0;
479 rt_rq->rt_throttled = 0; 479 rt_rq->rt_throttled = 0;
480 spin_unlock(&rt_rq->rt_runtime_lock); 480 raw_spin_unlock(&rt_rq->rt_runtime_lock);
481 spin_unlock(&rt_b->rt_runtime_lock); 481 raw_spin_unlock(&rt_b->rt_runtime_lock);
482 } 482 }
483} 483}
484 484
@@ -486,9 +486,9 @@ static void enable_runtime(struct rq *rq)
486{ 486{
487 unsigned long flags; 487 unsigned long flags;
488 488
489 spin_lock_irqsave(&rq->lock, flags); 489 raw_spin_lock_irqsave(&rq->lock, flags);
490 __enable_runtime(rq); 490 __enable_runtime(rq);
491 spin_unlock_irqrestore(&rq->lock, flags); 491 raw_spin_unlock_irqrestore(&rq->lock, flags);
492} 492}
493 493
494static int balance_runtime(struct rt_rq *rt_rq) 494static int balance_runtime(struct rt_rq *rt_rq)
@@ -496,9 +496,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
496 int more = 0; 496 int more = 0;
497 497
498 if (rt_rq->rt_time > rt_rq->rt_runtime) { 498 if (rt_rq->rt_time > rt_rq->rt_runtime) {
499 spin_unlock(&rt_rq->rt_runtime_lock); 499 raw_spin_unlock(&rt_rq->rt_runtime_lock);
500 more = do_balance_runtime(rt_rq); 500 more = do_balance_runtime(rt_rq);
501 spin_lock(&rt_rq->rt_runtime_lock); 501 raw_spin_lock(&rt_rq->rt_runtime_lock);
502 } 502 }
503 503
504 return more; 504 return more;
@@ -524,11 +524,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
524 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 524 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
525 struct rq *rq = rq_of_rt_rq(rt_rq); 525 struct rq *rq = rq_of_rt_rq(rt_rq);
526 526
527 spin_lock(&rq->lock); 527 raw_spin_lock(&rq->lock);
528 if (rt_rq->rt_time) { 528 if (rt_rq->rt_time) {
529 u64 runtime; 529 u64 runtime;
530 530
531 spin_lock(&rt_rq->rt_runtime_lock); 531 raw_spin_lock(&rt_rq->rt_runtime_lock);
532 if (rt_rq->rt_throttled) 532 if (rt_rq->rt_throttled)
533 balance_runtime(rt_rq); 533 balance_runtime(rt_rq);
534 runtime = rt_rq->rt_runtime; 534 runtime = rt_rq->rt_runtime;
@@ -539,13 +539,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
539 } 539 }
540 if (rt_rq->rt_time || rt_rq->rt_nr_running) 540 if (rt_rq->rt_time || rt_rq->rt_nr_running)
541 idle = 0; 541 idle = 0;
542 spin_unlock(&rt_rq->rt_runtime_lock); 542 raw_spin_unlock(&rt_rq->rt_runtime_lock);
543 } else if (rt_rq->rt_nr_running) 543 } else if (rt_rq->rt_nr_running)
544 idle = 0; 544 idle = 0;
545 545
546 if (enqueue) 546 if (enqueue)
547 sched_rt_rq_enqueue(rt_rq); 547 sched_rt_rq_enqueue(rt_rq);
548 spin_unlock(&rq->lock); 548 raw_spin_unlock(&rq->lock);
549 } 549 }
550 550
551 return idle; 551 return idle;
@@ -624,11 +624,11 @@ static void update_curr_rt(struct rq *rq)
624 rt_rq = rt_rq_of_se(rt_se); 624 rt_rq = rt_rq_of_se(rt_se);
625 625
626 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 626 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
627 spin_lock(&rt_rq->rt_runtime_lock); 627 raw_spin_lock(&rt_rq->rt_runtime_lock);
628 rt_rq->rt_time += delta_exec; 628 rt_rq->rt_time += delta_exec;
629 if (sched_rt_runtime_exceeded(rt_rq)) 629 if (sched_rt_runtime_exceeded(rt_rq))
630 resched_task(curr); 630 resched_task(curr);
631 spin_unlock(&rt_rq->rt_runtime_lock); 631 raw_spin_unlock(&rt_rq->rt_runtime_lock);
632 } 632 }
633 } 633 }
634} 634}
@@ -1246,7 +1246,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1246 task_running(rq, task) || 1246 task_running(rq, task) ||
1247 !task->se.on_rq)) { 1247 !task->se.on_rq)) {
1248 1248
1249 spin_unlock(&lowest_rq->lock); 1249 raw_spin_unlock(&lowest_rq->lock);
1250 lowest_rq = NULL; 1250 lowest_rq = NULL;
1251 break; 1251 break;
1252 } 1252 }
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
1472 * If we are not running and we are not going to reschedule soon, we should 1472 * If we are not running and we are not going to reschedule soon, we should
1473 * try to push tasks away now 1473 * try to push tasks away now
1474 */ 1474 */
1475static void task_wake_up_rt(struct rq *rq, struct task_struct *p) 1475static void task_woken_rt(struct rq *rq, struct task_struct *p)
1476{ 1476{
1477 if (!task_running(rq, p) && 1477 if (!task_running(rq, p) &&
1478 !test_tsk_need_resched(rq->curr) && 1478 !test_tsk_need_resched(rq->curr) &&
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq)
1721 dequeue_pushable_task(rq, p); 1721 dequeue_pushable_task(rq, p);
1722} 1722}
1723 1723
1724unsigned int get_rr_interval_rt(struct task_struct *task) 1724unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
1725{ 1725{
1726 /* 1726 /*
1727 * Time slice is 0 for SCHED_FIFO tasks 1727 * Time slice is 0 for SCHED_FIFO tasks
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
1753 .rq_offline = rq_offline_rt, 1753 .rq_offline = rq_offline_rt,
1754 .pre_schedule = pre_schedule_rt, 1754 .pre_schedule = pre_schedule_rt,
1755 .post_schedule = post_schedule_rt, 1755 .post_schedule = post_schedule_rt,
1756 .task_wake_up = task_wake_up_rt, 1756 .task_woken = task_woken_rt,
1757 .switched_from = switched_from_rt, 1757 .switched_from = switched_from_rt,
1758#endif 1758#endif
1759 1759
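
The spin_lock -> raw_spin_lock churn through sched_rt.c (and kernel/smp.c below) is part of the tree-wide lock split in this series: spinlock_t can become a sleeping lock on PREEMPT_RT, while raw_spinlock_t keeps the always-spinning semantics the scheduler core needs. The call shape is unchanged; a minimal sketch (my_lock is a hypothetical lock for illustration):

static DEFINE_RAW_SPINLOCK(my_lock);

static void critical_section(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_lock, flags);
	/* must not sleep here, even on PREEMPT_RT */
	raw_spin_unlock_irqrestore(&my_lock, flags);
}
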
diff --git a/kernel/signal.c b/kernel/signal.c
index 6b982f2cf524..d09692b40376 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
218 struct user_struct *user; 218 struct user_struct *user;
219 219
220 /* 220 /*
221 * We won't get problems with the target's UID changing under us 221 * Protect access to @t credentials. This can go away when all
222 * because changing it requires RCU be used, and if t != current, the 222 * callers hold rcu read lock.
223 * caller must be holding the RCU readlock (by way of a spinlock) and
224 * we use RCU protection here
225 */ 223 */
224 rcu_read_lock();
226 user = get_uid(__task_cred(t)->user); 225 user = get_uid(__task_cred(t)->user);
227 atomic_inc(&user->sigpending); 226 atomic_inc(&user->sigpending);
227 rcu_read_unlock();
228 228
229 if (override_rlimit || 229 if (override_rlimit ||
230 atomic_read(&user->sigpending) <= 230 atomic_read(&user->sigpending) <=
@@ -423,7 +423,7 @@ still_pending:
423 */ 423 */
424 info->si_signo = sig; 424 info->si_signo = sig;
425 info->si_errno = 0; 425 info->si_errno = 0;
426 info->si_code = 0; 426 info->si_code = SI_USER;
427 info->si_pid = 0; 427 info->si_pid = 0;
428 info->si_uid = 0; 428 info->si_uid = 0;
429 } 429 }
@@ -607,6 +607,17 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
607 return 1; 607 return 1;
608} 608}
609 609
610static inline int is_si_special(const struct siginfo *info)
611{
612 return info <= SEND_SIG_FORCED;
613}
614
615static inline bool si_fromuser(const struct siginfo *info)
616{
617 return info == SEND_SIG_NOINFO ||
618 (!is_si_special(info) && SI_FROMUSER(info));
619}
620
610/* 621/*
611 * Bad permissions for sending the signal 622 * Bad permissions for sending the signal
612 * - the caller must hold at least the RCU read lock 623 * - the caller must hold at least the RCU read lock
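
The `info <= SEND_SIG_FORCED` comparison in is_si_special() works because the special sender tokens are small integers cast to pointers; for reference, their definitions (from include/linux/sched.h of this era, not part of this patch):

#define SEND_SIG_NOINFO	((struct siginfo *) 0)
#define SEND_SIG_PRIV	((struct siginfo *) 1)
#define SEND_SIG_FORCED	((struct siginfo *) 2)
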
@@ -621,7 +632,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
621 if (!valid_signal(sig)) 632 if (!valid_signal(sig))
622 return -EINVAL; 633 return -EINVAL;
623 634
624 if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) 635 if (!si_fromuser(info))
625 return 0; 636 return 0;
626 637
627 error = audit_signal_info(sig, t); /* Let audit system see the signal */ 638 error = audit_signal_info(sig, t); /* Let audit system see the signal */
@@ -949,9 +960,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
949 int from_ancestor_ns = 0; 960 int from_ancestor_ns = 0;
950 961
951#ifdef CONFIG_PID_NS 962#ifdef CONFIG_PID_NS
952 if (!is_si_special(info) && SI_FROMUSER(info) && 963 from_ancestor_ns = si_fromuser(info) &&
953 task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0) 964 !task_pid_nr_ns(current, task_active_pid_ns(t));
954 from_ancestor_ns = 1;
955#endif 965#endif
956 966
957 return __send_signal(sig, info, t, group, from_ancestor_ns); 967 return __send_signal(sig, info, t, group, from_ancestor_ns);
@@ -1052,12 +1062,6 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
1052 return ret; 1062 return ret;
1053} 1063}
1054 1064
1055void
1056force_sig_specific(int sig, struct task_struct *t)
1057{
1058 force_sig_info(sig, SEND_SIG_FORCED, t);
1059}
1060
1061/* 1065/*
1062 * Nuke all other threads in the group. 1066 * Nuke all other threads in the group.
1063 */ 1067 */
@@ -1175,19 +1179,19 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1175 int ret = -EINVAL; 1179 int ret = -EINVAL;
1176 struct task_struct *p; 1180 struct task_struct *p;
1177 const struct cred *pcred; 1181 const struct cred *pcred;
1182 unsigned long flags;
1178 1183
1179 if (!valid_signal(sig)) 1184 if (!valid_signal(sig))
1180 return ret; 1185 return ret;
1181 1186
1182 read_lock(&tasklist_lock); 1187 rcu_read_lock();
1183 p = pid_task(pid, PIDTYPE_PID); 1188 p = pid_task(pid, PIDTYPE_PID);
1184 if (!p) { 1189 if (!p) {
1185 ret = -ESRCH; 1190 ret = -ESRCH;
1186 goto out_unlock; 1191 goto out_unlock;
1187 } 1192 }
1188 pcred = __task_cred(p); 1193 pcred = __task_cred(p);
1189 if ((info == SEND_SIG_NOINFO || 1194 if (si_fromuser(info) &&
1190 (!is_si_special(info) && SI_FROMUSER(info))) &&
1191 euid != pcred->suid && euid != pcred->uid && 1195 euid != pcred->suid && euid != pcred->uid &&
1192 uid != pcred->suid && uid != pcred->uid) { 1196 uid != pcred->suid && uid != pcred->uid) {
1193 ret = -EPERM; 1197 ret = -EPERM;
@@ -1196,14 +1200,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1196 ret = security_task_kill(p, info, sig, secid); 1200 ret = security_task_kill(p, info, sig, secid);
1197 if (ret) 1201 if (ret)
1198 goto out_unlock; 1202 goto out_unlock;
1199 if (sig && p->sighand) { 1203
1200 unsigned long flags; 1204 if (sig) {
1201 spin_lock_irqsave(&p->sighand->siglock, flags); 1205 if (lock_task_sighand(p, &flags)) {
1202 ret = __send_signal(sig, info, p, 1, 0); 1206 ret = __send_signal(sig, info, p, 1, 0);
1203 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1207 unlock_task_sighand(p, &flags);
1208 } else
1209 ret = -ESRCH;
1204 } 1210 }
1205out_unlock: 1211out_unlock:
1206 read_unlock(&tasklist_lock); 1212 rcu_read_unlock();
1207 return ret; 1213 return ret;
1208} 1214}
1209EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); 1215EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
@@ -1837,11 +1843,6 @@ relock:
1837 1843
1838 for (;;) { 1844 for (;;) {
1839 struct k_sigaction *ka; 1845 struct k_sigaction *ka;
1840
1841 if (unlikely(signal->group_stop_count > 0) &&
1842 do_signal_stop(0))
1843 goto relock;
1844
1845 /* 1846 /*
1846 * Tracing can induce an artificial signal and choose sigaction. 1847
1847 * The return value in @signr determines the default action, 1848 * The return value in @signr determines the default action,
@@ -1853,6 +1854,10 @@ relock:
1853 if (unlikely(signr != 0)) 1854 if (unlikely(signr != 0))
1854 ka = return_ka; 1855 ka = return_ka;
1855 else { 1856 else {
1857 if (unlikely(signal->group_stop_count > 0) &&
1858 do_signal_stop(0))
1859 goto relock;
1860
1856 signr = dequeue_signal(current, &current->blocked, 1861 signr = dequeue_signal(current, &current->blocked,
1857 info); 1862 info);
1858 1863
diff --git a/kernel/smp.c b/kernel/smp.c
index a8c76069cf50..de735a6637d0 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,11 +16,11 @@ static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
16 16
17static struct { 17static struct {
18 struct list_head queue; 18 struct list_head queue;
19 spinlock_t lock; 19 raw_spinlock_t lock;
20} call_function __cacheline_aligned_in_smp = 20} call_function __cacheline_aligned_in_smp =
21 { 21 {
22 .queue = LIST_HEAD_INIT(call_function.queue), 22 .queue = LIST_HEAD_INIT(call_function.queue),
23 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), 23 .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
24 }; 24 };
25 25
26enum { 26enum {
@@ -35,7 +35,7 @@ struct call_function_data {
35 35
36struct call_single_queue { 36struct call_single_queue {
37 struct list_head list; 37 struct list_head list;
38 spinlock_t lock; 38 raw_spinlock_t lock;
39}; 39};
40 40
41static DEFINE_PER_CPU(struct call_function_data, cfd_data); 41static DEFINE_PER_CPU(struct call_function_data, cfd_data);
@@ -80,7 +80,7 @@ static int __cpuinit init_call_single_data(void)
80 for_each_possible_cpu(i) { 80 for_each_possible_cpu(i) {
81 struct call_single_queue *q = &per_cpu(call_single_queue, i); 81 struct call_single_queue *q = &per_cpu(call_single_queue, i);
82 82
83 spin_lock_init(&q->lock); 83 raw_spin_lock_init(&q->lock);
84 INIT_LIST_HEAD(&q->list); 84 INIT_LIST_HEAD(&q->list);
85 } 85 }
86 86
@@ -141,10 +141,10 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
141 unsigned long flags; 141 unsigned long flags;
142 int ipi; 142 int ipi;
143 143
144 spin_lock_irqsave(&dst->lock, flags); 144 raw_spin_lock_irqsave(&dst->lock, flags);
145 ipi = list_empty(&dst->list); 145 ipi = list_empty(&dst->list);
146 list_add_tail(&data->list, &dst->list); 146 list_add_tail(&data->list, &dst->list);
147 spin_unlock_irqrestore(&dst->lock, flags); 147 raw_spin_unlock_irqrestore(&dst->lock, flags);
148 148
149 /* 149 /*
150 * The list addition should be visible before sending the IPI 150 * The list addition should be visible before sending the IPI
@@ -171,7 +171,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
171void generic_smp_call_function_interrupt(void) 171void generic_smp_call_function_interrupt(void)
172{ 172{
173 struct call_function_data *data; 173 struct call_function_data *data;
174 int cpu = get_cpu(); 174 int cpu = smp_processor_id();
175 175
176 /* 176 /*
177 * Shouldn't receive this interrupt on a cpu that is not yet online. 177 * Shouldn't receive this interrupt on a cpu that is not yet online.
@@ -201,9 +201,9 @@ void generic_smp_call_function_interrupt(void)
201 refs = atomic_dec_return(&data->refs); 201 refs = atomic_dec_return(&data->refs);
202 WARN_ON(refs < 0); 202 WARN_ON(refs < 0);
203 if (!refs) { 203 if (!refs) {
204 spin_lock(&call_function.lock); 204 raw_spin_lock(&call_function.lock);
205 list_del_rcu(&data->csd.list); 205 list_del_rcu(&data->csd.list);
206 spin_unlock(&call_function.lock); 206 raw_spin_unlock(&call_function.lock);
207 } 207 }
208 208
209 if (refs) 209 if (refs)
@@ -212,7 +212,6 @@ void generic_smp_call_function_interrupt(void)
212 csd_unlock(&data->csd); 212 csd_unlock(&data->csd);
213 } 213 }
214 214
215 put_cpu();
216} 215}
217 216
218/* 217/*
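
Dropping the get_cpu()/put_cpu() pair above is safe because generic_smp_call_function_interrupt() only runs from the IPI (hardirq) path, where preemption is already disabled, so a bare smp_processor_id() cannot race with migration. A sketch of the pattern under that assumption (hypothetical handler name):

/* hardirq context: preemption is off, so the CPU id is stable */
void some_ipi_handler(void)
{
	int cpu = smp_processor_id();	/* no get_cpu()/put_cpu() needed */
	/* ... walk the call queue for entries targeting @cpu ... */
}
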
@@ -230,9 +229,9 @@ void generic_smp_call_function_single_interrupt(void)
230 */ 229 */
231 WARN_ON_ONCE(!cpu_online(smp_processor_id())); 230 WARN_ON_ONCE(!cpu_online(smp_processor_id()));
232 231
233 spin_lock(&q->lock); 232 raw_spin_lock(&q->lock);
234 list_replace_init(&q->list, &list); 233 list_replace_init(&q->list, &list);
235 spin_unlock(&q->lock); 234 raw_spin_unlock(&q->lock);
236 235
237 while (!list_empty(&list)) { 236 while (!list_empty(&list)) {
238 struct call_single_data *data; 237 struct call_single_data *data;
@@ -449,14 +448,14 @@ void smp_call_function_many(const struct cpumask *mask,
449 cpumask_clear_cpu(this_cpu, data->cpumask); 448 cpumask_clear_cpu(this_cpu, data->cpumask);
450 atomic_set(&data->refs, cpumask_weight(data->cpumask)); 449 atomic_set(&data->refs, cpumask_weight(data->cpumask));
451 450
452 spin_lock_irqsave(&call_function.lock, flags); 451 raw_spin_lock_irqsave(&call_function.lock, flags);
453 /* 452 /*
454 * Place entry at the _HEAD_ of the list, so that any cpu still 453 * Place entry at the _HEAD_ of the list, so that any cpu still
455 * observing the entry in generic_smp_call_function_interrupt() 454 * observing the entry in generic_smp_call_function_interrupt()
456 * will not miss any other list entries: 455 * will not miss any other list entries:
457 */ 456 */
458 list_add_rcu(&data->csd.list, &call_function.queue); 457 list_add_rcu(&data->csd.list, &call_function.queue);
459 spin_unlock_irqrestore(&call_function.lock, flags); 458 raw_spin_unlock_irqrestore(&call_function.lock, flags);
460 459
461 /* 460 /*
462 * Make the list addition visible before sending the ipi. 461 * Make the list addition visible before sending the ipi.
@@ -501,20 +500,20 @@ EXPORT_SYMBOL(smp_call_function);
501 500
502void ipi_call_lock(void) 501void ipi_call_lock(void)
503{ 502{
504 spin_lock(&call_function.lock); 503 raw_spin_lock(&call_function.lock);
505} 504}
506 505
507void ipi_call_unlock(void) 506void ipi_call_unlock(void)
508{ 507{
509 spin_unlock(&call_function.lock); 508 raw_spin_unlock(&call_function.lock);
510} 509}
511 510
512void ipi_call_lock_irq(void) 511void ipi_call_lock_irq(void)
513{ 512{
514 spin_lock_irq(&call_function.lock); 513 raw_spin_lock_irq(&call_function.lock);
515} 514}
516 515
517void ipi_call_unlock_irq(void) 516void ipi_call_unlock_irq(void)
518{ 517{
519 spin_unlock_irq(&call_function.lock); 518 raw_spin_unlock_irq(&call_function.lock);
520} 519}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 21939d9e830e..a09502e2ef75 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -697,7 +697,7 @@ void __init softirq_init(void)
697 open_softirq(HI_SOFTIRQ, tasklet_hi_action); 697 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
698} 698}
699 699
700static int ksoftirqd(void * __bind_cpu) 700static int run_ksoftirqd(void * __bind_cpu)
701{ 701{
702 set_current_state(TASK_INTERRUPTIBLE); 702 set_current_state(TASK_INTERRUPTIBLE);
703 703
@@ -810,7 +810,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
810 switch (action) { 810 switch (action) {
811 case CPU_UP_PREPARE: 811 case CPU_UP_PREPARE:
812 case CPU_UP_PREPARE_FROZEN: 812 case CPU_UP_PREPARE_FROZEN:
813 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); 813 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
814 if (IS_ERR(p)) { 814 if (IS_ERR(p)) {
815 printk("ksoftirqd for %i failed\n", hotcpu); 815 printk("ksoftirqd for %i failed\n", hotcpu);
816 return NOTIFY_BAD; 816 return NOTIFY_BAD;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 81324d12eb35..d22579087e27 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -22,9 +22,9 @@
22 22
23static DEFINE_SPINLOCK(print_lock); 23static DEFINE_SPINLOCK(print_lock);
24 24
25static DEFINE_PER_CPU(unsigned long, touch_timestamp); 25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26static DEFINE_PER_CPU(unsigned long, print_timestamp); 26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28 28
29static int __read_mostly did_panic; 29static int __read_mostly did_panic;
30int __read_mostly softlockup_thresh = 60; 30int __read_mostly softlockup_thresh = 60;
@@ -70,12 +70,12 @@ static void __touch_softlockup_watchdog(void)
70{ 70{
71 int this_cpu = raw_smp_processor_id(); 71 int this_cpu = raw_smp_processor_id();
72 72
73 __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); 73 __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
74} 74}
75 75
76void touch_softlockup_watchdog(void) 76void touch_softlockup_watchdog(void)
77{ 77{
78 __raw_get_cpu_var(touch_timestamp) = 0; 78 __raw_get_cpu_var(softlockup_touch_ts) = 0;
79} 79}
80EXPORT_SYMBOL(touch_softlockup_watchdog); 80EXPORT_SYMBOL(touch_softlockup_watchdog);
81 81
@@ -85,7 +85,7 @@ void touch_all_softlockup_watchdogs(void)
85 85
86 /* Cause each CPU to re-update its timestamp rather than complain */ 86 /* Cause each CPU to re-update its timestamp rather than complain */
87 for_each_online_cpu(cpu) 87 for_each_online_cpu(cpu)
88 per_cpu(touch_timestamp, cpu) = 0; 88 per_cpu(softlockup_touch_ts, cpu) = 0;
89} 89}
90EXPORT_SYMBOL(touch_all_softlockup_watchdogs); 90EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
91 91
@@ -104,28 +104,28 @@ int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
104void softlockup_tick(void) 104void softlockup_tick(void)
105{ 105{
106 int this_cpu = smp_processor_id(); 106 int this_cpu = smp_processor_id();
107 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); 107 unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
108 unsigned long print_timestamp; 108 unsigned long print_ts;
109 struct pt_regs *regs = get_irq_regs(); 109 struct pt_regs *regs = get_irq_regs();
110 unsigned long now; 110 unsigned long now;
111 111
112 /* Is detection switched off? */ 112 /* Is detection switched off? */
113 if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { 113 if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
114 /* Be sure we don't false trigger if switched back on */ 114 /* Be sure we don't false trigger if switched back on */
115 if (touch_timestamp) 115 if (touch_ts)
116 per_cpu(touch_timestamp, this_cpu) = 0; 116 per_cpu(softlockup_touch_ts, this_cpu) = 0;
117 return; 117 return;
118 } 118 }
119 119
120 if (touch_timestamp == 0) { 120 if (touch_ts == 0) {
121 __touch_softlockup_watchdog(); 121 __touch_softlockup_watchdog();
122 return; 122 return;
123 } 123 }
124 124
125 print_timestamp = per_cpu(print_timestamp, this_cpu); 125 print_ts = per_cpu(softlockup_print_ts, this_cpu);
126 126
127 /* report at most once a second */ 127 /* report at most once a second */
128 if (print_timestamp == touch_timestamp || did_panic) 128 if (print_ts == touch_ts || did_panic)
129 return; 129 return;
130 130
131 /* do not print during early bootup: */ 131 /* do not print during early bootup: */
@@ -140,18 +140,18 @@ void softlockup_tick(void)
140 * Wake up the high-prio watchdog task twice per 140 * Wake up the high-prio watchdog task twice per
141 * threshold timespan. 141 * threshold timespan.
142 */ 142 */
143 if (now > touch_timestamp + softlockup_thresh/2) 143 if (now > touch_ts + softlockup_thresh/2)
144 wake_up_process(per_cpu(watchdog_task, this_cpu)); 144 wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
145 145
146 /* Warn about unreasonable delays: */ 146 /* Warn about unreasonable delays: */
147 if (now <= (touch_timestamp + softlockup_thresh)) 147 if (now <= (touch_ts + softlockup_thresh))
148 return; 148 return;
149 149
150 per_cpu(print_timestamp, this_cpu) = touch_timestamp; 150 per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
151 151
152 spin_lock(&print_lock); 152 spin_lock(&print_lock);
153 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", 153 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
154 this_cpu, now - touch_timestamp, 154 this_cpu, now - touch_ts,
155 current->comm, task_pid_nr(current)); 155 current->comm, task_pid_nr(current));
156 print_modules(); 156 print_modules();
157 print_irqtrace_events(current); 157 print_irqtrace_events(current);
@@ -209,32 +209,32 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
209 switch (action) { 209 switch (action) {
210 case CPU_UP_PREPARE: 210 case CPU_UP_PREPARE:
211 case CPU_UP_PREPARE_FROZEN: 211 case CPU_UP_PREPARE_FROZEN:
212 BUG_ON(per_cpu(watchdog_task, hotcpu)); 212 BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
213 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); 213 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
214 if (IS_ERR(p)) { 214 if (IS_ERR(p)) {
215 printk(KERN_ERR "watchdog for %i failed\n", hotcpu); 215 printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
216 return NOTIFY_BAD; 216 return NOTIFY_BAD;
217 } 217 }
218 per_cpu(touch_timestamp, hotcpu) = 0; 218 per_cpu(softlockup_touch_ts, hotcpu) = 0;
219 per_cpu(watchdog_task, hotcpu) = p; 219 per_cpu(softlockup_watchdog, hotcpu) = p;
220 kthread_bind(p, hotcpu); 220 kthread_bind(p, hotcpu);
221 break; 221 break;
222 case CPU_ONLINE: 222 case CPU_ONLINE:
223 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
224 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
225 break; 225 break;
226#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
227 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
228 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
229 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(softlockup_watchdog, hotcpu))
230 break; 230 break;
231 /* Unbind so it can run. Fall thru. */ 231 /* Unbind so it can run. Fall thru. */
232 kthread_bind(per_cpu(watchdog_task, hotcpu), 232 kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
233 cpumask_any(cpu_online_mask)); 233 cpumask_any(cpu_online_mask));
234 case CPU_DEAD: 234 case CPU_DEAD:
235 case CPU_DEAD_FROZEN: 235 case CPU_DEAD_FROZEN:
236 p = per_cpu(watchdog_task, hotcpu); 236 p = per_cpu(softlockup_watchdog, hotcpu);
237 per_cpu(watchdog_task, hotcpu) = NULL; 237 per_cpu(softlockup_watchdog, hotcpu) = NULL;
238 kthread_stop(p); 238 kthread_stop(p);
239 break; 239 break;
240#endif /* CONFIG_HOTPLUG_CPU */ 240#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 41e042219ff6..be6517fb9c14 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -32,6 +32,8 @@
32 * include/linux/spinlock_api_smp.h 32 * include/linux/spinlock_api_smp.h
33 */ 33 */
34#else 34#else
35#define raw_read_can_lock(l) read_can_lock(l)
36#define raw_write_can_lock(l) write_can_lock(l)
35/* 37/*
36 * We build the __lock_function inlines here. They are too large for 38 * We build the __lock_function inlines here. They are too large for
37 * inlining all over the place, but here is only one user per function 39 * inlining all over the place, but here is only one user per function
@@ -42,49 +44,49 @@
42 * towards that other CPU that it should break the lock ASAP. 44 * towards that other CPU that it should break the lock ASAP.
43 */ 45 */
44#define BUILD_LOCK_OPS(op, locktype) \ 46#define BUILD_LOCK_OPS(op, locktype) \
45void __lockfunc __##op##_lock(locktype##_t *lock) \ 47void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
46{ \ 48{ \
47 for (;;) { \ 49 for (;;) { \
48 preempt_disable(); \ 50 preempt_disable(); \
49 if (likely(_raw_##op##_trylock(lock))) \ 51 if (likely(do_raw_##op##_trylock(lock))) \
50 break; \ 52 break; \
51 preempt_enable(); \ 53 preempt_enable(); \
52 \ 54 \
53 if (!(lock)->break_lock) \ 55 if (!(lock)->break_lock) \
54 (lock)->break_lock = 1; \ 56 (lock)->break_lock = 1; \
55 while (!op##_can_lock(lock) && (lock)->break_lock) \ 57 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
56 _raw_##op##_relax(&lock->raw_lock); \ 58 arch_##op##_relax(&lock->raw_lock); \
57 } \ 59 } \
58 (lock)->break_lock = 0; \ 60 (lock)->break_lock = 0; \
59} \ 61} \
60 \ 62 \
61unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ 63unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
62{ \ 64{ \
63 unsigned long flags; \ 65 unsigned long flags; \
64 \ 66 \
65 for (;;) { \ 67 for (;;) { \
66 preempt_disable(); \ 68 preempt_disable(); \
67 local_irq_save(flags); \ 69 local_irq_save(flags); \
68 if (likely(_raw_##op##_trylock(lock))) \ 70 if (likely(do_raw_##op##_trylock(lock))) \
69 break; \ 71 break; \
70 local_irq_restore(flags); \ 72 local_irq_restore(flags); \
71 preempt_enable(); \ 73 preempt_enable(); \
72 \ 74 \
73 if (!(lock)->break_lock) \ 75 if (!(lock)->break_lock) \
74 (lock)->break_lock = 1; \ 76 (lock)->break_lock = 1; \
75 while (!op##_can_lock(lock) && (lock)->break_lock) \ 77 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
76 _raw_##op##_relax(&lock->raw_lock); \ 78 arch_##op##_relax(&lock->raw_lock); \
77 } \ 79 } \
78 (lock)->break_lock = 0; \ 80 (lock)->break_lock = 0; \
79 return flags; \ 81 return flags; \
80} \ 82} \
81 \ 83 \
82void __lockfunc __##op##_lock_irq(locktype##_t *lock) \ 84void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
83{ \ 85{ \
84 _##op##_lock_irqsave(lock); \ 86 _raw_##op##_lock_irqsave(lock); \
85} \ 87} \
86 \ 88 \
87void __lockfunc __##op##_lock_bh(locktype##_t *lock) \ 89void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
88{ \ 90{ \
89 unsigned long flags; \ 91 unsigned long flags; \
90 \ 92 \
@@ -93,7 +95,7 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
93 /* irq-disabling. We use the generic preemption-aware */ \ 95 /* irq-disabling. We use the generic preemption-aware */ \
94 /* function: */ \ 96 /* function: */ \
95 /**/ \ 97 /**/ \
96 flags = _##op##_lock_irqsave(lock); \ 98 flags = _raw_##op##_lock_irqsave(lock); \
97 local_bh_disable(); \ 99 local_bh_disable(); \
98 local_irq_restore(flags); \ 100 local_irq_restore(flags); \
99} \ 101} \
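
For orientation, the BUILD_LOCK_OPS(spin, raw_spinlock) instantiation in the next hunk stamps out these contention-aware slow paths under their new __raw_ names; the first definition in the macro expands to:

void __lockfunc __raw_spin_lock(raw_spinlock_t *lock)
{
	for (;;) {
		preempt_disable();
		if (likely(do_raw_spin_trylock(lock)))
			break;
		preempt_enable();

		if (!(lock)->break_lock)
			(lock)->break_lock = 1;
		while (!raw_spin_can_lock(lock) && (lock)->break_lock)
			arch_spin_relax(&lock->raw_lock);
	}
	(lock)->break_lock = 0;
}
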
@@ -107,269 +109,269 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
107 * __[spin|read|write]_lock_irqsave() 109 * __[spin|read|write]_lock_irqsave()
108 * __[spin|read|write]_lock_bh() 110 * __[spin|read|write]_lock_bh()
109 */ 111 */
110BUILD_LOCK_OPS(spin, spinlock); 112BUILD_LOCK_OPS(spin, raw_spinlock);
111BUILD_LOCK_OPS(read, rwlock); 113BUILD_LOCK_OPS(read, rwlock);
112BUILD_LOCK_OPS(write, rwlock); 114BUILD_LOCK_OPS(write, rwlock);
113 115
114#endif 116#endif
115 117
116#ifdef CONFIG_DEBUG_LOCK_ALLOC 118#ifndef CONFIG_INLINE_SPIN_TRYLOCK
117 119int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
118void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
119{ 120{
120 preempt_disable(); 121 return __raw_spin_trylock(lock);
121 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
122 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
123} 122}
124EXPORT_SYMBOL(_spin_lock_nested); 123EXPORT_SYMBOL(_raw_spin_trylock);
124#endif
125 125
126unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, 126#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
127 int subclass) 127int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
128{ 128{
129 unsigned long flags; 129 return __raw_spin_trylock_bh(lock);
130
131 local_irq_save(flags);
132 preempt_disable();
133 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
134 LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock,
135 _raw_spin_lock_flags, &flags);
136 return flags;
137} 130}
138EXPORT_SYMBOL(_spin_lock_irqsave_nested); 131EXPORT_SYMBOL(_raw_spin_trylock_bh);
132#endif
139 133
140void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, 134#ifndef CONFIG_INLINE_SPIN_LOCK
141 struct lockdep_map *nest_lock) 135void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
142{ 136{
143 preempt_disable(); 137 __raw_spin_lock(lock);
144 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
145 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
146} 138}
147EXPORT_SYMBOL(_spin_lock_nest_lock); 139EXPORT_SYMBOL(_raw_spin_lock);
148
149#endif 140#endif
150 141
151#ifndef CONFIG_INLINE_SPIN_TRYLOCK 142#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
152int __lockfunc _spin_trylock(spinlock_t *lock) 143unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
153{ 144{
154 return __spin_trylock(lock); 145 return __raw_spin_lock_irqsave(lock);
155} 146}
156EXPORT_SYMBOL(_spin_trylock); 147EXPORT_SYMBOL(_raw_spin_lock_irqsave);
157#endif 148#endif
158 149
159#ifndef CONFIG_INLINE_READ_TRYLOCK 150#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
160int __lockfunc _read_trylock(rwlock_t *lock) 151void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
161{ 152{
162 return __read_trylock(lock); 153 __raw_spin_lock_irq(lock);
163} 154}
164EXPORT_SYMBOL(_read_trylock); 155EXPORT_SYMBOL(_raw_spin_lock_irq);
165#endif 156#endif
166 157
167#ifndef CONFIG_INLINE_WRITE_TRYLOCK 158#ifndef CONFIG_INLINE_SPIN_LOCK_BH
168int __lockfunc _write_trylock(rwlock_t *lock) 159void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
169{ 160{
170 return __write_trylock(lock); 161 __raw_spin_lock_bh(lock);
171} 162}
172EXPORT_SYMBOL(_write_trylock); 163EXPORT_SYMBOL(_raw_spin_lock_bh);
173#endif 164#endif
174 165
175#ifndef CONFIG_INLINE_READ_LOCK 166#ifndef CONFIG_INLINE_SPIN_UNLOCK
176void __lockfunc _read_lock(rwlock_t *lock) 167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
177{ 168{
178 __read_lock(lock); 169 __raw_spin_unlock(lock);
179} 170}
180EXPORT_SYMBOL(_read_lock); 171EXPORT_SYMBOL(_raw_spin_unlock);
181#endif 172#endif
182 173
183#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE 174#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
184unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) 175void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
185{ 176{
186 return __spin_lock_irqsave(lock); 177 __raw_spin_unlock_irqrestore(lock, flags);
187} 178}
188EXPORT_SYMBOL(_spin_lock_irqsave); 179EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
189#endif 180#endif
190 181
191#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ 182#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
192void __lockfunc _spin_lock_irq(spinlock_t *lock) 183void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
193{ 184{
194 __spin_lock_irq(lock); 185 __raw_spin_unlock_irq(lock);
195} 186}
196EXPORT_SYMBOL(_spin_lock_irq); 187EXPORT_SYMBOL(_raw_spin_unlock_irq);
197#endif 188#endif
198 189
199#ifndef CONFIG_INLINE_SPIN_LOCK_BH 190#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
200void __lockfunc _spin_lock_bh(spinlock_t *lock) 191void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
201{ 192{
202 __spin_lock_bh(lock); 193 __raw_spin_unlock_bh(lock);
203} 194}
204EXPORT_SYMBOL(_spin_lock_bh); 195EXPORT_SYMBOL(_raw_spin_unlock_bh);
205#endif 196#endif
206 197
207#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE 198#ifndef CONFIG_INLINE_READ_TRYLOCK
208unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) 199int __lockfunc _raw_read_trylock(rwlock_t *lock)
209{ 200{
210 return __read_lock_irqsave(lock); 201 return __raw_read_trylock(lock);
211} 202}
212EXPORT_SYMBOL(_read_lock_irqsave); 203EXPORT_SYMBOL(_raw_read_trylock);
213#endif 204#endif
214 205
215#ifndef CONFIG_INLINE_READ_LOCK_IRQ 206#ifndef CONFIG_INLINE_READ_LOCK
216void __lockfunc _read_lock_irq(rwlock_t *lock) 207void __lockfunc _raw_read_lock(rwlock_t *lock)
217{ 208{
218 __read_lock_irq(lock); 209 __raw_read_lock(lock);
219} 210}
220EXPORT_SYMBOL(_read_lock_irq); 211EXPORT_SYMBOL(_raw_read_lock);
221#endif 212#endif
222 213
223#ifndef CONFIG_INLINE_READ_LOCK_BH 214#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
224void __lockfunc _read_lock_bh(rwlock_t *lock) 215unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
225{ 216{
226 __read_lock_bh(lock); 217 return __raw_read_lock_irqsave(lock);
227} 218}
228EXPORT_SYMBOL(_read_lock_bh); 219EXPORT_SYMBOL(_raw_read_lock_irqsave);
229#endif 220#endif
230 221
231#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE 222#ifndef CONFIG_INLINE_READ_LOCK_IRQ
232unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) 223void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
233{ 224{
234 return __write_lock_irqsave(lock); 225 __raw_read_lock_irq(lock);
235} 226}
236EXPORT_SYMBOL(_write_lock_irqsave); 227EXPORT_SYMBOL(_raw_read_lock_irq);
237#endif 228#endif
238 229
239#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ 230#ifndef CONFIG_INLINE_READ_LOCK_BH
240void __lockfunc _write_lock_irq(rwlock_t *lock) 231void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
241{ 232{
242 __write_lock_irq(lock); 233 __raw_read_lock_bh(lock);
243} 234}
244EXPORT_SYMBOL(_write_lock_irq); 235EXPORT_SYMBOL(_raw_read_lock_bh);
245#endif 236#endif
246 237
247#ifndef CONFIG_INLINE_WRITE_LOCK_BH 238#ifndef CONFIG_INLINE_READ_UNLOCK
248void __lockfunc _write_lock_bh(rwlock_t *lock) 239void __lockfunc _raw_read_unlock(rwlock_t *lock)
249{ 240{
250 __write_lock_bh(lock); 241 __raw_read_unlock(lock);
251} 242}
252EXPORT_SYMBOL(_write_lock_bh); 243EXPORT_SYMBOL(_raw_read_unlock);
253#endif 244#endif
254 245
255#ifndef CONFIG_INLINE_SPIN_LOCK 246#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
256void __lockfunc _spin_lock(spinlock_t *lock) 247void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
257{ 248{
258 __spin_lock(lock); 249 __raw_read_unlock_irqrestore(lock, flags);
259} 250}
260EXPORT_SYMBOL(_spin_lock); 251EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
261#endif 252#endif
262 253
263#ifndef CONFIG_INLINE_WRITE_LOCK 254#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
264void __lockfunc _write_lock(rwlock_t *lock) 255void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
265{ 256{
266 __write_lock(lock); 257 __raw_read_unlock_irq(lock);
267} 258}
268EXPORT_SYMBOL(_write_lock); 259EXPORT_SYMBOL(_raw_read_unlock_irq);
269#endif 260#endif
270 261
271#ifndef CONFIG_INLINE_SPIN_UNLOCK 262#ifndef CONFIG_INLINE_READ_UNLOCK_BH
272void __lockfunc _spin_unlock(spinlock_t *lock) 263void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
273{ 264{
274 __spin_unlock(lock); 265 __raw_read_unlock_bh(lock);
275} 266}
276EXPORT_SYMBOL(_spin_unlock); 267EXPORT_SYMBOL(_raw_read_unlock_bh);
277#endif 268#endif
278 269
279#ifndef CONFIG_INLINE_WRITE_UNLOCK 270#ifndef CONFIG_INLINE_WRITE_TRYLOCK
280void __lockfunc _write_unlock(rwlock_t *lock) 271int __lockfunc _raw_write_trylock(rwlock_t *lock)
281{ 272{
282 __write_unlock(lock); 273 return __raw_write_trylock(lock);
283} 274}
284EXPORT_SYMBOL(_write_unlock); 275EXPORT_SYMBOL(_raw_write_trylock);
285#endif 276#endif
286 277
287#ifndef CONFIG_INLINE_READ_UNLOCK 278#ifndef CONFIG_INLINE_WRITE_LOCK
288void __lockfunc _read_unlock(rwlock_t *lock) 279void __lockfunc _raw_write_lock(rwlock_t *lock)
289{ 280{
290 __read_unlock(lock); 281 __raw_write_lock(lock);
291} 282}
292EXPORT_SYMBOL(_read_unlock); 283EXPORT_SYMBOL(_raw_write_lock);
293#endif 284#endif
294 285
295#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE 286#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
296void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 287unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
297{ 288{
298 __spin_unlock_irqrestore(lock, flags); 289 return __raw_write_lock_irqsave(lock);
299} 290}
300EXPORT_SYMBOL(_spin_unlock_irqrestore); 291EXPORT_SYMBOL(_raw_write_lock_irqsave);
301#endif 292#endif
302 293
303#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ 294#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
304void __lockfunc _spin_unlock_irq(spinlock_t *lock) 295void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
305{ 296{
306 __spin_unlock_irq(lock); 297 __raw_write_lock_irq(lock);
307} 298}
308EXPORT_SYMBOL(_spin_unlock_irq); 299EXPORT_SYMBOL(_raw_write_lock_irq);
309#endif 300#endif
310 301
311#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH 302#ifndef CONFIG_INLINE_WRITE_LOCK_BH
312void __lockfunc _spin_unlock_bh(spinlock_t *lock) 303void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
313{ 304{
314 __spin_unlock_bh(lock); 305 __raw_write_lock_bh(lock);
315} 306}
316EXPORT_SYMBOL(_spin_unlock_bh); 307EXPORT_SYMBOL(_raw_write_lock_bh);
317#endif 308#endif
318 309
319#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE 310#ifndef CONFIG_INLINE_WRITE_UNLOCK
320void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 311void __lockfunc _raw_write_unlock(rwlock_t *lock)
321{ 312{
322 __read_unlock_irqrestore(lock, flags); 313 __raw_write_unlock(lock);
323} 314}
324EXPORT_SYMBOL(_read_unlock_irqrestore); 315EXPORT_SYMBOL(_raw_write_unlock);
325#endif 316#endif
326 317
327#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ 318#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
328void __lockfunc _read_unlock_irq(rwlock_t *lock) 319void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
329{ 320{
330 __read_unlock_irq(lock); 321 __raw_write_unlock_irqrestore(lock, flags);
331} 322}
332EXPORT_SYMBOL(_read_unlock_irq); 323EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
333#endif 324#endif
334 325
335#ifndef CONFIG_INLINE_READ_UNLOCK_BH 326#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
336void __lockfunc _read_unlock_bh(rwlock_t *lock) 327void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
337{ 328{
338 __read_unlock_bh(lock); 329 __raw_write_unlock_irq(lock);
339} 330}
340EXPORT_SYMBOL(_read_unlock_bh); 331EXPORT_SYMBOL(_raw_write_unlock_irq);
341#endif 332#endif
342 333
343#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE 334#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
344void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 335void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
345{ 336{
346 __write_unlock_irqrestore(lock, flags); 337 __raw_write_unlock_bh(lock);
347} 338}
348EXPORT_SYMBOL(_write_unlock_irqrestore); 339EXPORT_SYMBOL(_raw_write_unlock_bh);
349#endif 340#endif
350 341
351#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ 342#ifdef CONFIG_DEBUG_LOCK_ALLOC
352void __lockfunc _write_unlock_irq(rwlock_t *lock) 343
344void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
353{ 345{
354 __write_unlock_irq(lock); 346 preempt_disable();
347 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
348 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
355} 349}
356EXPORT_SYMBOL(_write_unlock_irq); 350EXPORT_SYMBOL(_raw_spin_lock_nested);
357#endif
358 351
359#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH 352unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
360void __lockfunc _write_unlock_bh(rwlock_t *lock) 353 int subclass)
361{ 354{
362 __write_unlock_bh(lock); 355 unsigned long flags;
356
357 local_irq_save(flags);
358 preempt_disable();
359 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
360 LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
361 do_raw_spin_lock_flags, &flags);
362 return flags;
363} 363}
364EXPORT_SYMBOL(_write_unlock_bh); 364EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
365#endif
366 365
367#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH 366void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
368int __lockfunc _spin_trylock_bh(spinlock_t *lock) 367 struct lockdep_map *nest_lock)
369{ 368{
370 return __spin_trylock_bh(lock); 369 preempt_disable();
370 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
371 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
371} 372}
372EXPORT_SYMBOL(_spin_trylock_bh); 373EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
374
373#endif 375#endif
374 376
375notrace int in_lock_functions(unsigned long addr) 377notrace int in_lock_functions(unsigned long addr)
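The hunks above rename the out-of-line lock functions from _spin_* to _raw_spin_* and retype them to take raw_spinlock_t, while the lockdep-aware nested variants move under CONFIG_DEBUG_LOCK_ALLOC. A minimal sketch of the intended layering, assuming spinlock_t now simply embeds a raw_spinlock_t member named rlock (simplified from the real headers):

typedef struct spinlock {
	struct raw_spinlock rlock;	/* the only member in the non-RT case */
} spinlock_t;

static inline void spin_lock(spinlock_t *lock)
{
	/* forwards to the out-of-line _raw_spin_lock() above, unless
	 * CONFIG_INLINE_SPIN_LOCK selects the inlined version */
	_raw_spin_lock(&lock->rlock);
}

static inline void spin_unlock(spinlock_t *lock)
{
	_raw_spin_unlock(&lock->rlock);
}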
diff --git a/kernel/sys.c b/kernel/sys.c
index 585d6cd10040..26a6b73a6b85 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -162,6 +162,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
162 if (niceval > 19) 162 if (niceval > 19)
163 niceval = 19; 163 niceval = 19;
164 164
165 rcu_read_lock();
165 read_lock(&tasklist_lock); 166 read_lock(&tasklist_lock);
166 switch (which) { 167 switch (which) {
167 case PRIO_PROCESS: 168 case PRIO_PROCESS:
@@ -189,16 +190,17 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
189 !(user = find_user(who))) 190 !(user = find_user(who)))
190 goto out_unlock; /* No processes for this user */ 191 goto out_unlock; /* No processes for this user */
191 192
192 do_each_thread(g, p) 193 do_each_thread(g, p) {
193 if (__task_cred(p)->uid == who) 194 if (__task_cred(p)->uid == who)
194 error = set_one_prio(p, niceval, error); 195 error = set_one_prio(p, niceval, error);
195 while_each_thread(g, p); 196 } while_each_thread(g, p);
196 if (who != cred->uid) 197 if (who != cred->uid)
197 free_uid(user); /* For find_user() */ 198 free_uid(user); /* For find_user() */
198 break; 199 break;
199 } 200 }
200out_unlock: 201out_unlock:
201 read_unlock(&tasklist_lock); 202 read_unlock(&tasklist_lock);
203 rcu_read_unlock();
202out: 204out:
203 return error; 205 return error;
204} 206}
@@ -252,13 +254,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
252 !(user = find_user(who))) 254 !(user = find_user(who)))
253 goto out_unlock; /* No processes for this user */ 255 goto out_unlock; /* No processes for this user */
254 256
255 do_each_thread(g, p) 257 do_each_thread(g, p) {
256 if (__task_cred(p)->uid == who) { 258 if (__task_cred(p)->uid == who) {
257 niceval = 20 - task_nice(p); 259 niceval = 20 - task_nice(p);
258 if (niceval > retval) 260 if (niceval > retval)
259 retval = niceval; 261 retval = niceval;
260 } 262 }
261 while_each_thread(g, p); 263 } while_each_thread(g, p);
262 if (who != cred->uid) 264 if (who != cred->uid)
263 free_uid(user); /* for find_user() */ 265 free_uid(user); /* for find_user() */
264 break; 266 break;
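Both priority loops above gain an rcu_read_lock()/rcu_read_unlock() pair because __task_cred() dereferences RCU-protected credentials, and do_each_thread picks up explicit braces so its body reads as an ordinary block. The resulting pattern, condensed from the hunks:

	rcu_read_lock();			/* required by __task_cred() */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		if (__task_cred(p)->uid == who)
			error = set_one_prio(p, niceval, error);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	rcu_read_unlock();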
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9327a26765c5..8a68b2448468 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -244,6 +244,10 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */
244static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ 244static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
245static int min_wakeup_granularity_ns; /* 0 usecs */ 245static int min_wakeup_granularity_ns; /* 0 usecs */
246static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ 246static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
247static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
248static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
249static int min_sched_shares_ratelimit = 100000; /* 100 usec */
250static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
247#endif 251#endif
248 252
249static struct ctl_table kern_table[] = { 253static struct ctl_table kern_table[] = {
@@ -260,7 +264,7 @@ static struct ctl_table kern_table[] = {
260 .data = &sysctl_sched_min_granularity, 264 .data = &sysctl_sched_min_granularity,
261 .maxlen = sizeof(unsigned int), 265 .maxlen = sizeof(unsigned int),
262 .mode = 0644, 266 .mode = 0644,
263 .proc_handler = sched_nr_latency_handler, 267 .proc_handler = sched_proc_update_handler,
264 .extra1 = &min_sched_granularity_ns, 268 .extra1 = &min_sched_granularity_ns,
265 .extra2 = &max_sched_granularity_ns, 269 .extra2 = &max_sched_granularity_ns,
266 }, 270 },
@@ -269,7 +273,7 @@ static struct ctl_table kern_table[] = {
269 .data = &sysctl_sched_latency, 273 .data = &sysctl_sched_latency,
270 .maxlen = sizeof(unsigned int), 274 .maxlen = sizeof(unsigned int),
271 .mode = 0644, 275 .mode = 0644,
272 .proc_handler = sched_nr_latency_handler, 276 .proc_handler = sched_proc_update_handler,
273 .extra1 = &min_sched_granularity_ns, 277 .extra1 = &min_sched_granularity_ns,
274 .extra2 = &max_sched_granularity_ns, 278 .extra2 = &max_sched_granularity_ns,
275 }, 279 },
@@ -278,7 +282,7 @@ static struct ctl_table kern_table[] = {
278 .data = &sysctl_sched_wakeup_granularity, 282 .data = &sysctl_sched_wakeup_granularity,
279 .maxlen = sizeof(unsigned int), 283 .maxlen = sizeof(unsigned int),
280 .mode = 0644, 284 .mode = 0644,
281 .proc_handler = proc_dointvec_minmax, 285 .proc_handler = sched_proc_update_handler,
282 .extra1 = &min_wakeup_granularity_ns, 286 .extra1 = &min_wakeup_granularity_ns,
283 .extra2 = &max_wakeup_granularity_ns, 287 .extra2 = &max_wakeup_granularity_ns,
284 }, 288 },
@@ -287,7 +291,18 @@ static struct ctl_table kern_table[] = {
287 .data = &sysctl_sched_shares_ratelimit, 291 .data = &sysctl_sched_shares_ratelimit,
288 .maxlen = sizeof(unsigned int), 292 .maxlen = sizeof(unsigned int),
289 .mode = 0644, 293 .mode = 0644,
290 .proc_handler = proc_dointvec, 294 .proc_handler = sched_proc_update_handler,
295 .extra1 = &min_sched_shares_ratelimit,
296 .extra2 = &max_sched_shares_ratelimit,
297 },
298 {
299 .procname = "sched_tunable_scaling",
300 .data = &sysctl_sched_tunable_scaling,
301 .maxlen = sizeof(enum sched_tunable_scaling),
302 .mode = 0644,
303 .proc_handler = sched_proc_update_handler,
304 .extra1 = &min_sched_tunable_scaling,
305 .extra2 = &max_sched_tunable_scaling,
291 }, 306 },
292 { 307 {
293 .procname = "sched_shares_thresh", 308 .procname = "sched_shares_thresh",
@@ -298,13 +313,6 @@ static struct ctl_table kern_table[] = {
298 .extra1 = &zero, 313 .extra1 = &zero,
299 }, 314 },
300 { 315 {
301 .procname = "sched_features",
302 .data = &sysctl_sched_features,
303 .maxlen = sizeof(unsigned int),
304 .mode = 0644,
305 .proc_handler = proc_dointvec,
306 },
307 {
308 .procname = "sched_migration_cost", 316 .procname = "sched_migration_cost",
309 .data = &sysctl_sched_migration_cost, 317 .data = &sysctl_sched_migration_cost,
310 .maxlen = sizeof(unsigned int), 318 .maxlen = sizeof(unsigned int),
@@ -1043,7 +1051,7 @@ static struct ctl_table vm_table[] = {
1043 .extra2 = &one_hundred, 1051 .extra2 = &one_hundred,
1044 }, 1052 },
1045#ifdef CONFIG_HUGETLB_PAGE 1053#ifdef CONFIG_HUGETLB_PAGE
1046 { 1054 {
1047 .procname = "nr_hugepages", 1055 .procname = "nr_hugepages",
1048 .data = NULL, 1056 .data = NULL,
1049 .maxlen = sizeof(unsigned long), 1057 .maxlen = sizeof(unsigned long),
@@ -1051,7 +1059,18 @@ static struct ctl_table vm_table[] = {
1051 .proc_handler = hugetlb_sysctl_handler, 1059 .proc_handler = hugetlb_sysctl_handler,
1052 .extra1 = (void *)&hugetlb_zero, 1060 .extra1 = (void *)&hugetlb_zero,
1053 .extra2 = (void *)&hugetlb_infinity, 1061 .extra2 = (void *)&hugetlb_infinity,
1054 }, 1062 },
1063#ifdef CONFIG_NUMA
1064 {
1065 .procname = "nr_hugepages_mempolicy",
1066 .data = NULL,
1067 .maxlen = sizeof(unsigned long),
1068 .mode = 0644,
1069 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1070 .extra1 = (void *)&hugetlb_zero,
1071 .extra2 = (void *)&hugetlb_infinity,
1072 },
1073#endif
1055 { 1074 {
1056 .procname = "hugetlb_shm_group", 1075 .procname = "hugetlb_shm_group",
1057 .data = &sysctl_hugetlb_shm_group, 1076 .data = &sysctl_hugetlb_shm_group,
@@ -1112,7 +1131,8 @@ static struct ctl_table vm_table[] = {
1112 .data = &sysctl_max_map_count, 1131 .data = &sysctl_max_map_count,
1113 .maxlen = sizeof(sysctl_max_map_count), 1132 .maxlen = sizeof(sysctl_max_map_count),
1114 .mode = 0644, 1133 .mode = 0644,
1115 .proc_handler = proc_dointvec 1134 .proc_handler = proc_dointvec_minmax,
1135 .extra1 = &zero,
1116 }, 1136 },
1117#else 1137#else
1118 { 1138 {
@@ -1194,6 +1214,7 @@ static struct ctl_table vm_table[] = {
1194 .proc_handler = proc_dointvec_jiffies, 1214 .proc_handler = proc_dointvec_jiffies,
1195 }, 1215 },
1196#endif 1216#endif
1217#ifdef CONFIG_MMU
1197 { 1218 {
1198 .procname = "mmap_min_addr", 1219 .procname = "mmap_min_addr",
1199 .data = &dac_mmap_min_addr, 1220 .data = &dac_mmap_min_addr,
@@ -1201,6 +1222,7 @@ static struct ctl_table vm_table[] = {
1201 .mode = 0644, 1222 .mode = 0644,
1202 .proc_handler = mmap_min_addr_handler, 1223 .proc_handler = mmap_min_addr_handler,
1203 }, 1224 },
1225#endif
1204#ifdef CONFIG_NUMA 1226#ifdef CONFIG_NUMA
1205 { 1227 {
1206 .procname = "numa_zonelist_order", 1228 .procname = "numa_zonelist_order",
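The scheduler entries above converge on a single handler, sched_proc_update_handler, and the previously unbounded integers gain explicit clamps through extra1/extra2. A generic sketch of the clamped-sysctl pattern; demo_val and friends are hypothetical names for illustration, not real tunables:

static int demo_min = 1;
static int demo_max = 100;
static int demo_val = 10;

static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_val",
		.data		= &demo_val,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		/* rejects writes outside [demo_min, demo_max] */
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &demo_min,
		.extra2		= &demo_max,
	},
	{ }
};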
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b75dbf40f573..112533d5fc08 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1399,6 +1399,13 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
1399{ 1399{
1400 int i; 1400 int i;
1401 1401
1402 /*
1403 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
1404 * ever go away.
1405 */
1406 if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
1407 return;
1408
1402 if (printk_ratelimit()) { 1409 if (printk_ratelimit()) {
1403 printk(KERN_INFO 1410 printk(KERN_INFO
1404 "warning: process `%s' used the deprecated sysctl " 1411 "warning: process `%s' used the deprecated sysctl "
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 20a8920029ee..6f740d9f0948 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -30,7 +30,7 @@ static LIST_HEAD(clockevents_released);
30static RAW_NOTIFIER_HEAD(clockevents_chain); 30static RAW_NOTIFIER_HEAD(clockevents_chain);
31 31
32/* Protection for the above */ 32/* Protection for the above */
33static DEFINE_SPINLOCK(clockevents_lock); 33static DEFINE_RAW_SPINLOCK(clockevents_lock);
34 34
35/** 35/**
36 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds 36 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -141,9 +141,9 @@ int clockevents_register_notifier(struct notifier_block *nb)
141 unsigned long flags; 141 unsigned long flags;
142 int ret; 142 int ret;
143 143
144 spin_lock_irqsave(&clockevents_lock, flags); 144 raw_spin_lock_irqsave(&clockevents_lock, flags);
145 ret = raw_notifier_chain_register(&clockevents_chain, nb); 145 ret = raw_notifier_chain_register(&clockevents_chain, nb);
146 spin_unlock_irqrestore(&clockevents_lock, flags); 146 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
147 147
148 return ret; 148 return ret;
149} 149}
@@ -185,13 +185,13 @@ void clockevents_register_device(struct clock_event_device *dev)
185 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 185 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
186 BUG_ON(!dev->cpumask); 186 BUG_ON(!dev->cpumask);
187 187
188 spin_lock_irqsave(&clockevents_lock, flags); 188 raw_spin_lock_irqsave(&clockevents_lock, flags);
189 189
190 list_add(&dev->list, &clockevent_devices); 190 list_add(&dev->list, &clockevent_devices);
191 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); 191 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
192 clockevents_notify_released(); 192 clockevents_notify_released();
193 193
194 spin_unlock_irqrestore(&clockevents_lock, flags); 194 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
195} 195}
196EXPORT_SYMBOL_GPL(clockevents_register_device); 196EXPORT_SYMBOL_GPL(clockevents_register_device);
197 197
@@ -238,10 +238,11 @@ void clockevents_exchange_device(struct clock_event_device *old,
238 */ 238 */
239void clockevents_notify(unsigned long reason, void *arg) 239void clockevents_notify(unsigned long reason, void *arg)
240{ 240{
241 struct list_head *node, *tmp; 241 struct clock_event_device *dev, *tmp;
242 unsigned long flags; 242 unsigned long flags;
243 int cpu;
243 244
244 spin_lock_irqsave(&clockevents_lock, flags); 245 raw_spin_lock_irqsave(&clockevents_lock, flags);
245 clockevents_do_notify(reason, arg); 246 clockevents_do_notify(reason, arg);
246 247
247 switch (reason) { 248 switch (reason) {
@@ -250,13 +251,24 @@ void clockevents_notify(unsigned long reason, void *arg)
250 * Unregister the clock event devices which were 251 * Unregister the clock event devices which were
251 * released from the users in the notify chain. 252 * released from the users in the notify chain.
252 */ 253 */
253 list_for_each_safe(node, tmp, &clockevents_released) 254 list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
254 list_del(node); 255 list_del(&dev->list);
256 /*
257 * Now check whether the CPU has left unused per cpu devices
258 */
259 cpu = *((int *)arg);
260 list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
261 if (cpumask_test_cpu(cpu, dev->cpumask) &&
262 cpumask_weight(dev->cpumask) == 1) {
263 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
264 list_del(&dev->list);
265 }
266 }
255 break; 267 break;
256 default: 268 default:
257 break; 269 break;
258 } 270 }
259 spin_unlock_irqrestore(&clockevents_lock, flags); 271 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
260} 272}
261EXPORT_SYMBOL_GPL(clockevents_notify); 273EXPORT_SYMBOL_GPL(clockevents_notify);
262#endif 274#endif
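Besides the raw-spinlock conversion, the notify path above switches from list_for_each_safe() plus manual container math to list_for_each_entry_safe(), which yields the containing structure directly and stays safe when the current entry is deleted mid-walk. A condensed sketch of the per-cpu cleanup it enables:

	struct clock_event_device *dev, *tmp;

	list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
		/* drop devices that belonged only to the departing CPU */
		if (cpumask_test_cpu(cpu, dev->cpumask) &&
		    cpumask_weight(dev->cpumask) == 1)
			list_del(&dev->list);
	}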
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index c2ec25087a35..b3bafd5fc66d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device;
31/* FIXME: Use cpumask_var_t. */ 31/* FIXME: Use cpumask_var_t. */
32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); 32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
33static DECLARE_BITMAP(tmpmask, NR_CPUS); 33static DECLARE_BITMAP(tmpmask, NR_CPUS);
34static DEFINE_SPINLOCK(tick_broadcast_lock); 34static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35static int tick_broadcast_force; 35static int tick_broadcast_force;
36 36
37#ifdef CONFIG_TICK_ONESHOT 37#ifdef CONFIG_TICK_ONESHOT
@@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
96 unsigned long flags; 96 unsigned long flags;
97 int ret = 0; 97 int ret = 0;
98 98
99 spin_lock_irqsave(&tick_broadcast_lock, flags); 99 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
100 100
101 /* 101 /*
102 * Devices might be registered with both periodic and oneshot 102 * Devices might be registered with both periodic and oneshot
@@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
122 tick_broadcast_clear_oneshot(cpu); 122 tick_broadcast_clear_oneshot(cpu);
123 } 123 }
124 } 124 }
125 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 125 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
126 return ret; 126 return ret;
127} 127}
128 128
@@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask)
161 */ 161 */
162static void tick_do_periodic_broadcast(void) 162static void tick_do_periodic_broadcast(void)
163{ 163{
164 spin_lock(&tick_broadcast_lock); 164 raw_spin_lock(&tick_broadcast_lock);
165 165
166 cpumask_and(to_cpumask(tmpmask), 166 cpumask_and(to_cpumask(tmpmask),
167 cpu_online_mask, tick_get_broadcast_mask()); 167 cpu_online_mask, tick_get_broadcast_mask());
168 tick_do_broadcast(to_cpumask(tmpmask)); 168 tick_do_broadcast(to_cpumask(tmpmask));
169 169
170 spin_unlock(&tick_broadcast_lock); 170 raw_spin_unlock(&tick_broadcast_lock);
171} 171}
172 172
173/* 173/*
@@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
212 unsigned long flags; 212 unsigned long flags;
213 int cpu, bc_stopped; 213 int cpu, bc_stopped;
214 214
215 spin_lock_irqsave(&tick_broadcast_lock, flags); 215 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
216 216
217 cpu = smp_processor_id(); 217 cpu = smp_processor_id();
218 td = &per_cpu(tick_cpu_device, cpu); 218 td = &per_cpu(tick_cpu_device, cpu);
@@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
263 tick_broadcast_setup_oneshot(bc); 263 tick_broadcast_setup_oneshot(bc);
264 } 264 }
265out: 265out:
266 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 266 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
267} 267}
268 268
269/* 269/*
@@ -299,7 +299,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
299 unsigned long flags; 299 unsigned long flags;
300 unsigned int cpu = *cpup; 300 unsigned int cpu = *cpup;
301 301
302 spin_lock_irqsave(&tick_broadcast_lock, flags); 302 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
303 303
304 bc = tick_broadcast_device.evtdev; 304 bc = tick_broadcast_device.evtdev;
305 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 305 cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
@@ -309,7 +309,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
309 clockevents_shutdown(bc); 309 clockevents_shutdown(bc);
310 } 310 }
311 311
312 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 312 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
313} 313}
314 314
315void tick_suspend_broadcast(void) 315void tick_suspend_broadcast(void)
@@ -317,13 +317,13 @@ void tick_suspend_broadcast(void)
317 struct clock_event_device *bc; 317 struct clock_event_device *bc;
318 unsigned long flags; 318 unsigned long flags;
319 319
320 spin_lock_irqsave(&tick_broadcast_lock, flags); 320 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
321 321
322 bc = tick_broadcast_device.evtdev; 322 bc = tick_broadcast_device.evtdev;
323 if (bc) 323 if (bc)
324 clockevents_shutdown(bc); 324 clockevents_shutdown(bc);
325 325
326 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 326 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
327} 327}
328 328
329int tick_resume_broadcast(void) 329int tick_resume_broadcast(void)
@@ -332,7 +332,7 @@ int tick_resume_broadcast(void)
332 unsigned long flags; 332 unsigned long flags;
333 int broadcast = 0; 333 int broadcast = 0;
334 334
335 spin_lock_irqsave(&tick_broadcast_lock, flags); 335 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
336 336
337 bc = tick_broadcast_device.evtdev; 337 bc = tick_broadcast_device.evtdev;
338 338
@@ -351,7 +351,7 @@ int tick_resume_broadcast(void)
351 break; 351 break;
352 } 352 }
353 } 353 }
354 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 354 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
355 355
356 return broadcast; 356 return broadcast;
357} 357}
@@ -405,7 +405,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
405 ktime_t now, next_event; 405 ktime_t now, next_event;
406 int cpu; 406 int cpu;
407 407
408 spin_lock(&tick_broadcast_lock); 408 raw_spin_lock(&tick_broadcast_lock);
409again: 409again:
410 dev->next_event.tv64 = KTIME_MAX; 410 dev->next_event.tv64 = KTIME_MAX;
411 next_event.tv64 = KTIME_MAX; 411 next_event.tv64 = KTIME_MAX;
@@ -443,7 +443,7 @@ again:
443 if (tick_broadcast_set_event(next_event, 0)) 443 if (tick_broadcast_set_event(next_event, 0))
444 goto again; 444 goto again;
445 } 445 }
446 spin_unlock(&tick_broadcast_lock); 446 raw_spin_unlock(&tick_broadcast_lock);
447} 447}
448 448
449/* 449/*
@@ -457,7 +457,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
457 unsigned long flags; 457 unsigned long flags;
458 int cpu; 458 int cpu;
459 459
460 spin_lock_irqsave(&tick_broadcast_lock, flags); 460 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
461 461
462 /* 462 /*
463 * Periodic mode does not care about the enter/exit of power 463 * Periodic mode does not care about the enter/exit of power
@@ -492,7 +492,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
492 } 492 }
493 493
494out: 494out:
495 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 495 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
496} 496}
497 497
498/* 498/*
@@ -563,13 +563,13 @@ void tick_broadcast_switch_to_oneshot(void)
563 struct clock_event_device *bc; 563 struct clock_event_device *bc;
564 unsigned long flags; 564 unsigned long flags;
565 565
566 spin_lock_irqsave(&tick_broadcast_lock, flags); 566 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
567 567
568 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; 568 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
569 bc = tick_broadcast_device.evtdev; 569 bc = tick_broadcast_device.evtdev;
570 if (bc) 570 if (bc)
571 tick_broadcast_setup_oneshot(bc); 571 tick_broadcast_setup_oneshot(bc);
572 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 572 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
573} 573}
574 574
575 575
@@ -581,7 +581,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
581 unsigned long flags; 581 unsigned long flags;
582 unsigned int cpu = *cpup; 582 unsigned int cpu = *cpup;
583 583
584 spin_lock_irqsave(&tick_broadcast_lock, flags); 584 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
585 585
586 /* 586 /*
587 * Clear the broadcast mask flag for the dead cpu, but do not 587 * Clear the broadcast mask flag for the dead cpu, but do not
@@ -589,7 +589,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
589 */ 589 */
590 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 590 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
591 591
592 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 592 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
593} 593}
594 594
595/* 595/*
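Every lock site in this file follows the same conversion: spin_* on tick_broadcast_lock becomes raw_spin_*. A raw_spinlock_t always busy-waits, even on configurations (such as the -rt patch set) where an ordinary spinlock_t may become sleepable, which is what low-level tick code reached from hard interrupt context needs. The recurring shape, sketched with a hypothetical lock:

static DEFINE_RAW_SPINLOCK(demo_lock);	/* hypothetical lock */

static void demo_touch_tick_state(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	/* state that is also reachable from hard-irq context */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}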
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 83c4417b6a3c..b6b898d2eeef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
34ktime_t tick_next_period; 34ktime_t tick_next_period;
35ktime_t tick_period; 35ktime_t tick_period;
36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; 36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
37DEFINE_SPINLOCK(tick_device_lock); 37static DEFINE_RAW_SPINLOCK(tick_device_lock);
38 38
39/* 39/*
40 * Debugging: see timer_list.c 40 * Debugging: see timer_list.c
@@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
209 int cpu, ret = NOTIFY_OK; 209 int cpu, ret = NOTIFY_OK;
210 unsigned long flags; 210 unsigned long flags;
211 211
212 spin_lock_irqsave(&tick_device_lock, flags); 212 raw_spin_lock_irqsave(&tick_device_lock, flags);
213 213
214 cpu = smp_processor_id(); 214 cpu = smp_processor_id();
215 if (!cpumask_test_cpu(cpu, newdev->cpumask)) 215 if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
268 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) 268 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
269 tick_oneshot_notify(); 269 tick_oneshot_notify();
270 270
271 spin_unlock_irqrestore(&tick_device_lock, flags); 271 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
272 return NOTIFY_STOP; 272 return NOTIFY_STOP;
273 273
274out_bc: 274out_bc:
@@ -278,7 +278,7 @@ out_bc:
278 if (tick_check_broadcast_device(newdev)) 278 if (tick_check_broadcast_device(newdev))
279 ret = NOTIFY_STOP; 279 ret = NOTIFY_STOP;
280 280
281 spin_unlock_irqrestore(&tick_device_lock, flags); 281 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
282 282
283 return ret; 283 return ret;
284} 284}
@@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup)
311 struct clock_event_device *dev = td->evtdev; 311 struct clock_event_device *dev = td->evtdev;
312 unsigned long flags; 312 unsigned long flags;
313 313
314 spin_lock_irqsave(&tick_device_lock, flags); 314 raw_spin_lock_irqsave(&tick_device_lock, flags);
315 td->mode = TICKDEV_MODE_PERIODIC; 315 td->mode = TICKDEV_MODE_PERIODIC;
316 if (dev) { 316 if (dev) {
317 /* 317 /*
@@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup)
322 clockevents_exchange_device(dev, NULL); 322 clockevents_exchange_device(dev, NULL);
323 td->evtdev = NULL; 323 td->evtdev = NULL;
324 } 324 }
325 spin_unlock_irqrestore(&tick_device_lock, flags); 325 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
326} 326}
327 327
328static void tick_suspend(void) 328static void tick_suspend(void)
@@ -330,9 +330,9 @@ static void tick_suspend(void)
330 struct tick_device *td = &__get_cpu_var(tick_cpu_device); 330 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
331 unsigned long flags; 331 unsigned long flags;
332 332
333 spin_lock_irqsave(&tick_device_lock, flags); 333 raw_spin_lock_irqsave(&tick_device_lock, flags);
334 clockevents_shutdown(td->evtdev); 334 clockevents_shutdown(td->evtdev);
335 spin_unlock_irqrestore(&tick_device_lock, flags); 335 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
336} 336}
337 337
338static void tick_resume(void) 338static void tick_resume(void)
@@ -341,7 +341,7 @@ static void tick_resume(void)
341 unsigned long flags; 341 unsigned long flags;
342 int broadcast = tick_resume_broadcast(); 342 int broadcast = tick_resume_broadcast();
343 343
344 spin_lock_irqsave(&tick_device_lock, flags); 344 raw_spin_lock_irqsave(&tick_device_lock, flags);
345 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); 345 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
346 346
347 if (!broadcast) { 347 if (!broadcast) {
@@ -350,7 +350,7 @@ static void tick_resume(void)
350 else 350 else
351 tick_resume_oneshot(); 351 tick_resume_oneshot();
352 } 352 }
353 spin_unlock_irqrestore(&tick_device_lock, flags); 353 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
354} 354}
355 355
356/* 356/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b1c05bf75ee0..290eefbc1f60 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,7 +6,6 @@
6#define TICK_DO_TIMER_BOOT -2 6#define TICK_DO_TIMER_BOOT -2
7 7
8DECLARE_PER_CPU(struct tick_device, tick_cpu_device); 8DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
9extern spinlock_t tick_device_lock;
10extern ktime_t tick_next_period; 9extern ktime_t tick_next_period;
11extern ktime_t tick_period; 10extern ktime_t tick_period;
12extern int tick_do_timer_cpu __read_mostly; 11extern int tick_do_timer_cpu __read_mostly;
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 96ff643a5a59..12f5c55090be 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync,
89 * source time 89 * source time
90 */ 90 */
91 sample.offset = 91 sample.offset =
92 ktime_to_ns(ktime_add(end, start)) / 2 - 92 (ktime_to_ns(end) + ktime_to_ns(start)) / 2 -
93 ts; 93 ts;
94 94
95 /* simple insertion sort based on duration */ 95 /* simple insertion sort based on duration */
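The offset calculation above now averages plain s64 nanosecond values instead of adding two ktime_t values first. Where overflow of the sum is a concern, the usual midpoint form is start + (end - start) / 2; a worked sketch of that alternative formulation (not the exact code above):

static s64 midpoint_ns(ktime_t start, ktime_t end)
{
	s64 a = ktime_to_ns(start);
	s64 b = ktime_to_ns(end);

	/* cannot overflow for valid timestamps, unlike (a + b) / 2 */
	return a + (b - a) / 2;
}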
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 665c76edbf17..bdfb8dd1050c 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
84 84
85next_one: 85next_one:
86 i = 0; 86 i = 0;
87 spin_lock_irqsave(&base->cpu_base->lock, flags); 87 raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
88 88
89 curr = base->first; 89 curr = base->first;
90 /* 90 /*
@@ -100,13 +100,13 @@ next_one:
100 100
101 timer = rb_entry(curr, struct hrtimer, node); 101 timer = rb_entry(curr, struct hrtimer, node);
102 tmp = *timer; 102 tmp = *timer;
103 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 103 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
104 104
105 print_timer(m, timer, &tmp, i, now); 105 print_timer(m, timer, &tmp, i, now);
106 next++; 106 next++;
107 goto next_one; 107 goto next_one;
108 } 108 }
109 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 109 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
110} 110}
111 111
112static void 112static void
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
150 P_ns(expires_next); 150 P_ns(expires_next);
151 P(hres_active); 151 P(hres_active);
152 P(nr_events); 152 P(nr_events);
153 P(nr_retries);
154 P(nr_hangs);
155 P_ns(max_hang_time);
153#endif 156#endif
154#undef P 157#undef P
155#undef P_ns 158#undef P_ns
@@ -234,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m)
234#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 237#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
235 print_tickdevice(m, tick_get_broadcast_device(), -1); 238 print_tickdevice(m, tick_get_broadcast_device(), -1);
236 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 239 SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
237 tick_get_broadcast_mask()->bits[0]); 240 cpumask_bits(tick_get_broadcast_mask())[0]);
238#ifdef CONFIG_TICK_ONESHOT 241#ifdef CONFIG_TICK_ONESHOT
239 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", 242 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
240 tick_get_broadcast_oneshot_mask()->bits[0]); 243 cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
241#endif 244#endif
242 SEQ_printf(m, "\n"); 245 SEQ_printf(m, "\n");
243#endif 246#endif
@@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
254 u64 now = ktime_to_ns(ktime_get()); 257 u64 now = ktime_to_ns(ktime_get());
255 int cpu; 258 int cpu;
256 259
257 SEQ_printf(m, "Timer List Version: v0.4\n"); 260 SEQ_printf(m, "Timer List Version: v0.5\n");
258 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 261 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
259 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 262 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
260 263
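print_active_timers() above copies each hrtimer into a local while holding the now-raw cpu_base lock, drops the lock, and prints from the copy: seq_file output may sleep, so it must never run under a raw spinlock. The snapshot pattern, condensed from the hunk:

	raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
	tmp = *timer;				/* stable snapshot */
	raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);

	print_timer(m, timer, &tmp, i, now);	/* may sleep; reads the snapshot */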
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index ee5681f8d7ec..2f3b585b8d7d 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock);
86/* 86/*
87 * Per-CPU lookup locks for fast hash lookup: 87 * Per-CPU lookup locks for fast hash lookup:
88 */ 88 */
89static DEFINE_PER_CPU(spinlock_t, lookup_lock); 89static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
90 90
91/* 91/*
92 * Mutex to serialize state changes with show-stats activities: 92 * Mutex to serialize state changes with show-stats activities:
@@ -238,14 +238,14 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
238 /* 238 /*
239 * It doesn't matter which lock we take: 239 * It doesn't matter which lock we take:
240 */ 240 */
241 spinlock_t *lock; 241 raw_spinlock_t *lock;
242 struct entry *entry, input; 242 struct entry *entry, input;
243 unsigned long flags; 243 unsigned long flags;
244 244
245 if (likely(!timer_stats_active)) 245 if (likely(!timer_stats_active))
246 return; 246 return;
247 247
248 lock = &per_cpu(lookup_lock, raw_smp_processor_id()); 248 lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
249 249
250 input.timer = timer; 250 input.timer = timer;
251 input.start_func = startf; 251 input.start_func = startf;
@@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
253 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag; 254 input.timer_flag = timer_flag;
255 255
256 spin_lock_irqsave(lock, flags); 256 raw_spin_lock_irqsave(lock, flags);
257 if (!timer_stats_active) 257 if (!timer_stats_active)
258 goto out_unlock; 258 goto out_unlock;
259 259
@@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
264 atomic_inc(&overflow_count); 264 atomic_inc(&overflow_count);
265 265
266 out_unlock: 266 out_unlock:
267 spin_unlock_irqrestore(lock, flags); 267 raw_spin_unlock_irqrestore(lock, flags);
268} 268}
269 269
270static void print_name_offset(struct seq_file *m, unsigned long addr) 270static void print_name_offset(struct seq_file *m, unsigned long addr)
@@ -348,9 +348,11 @@ static void sync_access(void)
348 int cpu; 348 int cpu;
349 349
350 for_each_online_cpu(cpu) { 350 for_each_online_cpu(cpu) {
351 spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); 351 raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
352
353 raw_spin_lock_irqsave(lock, flags);
352 /* nothing */ 354 /* nothing */
353 spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); 355 raw_spin_unlock_irqrestore(lock, flags);
354 } 356 }
355} 357}
356 358
@@ -408,7 +410,7 @@ void __init init_timer_stats(void)
408 int cpu; 410 int cpu;
409 411
410 for_each_possible_cpu(cpu) 412 for_each_possible_cpu(cpu)
411 spin_lock_init(&per_cpu(lookup_lock, cpu)); 413 raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
412} 414}
413 415
414static int __init init_tstats_procfs(void) 416static int __init init_tstats_procfs(void)
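The lookup locks become per-CPU raw spinlocks under the less collision-prone name tstats_lookup_lock. Each CPU normally takes only its own lock; sync_access() sweeps all of them to drain any updater still inside a critical section. A sketch of the per-CPU lock setup, with a hypothetical name:

static DEFINE_PER_CPU(raw_spinlock_t, demo_lock);

static void __init demo_init(void)
{
	int cpu;

	/* every possible CPU gets its own initialized lock */
	for_each_possible_cpu(cpu)
		raw_spin_lock_init(&per_cpu(demo_lock, cpu));
}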
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..15533b792397 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
656 656
657 debug_activate(timer, expires); 657 debug_activate(timer, expires);
658 658
659 new_base = __get_cpu_var(tvec_bases);
660
661 cpu = smp_processor_id(); 659 cpu = smp_processor_id();
662 660
663#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) 661#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51a1bcb7bed..7968762c8167 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1724,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1724 return ftrace_match(str, regex, len, type); 1724 return ftrace_match(str, regex, len, type);
1725} 1725}
1726 1726
1727static void ftrace_match_records(char *buff, int len, int enable) 1727static int ftrace_match_records(char *buff, int len, int enable)
1728{ 1728{
1729 unsigned int search_len; 1729 unsigned int search_len;
1730 struct ftrace_page *pg; 1730 struct ftrace_page *pg;
@@ -1733,6 +1733,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1733 char *search; 1733 char *search;
1734 int type; 1734 int type;
1735 int not; 1735 int not;
1736 int found = 0;
1736 1737
1737 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1738 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1738 type = filter_parse_regex(buff, len, &search, &not); 1739 type = filter_parse_regex(buff, len, &search, &not);
@@ -1750,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1750 rec->flags &= ~flag; 1751 rec->flags &= ~flag;
1751 else 1752 else
1752 rec->flags |= flag; 1753 rec->flags |= flag;
1754 found = 1;
1753 } 1755 }
1754 /* 1756 /*
1755 * Only enable filtering if we have a function that 1757 * Only enable filtering if we have a function that
@@ -1759,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1759 ftrace_filtered = 1; 1761 ftrace_filtered = 1;
1760 } while_for_each_ftrace_rec(); 1762 } while_for_each_ftrace_rec();
1761 mutex_unlock(&ftrace_lock); 1763 mutex_unlock(&ftrace_lock);
1764
1765 return found;
1762} 1766}
1763 1767
1764static int 1768static int
@@ -1780,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1780 return 1; 1784 return 1;
1781} 1785}
1782 1786
1783static void ftrace_match_module_records(char *buff, char *mod, int enable) 1787static int ftrace_match_module_records(char *buff, char *mod, int enable)
1784{ 1788{
1785 unsigned search_len = 0; 1789 unsigned search_len = 0;
1786 struct ftrace_page *pg; 1790 struct ftrace_page *pg;
@@ -1789,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1789 char *search = buff; 1793 char *search = buff;
1790 unsigned long flag; 1794 unsigned long flag;
1791 int not = 0; 1795 int not = 0;
1796 int found = 0;
1792 1797
1793 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1798 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1794 1799
@@ -1819,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1819 rec->flags &= ~flag; 1824 rec->flags &= ~flag;
1820 else 1825 else
1821 rec->flags |= flag; 1826 rec->flags |= flag;
1827 found = 1;
1822 } 1828 }
1823 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1829 if (enable && (rec->flags & FTRACE_FL_FILTER))
1824 ftrace_filtered = 1; 1830 ftrace_filtered = 1;
1825 1831
1826 } while_for_each_ftrace_rec(); 1832 } while_for_each_ftrace_rec();
1827 mutex_unlock(&ftrace_lock); 1833 mutex_unlock(&ftrace_lock);
1834
1835 return found;
1828} 1836}
1829 1837
1830/* 1838/*
@@ -1853,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1853 if (!strlen(mod)) 1861 if (!strlen(mod))
1854 return -EINVAL; 1862 return -EINVAL;
1855 1863
1856 ftrace_match_module_records(func, mod, enable); 1864 if (ftrace_match_module_records(func, mod, enable))
1857 return 0; 1865 return 0;
1866 return -EINVAL;
1858} 1867}
1859 1868
1860static struct ftrace_func_command ftrace_mod_cmd = { 1869static struct ftrace_func_command ftrace_mod_cmd = {
@@ -2151,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2151 func = strsep(&next, ":"); 2160 func = strsep(&next, ":");
2152 2161
2153 if (!next) { 2162 if (!next) {
2154 ftrace_match_records(func, len, enable); 2163 if (ftrace_match_records(func, len, enable))
2155 return 0; 2164 return 0;
2165 return ret;
2156 } 2166 }
2157 2167
2158 /* command found */ 2168 /* command found */
@@ -2198,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2198 !trace_parser_cont(parser)) { 2208 !trace_parser_cont(parser)) {
2199 ret = ftrace_process_regex(parser->buffer, 2209 ret = ftrace_process_regex(parser->buffer,
2200 parser->idx, enable); 2210 parser->idx, enable);
2211 trace_parser_clear(parser);
2201 if (ret) 2212 if (ret)
2202 goto out_unlock; 2213 goto out_unlock;
2203
2204 trace_parser_clear(parser);
2205 } 2214 }
2206 2215
2207 ret = read; 2216 ret = read;
@@ -2543,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2543 exists = true; 2552 exists = true;
2544 break; 2553 break;
2545 } 2554 }
2546 if (!exists) { 2555 if (!exists)
2547 array[(*idx)++] = rec->ip; 2556 array[(*idx)++] = rec->ip;
2548 found = 1; 2557 found = 1;
2549 }
2550 } 2558 }
2551 } while_for_each_ftrace_rec(); 2559 } while_for_each_ftrace_rec();
2552 2560
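The filter matchers above change from void to int so callers can distinguish "pattern matched nothing" from success; ftrace_process_regex() and ftrace_mod_callback() then turn an empty match into -EINVAL instead of silently accepting a useless filter. The shape of the change, reduced to a toy matcher with hypothetical names:

static const char *records[] = { "func_a", "func_b" };

static int match_records(const char *pattern)
{
	int found = 0;
	size_t i;

	for (i = 0; i < sizeof(records) / sizeof(records[0]); i++) {
		if (strcmp(records[i], pattern) == 0)
			found = 1;	/* remember that something matched */
	}
	return found;
}

static int process_pattern(const char *buf)
{
	if (!match_records(buf))
		return -EINVAL;	/* a filter that matches nothing is an error */
	return 0;
}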
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
14#define CREATE_TRACE_POINTS 14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 15#include <trace/events/power.h>
16 16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 17EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20 18
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca4956ab5e..2326b04c95c4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -423,7 +423,7 @@ struct ring_buffer_per_cpu {
423 int cpu; 423 int cpu;
424 struct ring_buffer *buffer; 424 struct ring_buffer *buffer;
425 spinlock_t reader_lock; /* serialize readers */ 425 spinlock_t reader_lock; /* serialize readers */
426 raw_spinlock_t lock; 426 arch_spinlock_t lock;
427 struct lock_class_key lock_key; 427 struct lock_class_key lock_key;
428 struct list_head *pages; 428 struct list_head *pages;
429 struct buffer_page *head_page; /* read from head */ 429 struct buffer_page *head_page; /* read from head */
@@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
998 cpu_buffer->buffer = buffer; 998 cpu_buffer->buffer = buffer;
999 spin_lock_init(&cpu_buffer->reader_lock); 999 spin_lock_init(&cpu_buffer->reader_lock);
1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1001 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1001 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1002 1002
1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1004 GFP_KERNEL, cpu_to_node(cpu)); 1004 GFP_KERNEL, cpu_to_node(cpu));
@@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1193 struct list_head *p; 1193 struct list_head *p;
1194 unsigned i; 1194 unsigned i;
1195 1195
1196 atomic_inc(&cpu_buffer->record_disabled);
1197 synchronize_sched();
1198
1199 spin_lock_irq(&cpu_buffer->reader_lock); 1196 spin_lock_irq(&cpu_buffer->reader_lock);
1200 rb_head_page_deactivate(cpu_buffer); 1197 rb_head_page_deactivate(cpu_buffer);
1201 1198
@@ -1211,12 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1211 return; 1208 return;
1212 1209
1213 rb_reset_cpu(cpu_buffer); 1210 rb_reset_cpu(cpu_buffer);
1214 spin_unlock_irq(&cpu_buffer->reader_lock);
1215
1216 rb_check_pages(cpu_buffer); 1211 rb_check_pages(cpu_buffer);
1217 1212
1218 atomic_dec(&cpu_buffer->record_disabled); 1213 spin_unlock_irq(&cpu_buffer->reader_lock);
1219
1220} 1214}
1221 1215
1222static void 1216static void
@@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1227 struct list_head *p; 1221 struct list_head *p;
1228 unsigned i; 1222 unsigned i;
1229 1223
1230 atomic_inc(&cpu_buffer->record_disabled);
1231 synchronize_sched();
1232
1233 spin_lock_irq(&cpu_buffer->reader_lock); 1224 spin_lock_irq(&cpu_buffer->reader_lock);
1234 rb_head_page_deactivate(cpu_buffer); 1225 rb_head_page_deactivate(cpu_buffer);
1235 1226
@@ -1242,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1242 list_add_tail(&bpage->list, cpu_buffer->pages); 1233 list_add_tail(&bpage->list, cpu_buffer->pages);
1243 } 1234 }
1244 rb_reset_cpu(cpu_buffer); 1235 rb_reset_cpu(cpu_buffer);
1245 spin_unlock_irq(&cpu_buffer->reader_lock);
1246
1247 rb_check_pages(cpu_buffer); 1236 rb_check_pages(cpu_buffer);
1248 1237
1249 atomic_dec(&cpu_buffer->record_disabled); 1238 spin_unlock_irq(&cpu_buffer->reader_lock);
1250} 1239}
1251 1240
1252/** 1241/**
@@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1254 * @buffer: the buffer to resize. 1243 * @buffer: the buffer to resize.
1255 * @size: the new size. 1244 * @size: the new size.
1256 * 1245 *
1257 * The tracer is responsible for making sure that the buffer is
1258 * not being used while changing the size.
1259 * Note: We may be able to change the above requirement by using
1260 * RCU synchronizations.
1261 *
1262 * Minimum size is 2 * BUF_PAGE_SIZE. 1246 * Minimum size is 2 * BUF_PAGE_SIZE.
1263 * 1247 *
1264 * Returns -1 on failure. 1248 * Returns -1 on failure.
@@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1290 if (size == buffer_size) 1274 if (size == buffer_size)
1291 return size; 1275 return size;
1292 1276
1277 atomic_inc(&buffer->record_disabled);
1278
1279 /* Make sure all writers are done with this buffer. */
1280 synchronize_sched();
1281
1293 mutex_lock(&buffer->mutex); 1282 mutex_lock(&buffer->mutex);
1294 get_online_cpus(); 1283 get_online_cpus();
1295 1284
@@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1352 put_online_cpus(); 1341 put_online_cpus();
1353 mutex_unlock(&buffer->mutex); 1342 mutex_unlock(&buffer->mutex);
1354 1343
1344 atomic_dec(&buffer->record_disabled);
1345
1355 return size; 1346 return size;
1356 1347
1357 free_pages: 1348 free_pages:
@@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1361 } 1352 }
1362 put_online_cpus(); 1353 put_online_cpus();
1363 mutex_unlock(&buffer->mutex); 1354 mutex_unlock(&buffer->mutex);
1355 atomic_dec(&buffer->record_disabled);
1364 return -ENOMEM; 1356 return -ENOMEM;
1365 1357
1366 /* 1358 /*
@@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1370 out_fail: 1362 out_fail:
1371 put_online_cpus(); 1363 put_online_cpus();
1372 mutex_unlock(&buffer->mutex); 1364 mutex_unlock(&buffer->mutex);
1365 atomic_dec(&buffer->record_disabled);
1373 return -1; 1366 return -1;
1374} 1367}
1375EXPORT_SYMBOL_GPL(ring_buffer_resize); 1368EXPORT_SYMBOL_GPL(ring_buffer_resize);
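The resize path above hoists writer exclusion out of the per-cpu helpers: recording is disabled once for the whole buffer and synchronize_sched() waits out writers already in flight, after which the page lists can be restructured under just the reader lock. The ordering, condensed from the hunks:

	atomic_inc(&buffer->record_disabled);	/* new writes now bounce */
	synchronize_sched();			/* wait for writers already inside */

	/* ... add or remove pages on each per-cpu buffer ... */

	atomic_dec(&buffer->record_disabled);	/* re-enable recording */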
@@ -2834,7 +2827,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2834 int ret; 2827 int ret;
2835 2828
2836 local_irq_save(flags); 2829 local_irq_save(flags);
2837 __raw_spin_lock(&cpu_buffer->lock); 2830 arch_spin_lock(&cpu_buffer->lock);
2838 2831
2839 again: 2832 again:
2840 /* 2833 /*
@@ -2923,7 +2916,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2923 goto again; 2916 goto again;
2924 2917
2925 out: 2918 out:
2926 __raw_spin_unlock(&cpu_buffer->lock); 2919 arch_spin_unlock(&cpu_buffer->lock);
2927 local_irq_restore(flags); 2920 local_irq_restore(flags);
2928 2921
2929 return reader; 2922 return reader;
@@ -3286,9 +3279,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3286 synchronize_sched(); 3279 synchronize_sched();
3287 3280
3288 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3281 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3289 __raw_spin_lock(&cpu_buffer->lock); 3282 arch_spin_lock(&cpu_buffer->lock);
3290 rb_iter_reset(iter); 3283 rb_iter_reset(iter);
3291 __raw_spin_unlock(&cpu_buffer->lock); 3284 arch_spin_unlock(&cpu_buffer->lock);
3292 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3285 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3293 3286
3294 return iter; 3287 return iter;
@@ -3408,11 +3401,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3408 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3401 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3409 goto out; 3402 goto out;
3410 3403
3411 __raw_spin_lock(&cpu_buffer->lock); 3404 arch_spin_lock(&cpu_buffer->lock);
3412 3405
3413 rb_reset_cpu(cpu_buffer); 3406 rb_reset_cpu(cpu_buffer);
3414 3407
3415 __raw_spin_unlock(&cpu_buffer->lock); 3408 arch_spin_unlock(&cpu_buffer->lock);
3416 3409
3417 out: 3410 out:
3418 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3411 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
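The remaining hunks are the raw_spinlock_t to arch_spinlock_t rename: the arch lock is the bare architecture primitive with no lockdep or debug state, so the caller manages interrupts and preemption itself, exactly as rb_get_reader_page() does above. The usage contract, sketched with a hypothetical lock:

static arch_spinlock_t demo_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void demo(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* arch locks never do this for us */
	arch_spin_lock(&demo_lock);
	/* critical section */
	arch_spin_unlock(&demo_lock);
	local_irq_restore(flags);
}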
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f2893cff0..8b9f20ab8eed 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -86,17 +86,17 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
86 */ 86 */
87static int tracing_disabled = 1; 87static int tracing_disabled = 1;
88 88
89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(int, ftrace_cpu_disabled);
90 90
91static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
92{ 92{
93 preempt_disable(); 93 preempt_disable();
94 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
95} 95}
96 96
97static inline void ftrace_enable_cpu(void) 97static inline void ftrace_enable_cpu(void)
98{ 98{
99 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
100 preempt_enable(); 100 preempt_enable();
101} 101}
102 102
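ftrace_cpu_disabled drops from local_t to a plain per-CPU int: with preemption already disabled around every access, __this_cpu_inc()/__this_cpu_dec() are sufficient and cheaper than the local_t atomics. The idiom, sketched with a hypothetical counter (per_cpu_var() being this kernel generation's spelling):

DEFINE_PER_CPU(int, demo_disabled);

static inline void demo_disable(void)
{
	preempt_disable();			/* pin to this CPU first */
	__this_cpu_inc(per_cpu_var(demo_disabled));
}

static inline void demo_enable(void)
{
	__this_cpu_dec(per_cpu_var(demo_disabled));
	preempt_enable();
}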
@@ -203,7 +203,7 @@ cycle_t ftrace_now(int cpu)
203 */ 203 */
204static struct trace_array max_tr; 204static struct trace_array max_tr;
205 205
206static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 206static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
207 207
208/* tracer_enabled is used to toggle activation of a tracer */ 208/* tracer_enabled is used to toggle activation of a tracer */
209static int tracer_enabled = 1; 209static int tracer_enabled = 1;
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
313 "bin", 313 "bin",
314 "block", 314 "block",
315 "stacktrace", 315 "stacktrace",
316 "sched-tree",
317 "trace_printk", 316 "trace_printk",
318 "ftrace_preempt", 317 "ftrace_preempt",
319 "branch", 318 "branch",
@@ -493,15 +492,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 492 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 493 * needs its own lock.
495 * 494 *
496 * This is defined as a raw_spinlock_t in order to help 495 * This is defined as an arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 496 * with performance when lockdep debugging is enabled.
498 * 497 *
499 * It is also used in other places outside the update_max_tr 498 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 499 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 500 * CONFIG_TRACER_MAX_TRACE.
502 */ 501 */
503static raw_spinlock_t ftrace_max_lock = 502static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 504
506#ifdef CONFIG_TRACER_MAX_TRACE 505#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 506unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +554,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 554 return;
556 555
557 WARN_ON_ONCE(!irqs_disabled()); 556 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 557 arch_spin_lock(&ftrace_max_lock);
559 558
560 tr->buffer = max_tr.buffer; 559 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 560 max_tr.buffer = buf;
562 561
563 __update_max_tr(tr, tsk, cpu); 562 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 563 arch_spin_unlock(&ftrace_max_lock);
565} 564}
566 565
567/** 566/**
@@ -581,7 +580,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 580 return;
582 581
583 WARN_ON_ONCE(!irqs_disabled()); 582 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 583 arch_spin_lock(&ftrace_max_lock);
585 584
586 ftrace_disable_cpu(); 585 ftrace_disable_cpu();
587 586
@@ -603,7 +602,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 602 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 603
605 __update_max_tr(tr, tsk, cpu); 604 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 605 arch_spin_unlock(&ftrace_max_lock);
607} 606}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 607#endif /* CONFIG_TRACER_MAX_TRACE */
609 608
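
This is the 2.6.33-era rename of the lowest-level lock type: raw_spinlock_t becomes arch_spinlock_t and __raw_spin_lock() becomes arch_spin_lock(). The arch_spin_*() calls bypass lockdep and preempt accounting entirely, so the caller must fence off interrupts or preemption itself, which is exactly what the WARN_ON_ONCE(!irqs_disabled()) checks above enforce. A hedged sketch of the calling convention, with hypothetical names:

    #include <linux/spinlock.h>
    #include <linux/irqflags.h>

    /* Hypothetical bottom-level lock; lockdep never sees it. */
    static arch_spinlock_t my_arch_lock =
            (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

    static void my_critical_section(void)
    {
            unsigned long flags;

            local_irq_save(flags);          /* arch_spin_lock() won't do this */
            arch_spin_lock(&my_arch_lock);

            /* ... strictly non-sleeping work ... */

            arch_spin_unlock(&my_arch_lock);
            local_irq_restore(flags);
    }
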
@@ -802,7 +801,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 801static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 802static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 803static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 804static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 805
807/* temporary disable recording */ 806/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 807static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +914,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 914 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 915 * so if we miss here, then better luck next time.
917 */ 916 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 917 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 918 return;
920 919
921 idx = map_pid_to_cmdline[tsk->pid]; 920 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +939,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 939
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 940 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 941
943 __raw_spin_unlock(&trace_cmdline_lock); 942 arch_spin_unlock(&trace_cmdline_lock);
944} 943}
945 944
946void trace_find_cmdline(int pid, char comm[]) 945void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +957,14 @@ void trace_find_cmdline(int pid, char comm[])
958 } 957 }
959 958
960 preempt_disable(); 959 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 960 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 961 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 962 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 963 strcpy(comm, saved_cmdlines[map]);
965 else 964 else
966 strcpy(comm, "<...>"); 965 strcpy(comm, "<...>");
967 966
968 __raw_spin_unlock(&trace_cmdline_lock); 967 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 968 preempt_enable();
970} 969}
971 970
@@ -1085,7 +1084,7 @@ trace_function(struct trace_array *tr,
1085 struct ftrace_entry *entry; 1084 struct ftrace_entry *entry;
1086 1085
1087 /* If we are reading the ring buffer, don't trace */ 1086 /* If we are reading the ring buffer, don't trace */
1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
1089 return; 1088 return;
1090 1089
1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1150 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1151}
1153 1152
1153/**
1154 * trace_dump_stack - record a stack back trace in the trace buffer
1155 */
1156void trace_dump_stack(void)
1157{
1158 unsigned long flags;
1159
1160 if (tracing_disabled || tracing_selftest_running)
1161 return;
1162
1163 local_save_flags(flags);
1164
1165 /* skipping 3 frames seems to get us to the caller of this function */
1166 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1167}
1168
1154void 1169void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1170ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1171{
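
trace_dump_stack(), added above, records a backtrace into the ring buffer instead of the console, so it can be sprinkled into hot paths where a dmesg-bound dump_stack() would be too slow or too noisy. A minimal, hypothetical call site (the declaration lives with the other tracing helpers in linux/kernel.h, assuming CONFIG_TRACING):

    #include <linux/kernel.h>

    static void my_suspect_path(void)
    {
            /* Lands in the ftrace ring buffer, not in dmesg. */
            trace_dump_stack();
    }
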
@@ -1251,8 +1266,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1266 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1267int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1268{
1254 static raw_spinlock_t trace_buf_lock = 1269 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1270 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1271 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1272
1258 struct ftrace_event_call *call = &event_bprint; 1273 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1298,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1298
1284 /* Lockdep uses trace_printk for lock tracing */ 1299 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1300 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1301 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1302 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1303
1289 if (len > TRACE_BUF_SIZE || len < 0) 1304 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1319,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1304 ring_buffer_unlock_commit(buffer, event); 1319 ring_buffer_unlock_commit(buffer, event);
1305 1320
1306out_unlock: 1321out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1322 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1323 local_irq_restore(flags);
1309 1324
1310out: 1325out:
@@ -1334,7 +1349,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1349int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1350 unsigned long ip, const char *fmt, va_list args)
1336{ 1351{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1352 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1353 static char trace_buf[TRACE_BUF_SIZE];
1339 1354
1340 struct ftrace_event_call *call = &event_print; 1355 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1375,8 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1375
1361 pause_graph_tracing(); 1376 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1377 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1378 arch_spin_lock(&trace_buf_lock);
1364 if (args == NULL) { 1379 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
1366 len = strlen(trace_buf);
1367 } else
1368 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1369 1380
1370 size = sizeof(*entry) + len + 1; 1381 size = sizeof(*entry) + len + 1;
1371 buffer = tr->buffer; 1382 buffer = tr->buffer;
@@ -1382,7 +1393,7 @@ int trace_array_vprintk(struct trace_array *tr,
1382 ring_buffer_unlock_commit(buffer, event); 1393 ring_buffer_unlock_commit(buffer, event);
1383 1394
1384 out_unlock: 1395 out_unlock:
1385 __raw_spin_unlock(&trace_buf_lock); 1396 arch_spin_unlock(&trace_buf_lock);
1386 raw_local_irq_restore(irq_flags); 1397 raw_local_irq_restore(irq_flags);
1387 unpause_graph_tracing(); 1398 unpause_graph_tracing();
1388 out: 1399 out:
@@ -1516,6 +1527,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1516 int i = (int)*pos; 1527 int i = (int)*pos;
1517 void *ent; 1528 void *ent;
1518 1529
1530 WARN_ON_ONCE(iter->leftover);
1531
1519 (*pos)++; 1532 (*pos)++;
1520 1533
1521 /* can't go backwards */ 1534 /* can't go backwards */
@@ -1614,8 +1627,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1614 ; 1627 ;
1615 1628
1616 } else { 1629 } else {
1617 l = *pos - 1; 1630 /*
1618 p = s_next(m, p, &l); 1631 * If we overflowed the seq_file before, then we want
1632 * to just reuse the trace_seq buffer again.
1633 */
1634 if (iter->leftover)
1635 p = iter;
1636 else {
1637 l = *pos - 1;
1638 p = s_next(m, p, &l);
1639 }
1619 } 1640 }
1620 1641
1621 trace_event_read_lock(); 1642 trace_event_read_lock();
@@ -1923,6 +1944,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1923static int s_show(struct seq_file *m, void *v) 1944static int s_show(struct seq_file *m, void *v)
1924{ 1945{
1925 struct trace_iterator *iter = v; 1946 struct trace_iterator *iter = v;
1947 int ret;
1926 1948
1927 if (iter->ent == NULL) { 1949 if (iter->ent == NULL) {
1928 if (iter->tr) { 1950 if (iter->tr) {
@@ -1942,9 +1964,27 @@ static int s_show(struct seq_file *m, void *v)
1942 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1964 if (!(trace_flags & TRACE_ITER_VERBOSE))
1943 print_func_help_header(m); 1965 print_func_help_header(m);
1944 } 1966 }
1967 } else if (iter->leftover) {
1968 /*
1969 * If we filled the seq_file buffer earlier, we
1970 * want to just show it now.
1971 */
1972 ret = trace_print_seq(m, &iter->seq);
1973
1974 /* ret should be zero this time, but you never know */
1975 iter->leftover = ret;
1976
1945 } else { 1977 } else {
1946 print_trace_line(iter); 1978 print_trace_line(iter);
1947 trace_print_seq(m, &iter->seq); 1979 ret = trace_print_seq(m, &iter->seq);
1980 /*
1981 * If we overflow the seq_file buffer, then it will
1982 * ask us for this data again at start up.
1983 * Use that instead.
1984 * ret is 0 if seq_file write succeeded.
1985 * -1 otherwise.
1986 */
1987 iter->leftover = ret;
1948 } 1988 }
1949 1989
1950 return 0; 1990 return 0;
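
These s_start()/s_next()/s_show() hunks close a silent-truncation hole: when a formatted entry overflows the seq_file buffer, trace_print_seq() now reports it and the text is parked in iter->leftover, so the next ->show() pass replays it instead of dropping it (the s_start() change above reuses the iterator in that case, and the new WARN_ON_ONCE() in s_next() guards the invariant). Reduced to a skeleton under hypothetical names:

    #include <linux/seq_file.h>

    struct my_iter {
            char buf[256];          /* formatted text waiting to go out */
            int leftover;           /* set when the last write overflowed */
    };

    static void my_format_entry(struct my_iter *iter) { /* fill iter->buf */ }

    static int my_show(struct seq_file *m, void *v)
    {
            struct my_iter *iter = v;

            if (!iter->leftover)
                    my_format_entry(iter);

            /* seq_printf() of this era returns -1 on overflow, 0 otherwise;
             * remember the failure and replay the same text next pass. */
            iter->leftover = (seq_printf(m, "%s", iter->buf) < 0);
            return 0;
    }
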
@@ -2254,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2254 mutex_lock(&tracing_cpumask_update_lock); 2294 mutex_lock(&tracing_cpumask_update_lock);
2255 2295
2256 local_irq_disable(); 2296 local_irq_disable();
2257 __raw_spin_lock(&ftrace_max_lock); 2297 arch_spin_lock(&ftrace_max_lock);
2258 for_each_tracing_cpu(cpu) { 2298 for_each_tracing_cpu(cpu) {
2259 /* 2299 /*
2260 * Increase/decrease the disabled counter if we are 2300 * Increase/decrease the disabled counter if we are
@@ -2269,7 +2309,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2269 atomic_dec(&global_trace.data[cpu]->disabled); 2309 atomic_dec(&global_trace.data[cpu]->disabled);
2270 } 2310 }
2271 } 2311 }
2272 __raw_spin_unlock(&ftrace_max_lock); 2312 arch_spin_unlock(&ftrace_max_lock);
2273 local_irq_enable(); 2313 local_irq_enable();
2274 2314
2275 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2315 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2291,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
2291 .write = tracing_cpumask_write, 2331 .write = tracing_cpumask_write,
2292}; 2332};
2293 2333
2294static ssize_t 2334static int tracing_trace_options_show(struct seq_file *m, void *v)
2295tracing_trace_options_read(struct file *filp, char __user *ubuf,
2296 size_t cnt, loff_t *ppos)
2297{ 2335{
2298 struct tracer_opt *trace_opts; 2336 struct tracer_opt *trace_opts;
2299 u32 tracer_flags; 2337 u32 tracer_flags;
2300 int len = 0;
2301 char *buf;
2302 int r = 0;
2303 int i; 2338 int i;
2304 2339
2305
2306 /* calculate max size */
2307 for (i = 0; trace_options[i]; i++) {
2308 len += strlen(trace_options[i]);
2309 len += 3; /* "no" and newline */
2310 }
2311
2312 mutex_lock(&trace_types_lock); 2340 mutex_lock(&trace_types_lock);
2313 tracer_flags = current_trace->flags->val; 2341 tracer_flags = current_trace->flags->val;
2314 trace_opts = current_trace->flags->opts; 2342 trace_opts = current_trace->flags->opts;
2315 2343
2316 /*
2317 * Increase the size with names of options specific
2318 * of the current tracer.
2319 */
2320 for (i = 0; trace_opts[i].name; i++) {
2321 len += strlen(trace_opts[i].name);
2322 len += 3; /* "no" and newline */
2323 }
2324
2325 /* +1 for \0 */
2326 buf = kmalloc(len + 1, GFP_KERNEL);
2327 if (!buf) {
2328 mutex_unlock(&trace_types_lock);
2329 return -ENOMEM;
2330 }
2331
2332 for (i = 0; trace_options[i]; i++) { 2344 for (i = 0; trace_options[i]; i++) {
2333 if (trace_flags & (1 << i)) 2345 if (trace_flags & (1 << i))
2334 r += sprintf(buf + r, "%s\n", trace_options[i]); 2346 seq_printf(m, "%s\n", trace_options[i]);
2335 else 2347 else
2336 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2348 seq_printf(m, "no%s\n", trace_options[i]);
2337 } 2349 }
2338 2350
2339 for (i = 0; trace_opts[i].name; i++) { 2351 for (i = 0; trace_opts[i].name; i++) {
2340 if (tracer_flags & trace_opts[i].bit) 2352 if (tracer_flags & trace_opts[i].bit)
2341 r += sprintf(buf + r, "%s\n", 2353 seq_printf(m, "%s\n", trace_opts[i].name);
2342 trace_opts[i].name);
2343 else 2354 else
2344 r += sprintf(buf + r, "no%s\n", 2355 seq_printf(m, "no%s\n", trace_opts[i].name);
2345 trace_opts[i].name);
2346 } 2356 }
2347 mutex_unlock(&trace_types_lock); 2357 mutex_unlock(&trace_types_lock);
2348 2358
2349 WARN_ON(r >= len + 1); 2359 return 0;
2360}
2350 2361
2351 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2362static int __set_tracer_option(struct tracer *trace,
2363 struct tracer_flags *tracer_flags,
2364 struct tracer_opt *opts, int neg)
2365{
2366 int ret;
2352 2367
2353 kfree(buf); 2368 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2354 return r; 2369 if (ret)
2370 return ret;
2371
2372 if (neg)
2373 tracer_flags->val &= ~opts->bit;
2374 else
2375 tracer_flags->val |= opts->bit;
2376 return 0;
2355} 2377}
2356 2378
2357/* Try to assign a tracer specific option */ 2379/* Try to assign a tracer specific option */
@@ -2359,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2359{ 2381{
2360 struct tracer_flags *tracer_flags = trace->flags; 2382 struct tracer_flags *tracer_flags = trace->flags;
2361 struct tracer_opt *opts = NULL; 2383 struct tracer_opt *opts = NULL;
2362 int ret = 0, i = 0; 2384 int i;
2363 int len;
2364 2385
2365 for (i = 0; tracer_flags->opts[i].name; i++) { 2386 for (i = 0; tracer_flags->opts[i].name; i++) {
2366 opts = &tracer_flags->opts[i]; 2387 opts = &tracer_flags->opts[i];
2367 len = strlen(opts->name);
2368 2388
2369 if (strncmp(cmp, opts->name, len) == 0) { 2389 if (strcmp(cmp, opts->name) == 0)
2370 ret = trace->set_flag(tracer_flags->val, 2390 return __set_tracer_option(trace, trace->flags,
2371 opts->bit, !neg); 2391 opts, neg);
2372 break;
2373 }
2374 } 2392 }
2375 /* Not found */
2376 if (!tracer_flags->opts[i].name)
2377 return -EINVAL;
2378
2379 /* Refused to handle */
2380 if (ret)
2381 return ret;
2382
2383 if (neg)
2384 tracer_flags->val &= ~opts->bit;
2385 else
2386 tracer_flags->val |= opts->bit;
2387 2393
2388 return 0; 2394 return -EINVAL;
2389} 2395}
2390 2396
2391static void set_tracer_flags(unsigned int mask, int enabled) 2397static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2405,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2405 size_t cnt, loff_t *ppos) 2411 size_t cnt, loff_t *ppos)
2406{ 2412{
2407 char buf[64]; 2413 char buf[64];
2408 char *cmp = buf; 2414 char *cmp;
2409 int neg = 0; 2415 int neg = 0;
2410 int ret; 2416 int ret;
2411 int i; 2417 int i;
@@ -2417,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2417 return -EFAULT; 2423 return -EFAULT;
2418 2424
2419 buf[cnt] = 0; 2425 buf[cnt] = 0;
2426 cmp = strstrip(buf);
2420 2427
2421 if (strncmp(buf, "no", 2) == 0) { 2428 if (strncmp(cmp, "no", 2) == 0) {
2422 neg = 1; 2429 neg = 1;
2423 cmp += 2; 2430 cmp += 2;
2424 } 2431 }
2425 2432
2426 for (i = 0; trace_options[i]; i++) { 2433 for (i = 0; trace_options[i]; i++) {
2427 int len = strlen(trace_options[i]); 2434 if (strcmp(cmp, trace_options[i]) == 0) {
2428
2429 if (strncmp(cmp, trace_options[i], len) == 0) {
2430 set_tracer_flags(1 << i, !neg); 2435 set_tracer_flags(1 << i, !neg);
2431 break; 2436 break;
2432 } 2437 }
@@ -2446,9 +2451,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2446 return cnt; 2451 return cnt;
2447} 2452}
2448 2453
2454static int tracing_trace_options_open(struct inode *inode, struct file *file)
2455{
2456 if (tracing_disabled)
2457 return -ENODEV;
2458 return single_open(file, tracing_trace_options_show, NULL);
2459}
2460
2449static const struct file_operations tracing_iter_fops = { 2461static const struct file_operations tracing_iter_fops = {
2450 .open = tracing_open_generic, 2462 .open = tracing_trace_options_open,
2451 .read = tracing_trace_options_read, 2463 .read = seq_read,
2464 .llseek = seq_lseek,
2465 .release = single_release,
2452 .write = tracing_trace_options_write, 2466 .write = tracing_trace_options_write,
2453}; 2467};
2454 2468
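
The trace_options read side moves from a hand-rolled kmalloc/sprintf/simple_read_from_buffer() pipeline to seq_file's single_open() helpers, which is why all of the "calculate max size" arithmetic disappears: seq_printf() sizes its buffer as it goes. The standard shape of such a conversion, under hypothetical names:

    #include <linux/fs.h>
    #include <linux/seq_file.h>

    static int my_options_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "some-option\n"); /* no manual sizing needed */
            return 0;
    }

    static int my_options_open(struct inode *inode, struct file *file)
    {
            return single_open(file, my_options_show, NULL);
    }

    static const struct file_operations my_options_fops = {
            .open    = my_options_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };
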
@@ -2898,6 +2912,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2898 else 2912 else
2899 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); 2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2900 2914
2915
2916 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter);
2918
2901 mutex_unlock(&trace_types_lock); 2919 mutex_unlock(&trace_types_lock);
2902 2920
2903 free_cpumask_var(iter->started); 2921 free_cpumask_var(iter->started);
@@ -3104,7 +3122,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3104 __free_page(spd->pages[idx]); 3122 __free_page(spd->pages[idx]);
3105} 3123}
3106 3124
3107static struct pipe_buf_operations tracing_pipe_buf_ops = { 3125static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3108 .can_merge = 0, 3126 .can_merge = 0,
3109 .map = generic_pipe_buf_map, 3127 .map = generic_pipe_buf_map,
3110 .unmap = generic_pipe_buf_unmap, 3128 .unmap = generic_pipe_buf_unmap,
@@ -3320,6 +3338,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3320 return cnt; 3338 return cnt;
3321} 3339}
3322 3340
3341static int mark_printk(const char *fmt, ...)
3342{
3343 int ret;
3344 va_list args;
3345 va_start(args, fmt);
3346 ret = trace_vprintk(0, fmt, args);
3347 va_end(args);
3348 return ret;
3349}
3350
3323static ssize_t 3351static ssize_t
3324tracing_mark_write(struct file *filp, const char __user *ubuf, 3352tracing_mark_write(struct file *filp, const char __user *ubuf,
3325 size_t cnt, loff_t *fpos) 3353 size_t cnt, loff_t *fpos)
@@ -3346,28 +3374,25 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3346 } else 3374 } else
3347 buf[cnt] = '\0'; 3375 buf[cnt] = '\0';
3348 3376
3349 cnt = trace_vprintk(0, buf, NULL); 3377 cnt = mark_printk("%s", buf);
3350 kfree(buf); 3378 kfree(buf);
3351 *fpos += cnt; 3379 *fpos += cnt;
3352 3380
3353 return cnt; 3381 return cnt;
3354} 3382}
3355 3383
3356static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3384static int tracing_clock_show(struct seq_file *m, void *v)
3357 size_t cnt, loff_t *ppos)
3358{ 3385{
3359 char buf[64];
3360 int bufiter = 0;
3361 int i; 3386 int i;
3362 3387
3363 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3388 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3364 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3389 seq_printf(m,
3365 "%s%s%s%s", i ? " " : "", 3390 "%s%s%s%s", i ? " " : "",
3366 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3391 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3367 i == trace_clock_id ? "]" : ""); 3392 i == trace_clock_id ? "]" : "");
3368 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3393 seq_putc(m, '\n');
3369 3394
3370 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3395 return 0;
3371} 3396}
3372 3397
3373static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3398static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
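
Two related cleanups meet in the hunks above. trace_array_vprintk() loses its args == NULL special case, so tracing_mark_write() can no longer hand the raw user buffer over as a format string; the new mark_printk() shim forwards it as trace_vprintk(0, "%s", buf) instead, which also guarantees that '%' bytes written by userspace are never interpreted as conversions. The same defensive pattern in plain C:

    #include <stdio.h>

    void log_user_text(const char *user_buf)
    {
            /* BAD:  printf(user_buf);  user-supplied '%' would be parsed. */
            printf("%s", user_buf);     /* user bytes stay data, not format */
    }
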
@@ -3409,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3409 return cnt; 3434 return cnt;
3410} 3435}
3411 3436
3437static int tracing_clock_open(struct inode *inode, struct file *file)
3438{
3439 if (tracing_disabled)
3440 return -ENODEV;
3441 return single_open(file, tracing_clock_show, NULL);
3442}
3443
3412static const struct file_operations tracing_max_lat_fops = { 3444static const struct file_operations tracing_max_lat_fops = {
3413 .open = tracing_open_generic, 3445 .open = tracing_open_generic,
3414 .read = tracing_max_lat_read, 3446 .read = tracing_max_lat_read,
@@ -3447,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
3447}; 3479};
3448 3480
3449static const struct file_operations trace_clock_fops = { 3481static const struct file_operations trace_clock_fops = {
3450 .open = tracing_open_generic, 3482 .open = tracing_clock_open,
3451 .read = tracing_clock_read, 3483 .read = seq_read,
3484 .llseek = seq_lseek,
3485 .release = single_release,
3452 .write = tracing_clock_write, 3486 .write = tracing_clock_write,
3453}; 3487};
3454 3488
@@ -3578,7 +3612,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3578} 3612}
3579 3613
3580/* Pipe buffer operations for a buffer. */ 3614/* Pipe buffer operations for a buffer. */
3581static struct pipe_buf_operations buffer_pipe_buf_ops = { 3615static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3582 .can_merge = 0, 3616 .can_merge = 0,
3583 .map = generic_pipe_buf_map, 3617 .map = generic_pipe_buf_map,
3584 .unmap = generic_pipe_buf_unmap, 3618 .unmap = generic_pipe_buf_unmap,
@@ -3909,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3909 if (ret < 0) 3943 if (ret < 0)
3910 return ret; 3944 return ret;
3911 3945
3912 ret = 0; 3946 if (val != 0 && val != 1)
3913 switch (val) { 3947 return -EINVAL;
3914 case 0:
3915 /* do nothing if already cleared */
3916 if (!(topt->flags->val & topt->opt->bit))
3917 break;
3918
3919 mutex_lock(&trace_types_lock);
3920 if (current_trace->set_flag)
3921 ret = current_trace->set_flag(topt->flags->val,
3922 topt->opt->bit, 0);
3923 mutex_unlock(&trace_types_lock);
3924 if (ret)
3925 return ret;
3926 topt->flags->val &= ~topt->opt->bit;
3927 break;
3928 case 1:
3929 /* do nothing if already set */
3930 if (topt->flags->val & topt->opt->bit)
3931 break;
3932 3948
3949 if (!!(topt->flags->val & topt->opt->bit) != val) {
3933 mutex_lock(&trace_types_lock); 3950 mutex_lock(&trace_types_lock);
3934 if (current_trace->set_flag) 3951 ret = __set_tracer_option(current_trace, topt->flags,
3935 ret = current_trace->set_flag(topt->flags->val, 3952 topt->opt, val);
3936 topt->opt->bit, 1);
3937 mutex_unlock(&trace_types_lock); 3953 mutex_unlock(&trace_types_lock);
3938 if (ret) 3954 if (ret)
3939 return ret; 3955 return ret;
3940 topt->flags->val |= topt->opt->bit;
3941 break;
3942
3943 default:
3944 return -EINVAL;
3945 } 3956 }
3946 3957
3947 *ppos += cnt; 3958 *ppos += cnt;
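
The rewritten trace_options_write() handler folds the old 0/1 switch into a single change-detection test: !!(topt->flags->val & topt->opt->bit) normalizes the masked bit to exactly 0 or 1 before comparing with the requested value, so __set_tracer_option() only runs when the state actually flips. The idiom in isolation:

    #include <stdbool.h>

    /* True when 'bit' must be toggled to reach the requested 0/1 state. */
    static bool flag_needs_update(unsigned int flags, unsigned int bit, int val)
    {
            return !!(flags & bit) != val;  /* !! folds any set bit to 1 */
    }
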
@@ -4268,8 +4279,8 @@ trace_printk_seq(struct trace_seq *s)
4268 4279
4269static void __ftrace_dump(bool disable_tracing) 4280static void __ftrace_dump(bool disable_tracing)
4270{ 4281{
4271 static raw_spinlock_t ftrace_dump_lock = 4282 static arch_spinlock_t ftrace_dump_lock =
4272 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4273 /* use static because iter can be a bit big for the stack */ 4284 /* use static because iter can be a bit big for the stack */
4274 static struct trace_iterator iter; 4285 static struct trace_iterator iter;
4275 unsigned int old_userobj; 4286 unsigned int old_userobj;
@@ -4279,7 +4290,7 @@ static void __ftrace_dump(bool disable_tracing)
4279 4290
4280 /* only one dump */ 4291 /* only one dump */
4281 local_irq_save(flags); 4292 local_irq_save(flags);
4282 __raw_spin_lock(&ftrace_dump_lock); 4293 arch_spin_lock(&ftrace_dump_lock);
4283 if (dump_ran) 4294 if (dump_ran)
4284 goto out; 4295 goto out;
4285 4296
@@ -4354,7 +4365,7 @@ static void __ftrace_dump(bool disable_tracing)
4354 } 4365 }
4355 4366
4356 out: 4367 out:
4357 __raw_spin_unlock(&ftrace_dump_lock); 4368 arch_spin_unlock(&ftrace_dump_lock);
4358 local_irq_restore(flags); 4369 local_irq_restore(flags);
4359} 4370}
4360 4371
@@ -4415,7 +4426,7 @@ __init static int tracer_alloc_buffers(void)
4415 /* Allocate the first page for all buffers */ 4426 /* Allocate the first page for all buffers */
4416 for_each_tracing_cpu(i) { 4427 for_each_tracing_cpu(i) {
4417 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4418 max_tr.data[i] = &per_cpu(max_data, i); 4429 max_tr.data[i] = &per_cpu(max_tr_data, i);
4419 } 4430 }
4420 4431
4421 trace_init_cmdlines(); 4432 trace_init_cmdlines();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1d7f4830a80d..4df6a77eb196 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,6 +272,7 @@ struct tracer_flags {
272 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
273 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
274 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
275 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
276 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
277 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -290,6 +291,7 @@ struct tracer {
290 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
291 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
292 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
293 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
294 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
295 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -441,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
441 443
442extern int ring_buffer_expanded; 444extern int ring_buffer_expanded;
443extern bool tracing_selftest_disabled; 445extern bool tracing_selftest_disabled;
444DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); 446DECLARE_PER_CPU(int, ftrace_cpu_disabled);
445 447
446#ifdef CONFIG_FTRACE_STARTUP_TEST 448#ifdef CONFIG_FTRACE_STARTUP_TEST
447extern int trace_selftest_startup_function(struct tracer *trace, 449extern int trace_selftest_startup_function(struct tracer *trace,
@@ -595,18 +597,17 @@ enum trace_iterator_flags {
595 TRACE_ITER_BIN = 0x40, 597 TRACE_ITER_BIN = 0x40,
596 TRACE_ITER_BLOCK = 0x80, 598 TRACE_ITER_BLOCK = 0x80,
597 TRACE_ITER_STACKTRACE = 0x100, 599 TRACE_ITER_STACKTRACE = 0x100,
598 TRACE_ITER_SCHED_TREE = 0x200, 600 TRACE_ITER_PRINTK = 0x200,
599 TRACE_ITER_PRINTK = 0x400, 601 TRACE_ITER_PREEMPTONLY = 0x400,
600 TRACE_ITER_PREEMPTONLY = 0x800, 602 TRACE_ITER_BRANCH = 0x800,
601 TRACE_ITER_BRANCH = 0x1000, 603 TRACE_ITER_ANNOTATE = 0x1000,
602 TRACE_ITER_ANNOTATE = 0x2000, 604 TRACE_ITER_USERSTACKTRACE = 0x2000,
603 TRACE_ITER_USERSTACKTRACE = 0x4000, 605 TRACE_ITER_SYM_USEROBJ = 0x4000,
604 TRACE_ITER_SYM_USEROBJ = 0x8000, 606 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
605 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 607 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
606 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 608 TRACE_ITER_LATENCY_FMT = 0x20000,
607 TRACE_ITER_LATENCY_FMT = 0x40000, 609 TRACE_ITER_SLEEP_TIME = 0x40000,
608 TRACE_ITER_SLEEP_TIME = 0x80000, 610 TRACE_ITER_GRAPH_TIME = 0x80000,
609 TRACE_ITER_GRAPH_TIME = 0x100000,
610}; 611};
611 612
612/* 613/*
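
With "sched-tree" removed from trace_options[], every TRACE_ITER_* constant above it shifts down one bit. That is safe only because these bits are always derived at runtime as 1 << i from the option's index in trace_options[], which changed in the same patch; nothing persists the raw values across boots. A trimmed, hypothetical sketch of the coupling that must stay in sync:

    /* Bit i of the flags word corresponds to name i in the table. */
    static const char *my_options[] = { "print-parent", "sym-offset", NULL };

    enum my_iter_flags {
            MY_ITER_PRINT_PARENT = 0x01,    /* "print-parent" */
            MY_ITER_SYM_OFFSET   = 0x02,    /* "sym-offset"   */
    };
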
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f386ba..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -71,10 +71,10 @@ u64 notrace trace_clock(void)
71/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
72static struct { 72static struct {
73 u64 prev_time; 73 u64 prev_time;
74 raw_spinlock_t lock; 74 arch_spinlock_t lock;
75} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
76 { 76 {
77 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
78 }; 78 };
79 79
80u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void)
94 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
95 goto out; 95 goto out;
96 96
97 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
98 98
99 /* 99 /*
100 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void)
106 106
107 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
108 108
109 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
110 110
111 out: 111 out:
112 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
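
Beyond the type rename, note the layout trick this hunk preserves: prev_time and its lock sit in one ____cacheline_aligned_in_smp struct, so taking the lock and updating the timestamp touch a single cache line on every global-clock read. The pattern, as a hedged sketch with hypothetical names:

    #include <linux/cache.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    /* Lock and the datum it guards share a cache line: one miss, not two. */
    static struct {
            u64             prev;
            arch_spinlock_t lock;
    } my_clock ____cacheline_aligned_in_smp = {
            .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
    };
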
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
25 char *buf; 25 char *buf;
26 int ret = -ENOMEM; 26 int ret = -ENOMEM;
27 27
28 if (atomic_inc_return(&event->profile_count)) 28 if (event->profile_count++ > 0)
29 return 0; 29 return 0;
30 30
31 if (!total_profile_count) { 31 if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
56 perf_trace_buf = NULL; 56 perf_trace_buf = NULL;
57 } 57 }
58fail_buf: 58fail_buf:
59 atomic_dec(&event->profile_count); 59 event->profile_count--;
60 60
61 return ret; 61 return ret;
62} 62}
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
83{ 83{
84 char *buf, *nmi_buf; 84 char *buf, *nmi_buf;
85 85
86 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (--event->profile_count > 0)
87 return; 87 return;
88 88
89 event->profile_disable(event); 89 event->profile_disable(event);
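
profile_count can drop from atomic_t to a plain int because every caller already serializes on event_mutex; this also retires the awkward -1 pre-biasing that atomic_inc_return()/atomic_add_negative() required (the matching atomic_set(..., -1) is deleted from trace_kprobe.c further down). The zero-based counting idiom only works under an external lock; a sketch with hypothetical names:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(my_mutex);
    static int my_count;                    /* guarded by my_mutex */

    static void my_hw_enable(void)  { /* power on once */ }
    static void my_hw_disable(void) { /* power off once */ }

    static void my_get(void)
    {
            mutex_lock(&my_mutex);
            if (my_count++ == 0)
                    my_hw_enable();         /* first user */
            mutex_unlock(&my_mutex);
    }

    static void my_put(void)
    {
            mutex_lock(&my_mutex);
            if (--my_count == 0)
                    my_hw_disable();        /* last user */
            mutex_unlock(&my_mutex);
    }
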
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..189b09baf4fb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 78 if (ret) \
79 return ret; 79 return ret;
80 80
81int trace_define_common_fields(struct ftrace_event_call *call) 81static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 82{
83 int ret; 83 int ret;
84 struct trace_entry ent; 84 struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 91
92 return ret; 92 return ret;
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 94
96void trace_destroy_fields(struct ftrace_event_call *call) 95void trace_destroy_fields(struct ftrace_event_call *call)
97{ 96{
@@ -105,9 +104,25 @@ void trace_destroy_fields(struct ftrace_event_call *call)
105 } 104 }
106} 105}
107 106
108static void ftrace_event_enable_disable(struct ftrace_event_call *call, 107int trace_event_raw_init(struct ftrace_event_call *call)
108{
109 int id;
110
111 id = register_ftrace_event(call->event);
112 if (!id)
113 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116
117 return 0;
118}
119EXPORT_SYMBOL_GPL(trace_event_raw_init);
120
121static int ftrace_event_enable_disable(struct ftrace_event_call *call,
109 int enable) 122 int enable)
110{ 123{
124 int ret = 0;
125
111 switch (enable) { 126 switch (enable) {
112 case 0: 127 case 0:
113 if (call->enabled) { 128 if (call->enabled) {
@@ -118,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
118 break; 133 break;
119 case 1: 134 case 1:
120 if (!call->enabled) { 135 if (!call->enabled) {
121 call->enabled = 1;
122 tracing_start_cmdline_record(); 136 tracing_start_cmdline_record();
123 call->regfunc(call); 137 ret = call->regfunc(call);
138 if (ret) {
139 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event "
141 "%s\n", call->name);
142 break;
143 }
144 call->enabled = 1;
124 } 145 }
125 break; 146 break;
126 } 147 }
148
149 return ret;
127} 150}
128 151
129static void ftrace_clear_events(void) 152static void ftrace_clear_events(void)
@@ -402,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
402 case 0: 425 case 0:
403 case 1: 426 case 1:
404 mutex_lock(&event_mutex); 427 mutex_lock(&event_mutex);
405 ftrace_event_enable_disable(call, val); 428 ret = ftrace_event_enable_disable(call, val);
406 mutex_unlock(&event_mutex); 429 mutex_unlock(&event_mutex);
407 break; 430 break;
408 431
@@ -412,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
412 435
413 *ppos += cnt; 436 *ppos += cnt;
414 437
415 return cnt; 438 return ret ? ret : cnt;
416} 439}
417 440
418static ssize_t 441static ssize_t
@@ -913,7 +936,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
913 id); 936 id);
914 937
915 if (call->define_fields) { 938 if (call->define_fields) {
916 ret = call->define_fields(call); 939 ret = trace_define_common_fields(call);
940 if (!ret)
941 ret = call->define_fields(call);
917 if (ret < 0) { 942 if (ret < 0) {
918 pr_warning("Could not initialize trace point" 943 pr_warning("Could not initialize trace point"
919 " events/%s\n", call->name); 944 " events/%s\n", call->name);
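
Three moves in this file share one theme: the core takes over work the events used to duplicate. trace_event_raw_init() centralizes event registration, event_create_dir() now calls trace_define_common_fields() itself before the per-event define_fields(), and ftrace_event_enable_disable() finally propagates a regfunc() failure, unwinding tracing_start_cmdline_record() and reporting the error back through event_enable_write() instead of marking the event enabled regardless. The rollback shape, sketched under hypothetical names:

    struct my_event {
            int enabled;
            int (*regfunc)(struct my_event *ev);
    };

    static void my_start_bookkeeping(void) { }
    static void my_stop_bookkeeping(void) { }

    static int my_enable(struct my_event *ev)
    {
            int ret;

            my_start_bookkeeping();
            ret = ev->regfunc(ev);
            if (ret) {
                    my_stop_bookkeeping(); /* unwind before failing */
                    return ret;
            }
            ev->enabled = 1;               /* only once everything stuck */
            return 0;
    }
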
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..458e5bfe26d0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
184 struct struct_name field; \ 184 struct struct_name field; \
185 int ret; \ 185 int ret; \
186 \ 186 \
187 ret = trace_define_common_fields(event_call); \
188 if (ret) \
189 return ret; \
190 \
191 tstruct; \ 187 tstruct; \
192 \ 188 \
193 return ret; \ 189 return ret; \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..b1342c5d37cf 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21};
22
23struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data;
25
26 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent;
28 struct ftrace_graph_ret_entry ret;
29 int failed;
30 int cpu;
20}; 31};
21 32
22#define TRACE_GRAPH_INDENT 2 33#define TRACE_GRAPH_INDENT 2
@@ -176,7 +187,7 @@ static int __trace_graph_entry(struct trace_array *tr,
176 struct ring_buffer *buffer = tr->buffer; 187 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry; 188 struct ftrace_graph_ent_entry *entry;
178 189
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
180 return 0; 191 return 0;
181 192
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -240,7 +251,7 @@ static void __trace_graph_return(struct trace_array *tr,
240 struct ring_buffer *buffer = tr->buffer; 251 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry; 252 struct ftrace_graph_ret_entry *entry;
242 253
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
244 return; 255 return;
245 256
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 395 if (!data)
385 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
386 397
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 398 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 399
389 if (*last_pid == pid) 400 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 401 return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 446get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 447 struct ftrace_graph_ent_entry *curr)
437{ 448{
438 struct ring_buffer_iter *ring_iter; 449 struct fgraph_data *data = iter->private;
450 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 451 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 452 struct ftrace_graph_ret_entry *next;
441 453
442 ring_iter = iter->buffer_iter[iter->cpu]; 454 /*
455 * If the previous output failed to write to the seq buffer,
456 * then we just reuse the data from before.
457 */
458 if (data && data->failed) {
459 curr = &data->ent;
460 next = &data->ret;
461 } else {
443 462
444 /* First peek to compare current entry and the next one */ 463 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 464
446 event = ring_buffer_iter_peek(ring_iter, NULL); 465 /* First peek to compare current entry and the next one */
447 else { 466 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 467 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 468 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 469 /*
451 NULL); 470 * We need to consume the current entry to see
452 } 471 * the next one.
472 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL);
476 }
453 477
454 if (!event) 478 if (!event)
455 return NULL; 479 return NULL;
480
481 next = ring_buffer_event_data(event);
456 482
457 next = ring_buffer_event_data(event); 483 if (data) {
484 /*
485 * Save current and next entries for later reference
486 * if the output fails.
487 */
488 data->ent = *curr;
489 data->ret = *next;
490 }
491 }
458 492
459 if (next->ent.type != TRACE_GRAPH_RET) 493 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 494 return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
640 674
641 if (data) { 675 if (data) {
642 int cpu = iter->cpu; 676 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
688 722
689 if (data) { 723 if (data) {
690 int cpu = iter->cpu; 724 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth); 725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
692 726
693 *depth = call->depth; 727 *depth = call->depth;
694 } 728 }
@@ -782,19 +816,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 817 struct trace_iterator *iter)
784{ 818{
785 int cpu = iter->cpu; 819 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 820 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 821 struct ftrace_graph_ret_entry *leaf_ret;
 822 enum print_line_t ret;
823 int cpu = iter->cpu;
788 824
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 826 return TRACE_TYPE_PARTIAL_LINE;
791 827
792 leaf_ret = get_return_for_leaf(iter, field); 828 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 829 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 831 else
796 return print_graph_entry_nested(iter, field, s, cpu); 832 ret = print_graph_entry_nested(iter, field, s, cpu);
797 833
834 if (data) {
835 /*
836 * If we failed to write our output, then we need to make
837 * note of it. Because we already consumed our entry.
838 */
839 if (s->full) {
840 data->failed = 1;
841 data->cpu = cpu;
842 } else
843 data->failed = 0;
844 }
845
846 return ret;
798} 847}
799 848
800static enum print_line_t 849static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
810 859
811 if (data) { 860 if (data) {
812 int cpu = iter->cpu; 861 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
814 863
815 /* 864 /*
816 * Comments display at + 1 to depth. This is the 865 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 922 int i;
874 923
875 if (data) 924 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 926
878 if (print_graph_prologue(iter, s, 0, 0)) 927 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 990enum print_line_t
942print_graph_function(struct trace_iterator *iter) 991print_graph_function(struct trace_iterator *iter)
943{ 992{
993 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 995 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 996 struct trace_seq *s = &iter->seq;
997 int cpu = iter->cpu;
998 int ret;
999
1000 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1001 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1002 return TRACE_TYPE_HANDLED;
1003 }
1004
1005 /*
1006 * If the last output failed, there's a possibility we need
1007 * to print out the missing entry which would never go out.
1008 */
1009 if (data && data->failed) {
1010 field = &data->ent;
1011 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME;
1016 }
1017 iter->cpu = cpu;
1018 return ret;
1019 }
946 1020
947 switch (entry->type) { 1021 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1022 case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1026 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1027 * it can be safely saved at the stack.
954 */ 1028 */
955 struct ftrace_graph_ent_entry *field, saved; 1029 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1030 trace_assign_type(field, entry);
957 saved = *field; 1031 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1032 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1104static void graph_trace_open(struct trace_iterator *iter)
1031{ 1105{
1032 /* pid and depth on the last trace processed */ 1106 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1107 struct fgraph_data *data;
1034 int cpu; 1108 int cpu;
1035 1109
1110 iter->private = NULL;
1111
1112 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1113 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1114 goto out_err;
1038 else 1115
1039 for_each_possible_cpu(cpu) { 1116 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1117 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1118 goto out_err_free;
1042 *pid = -1; 1119
1043 *depth = 0; 1120 for_each_possible_cpu(cpu) {
1044 } 1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1124 *pid = -1;
1125 *depth = 0;
1126 *ignore = 0;
1127 }
1045 1128
1046 iter->private = data; 1129 iter->private = data;
1130
1131 return;
1132
1133 out_err_free:
1134 kfree(data);
1135 out_err:
1136 pr_warning("function graph tracer: not enough memory\n");
1047} 1137}
1048 1138
1049static void graph_trace_close(struct trace_iterator *iter) 1139static void graph_trace_close(struct trace_iterator *iter)
1050{ 1140{
1051 free_percpu(iter->private); 1141 struct fgraph_data *data = iter->private;
1142
1143 if (data) {
1144 free_percpu(data->cpu_data);
1145 kfree(data);
1146 }
1052} 1147}
1053 1148
1054static struct tracer graph_trace __read_mostly = { 1149static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1150 .name = "function_graph",
1056 .open = graph_trace_open, 1151 .open = graph_trace_open,
1152 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1153 .close = graph_trace_close,
1154 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1155 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1156 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1157 .reset = graph_trace_reset,
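
The tracer's private data grows from a bare alloc_percpu() area into a kzalloc'd wrapper that carries the per-cpu state plus the shared failed-entry stash, so graph_trace_open()/close() now pair two allocations and unwind in reverse order on failure. The allocation pattern, as a sketch with hypothetical names:

    #include <linux/percpu.h>
    #include <linux/slab.h>

    struct my_cpu_state { int depth; };

    struct my_state {
            struct my_cpu_state *cpu_data;  /* per-cpu part */
            int failed;                     /* shared part  */
    };

    static struct my_state *my_open(void)
    {
            struct my_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

            if (!s)
                    return NULL;
            s->cpu_data = alloc_percpu(struct my_cpu_state);
            if (!s->cpu_data) {
                    kfree(s);               /* unwind in reverse order */
                    return NULL;
            }
            return s;
    }

    static void my_close(struct my_state *s)
    {
            if (s) {
                    free_percpu(s->cpu_data);
                    kfree(s);
            }
    }
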
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
20 20
21#define BTS_BUFFER_SIZE (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
22 22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25 25
26#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27 27
28static int trace_hw_branches_enabled __read_mostly; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly; 29static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
32 32
33static void bts_trace_init_cpu(int cpu) 33static void bts_trace_init_cpu(int cpu)
34{ 34{
35 per_cpu(tracer, cpu) = 35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, 36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 NULL, (size_t)-1, BTS_KERNEL); 37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
38 39
39 if (IS_ERR(per_cpu(tracer, cpu))) 40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL; 41 per_cpu(hwb_tracer, cpu) = NULL;
41} 42}
42 43
43static int bts_trace_init(struct trace_array *tr) 44static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
51 for_each_online_cpu(cpu) { 52 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu); 53 bts_trace_init_cpu(cpu);
53 54
54 if (likely(per_cpu(tracer, cpu))) 55 if (likely(per_cpu(hwb_tracer, cpu)))
55 trace_hw_branches_enabled = 1; 56 trace_hw_branches_enabled = 1;
56 } 57 }
57 trace_hw_branches_suspended = 0; 58 trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
67 68
68 get_online_cpus(); 69 get_online_cpus();
69 for_each_online_cpu(cpu) { 70 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) { 71 if (likely(per_cpu(hwb_tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu)); 72 ds_release_bts(per_cpu(hwb_tracer, cpu));
72 per_cpu(tracer, cpu) = NULL; 73 per_cpu(hwb_tracer, cpu) = NULL;
73 } 74 }
74 } 75 }
75 trace_hw_branches_enabled = 0; 76 trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
83 84
84 get_online_cpus(); 85 get_online_cpus();
85 for_each_online_cpu(cpu) 86 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu))) 87 if (likely(per_cpu(hwb_tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu)); 88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
88 trace_hw_branches_suspended = 0; 89 trace_hw_branches_suspended = 0;
89 put_online_cpus(); 90 put_online_cpus();
90} 91}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
95 96
96 get_online_cpus(); 97 get_online_cpus();
97 for_each_online_cpu(cpu) 98 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu))) 99 if (likely(per_cpu(hwb_tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu)); 100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
100 trace_hw_branches_suspended = 1; 101 trace_hw_branches_suspended = 1;
101 put_online_cpus(); 102 put_online_cpus();
102} 103}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
114 bts_trace_init_cpu(cpu); 115 bts_trace_init_cpu(cpu);
115 116
116 if (trace_hw_branches_suspended && 117 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu))) 118 likely(per_cpu(hwb_tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu)); 119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
119 } 120 }
120 break; 121 break;
121 122
122 case CPU_DOWN_PREPARE: 123 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */ 124 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) { 125 if (likely(per_cpu(hwb_tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu)); 126 ds_release_bts(per_cpu(hwb_tracer, cpu));
126 per_cpu(tracer, cpu) = NULL; 127 per_cpu(hwb_tracer, cpu) = NULL;
127 } 128 }
128 } 129 }
129 130
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
258 259
259 get_online_cpus(); 260 get_online_cpus();
260 for_each_online_cpu(cpu) 261 for_each_online_cpu(cpu)
261 if (likely(per_cpu(tracer, cpu))) 262 if (likely(per_cpu(hwb_tracer, cpu)))
262 ds_suspend_bts(per_cpu(tracer, cpu)); 263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
263 /* 264 /*
264 * We need to collect the trace on the respective cpu since ftrace 265 * We need to collect the trace on the respective cpu since ftrace
265 * implicitly adds the record for the current cpu. 266 * implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
268 on_each_cpu(trace_bts_cpu, iter->tr, 1); 269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269 270
270 for_each_online_cpu(cpu) 271 for_each_online_cpu(cpu)
271 if (likely(per_cpu(tracer, cpu))) 272 if (likely(per_cpu(hwb_tracer, cpu)))
272 ds_resume_bts(per_cpu(tracer, cpu)); 273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
273 put_online_cpus(); 274 put_online_cpus();
274} 275}
275 276
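
The tracer/buffer to hwb_tracer/hwb_buffer rename is mechanical namespace hygiene: with the per_cpu__ symbol prefix dropped from the per-cpu implementation in this same release, per-cpu variable names now share the ordinary global namespace, and names as generic as "tracer" or "buffer" invite collisions. Hence the subsystem prefix, e.g. (hypothetical driver name):

    #include <linux/percpu.h>

    /* Per-cpu symbols are effectively global now: prefix them like exports. */
    static DEFINE_PER_CPU(char[64], mydrv_buffer); /* not plain 'buffer' */
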
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
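
The skip argument to the __trace_stack() call added above discards that many innermost frames, so the recorded trace begins at the interesting caller rather than in tracer plumbing; five is an empirical count of the frames between the stack save and the irq/preempt-enable site. The same knob exists in the generic stacktrace API, e.g. (hypothetical helper):

    #include <linux/stacktrace.h>

    static void my_record_stack(void)
    {
            unsigned long entries[16];
            struct stack_trace trace = {
                    .max_entries = 16,
                    .entries     = entries,
                    .skip        = 2,  /* hide this helper and its callee */
            };

            save_stack_trace(&trace);
    }
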
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..7ecab06547a5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
606 */ 606 */
607 struct trace_probe *tp; 607 struct trace_probe *tp;
608 int i, ret = 0; 608 int i, ret = 0;
609 int is_return = 0; 609 int is_return = 0, is_delete = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0; 611 unsigned long offset = 0;
612 void *addr = NULL; 612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN]; 613 char buf[MAX_EVENT_NAME_LEN];
614 614
615 if (argc < 2) { 615 /* argc must be >= 1 */
616 pr_info("Probe point is not specified.\n");
617 return -EINVAL;
618 }
619
620 if (argv[0][0] == 'p') 616 if (argv[0][0] == 'p')
621 is_return = 0; 617 is_return = 0;
622 else if (argv[0][0] == 'r') 618 else if (argv[0][0] == 'r')
623 is_return = 1; 619 is_return = 1;
620 else if (argv[0][0] == '-')
621 is_delete = 1;
624 else { 622 else {
625 pr_info("Probe definition must be started with 'p' or 'r'.\n"); 623 pr_info("Probe definition must be started with 'p', 'r' or"
624 " '-'.\n");
626 return -EINVAL; 625 return -EINVAL;
627 } 626 }
628 627
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
642 return -EINVAL; 641 return -EINVAL;
643 } 642 }
644 } 643 }
644 if (!group)
645 group = KPROBE_EVENT_SYSTEM;
645 646
647 if (is_delete) {
648 if (!event) {
649 pr_info("Delete command needs an event name.\n");
650 return -EINVAL;
651 }
652 tp = find_probe_event(event, group);
653 if (!tp) {
654 pr_info("Event %s/%s doesn't exist.\n", group, event);
655 return -ENOENT;
656 }
657 /* delete an event */
658 unregister_trace_probe(tp);
659 free_trace_probe(tp);
660 return 0;
661 }
662
663 if (argc < 2) {
664 pr_info("Probe point is not specified.\n");
665 return -EINVAL;
666 }
646 if (isdigit(argv[1][0])) { 667 if (isdigit(argv[1][0])) {
647 if (is_return) { 668 if (is_return) {
648 pr_info("Return probe point must be a symbol.\n"); 669 pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
671 argc -= 2; argv += 2; 692 argc -= 2; argv += 2;
672 693
673 /* setup a probe */ 694 /* setup a probe */
674 if (!group)
675 group = KPROBE_EVENT_SYSTEM;
676 if (!event) { 695 if (!event) {
677 /* Make a new event name */ 696 /* Make a new event name */
678 if (symbol) 697 if (symbol)
@@ -1113,10 +1132,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1113 struct kprobe_trace_entry field; 1132 struct kprobe_trace_entry field;
1114 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1115 1134
1116 ret = trace_define_common_fields(event_call);
1117 if (!ret)
1118 return ret;
1119
1120 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1121 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1122 /* Set argument names as fields */ 1137 /* Set argument names as fields */
@@ -1131,10 +1146,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1131 struct kretprobe_trace_entry field; 1146 struct kretprobe_trace_entry field;
1132 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1147 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1133 1148
1134 ret = trace_define_common_fields(event_call);
1135 if (!ret)
1136 return ret;
1137
1138 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1139 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
@@ -1434,7 +1445,6 @@ static int register_probe_event(struct trace_probe *tp)
1434 call->unregfunc = probe_event_disable; 1445 call->unregfunc = probe_event_disable;
1435 1446
1436#ifdef CONFIG_EVENT_PROFILE 1447#ifdef CONFIG_EVENT_PROFILE
1437 atomic_set(&call->profile_count, -1);
1438 call->profile_enable = probe_profile_enable; 1448 call->profile_enable = probe_profile_enable;
1439 call->profile_disable = probe_profile_disable; 1449 call->profile_disable = probe_profile_disable;
1440#endif 1450#endif
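
The trace_kprobe.c hunks above add a third command prefix to the kprobe_events interface: besides 'p' (kprobe) and 'r' (kretprobe), a leading '-' now deletes a named event. That is why the argc < 2 check moves below the prefix parsing (a bare "-:event" has only one argument) and why the group default is resolved before the delete path looks the event up. The removed trace_define_common_fields() calls are not lost, either: the core event code now defines the common fields before calling each event's callback, which also retires the inverted `if (!ret) return ret;` check that had been returning early on success. A hedged user-space sketch of the new delete syntax; the probe name "myprobe" and target do_sys_open are only examples, and debugfs is assumed mounted at /sys/kernel/debug:

	/*
	 * Sketch: driving the new '-' delete command from user space.
	 * Assumes debugfs at /sys/kernel/debug; names are illustrative.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/kernel/debug/tracing/kprobe_events";
		const char *add  = "p:myprobe do_sys_open\n";
		const char *del  = "-:myprobe\n";
		int fd = open(path, O_WRONLY | O_APPEND);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, add, strlen(add)) < 0)	/* define the event */
			perror("add");
		if (write(fd, del, strlen(del)) < 0)	/* new: remove it again */
			perror("delete");
		close(fd);
		return 0;
	}

From a shell the equivalent is echo 'p:myprobe do_sys_open' >> kprobe_events followed by echo '-:myprobe' >> kprobe_events.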
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..faf37fa4408c 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
79} 79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */ 80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81 81
82void ksym_hbp_handler(struct perf_event *hbp, void *data) 82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
83{ 85{
84 struct ring_buffer_event *event; 86 struct ring_buffer_event *event;
85 struct ksym_trace_entry *entry; 87 struct ksym_trace_entry *entry;
86 struct pt_regs *regs = data;
87 struct ring_buffer *buffer; 88 struct ring_buffer *buffer;
88 int pc; 89 int pc;
89 90
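
This hunk changes ksym_hbp_handler() from the old void-pointer callback to the standard perf overflow-handler signature (event, NMI flag, sample data, regs), so pt_regs no longer has to be smuggled through 'data'. A minimal sketch of a breakpoint handler and registration in the new shape, assuming the register_wide_hw_breakpoint()/hw_breakpoint_init() API of this series; the watched symbol pid_max and the message are illustrative:

	/*
	 * Sketch: a hardware-breakpoint callback with the new perf
	 * overflow-handler signature; watched symbol is an example.
	 */
	#include <linux/module.h>
	#include <linux/kallsyms.h>
	#include <linux/ptrace.h>
	#include <linux/perf_event.h>
	#include <linux/hw_breakpoint.h>

	static struct perf_event **sample_hbp;

	static void sample_hbp_handler(struct perf_event *bp, int nmi,
				       struct perf_sample_data *data,
				       struct pt_regs *regs)
	{
		printk(KERN_INFO "pid_max written, ip = %p\n",
		       (void *)instruction_pointer(regs));
	}

	static int __init sample_init(void)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);
		attr.bp_addr = kallsyms_lookup_name("pid_max");
		attr.bp_len = HW_BREAKPOINT_LEN_4;
		attr.bp_type = HW_BREAKPOINT_W;

		sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
		return IS_ERR(sample_hbp) ? PTR_ERR(sample_hbp) : 0;
	}

	static void __exit sample_exit(void)
	{
		unregister_wide_hw_breakpoint(sample_hbp);
	}

	module_init(sample_init);
	module_exit(sample_exit);
	MODULE_LICENSE("GPL");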
@@ -235,7 +236,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
235 mutex_lock(&ksym_tracer_mutex); 236 mutex_lock(&ksym_tracer_mutex);
236 237
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 238 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); 239 ret = trace_seq_printf(s, "%pS:",
240 (void *)(unsigned long)entry->attr.bp_addr);
239 if (entry->attr.bp_type == HW_BREAKPOINT_R) 241 if (entry->attr.bp_type == HW_BREAKPOINT_R)
240 ret = trace_seq_puts(s, "r--\n"); 242 ret = trace_seq_puts(s, "r--\n");
241 else if (entry->attr.bp_type == HW_BREAKPOINT_W) 243 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
@@ -277,21 +279,20 @@ static ssize_t ksym_trace_filter_write(struct file *file,
277{ 279{
278 struct trace_ksym *entry; 280 struct trace_ksym *entry;
279 struct hlist_node *node; 281 struct hlist_node *node;
280 char *input_string, *ksymname = NULL; 282 char *buf, *input_string, *ksymname = NULL;
281 unsigned long ksym_addr = 0; 283 unsigned long ksym_addr = 0;
282 int ret, op, changed = 0; 284 int ret, op, changed = 0;
283 285
284 input_string = kzalloc(count + 1, GFP_KERNEL); 286 buf = kzalloc(count + 1, GFP_KERNEL);
285 if (!input_string) 287 if (!buf)
286 return -ENOMEM; 288 return -ENOMEM;
287 289
288 if (copy_from_user(input_string, buffer, count)) { 290 ret = -EFAULT;
289 kfree(input_string); 291 if (copy_from_user(buf, buffer, count))
290 return -EFAULT; 292 goto out;
291 }
292 input_string[count] = '\0';
293 293
294 strstrip(input_string); 294 buf[count] = '\0';
295 input_string = strstrip(buf);
295 296
296 /* 297 /*
297 * Clear all breakpoints if: 298 * Clear all breakpoints if:
@@ -299,18 +300,16 @@ static ssize_t ksym_trace_filter_write(struct file *file,
299 * 2: echo 0 > ksym_trace_filter 300 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter 301 * 3: echo "*:---" > ksym_trace_filter
301 */ 302 */
302 if (!input_string[0] || !strcmp(input_string, "0") || 303 if (!buf[0] || !strcmp(buf, "0") ||
303 !strcmp(input_string, "*:---")) { 304 !strcmp(buf, "*:---")) {
304 __ksym_trace_reset(); 305 __ksym_trace_reset();
305 kfree(input_string); 306 ret = 0;
306 return count; 307 goto out;
307 } 308 }
308 309
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); 310 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0) { 311 if (ret < 0)
311 kfree(input_string); 312 goto out;
312 return ret;
313 }
314 313
315 mutex_lock(&ksym_tracer_mutex); 314 mutex_lock(&ksym_tracer_mutex);
316 315
@@ -321,7 +320,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
321 if (entry->attr.bp_type != op) 320 if (entry->attr.bp_type != op)
322 changed = 1; 321 changed = 1;
323 else 322 else
324 goto out; 323 goto out_unlock;
325 break; 324 break;
326 } 325 }
327 } 326 }
@@ -336,28 +335,24 @@ static ssize_t ksym_trace_filter_write(struct file *file,
336 if (IS_ERR(entry->ksym_hbp)) 335 if (IS_ERR(entry->ksym_hbp))
337 ret = PTR_ERR(entry->ksym_hbp); 336 ret = PTR_ERR(entry->ksym_hbp);
338 else 337 else
339 goto out; 338 goto out_unlock;
340 } 339 }
341 /* Error or "symbol:---" case: drop it */ 340 /* Error or "symbol:---" case: drop it */
342 ksym_filter_entry_count--; 341 ksym_filter_entry_count--;
343 hlist_del_rcu(&(entry->ksym_hlist)); 342 hlist_del_rcu(&(entry->ksym_hlist));
344 synchronize_rcu(); 343 synchronize_rcu();
345 kfree(entry); 344 kfree(entry);
346 goto out; 345 goto out_unlock;
347 } else { 346 } else {
348 /* Check for malformed request: (4) */ 347 /* Check for malformed request: (4) */
349 if (op == 0) 348 if (op)
350 goto out; 349 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
351 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
352 } 350 }
353out: 351out_unlock:
354 mutex_unlock(&ksym_tracer_mutex); 352 mutex_unlock(&ksym_tracer_mutex);
355 353out:
356 kfree(input_string); 354 kfree(buf);
357 355 return !ret ? count : ret;
358 if (!ret)
359 ret = count;
360 return ret;
361} 356}
362 357
363static const struct file_operations ksym_tracing_fops = { 358static const struct file_operations ksym_tracing_fops = {
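
The ksym_trace_filter_write() rewrite above fixes two things at once: the old code discarded strstrip()'s return value (so leading whitespace was never actually stripped, and strstrip() is __must_check), and it duplicated kfree() in every error branch. A condensed sketch of the resulting idiom, with parse_cmd() as a hypothetical stand-in for the real parser:

	/*
	 * Sketch of the copy_from_user + strstrip + single-exit idiom;
	 * parse_cmd() is a hypothetical stand-in.
	 */
	static ssize_t example_write(struct file *file, const char __user *ubuf,
				     size_t count, loff_t *ppos)
	{
		char *buf, *cmd;
		int ret;

		buf = kzalloc(count + 1, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		ret = -EFAULT;
		if (copy_from_user(buf, ubuf, count))
			goto out;

		buf[count] = '\0';
		cmd = strstrip(buf);	/* keep the returned pointer */

		ret = parse_cmd(cmd);	/* hypothetical parser */
	out:
		kfree(buf);		/* free the original allocation, not cmd */
		return ret < 0 ? ret : count;
	}

Keeping buf and the strstrip() result separate matters: strstrip() may return a pointer past the start of the allocation, so kfree() must get the original pointer. That is exactly why the patch carries both buf and input_string.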
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
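
trace_print_seq() now reports seq_write()'s result and resets the trace_seq only when the copy into the seq_file buffer succeeded; before this, an overflowing seq_file silently discarded the pending trace output. A sketch of the "consume only on success" pattern, with my_seq as a stand-in buffer type:

	/*
	 * Sketch of the pattern trace_print_seq() adopts; my_seq is a
	 * stand-in with a buf/len pair.
	 */
	int flush_to_seq_file(struct seq_file *m, struct my_seq *s)
	{
		int ret = seq_write(m, s->buf, s->len);

		if (!ret)
			s->len = 0;	/* consumed: safe to reuse the buffer */

		return ret;		/* non-zero: seq_file was full, keep the data */
	}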
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
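
The remaining trace_output.c hunks all follow one pattern: trace_seq grows a 'full' flag that every writer checks first and latches on overflow, so once the one-page buffer fills, each later write fails fast as a no-op instead of probing (and possibly partially consuming) the remaining space. A minimal sketch of the latch, assuming a trace_seq-like struct with buffer, len, and the new flag:

	/* Minimal sketch of the overflow latch the trace_seq helpers adopt. */
	struct my_seq {
		unsigned char	buffer[PAGE_SIZE];
		unsigned int	len;
		int		full;
	};

	static int my_seq_puts(struct my_seq *s, const char *str)
	{
		int len = strlen(str);

		if (s->full)			/* already overflowed: stay a no-op */
			return 0;

		if (len > ((PAGE_SIZE - 1) - s->len)) {
			s->full = 1;		/* latch: later writes fail fast */
			return 0;
		}

		memcpy(s->buffer + s->len, str, len);
		s->len += len;
		return len;
	}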
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
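
This rename, repeated in trace_selftest.c and trace_stack.c below, is part of the series-wide locking cleanup: the lowest-level, never-sleeping lock becomes arch_spinlock_t with arch_spin_*() operations, freeing the raw_spinlock_t name for the layer above it. The usage pattern is unchanged; a sketch with an illustrative counter:

	/*
	 * Sketch of the renamed low-level lock API; the counter is
	 * illustrative. arch_spin_lock() does not disable interrupts
	 * itself, hence the explicit local_irq_save().
	 */
	static arch_spinlock_t example_lock =
		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
	static unsigned long example_count;

	static void example_inc(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		arch_spin_lock(&example_lock);
		example_count++;
		arch_spin_unlock(&example_lock);
		local_irq_restore(flags);
	}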
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index dc98309e839a..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
67 67
68 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
69 local_irq_save(flags); 69 local_irq_save(flags);
70 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
71 71
72 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
73 73
@@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85 break; 85 break;
86 } 86 }
87 tracing_on(); 87 tracing_on();
88 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
89 local_irq_restore(flags); 89 local_irq_restore(flags);
90 90
91 if (count) 91 if (count)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 171 return ret;
172 172
173 local_irq_save(flags); 173 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 174 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 175 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 176 arch_spin_unlock(&max_stack_lock);
177 local_irq_restore(flags); 177 local_irq_restore(flags);
178 178
179 return count; 179 return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
207static void *t_start(struct seq_file *m, loff_t *pos) 207static void *t_start(struct seq_file *m, loff_t *pos)
208{ 208{
209 local_irq_disable(); 209 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 210 arch_spin_lock(&max_stack_lock);
211 211
212 if (*pos == 0) 212 if (*pos == 0)
213 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 217
218static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
219{ 219{
220 __raw_spin_unlock(&max_stack_lock); 220 arch_spin_unlock(&max_stack_lock);
221 local_irq_enable(); 221 local_irq_enable();
222} 222}
223 223
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..75289f372dd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
217 int i; 217 int i;
218 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
219 219
220 ret = trace_define_common_fields(call);
221 if (ret)
222 return ret;
223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret) 221 if (ret)
226 return ret; 222 return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
241 struct syscall_trace_exit trace; 237 struct syscall_trace_exit trace;
242 int ret; 238 int ret;
243 239
244 ret = trace_define_common_fields(call);
245 if (ret)
246 return ret;
247
248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret) 241 if (ret)
250 return ret; 242 return ret;
@@ -333,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
333 mutex_lock(&syscall_trace_lock); 325 mutex_lock(&syscall_trace_lock);
334 if (!sys_refcount_enter) 326 if (!sys_refcount_enter)
335 ret = register_trace_sys_enter(ftrace_syscall_enter); 327 ret = register_trace_sys_enter(ftrace_syscall_enter);
336 if (ret) { 328 if (!ret) {
337 pr_info("event trace: Could not activate"
338 "syscall entry trace point");
339 } else {
340 set_bit(num, enabled_enter_syscalls); 329 set_bit(num, enabled_enter_syscalls);
341 sys_refcount_enter++; 330 sys_refcount_enter++;
342 } 331 }
@@ -370,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
370 mutex_lock(&syscall_trace_lock); 359 mutex_lock(&syscall_trace_lock);
371 if (!sys_refcount_exit) 360 if (!sys_refcount_exit)
372 ret = register_trace_sys_exit(ftrace_syscall_exit); 361 ret = register_trace_sys_exit(ftrace_syscall_exit);
373 if (ret) { 362 if (!ret) {
374 pr_info("event trace: Could not activate"
375 "syscall exit trace point");
376 } else {
377 set_bit(num, enabled_exit_syscalls); 363 set_bit(num, enabled_exit_syscalls);
378 sys_refcount_exit++; 364 sys_refcount_exit++;
379 } 365 }
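
Two independent fixes land in trace_syscalls.c. The define_fields callbacks stop calling trace_define_common_fields() for the same reason as in trace_kprobe.c above: the core now defines the common fields before invoking each event's callback. And the registration paths invert their test to the positive `if (!ret)` form, dropping a pr_info() whose split format string was missing a space ("...activatesyscall entry..."). A sketch of the resulting refcounted registration pattern; the my_-prefixed names are hypothetical:

	/*
	 * Sketch of the refcounted tracepoint registration the syscall
	 * events use after this change; my_-prefixed names are made up.
	 */
	static DEFINE_MUTEX(my_trace_lock);
	static int my_refcount_enter;

	static int my_reg_event_syscall_enter(int num)
	{
		int ret = 0;

		mutex_lock(&my_trace_lock);
		if (!my_refcount_enter)		/* first user attaches the probe */
			ret = register_trace_sys_enter(ftrace_syscall_enter);
		if (!ret) {			/* attached now, or already attached */
			set_bit(num, enabled_enter_syscalls);
			my_refcount_enter++;
		}
		mutex_unlock(&my_trace_lock);
		return ret;
	}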