30 files changed, 522 insertions, 395 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d84..19fad003b19d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
            hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
            notifier.o ksysfs.o pm_qos_params.o sched_clock.o
-CFLAGS_REMOVE_sched.o = -mno-spe
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,7 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -mno-spe -pg
+CFLAGS_REMOVE_sched.o = -pg
 endif
 obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8ba0e0d934f2..8b509441f49a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_chunk {
        struct list_head trees;         /* with root here */
        int dead;
        int count;
+        atomic_long_t refs;
        struct rcu_head head;
        struct node {
                struct list_head list;
@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list);
 * tree is refcounted; one reference for "some rules on rules_list refer to
 * it", one for each chunk with pointer to it.
 *
- * chunk is refcounted by embedded inotify_watch.
+ * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
+ * of watch contributes 1 to .refs).
 *
 * node.index allows to get from node.list to containing chunk.
 * MSB of that sucker is stolen to mark taggings that we might have to
@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count)
        INIT_LIST_HEAD(&chunk->hash);
        INIT_LIST_HEAD(&chunk->trees);
        chunk->count = count;
+        atomic_long_set(&chunk->refs, 1);
        for (i = 0; i < count; i++) {
                INIT_LIST_HEAD(&chunk->owners[i].list);
                chunk->owners[i].index = i;
@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count)
        return chunk;
 }
-static void __free_chunk(struct rcu_head *rcu)
+static void free_chunk(struct audit_chunk *chunk)
 {
-        struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
        int i;
        for (i = 0; i < chunk->count; i++) {
@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu)
        kfree(chunk);
 }
-static inline void free_chunk(struct audit_chunk *chunk)
+void audit_put_chunk(struct audit_chunk *chunk)
 {
-        call_rcu(&chunk->head, __free_chunk);
+        if (atomic_long_dec_and_test(&chunk->refs))
+                free_chunk(chunk);
 }
-void audit_put_chunk(struct audit_chunk *chunk)
+static void __put_chunk(struct rcu_head *rcu)
 {
-        put_inotify_watch(&chunk->watch);
+        struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
+        audit_put_chunk(chunk);
 }
 enum {HASH_SIZE = 128};
@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
        list_for_each_entry_rcu(p, list, hash) {
                if (p->watch.inode == inode) {
-                        get_inotify_watch(&p->watch);
+                        atomic_long_inc(&p->refs);
                        return p;
                }
        }
@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
 /* tagging and untagging inodes with trees */
-static void untag_chunk(struct audit_chunk *chunk, struct node *p)
+static struct audit_chunk *find_chunk(struct node *p)
+{
+        int index = p->index & ~(1U<<31);
+        p -= index;
+        return container_of(p, struct audit_chunk, owners[0]);
+}
+static void untag_chunk(struct node *p)
 {
+        struct audit_chunk *chunk = find_chunk(p);
        struct audit_chunk *new;
        struct audit_tree *owner;
        int size = chunk->count - 1;
        int i, j;
+        if (!pin_inotify_watch(&chunk->watch)) {
+                /*
+                 * Filesystem is shutting down; all watches are getting
+                 * evicted, just take it off the node list for this
+                 * tree and let the eviction logic take care of the
+                 * rest.
+                 */
+                owner = p->owner;
+                if (owner->root == chunk) {
+                        list_del_init(&owner->same_root);
+                        owner->root = NULL;
+                }
+                list_del_init(&p->list);
+                p->owner = NULL;
+                put_tree(owner);
+                return;
+        }
+        spin_unlock(&hash_lock);
+        /*
+         * pin_inotify_watch() succeeded, so the watch won't go away
+         * from under us.
+         */
        mutex_lock(&chunk->watch.inode->inotify_mutex);
        if (chunk->dead) {
                mutex_unlock(&chunk->watch.inode->inotify_mutex);
-                return;
+                goto out;
        }
        owner = p->owner;
@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
                inotify_evict_watch(&chunk->watch);
                mutex_unlock(&chunk->watch.inode->inotify_mutex);
                put_inotify_watch(&chunk->watch);
-                return;
+                goto out;
        }
        new = alloc_chunk(size);
@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
        inotify_evict_watch(&chunk->watch);
        mutex_unlock(&chunk->watch.inode->inotify_mutex);
        put_inotify_watch(&chunk->watch);
-        return;
+        goto out;
 Fallback:
        // do the best we can
@@ -277,6 +313,9 @@ Fallback:
        put_tree(owner);
        spin_unlock(&hash_lock);
        mutex_unlock(&chunk->watch.inode->inotify_mutex);
+out:
+        unpin_inotify_watch(&chunk->watch);
+        spin_lock(&hash_lock);
 }
 static int create_chunk(struct inode *inode, struct audit_tree *tree)
@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
        return 0;
 }
-static struct audit_chunk *find_chunk(struct node *p)
-{
-        int index = p->index & ~(1U<<31);
-        p -= index;
-        return container_of(p, struct audit_chunk, owners[0]);
-}
 static void kill_rules(struct audit_tree *tree)
 {
        struct audit_krule *rule, *next;
@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim)
        spin_lock(&hash_lock);
        while (!list_empty(&victim->chunks)) {
                struct node *p;
-                struct audit_chunk *chunk;
                p = list_entry(victim->chunks.next, struct node, list);
-                chunk = find_chunk(p);
-                get_inotify_watch(&chunk->watch);
-                spin_unlock(&hash_lock);
-                untag_chunk(chunk, p);
-                put_inotify_watch(&chunk->watch);
+                untag_chunk(p);
-                spin_lock(&hash_lock);
        }
        spin_unlock(&hash_lock);
        put_tree(victim);
@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree)
        while (!list_empty(&tree->chunks)) {
                struct node *node;
-                struct audit_chunk *chunk;
                node = list_entry(tree->chunks.next, struct node, list);
@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree)
                if (!(node->index & (1U<<31)))
                        break;
-                chunk = find_chunk(node);
+                untag_chunk(node);
-                get_inotify_watch(&chunk->watch);
-                spin_unlock(&hash_lock);
-                untag_chunk(chunk, node);
-                put_inotify_watch(&chunk->watch);
-                spin_lock(&hash_lock);
        }
        if (!tree->root && !tree->goner) {
                tree->goner = 1;
@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
 static void destroy_watch(struct inotify_watch *watch)
 {
        struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
-        free_chunk(chunk);
+        call_rcu(&chunk->head, __put_chunk);
 }
 static const struct inotify_operations rtree_inotify_ops = {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b7d354e2b0ef..9fd85a4640a0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list)
        list_for_each_entry_safe(p, n, in_list, ilist) {
                list_del(&p->ilist);
                inotify_rm_watch(audit_ih, &p->wdata);
-                /* the put matching the get in audit_do_del_rule() */
+                /* the unpin matching the pin in audit_del_rule() */
-                put_inotify_watch(&p->wdata);
+                unpin_inotify_watch(&p->wdata);
        }
 }
@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry,
                                /* Put parent on the inotify un-registration
                                 * list.  Grab a reference before releasing
                                 * audit_filter_mutex, to be released in
-                                 * audit_inotify_unregister(). */
+                                 * audit_inotify_unregister().
-                                list_add(&parent->ilist, &inotify_list);
+                                 * If filesystem is going away, just leave
-                                get_inotify_watch(&parent->wdata);
+                                 * the sucker alone, eviction will take
+                                 * care of it.
+                                 */
+                                if (pin_inotify_watch(&parent->wdata))
+                                        list_add(&parent->ilist, &inotify_list);
                        }
                }
        }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 358e77564e6f..fe00b3b983a8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2039,10 +2039,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
        struct cgroup *cgrp;
        struct cgroup_iter it;
        struct task_struct *tsk;
        /*
-         * Validate dentry by checking the superblock operations
+         * Validate dentry by checking the superblock operations,
+         * and make sure it's a directory.
         */
-        if (dentry->d_sb->s_op != &cgroup_ops)
+        if (dentry->d_sb->s_op != &cgroup_ops ||
+            !S_ISDIR(dentry->d_inode->i_mode))
                 goto err;
        ret = 0;
@@ -2472,10 +2475,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
+        mutex_unlock(&cgroup_mutex);
-        parent = cgrp->parent;
-        root = cgrp->root;
-        sb = root->sb;
        /*
         * Call pre_destroy handlers of subsys. Notify subsystems
@@ -2483,7 +2483,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
         */
        cgroup_call_pre_destroy(cgrp);
-        if (cgroup_has_css_refs(cgrp)) {
+        mutex_lock(&cgroup_mutex);
+        parent = cgrp->parent;
+        root = cgrp->root;
+        sb = root->sb;
+        if (atomic_read(&cgrp->count)
+            || !list_empty(&cgrp->children)
+            || cgroup_has_css_refs(cgrp)) {
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 7fa476f01d05..fb249e2bcada 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -184,9 +184,20 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 {
        struct freezer *freezer;
-        task_lock(task);
+        /*
+         * No lock is needed, since the task isn't on tasklist yet,
+         * so it can't be moved to another cgroup, which means the
+         * freezer won't be removed and will be valid during this
+         * function call.
+         */
        freezer = task_freezer(task);
-        task_unlock(task);
+        /*
+         * The root cgroup is non-freezable, so we can skip the
+         * following check.
+         */
+        if (!freezer->css.cgroup->parent)
+                return;
        spin_lock_irq(&freezer->lock);
        BUG_ON(freezer->state == CGROUP_FROZEN);
@@ -331,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup,
        else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
                goal_state = CGROUP_FROZEN;
        else
-                return -EIO;
+                return -EINVAL;
        if (!cgroup_lock_live_group(cgroup))
                return -ENODEV;
@@ -350,6 +361,8 @@ static struct cftype files[] = {
 static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
 {
+        if (!cgroup->parent)
+                return 0;
        return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52ec..da7ff6137f37 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <linux/mempolicy.h>
 #include <linux/mm.h>
+#include <linux/memory.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
@@ -587,7 +588,6 @@ static int generate_sched_domains(cpumask_t **domains,
        int ndoms;              /* number of sched domains in result */
        int nslot;              /* next empty doms[] cpumask_t slot */
-        ndoms = 0;
        doms = NULL;
        dattr = NULL;
        csa = NULL;
@@ -674,10 +674,8 @@ restart:
         * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
         */
        doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
-        if (!doms) {
+        if (!doms)
-                ndoms = 0;
                goto done;
-        }
        /*
         * The rest of the code, including the scheduler, can deal with
@@ -732,6 +730,13 @@ restart:
 done:
        kfree(csa);
+        /*
+         * Fall back to the default domain if kmalloc() failed.
+         * See comments in partition_sched_domains().
+         */
+        if (doms == NULL)
+                ndoms = 1;
        *domains    = doms;
        *attributes = dattr;
        return ndoms;
@@ -2011,12 +2016,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
 * See also the previous routine cpuset_track_online_cpus().
 */
-void cpuset_track_online_nodes(void)
+static int cpuset_track_online_nodes(struct notifier_block *self,
+                                unsigned long action, void *arg)
 {
        cgroup_lock();
-        top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+        switch (action) {
-        scan_for_empty_cpusets(&top_cpuset);
+        case MEM_ONLINE:
+                top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+                break;
+        case MEM_OFFLINE:
+                top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+                scan_for_empty_cpusets(&top_cpuset);
+                break;
+        default:
+                break;
+        }
        cgroup_unlock();
+        return NOTIFY_OK;
 }
 #endif
@@ -2032,6 +2048,7 @@ void __init cpuset_init_smp(void)
        top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
        hotcpu_notifier(cpuset_track_online_cpus, 0);
+        hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 }
 /**
diff --git a/kernel/exit.c b/kernel/exit.c
index ae2b92be5fae..30fcdf16737a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,7 +40,6 @@
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
-#include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
@@ -1059,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
                exit_itimers(tsk->signal);
        }
        acct_collect(code, group_dead);
-#ifdef CONFIG_FUTEX
-        if (unlikely(tsk->robust_list))
-                exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
-        if (unlikely(tsk->compat_robust_list))
-                compat_exit_robust_list(tsk);
-#endif
-#endif
        if (group_dead)
                tty_audit_exit();
        if (unlikely(tsk->audit_context))
@@ -1330,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 * group, which consolidates times for all threads in the
                 * group including the group leader.
                 */
+                thread_group_cputime(p, &cputime);
                spin_lock_irq(&p->parent->sighand->siglock);
                psig = p->parent->signal;
                sig = p->signal;
-                thread_group_cputime(p, &cputime);
                psig->cutime =
                        cputime_add(psig->cutime,
                        cputime_add(cputime.utime,
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..adf0cc9c02d6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
                return 1;
        return module_text_address(addr) != NULL;
 }
+/*
+ * On some architectures (PPC64, IA64) function pointers
+ * are actually only tokens to some data that then holds the
+ * real function address. As a result, to find if a function
+ * pointer is part of the kernel text, we need to do some
+ * special dereferencing first.
+ */
+int func_ptr_is_kernel_text(void *ptr)
+{
+        unsigned long addr;
+        addr = (unsigned long) dereference_function_descriptor(ptr);
+        if (core_kernel_text(addr))
+                return 1;
+        return module_text_address(addr) != NULL;
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe0..2a372a0e206f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
 #include <linux/futex.h>
+#include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -519,6 +520,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
        struct completion *vfork_done = tsk->vfork_done;
+        /* Get rid of any futexes when releasing the mm */
+#ifdef CONFIG_FUTEX
+        if (unlikely(tsk->robust_list))
+                exit_robust_list(tsk);
+#ifdef CONFIG_COMPAT
+        if (unlikely(tsk->compat_robust_list))
+                compat_exit_robust_list(tsk);
+#endif
+#endif
        /* Get rid of any cached register state */
        deactivate_mm(tsk, mm);
diff --git a/kernel/futex.c b/kernel/futex.c
index 8af10027514b..e10c5c8786a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -123,24 +123,6 @@ struct futex_hash_bucket {
 static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 /*
- * Take mm->mmap_sem, when futex is shared
- */
-static inline void futex_lock_mm(struct rw_semaphore *fshared)
-{
-        if (fshared)
-                down_read(fshared);
-}
-/*
- * Release mm->mmap_sem, when the futex is shared
- */
-static inline void futex_unlock_mm(struct rw_semaphore *fshared)
-{
-        if (fshared)
-                up_read(fshared);
-}
-/*
 * We hash on the keys returned from get_futex_key (see below).
 */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
                && key1->both.offset == key2->both.offset);
 }
+/*
+ * Take a reference to the resource addressed by a key.
+ * Can be called while holding spinlocks.
+ *
+ */
+static void get_futex_key_refs(union futex_key *key)
+{
+        if (!key->both.ptr)
+                return;
+        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+        case FUT_OFF_INODE:
+                atomic_inc(&key->shared.inode->i_count);
+                break;
+        case FUT_OFF_MMSHARED:
+                atomic_inc(&key->private.mm->mm_count);
+                break;
+        }
+}
+/*
+ * Drop a reference to the resource addressed by a key.
+ * The hash bucket spinlock must not be held.
+ */
+static void drop_futex_key_refs(union futex_key *key)
+{
+        if (!key->both.ptr)
+                return;
+        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+        case FUT_OFF_INODE:
+                iput(key->shared.inode);
+                break;
+        case FUT_OFF_MMSHARED:
+                mmdrop(key->private.mm);
+                break;
+        }
+}
 /**
 * get_futex_key - Get parameters which are the keys for a futex.
 * @uaddr: virtual address of the futex
@@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
 * For other futexes, it points to &current->mm->mmap_sem and
 * caller must have taken the reader lock. but NOT any spinlocks.
 */
-static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
-                         union futex_key *key)
 {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
-        struct vm_area_struct *vma;
        struct page *page;
        int err;
@@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
                        return -EFAULT;
                key->private.mm = mm;
                key->private.address = address;
+                get_futex_key_refs(key);
                return 0;
        }
-        /*
-         * The futex is hashed differently depending on whether
-         * it's in a shared or private mapping.  So check vma first.
-         */
-        vma = find_extend_vma(mm, address);
-        if (unlikely(!vma))
-                return -EFAULT;
-        /*
+again:
-         * Permissions.
+        err = get_user_pages_fast(address, 1, 0, &page);
-         */
+        if (err < 0)
-        if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
+                return err;
-                return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+        lock_page(page);
+        if (!page->mapping) {
+                unlock_page(page);
+                put_page(page);
+                goto again;
+        }
        /*
         * Private mappings are handled in a simple way.
         *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
-         * the object not the particular process.  Therefore we use
+         * the object not the particular process.
-         * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-         * mappings of _writable_ handles.
         */
-        if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
+        if (PageAnon(page)) {
-                key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
-                return 0;
+        } else {
+                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+                key->shared.inode = page->mapping->host;
+                key->shared.pgoff = page->index;
        }
-        /*
+        get_futex_key_refs(key);
-         * Linear file mappings are also simple.
-         */
-        key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-        key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-        if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-                key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-                                     + vma->vm_pgoff);
-                return 0;
-        }
-        /*
+        unlock_page(page);
-         * We could walk the page table to read the non-linear
+        put_page(page);
-         * pte, and get the page index without fetching the page
+        return 0;
-         * from swap.  But that's a lot of code to duplicate here
-         * for a rare case, so we simply fetch the page.
-         */
-        err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-        if (err >= 0) {
-                key->shared.pgoff =
-                        page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-                put_page(page);
-                return 0;
-        }
-        return err;
-}
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
-        if (key->both.ptr == NULL)
-                return;
-        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-                case FUT_OFF_INODE:
-                        atomic_inc(&key->shared.inode->i_count);
-                        break;
-                case FUT_OFF_MMSHARED:
-                        atomic_inc(&key->private.mm->mm_count);
-                        break;
-        }
 }
-/*
+static inline
- * Drop a reference to the resource addressed by a key.
+void put_futex_key(int fshared, union futex_key *key)
- * The hash bucket spinlock must not be held.
- */
-static void drop_futex_key_refs(union futex_key *key)
 {
-        if (!key->both.ptr)
+        drop_futex_key_refs(key);
-                return;
-        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-                case FUT_OFF_INODE:
-                        iput(key->shared.inode);
-                        break;
-                case FUT_OFF_MMSHARED:
-                        mmdrop(key->private.mm);
-                        break;
-        }
 }
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
 /*
 * Fault handling.
- * if fshared is non NULL, current->mm->mmap_sem is already held
 */
-static int futex_handle_fault(unsigned long address,
+static int futex_handle_fault(unsigned long address, int attempt)
-                              struct rw_semaphore *fshared, int attempt)
 {
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
@@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address,
        if (attempt > 2)
                return ret;
-        if (!fshared)
+        down_read(&mm->mmap_sem);
-                down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (vma && address >= vma->vm_start &&
            (vma->vm_flags & VM_WRITE)) {
@@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address,
                                current->min_flt++;
                }
        }
-        if (!fshared)
+        up_read(&mm->mmap_sem);
-                up_read(&mm->mmap_sem);
        return ret;
 }
@@ -385,6 +350,7 @@ static int refill_pi_state_cache(void)
        /* pi_mutex gets initialized later */
        pi_state->owner = NULL;
        atomic_set(&pi_state->refcount, 1);
+        pi_state->key = FUTEX_KEY_INIT;
        current->pi_state_cache = pi_state;
@@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr)
        struct list_head *next, *head = &curr->pi_state_list;
        struct futex_pi_state *pi_state;
        struct futex_hash_bucket *hb;
-        union futex_key key;
+        union futex_key key = FUTEX_KEY_INIT;
        if (!futex_cmpxchg_enabled)
                return;
@@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
-static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
-                      int nr_wake, u32 bitset)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        struct plist_head *head;
-        union futex_key key;
+        union futex_key key = FUTEX_KEY_INIT;
        int ret;
        if (!bitset)
                return -EINVAL;
-        futex_lock_mm(fshared);
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
                goto out;
@@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
        spin_unlock(&hb->lock);
 out:
-        futex_unlock_mm(fshared);
+        put_futex_key(fshared, &key);
        return ret;
 }
@@ -769,19 +732,16 @@ out:
 * to this virtual address:
 */
 static int
-futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
+futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-              u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op)
 {
-        union futex_key key1, key2;
+        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head;
        struct futex_q *this, *next;
        int ret, op_ret, attempt = 0;
 retryfull:
-        futex_lock_mm(fshared);
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
@@ -826,18 +786,12 @@ retry:
                 */
                if (attempt++) {
                        ret = futex_handle_fault((unsigned long)uaddr2,
-                                                 fshared, attempt);
+                                                 attempt);
                        if (ret)
                                goto out;
                        goto retry;
                }
-                /*
-                 * If we would have faulted, release mmap_sem,
-                 * fault it in and start all over again.
-                 */
-                futex_unlock_mm(fshared);
                ret = get_user(dummy, uaddr2);
                if (ret)
                        return ret;
@@ -873,7 +827,8 @@ retry:
        if (hb1 != hb2)
                spin_unlock(&hb2->lock);
 out:
-        futex_unlock_mm(fshared);
+        put_futex_key(fshared, &key2);
+        put_futex_key(fshared, &key1);
        return ret;
 }
@@ -882,19 +837,16 @@ out:
 * Requeue all waiters hashed on one physical page to another
 * physical page.
 */
-static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
+static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-                         u32 __user *uaddr2,
                         int nr_wake, int nr_requeue, u32 *cmpval)
 {
-        union futex_key key1, key2;
+        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head1;
        struct futex_q *this, *next;
        int ret, drop_count = 0;
 retry:
-        futex_lock_mm(fshared);
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
@@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
                        if (hb1 != hb2)
                                spin_unlock(&hb2->lock);
-                        /*
-                         * If we would have faulted, release mmap_sem, fault
-                         * it in and start all over again.
-                         */
-                        futex_unlock_mm(fshared);
                        ret = get_user(curval, uaddr1);
                        if (!ret)
@@ -974,7 +920,8 @@ out_unlock:
                drop_futex_key_refs(&key1);
 out:
-        futex_unlock_mm(fshared);
+        put_futex_key(fshared, &key2);
+        put_futex_key(fshared, &key1);
        return ret;
 }
@@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q)
 * private futexes.
 */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                                struct task_struct *newowner,
+                                struct task_struct *newowner, int fshared)
-                                struct rw_semaphore *fshared)
 {
        u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
@@ -1176,7 +1122,7 @@ retry:
 handle_fault:
        spin_unlock(q->lock_ptr);
-        ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
+        ret = futex_handle_fault((unsigned long)uaddr, attempt++);
        spin_lock(q->lock_ptr);
@@ -1200,7 +1146,7 @@ handle_fault:
 static long futex_wait_restart(struct restart_block *restart);
-static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_wait(u32 __user *uaddr, int fshared,
                      u32 val, ktime_t *abs_time, u32 bitset)
 {
        struct task_struct *curr = current;
@@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        q.pi_state = NULL;
        q.bitset = bitset;
 retry:
-        futex_lock_mm(fshared);
+        q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;
@@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        if (unlikely(ret)) {
                queue_unlock(&q, hb);
-                /*
-                 * If we would have faulted, release mmap_sem, fault it in and
-                 * start all over again.
-                 */
-                futex_unlock_mm(fshared);
                ret = get_user(uval, uaddr);
                if (!ret)
@@ -1271,12 +1210,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_me(&q, hb);
        /*
-         * Now the futex is queued and we have checked the data, we
-         * don't want to hold mmap_sem while we sleep.
-         */
-        futex_unlock_mm(fshared);
-        /*
         * There might have been scheduling since the queue_me(), as we
         * cannot hold a spinlock across the get_user() in case it
         * faults, and we cannot just set TASK_INTERRUPTIBLE state when
@@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 out_release_sem:
-        futex_unlock_mm(fshared);
+        put_futex_key(fshared, &q.key);
        return ret;
 }
@@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 static long futex_wait_restart(struct restart_block *restart)
 {
        u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-        struct rw_semaphore *fshared = NULL;
+        int fshared = 0;
        ktime_t t;
        t.tv64 = restart->futex.time;
        restart->fn = do_no_restart_syscall;
        if (restart->futex.flags & FLAGS_SHARED)
-                fshared = &current->mm->mmap_sem;
+                fshared = 1;
        return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
                                restart->futex.bitset);
 }
@@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart)
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
-static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_lock_pi(u32 __user *uaddr, int fshared,
                         int detect, ktime_t *time, int trylock)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
@@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        q.pi_state = NULL;
 retry:
-        futex_lock_mm(fshared);
+        q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;
@@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
-                        futex_unlock_mm(fshared);
                        cond_resched();
                        goto retry;
@@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         */
        queue_me(&q, hb);
-        /*
-         * Now the futex is queued and we have checked the data, we
-         * don't want to hold mmap_sem while we sleep.
-         */
-        futex_unlock_mm(fshared);
        WARN_ON(!q.pi_state);
        /*
         * Block on the PI mutex:
@@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                ret = ret ? 0 : -EWOULDBLOCK;
        }
-        futex_lock_mm(fshared);
        spin_lock(q.lock_ptr);
        if (!ret) {
@@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
-        futex_unlock_mm(fshared);
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
@@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 out_release_sem:
-        futex_unlock_mm(fshared);
+        put_futex_key(fshared, &q.key);
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
        return ret;
@@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
        if (attempt++) {
-                ret = futex_handle_fault((unsigned long)uaddr, fshared,
+                ret = futex_handle_fault((unsigned long)uaddr, attempt);
-                                         attempt);
                if (ret)
                        goto out_release_sem;
                goto retry_unlocked;
        }
-        futex_unlock_mm(fshared);
        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
                goto retry;
@@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
-static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
+static int futex_unlock_pi(u32 __user *uaddr, int fshared)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        u32 uval;
        struct plist_head *head;
-        union futex_key key;
+        union futex_key key = FUTEX_KEY_INIT;
        int ret, attempt = 0;
 retry:
@@ -1685,10 +1605,6 @@ retry:
         */
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
-        /*
-         * First take all the futex related locks:
-         */
-        futex_lock_mm(fshared);
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
@@ -1747,7 +1663,7 @@ retry_unlocked:
 out_unlock:
        spin_unlock(&hb->lock);
 out:
-        futex_unlock_mm(fshared);
+        put_futex_key(fshared, &key);
        return ret;
@@ -1763,16 +1679,13 @@ pi_faulted:
        spin_unlock(&hb->lock);
        if (attempt++) {
-                ret = futex_handle_fault((unsigned long)uaddr, fshared,
+                ret = futex_handle_fault((unsigned long)uaddr, attempt);
-                                         attempt);
                if (ret)
                        goto out;
                uval = 0;
                goto retry_unlocked;
        }
-        futex_unlock_mm(fshared);
        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
                goto retry;
@@ -1898,8 +1811,7 @@ retry:
                 * PI futexes happens in exit_pi_state():
                 */
                if (!pi && (uval & FUTEX_WAITERS))
-                        futex_wake(uaddr, &curr->mm->mmap_sem, 1,
+                        futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
-                                   FUTEX_BITSET_MATCH_ANY);
        }
        return 0;
 }
@@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 {
        int ret = -ENOSYS;
        int cmd = op & FUTEX_CMD_MASK;
-        struct rw_semaphore *fshared = NULL;
+        int fshared = 0;
        if (!(op & FUTEX_PRIVATE_FLAG))
-                fshared = &current->mm->mmap_sem;
+                fshared = 1;
        switch (cmd) {
        case FUTEX_WAIT:
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 95d3949f2ae5..47e63349d1b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
                /* Timer is expired, act upon the callback mode */
                switch(timer->cb_mode) {
-                case HRTIMER_CB_IRQSAFE_NO_RESTART:
-                        debug_hrtimer_deactivate(timer);
-                        /*
-                         * We can call the callback from here. No restart
-                         * happens, so no danger of recursion
-                         */
-                        BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
-                        return 1;
                case HRTIMER_CB_IRQSAFE_PERCPU:
                case HRTIMER_CB_IRQSAFE_UNLOCKED:
                        /*
@@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
                         */
                        debug_hrtimer_deactivate(timer);
                        return 1;
-                case HRTIMER_CB_IRQSAFE:
                case HRTIMER_CB_SOFTIRQ:
                        /*
                         * Move everything else into the softirq pending list !
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 5072cf1685a2..7b8b0f21a5b1 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -304,17 +304,24 @@ int sprint_symbol(char *buffer, unsigned long address)
        char *modname;
        const char *name;
        unsigned long offset, size;
-        char namebuf[KSYM_NAME_LEN];
+        int len;
-        name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
+        name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
        if (!name)
                return sprintf(buffer, "0x%lx", address);
+        if (name != buffer)
+                strcpy(buffer, name);
+        len = strlen(buffer);
+        buffer += len;
        if (modname)
-                return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+                len += sprintf(buffer, "+%#lx/%#lx [%s]",
-                                size, modname);
+                                                offset, size, modname);
        else
-                return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
+                len += sprintf(buffer, "+%#lx/%#lx", offset, size);
+        return len;
 }
 /* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8b57a2597f21..9f8a3f25259a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,7 +72,7 @@ static bool kprobe_enabled;
 DEFINE_MUTEX(kprobe_mutex);             /* Protects kprobe_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 static struct {
-        spinlock_t lock ____cacheline_aligned;
+        spinlock_t lock ____cacheline_aligned_in_smp;
 } kretprobe_table_locks[KPROBE_TABLE_SIZE];
 static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
@@ -613,30 +613,37 @@ static int __kprobes __register_kprobe(struct kprobe *p,
                return -EINVAL;
        p->addr = addr;
-        if (!kernel_text_address((unsigned long) p->addr) ||
+        preempt_disable();
-            in_kprobes_functions((unsigned long) p->addr))
+        if (!__kernel_text_address((unsigned long) p->addr) ||
+            in_kprobes_functions((unsigned long) p->addr)) {
+                preempt_enable();
                return -EINVAL;
+        }
        p->mod_refcounted = 0;
        /*
         * Check if are we probing a module.
         */
-        probed_mod = module_text_address((unsigned long) p->addr);
+        probed_mod = __module_text_address((unsigned long) p->addr);
        if (probed_mod) {
-                struct module *calling_mod = module_text_address(called_from);
+                struct module *calling_mod;
+                calling_mod = __module_text_address(called_from);
                /*
                 * We must allow modules to probe themself and in this case
                 * avoid incrementing the module refcount, so as to allow
                 * unloading of self probing modules.
                 */
                if (calling_mod && calling_mod != probed_mod) {
-                        if (unlikely(!try_module_get(probed_mod)))
+                        if (unlikely(!try_module_get(probed_mod))) {
+                                preempt_enable();
                                return -EINVAL;
+                        }
                        p->mod_refcounted = 1;
                } else
                        probed_mod = NULL;
        }
+        preempt_enable();
        p->nmissed = 0;
        INIT_LIST_HEAD(&p->list);
@@ -718,6 +725,10 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
        struct kprobe *old_p;
        if (p->mod_refcounted) {
+                /*
+                 * Since we've already incremented refcount,
+                 * we don't need to disable preemption.
+                 */
                mod = module_text_address((unsigned long)p->addr);
                if (mod)
                        module_put(mod);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index a42858303233..e4bdda8dcf04 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3285,10 +3285,10 @@ void __init lockdep_info(void)
 {
        printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
-        printk("... MAX_LOCKDEP_SUBCLASSES:    %lu\n", MAX_LOCKDEP_SUBCLASSES);
+        printk("... MAX_LOCKDEP_SUBCLASSES:  %lu\n", MAX_LOCKDEP_SUBCLASSES);
        printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
        printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
-        printk("... CLASSHASH_SIZE:           %lu\n", CLASSHASH_SIZE);
+        printk("... CLASSHASH_SIZE:          %lu\n", CLASSHASH_SIZE);
        printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
        printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
        printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 4282c0a40a57..61d5aa5eced3 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
        while (nb && nr_to_call) {
                next_nb = rcu_dereference(nb->next);
+#ifdef CONFIG_DEBUG_NOTIFIERS
+                if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
+                        WARN(1, "Invalid notifier called!");
+                        nb = next_nb;
+                        continue;
+                }
+#endif
                ret = nb->notifier_call(nb, val, v);
                if (nr_calls)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 153dcb2639c3..3f4377e0aa04 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -58,21 +58,21 @@ void thread_group_cputime(
        struct task_struct *tsk,
        struct task_cputime *times)
 {
-        struct signal_struct *sig;
+        struct task_cputime *totals, *tot;
        int i;
-        struct task_cputime *tot;
-        sig = tsk->signal;
+        totals = tsk->signal->cputime.totals;
-        if (unlikely(!sig) || !sig->cputime.totals) {
+        if (!totals) {
                times->utime = tsk->utime;
                times->stime = tsk->stime;
                times->sum_exec_runtime = tsk->se.sum_exec_runtime;
                return;
        }
        times->stime = times->utime = cputime_zero;
        times->sum_exec_runtime = 0;
        for_each_possible_cpu(i) {
-                tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
+                tot = per_cpu_ptr(totals, i);
                times->utime = cputime_add(times->utime, tot->utime);
                times->stime = cputime_add(times->stime, tot->stime);
                times->sum_exec_runtime += tot->sum_exec_runtime;
@@ -1308,9 +1308,10 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 */
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-        struct signal_struct *sig = tsk->signal;
+        struct signal_struct *sig;
-        if (unlikely(!sig))
+        /* tsk == current, ensure it is safe to use ->signal/sighand */
+        if (unlikely(tsk->exit_state))
                return 0;
        if (!task_cputime_zero(&tsk->cputime_expires)) {
@@ -1323,6 +1324,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
                if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
                        return 1;
        }
+        sig = tsk->signal;
        if (!task_cputime_zero(&sig->cputime_expires)) {
                struct task_cputime group_sample;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 19122cf6d827..b8f7ce9473e8 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -174,7 +174,7 @@ static void suspend_test_finish(const char *label)
         * has some performance issues.  The stack dump of a WARN_ON
         * is more likely to get the right attention than a printk...
         */
-        WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
+        WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
 }
 #else
diff --git a/kernel/profile.c b/kernel/profile.c
index 9830a037d8db..5b7d1ac7124c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
 };
 #ifdef CONFIG_SMP
-static void __init profile_nop(void *unused)
+static inline void profile_nop(void *unused)
 {
 }
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e551542..e503a002f330 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
        /* OK, time to rat on our buddy... */
-        printk(KERN_ERR "RCU detected CPU stalls:");
+        printk(KERN_ERR "INFO: RCU detected CPU stalls:");
        for_each_possible_cpu(cpu) {
                if (cpu_isset(cpu, rcp->cpumask))
                        printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
 {
        unsigned long flags;
-        printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+        printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
                        smp_processor_id(), jiffies,
                        jiffies - rcp->gp_start);
        dump_stack();
diff --git a/kernel/relay.c b/kernel/relay.c
index 8d13a7855c08..32b0befdcb6a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan)
        }
        mutex_lock(&relay_channels_mutex);
-        for_each_online_cpu(i)
+        for_each_possible_cpu(i)
                if (chan->buf[i])
                        __relay_reset(chan->buf[i], 0);
        mutex_unlock(&relay_channels_mutex);
@@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename,
        return chan;
 free_bufs:
-        for_each_online_cpu(i) {
+        for_each_possible_cpu(i) {
-                if (!chan->buf[i])
+                if (chan->buf[i])
-                        break;
+                        relay_close_buf(chan->buf[i]);
-                relay_close_buf(chan->buf[i]);
        }
        kref_put(&chan->kref, relay_destroy_channel);
diff --git a/kernel/sched.c b/kernel/sched.c
index 2a106b6b78b0..558e5f284269 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1456,6 +1456,8 @@ static unsigned long cpu_avg_load_per_task(int cpu)
        if (rq->nr_running)
                rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+        else
+                rq->avg_load_per_task = 0;
        return rq->avg_load_per_task;
 }
@@ -4200,7 +4202,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
        if (p == rq->idle) {
                p->stime = cputime_add(p->stime, steal);
-                account_group_system_time(p, steal);
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
@@ -5868,6 +5869,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
        struct rq *rq = cpu_rq(cpu);
        unsigned long flags;
+        spin_lock_irqsave(&rq->lock, flags);
        __sched_fork(idle);
        idle->se.exec_start = sched_clock();
@@ -5875,7 +5878,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
        idle->cpus_allowed = cpumask_of_cpu(cpu);
        __set_task_cpu(idle, cpu);
-        spin_lock_irqsave(&rq->lock, flags);
        rq->curr = rq->idle = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
        idle->oncpu = 1;
@@ -7786,13 +7788,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
 *
 * The passed in 'doms_new' should be kmalloc'd. This routine takes
 * ownership of it and will kfree it when done with it. If the caller
- * failed the kmalloc call, then it can pass in doms_new == NULL,
+ * failed the kmalloc call, then it can pass in doms_new == NULL &&
- * and partition_sched_domains() will fallback to the single partition
+ * ndoms_new == 1, and partition_sched_domains() will fall back to
- * 'fallback_doms', it also forces the domains to be rebuilt.
+ * the single partition 'fallback_doms', it also forces the domains
+ * to be rebuilt.
 *
- * If doms_new==NULL it will be replaced with cpu_online_map.
+ * If doms_new == NULL it will be replaced with cpu_online_map.
- * ndoms_new==0 is a special case for destroying existing domains.
+ * ndoms_new == 0 is a special case for destroying existing domains,
- * It will not create the default domain.
+ * and it will not create the default domain.
 *
 * Call with hotplug lock held
 */
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 48ecc51e7701..26ed8e3d1c15 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -423,10 +423,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #undef __P
        {
+                unsigned int this_cpu = raw_smp_processor_id();
                u64 t0, t1;
-                t0 = sched_clock();
+                t0 = cpu_clock(this_cpu);
-                t1 = sched_clock();
+                t1 = cpu_clock(this_cpu);
                SEQ_printf(m, "%-35s:%21Ld\n",
                           "clock-delta", (long long)(t1-t0));
        }
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index ee71bec1da66..7dbf72a2b02c 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
 {
        struct signal_struct *sig;
-        sig = tsk->signal;
+        /* tsk == current, ensure it is safe to use ->signal */
-        if (unlikely(!sig))
+        if (unlikely(tsk->exit_state))
                return;
+        sig = tsk->signal;
        if (sig->cputime.totals) {
                struct task_cputime *times;
@@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
 {
        struct signal_struct *sig;
-        sig = tsk->signal;
+        /* tsk == current, ensure it is safe to use ->signal */
-        if (unlikely(!sig))
+        if (unlikely(tsk->exit_state))
                return;
+        sig = tsk->signal;
        if (sig->cputime.totals) {
                struct task_cputime *times;
@@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
        struct signal_struct *sig;
        sig = tsk->signal;
+        /* see __exit_signal()->task_rq_unlock_wait() */
+        barrier();
        if (unlikely(!sig))
                return;
        if (sig->cputime.totals) {
                struct task_cputime *times;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 3953e4aed733..884e6cd2769c 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
 /*
 * Zero means infinite timeout - no checking done:
 */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
 unsigned long __read_mostly sysctl_hung_task_warnings = 10;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bc4c00872c9..24e8ceacc388 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -112,7 +112,7 @@ static int chill(void *unused)
 int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
        struct work_struct *sm_work;
-        int i;
+        int i, ret;
        /* Set up initial state. */
        mutex_lock(&lock);
@@ -137,8 +137,9 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
        /* This will release the thread on our CPU. */
        put_cpu();
        flush_workqueue(stop_machine_wq);
+        ret = active.fnret;
        mutex_unlock(&lock);
-        return active.fnret;
+        return ret;
 }
 int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8f7d16..5fc3a0cfb994 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms)
        struct task_cputime cputime;
        cputime_t cutime, cstime;
-        spin_lock_irq(&current->sighand->siglock);
        thread_group_cputime(current, &cputime);
+        spin_lock_irq(&current->sighand->siglock);
        cutime = current->signal->cutime;
        cstime = current->signal->cstime;
        spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a77b27b11b04..e14a23281707 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24568c8..78db083390f0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -185,7 +185,6 @@ enum {
 };
 static int ftrace_filtered;
-static int tracing_on;
 static LIST_HEAD(ftrace_new_addrs);
@@ -327,96 +326,89 @@ ftrace_record_ip(unsigned long ip)
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec,
-                      unsigned char *old, unsigned char *new, int enable)
+                      unsigned char *nop, int enable)
 {
        unsigned long ip, fl;
+        unsigned char *call, *old, *new;
        ip = rec->ip;
-        if (ftrace_filtered && enable) {
+        /*
+         * If this record is not to be traced and
+         * it is not enabled then do nothing.
+         *
+         * If this record is not to be traced and
+         * it is enabled then disable it.
+         *
+         */
+        if (rec->flags & FTRACE_FL_NOTRACE) {
+                if (rec->flags & FTRACE_FL_ENABLED)
+                        rec->flags &= ~FTRACE_FL_ENABLED;
+                else
+                        return 0;
+        } else if (ftrace_filtered && enable) {
                /*
-                 * If filtering is on:
+                 * Filtering is on:
-                 *
-                 * If this record is set to be filtered and
-                 * is enabled then do nothing.
-                 *
-                 * If this record is set to be filtered and
-                 * it is not enabled, enable it.
-                 *
-                 * If this record is not set to be filtered
-                 * and it is not enabled do nothing.
-                 *
-                 * If this record is set not to trace then
-                 * do nothing.
-                 *
-                 * If this record is set not to trace and
-                 * it is enabled then disable it.
-                 *
-                 * If this record is not set to be filtered and
-                 * it is enabled, disable it.
                 */
-                fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
+                fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
-                                   FTRACE_FL_ENABLED);
-                if ((fl ==  (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
+                /* Record is filtered and enabled, do nothing */
-                    (fl ==  (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
+                if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
-                    !fl || (fl == FTRACE_FL_NOTRACE))
                        return 0;
-                /*
+                /* Record is not filtered and is not enabled, do nothing */
-                 * If it is enabled disable it,
+                if (!fl)
-                 * otherwise enable it!
+                        return 0;
-                 */
-                if (fl & FTRACE_FL_ENABLED) {
+                /* Record is not filtered but enabled, disable it */
-                        /* swap new and old */
+                if (fl == FTRACE_FL_ENABLED)
-                        new = old;
-                        old = ftrace_call_replace(ip, FTRACE_ADDR);
                        rec->flags &= ~FTRACE_FL_ENABLED;
-                } else {
+                else
-                        new = ftrace_call_replace(ip, FTRACE_ADDR);
+                /* Otherwise record is filtered but not enabled, enable it */
                        rec->flags |= FTRACE_FL_ENABLED;
-                }
        } else {
+                /* Disable or not filtered */
                if (enable) {
-                        /*
+                        /* if record is enabled, do nothing */
-                         * If this record is set not to trace and is
-                         * not enabled, do nothing.
-                         */
-                        fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
-                        if (fl == FTRACE_FL_NOTRACE)
-                                return 0;
-                        new = ftrace_call_replace(ip, FTRACE_ADDR);
-                } else
-                        old = ftrace_call_replace(ip, FTRACE_ADDR);
-                if (enable) {
                        if (rec->flags & FTRACE_FL_ENABLED)
                                return 0;
                        rec->flags |= FTRACE_FL_ENABLED;
                } else {
+                        /* if record is not enabled, do nothing */
                        if (!(rec->flags & FTRACE_FL_ENABLED))
                                return 0;
                        rec->flags &= ~FTRACE_FL_ENABLED;
                }
        }
+        call = ftrace_call_replace(ip, FTRACE_ADDR);
+        if (rec->flags & FTRACE_FL_ENABLED) {
+                old = nop;
+                new = call;
+        } else {
+                old = call;
+                new = nop;
+        }
        return ftrace_modify_code(ip, old, new);
 }
 static void ftrace_replace_code(int enable)
 {
        int i, failed;
-        unsigned char *new = NULL, *old = NULL;
+        unsigned char *nop = NULL;
        struct dyn_ftrace *rec;
        struct ftrace_page *pg;
-        if (enable)
+        nop = ftrace_nop_replace();
-                old = ftrace_nop_replace();
-        else
-                new = ftrace_nop_replace();
        for (pg = ftrace_pages_start; pg; pg = pg->next) {
                for (i = 0; i < pg->index; i++) {
@@ -434,7 +426,7 @@ static void ftrace_replace_code(int enable)
                                unfreeze_record(rec);
                        }
-                        failed = __ftrace_replace_code(rec, old, new, enable);
+                        failed = __ftrace_replace_code(rec, nop, enable);
                        if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
                                rec->flags |= FTRACE_FL_FAILED;
                                if ((system_state == SYSTEM_BOOTING) ||
@@ -506,13 +498,10 @@ static int __ftrace_modify_code(void *data)
 {
        int *command = data;
-        if (*command & FTRACE_ENABLE_CALLS) {
+        if (*command & FTRACE_ENABLE_CALLS)
                ftrace_replace_code(1);
-                tracing_on = 1;
+        else if (*command & FTRACE_DISABLE_CALLS)
-        } else if (*command & FTRACE_DISABLE_CALLS) {
                ftrace_replace_code(0);
-                tracing_on = 0;
-        }
        if (*command & FTRACE_UPDATE_TRACE_FUNC)
                ftrace_update_ftrace_func(ftrace_trace_function);
@@ -538,8 +527,7 @@ static void ftrace_startup(void)
        mutex_lock(&ftrace_start_lock);
        ftrace_start++;
-        if (ftrace_start == 1)
+        command |= FTRACE_ENABLE_CALLS;
-                command |= FTRACE_ENABLE_CALLS;
        if (saved_ftrace_func != ftrace_trace_function) {
                saved_ftrace_func = ftrace_trace_function;
@@ -677,7 +665,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
        cnt = num_to_init / ENTRIES_PER_PAGE;
        pr_info("ftrace: allocating %ld entries in %d pages\n",
-                num_to_init, cnt);
+                num_to_init, cnt + 1);
        for (i = 0; i < cnt; i++) {
                pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -738,6 +726,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                    ((iter->flags & FTRACE_ITER_FAILURES) &&
                     !(rec->flags & FTRACE_FL_FAILED)) ||
+                    ((iter->flags & FTRACE_ITER_FILTER) &&
+                     !(rec->flags & FTRACE_FL_FILTER)) ||
                    ((iter->flags & FTRACE_ITER_NOTRACE) &&
                     !(rec->flags & FTRACE_FL_NOTRACE))) {
                        rec = NULL;
@@ -757,13 +748,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
        void *p = NULL;
        loff_t l = -1;
-        if (*pos != iter->pos) {
+        if (*pos > iter->pos)
-                for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
+                *pos = iter->pos;
-                        ;
-        } else {
+        l = *pos;
-                l = *pos;
+        p = t_next(m, p, &l);
-                p = t_next(m, p, &l);
-        }
        return p;
 }
@@ -774,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p)
 static int t_show(struct seq_file *m, void *v)
 {
+        struct ftrace_iterator *iter = m->private;
        struct dyn_ftrace *rec = v;
        char str[KSYM_SYMBOL_LEN];
+        int ret = 0;
        if (!rec)
                return 0;
        kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
-        seq_printf(m, "%s\n", str);
+        ret = seq_printf(m, "%s\n", str);
+        if (ret < 0) {
+                iter->pos--;
+                iter->idx--;
+        }
        return 0;
 }
@@ -808,7 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
                return -ENOMEM;
        iter->pg = ftrace_pages_start;
-        iter->pos = -1;
+        iter->pos = 0;
        ret = seq_open(file, &show_ftrace_seq_ops);
        if (!ret) {
@@ -895,7 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
        if (file->f_mode & FMODE_READ) {
                iter->pg = ftrace_pages_start;
-                iter->pos = -1;
+                iter->pos = 0;
                iter->flags = enable ? FTRACE_ITER_FILTER :
                        FTRACE_ITER_NOTRACE;
@@ -1186,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
        mutex_lock(&ftrace_sysctl_lock);
        mutex_lock(&ftrace_start_lock);
-        if (iter->filtered && ftrace_start && ftrace_enabled)
+        if (ftrace_start && ftrace_enabled)
                ftrace_run_update_code(FTRACE_ENABLE_CALLS);
        mutex_unlock(&ftrace_start_lock);
        mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2f76193c3489..f780e9552f91 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
+#include "trace.h"
+/*
+ * Global flag to disable all recording to ring buffers.
+ * Set by tracing_off(), cleared by tracing_on(), and also exposed
+ * (inverted) through the debugfs "tracing_on" file.
+ */
+static int ring_buffers_off __read_mostly;
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off. It only clears the global
+ * ring_buffers_off flag; the per-buffer record_disabled state is
+ * tested separately by the reserve and write paths.
+ */
+void tracing_on(void)
+{
+        ring_buffers_off = 0;
+}
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail: while the flag is set,
+ * ring_buffer_lock_reserve() returns NULL and ring_buffer_write()
+ * returns -EBUSY.
+ */
+void tracing_off(void)
+{
+        ring_buffers_off = 1;
+}
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+        u64 time;
+        preempt_disable_notrace();
        /* shift to debug/test normalization and TIME_EXTENTS */
-        return sched_clock() << DEBUG_SHIFT;
+        time = sched_clock() << DEBUG_SHIFT;
+        preempt_enable_notrace();
+        return time;
 }
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
        LIST_HEAD(pages);
        int i, cpu;
+        /*
+         * Always succeed at resizing a non-existent buffer:
+         */
+        if (!buffer)
+                return size;
        size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        size *= BUF_PAGE_SIZE;
        buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
                list_del_init(&page->list);
                free_buffer_page(page);
        }
+        mutex_unlock(&buffer->mutex);
        return -ENOMEM;
 }
@@ -1133,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        int cpu, resched;
+        if (ring_buffers_off)
+                return NULL;
        if (atomic_read(&buffer->record_disabled))
                return NULL;
@@ -1249,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
        int ret = -EBUSY;
        int cpu, resched;
+        if (ring_buffers_off)
+                return -EBUSY;
        if (atomic_read(&buffer->record_disabled))
                return -EBUSY;
@@ -2070,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
        return 0;
 }
+/*
+ * rb_simple_read - report the state behind the debugfs file to user space
+ *
+ * filp->private_data points at the int flag backing the file. The flag
+ * is stored inverted (it means "off"), so its logical negation is what
+ * gets formatted as a decimal digit plus newline and copied out.
+ *
+ * Returns whatever simple_read_from_buffer() returns.
+ */
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+               size_t cnt, loff_t *ppos)
+{
+        char text[64];
+        int *off_flag = filp->private_data;
+        /* !ring_buffers_off == tracing_on */
+        int len = sprintf(text, "%d\n", !*off_flag);
+        return simple_read_from_buffer(ubuf, cnt, ppos, text, len);
+}
+/*
+ * rb_simple_write - update the flag behind the debugfs file from user input
+ *
+ * Parses a base-10 unsigned integer from @ubuf and stores its logical
+ * negation in the int that filp->private_data points at (the flag is
+ * kept inverted: a non-zero value written means "on", i.e. flag = 0).
+ *
+ * Returns @cnt on success, -EINVAL if the input does not fit the local
+ * buffer, -EFAULT if the copy from user space fails, or the negative
+ * value returned by strict_strtoul() when parsing fails.
+ */
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+                size_t cnt, loff_t *ppos)
+{
+        int *p = filp->private_data;
+        char buf[64];
+        /* strict_strtoul() stores its result through an unsigned long * */
+        unsigned long val;
+        int ret;
+        /* leave room for the terminating NUL written below */
+        if (cnt >= sizeof(buf))
+                return -EINVAL;
+        if (copy_from_user(buf, ubuf, cnt))
+                return -EFAULT;
+        buf[cnt] = 0;
+        ret = strict_strtoul(buf, 10, &val);
+        if (ret < 0)
+                return ret;
+        /* !ring_buffers_off == tracing_on */
+        *p = !val;
+        (*ppos)++;
+        return cnt;
+}
+/*
+ * File operations for the debugfs "tracing_on" control file.
+ * The read and write handlers reach the backing int through
+ * filp->private_data.
+ * NOTE(review): presumably tracing_open_generic() is what stores the
+ * data pointer given to debugfs_create_file() there - confirm in trace.c.
+ */
+static struct file_operations rb_simple_fops = {
+        .open           = tracing_open_generic,
+        .read           = rb_simple_read,
+        .write          = rb_simple_write,
+};
+/*
+ * rb_init_debugfs - create the debugfs "tracing_on" control file
+ *
+ * The file is created with mode 0644 under the dentry returned by
+ * tracing_init_dentry(), backed by ring_buffers_off and served by
+ * rb_simple_fops. A creation failure is only warned about; the
+ * initcall always returns 0.
+ */
+static __init int rb_init_debugfs(void)
+{
+        struct dentry *parent = tracing_init_dentry();
+        struct dentry *file;
+        file = debugfs_create_file("tracing_on", 0644, parent,
+                                   &ring_buffers_off, &rb_simple_fops);
+        if (file)
+                return 0;
+        pr_warning("Could not create debugfs 'tracing_on' entry\n");
+        return 0;
+}
+fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 697eda36b86a..d86e3252f300 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
        }
        mutex_unlock(&trace_types_lock);
+        kfree(iter);
        return ERR_PTR(-ENOMEM);
 }