author     Jiri Kosina <jkosina@suse.cz>  2011-04-26 04:22:15 -0400
committer  Jiri Kosina <jkosina@suse.cz>  2011-04-26 04:22:59 -0400
commit     07f9479a40cc778bc1462ada11f95b01360ae4ff (patch)
tree       0676cf38df3844004bb3ebfd99dfa67a4a8998f5 /kernel
parent     9d5e6bdb3013acfb311ab407eeca0b6a6a3dedbf (diff)
parent     cd2e49e90f1cae7726c9a2c54488d881d7f1cd1c (diff)
Merge branch 'master' into for-next
Fast-forwarded to current state of Linus' tree as there are patches to be
applied for files that didn't exist on the old branch.
Diffstat (limited to 'kernel')
96 files changed, 1247 insertions, 911 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba33..85cbfb31e73e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 37b2bea170c8..e99dda04b126 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -607,7 +607,7 @@ void audit_trim_trees(void)
 spin_lock(&hash_lock);
 list_for_each_entry(node, &tree->chunks, list) {
 struct audit_chunk *chunk = find_chunk(node);
-/* this could be NULL if the watch is dieing else where... */
+/* this could be NULL if the watch is dying else where... */
 struct inode *inode = chunk->mark.i.inode;
 node->index |= 1U<<31;
 if (iterate_mounts(compare_root, inode, root_mnt))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f49a0318c2ed..b33513a08beb 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1011,7 +1011,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
 /*
 * to_send and len_sent accounting are very loose estimates. We aren't
 * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being
-* within about 500 bytes (next page boundry)
+* within about 500 bytes (next page boundary)
 *
 * why snprintf? an int is up to 12 digits long. if we just assumed when
 * logging that a[%d]= was going to be 16 characters long we would be wasting
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
 /* The enum constants to put into include/generated/bounds.h */
 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 /* End of constants */
 }
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f132c8..bf0c734d0c12 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -290,6 +291,60 @@ error:
 }
 
 /**
+* has_capability - Does a task have a capability in init_user_ns
+* @t: The task in question
+* @cap: The capability to be tested for
+*
+* Return true if the specified task has the given superior capability
+* currently in effect to the initial user namespace, false if not.
+*
+* Note that this does not set PF_SUPERPRIV on the task.
+*/
+bool has_capability(struct task_struct *t, int cap)
+{
+int ret = security_real_capable(t, &init_user_ns, cap);
+
+return (ret == 0);
+}
+
+/**
+* has_capability - Does a task have a capability in a specific user ns
+* @t: The task in question
+* @ns: target user namespace
+* @cap: The capability to be tested for
+*
+* Return true if the specified task has the given superior capability
+* currently in effect to the specified user namespace, false if not.
+*
+* Note that this does not set PF_SUPERPRIV on the task.
+*/
+bool has_ns_capability(struct task_struct *t,
+struct user_namespace *ns, int cap)
+{
+int ret = security_real_capable(t, ns, cap);
+
+return (ret == 0);
+}
+
+/**
+* has_capability_noaudit - Does a task have a capability (unaudited)
+* @t: The task in question
+* @cap: The capability to be tested for
+*
+* Return true if the specified task has the given superior capability
+* currently in effect to init_user_ns, false if not. Don't write an
+* audit message for the check.
+*
+* Note that this does not set PF_SUPERPRIV on the task.
+*/
+bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+
+return (ret == 0);
+}
+
+/**
 * capable - Determine if the current task has a superior capability in effect
 * @cap: The capability to be tested for
 *
@@ -299,17 +354,48 @@ error:
 * This sets PF_SUPERPRIV on the task if the capability is available on the
 * assumption that it's about to be used.
 */
-int capable(int cap)
+bool capable(int cap)
+{
+return ns_capable(&init_user_ns, cap);
+}
+EXPORT_SYMBOL(capable);
+
+/**
+* ns_capable - Determine if the current task has a superior capability in effect
+* @ns: The usernamespace we want the capability in
+* @cap: The capability to be tested for
+*
+* Return true if the current task has the given superior capability currently
+* available for use, false if not.
+*
+* This sets PF_SUPERPRIV on the task if the capability is available on the
+* assumption that it's about to be used.
+*/
+bool ns_capable(struct user_namespace *ns, int cap)
 {
 if (unlikely(!cap_valid(cap))) {
 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
 BUG();
 }
 
-if (security_capable(current_cred(), cap) == 0) {
+if (security_capable(ns, current_cred(), cap) == 0) {
 current->flags |= PF_SUPERPRIV;
-return 1;
+return true;
 }
-return 0;
+return false;
 }
-EXPORT_SYMBOL(capable);
+EXPORT_SYMBOL(ns_capable);
+
+/**
+* task_ns_capable - Determine whether current task has a superior
+* capability targeted at a specific task's user namespace.
+* @t: The task whose user namespace is targeted.
+* @cap: The capability in question.
+*
+* Return true if it does, false otherwise.
+*/
+bool task_ns_capable(struct task_struct *t, int cap)
+{
+return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
+}
+EXPORT_SYMBOL(task_ns_capable);
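
Note on usage (not part of this commit): the new interface lets a permission check be scoped to a user namespace instead of always meaning "global root". A minimal, hypothetical caller sketch using the helpers introduced above:

/* Hypothetical caller, illustrative only. */
#include <linux/capability.h>
#include <linux/sched.h>

static int example_may_renice(struct task_struct *target)
{
        /* Old-style global check against init_user_ns, still available. */
        if (capable(CAP_SYS_NICE))
                return 0;

        /* New: check against the user namespace owning the target task. */
        if (task_ns_capable(target, CAP_SYS_NICE))
                return 0;

        return -EPERM;
}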
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 95362d15128c..25c7eb52de1a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -157,7 +157,7 @@ struct css_id {
 };
 
 /*
-* cgroup_event represents events which userspace want to recieve.
+* cgroup_event represents events which userspace want to receive.
 */
 struct cgroup_event {
 /*
@@ -1813,10 +1813,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
 /* Update the css_set linked lists if we're using them */
 write_lock(&css_set_lock);
-if (!list_empty(&tsk->cg_list)) {
-list_del(&tsk->cg_list);
-list_add(&tsk->cg_list, &newcg->tasks);
-}
+if (!list_empty(&tsk->cg_list))
+list_move(&tsk->cg_list, &newcg->tasks);
 write_unlock(&css_set_lock);
 
 for_each_subsys(root, ss) {
@@ -3655,12 +3653,12 @@ again:
 spin_lock(&release_list_lock);
 set_bit(CGRP_REMOVED, &cgrp->flags);
 if (!list_empty(&cgrp->release_list))
-list_del(&cgrp->release_list);
+list_del_init(&cgrp->release_list);
 spin_unlock(&release_list_lock);
 
 cgroup_lock_hierarchy(cgrp->root);
 /* delete this cgroup from parent->children */
-list_del(&cgrp->sibling);
+list_del_init(&cgrp->sibling);
 cgroup_unlock_hierarchy(cgrp->root);
 
 d = dget(cgrp->dentry);
@@ -3879,7 +3877,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 subsys[ss->subsys_id] = NULL;
 
 /* remove subsystem from rootnode's list of subsystems */
-list_del(&ss->sibling);
+list_del_init(&ss->sibling);
 
 /*
 * disentangle the css from all css_sets attached to the dummytop. as
@@ -4241,7 +4239,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 if (!list_empty(&tsk->cg_list)) {
 write_lock(&css_set_lock);
 if (!list_empty(&tsk->cg_list))
-list_del(&tsk->cg_list);
+list_del_init(&tsk->cg_list);
 write_unlock(&css_set_lock);
 }
 
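
The cgroup hunks above all convert bare list_del() (or list_del() plus list_add()) into list_del_init() and list_move(). A short sketch of why that matters, based on the usual <linux/list.h> semantics (illustrative, not part of this commit):

/* After list_del(), the entry's own pointers are poisoned, so a later
 * list_empty(&item) on the entry itself is not meaningful.
 * list_del_init() re-points the entry at itself, so "is this entry still
 * on a list?" checks keep working, which is the pattern cgroup_exit()
 * relies on above. */
struct list_head item, dst;

list_del(&item);        /* unlink only; item is left poisoned             */
list_del_init(&item);   /* unlink and reinit; list_empty(&item) is true   */
list_move(&item, &dst); /* list_del(&item) + list_add(&item, &dst) in one */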
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 156cc5556140..12b7458f23b1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -126,7 +126,7 @@ static void cpu_hotplug_done(void)
 #else /* #if CONFIG_HOTPLUG_CPU */
 static void cpu_hotplug_begin(void) {}
 static void cpu_hotplug_done(void) {}
-#endif /* #esle #if CONFIG_HOTPLUG_CPU */
+#endif /* #else #if CONFIG_HOTPLUG_CPU */
 
 /* Need to know about CPUs going up/down? */
 int __ref register_cpu_notifier(struct notifier_block *nb)
@@ -160,7 +160,6 @@ static void cpu_notify_nofail(unsigned long val, void *v)
 {
 BUG_ON(cpu_notify(val, v));
 }
-
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -205,7 +204,6 @@ static int __ref take_cpu_down(void *_param)
 return err;
 
 cpu_notify(CPU_DYING | param->mod, param->hcpu);
-
 return 0;
 }
 
@@ -227,6 +225,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 return -EINVAL;
 
 cpu_hotplug_begin();
+
 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 if (err) {
 nr_calls--;
@@ -304,7 +303,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 if (ret) {
 nr_calls--;
-printk("%s: attempt to bring up CPU %u failed\n",
+printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
 __func__, cpu);
 goto out_notify;
 }
@@ -450,14 +449,14 @@ void __ref enable_nonboot_cpus(void)
 if (cpumask_empty(frozen_cpus))
 goto out;
 
-printk("Enabling non-boot CPUs ...\n");
+printk(KERN_INFO "Enabling non-boot CPUs ...\n");
 
 arch_enable_nonboot_cpus_begin();
 
 for_each_cpu(cpu, frozen_cpus) {
 error = _cpu_up(cpu, 1);
 if (!error) {
-printk("CPU%d is up\n", cpu);
+printk(KERN_INFO "CPU%d is up\n", cpu);
 continue;
 }
 printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
@@ -509,7 +508,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
 */
 
 /* cpu_bit_bitmap[0] is empty - so we can back into it */
-#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
+#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
 #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
 #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
 #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
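
The last hunk only adds parentheses around the shift in MASK_DECLARE_1(), so the macro result binds safely if it is ever combined with other operators. For readers unfamiliar with the designated-initializer trick behind cpu_bit_bitmap, here is a standalone user-space sketch of how the expansion fills the array (illustrative only; names are hypothetical):

#include <stdio.h>

#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)

/* Row 0 stays empty, rows 1..4 each hold a single bit. */
static const unsigned long bitmap[5][1] = {
        MASK_DECLARE_2(0),      /* [1][0] = 0x1, [2][0] = 0x2 */
        MASK_DECLARE_2(2),      /* [3][0] = 0x4, [4][0] = 0x8 */
};

int main(void)
{
        for (int i = 0; i < 5; i++)
                printf("bitmap[%d][0] = %#lx\n", i, bitmap[i][0]);
        return 0;
}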
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e98189032..33eee16addb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
 struct cpuset *cs;
 int migrate;
 const nodemask_t *oldmem = scan->data;
-NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
-if (!newmems)
-return;
+static nodemask_t newmems; /* protected by cgroup_mutex */
 
 cs = cgroup_cs(scan->cg);
-guarantee_online_mems(cs, newmems);
-
-cpuset_change_task_nodemask(p, newmems);
+guarantee_online_mems(cs, &newmems);
 
-NODEMASK_FREE(newmems);
+cpuset_change_task_nodemask(p, &newmems);
 
 mm = get_task_mm(p);
 if (!mm)
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 struct mm_struct *mm;
 struct cpuset *cs = cgroup_cs(cont);
 struct cpuset *oldcs = cgroup_cs(oldcont);
-NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
-NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
-
-if (from == NULL || to == NULL)
-goto alloc_fail;
+static nodemask_t to; /* protected by cgroup_mutex */
 
 if (cs == &top_cpuset) {
 cpumask_copy(cpus_attach, cpu_possible_mask);
 } else {
 guarantee_online_cpus(cs, cpus_attach);
 }
-guarantee_online_mems(cs, to);
+guarantee_online_mems(cs, &to);
 
 /* do per-task migration stuff possibly for each in the threadgroup */
-cpuset_attach_task(tsk, to, cs);
+cpuset_attach_task(tsk, &to, cs);
 if (threadgroup) {
 struct task_struct *c;
 rcu_read_lock();
 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-cpuset_attach_task(c, to, cs);
+cpuset_attach_task(c, &to, cs);
 }
 rcu_read_unlock();
 }
 
 /* change mm; only needs to be done once even if threadgroup */
-*from = oldcs->mems_allowed;
-*to = cs->mems_allowed;
+to = cs->mems_allowed;
 mm = get_task_mm(tsk);
 if (mm) {
-mpol_rebind_mm(mm, to);
+mpol_rebind_mm(mm, &to);
 if (is_memory_migrate(cs))
-cpuset_migrate_mm(mm, from, to);
+cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
 mmput(mm);
 }
-
-alloc_fail:
-NODEMASK_FREE(from);
-NODEMASK_FREE(to);
 }
 
 /* The various types of files and directories in a cpuset file system */
@@ -1610,34 +1596,26 @@ out:
 * across a page fault.
 */
 
-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
-int ret;
+size_t count;
 
 mutex_lock(&callback_mutex);
-ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
 mutex_unlock(&callback_mutex);
 
-return ret;
+return count;
 }
 
-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
-NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
-int retval;
-
-if (mask == NULL)
-return -ENOMEM;
+size_t count;
 
 mutex_lock(&callback_mutex);
-*mask = cs->mems_allowed;
+count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
 mutex_unlock(&callback_mutex);
 
-retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
-NODEMASK_FREE(mask);
-
-return retval;
+return count;
 }
 
 static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
 cs = cgroup_cs(cgroup);
 parent_cs = cgroup_cs(parent);
 
+mutex_lock(&callback_mutex);
 cs->mems_allowed = parent_cs->mems_allowed;
 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+mutex_unlock(&callback_mutex);
 return;
 }
 
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 struct cpuset *cp; /* scans cpusets being updated */
 struct cpuset *child; /* scans child cpusets of cp */
 struct cgroup *cont;
-NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-if (oldmems == NULL)
-return;
+static nodemask_t oldmems; /* protected by cgroup_mutex */
 
 list_add_tail((struct list_head *)&root->stack_list, &queue);
 
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 continue;
 
-*oldmems = cp->mems_allowed;
+oldmems = cp->mems_allowed;
 
 /* Remove offline cpus and mems from this cpuset. */
 mutex_lock(&callback_mutex);
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 remove_tasks_in_empty_cpuset(cp);
 else {
 update_tasks_cpumask(cp, NULL);
-update_tasks_nodemask(cp, oldmems, NULL);
+update_tasks_nodemask(cp, &oldmems, NULL);
 }
 }
-NODEMASK_FREE(oldmems);
 }
 
 /*
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 unsigned long action, void *arg)
 {
-NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-if (oldmems == NULL)
-return NOTIFY_DONE;
+static nodemask_t oldmems; /* protected by cgroup_mutex */
 
 cgroup_lock();
 switch (action) {
 case MEM_ONLINE:
-*oldmems = top_cpuset.mems_allowed;
+oldmems = top_cpuset.mems_allowed;
 mutex_lock(&callback_mutex);
 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 mutex_unlock(&callback_mutex);
-update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
+update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
 break;
 case MEM_OFFLINE:
 /*
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 }
 cgroup_unlock();
 
-NODEMASK_FREE(oldmems);
 return NOTIFY_OK;
 }
 #endif
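
The recurring change in these cpuset hunks replaces NODEMASK_ALLOC()/NODEMASK_FREE() scratch buffers with function-local static nodemask_t variables. That removes an allocation-failure path and is safe only because every caller already serializes on cgroup_mutex, so the static buffer can never be used concurrently. A small hedged sketch of the general pattern (illustrative names, not from this commit):

/* Sketch: a static scratch buffer is fine without allocation as long as
 * every path that touches it holds the same lock. */
static DEFINE_MUTEX(example_mutex);
static nodemask_t scratch;              /* protected by example_mutex */

static void example_update(const nodemask_t *src)
{
        mutex_lock(&example_mutex);
        scratch = *src;                 /* no NODEMASK_ALLOC/FREE needed */
        /* ... operate on &scratch ... */
        mutex_unlock(&example_mutex);
}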
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 000000000000..5f85690285d4
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@
+#include <linux/kernel.h>
+#include <linux/crash_dump.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+/*
+* If we have booted due to a crash, max_pfn will be a very low value. We need
+* to know the amount of memory that the previous kernel used.
+*/
+unsigned long saved_max_pfn;
+
+/*
+* stores the physical address of elf header of crash image
+*
+* Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+* is_kdump_kernel() to determine if we are booting after a panic. Hence put
+* it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+*/
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
+/*
+* elfcorehdr= specifies the location of elf core header stored by the crashed
+* kernel. This option will be passed by kexec loader to the capture kernel.
+*/
+static int __init setup_elfcorehdr(char *arg)
+{
+char *end;
+if (!arg)
+return -EINVAL;
+elfcorehdr_addr = memparse(arg, &end);
+return end > arg ? 0 : -EINVAL;
+}
+early_param("elfcorehdr", setup_elfcorehdr);
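
For context: the capture kernel receives the crashed kernel's ELF core header location on its command line, e.g. elfcorehdr=0x2000000 (memparse also accepts K/M/G suffixes), and code elsewhere can use elfcorehdr_addr, typically through is_kdump_kernel() from <linux/crash_dump.h>, to detect a kdump boot. A minimal consumer sketch (illustrative, not part of this commit):

#include <linux/kernel.h>
#include <linux/crash_dump.h>

static void example_report_kdump(void)
{
        /* is_kdump_kernel() tests elfcorehdr_addr against ELFCORE_ADDR_MAX */
        if (is_kdump_kernel())
                printk(KERN_INFO "kdump boot, ELF core header at %#llx\n",
                       elfcorehdr_addr);
}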
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c132c5a7..5557b55048df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode)
 }
 EXPORT_SYMBOL(set_create_files_as);
 
+struct user_namespace *current_user_ns(void)
+{
+return _current_user_ns();
+}
+EXPORT_SYMBOL(current_user_ns);
+
 #ifdef CONFIG_DEBUG_CREDENTIALS
 
 bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index cefd4a11f6d9..bad6786dee88 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -538,7 +538,7 @@ return_normal:
 
 /*
 * For single stepping, try to only enter on the processor
-* that was single stepping. To gaurd against a deadlock, the
+* that was single stepping. To guard against a deadlock, the
 * kernel will only try for the value of sstep_tries before
 * giving up and continuing on.
 */
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd2dfe7..a11db956dd62 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
 put_packet(remcom_out_buffer);
 return 0;
 }
+
+/**
+* gdbstub_exit - Send an exit message to GDB
+* @status: The exit code to report.
+*/
+void gdbstub_exit(int status)
+{
+unsigned char checksum, ch, buffer[3];
+int loop;
+
+buffer[0] = 'W';
+buffer[1] = hex_asc_hi(status);
+buffer[2] = hex_asc_lo(status);
+
+dbg_io_ops->write_char('$');
+checksum = 0;
+
+for (loop = 0; loop < 3; loop++) {
+ch = buffer[loop];
+checksum += ch;
+dbg_io_ops->write_char(ch);
+}
+
+dbg_io_ops->write_char('#');
+dbg_io_ops->write_char(hex_asc_hi(checksum));
+dbg_io_ops->write_char(hex_asc_lo(checksum));
+
+/* make sure the output is flushed, lest the bootloader clobber it */
+dbg_io_ops->flush();
+}
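
For context: gdbstub_exit() emits a GDB remote serial protocol packet of the form $<payload>#<checksum>, where the checksum is the modulo-256 sum of the payload bytes in two lowercase hex digits, and the W payload reports the target's exit status. So gdbstub_exit(0) goes out roughly as $W00#b7. A user-space sketch of the same framing (illustrative only):

#include <stdio.h>

int main(void)
{
        int status = 0;
        char payload[4];
        unsigned char checksum = 0;

        /* 'W' followed by the exit status as two hex digits */
        snprintf(payload, sizeof(payload), "W%02x", status & 0xff);
        for (const char *p = payload; *p; p++)
                checksum += (unsigned char)*p;

        printf("$%s#%02x\n", payload, checksum);        /* prints "$W00#b7" */
        return 0;
}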
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index bd3e8e29caa3..be14779bcef6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -78,7 +78,7 @@ static unsigned int kdb_continue_catastrophic;
 static kdbtab_t *kdb_commands;
 #define KDB_BASE_CMD_MAX 50
 static int kdb_max_commands = KDB_BASE_CMD_MAX;
-static kdbtab_t kdb_base_commands[50];
+static kdbtab_t kdb_base_commands[KDB_BASE_CMD_MAX];
 #define for_each_kdbcmd(cmd, num) \
 for ((cmd) = kdb_base_commands, (num) = 0; \
 num < kdb_max_commands; \
@@ -441,9 +441,9 @@ static int kdb_check_regs(void)
 * symbol name, and offset to the caller.
 *
 * The argument may consist of a numeric value (decimal or
-* hexidecimal), a symbol name, a register name (preceeded by the
+* hexidecimal), a symbol name, a register name (preceded by the
 * percent sign), an environment variable with a numeric value
-* (preceeded by a dollar sign) or a simple arithmetic expression
+* (preceded by a dollar sign) or a simple arithmetic expression
 * consisting of a symbol name, +/-, and a numeric constant value
 * (offset).
 * Parameters:
@@ -1335,7 +1335,7 @@ void kdb_print_state(const char *text, int value)
 * error The hardware-defined error code
 * reason2 kdb's current reason code.
 * Initially error but can change
-* acording to kdb state.
+* according to kdb state.
 * db_result Result code from break or debug point.
 * regs The exception frame at time of fault/breakpoint.
 * should always be valid.
@@ -2892,7 +2892,7 @@ static void __init kdb_inittab(void)
 "Send a signal to a process", 0, KDB_REPEAT_NONE);
 kdb_register_repeat("summary", kdb_summary, "",
 "Summarize the system", 4, KDB_REPEAT_NONE);
-kdb_register_repeat("per_cpu", kdb_per_cpu, "",
+kdb_register_repeat("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]",
 "Display per_cpu variables", 3, KDB_REPEAT_NONE);
 kdb_register_repeat("grephelp", kdb_grep_help, "",
 "Display help on | grep", 0, KDB_REPEAT_NONE);
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 6b2485dcb050..5532dd37aa86 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -545,7 +545,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size)
 * Mask for process state.
 * Notes:
 * The mask folds data from several sources into a single long value, so
-* be carefull not to overlap the bits. TASK_* bits are in the LSB,
+* be careful not to overlap the bits. TASK_* bits are in the LSB,
 * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there
 * is no overlap between TASK_* and EXIT_* but that may not always be
 * true, so EXIT_* bits are shifted left 16 bits before being stored in
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..f5d2f63bae0b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -841,7 +841,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 /* Let father know we died
 *
 * Thread signals are configurable, but you aren't going to use
-* that to send signals to arbitary processes.
+* that to send signals to arbitrary processes.
 * That stops right now.
 *
 * If the parent exec id doesn't match the exec id we saved
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
 profile_task_exit(tsk);
 
 WARN_ON(atomic_read(&tsk->fs_excl));
+WARN_ON(blk_needs_flush_plug(tsk));
 
 if (unlikely(in_interrupt()))
 panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 05b92c457010..e7548dee636b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
+#include <linux/kthread.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -109,20 +110,25 @@ int nr_processes(void)
 }
 
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
-# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk))
+# define alloc_task_struct_node(node) \
+kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
+# define free_task_struct(tsk) \
+kmem_cache_free(task_struct_cachep, (tsk))
 static struct kmem_cache *task_struct_cachep;
 #endif
 
 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+int node)
 {
 #ifdef CONFIG_DEBUG_STACK_USAGE
 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
 #else
 gfp_t mask = GFP_KERNEL;
 #endif
-return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
@@ -249,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 struct task_struct *tsk;
 struct thread_info *ti;
 unsigned long *stackend;
-
+int node = tsk_fork_get_node(orig);
 int err;
 
 prepare_to_copy(orig);
 
-tsk = alloc_task_struct();
+tsk = alloc_task_struct_node(node);
 if (!tsk)
 return NULL;
 
-ti = alloc_thread_info(tsk);
+ti = alloc_thread_info_node(tsk, node);
 if (!ti) {
 free_task_struct(tsk);
 return NULL;
@@ -1181,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 pid = alloc_pid(p->nsproxy->pid_ns);
 if (!pid)
 goto bad_fork_cleanup_io;
-
-if (clone_flags & CLONE_NEWPID) {
-retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
-if (retval < 0)
-goto bad_fork_free_pid;
-}
 }
 
 p->pid = pid_nr(pid);
@@ -1205,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 * Clear TID on mm_release()?
 */
 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
@@ -1290,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 tracehook_finish_clone(p, clone_flags, trace);
 
 if (thread_group_leader(p)) {
-if (clone_flags & CLONE_NEWPID)
+if (is_child_reaper(pid))
 p->nsproxy->pid_ns->child_reaper = p;
 
 p->signal->leader_pid = pid;
@@ -1513,38 +1516,24 @@ void __init proc_caches_init(void)
 }
 
 /*
-* Check constraints on flags passed to the unshare system call and
-* force unsharing of additional process context as appropriate.
+* Check constraints on flags passed to the unshare system call.
 */
-static void check_unshare_flags(unsigned long *flags_ptr)
+static int check_unshare_flags(unsigned long unshare_flags)
 {
+if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+return -EINVAL;
 /*
-* If unsharing a thread from a thread group, must also
-* unshare vm.
+* Not implemented, but pretend it works if there is nothing to
+* unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
+* needs to unshare vm.
 */
-if (*flags_ptr & CLONE_THREAD)
-*flags_ptr |= CLONE_VM;
-
-/*
-* If unsharing vm, must also unshare signal handlers.
-*/
-if (*flags_ptr & CLONE_VM)
-*flags_ptr |= CLONE_SIGHAND;
-
-/*
-* If unsharing namespace, must also unshare filesystem information.
-*/
-if (*flags_ptr & CLONE_NEWNS)
-*flags_ptr |= CLONE_FS;
-}
-
-/*
-* Unsharing of tasks created with CLONE_THREAD is not supported yet
-*/
-static int unshare_thread(unsigned long unshare_flags)
-{
-if (unshare_flags & CLONE_THREAD)
-return -EINVAL;
+if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
+/* FIXME: get_task_mm() increments ->mm_users */
+if (atomic_read(&current->mm->mm_users) > 1)
+return -EINVAL;
+}
 
 return 0;
 }
@@ -1571,34 +1560,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 }
 
 /*
-* Unsharing of sighand is not supported yet
-*/
-static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
-{
-struct sighand_struct *sigh = current->sighand;
-
-if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
-return -EINVAL;
-else
-return 0;
-}
-
-/*
-* Unshare vm if it is being shared
-*/
-static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
-{
-struct mm_struct *mm = current->mm;
-
-if ((unshare_flags & CLONE_VM) &&
-(mm && atomic_read(&mm->mm_users) > 1)) {
-return -EINVAL;
-}
-
-return 0;
-}
-
-/*
 * Unshare file descriptor table if it is being shared
 */
 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
@@ -1626,45 +1587,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
 */
 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
-int err = 0;
 struct fs_struct *fs, *new_fs = NULL;
-struct sighand_struct *new_sigh = NULL;
-struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 struct files_struct *fd, *new_fd = NULL;
 struct nsproxy *new_nsproxy = NULL;
 int do_sysvsem = 0;
+int err;
 
-check_unshare_flags(&unshare_flags);
-
-/* Return -EINVAL for all unsupported flags */
-err = -EINVAL;
-if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+err = check_unshare_flags(unshare_flags);
+if (err)
 goto bad_unshare_out;
 
 /*
+* If unsharing namespace, must also unshare filesystem information.
+*/
+if (unshare_flags & CLONE_NEWNS)
+unshare_flags |= CLONE_FS;
+/*
 * CLONE_NEWIPC must also detach from the undolist: after switching
 * to a new ipc namespace, the semaphore arrays from the old
 * namespace are unreachable.
 */
 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 do_sysvsem = 1;
-if ((err = unshare_thread(unshare_flags)))
-goto bad_unshare_out;
 if ((err = unshare_fs(unshare_flags, &new_fs)))
-goto bad_unshare_cleanup_thread;
-if ((err = unshare_sighand(unshare_flags, &new_sigh)))
-goto bad_unshare_cleanup_fs;
-if ((err = unshare_vm(unshare_flags, &new_mm)))
-goto bad_unshare_cleanup_sigh;
+goto bad_unshare_out;
 if ((err = unshare_fd(unshare_flags, &new_fd)))
-goto bad_unshare_cleanup_vm;
+goto bad_unshare_cleanup_fs;
 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
 new_fs)))
 goto bad_unshare_cleanup_fd;
 
-if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) {
+if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
 if (do_sysvsem) {
 /*
 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1690,19 +1643,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 spin_unlock(&fs->lock);
 }
 
-if (new_mm) {
-mm = current->mm;
-active_mm = current->active_mm;
-current->mm = new_mm;
-current->active_mm = new_mm;
-if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-atomic_dec(&mm->oom_disable_count);
-atomic_inc(&new_mm->oom_disable_count);
-}
-activate_mm(active_mm, new_mm);
-new_mm = mm;
-}
-
 if (new_fd) {
 fd = current->files;
 current->files = new_fd;
@@ -1719,20 +1659,10 @@ bad_unshare_cleanup_fd:
 if (new_fd)
 put_files_struct(new_fd);
 
-bad_unshare_cleanup_vm:
-if (new_mm)
-mmput(new_mm);
-
-bad_unshare_cleanup_sigh:
-if (new_sigh)
-if (atomic_dec_and_test(&new_sigh->count))
-kmem_cache_free(sighand_cachep, new_sigh);
-
 bad_unshare_cleanup_fs:
 if (new_fs)
 free_fs_struct(new_fs);
 
-bad_unshare_cleanup_thread:
 bad_unshare_out:
 return err;
 }
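
For context: after this change sys_unshare() rejects unsupported flag combinations up front, drops the never-functional CLONE_VM/CLONE_SIGHAND/CLONE_THREAD unshare paths, and still widens CLONE_NEWNS to include CLONE_FS internally. A user-space sketch of the common case, a private mount namespace (illustrative only; requires privilege):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        if (unshare(CLONE_NEWNS) == -1) {
                perror("unshare(CLONE_NEWNS)");  /* EPERM without CAP_SYS_ADMIN */
                return 1;
        }
        printf("pid %ld now has a private mount namespace\n", (long)getpid());
        return 0;
}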
diff --git a/kernel/futex.c b/kernel/futex.c
index bda415715382..fe28dc282eae 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -782,8 +782,8 @@ static void __unqueue_futex(struct futex_q *q)
 {
 struct futex_hash_bucket *hb;
 
-if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
-|| plist_node_empty(&q->list)))
+if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+|| WARN_ON(plist_node_empty(&q->list)))
 return;
 
 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
@@ -1886,7 +1886,7 @@ retry:
 restart->futex.val = val;
 restart->futex.time = abs_time->tv64;
 restart->futex.bitset = bitset;
-restart->futex.flags = flags;
+restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
 ret = -ERESTART_RESTARTBLOCK;
 
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 goto err_unlock;
 ret = -EPERM;
 pcred = __task_cred(p);
+/* If victim is in different user_ns, then uids are not
+comparable, so we must have CAP_SYS_PTRACE */
+if (cred->user->user_ns != pcred->user->user_ns) {
+if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+goto err_unlock;
+goto ok;
+}
+/* If victim is in same user_ns, then uids are comparable */
 if (cred->euid != pcred->euid &&
 cred->euid != pcred->uid &&
-!capable(CAP_SYS_PTRACE))
+!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
 goto err_unlock;
+ok:
 head = p->robust_list;
 rcu_read_unlock();
 }
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac75e5b..5f9e689dc8f0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 goto err_unlock;
 ret = -EPERM;
 pcred = __task_cred(p);
+/* If victim is in different user_ns, then uids are not
+comparable, so we must have CAP_SYS_PTRACE */
+if (cred->user->user_ns != pcred->user->user_ns) {
+if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+goto err_unlock;
+goto ok;
+}
+/* If victim is in same user_ns, then uids are comparable */
 if (cred->euid != pcred->euid &&
 cred->euid != pcred->uid &&
-!capable(CAP_SYS_PTRACE))
+!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
 goto err_unlock;
+ok:
 head = p->compat_robust_list;
 rcu_read_unlock();
 }
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 3f761001d517..e97ca59e2520 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,3 +1,3 @@
-EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
+ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
 
 obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f35cf4..1cc476d52dd3 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 struct group_info *group_info;
 int retval;
 
-if (!capable(CAP_SETGID))
+if (!nsown_capable(CAP_SETGID))
 return -EPERM;
 if ((unsigned)gidsetsize > NGROUPS_MAX)
 return -EINVAL;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 09bef82d74cb..c574f9a12c48 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -10,13 +10,6 @@ menu "IRQ subsystem"
 config GENERIC_HARDIRQS
 def_bool y
 
-# Select this to disable the deprecated stuff
-config GENERIC_HARDIRQS_NO_DEPRECATED
-bool
-
-config GENERIC_HARDIRQS_NO_COMPAT
-bool
-
 # Options selectable by the architecture code
 
 # Make sparse irq Kconfig switch below available
@@ -31,6 +24,10 @@ config GENERIC_IRQ_PROBE
 config GENERIC_IRQ_SHOW
 bool
 
+# Print level/edge extra information
+config GENERIC_IRQ_SHOW_LEVEL
+bool
+
 # Support for delayed migration from interrupt context
 config GENERIC_PENDING_IRQ
 bool
@@ -47,6 +44,10 @@ config HARDIRQS_SW_RESEND
 config IRQ_PREFLOW_FASTEOI
 bool
 
+# Edge style eoi based handler (cell)
+config IRQ_EDGE_EOI_HANDLER
+bool
+
 # Support forced irq threading
 config IRQ_FORCED_THREADING
 bool
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 394784c57060..342d8f44e401 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c | |||
@@ -70,10 +70,8 @@ unsigned long probe_irq_on(void) | |||
70 | raw_spin_lock_irq(&desc->lock); | 70 | raw_spin_lock_irq(&desc->lock); |
71 | if (!desc->action && irq_settings_can_probe(desc)) { | 71 | if (!desc->action && irq_settings_can_probe(desc)) { |
72 | desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; | 72 | desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; |
73 | if (irq_startup(desc)) { | 73 | if (irq_startup(desc)) |
74 | irq_compat_set_pending(desc); | ||
75 | desc->istate |= IRQS_PENDING; | 74 | desc->istate |= IRQS_PENDING; |
76 | } | ||
77 | } | 75 | } |
78 | raw_spin_unlock_irq(&desc->lock); | 76 | raw_spin_unlock_irq(&desc->lock); |
79 | } | 77 | } |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c9c0601f0615..4af1e2b244cb 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -34,9 +34,14 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip) | |||
34 | if (!chip) | 34 | if (!chip) |
35 | chip = &no_irq_chip; | 35 | chip = &no_irq_chip; |
36 | 36 | ||
37 | irq_chip_set_defaults(chip); | ||
38 | desc->irq_data.chip = chip; | 37 | desc->irq_data.chip = chip; |
39 | irq_put_desc_unlock(desc, flags); | 38 | irq_put_desc_unlock(desc, flags); |
39 | /* | ||
40 | * For !CONFIG_SPARSE_IRQ make the irq show up in | ||
41 | * allocated_irqs. For the CONFIG_SPARSE_IRQ case, it is | ||
42 | * already marked, and this call is harmless. | ||
43 | */ | ||
44 | irq_reserve_irq(irq); | ||
40 | return 0; | 45 | return 0; |
41 | } | 46 | } |
42 | EXPORT_SYMBOL(irq_set_chip); | 47 | EXPORT_SYMBOL(irq_set_chip); |
@@ -134,26 +139,22 @@ EXPORT_SYMBOL_GPL(irq_get_irq_data); | |||
134 | 139 | ||
135 | static void irq_state_clr_disabled(struct irq_desc *desc) | 140 | static void irq_state_clr_disabled(struct irq_desc *desc) |
136 | { | 141 | { |
137 | desc->istate &= ~IRQS_DISABLED; | 142 | irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); |
138 | irq_compat_clr_disabled(desc); | ||
139 | } | 143 | } |
140 | 144 | ||
141 | static void irq_state_set_disabled(struct irq_desc *desc) | 145 | static void irq_state_set_disabled(struct irq_desc *desc) |
142 | { | 146 | { |
143 | desc->istate |= IRQS_DISABLED; | 147 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); |
144 | irq_compat_set_disabled(desc); | ||
145 | } | 148 | } |
146 | 149 | ||
147 | static void irq_state_clr_masked(struct irq_desc *desc) | 150 | static void irq_state_clr_masked(struct irq_desc *desc) |
148 | { | 151 | { |
149 | desc->istate &= ~IRQS_MASKED; | 152 | irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); |
150 | irq_compat_clr_masked(desc); | ||
151 | } | 153 | } |
152 | 154 | ||
153 | static void irq_state_set_masked(struct irq_desc *desc) | 155 | static void irq_state_set_masked(struct irq_desc *desc) |
154 | { | 156 | { |
155 | desc->istate |= IRQS_MASKED; | 157 | irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); |
156 | irq_compat_set_masked(desc); | ||
157 | } | 158 | } |
158 | 159 | ||
159 | int irq_startup(struct irq_desc *desc) | 160 | int irq_startup(struct irq_desc *desc) |
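With the irq_compat_* calls gone, the disabled/masked bits live only in the irq_data state word; a small illustrative sketch of the accessors used throughout the rest of this series:

	/* Illustration only: test and update state via irq_data accessors
	 * instead of desc->istate flags. */
	if (irqd_irq_disabled(&desc->irq_data))		/* was: istate & IRQS_DISABLED */
		return;
	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);	/* was: istate |= IRQS_MASKED */
	/* ... interrupt is masked here ... */
	irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED);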
@@ -203,126 +204,6 @@ void irq_disable(struct irq_desc *desc) | |||
203 | } | 204 | } |
204 | } | 205 | } |
205 | 206 | ||
206 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
207 | /* Temporary migration helpers */ | ||
208 | static void compat_irq_mask(struct irq_data *data) | ||
209 | { | ||
210 | data->chip->mask(data->irq); | ||
211 | } | ||
212 | |||
213 | static void compat_irq_unmask(struct irq_data *data) | ||
214 | { | ||
215 | data->chip->unmask(data->irq); | ||
216 | } | ||
217 | |||
218 | static void compat_irq_ack(struct irq_data *data) | ||
219 | { | ||
220 | data->chip->ack(data->irq); | ||
221 | } | ||
222 | |||
223 | static void compat_irq_mask_ack(struct irq_data *data) | ||
224 | { | ||
225 | data->chip->mask_ack(data->irq); | ||
226 | } | ||
227 | |||
228 | static void compat_irq_eoi(struct irq_data *data) | ||
229 | { | ||
230 | data->chip->eoi(data->irq); | ||
231 | } | ||
232 | |||
233 | static void compat_irq_enable(struct irq_data *data) | ||
234 | { | ||
235 | data->chip->enable(data->irq); | ||
236 | } | ||
237 | |||
238 | static void compat_irq_disable(struct irq_data *data) | ||
239 | { | ||
240 | data->chip->disable(data->irq); | ||
241 | } | ||
242 | |||
243 | static void compat_irq_shutdown(struct irq_data *data) | ||
244 | { | ||
245 | data->chip->shutdown(data->irq); | ||
246 | } | ||
247 | |||
248 | static unsigned int compat_irq_startup(struct irq_data *data) | ||
249 | { | ||
250 | return data->chip->startup(data->irq); | ||
251 | } | ||
252 | |||
253 | static int compat_irq_set_affinity(struct irq_data *data, | ||
254 | const struct cpumask *dest, bool force) | ||
255 | { | ||
256 | return data->chip->set_affinity(data->irq, dest); | ||
257 | } | ||
258 | |||
259 | static int compat_irq_set_type(struct irq_data *data, unsigned int type) | ||
260 | { | ||
261 | return data->chip->set_type(data->irq, type); | ||
262 | } | ||
263 | |||
264 | static int compat_irq_set_wake(struct irq_data *data, unsigned int on) | ||
265 | { | ||
266 | return data->chip->set_wake(data->irq, on); | ||
267 | } | ||
268 | |||
269 | static int compat_irq_retrigger(struct irq_data *data) | ||
270 | { | ||
271 | return data->chip->retrigger(data->irq); | ||
272 | } | ||
273 | |||
274 | static void compat_bus_lock(struct irq_data *data) | ||
275 | { | ||
276 | data->chip->bus_lock(data->irq); | ||
277 | } | ||
278 | |||
279 | static void compat_bus_sync_unlock(struct irq_data *data) | ||
280 | { | ||
281 | data->chip->bus_sync_unlock(data->irq); | ||
282 | } | ||
283 | #endif | ||
284 | |||
285 | /* | ||
286 | * Fixup enable/disable function pointers | ||
287 | */ | ||
288 | void irq_chip_set_defaults(struct irq_chip *chip) | ||
289 | { | ||
290 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
291 | if (chip->enable) | ||
292 | chip->irq_enable = compat_irq_enable; | ||
293 | if (chip->disable) | ||
294 | chip->irq_disable = compat_irq_disable; | ||
295 | if (chip->shutdown) | ||
296 | chip->irq_shutdown = compat_irq_shutdown; | ||
297 | if (chip->startup) | ||
298 | chip->irq_startup = compat_irq_startup; | ||
299 | if (!chip->end) | ||
300 | chip->end = dummy_irq_chip.end; | ||
301 | if (chip->bus_lock) | ||
302 | chip->irq_bus_lock = compat_bus_lock; | ||
303 | if (chip->bus_sync_unlock) | ||
304 | chip->irq_bus_sync_unlock = compat_bus_sync_unlock; | ||
305 | if (chip->mask) | ||
306 | chip->irq_mask = compat_irq_mask; | ||
307 | if (chip->unmask) | ||
308 | chip->irq_unmask = compat_irq_unmask; | ||
309 | if (chip->ack) | ||
310 | chip->irq_ack = compat_irq_ack; | ||
311 | if (chip->mask_ack) | ||
312 | chip->irq_mask_ack = compat_irq_mask_ack; | ||
313 | if (chip->eoi) | ||
314 | chip->irq_eoi = compat_irq_eoi; | ||
315 | if (chip->set_affinity) | ||
316 | chip->irq_set_affinity = compat_irq_set_affinity; | ||
317 | if (chip->set_type) | ||
318 | chip->irq_set_type = compat_irq_set_type; | ||
319 | if (chip->set_wake) | ||
320 | chip->irq_set_wake = compat_irq_set_wake; | ||
321 | if (chip->retrigger) | ||
322 | chip->irq_retrigger = compat_irq_retrigger; | ||
323 | #endif | ||
324 | } | ||
325 | |||
326 | static inline void mask_ack_irq(struct irq_desc *desc) | 207 | static inline void mask_ack_irq(struct irq_desc *desc) |
327 | { | 208 | { |
328 | if (desc->irq_data.chip->irq_mask_ack) | 209 | if (desc->irq_data.chip->irq_mask_ack) |
@@ -372,11 +253,10 @@ void handle_nested_irq(unsigned int irq) | |||
372 | kstat_incr_irqs_this_cpu(irq, desc); | 253 | kstat_incr_irqs_this_cpu(irq, desc); |
373 | 254 | ||
374 | action = desc->action; | 255 | action = desc->action; |
375 | if (unlikely(!action || (desc->istate & IRQS_DISABLED))) | 256 | if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) |
376 | goto out_unlock; | 257 | goto out_unlock; |
377 | 258 | ||
378 | irq_compat_set_progress(desc); | 259 | irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
379 | desc->istate |= IRQS_INPROGRESS; | ||
380 | raw_spin_unlock_irq(&desc->lock); | 260 | raw_spin_unlock_irq(&desc->lock); |
381 | 261 | ||
382 | action_ret = action->thread_fn(action->irq, action->dev_id); | 262 | action_ret = action->thread_fn(action->irq, action->dev_id); |
@@ -384,8 +264,7 @@ void handle_nested_irq(unsigned int irq) | |||
384 | note_interrupt(irq, desc, action_ret); | 264 | note_interrupt(irq, desc, action_ret); |
385 | 265 | ||
386 | raw_spin_lock_irq(&desc->lock); | 266 | raw_spin_lock_irq(&desc->lock); |
387 | desc->istate &= ~IRQS_INPROGRESS; | 267 | irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
388 | irq_compat_clr_progress(desc); | ||
389 | 268 | ||
390 | out_unlock: | 269 | out_unlock: |
391 | raw_spin_unlock_irq(&desc->lock); | 270 | raw_spin_unlock_irq(&desc->lock); |
@@ -416,14 +295,14 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) | |||
416 | { | 295 | { |
417 | raw_spin_lock(&desc->lock); | 296 | raw_spin_lock(&desc->lock); |
418 | 297 | ||
419 | if (unlikely(desc->istate & IRQS_INPROGRESS)) | 298 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) |
420 | if (!irq_check_poll(desc)) | 299 | if (!irq_check_poll(desc)) |
421 | goto out_unlock; | 300 | goto out_unlock; |
422 | 301 | ||
423 | desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); | 302 | desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); |
424 | kstat_incr_irqs_this_cpu(irq, desc); | 303 | kstat_incr_irqs_this_cpu(irq, desc); |
425 | 304 | ||
426 | if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) | 305 | if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) |
427 | goto out_unlock; | 306 | goto out_unlock; |
428 | 307 | ||
429 | handle_irq_event(desc); | 308 | handle_irq_event(desc); |
@@ -448,7 +327,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
448 | raw_spin_lock(&desc->lock); | 327 | raw_spin_lock(&desc->lock); |
449 | mask_ack_irq(desc); | 328 | mask_ack_irq(desc); |
450 | 329 | ||
451 | if (unlikely(desc->istate & IRQS_INPROGRESS)) | 330 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) |
452 | if (!irq_check_poll(desc)) | 331 | if (!irq_check_poll(desc)) |
453 | goto out_unlock; | 332 | goto out_unlock; |
454 | 333 | ||
@@ -459,12 +338,12 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
459 | * If it's disabled or no action available | 338 | * If it's disabled or no action available |
460 | * keep it masked and get out of here | 339 | * keep it masked and get out of here |
461 | */ | 340 | */ |
462 | if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) | 341 | if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) |
463 | goto out_unlock; | 342 | goto out_unlock; |
464 | 343 | ||
465 | handle_irq_event(desc); | 344 | handle_irq_event(desc); |
466 | 345 | ||
467 | if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT))) | 346 | if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT)) |
468 | unmask_irq(desc); | 347 | unmask_irq(desc); |
469 | out_unlock: | 348 | out_unlock: |
470 | raw_spin_unlock(&desc->lock); | 349 | raw_spin_unlock(&desc->lock); |
@@ -496,7 +375,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
496 | { | 375 | { |
497 | raw_spin_lock(&desc->lock); | 376 | raw_spin_lock(&desc->lock); |
498 | 377 | ||
499 | if (unlikely(desc->istate & IRQS_INPROGRESS)) | 378 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) |
500 | if (!irq_check_poll(desc)) | 379 | if (!irq_check_poll(desc)) |
501 | goto out; | 380 | goto out; |
502 | 381 | ||
@@ -507,8 +386,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
507 | * If it's disabled or no action available | 386 | * If it's disabled or no action available |
508 | * then mask it and get out of here: | 387 | * then mask it and get out of here: |
509 | */ | 388 | */ |
510 | if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) { | 389 | if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { |
511 | irq_compat_set_pending(desc); | ||
512 | desc->istate |= IRQS_PENDING; | 390 | desc->istate |= IRQS_PENDING; |
513 | mask_irq(desc); | 391 | mask_irq(desc); |
514 | goto out; | 392 | goto out; |
@@ -537,7 +415,7 @@ out: | |||
537 | * @desc: the interrupt description structure for this irq | 415 | * @desc: the interrupt description structure for this irq |
538 | * | 416 | * |
539 | * Interrupt occurs on the falling and/or rising edge of a hardware | 417 | * Interrupt occurs on the falling and/or rising edge of a hardware |
540 | * signal. The occurence is latched into the irq controller hardware | 418 | * signal. The occurrence is latched into the irq controller hardware |
541 | * and must be acked in order to be reenabled. After the ack another | 419 | * and must be acked in order to be reenabled. After the ack another |
542 | * interrupt can happen on the same source even before the first one | 420 | * interrupt can happen on the same source even before the first one |
543 | * is handled by the associated event handler. If this happens it | 421 | * is handled by the associated event handler. If this happens it |
@@ -558,10 +436,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
558 | * we shouldn't process the IRQ. Mark it pending, handle | 436 | * we shouldn't process the IRQ. Mark it pending, handle |
559 | * the necessary masking and go out | 437 | * the necessary masking and go out |
560 | */ | 438 | */ |
561 | if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) || | 439 | if (unlikely(irqd_irq_disabled(&desc->irq_data) || |
562 | !desc->action))) { | 440 | irqd_irq_inprogress(&desc->irq_data) || !desc->action)) { |
563 | if (!irq_check_poll(desc)) { | 441 | if (!irq_check_poll(desc)) { |
564 | irq_compat_set_pending(desc); | ||
565 | desc->istate |= IRQS_PENDING; | 442 | desc->istate |= IRQS_PENDING; |
566 | mask_ack_irq(desc); | 443 | mask_ack_irq(desc); |
567 | goto out_unlock; | 444 | goto out_unlock; |
@@ -584,20 +461,65 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
584 | * Re-enable it, if it was not disabled in the meantime. | 461 | * Re-enable it, if it was not disabled in the meantime. |
585 | */ | 462 | */ |
586 | if (unlikely(desc->istate & IRQS_PENDING)) { | 463 | if (unlikely(desc->istate & IRQS_PENDING)) { |
587 | if (!(desc->istate & IRQS_DISABLED) && | 464 | if (!irqd_irq_disabled(&desc->irq_data) && |
588 | (desc->istate & IRQS_MASKED)) | 465 | irqd_irq_masked(&desc->irq_data)) |
589 | unmask_irq(desc); | 466 | unmask_irq(desc); |
590 | } | 467 | } |
591 | 468 | ||
592 | handle_irq_event(desc); | 469 | handle_irq_event(desc); |
593 | 470 | ||
594 | } while ((desc->istate & IRQS_PENDING) && | 471 | } while ((desc->istate & IRQS_PENDING) && |
595 | !(desc->istate & IRQS_DISABLED)); | 472 | !irqd_irq_disabled(&desc->irq_data)); |
596 | 473 | ||
597 | out_unlock: | 474 | out_unlock: |
598 | raw_spin_unlock(&desc->lock); | 475 | raw_spin_unlock(&desc->lock); |
599 | } | 476 | } |
600 | 477 | ||
478 | #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER | ||
479 | /** | ||
480 | * handle_edge_eoi_irq - edge eoi type IRQ handler | ||
481 | * @irq: the interrupt number | ||
482 | * @desc: the interrupt description structure for this irq | ||
483 | * | ||
484 | * Similar to the above handle_edge_irq, but using eoi and w/o the | ||
485 | * mask/unmask logic. | ||
486 | */ | ||
487 | void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) | ||
488 | { | ||
489 | struct irq_chip *chip = irq_desc_get_chip(desc); | ||
490 | |||
491 | raw_spin_lock(&desc->lock); | ||
492 | |||
493 | desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); | ||
494 | /* | ||
495 | * If we're currently running this IRQ, or it's disabled, | ||
496 | * we shouldn't process the IRQ. Mark it pending, handle | ||
497 | * the necessary masking and go out | ||
498 | */ | ||
499 | if (unlikely(irqd_irq_disabled(&desc->irq_data) || | ||
500 | irqd_irq_inprogress(&desc->irq_data) || !desc->action)) { | ||
501 | if (!irq_check_poll(desc)) { | ||
502 | desc->istate |= IRQS_PENDING; | ||
503 | goto out_eoi; | ||
504 | } | ||
505 | } | ||
506 | kstat_incr_irqs_this_cpu(irq, desc); | ||
507 | |||
508 | do { | ||
509 | if (unlikely(!desc->action)) | ||
510 | goto out_eoi; | ||
511 | |||
512 | handle_irq_event(desc); | ||
513 | |||
514 | } while ((desc->istate & IRQS_PENDING) && | ||
515 | !irqd_irq_disabled(&desc->irq_data)); | ||
516 | |||
517 | out_eoi: | ||
518 | chip->irq_eoi(&desc->irq_data); | ||
519 | raw_spin_unlock(&desc->lock); | ||
520 | } | ||
521 | #endif | ||
522 | |||
601 | /** | 523 | /** |
602 | * handle_percpu_irq - Per CPU local irq handler | 524 | * handle_percpu_irq - Per CPU local irq handler |
603 | * @irq: the interrupt number | 525 | * @irq: the interrupt number |
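A hedged example of how an architecture selecting IRQ_EDGE_EOI_HANDLER would wire the new flow handler up; the chip and its callback below are hypothetical (handle_edge_eoi_irq only requires irq_eoi):

	static struct irq_chip demo_edge_eoi_chip = {
		.name		= "demo-eoi",
		.irq_eoi	= demo_eoi,	/* hypothetical chip callback */
	};

	irq_set_chip_and_handler(virq, &demo_edge_eoi_chip, handle_edge_eoi_irq);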
@@ -642,8 +564,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
642 | if (handle == handle_bad_irq) { | 564 | if (handle == handle_bad_irq) { |
643 | if (desc->irq_data.chip != &no_irq_chip) | 565 | if (desc->irq_data.chip != &no_irq_chip) |
644 | mask_ack_irq(desc); | 566 | mask_ack_irq(desc); |
645 | irq_compat_set_disabled(desc); | 567 | irq_state_set_disabled(desc); |
646 | desc->istate |= IRQS_DISABLED; | ||
647 | desc->depth = 1; | 568 | desc->depth = 1; |
648 | } | 569 | } |
649 | desc->handle_irq = handle; | 570 | desc->handle_irq = handle; |
@@ -684,8 +605,70 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) | |||
684 | irqd_set(&desc->irq_data, IRQD_PER_CPU); | 605 | irqd_set(&desc->irq_data, IRQD_PER_CPU); |
685 | if (irq_settings_can_move_pcntxt(desc)) | 606 | if (irq_settings_can_move_pcntxt(desc)) |
686 | irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); | 607 | irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); |
608 | if (irq_settings_is_level(desc)) | ||
609 | irqd_set(&desc->irq_data, IRQD_LEVEL); | ||
687 | 610 | ||
688 | irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); | 611 | irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); |
689 | 612 | ||
690 | irq_put_desc_unlock(desc, flags); | 613 | irq_put_desc_unlock(desc, flags); |
691 | } | 614 | } |
615 | |||
616 | /** | ||
617 | * irq_cpu_online - Invoke all irq_cpu_online functions. | ||
618 | * | ||
619 | * Iterate through all irqs and invoke the chip.irq_cpu_online() | ||
620 | * for each. | ||
621 | */ | ||
622 | void irq_cpu_online(void) | ||
623 | { | ||
624 | struct irq_desc *desc; | ||
625 | struct irq_chip *chip; | ||
626 | unsigned long flags; | ||
627 | unsigned int irq; | ||
628 | |||
629 | for_each_active_irq(irq) { | ||
630 | desc = irq_to_desc(irq); | ||
631 | if (!desc) | ||
632 | continue; | ||
633 | |||
634 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
635 | |||
636 | chip = irq_data_get_irq_chip(&desc->irq_data); | ||
637 | if (chip && chip->irq_cpu_online && | ||
638 | (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || | ||
639 | !irqd_irq_disabled(&desc->irq_data))) | ||
640 | chip->irq_cpu_online(&desc->irq_data); | ||
641 | |||
642 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
643 | } | ||
644 | } | ||
645 | |||
646 | /** | ||
647 | * irq_cpu_offline - Invoke all irq_cpu_offline functions. | ||
648 | * | ||
649 | * Iterate through all irqs and invoke the chip.irq_cpu_offline() | ||
650 | * for each. | ||
651 | */ | ||
652 | void irq_cpu_offline(void) | ||
653 | { | ||
654 | struct irq_desc *desc; | ||
655 | struct irq_chip *chip; | ||
656 | unsigned long flags; | ||
657 | unsigned int irq; | ||
658 | |||
659 | for_each_active_irq(irq) { | ||
660 | desc = irq_to_desc(irq); | ||
661 | if (!desc) | ||
662 | continue; | ||
663 | |||
664 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
665 | |||
666 | chip = irq_data_get_irq_chip(&desc->irq_data); | ||
667 | if (chip && chip->irq_cpu_offline && | ||
668 | (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || | ||
669 | !irqd_irq_disabled(&desc->irq_data))) | ||
670 | chip->irq_cpu_offline(&desc->irq_data); | ||
671 | |||
672 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
673 | } | ||
674 | } | ||
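A sketch of a hypothetical irq_chip using the new hotplug hooks; arch CPU-hotplug code then simply calls irq_cpu_online()/irq_cpu_offline(). With IRQCHIP_ONOFFLINE_ENABLED set, the callback is skipped for disabled interrupts, matching the test above:

	static void demo_cpu_online(struct irq_data *d)
	{
		/* re-target the interrupt at the CPU that just came up */
	}

	static struct irq_chip demo_chip = {
		.name		= "demo",
		.irq_cpu_online	= demo_cpu_online,
		.flags		= IRQCHIP_ONOFFLINE_ENABLED,
	};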
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h deleted file mode 100644 index 6bbaf66aca85..000000000000 --- a/kernel/irq/compat.h +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * Compat layer for transition period | ||
3 | */ | ||
4 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT | ||
5 | static inline void irq_compat_set_progress(struct irq_desc *desc) | ||
6 | { | ||
7 | desc->status |= IRQ_INPROGRESS; | ||
8 | } | ||
9 | |||
10 | static inline void irq_compat_clr_progress(struct irq_desc *desc) | ||
11 | { | ||
12 | desc->status &= ~IRQ_INPROGRESS; | ||
13 | } | ||
14 | static inline void irq_compat_set_disabled(struct irq_desc *desc) | ||
15 | { | ||
16 | desc->status |= IRQ_DISABLED; | ||
17 | } | ||
18 | static inline void irq_compat_clr_disabled(struct irq_desc *desc) | ||
19 | { | ||
20 | desc->status &= ~IRQ_DISABLED; | ||
21 | } | ||
22 | static inline void irq_compat_set_pending(struct irq_desc *desc) | ||
23 | { | ||
24 | desc->status |= IRQ_PENDING; | ||
25 | } | ||
26 | |||
27 | static inline void irq_compat_clr_pending(struct irq_desc *desc) | ||
28 | { | ||
29 | desc->status &= ~IRQ_PENDING; | ||
30 | } | ||
31 | static inline void irq_compat_set_masked(struct irq_desc *desc) | ||
32 | { | ||
33 | desc->status |= IRQ_MASKED; | ||
34 | } | ||
35 | |||
36 | static inline void irq_compat_clr_masked(struct irq_desc *desc) | ||
37 | { | ||
38 | desc->status &= ~IRQ_MASKED; | ||
39 | } | ||
40 | static inline void irq_compat_set_move_pending(struct irq_desc *desc) | ||
41 | { | ||
42 | desc->status |= IRQ_MOVE_PENDING; | ||
43 | } | ||
44 | |||
45 | static inline void irq_compat_clr_move_pending(struct irq_desc *desc) | ||
46 | { | ||
47 | desc->status &= ~IRQ_MOVE_PENDING; | ||
48 | } | ||
49 | static inline void irq_compat_set_affinity(struct irq_desc *desc) | ||
50 | { | ||
51 | desc->status |= IRQ_AFFINITY_SET; | ||
52 | } | ||
53 | |||
54 | static inline void irq_compat_clr_affinity(struct irq_desc *desc) | ||
55 | { | ||
56 | desc->status &= ~IRQ_AFFINITY_SET; | ||
57 | } | ||
58 | #else | ||
59 | static inline void irq_compat_set_progress(struct irq_desc *desc) { } | ||
60 | static inline void irq_compat_clr_progress(struct irq_desc *desc) { } | ||
61 | static inline void irq_compat_set_disabled(struct irq_desc *desc) { } | ||
62 | static inline void irq_compat_clr_disabled(struct irq_desc *desc) { } | ||
63 | static inline void irq_compat_set_pending(struct irq_desc *desc) { } | ||
64 | static inline void irq_compat_clr_pending(struct irq_desc *desc) { } | ||
65 | static inline void irq_compat_set_masked(struct irq_desc *desc) { } | ||
66 | static inline void irq_compat_clr_masked(struct irq_desc *desc) { } | ||
67 | static inline void irq_compat_set_move_pending(struct irq_desc *desc) { } | ||
68 | static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { } | ||
69 | static inline void irq_compat_set_affinity(struct irq_desc *desc) { } | ||
70 | static inline void irq_compat_clr_affinity(struct irq_desc *desc) { } | ||
71 | #endif | ||
72 | |||
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index d1a33b7fa61d..306cba37e9a5 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h | |||
@@ -4,8 +4,10 @@ | |||
4 | 4 | ||
5 | #include <linux/kallsyms.h> | 5 | #include <linux/kallsyms.h> |
6 | 6 | ||
7 | #define P(f) if (desc->status & f) printk("%14s set\n", #f) | 7 | #define P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f) |
8 | #define PS(f) if (desc->istate & f) printk("%14s set\n", #f) | 8 | #define PS(f) if (desc->istate & f) printk("%14s set\n", #f) |
9 | /* FIXME */ | ||
10 | #define PD(f) do { } while (0) | ||
9 | 11 | ||
10 | static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | 12 | static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) |
11 | { | 13 | { |
@@ -28,13 +30,15 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | |||
28 | P(IRQ_NOAUTOEN); | 30 | P(IRQ_NOAUTOEN); |
29 | 31 | ||
30 | PS(IRQS_AUTODETECT); | 32 | PS(IRQS_AUTODETECT); |
31 | PS(IRQS_INPROGRESS); | ||
32 | PS(IRQS_REPLAY); | 33 | PS(IRQS_REPLAY); |
33 | PS(IRQS_WAITING); | 34 | PS(IRQS_WAITING); |
34 | PS(IRQS_DISABLED); | ||
35 | PS(IRQS_PENDING); | 35 | PS(IRQS_PENDING); |
36 | PS(IRQS_MASKED); | 36 | |
37 | PD(IRQS_INPROGRESS); | ||
38 | PD(IRQS_DISABLED); | ||
39 | PD(IRQS_MASKED); | ||
37 | } | 40 | } |
38 | 41 | ||
39 | #undef P | 42 | #undef P |
40 | #undef PS | 43 | #undef PS |
44 | #undef PD | ||
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 20dc5474947e..b5fcd96c7102 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c | |||
@@ -31,13 +31,6 @@ static unsigned int noop_ret(struct irq_data *data) | |||
31 | return 0; | 31 | return 0; |
32 | } | 32 | } |
33 | 33 | ||
34 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
35 | static void compat_noop(unsigned int irq) { } | ||
36 | #define END_INIT .end = compat_noop | ||
37 | #else | ||
38 | #define END_INIT | ||
39 | #endif | ||
40 | |||
41 | /* | 34 | /* |
42 | * Generic no controller implementation | 35 | * Generic no controller implementation |
43 | */ | 36 | */ |
@@ -48,7 +41,6 @@ struct irq_chip no_irq_chip = { | |||
48 | .irq_enable = noop, | 41 | .irq_enable = noop, |
49 | .irq_disable = noop, | 42 | .irq_disable = noop, |
50 | .irq_ack = ack_bad, | 43 | .irq_ack = ack_bad, |
51 | END_INIT | ||
52 | }; | 44 | }; |
53 | 45 | ||
54 | /* | 46 | /* |
@@ -64,5 +56,4 @@ struct irq_chip dummy_irq_chip = { | |||
64 | .irq_ack = noop, | 56 | .irq_ack = noop, |
65 | .irq_mask = noop, | 57 | .irq_mask = noop, |
66 | .irq_unmask = noop, | 58 | .irq_unmask = noop, |
67 | END_INIT | ||
68 | }; | 59 | }; |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 517561fc7317..90cb55f6d7eb 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -175,28 +175,13 @@ irqreturn_t handle_irq_event(struct irq_desc *desc) | |||
175 | struct irqaction *action = desc->action; | 175 | struct irqaction *action = desc->action; |
176 | irqreturn_t ret; | 176 | irqreturn_t ret; |
177 | 177 | ||
178 | irq_compat_clr_pending(desc); | ||
179 | desc->istate &= ~IRQS_PENDING; | 178 | desc->istate &= ~IRQS_PENDING; |
180 | irq_compat_set_progress(desc); | 179 | irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
181 | desc->istate |= IRQS_INPROGRESS; | ||
182 | raw_spin_unlock(&desc->lock); | 180 | raw_spin_unlock(&desc->lock); |
183 | 181 | ||
184 | ret = handle_irq_event_percpu(desc, action); | 182 | ret = handle_irq_event_percpu(desc, action); |
185 | 183 | ||
186 | raw_spin_lock(&desc->lock); | 184 | raw_spin_lock(&desc->lock); |
187 | desc->istate &= ~IRQS_INPROGRESS; | 185 | irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
188 | irq_compat_clr_progress(desc); | ||
189 | return ret; | 186 | return ret; |
190 | } | 187 | } |
191 | |||
192 | /** | ||
193 | * handle_IRQ_event - irq action chain handler | ||
194 | * @irq: the interrupt number | ||
195 | * @action: the interrupt action chain for this irq | ||
196 | * | ||
197 | * Handles the action chain of an irq event | ||
198 | */ | ||
199 | irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | ||
200 | { | ||
201 | return handle_irq_event_percpu(irq_to_desc(irq), action); | ||
202 | } | ||
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 6c6ec9a49027..6546431447d7 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
@@ -15,10 +15,6 @@ | |||
15 | 15 | ||
16 | #define istate core_internal_state__do_not_mess_with_it | 16 | #define istate core_internal_state__do_not_mess_with_it |
17 | 17 | ||
18 | #ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT | ||
19 | # define status status_use_accessors | ||
20 | #endif | ||
21 | |||
22 | extern int noirqdebug; | 18 | extern int noirqdebug; |
23 | 19 | ||
24 | /* | 20 | /* |
@@ -44,38 +40,28 @@ enum { | |||
44 | * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt | 40 | * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt |
45 | * detection | 41 | * detection |
46 | * IRQS_POLL_INPROGRESS - polling in progress | 42 | * IRQS_POLL_INPROGRESS - polling in progress |
47 | * IRQS_INPROGRESS - Interrupt in progress | ||
48 | * IRQS_ONESHOT - irq is not unmasked in primary handler | 43 | * IRQS_ONESHOT - irq is not unmasked in primary handler |
49 | * IRQS_REPLAY - irq is replayed | 44 | * IRQS_REPLAY - irq is replayed |
50 | * IRQS_WAITING - irq is waiting | 45 | * IRQS_WAITING - irq is waiting |
51 | * IRQS_DISABLED - irq is disabled | ||
52 | * IRQS_PENDING - irq is pending and replayed later | 46 | * IRQS_PENDING - irq is pending and replayed later |
53 | * IRQS_MASKED - irq is masked | ||
54 | * IRQS_SUSPENDED - irq is suspended | 47 | * IRQS_SUSPENDED - irq is suspended |
55 | */ | 48 | */ |
56 | enum { | 49 | enum { |
57 | IRQS_AUTODETECT = 0x00000001, | 50 | IRQS_AUTODETECT = 0x00000001, |
58 | IRQS_SPURIOUS_DISABLED = 0x00000002, | 51 | IRQS_SPURIOUS_DISABLED = 0x00000002, |
59 | IRQS_POLL_INPROGRESS = 0x00000008, | 52 | IRQS_POLL_INPROGRESS = 0x00000008, |
60 | IRQS_INPROGRESS = 0x00000010, | ||
61 | IRQS_ONESHOT = 0x00000020, | 53 | IRQS_ONESHOT = 0x00000020, |
62 | IRQS_REPLAY = 0x00000040, | 54 | IRQS_REPLAY = 0x00000040, |
63 | IRQS_WAITING = 0x00000080, | 55 | IRQS_WAITING = 0x00000080, |
64 | IRQS_DISABLED = 0x00000100, | ||
65 | IRQS_PENDING = 0x00000200, | 56 | IRQS_PENDING = 0x00000200, |
66 | IRQS_MASKED = 0x00000400, | ||
67 | IRQS_SUSPENDED = 0x00000800, | 57 | IRQS_SUSPENDED = 0x00000800, |
68 | }; | 58 | }; |
69 | 59 | ||
70 | #include "compat.h" | ||
71 | #include "debug.h" | 60 | #include "debug.h" |
72 | #include "settings.h" | 61 | #include "settings.h" |
73 | 62 | ||
74 | #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) | 63 | #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) |
75 | 64 | ||
76 | /* Set default functions for irq_chip structures: */ | ||
77 | extern void irq_chip_set_defaults(struct irq_chip *chip); | ||
78 | |||
79 | extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | 65 | extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, |
80 | unsigned long flags); | 66 | unsigned long flags); |
81 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); | 67 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); |
@@ -162,13 +148,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) | |||
162 | static inline void irqd_set_move_pending(struct irq_data *d) | 148 | static inline void irqd_set_move_pending(struct irq_data *d) |
163 | { | 149 | { |
164 | d->state_use_accessors |= IRQD_SETAFFINITY_PENDING; | 150 | d->state_use_accessors |= IRQD_SETAFFINITY_PENDING; |
165 | irq_compat_set_move_pending(irq_data_to_desc(d)); | ||
166 | } | 151 | } |
167 | 152 | ||
168 | static inline void irqd_clr_move_pending(struct irq_data *d) | 153 | static inline void irqd_clr_move_pending(struct irq_data *d) |
169 | { | 154 | { |
170 | d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING; | 155 | d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING; |
171 | irq_compat_clr_move_pending(irq_data_to_desc(d)); | ||
172 | } | 156 | } |
173 | 157 | ||
174 | static inline void irqd_clear(struct irq_data *d, unsigned int mask) | 158 | static inline void irqd_clear(struct irq_data *d, unsigned int mask) |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index dbccc799407f..2c039c9b9383 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -80,7 +80,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |||
80 | desc->irq_data.handler_data = NULL; | 80 | desc->irq_data.handler_data = NULL; |
81 | desc->irq_data.msi_desc = NULL; | 81 | desc->irq_data.msi_desc = NULL; |
82 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); | 82 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); |
83 | desc->istate = IRQS_DISABLED; | 83 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); |
84 | desc->handle_irq = handle_bad_irq; | 84 | desc->handle_irq = handle_bad_irq; |
85 | desc->depth = 1; | 85 | desc->depth = 1; |
86 | desc->irq_count = 0; | 86 | desc->irq_count = 0; |
@@ -198,15 +198,6 @@ err: | |||
198 | return -ENOMEM; | 198 | return -ENOMEM; |
199 | } | 199 | } |
200 | 200 | ||
201 | struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | ||
202 | { | ||
203 | int res = irq_alloc_descs(irq, irq, 1, node); | ||
204 | |||
205 | if (res == -EEXIST || res == irq) | ||
206 | return irq_to_desc(irq); | ||
207 | return NULL; | ||
208 | } | ||
209 | |||
210 | static int irq_expand_nr_irqs(unsigned int nr) | 201 | static int irq_expand_nr_irqs(unsigned int nr) |
211 | { | 202 | { |
212 | if (nr > IRQ_BITMAP_BITS) | 203 | if (nr > IRQ_BITMAP_BITS) |
@@ -247,7 +238,6 @@ int __init early_irq_init(void) | |||
247 | 238 | ||
248 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | 239 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { |
249 | [0 ... NR_IRQS-1] = { | 240 | [0 ... NR_IRQS-1] = { |
250 | .istate = IRQS_DISABLED, | ||
251 | .handle_irq = handle_bad_irq, | 241 | .handle_irq = handle_bad_irq, |
252 | .depth = 1, | 242 | .depth = 1, |
253 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), | 243 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), |
@@ -283,11 +273,6 @@ struct irq_desc *irq_to_desc(unsigned int irq) | |||
283 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; | 273 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; |
284 | } | 274 | } |
285 | 275 | ||
286 | struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | ||
287 | { | ||
288 | return irq_to_desc(irq); | ||
289 | } | ||
290 | |||
291 | static void free_desc(unsigned int irq) | 276 | static void free_desc(unsigned int irq) |
292 | { | 277 | { |
293 | dynamic_irq_cleanup(irq); | 278 | dynamic_irq_cleanup(irq); |
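With irq_to_desc_alloc_node() removed in both configurations, remaining callers go through the descriptor allocator directly; a sketch equivalent to the deleted wrapper:

	/* Equivalent of the removed irq_to_desc_alloc_node(irq, node): */
	int res = irq_alloc_descs(irq, irq, 1, node);

	if (res == -EEXIST || res == irq)
		desc = irq_to_desc(irq);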
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a2aa73e536c..07c1611f3899 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -41,7 +41,7 @@ early_param("threadirqs", setup_forced_irqthreads); | |||
41 | void synchronize_irq(unsigned int irq) | 41 | void synchronize_irq(unsigned int irq) |
42 | { | 42 | { |
43 | struct irq_desc *desc = irq_to_desc(irq); | 43 | struct irq_desc *desc = irq_to_desc(irq); |
44 | unsigned int state; | 44 | bool inprogress; |
45 | 45 | ||
46 | if (!desc) | 46 | if (!desc) |
47 | return; | 47 | return; |
@@ -53,16 +53,16 @@ void synchronize_irq(unsigned int irq) | |||
53 | * Wait until we're out of the critical section. This might | 53 | * Wait until we're out of the critical section. This might |
54 | * give the wrong answer due to the lack of memory barriers. | 54 | * give the wrong answer due to the lack of memory barriers. |
55 | */ | 55 | */ |
56 | while (desc->istate & IRQS_INPROGRESS) | 56 | while (irqd_irq_inprogress(&desc->irq_data)) |
57 | cpu_relax(); | 57 | cpu_relax(); |
58 | 58 | ||
59 | /* Ok, that indicated we're done: double-check carefully. */ | 59 | /* Ok, that indicated we're done: double-check carefully. */ |
60 | raw_spin_lock_irqsave(&desc->lock, flags); | 60 | raw_spin_lock_irqsave(&desc->lock, flags); |
61 | state = desc->istate; | 61 | inprogress = irqd_irq_inprogress(&desc->irq_data); |
62 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 62 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
63 | 63 | ||
64 | /* Oops, that failed? */ | 64 | /* Oops, that failed? */ |
65 | } while (state & IRQS_INPROGRESS); | 65 | } while (inprogress); |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * We made sure that no hardirq handler is running. Now verify | 68 | * We made sure that no hardirq handler is running. Now verify |
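Driver-visible semantics of synchronize_irq() are unchanged by the switch to irqd_irq_inprogress(); a typical (hypothetical) teardown sequence still reads:

	disable_irq_nosync(dev_irq);	/* dev_irq: hypothetical irq number */
	synchronize_irq(dev_irq);	/* returns once no handler is in progress */
	free_irq(dev_irq, dev);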
@@ -112,13 +112,13 @@ void irq_set_thread_affinity(struct irq_desc *desc) | |||
112 | } | 112 | } |
113 | 113 | ||
114 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 114 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
115 | static inline bool irq_can_move_pcntxt(struct irq_desc *desc) | 115 | static inline bool irq_can_move_pcntxt(struct irq_data *data) |
116 | { | 116 | { |
117 | return irq_settings_can_move_pcntxt(desc); | 117 | return irqd_can_move_in_process_context(data); |
118 | } | 118 | } |
119 | static inline bool irq_move_pending(struct irq_desc *desc) | 119 | static inline bool irq_move_pending(struct irq_data *data) |
120 | { | 120 | { |
121 | return irqd_is_setaffinity_pending(&desc->irq_data); | 121 | return irqd_is_setaffinity_pending(data); |
122 | } | 122 | } |
123 | static inline void | 123 | static inline void |
124 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) | 124 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) |
@@ -131,43 +131,34 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc) | |||
131 | cpumask_copy(mask, desc->pending_mask); | 131 | cpumask_copy(mask, desc->pending_mask); |
132 | } | 132 | } |
133 | #else | 133 | #else |
134 | static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; } | 134 | static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; } |
135 | static inline bool irq_move_pending(struct irq_desc *desc) { return false; } | 135 | static inline bool irq_move_pending(struct irq_data *data) { return false; } |
136 | static inline void | 136 | static inline void |
137 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } | 137 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } |
138 | static inline void | 138 | static inline void |
139 | irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } | 139 | irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | /** | 142 | int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) |
143 | * irq_set_affinity - Set the irq affinity of a given irq | ||
144 | * @irq: Interrupt to set affinity | ||
145 | * @cpumask: cpumask | ||
146 | * | ||
147 | */ | ||
148 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
149 | { | 143 | { |
150 | struct irq_desc *desc = irq_to_desc(irq); | 144 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
151 | struct irq_chip *chip = desc->irq_data.chip; | 145 | struct irq_desc *desc = irq_data_to_desc(data); |
152 | unsigned long flags; | ||
153 | int ret = 0; | 146 | int ret = 0; |
154 | 147 | ||
155 | if (!chip->irq_set_affinity) | 148 | if (!chip || !chip->irq_set_affinity) |
156 | return -EINVAL; | 149 | return -EINVAL; |
157 | 150 | ||
158 | raw_spin_lock_irqsave(&desc->lock, flags); | 151 | if (irq_can_move_pcntxt(data)) { |
159 | 152 | ret = chip->irq_set_affinity(data, mask, false); | |
160 | if (irq_can_move_pcntxt(desc)) { | ||
161 | ret = chip->irq_set_affinity(&desc->irq_data, mask, false); | ||
162 | switch (ret) { | 153 | switch (ret) { |
163 | case IRQ_SET_MASK_OK: | 154 | case IRQ_SET_MASK_OK: |
164 | cpumask_copy(desc->irq_data.affinity, mask); | 155 | cpumask_copy(data->affinity, mask); |
165 | case IRQ_SET_MASK_OK_NOCOPY: | 156 | case IRQ_SET_MASK_OK_NOCOPY: |
166 | irq_set_thread_affinity(desc); | 157 | irq_set_thread_affinity(desc); |
167 | ret = 0; | 158 | ret = 0; |
168 | } | 159 | } |
169 | } else { | 160 | } else { |
170 | irqd_set_move_pending(&desc->irq_data); | 161 | irqd_set_move_pending(data); |
171 | irq_copy_pending(desc, mask); | 162 | irq_copy_pending(desc, mask); |
172 | } | 163 | } |
173 | 164 | ||
@@ -175,8 +166,28 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
175 | kref_get(&desc->affinity_notify->kref); | 166 | kref_get(&desc->affinity_notify->kref); |
176 | schedule_work(&desc->affinity_notify->work); | 167 | schedule_work(&desc->affinity_notify->work); |
177 | } | 168 | } |
178 | irq_compat_set_affinity(desc); | 169 | irqd_set(data, IRQD_AFFINITY_SET); |
179 | irqd_set(&desc->irq_data, IRQD_AFFINITY_SET); | 170 | |
171 | return ret; | ||
172 | } | ||
173 | |||
174 | /** | ||
175 | * irq_set_affinity - Set the irq affinity of a given irq | ||
176 | * @irq: Interrupt to set affinity | ||
177 | * @mask: cpumask | ||
178 | * | ||
179 | */ | ||
180 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
181 | { | ||
182 | struct irq_desc *desc = irq_to_desc(irq); | ||
183 | unsigned long flags; | ||
184 | int ret; | ||
185 | |||
186 | if (!desc) | ||
187 | return -EINVAL; | ||
188 | |||
189 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
190 | ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); | ||
180 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 191 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
181 | return ret; | 192 | return ret; |
182 | } | 193 | } |
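The split lets code that already holds desc->lock (e.g. chip or migration code) set the affinity without re-taking it; the pattern, mirroring the new irq_set_affinity() body above:

	raw_spin_lock_irqsave(&desc->lock, flags);
	ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask);
	raw_spin_unlock_irqrestore(&desc->lock, flags);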
@@ -206,7 +217,7 @@ static void irq_affinity_notify(struct work_struct *work) | |||
206 | goto out; | 217 | goto out; |
207 | 218 | ||
208 | raw_spin_lock_irqsave(&desc->lock, flags); | 219 | raw_spin_lock_irqsave(&desc->lock, flags); |
209 | if (irq_move_pending(desc)) | 220 | if (irq_move_pending(&desc->irq_data)) |
210 | irq_get_pending(cpumask, desc); | 221 | irq_get_pending(cpumask, desc); |
211 | else | 222 | else |
212 | cpumask_copy(cpumask, desc->irq_data.affinity); | 223 | cpumask_copy(cpumask, desc->irq_data.affinity); |
@@ -285,10 +296,8 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) | |||
285 | if (cpumask_intersects(desc->irq_data.affinity, | 296 | if (cpumask_intersects(desc->irq_data.affinity, |
286 | cpu_online_mask)) | 297 | cpu_online_mask)) |
287 | set = desc->irq_data.affinity; | 298 | set = desc->irq_data.affinity; |
288 | else { | 299 | else |
289 | irq_compat_clr_affinity(desc); | ||
290 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); | 300 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); |
291 | } | ||
292 | } | 301 | } |
293 | 302 | ||
294 | cpumask_and(mask, cpu_online_mask, set); | 303 | cpumask_and(mask, cpu_online_mask, set); |
@@ -551,9 +560,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
551 | flags &= IRQ_TYPE_SENSE_MASK; | 560 | flags &= IRQ_TYPE_SENSE_MASK; |
552 | 561 | ||
553 | if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { | 562 | if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { |
554 | if (!(desc->istate & IRQS_MASKED)) | 563 | if (!irqd_irq_masked(&desc->irq_data)) |
555 | mask_irq(desc); | 564 | mask_irq(desc); |
556 | if (!(desc->istate & IRQS_DISABLED)) | 565 | if (!irqd_irq_disabled(&desc->irq_data)) |
557 | unmask = 1; | 566 | unmask = 1; |
558 | } | 567 | } |
559 | 568 | ||
@@ -575,8 +584,6 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
575 | irqd_set(&desc->irq_data, IRQD_LEVEL); | 584 | irqd_set(&desc->irq_data, IRQD_LEVEL); |
576 | } | 585 | } |
577 | 586 | ||
578 | if (chip != desc->irq_data.chip) | ||
579 | irq_chip_set_defaults(desc->irq_data.chip); | ||
580 | ret = 0; | 587 | ret = 0; |
581 | break; | 588 | break; |
582 | default: | 589 | default: |
@@ -651,7 +658,7 @@ again: | |||
651 | * irq_wake_thread(). See the comment there which explains the | 658 | * irq_wake_thread(). See the comment there which explains the |
652 | * serialization. | 659 | * serialization. |
653 | */ | 660 | */ |
654 | if (unlikely(desc->istate & IRQS_INPROGRESS)) { | 661 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) { |
655 | raw_spin_unlock_irq(&desc->lock); | 662 | raw_spin_unlock_irq(&desc->lock); |
656 | chip_bus_sync_unlock(desc); | 663 | chip_bus_sync_unlock(desc); |
657 | cpu_relax(); | 664 | cpu_relax(); |
@@ -668,12 +675,10 @@ again: | |||
668 | 675 | ||
669 | desc->threads_oneshot &= ~action->thread_mask; | 676 | desc->threads_oneshot &= ~action->thread_mask; |
670 | 677 | ||
671 | if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) && | 678 | if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && |
672 | (desc->istate & IRQS_MASKED)) { | 679 | irqd_irq_masked(&desc->irq_data)) |
673 | irq_compat_clr_masked(desc); | 680 | unmask_irq(desc); |
674 | desc->istate &= ~IRQS_MASKED; | 681 | |
675 | desc->irq_data.chip->irq_unmask(&desc->irq_data); | ||
676 | } | ||
677 | out_unlock: | 682 | out_unlock: |
678 | raw_spin_unlock_irq(&desc->lock); | 683 | raw_spin_unlock_irq(&desc->lock); |
679 | chip_bus_sync_unlock(desc); | 684 | chip_bus_sync_unlock(desc); |
@@ -767,7 +772,7 @@ static int irq_thread(void *data) | |||
767 | atomic_inc(&desc->threads_active); | 772 | atomic_inc(&desc->threads_active); |
768 | 773 | ||
769 | raw_spin_lock_irq(&desc->lock); | 774 | raw_spin_lock_irq(&desc->lock); |
770 | if (unlikely(desc->istate & IRQS_DISABLED)) { | 775 | if (unlikely(irqd_irq_disabled(&desc->irq_data))) { |
771 | /* | 776 | /* |
772 | * CHECKME: We might need a dedicated | 777 | * CHECKME: We might need a dedicated |
773 | * IRQ_THREAD_PENDING flag here, which | 778 | * IRQ_THREAD_PENDING flag here, which |
@@ -775,7 +780,6 @@ static int irq_thread(void *data) | |||
775 | * but AFAICT IRQS_PENDING should be fine as it | 780 | * but AFAICT IRQS_PENDING should be fine as it |
776 | * retriggers the interrupt itself --- tglx | 781 | * retriggers the interrupt itself --- tglx |
777 | */ | 782 | */ |
778 | irq_compat_set_pending(desc); | ||
779 | desc->istate |= IRQS_PENDING; | 783 | desc->istate |= IRQS_PENDING; |
780 | raw_spin_unlock_irq(&desc->lock); | 784 | raw_spin_unlock_irq(&desc->lock); |
781 | } else { | 785 | } else { |
@@ -971,8 +975,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
971 | new->thread_mask = 1 << ffz(thread_mask); | 975 | new->thread_mask = 1 << ffz(thread_mask); |
972 | 976 | ||
973 | if (!shared) { | 977 | if (!shared) { |
974 | irq_chip_set_defaults(desc->irq_data.chip); | ||
975 | |||
976 | init_waitqueue_head(&desc->wait_for_threads); | 978 | init_waitqueue_head(&desc->wait_for_threads); |
977 | 979 | ||
978 | /* Setup the type (level, edge polarity) if configured: */ | 980 | /* Setup the type (level, edge polarity) if configured: */ |
@@ -985,8 +987,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
985 | } | 987 | } |
986 | 988 | ||
987 | desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ | 989 | desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ |
988 | IRQS_INPROGRESS | IRQS_ONESHOT | \ | 990 | IRQS_ONESHOT | IRQS_WAITING); |
989 | IRQS_WAITING); | 991 | irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
990 | 992 | ||
991 | if (new->flags & IRQF_PERCPU) { | 993 | if (new->flags & IRQF_PERCPU) { |
992 | irqd_set(&desc->irq_data, IRQD_PER_CPU); | 994 | irqd_set(&desc->irq_data, IRQD_PER_CPU); |
@@ -1049,6 +1051,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
1049 | register_irq_proc(irq, desc); | 1051 | register_irq_proc(irq, desc); |
1050 | new->dir = NULL; | 1052 | new->dir = NULL; |
1051 | register_handler_proc(irq, new); | 1053 | register_handler_proc(irq, new); |
1054 | free_cpumask_var(mask); | ||
1052 | 1055 | ||
1053 | return 0; | 1056 | return 0; |
1054 | 1057 | ||
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index ec4806d4778b..47420908fba0 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -35,7 +35,7 @@ void irq_move_masked_irq(struct irq_data *idata) | |||
35 | * do the disable, re-program, enable sequence. | 35 | * do the disable, re-program, enable sequence. |
36 | * This is *not* particularly important for level triggered | 36 | * This is *not* particularly important for level triggered |
37 | * but in an edge trigger case, we might be setting rte | 37 | * but in an edge trigger case, we might be setting rte |
38 | * when an active trigger is comming in. This could | 38 | * when an active trigger is coming in. This could |
39 | * cause some ioapics to malfunction. | 39 | * cause some ioapics to malfunction. |
40 | * Being paranoid I guess! | 40 | * Being paranoid I guess! |
41 | * | 41 | * |
@@ -53,20 +53,14 @@ void irq_move_masked_irq(struct irq_data *idata) | |||
53 | cpumask_clear(desc->pending_mask); | 53 | cpumask_clear(desc->pending_mask); |
54 | } | 54 | } |
55 | 55 | ||
56 | void move_masked_irq(int irq) | ||
57 | { | ||
58 | irq_move_masked_irq(irq_get_irq_data(irq)); | ||
59 | } | ||
60 | |||
61 | void irq_move_irq(struct irq_data *idata) | 56 | void irq_move_irq(struct irq_data *idata) |
62 | { | 57 | { |
63 | struct irq_desc *desc = irq_data_to_desc(idata); | ||
64 | bool masked; | 58 | bool masked; |
65 | 59 | ||
66 | if (likely(!irqd_is_setaffinity_pending(idata))) | 60 | if (likely(!irqd_is_setaffinity_pending(idata))) |
67 | return; | 61 | return; |
68 | 62 | ||
69 | if (unlikely(desc->istate & IRQS_DISABLED)) | 63 | if (unlikely(irqd_irq_disabled(idata))) |
70 | return; | 64 | return; |
71 | 65 | ||
72 | /* | 66 | /* |
@@ -74,15 +68,10 @@ void irq_move_irq(struct irq_data *idata) | |||
74 | * threaded interrupt with ONESHOT set, we can end up with an | 68 | * threaded interrupt with ONESHOT set, we can end up with an |
75 | * interrupt storm. | 69 | * interrupt storm. |
76 | */ | 70 | */ |
77 | masked = desc->istate & IRQS_MASKED; | 71 | masked = irqd_irq_masked(idata); |
78 | if (!masked) | 72 | if (!masked) |
79 | idata->chip->irq_mask(idata); | 73 | idata->chip->irq_mask(idata); |
80 | irq_move_masked_irq(idata); | 74 | irq_move_masked_irq(idata); |
81 | if (!masked) | 75 | if (!masked) |
82 | idata->chip->irq_unmask(idata); | 76 | idata->chip->irq_unmask(idata); |
83 | } | 77 | } |
84 | |||
85 | void move_native_irq(int irq) | ||
86 | { | ||
87 | irq_move_irq(irq_get_irq_data(irq)); | ||
88 | } | ||
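The removed integer-based wrappers have no users left; a caller that still had only the irq number would, under that assumption, go through the irq_data cookie instead:

	irq_move_irq(irq_get_irq_data(irq));		/* was: move_native_irq(irq) */
	irq_move_masked_irq(irq_get_irq_data(irq));	/* was: move_masked_irq(irq) */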
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 760248de109d..dd201bd35103 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -364,6 +364,10 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec) | |||
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | #ifndef ACTUAL_NR_IRQS | ||
368 | # define ACTUAL_NR_IRQS nr_irqs | ||
369 | #endif | ||
370 | |||
367 | int show_interrupts(struct seq_file *p, void *v) | 371 | int show_interrupts(struct seq_file *p, void *v) |
368 | { | 372 | { |
369 | static int prec; | 373 | static int prec; |
@@ -373,10 +377,10 @@ int show_interrupts(struct seq_file *p, void *v) | |||
373 | struct irqaction *action; | 377 | struct irqaction *action; |
374 | struct irq_desc *desc; | 378 | struct irq_desc *desc; |
375 | 379 | ||
376 | if (i > nr_irqs) | 380 | if (i > ACTUAL_NR_IRQS) |
377 | return 0; | 381 | return 0; |
378 | 382 | ||
379 | if (i == nr_irqs) | 383 | if (i == ACTUAL_NR_IRQS) |
380 | return arch_show_interrupts(p, prec); | 384 | return arch_show_interrupts(p, prec); |
381 | 385 | ||
382 | /* print header and calculate the width of the first column */ | 386 | /* print header and calculate the width of the first column */ |
@@ -404,7 +408,20 @@ int show_interrupts(struct seq_file *p, void *v) | |||
404 | seq_printf(p, "%*d: ", prec, i); | 408 | seq_printf(p, "%*d: ", prec, i); |
405 | for_each_online_cpu(j) | 409 | for_each_online_cpu(j) |
406 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 410 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
407 | seq_printf(p, " %8s", desc->irq_data.chip->name); | 411 | |
412 | if (desc->irq_data.chip) { | ||
413 | if (desc->irq_data.chip->irq_print_chip) | ||
414 | desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); | ||
415 | else if (desc->irq_data.chip->name) | ||
416 | seq_printf(p, " %8s", desc->irq_data.chip->name); | ||
417 | else | ||
418 | seq_printf(p, " %8s", "-"); | ||
419 | } else { | ||
420 | seq_printf(p, " %8s", "None"); | ||
421 | } | ||
422 | #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL | ||
423 | seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); | ||
424 | #endif | ||
408 | if (desc->name) | 425 | if (desc->name) |
409 | seq_printf(p, "-%-8s", desc->name); | 426 | seq_printf(p, "-%-8s", desc->name); |
410 | 427 | ||
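A hypothetical chip showing the new irq_print_chip() hook used above to override the name column in /proc/interrupts:

	static void demo_print_chip(struct irq_data *d, struct seq_file *p)
	{
		seq_printf(p, " %8s-%u", "demo", d->irq);
	}

	static struct irq_chip demo_print_irq_chip = {
		.irq_print_chip	= demo_print_chip,
	};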
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index ad683a99b1ec..14dd5761e8c9 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c | |||
@@ -65,7 +65,6 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) | |||
65 | if (desc->istate & IRQS_REPLAY) | 65 | if (desc->istate & IRQS_REPLAY) |
66 | return; | 66 | return; |
67 | if (desc->istate & IRQS_PENDING) { | 67 | if (desc->istate & IRQS_PENDING) { |
68 | irq_compat_clr_pending(desc); | ||
69 | desc->istate &= ~IRQS_PENDING; | 68 | desc->istate &= ~IRQS_PENDING; |
70 | desc->istate |= IRQS_REPLAY; | 69 | desc->istate |= IRQS_REPLAY; |
71 | 70 | ||
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 0227ad358272..0d91730b6330 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h | |||
@@ -15,17 +15,8 @@ enum { | |||
15 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | 15 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, |
16 | }; | 16 | }; |
17 | 17 | ||
18 | #define IRQ_INPROGRESS GOT_YOU_MORON | ||
19 | #define IRQ_REPLAY GOT_YOU_MORON | ||
20 | #define IRQ_WAITING GOT_YOU_MORON | ||
21 | #define IRQ_DISABLED GOT_YOU_MORON | ||
22 | #define IRQ_PENDING GOT_YOU_MORON | ||
23 | #define IRQ_MASKED GOT_YOU_MORON | ||
24 | #define IRQ_WAKEUP GOT_YOU_MORON | ||
25 | #define IRQ_MOVE_PENDING GOT_YOU_MORON | ||
26 | #define IRQ_PER_CPU GOT_YOU_MORON | 18 | #define IRQ_PER_CPU GOT_YOU_MORON |
27 | #define IRQ_NO_BALANCING GOT_YOU_MORON | 19 | #define IRQ_NO_BALANCING GOT_YOU_MORON |
28 | #define IRQ_AFFINITY_SET GOT_YOU_MORON | ||
29 | #define IRQ_LEVEL GOT_YOU_MORON | 20 | #define IRQ_LEVEL GOT_YOU_MORON |
30 | #define IRQ_NOPROBE GOT_YOU_MORON | 21 | #define IRQ_NOPROBE GOT_YOU_MORON |
31 | #define IRQ_NOREQUEST GOT_YOU_MORON | 22 | #define IRQ_NOREQUEST GOT_YOU_MORON |
@@ -37,102 +28,98 @@ enum { | |||
37 | static inline void | 28 | static inline void |
38 | irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | 29 | irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) |
39 | { | 30 | { |
40 | desc->status &= ~(clr & _IRQF_MODIFY_MASK); | 31 | desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); |
41 | desc->status |= (set & _IRQF_MODIFY_MASK); | 32 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); |
42 | } | 33 | } |
43 | 34 | ||
44 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | 35 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) |
45 | { | 36 | { |
46 | return desc->status & _IRQ_PER_CPU; | 37 | return desc->status_use_accessors & _IRQ_PER_CPU; |
47 | } | 38 | } |
48 | 39 | ||
49 | static inline void irq_settings_set_per_cpu(struct irq_desc *desc) | 40 | static inline void irq_settings_set_per_cpu(struct irq_desc *desc) |
50 | { | 41 | { |
51 | desc->status |= _IRQ_PER_CPU; | 42 | desc->status_use_accessors |= _IRQ_PER_CPU; |
52 | } | 43 | } |
53 | 44 | ||
54 | static inline void irq_settings_set_no_balancing(struct irq_desc *desc) | 45 | static inline void irq_settings_set_no_balancing(struct irq_desc *desc) |
55 | { | 46 | { |
56 | desc->status |= _IRQ_NO_BALANCING; | 47 | desc->status_use_accessors |= _IRQ_NO_BALANCING; |
57 | } | 48 | } |
58 | 49 | ||
59 | static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) | 50 | static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) |
60 | { | 51 | { |
61 | return desc->status & _IRQ_NO_BALANCING; | 52 | return desc->status_use_accessors & _IRQ_NO_BALANCING; |
62 | } | 53 | } |
63 | 54 | ||
64 | static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) | 55 | static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) |
65 | { | 56 | { |
66 | return desc->status & IRQ_TYPE_SENSE_MASK; | 57 | return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; |
67 | } | 58 | } |
68 | 59 | ||
69 | static inline void | 60 | static inline void |
70 | irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) | 61 | irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) |
71 | { | 62 | { |
72 | desc->status &= ~IRQ_TYPE_SENSE_MASK; | 63 | desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; |
73 | desc->status |= mask & IRQ_TYPE_SENSE_MASK; | 64 | desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; |
74 | } | 65 | } |
75 | 66 | ||
76 | static inline bool irq_settings_is_level(struct irq_desc *desc) | 67 | static inline bool irq_settings_is_level(struct irq_desc *desc) |
77 | { | 68 | { |
78 | return desc->status & _IRQ_LEVEL; | 69 | return desc->status_use_accessors & _IRQ_LEVEL; |
79 | } | 70 | } |
80 | 71 | ||
81 | static inline void irq_settings_clr_level(struct irq_desc *desc) | 72 | static inline void irq_settings_clr_level(struct irq_desc *desc) |
82 | { | 73 | { |
83 | desc->status &= ~_IRQ_LEVEL; | 74 | desc->status_use_accessors &= ~_IRQ_LEVEL; |
84 | } | 75 | } |
85 | 76 | ||
86 | static inline void irq_settings_set_level(struct irq_desc *desc) | 77 | static inline void irq_settings_set_level(struct irq_desc *desc) |
87 | { | 78 | { |
88 | desc->status |= _IRQ_LEVEL; | 79 | desc->status_use_accessors |= _IRQ_LEVEL; |
89 | } | 80 | } |
90 | 81 | ||
91 | static inline bool irq_settings_can_request(struct irq_desc *desc) | 82 | static inline bool irq_settings_can_request(struct irq_desc *desc) |
92 | { | 83 | { |
93 | return !(desc->status & _IRQ_NOREQUEST); | 84 | return !(desc->status_use_accessors & _IRQ_NOREQUEST); |
94 | } | 85 | } |
95 | 86 | ||
96 | static inline void irq_settings_clr_norequest(struct irq_desc *desc) | 87 | static inline void irq_settings_clr_norequest(struct irq_desc *desc) |
97 | { | 88 | { |
98 | desc->status &= ~_IRQ_NOREQUEST; | 89 | desc->status_use_accessors &= ~_IRQ_NOREQUEST; |
99 | } | 90 | } |
100 | 91 | ||
101 | static inline void irq_settings_set_norequest(struct irq_desc *desc) | 92 | static inline void irq_settings_set_norequest(struct irq_desc *desc) |
102 | { | 93 | { |
103 | desc->status |= _IRQ_NOREQUEST; | 94 | desc->status_use_accessors |= _IRQ_NOREQUEST; |
104 | } | 95 | } |
105 | 96 | ||
106 | static inline bool irq_settings_can_probe(struct irq_desc *desc) | 97 | static inline bool irq_settings_can_probe(struct irq_desc *desc) |
107 | { | 98 | { |
108 | return !(desc->status & _IRQ_NOPROBE); | 99 | return !(desc->status_use_accessors & _IRQ_NOPROBE); |
109 | } | 100 | } |
110 | 101 | ||
111 | static inline void irq_settings_clr_noprobe(struct irq_desc *desc) | 102 | static inline void irq_settings_clr_noprobe(struct irq_desc *desc) |
112 | { | 103 | { |
113 | desc->status &= ~_IRQ_NOPROBE; | 104 | desc->status_use_accessors &= ~_IRQ_NOPROBE; |
114 | } | 105 | } |
115 | 106 | ||
116 | static inline void irq_settings_set_noprobe(struct irq_desc *desc) | 107 | static inline void irq_settings_set_noprobe(struct irq_desc *desc) |
117 | { | 108 | { |
118 | desc->status |= _IRQ_NOPROBE; | 109 | desc->status_use_accessors |= _IRQ_NOPROBE; |
119 | } | 110 | } |
120 | 111 | ||
121 | static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) | 112 | static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) |
122 | { | 113 | { |
123 | return desc->status & _IRQ_MOVE_PCNTXT; | 114 | return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; |
124 | } | 115 | } |
125 | 116 | ||
126 | static inline bool irq_settings_can_autoenable(struct irq_desc *desc) | 117 | static inline bool irq_settings_can_autoenable(struct irq_desc *desc) |
127 | { | 118 | { |
128 | return !(desc->status & _IRQ_NOAUTOEN); | 119 | return !(desc->status_use_accessors & _IRQ_NOAUTOEN); |
129 | } | 120 | } |
130 | 121 | ||
131 | static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) | 122 | static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) |
132 | { | 123 | { |
133 | return desc->status & _IRQ_NESTED_THREAD; | 124 | return desc->status_use_accessors & _IRQ_NESTED_THREAD; |
134 | } | 125 | } |
135 | |||
136 | /* Nothing should touch desc->status from now on */ | ||
137 | #undef status | ||
138 | #define status USE_THE_PROPER_WRAPPERS_YOU_MORON | ||
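The irq_settings.h hunk moves every flag test behind the irq_settings_*() helpers and then poisons the raw names, so any leftover code that still touches desc->status breaks at compile time instead of racing with the wrappers at run time. A minimal sketch of the same idiom, with made-up names:

/* Accessor-only field: direct use of "state" is poisoned below. */
struct widget {
	unsigned int state_use_accessors;
};

static inline void widget_set_busy(struct widget *w)
{
	w->state_use_accessors |= 0x1;
}

static inline int widget_is_busy(const struct widget *w)
{
	return w->state_use_accessors & 0x1;
}

/* From here on, "w->state" expands to "w->USE_THE_ACCESSORS",
 * which no struct defines, so the build fails loudly. */
#undef state
#define state USE_THE_ACCESSORS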
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dd586ebf9c8c..dfbd550401b2 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -45,12 +45,12 @@ bool irq_wait_for_poll(struct irq_desc *desc) | |||
45 | #ifdef CONFIG_SMP | 45 | #ifdef CONFIG_SMP |
46 | do { | 46 | do { |
47 | raw_spin_unlock(&desc->lock); | 47 | raw_spin_unlock(&desc->lock); |
48 | while (desc->istate & IRQS_INPROGRESS) | 48 | while (irqd_irq_inprogress(&desc->irq_data)) |
49 | cpu_relax(); | 49 | cpu_relax(); |
50 | raw_spin_lock(&desc->lock); | 50 | raw_spin_lock(&desc->lock); |
51 | } while (desc->istate & IRQS_INPROGRESS); | 51 | } while (irqd_irq_inprogress(&desc->irq_data)); |
52 | /* Might have been disabled in meantime */ | 52 | /* Might have been disabled in meantime */ |
53 | return !(desc->istate & IRQS_DISABLED) && desc->action; | 53 | return !irqd_irq_disabled(&desc->irq_data) && desc->action; |
54 | #else | 54 | #else |
55 | return false; | 55 | return false; |
56 | #endif | 56 | #endif |
@@ -75,7 +75,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) | |||
75 | * Do not poll disabled interrupts unless the spurious | 75 | * Do not poll disabled interrupts unless the spurious |
76 | * disabled poller asks explicitely. | 76 | * disabled poller asks explicitely. |
77 | */ | 77 | */ |
78 | if ((desc->istate & IRQS_DISABLED) && !force) | 78 | if (irqd_irq_disabled(&desc->irq_data) && !force) |
79 | goto out; | 79 | goto out; |
80 | 80 | ||
81 | /* | 81 | /* |
@@ -88,12 +88,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) | |||
88 | goto out; | 88 | goto out; |
89 | 89 | ||
90 | /* Already running on another processor */ | 90 | /* Already running on another processor */ |
91 | if (desc->istate & IRQS_INPROGRESS) { | 91 | if (irqd_irq_inprogress(&desc->irq_data)) { |
92 | /* | 92 | /* |
93 | * Already running: If it is shared get the other | 93 | * Already running: If it is shared get the other |
94 | * CPU to go looking for our mystery interrupt too | 94 | * CPU to go looking for our mystery interrupt too |
95 | */ | 95 | */ |
96 | irq_compat_set_pending(desc); | ||
97 | desc->istate |= IRQS_PENDING; | 96 | desc->istate |= IRQS_PENDING; |
98 | goto out; | 97 | goto out; |
99 | } | 98 | } |
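The spurious-IRQ poller keeps its long-standing shape: drop the descriptor lock while another CPU finishes the handler, spin with cpu_relax(), re-take the lock and re-check; only the flag test moves behind irqd_irq_inprogress()/irqd_irq_disabled(). A stripped-down sketch of that wait loop, with placeholder names:

#include <linux/types.h>
#include <linux/spinlock.h>	/* also provides cpu_relax() */

static void wait_until_idle(raw_spinlock_t *lock, bool (*busy)(void))
{
	do {
		raw_spin_unlock(lock);
		while (busy())
			cpu_relax();		/* polite busy-wait */
		raw_spin_lock(lock);
	} while (busy());			/* re-check under the lock */
}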
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6f6d091b5757..079f1d39a8b8 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr) | |||
64 | if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || | 64 | if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || |
65 | arch_is_kernel_text(addr)) | 65 | arch_is_kernel_text(addr)) |
66 | return 1; | 66 | return 1; |
67 | return in_gate_area_no_task(addr); | 67 | return in_gate_area_no_mm(addr); |
68 | } | 68 | } |
69 | 69 | ||
70 | static inline int is_kernel(unsigned long addr) | 70 | static inline int is_kernel(unsigned long addr) |
71 | { | 71 | { |
72 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) | 72 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) |
73 | return 1; | 73 | return 1; |
74 | return in_gate_area_no_task(addr); | 74 | return in_gate_area_no_mm(addr); |
75 | } | 75 | } |
76 | 76 | ||
77 | static int is_ksym_addr(unsigned long addr) | 77 | static int is_ksym_addr(unsigned long addr) |
@@ -342,13 +342,15 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
342 | } | 342 | } |
343 | 343 | ||
344 | /* Look up a kernel symbol and return it in a text buffer. */ | 344 | /* Look up a kernel symbol and return it in a text buffer. */ |
345 | int sprint_symbol(char *buffer, unsigned long address) | 345 | static int __sprint_symbol(char *buffer, unsigned long address, |
346 | int symbol_offset) | ||
346 | { | 347 | { |
347 | char *modname; | 348 | char *modname; |
348 | const char *name; | 349 | const char *name; |
349 | unsigned long offset, size; | 350 | unsigned long offset, size; |
350 | int len; | 351 | int len; |
351 | 352 | ||
353 | address += symbol_offset; | ||
352 | name = kallsyms_lookup(address, &size, &offset, &modname, buffer); | 354 | name = kallsyms_lookup(address, &size, &offset, &modname, buffer); |
353 | if (!name) | 355 | if (!name) |
354 | return sprintf(buffer, "0x%lx", address); | 356 | return sprintf(buffer, "0x%lx", address); |
@@ -357,17 +359,53 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
357 | strcpy(buffer, name); | 359 | strcpy(buffer, name); |
358 | len = strlen(buffer); | 360 | len = strlen(buffer); |
359 | buffer += len; | 361 | buffer += len; |
362 | offset -= symbol_offset; | ||
360 | 363 | ||
361 | if (modname) | 364 | if (modname) |
362 | len += sprintf(buffer, "+%#lx/%#lx [%s]", | 365 | len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); |
363 | offset, size, modname); | ||
364 | else | 366 | else |
365 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); | 367 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); |
366 | 368 | ||
367 | return len; | 369 | return len; |
368 | } | 370 | } |
371 | |||
372 | /** | ||
373 | * sprint_symbol - Look up a kernel symbol and return it in a text buffer | ||
374 | * @buffer: buffer to be stored | ||
375 | * @address: address to lookup | ||
376 | * | ||
377 | * This function looks up a kernel symbol with @address and stores its name, | ||
378 | * offset, size and module name to @buffer if possible. If no symbol was found, | ||
379 | * just saves its @address as is. | ||
380 | * | ||
381 | * This function returns the number of bytes stored in @buffer. | ||
382 | */ | ||
383 | int sprint_symbol(char *buffer, unsigned long address) | ||
384 | { | ||
385 | return __sprint_symbol(buffer, address, 0); | ||
386 | } | ||
387 | |||
369 | EXPORT_SYMBOL_GPL(sprint_symbol); | 388 | EXPORT_SYMBOL_GPL(sprint_symbol); |
370 | 389 | ||
390 | /** | ||
391 | * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer | ||
392 | * @buffer: buffer to be stored | ||
393 | * @address: address to lookup | ||
394 | * | ||
395 | * This function is for stack backtrace and does the same thing as | ||
396 | * sprint_symbol() but with modified/decreased @address. If there is a | ||
397 | * tail-call to the function marked "noreturn", gcc optimized out code after | ||
398 | * the call so that the stack-saved return address could point outside of the | ||
399 | * caller. This function ensures that kallsyms will find the original caller | ||
400 | * by decreasing @address. | ||
401 | * | ||
402 | * This function returns the number of bytes stored in @buffer. | ||
403 | */ | ||
404 | int sprint_backtrace(char *buffer, unsigned long address) | ||
405 | { | ||
406 | return __sprint_symbol(buffer, address, -1); | ||
407 | } | ||
408 | |||
371 | /* Look up a kernel symbol and print it to the kernel messages. */ | 409 | /* Look up a kernel symbol and print it to the kernel messages. */ |
372 | void __print_symbol(const char *fmt, unsigned long address) | 410 | void __print_symbol(const char *fmt, unsigned long address) |
373 | { | 411 | { |
@@ -477,13 +515,11 @@ static int s_show(struct seq_file *m, void *p) | |||
477 | */ | 515 | */ |
478 | type = iter->exported ? toupper(iter->type) : | 516 | type = iter->exported ? toupper(iter->type) : |
479 | tolower(iter->type); | 517 | tolower(iter->type); |
480 | seq_printf(m, "%0*lx %c %s\t[%s]\n", | 518 | seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value, |
481 | (int)(2 * sizeof(void *)), | 519 | type, iter->name, iter->module_name); |
482 | iter->value, type, iter->name, iter->module_name); | ||
483 | } else | 520 | } else |
484 | seq_printf(m, "%0*lx %c %s\n", | 521 | seq_printf(m, "%pK %c %s\n", (void *)iter->value, |
485 | (int)(2 * sizeof(void *)), | 522 | iter->type, iter->name); |
486 | iter->value, iter->type, iter->name); | ||
487 | return 0; | 523 | return 0; |
488 | } | 524 | } |
489 | 525 | ||
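Besides the in_gate_area_no_mm() rename, kallsyms gains sprint_backtrace(), a variant of sprint_symbol() that biases the lookup by -1 so a return address sitting just past a tail-call to a noreturn function still resolves to the caller, and /proc/kallsyms switches to the %pK pointer format. A hypothetical caller of the new helper:

#include <linux/kernel.h>
#include <linux/kallsyms.h>

static void show_return_address(unsigned long ret_addr)
{
	char sym[KSYM_SYMBOL_LEN];

	/* Looks up ret_addr - 1 internally, see __sprint_symbol() above. */
	sprint_backtrace(sym, ret_addr);
	printk(KERN_DEBUG "called from %s\n", sym);
}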
diff --git a/kernel/kexec.c b/kernel/kexec.c index ec19b92c7ebd..87b77de03dd3 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/vmalloc.h> | 33 | #include <linux/vmalloc.h> |
34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
35 | #include <linux/kmsg_dump.h> | 35 | #include <linux/kmsg_dump.h> |
36 | #include <linux/syscore_ops.h> | ||
36 | 37 | ||
37 | #include <asm/page.h> | 38 | #include <asm/page.h> |
38 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
@@ -144,7 +145,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, | |||
144 | /* Initialize the list of destination pages */ | 145 | /* Initialize the list of destination pages */ |
145 | INIT_LIST_HEAD(&image->dest_pages); | 146 | INIT_LIST_HEAD(&image->dest_pages); |
146 | 147 | ||
147 | /* Initialize the list of unuseable pages */ | 148 | /* Initialize the list of unusable pages */ |
148 | INIT_LIST_HEAD(&image->unuseable_pages); | 149 | INIT_LIST_HEAD(&image->unuseable_pages); |
149 | 150 | ||
150 | /* Read in the segments */ | 151 | /* Read in the segments */ |
@@ -454,7 +455,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | |||
454 | /* Deal with the destination pages I have inadvertently allocated. | 455 | /* Deal with the destination pages I have inadvertently allocated. |
455 | * | 456 | * |
456 | * Ideally I would convert multi-page allocations into single | 457 | * Ideally I would convert multi-page allocations into single |
457 | * page allocations, and add everyting to image->dest_pages. | 458 | * page allocations, and add everything to image->dest_pages. |
458 | * | 459 | * |
459 | * For now it is simpler to just free the pages. | 460 | * For now it is simpler to just free the pages. |
460 | */ | 461 | */ |
@@ -602,7 +603,7 @@ static void kimage_free_extra_pages(struct kimage *image) | |||
602 | /* Walk through and free any extra destination pages I may have */ | 603 | /* Walk through and free any extra destination pages I may have */ |
603 | kimage_free_page_list(&image->dest_pages); | 604 | kimage_free_page_list(&image->dest_pages); |
604 | 605 | ||
605 | /* Walk through and free any unuseable pages I have cached */ | 606 | /* Walk through and free any unusable pages I have cached */ |
606 | kimage_free_page_list(&image->unuseable_pages); | 607 | kimage_free_page_list(&image->unuseable_pages); |
607 | 608 | ||
608 | } | 609 | } |
@@ -1099,7 +1100,8 @@ size_t crash_get_memory_size(void) | |||
1099 | return size; | 1100 | return size; |
1100 | } | 1101 | } |
1101 | 1102 | ||
1102 | static void free_reserved_phys_range(unsigned long begin, unsigned long end) | 1103 | void __weak crash_free_reserved_phys_range(unsigned long begin, |
1104 | unsigned long end) | ||
1103 | { | 1105 | { |
1104 | unsigned long addr; | 1106 | unsigned long addr; |
1105 | 1107 | ||
@@ -1135,7 +1137,7 @@ int crash_shrink_memory(unsigned long new_size) | |||
1135 | start = roundup(start, PAGE_SIZE); | 1137 | start = roundup(start, PAGE_SIZE); |
1136 | end = roundup(start + new_size, PAGE_SIZE); | 1138 | end = roundup(start + new_size, PAGE_SIZE); |
1137 | 1139 | ||
1138 | free_reserved_phys_range(end, crashk_res.end); | 1140 | crash_free_reserved_phys_range(end, crashk_res.end); |
1139 | 1141 | ||
1140 | if ((start == end) && (crashk_res.parent != NULL)) | 1142 | if ((start == end) && (crashk_res.parent != NULL)) |
1141 | release_resource(&crashk_res); | 1143 | release_resource(&crashk_res); |
@@ -1531,6 +1533,11 @@ int kernel_kexec(void) | |||
1531 | local_irq_disable(); | 1533 | local_irq_disable(); |
1532 | /* Suspend system devices */ | 1534 | /* Suspend system devices */ |
1533 | error = sysdev_suspend(PMSG_FREEZE); | 1535 | error = sysdev_suspend(PMSG_FREEZE); |
1536 | if (!error) { | ||
1537 | error = syscore_suspend(); | ||
1538 | if (error) | ||
1539 | sysdev_resume(); | ||
1540 | } | ||
1534 | if (error) | 1541 | if (error) |
1535 | goto Enable_irqs; | 1542 | goto Enable_irqs; |
1536 | } else | 1543 | } else |
@@ -1545,6 +1552,7 @@ int kernel_kexec(void) | |||
1545 | 1552 | ||
1546 | #ifdef CONFIG_KEXEC_JUMP | 1553 | #ifdef CONFIG_KEXEC_JUMP |
1547 | if (kexec_image->preserve_context) { | 1554 | if (kexec_image->preserve_context) { |
1555 | syscore_resume(); | ||
1548 | sysdev_resume(); | 1556 | sysdev_resume(); |
1549 | Enable_irqs: | 1557 | Enable_irqs: |
1550 | local_irq_enable(); | 1558 | local_irq_enable(); |
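Two separate fixes land in kexec.c: the kexec-jump path now undoes sysdev_suspend() when syscore_suspend() fails (the same rollback appears again in hibernate.c and suspend.c below), and free_reserved_phys_range() becomes a __weak crash_free_reserved_phys_range() so an architecture can substitute its own implementation without #ifdefs. The weak-symbol shape, with illustrative names:

#include <linux/compiler.h>	/* __weak */

/* Common code: a weak, portable default. */
void __weak machine_free_crash_pages(unsigned long begin, unsigned long end)
{
	/* generic page-by-page free */
}

/*
 * An ordinary (strong) definition with the same prototype in
 * arch/<arch>/kernel/ replaces this one at link time automatically.
 */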
diff --git a/kernel/kthread.c b/kernel/kthread.c index c55afba990a3..3b34d2732bce 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -27,6 +27,7 @@ struct kthread_create_info | |||
27 | /* Information passed to kthread() from kthreadd. */ | 27 | /* Information passed to kthread() from kthreadd. */ |
28 | int (*threadfn)(void *data); | 28 | int (*threadfn)(void *data); |
29 | void *data; | 29 | void *data; |
30 | int node; | ||
30 | 31 | ||
31 | /* Result passed back to kthread_create() from kthreadd. */ | 32 | /* Result passed back to kthread_create() from kthreadd. */ |
32 | struct task_struct *result; | 33 | struct task_struct *result; |
@@ -98,10 +99,23 @@ static int kthread(void *_create) | |||
98 | do_exit(ret); | 99 | do_exit(ret); |
99 | } | 100 | } |
100 | 101 | ||
102 | /* called from do_fork() to get node information for about to be created task */ | ||
103 | int tsk_fork_get_node(struct task_struct *tsk) | ||
104 | { | ||
105 | #ifdef CONFIG_NUMA | ||
106 | if (tsk == kthreadd_task) | ||
107 | return tsk->pref_node_fork; | ||
108 | #endif | ||
109 | return numa_node_id(); | ||
110 | } | ||
111 | |||
101 | static void create_kthread(struct kthread_create_info *create) | 112 | static void create_kthread(struct kthread_create_info *create) |
102 | { | 113 | { |
103 | int pid; | 114 | int pid; |
104 | 115 | ||
116 | #ifdef CONFIG_NUMA | ||
117 | current->pref_node_fork = create->node; | ||
118 | #endif | ||
105 | /* We want our own signal handler (we take no signals by default). */ | 119 | /* We want our own signal handler (we take no signals by default). */ |
106 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 120 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
107 | if (pid < 0) { | 121 | if (pid < 0) { |
@@ -111,33 +125,38 @@ static void create_kthread(struct kthread_create_info *create) | |||
111 | } | 125 | } |
112 | 126 | ||
113 | /** | 127 | /** |
114 | * kthread_create - create a kthread. | 128 | * kthread_create_on_node - create a kthread. |
115 | * @threadfn: the function to run until signal_pending(current). | 129 | * @threadfn: the function to run until signal_pending(current). |
116 | * @data: data ptr for @threadfn. | 130 | * @data: data ptr for @threadfn. |
131 | * @node: memory node number. | ||
117 | * @namefmt: printf-style name for the thread. | 132 | * @namefmt: printf-style name for the thread. |
118 | * | 133 | * |
119 | * Description: This helper function creates and names a kernel | 134 | * Description: This helper function creates and names a kernel |
120 | * thread. The thread will be stopped: use wake_up_process() to start | 135 | * thread. The thread will be stopped: use wake_up_process() to start |
121 | * it. See also kthread_run(). | 136 | * it. See also kthread_run(). |
122 | * | 137 | * |
138 | * If thread is going to be bound on a particular cpu, give its node | ||
139 | * in @node, to get NUMA affinity for kthread stack, or else give -1. | ||
123 | * When woken, the thread will run @threadfn() with @data as its | 140 | * When woken, the thread will run @threadfn() with @data as its |
124 | * argument. @threadfn() can either call do_exit() directly if it is a | 141 | * argument. @threadfn() can either call do_exit() directly if it is a |
125 | * standalone thread for which noone will call kthread_stop(), or | 142 | * standalone thread for which no one will call kthread_stop(), or |
126 | * return when 'kthread_should_stop()' is true (which means | 143 | * return when 'kthread_should_stop()' is true (which means |
127 | * kthread_stop() has been called). The return value should be zero | 144 | * kthread_stop() has been called). The return value should be zero |
128 | * or a negative error number; it will be passed to kthread_stop(). | 145 | * or a negative error number; it will be passed to kthread_stop(). |
129 | * | 146 | * |
130 | * Returns a task_struct or ERR_PTR(-ENOMEM). | 147 | * Returns a task_struct or ERR_PTR(-ENOMEM). |
131 | */ | 148 | */ |
132 | struct task_struct *kthread_create(int (*threadfn)(void *data), | 149 | struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), |
133 | void *data, | 150 | void *data, |
134 | const char namefmt[], | 151 | int node, |
135 | ...) | 152 | const char namefmt[], |
153 | ...) | ||
136 | { | 154 | { |
137 | struct kthread_create_info create; | 155 | struct kthread_create_info create; |
138 | 156 | ||
139 | create.threadfn = threadfn; | 157 | create.threadfn = threadfn; |
140 | create.data = data; | 158 | create.data = data; |
159 | create.node = node; | ||
141 | init_completion(&create.done); | 160 | init_completion(&create.done); |
142 | 161 | ||
143 | spin_lock(&kthread_create_lock); | 162 | spin_lock(&kthread_create_lock); |
@@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
164 | } | 183 | } |
165 | return create.result; | 184 | return create.result; |
166 | } | 185 | } |
167 | EXPORT_SYMBOL(kthread_create); | 186 | EXPORT_SYMBOL(kthread_create_on_node); |
168 | 187 | ||
169 | /** | 188 | /** |
170 | * kthread_bind - bind a just-created kthread to a cpu. | 189 | * kthread_bind - bind a just-created kthread to a cpu. |
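kthread_create() grows into kthread_create_on_node(): callers that know which CPU the thread will be bound to can pass that CPU's memory node so the kthread's stack is allocated NUMA-locally, while callers with no preference pass -1. A hypothetical per-CPU worker using the new interface:

#include <linux/kthread.h>
#include <linux/topology.h>	/* cpu_to_node() */

static struct task_struct *start_worker(int (*fn)(void *), void *arg, int cpu)
{
	struct task_struct *t;

	t = kthread_create_on_node(fn, arg, cpu_to_node(cpu), "worker/%d", cpu);
	if (!IS_ERR(t)) {
		kthread_bind(t, cpu);
		wake_up_process(t);
	}
	return t;
}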
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index ee74b35e528d..376066e10413 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -153,7 +153,7 @@ static inline void store_stacktrace(struct task_struct *tsk, | |||
153 | } | 153 | } |
154 | 154 | ||
155 | /** | 155 | /** |
156 | * __account_scheduler_latency - record an occured latency | 156 | * __account_scheduler_latency - record an occurred latency |
157 | * @tsk - the task struct of the task hitting the latency | 157 | * @tsk - the task struct of the task hitting the latency |
158 | * @usecs - the duration of the latency in microseconds | 158 | * @usecs - the duration of the latency in microseconds |
159 | * @inter - 1 if the sleep was interruptible, 0 if uninterruptible | 159 | * @inter - 1 if the sleep was interruptible, 0 if uninterruptible |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0d2058da80f5..53a68956f131 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -2309,7 +2309,7 @@ void trace_hardirqs_on_caller(unsigned long ip) | |||
2309 | if (unlikely(curr->hardirqs_enabled)) { | 2309 | if (unlikely(curr->hardirqs_enabled)) { |
2310 | /* | 2310 | /* |
2311 | * Neither irq nor preemption are disabled here | 2311 | * Neither irq nor preemption are disabled here |
2312 | * so this is racy by nature but loosing one hit | 2312 | * so this is racy by nature but losing one hit |
2313 | * in a stat is not a big deal. | 2313 | * in a stat is not a big deal. |
2314 | */ | 2314 | */ |
2315 | __debug_atomic_inc(redundant_hardirqs_on); | 2315 | __debug_atomic_inc(redundant_hardirqs_on); |
@@ -2620,7 +2620,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
2620 | if (!graph_lock()) | 2620 | if (!graph_lock()) |
2621 | return 0; | 2621 | return 0; |
2622 | /* | 2622 | /* |
2623 | * Make sure we didnt race: | 2623 | * Make sure we didn't race: |
2624 | */ | 2624 | */ |
2625 | if (unlikely(hlock_class(this)->usage_mask & new_mask)) { | 2625 | if (unlikely(hlock_class(this)->usage_mask & new_mask)) { |
2626 | graph_unlock(); | 2626 | graph_unlock(); |
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 1969d2fc4b36..71edd2f60c02 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -225,7 +225,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, | 225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, |
226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, | 226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, |
227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, | 227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, |
228 | sum_forward_deps = 0, factor = 0; | 228 | sum_forward_deps = 0; |
229 | 229 | ||
230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | 230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { |
231 | 231 | ||
@@ -283,13 +283,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
283 | nr_hardirq_unsafe * nr_hardirq_safe + | 283 | nr_hardirq_unsafe * nr_hardirq_safe + |
284 | nr_list_entries); | 284 | nr_list_entries); |
285 | 285 | ||
286 | /* | ||
287 | * Estimated factor between direct and indirect | ||
288 | * dependencies: | ||
289 | */ | ||
290 | if (nr_list_entries) | ||
291 | factor = sum_forward_deps / nr_list_entries; | ||
292 | |||
293 | #ifdef CONFIG_PROVE_LOCKING | 286 | #ifdef CONFIG_PROVE_LOCKING |
294 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", | 287 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", |
295 | nr_lock_chains, MAX_LOCKDEP_CHAINS); | 288 | nr_lock_chains, MAX_LOCKDEP_CHAINS); |
diff --git a/kernel/module.c b/kernel/module.c index efa290ea94bf..d5938a5c19c4 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -809,7 +809,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
809 | wait_for_zero_refcount(mod); | 809 | wait_for_zero_refcount(mod); |
810 | 810 | ||
811 | mutex_unlock(&module_mutex); | 811 | mutex_unlock(&module_mutex); |
812 | /* Final destruction now noone is using it. */ | 812 | /* Final destruction now no one is using it. */ |
813 | if (mod->exit != NULL) | 813 | if (mod->exit != NULL) |
814 | mod->exit(); | 814 | mod->exit(); |
815 | blocking_notifier_call_chain(&module_notify_list, | 815 | blocking_notifier_call_chain(&module_notify_list, |
@@ -1168,7 +1168,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr, | |||
1168 | { | 1168 | { |
1169 | struct module_sect_attr *sattr = | 1169 | struct module_sect_attr *sattr = |
1170 | container_of(mattr, struct module_sect_attr, mattr); | 1170 | container_of(mattr, struct module_sect_attr, mattr); |
1171 | return sprintf(buf, "0x%lx\n", sattr->address); | 1171 | return sprintf(buf, "0x%pK\n", (void *)sattr->address); |
1172 | } | 1172 | } |
1173 | 1173 | ||
1174 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) | 1174 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) |
@@ -2777,7 +2777,7 @@ static struct module *load_module(void __user *umod, | |||
2777 | mod->state = MODULE_STATE_COMING; | 2777 | mod->state = MODULE_STATE_COMING; |
2778 | 2778 | ||
2779 | /* Now sew it into the lists so we can get lockdep and oops | 2779 | /* Now sew it into the lists so we can get lockdep and oops |
2780 | * info during argument parsing. Noone should access us, since | 2780 | * info during argument parsing. No one should access us, since |
2781 | * strong_try_module_get() will fail. | 2781 | * strong_try_module_get() will fail. |
2782 | * lockdep/oops can run asynchronous, so use the RCU list insertion | 2782 | * lockdep/oops can run asynchronous, so use the RCU list insertion |
2783 | * function to insert in a way safe to concurrent readers. | 2783 | * function to insert in a way safe to concurrent readers. |
@@ -2971,7 +2971,7 @@ static const char *get_ksymbol(struct module *mod, | |||
2971 | else | 2971 | else |
2972 | nextval = (unsigned long)mod->module_core+mod->core_text_size; | 2972 | nextval = (unsigned long)mod->module_core+mod->core_text_size; |
2973 | 2973 | ||
2974 | /* Scan for closest preceeding symbol, and next symbol. (ELF | 2974 | /* Scan for closest preceding symbol, and next symbol. (ELF |
2975 | starts real symbols at 1). */ | 2975 | starts real symbols at 1). */ |
2976 | for (i = 1; i < mod->num_symtab; i++) { | 2976 | for (i = 1; i < mod->num_symtab; i++) { |
2977 | if (mod->symtab[i].st_shndx == SHN_UNDEF) | 2977 | if (mod->symtab[i].st_shndx == SHN_UNDEF) |
@@ -3224,7 +3224,7 @@ static int m_show(struct seq_file *m, void *p) | |||
3224 | mod->state == MODULE_STATE_COMING ? "Loading": | 3224 | mod->state == MODULE_STATE_COMING ? "Loading": |
3225 | "Live"); | 3225 | "Live"); |
3226 | /* Used by oprofile and other similar tools. */ | 3226 | /* Used by oprofile and other similar tools. */ |
3227 | seq_printf(m, " 0x%p", mod->module_core); | 3227 | seq_printf(m, " 0x%pK", mod->module_core); |
3228 | 3228 | ||
3229 | /* Taints info */ | 3229 | /* Taints info */ |
3230 | if (mod->taints) | 3230 | if (mod->taints) |
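The module.c hunks are mostly comment spelling fixes plus two information-leak plugs: the per-section sysfs addresses and the /proc/modules core address switch from plain %lx/%p to %pK, which prints zeroes to readers not allowed to see kernel pointers (governed by the kptr_restrict sysctl). A toy show routine using the same format:

#include <linux/seq_file.h>

static unsigned long example_base = 0xc0de;	/* placeholder value */

static int example_show(struct seq_file *m, void *v)
{
	/* Unprivileged readers see zeroes instead of the real address. */
	seq_printf(m, "base at 0x%pK\n", (void *)example_base);
	return 0;
}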
diff --git a/kernel/mutex.c b/kernel/mutex.c index a5889fb28ecf..c4195fa98900 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -245,7 +245,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
245 | } | 245 | } |
246 | __set_task_state(task, state); | 246 | __set_task_state(task, state); |
247 | 247 | ||
248 | /* didnt get the lock, go to sleep: */ | 248 | /* didn't get the lock, go to sleep: */ |
249 | spin_unlock_mutex(&lock->wait_lock, flags); | 249 | spin_unlock_mutex(&lock->wait_lock, flags); |
250 | preempt_enable_no_resched(); | 250 | preempt_enable_no_resched(); |
251 | schedule(); | 251 | schedule(); |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f74e6c00e26d..a05d191ffdd9 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
69 | goto out_ns; | 69 | goto out_ns; |
70 | } | 70 | } |
71 | 71 | ||
72 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); | 72 | new_nsp->uts_ns = copy_utsname(flags, tsk); |
73 | if (IS_ERR(new_nsp->uts_ns)) { | 73 | if (IS_ERR(new_nsp->uts_ns)) { |
74 | err = PTR_ERR(new_nsp->uts_ns); | 74 | err = PTR_ERR(new_nsp->uts_ns); |
75 | goto out_uts; | 75 | goto out_uts; |
76 | } | 76 | } |
77 | 77 | ||
78 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); | 78 | new_nsp->ipc_ns = copy_ipcs(flags, tsk); |
79 | if (IS_ERR(new_nsp->ipc_ns)) { | 79 | if (IS_ERR(new_nsp->ipc_ns)) { |
80 | err = PTR_ERR(new_nsp->ipc_ns); | 80 | err = PTR_ERR(new_nsp->ipc_ns); |
81 | goto out_ipc; | 81 | goto out_ipc; |
diff --git a/kernel/padata.c b/kernel/padata.c index 751019415d23..b91941df5e63 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
@@ -262,7 +262,7 @@ static void padata_reorder(struct parallel_data *pd) | |||
262 | /* | 262 | /* |
263 | * This cpu has to do the parallel processing of the next | 263 | * This cpu has to do the parallel processing of the next |
264 | * object. It's waiting in the cpu's parallelization queue, | 264 | * object. It's waiting in the cpu's parallelization queue, |
265 | * so exit imediately. | 265 | * so exit immediately. |
266 | */ | 266 | */ |
267 | if (PTR_ERR(padata) == -ENODATA) { | 267 | if (PTR_ERR(padata) == -ENODATA) { |
268 | del_timer(&pd->timer); | 268 | del_timer(&pd->timer); |
@@ -284,7 +284,7 @@ static void padata_reorder(struct parallel_data *pd) | |||
284 | /* | 284 | /* |
285 | * The next object that needs serialization might have arrived to | 285 | * The next object that needs serialization might have arrived to |
286 | * the reorder queues in the meantime, we will be called again | 286 | * the reorder queues in the meantime, we will be called again |
287 | * from the timer function if noone else cares for it. | 287 | * from the timer function if no one else cares for it. |
288 | */ | 288 | */ |
289 | if (atomic_read(&pd->reorder_objects) | 289 | if (atomic_read(&pd->reorder_objects) |
290 | && !(pinst->flags & PADATA_RESET)) | 290 | && !(pinst->flags & PADATA_RESET)) |
@@ -515,7 +515,7 @@ static void __padata_stop(struct padata_instance *pinst) | |||
515 | put_online_cpus(); | 515 | put_online_cpus(); |
516 | } | 516 | } |
517 | 517 | ||
518 | /* Replace the internal control stucture with a new one. */ | 518 | /* Replace the internal control structure with a new one. */ |
519 | static void padata_replace(struct padata_instance *pinst, | 519 | static void padata_replace(struct padata_instance *pinst, |
520 | struct parallel_data *pd_new) | 520 | struct parallel_data *pd_new) |
521 | { | 521 | { |
@@ -768,7 +768,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) | |||
768 | } | 768 | } |
769 | 769 | ||
770 | /** | 770 | /** |
771 | * padata_remove_cpu - remove a cpu from the one or both(serial and paralell) | 771 | * padata_remove_cpu - remove a cpu from the one or both(serial and parallel) |
772 | * padata cpumasks. | 772 | * padata cpumasks. |
773 | * | 773 | * |
774 | * @pinst: padata instance | 774 | * @pinst: padata instance |
diff --git a/kernel/panic.c b/kernel/panic.c index 991bb87a1704..69231670eb95 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -433,3 +433,13 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
433 | 433 | ||
434 | core_param(panic, panic_timeout, int, 0644); | 434 | core_param(panic, panic_timeout, int, 0644); |
435 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 435 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
436 | |||
437 | static int __init oops_setup(char *s) | ||
438 | { | ||
439 | if (!s) | ||
440 | return -EINVAL; | ||
441 | if (!strcmp(s, "panic")) | ||
442 | panic_on_oops = 1; | ||
443 | return 0; | ||
444 | } | ||
445 | early_param("oops", oops_setup); | ||
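The new early_param gives panic_on_oops a boot-time switch alongside the existing sysctl. A hypothetical GRUB-style command line and its runtime equivalent (the device name and the 30-second reboot timeout are illustrative):

	linux /vmlinuz root=/dev/sda1 ro oops=panic panic=30

	# after boot, the same behaviour via sysctl:
	sysctl -w kernel.panic_on_oops=1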
diff --git a/kernel/params.c b/kernel/params.c index 0da1411222b9..7ab388a48a2e 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -95,7 +95,7 @@ static int parse_one(char *param, | |||
95 | /* Find parameter */ | 95 | /* Find parameter */ |
96 | for (i = 0; i < num_params; i++) { | 96 | for (i = 0; i < num_params; i++) { |
97 | if (parameq(param, params[i].name)) { | 97 | if (parameq(param, params[i].name)) { |
98 | /* Noone handled NULL, so do it here. */ | 98 | /* No one handled NULL, so do it here. */ |
99 | if (!val && params[i].ops->set != param_set_bool) | 99 | if (!val && params[i].ops->set != param_set_bool) |
100 | return -EINVAL; | 100 | return -EINVAL; |
101 | DEBUGP("They are equal! Calling %p\n", | 101 | DEBUGP("They are equal! Calling %p\n", |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3472bb1a070c..8e81a9860a0d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -145,7 +145,8 @@ static struct srcu_struct pmus_srcu; | |||
145 | */ | 145 | */ |
146 | int sysctl_perf_event_paranoid __read_mostly = 1; | 146 | int sysctl_perf_event_paranoid __read_mostly = 1; |
147 | 147 | ||
148 | int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ | 148 | /* Minimum for 512 kiB + 1 user control page */ |
149 | int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ | ||
149 | 150 | ||
150 | /* | 151 | /* |
151 | * max perf event sample rate | 152 | * max perf event sample rate |
@@ -363,6 +364,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
363 | } | 364 | } |
364 | 365 | ||
365 | if (mode & PERF_CGROUP_SWIN) { | 366 | if (mode & PERF_CGROUP_SWIN) { |
367 | WARN_ON_ONCE(cpuctx->cgrp); | ||
366 | /* set cgrp before ctxsw in to | 368 | /* set cgrp before ctxsw in to |
367 | * allow event_filter_match() to not | 369 | * allow event_filter_match() to not |
368 | * have to pass task around | 370 | * have to pass task around |
@@ -941,6 +943,7 @@ static void perf_group_attach(struct perf_event *event) | |||
941 | static void | 943 | static void |
942 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 944 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
943 | { | 945 | { |
946 | struct perf_cpu_context *cpuctx; | ||
944 | /* | 947 | /* |
945 | * We can have double detach due to exit/hot-unplug + close. | 948 | * We can have double detach due to exit/hot-unplug + close. |
946 | */ | 949 | */ |
@@ -949,8 +952,17 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
949 | 952 | ||
950 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | 953 | event->attach_state &= ~PERF_ATTACH_CONTEXT; |
951 | 954 | ||
952 | if (is_cgroup_event(event)) | 955 | if (is_cgroup_event(event)) { |
953 | ctx->nr_cgroups--; | 956 | ctx->nr_cgroups--; |
957 | cpuctx = __get_cpu_context(ctx); | ||
958 | /* | ||
959 | * if there are no more cgroup events | ||
960 | * then clear cgrp to avoid stale pointer | ||
961 | * in update_cgrp_time_from_cpuctx() | ||
962 | */ | ||
963 | if (!ctx->nr_cgroups) | ||
964 | cpuctx->cgrp = NULL; | ||
965 | } | ||
954 | 966 | ||
955 | ctx->nr_events--; | 967 | ctx->nr_events--; |
956 | if (event->attr.inherit_stat) | 968 | if (event->attr.inherit_stat) |
@@ -2412,6 +2424,14 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
2412 | if (!ctx || !ctx->nr_events) | 2424 | if (!ctx || !ctx->nr_events) |
2413 | goto out; | 2425 | goto out; |
2414 | 2426 | ||
2427 | /* | ||
2428 | * We must ctxsw out cgroup events to avoid conflict | ||
2429 | * when invoking perf_task_event_sched_in() later on | ||
2430 | * in this function. Otherwise we end up trying to | ||
2431 | * ctxswin cgroup events which are already scheduled | ||
2432 | * in. | ||
2433 | */ | ||
2434 | perf_cgroup_sched_out(current); | ||
2415 | task_ctx_sched_out(ctx, EVENT_ALL); | 2435 | task_ctx_sched_out(ctx, EVENT_ALL); |
2416 | 2436 | ||
2417 | raw_spin_lock(&ctx->lock); | 2437 | raw_spin_lock(&ctx->lock); |
@@ -2436,6 +2456,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
2436 | 2456 | ||
2437 | raw_spin_unlock(&ctx->lock); | 2457 | raw_spin_unlock(&ctx->lock); |
2438 | 2458 | ||
2459 | /* | ||
2460 | * Also calls ctxswin for cgroup events, if any: | ||
2461 | */ | ||
2439 | perf_event_context_sched_in(ctx, ctx->task); | 2462 | perf_event_context_sched_in(ctx, ctx->task); |
2440 | out: | 2463 | out: |
2441 | local_irq_restore(flags); | 2464 | local_irq_restore(flags); |
@@ -6520,6 +6543,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6520 | goto err_alloc; | 6543 | goto err_alloc; |
6521 | } | 6544 | } |
6522 | 6545 | ||
6546 | if (task) { | ||
6547 | put_task_struct(task); | ||
6548 | task = NULL; | ||
6549 | } | ||
6550 | |||
6523 | /* | 6551 | /* |
6524 | * Look up the group leader (we will attach this event to it): | 6552 | * Look up the group leader (we will attach this event to it): |
6525 | */ | 6553 | */ |
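The perf changes tighten cgroup-event bookkeeping (warn if cpuctx->cgrp is already set on switch-in, clear it when the last cgroup event leaves a context, and switch cgroup events out before enable-on-exec) and fix a reference leak: perf_event_open() now drops its task reference as soon as the context has been found, clearing the local pointer so shared error paths cannot put it twice. The shape of that last fix, as a hedged sketch:

#include <linux/sched.h>

static void done_with_task(struct task_struct **taskp)
{
	if (*taskp) {
		put_task_struct(*taskp);
		*taskp = NULL;	/* later "if (task) put_task_struct(task)"
				 * error paths become harmless no-ops */
	}
}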
diff --git a/kernel/pid.c b/kernel/pid.c index 02f221274265..57a8346a270e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) | |||
217 | return -1; | 217 | return -1; |
218 | } | 218 | } |
219 | 219 | ||
220 | int next_pidmap(struct pid_namespace *pid_ns, int last) | 220 | int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) |
221 | { | 221 | { |
222 | int offset; | 222 | int offset; |
223 | struct pidmap *map, *end; | 223 | struct pidmap *map, *end; |
224 | 224 | ||
225 | if (last >= PID_MAX_LIMIT) | ||
226 | return -1; | ||
227 | |||
225 | offset = (last + 1) & BITS_PER_PAGE_MASK; | 228 | offset = (last + 1) & BITS_PER_PAGE_MASK; |
226 | map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; | 229 | map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; |
227 | end = &pid_ns->pidmap[PIDMAP_ENTRIES]; | 230 | end = &pid_ns->pidmap[PIDMAP_ENTRIES]; |
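The bounds check added to next_pidmap() matters because 'last' arrives from a caller-supplied position (in practice the /proc readdir offset). With 4 KiB pages BITS_PER_PAGE is 32768 and, on 64-bit, PID_MAX_LIMIT is 4194304, so pidmap[] holds only 128 entries; an unchecked value such as 2147483647 would select entry (2147483647 + 1) / 32768 = 65536, far past the end of the array. Rejecting anything at or above PID_MAX_LIMIT (and widening 'last' to unsigned) keeps the computed map pointer inside the table.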
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a5aff94e1f0b..e9c9adc84ca6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 15 | #include <linux/acct.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/proc_fs.h> | ||
17 | 18 | ||
18 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 19 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
19 | 20 | ||
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
72 | { | 73 | { |
73 | struct pid_namespace *ns; | 74 | struct pid_namespace *ns; |
74 | unsigned int level = parent_pid_ns->level + 1; | 75 | unsigned int level = parent_pid_ns->level + 1; |
75 | int i; | 76 | int i, err = -ENOMEM; |
76 | 77 | ||
77 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); | 78 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); |
78 | if (ns == NULL) | 79 | if (ns == NULL) |
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
96 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 97 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
97 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 98 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
98 | 99 | ||
100 | err = pid_ns_prepare_proc(ns); | ||
101 | if (err) | ||
102 | goto out_put_parent_pid_ns; | ||
103 | |||
99 | return ns; | 104 | return ns; |
100 | 105 | ||
106 | out_put_parent_pid_ns: | ||
107 | put_pid_ns(parent_pid_ns); | ||
101 | out_free_map: | 108 | out_free_map: |
102 | kfree(ns->pidmap[0].page); | 109 | kfree(ns->pidmap[0].page); |
103 | out_free: | 110 | out_free: |
104 | kmem_cache_free(pid_ns_cachep, ns); | 111 | kmem_cache_free(pid_ns_cachep, ns); |
105 | out: | 112 | out: |
106 | return ERR_PTR(-ENOMEM); | 113 | return ERR_PTR(err); |
107 | } | 114 | } |
108 | 115 | ||
109 | static void destroy_pid_namespace(struct pid_namespace *ns) | 116 | static void destroy_pid_namespace(struct pid_namespace *ns) |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 67fea9d25d55..0791b13df7bf 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -1347,7 +1347,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1347 | 1347 | ||
1348 | /* | 1348 | /* |
1349 | * Now that all the timers on our list have the firing flag, | 1349 | * Now that all the timers on our list have the firing flag, |
1350 | * noone will touch their list entries but us. We'll take | 1350 | * no one will touch their list entries but us. We'll take |
1351 | * each timer's lock before clearing its firing flag, so no | 1351 | * each timer's lock before clearing its firing flag, so no |
1352 | * timer call will interfere. | 1352 | * timer call will interfere. |
1353 | */ | 1353 | */ |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4c0124919f9a..e5498d7405c3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -313,7 +313,7 @@ static void schedule_next_timer(struct k_itimer *timr) | |||
313 | * restarted (i.e. we have flagged this in the sys_private entry of the | 313 | * restarted (i.e. we have flagged this in the sys_private entry of the |
314 | * info block). | 314 | * info block). |
315 | * | 315 | * |
316 | * To protect aginst the timer going away while the interrupt is queued, | 316 | * To protect against the timer going away while the interrupt is queued, |
317 | * we require that the it_requeue_pending flag be set. | 317 | * we require that the it_requeue_pending flag be set. |
318 | */ | 318 | */ |
319 | void do_schedule_next_timer(struct siginfo *info) | 319 | void do_schedule_next_timer(struct siginfo *info) |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4603f08dc47b..6de9a8fc3417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -18,9 +18,13 @@ config SUSPEND_FREEZER | |||
18 | 18 | ||
19 | Turning OFF this setting is NOT recommended! If in doubt, say Y. | 19 | Turning OFF this setting is NOT recommended! If in doubt, say Y. |
20 | 20 | ||
21 | config HIBERNATE_CALLBACKS | ||
22 | bool | ||
23 | |||
21 | config HIBERNATION | 24 | config HIBERNATION |
22 | bool "Hibernation (aka 'suspend to disk')" | 25 | bool "Hibernation (aka 'suspend to disk')" |
23 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE | 26 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE |
27 | select HIBERNATE_CALLBACKS | ||
24 | select LZO_COMPRESS | 28 | select LZO_COMPRESS |
25 | select LZO_DECOMPRESS | 29 | select LZO_DECOMPRESS |
26 | ---help--- | 30 | ---help--- |
@@ -85,7 +89,7 @@ config PM_STD_PARTITION | |||
85 | 89 | ||
86 | config PM_SLEEP | 90 | config PM_SLEEP |
87 | def_bool y | 91 | def_bool y |
88 | depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE | 92 | depends on SUSPEND || HIBERNATE_CALLBACKS |
89 | 93 | ||
90 | config PM_SLEEP_SMP | 94 | config PM_SLEEP_SMP |
91 | def_bool y | 95 | def_bool y |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c350e18b53e3..c5ebc6a90643 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | 1 | |
2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | ||
2 | 3 | ||
3 | obj-$(CONFIG_PM) += main.o | 4 | obj-$(CONFIG_PM) += main.o |
4 | obj-$(CONFIG_PM_SLEEP) += console.o | 5 | obj-$(CONFIG_PM_SLEEP) += console.o |
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c index 83bbc7c02df9..d09dd10c5a5e 100644 --- a/kernel/power/block_io.c +++ b/kernel/power/block_io.c | |||
@@ -28,7 +28,7 @@ | |||
28 | static int submit(int rw, struct block_device *bdev, sector_t sector, | 28 | static int submit(int rw, struct block_device *bdev, sector_t sector, |
29 | struct page *page, struct bio **bio_chain) | 29 | struct page *page, struct bio **bio_chain) |
30 | { | 30 | { |
31 | const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; | 31 | const int bio_rw = rw | REQ_SYNC; |
32 | struct bio *bio; | 32 | struct bio *bio; |
33 | 33 | ||
34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | 34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index aeabd26e3342..50aae660174d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -273,8 +273,11 @@ static int create_image(int platform_mode) | |||
273 | local_irq_disable(); | 273 | local_irq_disable(); |
274 | 274 | ||
275 | error = sysdev_suspend(PMSG_FREEZE); | 275 | error = sysdev_suspend(PMSG_FREEZE); |
276 | if (!error) | 276 | if (!error) { |
277 | error = syscore_suspend(); | 277 | error = syscore_suspend(); |
278 | if (error) | ||
279 | sysdev_resume(); | ||
280 | } | ||
278 | if (error) { | 281 | if (error) { |
279 | printk(KERN_ERR "PM: Some system devices failed to power down, " | 282 | printk(KERN_ERR "PM: Some system devices failed to power down, " |
280 | "aborting hibernation\n"); | 283 | "aborting hibernation\n"); |
@@ -407,8 +410,11 @@ static int resume_target_kernel(bool platform_mode) | |||
407 | local_irq_disable(); | 410 | local_irq_disable(); |
408 | 411 | ||
409 | error = sysdev_suspend(PMSG_QUIESCE); | 412 | error = sysdev_suspend(PMSG_QUIESCE); |
410 | if (!error) | 413 | if (!error) { |
411 | error = syscore_suspend(); | 414 | error = syscore_suspend(); |
415 | if (error) | ||
416 | sysdev_resume(); | ||
417 | } | ||
412 | if (error) | 418 | if (error) |
413 | goto Enable_irqs; | 419 | goto Enable_irqs; |
414 | 420 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 8eaba5f27b10..de9aef8742f4 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -224,7 +224,7 @@ power_attr(state); | |||
224 | * writing to 'state'. It first should read from 'wakeup_count' and store | 224 | * writing to 'state'. It first should read from 'wakeup_count' and store |
225 | * the read value. Then, after carrying out its own preparations for the system | 225 | * the read value. Then, after carrying out its own preparations for the system |
226 | * transition to a sleep state, it should write the stored value to | 226 | * transition to a sleep state, it should write the stored value to |
227 | * 'wakeup_count'. If that fails, at least one wakeup event has occured since | 227 | * 'wakeup_count'. If that fails, at least one wakeup event has occurred since |
228 | * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it | 228 | * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it |
229 | * is allowed to write to 'state', but the transition will be aborted if there | 229 | * is allowed to write to 'state', but the transition will be aborted if there |
230 | * are any wakeup events detected after 'wakeup_count' was written to. | 230 | * are any wakeup events detected after 'wakeup_count' was written to. |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 2814c32aed51..8935369d503a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -164,8 +164,11 @@ static int suspend_enter(suspend_state_t state) | |||
164 | BUG_ON(!irqs_disabled()); | 164 | BUG_ON(!irqs_disabled()); |
165 | 165 | ||
166 | error = sysdev_suspend(PMSG_SUSPEND); | 166 | error = sysdev_suspend(PMSG_SUSPEND); |
167 | if (!error) | 167 | if (!error) { |
168 | error = syscore_suspend(); | 168 | error = syscore_suspend(); |
169 | if (error) | ||
170 | sysdev_resume(); | ||
171 | } | ||
169 | if (!error) { | 172 | if (!error) { |
170 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { | 173 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { |
171 | error = suspend_ops->enter(state); | 174 | error = suspend_ops->enter(state); |
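kexec.c, hibernate.c and suspend.c all receive the same correction: once sysdev_suspend() has succeeded, a failure of syscore_suspend() must roll the first stage back before the error is propagated, otherwise devices are left half-suspended on the failure path. The pattern in isolation, with placeholder stage functions:

/* Placeholders standing in for sysdev_suspend()/sysdev_resume()
 * and syscore_suspend(). */
static int stage_one_suspend(void) { return 0; }
static void stage_one_resume(void) { }
static int stage_two_suspend(void) { return 0; }

static int suspend_both_stages(void)
{
	int error;

	error = stage_one_suspend();
	if (error)
		return error;

	error = stage_two_suspend();
	if (error)
		stage_one_resume();	/* undo the stage that succeeded */
	return error;
}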
diff --git a/kernel/printk.c b/kernel/printk.c index 33284adb2189..da8ca817eae3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -53,7 +53,7 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) | |||
53 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) | 53 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) |
54 | 54 | ||
55 | /* printk's without a loglevel use this.. */ | 55 | /* printk's without a loglevel use this.. */ |
56 | #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ | 56 | #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL |
57 | 57 | ||
58 | /* We show everything that is MORE important than this.. */ | 58 | /* We show everything that is MORE important than this.. */ |
59 | #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ | 59 | #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ |
@@ -113,6 +113,11 @@ static unsigned con_start; /* Index into log_buf: next char to be sent to consol | |||
113 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ | 113 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * If exclusive_console is non-NULL then only this console is to be printed to. | ||
117 | */ | ||
118 | static struct console *exclusive_console; | ||
119 | |||
120 | /* | ||
116 | * Array of consoles built from command line options (console=) | 121 | * Array of consoles built from command line options (console=) |
117 | */ | 122 | */ |
118 | struct console_cmdline | 123 | struct console_cmdline |
@@ -476,6 +481,8 @@ static void __call_console_drivers(unsigned start, unsigned end) | |||
476 | struct console *con; | 481 | struct console *con; |
477 | 482 | ||
478 | for_each_console(con) { | 483 | for_each_console(con) { |
484 | if (exclusive_console && con != exclusive_console) | ||
485 | continue; | ||
479 | if ((con->flags & CON_ENABLED) && con->write && | 486 | if ((con->flags & CON_ENABLED) && con->write && |
480 | (cpu_online(smp_processor_id()) || | 487 | (cpu_online(smp_processor_id()) || |
481 | (con->flags & CON_ANYTIME))) | 488 | (con->flags & CON_ANYTIME))) |
@@ -1230,6 +1237,11 @@ void console_unlock(void) | |||
1230 | local_irq_restore(flags); | 1237 | local_irq_restore(flags); |
1231 | } | 1238 | } |
1232 | console_locked = 0; | 1239 | console_locked = 0; |
1240 | |||
1241 | /* Release the exclusive_console once it is used */ | ||
1242 | if (unlikely(exclusive_console)) | ||
1243 | exclusive_console = NULL; | ||
1244 | |||
1233 | up(&console_sem); | 1245 | up(&console_sem); |
1234 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1246 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1235 | if (wake_klogd) | 1247 | if (wake_klogd) |
@@ -1316,6 +1328,18 @@ void console_start(struct console *console) | |||
1316 | } | 1328 | } |
1317 | EXPORT_SYMBOL(console_start); | 1329 | EXPORT_SYMBOL(console_start); |
1318 | 1330 | ||
1331 | static int __read_mostly keep_bootcon; | ||
1332 | |||
1333 | static int __init keep_bootcon_setup(char *str) | ||
1334 | { | ||
1335 | keep_bootcon = 1; | ||
1336 | printk(KERN_INFO "debug: skip boot console de-registration.\n"); | ||
1337 | |||
1338 | return 0; | ||
1339 | } | ||
1340 | |||
1341 | early_param("keep_bootcon", keep_bootcon_setup); | ||
1342 | |||
1319 | /* | 1343 | /* |
1320 | * The console driver calls this routine during kernel initialization | 1344 | * The console driver calls this routine during kernel initialization |
1321 | * to register the console printing procedure with printk() and to | 1345 | * to register the console printing procedure with printk() and to |
@@ -1452,6 +1476,12 @@ void register_console(struct console *newcon) | |||
1452 | spin_lock_irqsave(&logbuf_lock, flags); | 1476 | spin_lock_irqsave(&logbuf_lock, flags); |
1453 | con_start = log_start; | 1477 | con_start = log_start; |
1454 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1478 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1479 | /* | ||
1480 | * We're about to replay the log buffer. Only do this to the | ||
1481 | * just-registered console to avoid excessive message spam to | ||
1482 | * the already-registered consoles. | ||
1483 | */ | ||
1484 | exclusive_console = newcon; | ||
1455 | } | 1485 | } |
1456 | console_unlock(); | 1486 | console_unlock(); |
1457 | console_sysfs_notify(); | 1487 | console_sysfs_notify(); |
@@ -1463,7 +1493,9 @@ void register_console(struct console *newcon) | |||
1463 | * users know there might be something in the kernel's log buffer that | 1493 | * users know there might be something in the kernel's log buffer that |
1464 | * went to the bootconsole (that they do not see on the real console) | 1494 | * went to the bootconsole (that they do not see on the real console) |
1465 | */ | 1495 | */ |
1466 | if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { | 1496 | if (bcon && |
1497 | ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && | ||
1498 | !keep_bootcon) { | ||
1467 | /* we need to iterate through twice, to make sure we print | 1499 | /* we need to iterate through twice, to make sure we print |
1468 | * everything out, before we unregister the console(s) | 1500 | * everything out, before we unregister the console(s) |
1469 | */ | 1501 | */ |
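printk.c picks up two debugging aids: when a real console registers, the log-buffer replay now goes only to that console (exclusive_console) rather than spamming every console already registered, and the keep_bootcon parameter prevents the boot console from being unregistered during the handover, which helps when the real console driver breaks right after taking over. A hypothetical command line using it (serial parameters are illustrative):

	console=ttyS0,115200 earlyprintk=serial,ttyS0,115200 keep_bootcon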
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e2302e40b360..0fc1eed28d27 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -134,21 +134,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
134 | return 0; | 134 | return 0; |
135 | rcu_read_lock(); | 135 | rcu_read_lock(); |
136 | tcred = __task_cred(task); | 136 | tcred = __task_cred(task); |
137 | if ((cred->uid != tcred->euid || | 137 | if (cred->user->user_ns == tcred->user->user_ns && |
138 | cred->uid != tcred->suid || | 138 | (cred->uid == tcred->euid && |
139 | cred->uid != tcred->uid || | 139 | cred->uid == tcred->suid && |
140 | cred->gid != tcred->egid || | 140 | cred->uid == tcred->uid && |
141 | cred->gid != tcred->sgid || | 141 | cred->gid == tcred->egid && |
142 | cred->gid != tcred->gid) && | 142 | cred->gid == tcred->sgid && |
143 | !capable(CAP_SYS_PTRACE)) { | 143 | cred->gid == tcred->gid)) |
144 | rcu_read_unlock(); | 144 | goto ok; |
145 | return -EPERM; | 145 | if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) |
146 | } | 146 | goto ok; |
147 | rcu_read_unlock(); | ||
148 | return -EPERM; | ||
149 | ok: | ||
147 | rcu_read_unlock(); | 150 | rcu_read_unlock(); |
148 | smp_rmb(); | 151 | smp_rmb(); |
149 | if (task->mm) | 152 | if (task->mm) |
150 | dumpable = get_dumpable(task->mm); | 153 | dumpable = get_dumpable(task->mm); |
151 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | 154 | if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE)) |
152 | return -EPERM; | 155 | return -EPERM; |
153 | 156 | ||
154 | return security_ptrace_access_check(task, mode); | 157 | return security_ptrace_access_check(task, mode); |
@@ -198,7 +201,7 @@ static int ptrace_attach(struct task_struct *task) | |||
198 | goto unlock_tasklist; | 201 | goto unlock_tasklist; |
199 | 202 | ||
200 | task->ptrace = PT_PTRACED; | 203 | task->ptrace = PT_PTRACED; |
201 | if (capable(CAP_SYS_PTRACE)) | 204 | if (task_ns_capable(task, CAP_SYS_PTRACE)) |
202 | task->ptrace |= PT_PTRACE_CAP; | 205 | task->ptrace |= PT_PTRACE_CAP; |
203 | 206 | ||
204 | __ptrace_link(task, current); | 207 | __ptrace_link(task, current); |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index c7eaa37a768b..34683efa2cce 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member, | |||
126 | pos, buf, s - buf); | 126 | pos, buf, s - buf); |
127 | } | 127 | } |
128 | 128 | ||
129 | #if BITS_PER_LONG == 32 | ||
130 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
131 | { | ||
132 | unsigned long flags; | ||
133 | u64 ret; | ||
134 | |||
135 | spin_lock_irqsave(&counter->lock, flags); | ||
136 | ret = *res_counter_member(counter, member); | ||
137 | spin_unlock_irqrestore(&counter->lock, flags); | ||
138 | |||
139 | return ret; | ||
140 | } | ||
141 | #else | ||
129 | u64 res_counter_read_u64(struct res_counter *counter, int member) | 142 | u64 res_counter_read_u64(struct res_counter *counter, int member) |
130 | { | 143 | { |
131 | return *res_counter_member(counter, member); | 144 | return *res_counter_member(counter, member); |
132 | } | 145 | } |
146 | #endif | ||
133 | 147 | ||
134 | int res_counter_memparse_write_strategy(const char *buf, | 148 | int res_counter_memparse_write_strategy(const char *buf, |
135 | unsigned long long *res) | 149 | unsigned long long *res) |
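The res_counter change is about atomicity of the read: on a 32-bit kernel a u64 load is two machine words, so a reader racing a writer can observe a torn value unless it takes the counter lock; on 64-bit the single aligned load is kept. A minimal userspace sketch of the same pattern, with a pthread mutex standing in for the irq-safe spinlock:

#include <pthread.h>
#include <stdint.h>

struct counter {
	pthread_mutex_t lock;   /* stands in for spin_lock_irqsave() in the kernel */
	uint64_t usage;
};

/* On a 32-bit target a bare 64-bit load can tear, so read under the lock. */
static uint64_t counter_read(struct counter *c)
{
#if UINTPTR_MAX == 0xffffffffu          /* 32-bit: two-word load, lock it */
	pthread_mutex_lock(&c->lock);
	uint64_t v = c->usage;
	pthread_mutex_unlock(&c->lock);
	return v;
#else                                    /* 64-bit: single aligned load */
	return c->usage;
#endif
}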
diff --git a/kernel/sched.c b/kernel/sched.c index a172494a9a63..312f8b95c2d4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2309,7 +2309,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
2309 | * Cause a process which is running on another CPU to enter | 2309 | * Cause a process which is running on another CPU to enter |
2310 | * kernel-mode, without any delay. (to get signals handled.) | 2310 | * kernel-mode, without any delay. (to get signals handled.) |
2311 | * | 2311 | * |
2312 | * NOTE: this function doesnt have to take the runqueue lock, | 2312 | * NOTE: this function doesn't have to take the runqueue lock, |
2313 | * because all it wants to ensure is that the remote task enters | 2313 | * because all it wants to ensure is that the remote task enters |
2314 | * the kernel. If the IPI races and the task has been migrated | 2314 | * the kernel. If the IPI races and the task has been migrated |
2315 | * to another CPU then no harm is done and the purpose has been | 2315 | * to another CPU then no harm is done and the purpose has been |
@@ -4111,6 +4111,16 @@ need_resched: | |||
4111 | try_to_wake_up_local(to_wakeup); | 4111 | try_to_wake_up_local(to_wakeup); |
4112 | } | 4112 | } |
4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | 4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); |
4114 | |||
4115 | /* | ||
4116 | * If we are going to sleep and we have plugged IO queued, make | ||
4117 | * sure to submit it to avoid deadlocks. | ||
4118 | */ | ||
4119 | if (blk_needs_flush_plug(prev)) { | ||
4120 | raw_spin_unlock(&rq->lock); | ||
4121 | blk_schedule_flush_plug(prev); | ||
4122 | raw_spin_lock(&rq->lock); | ||
4123 | } | ||
4114 | } | 4124 | } |
4115 | switch_count = &prev->nvcsw; | 4125 | switch_count = &prev->nvcsw; |
4116 | } | 4126 | } |
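The added block in __schedule() keeps a task that is about to sleep from deadlocking on block IO it queued on its own per-task plug but never submitted: the runqueue lock is dropped, the plug is flushed, and the lock is retaken before the switch. A tiny sketch of that unlock, flush, relock shape, with stand-ins for blk_needs_flush_plug() and blk_schedule_flush_plug():

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-ins for blk_needs_flush_plug()/blk_schedule_flush_plug(). */
static bool io_is_plugged(void)      { return false; }
static void flush_plugged_io(void)   { }

/* Called with rq_lock held, as __schedule() holds the runqueue lock. */
static void before_sleep(void)
{
	if (io_is_plugged()) {
		/* Can't submit IO under the runqueue lock: drop, flush, retake. */
		pthread_mutex_unlock(&rq_lock);
		flush_plugged_io();
		pthread_mutex_lock(&rq_lock);
	}
}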
@@ -4892,8 +4902,11 @@ static bool check_same_owner(struct task_struct *p) | |||
4892 | 4902 | ||
4893 | rcu_read_lock(); | 4903 | rcu_read_lock(); |
4894 | pcred = __task_cred(p); | 4904 | pcred = __task_cred(p); |
4895 | match = (cred->euid == pcred->euid || | 4905 | if (cred->user->user_ns == pcred->user->user_ns) |
4896 | cred->euid == pcred->uid); | 4906 | match = (cred->euid == pcred->euid || |
4907 | cred->euid == pcred->uid); | ||
4908 | else | ||
4909 | match = false; | ||
4897 | rcu_read_unlock(); | 4910 | rcu_read_unlock(); |
4898 | return match; | 4911 | return match; |
4899 | } | 4912 | } |
@@ -4984,7 +4997,7 @@ recheck: | |||
4984 | */ | 4997 | */ |
4985 | raw_spin_lock_irqsave(&p->pi_lock, flags); | 4998 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
4986 | /* | 4999 | /* |
4987 | * To be able to change p->policy safely, the apropriate | 5000 | * To be able to change p->policy safely, the appropriate |
4988 | * runqueue lock must be held. | 5001 | * runqueue lock must be held. |
4989 | */ | 5002 | */ |
4990 | rq = __task_rq_lock(p); | 5003 | rq = __task_rq_lock(p); |
@@ -4998,6 +5011,17 @@ recheck: | |||
4998 | return -EINVAL; | 5011 | return -EINVAL; |
4999 | } | 5012 | } |
5000 | 5013 | ||
5014 | /* | ||
5015 | * If not changing anything there's no need to proceed further: | ||
5016 | */ | ||
5017 | if (unlikely(policy == p->policy && (!rt_policy(policy) || | ||
5018 | param->sched_priority == p->rt_priority))) { | ||
5019 | |||
5020 | __task_rq_unlock(rq); | ||
5021 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5022 | return 0; | ||
5023 | } | ||
5024 | |||
5001 | #ifdef CONFIG_RT_GROUP_SCHED | 5025 | #ifdef CONFIG_RT_GROUP_SCHED |
5002 | if (user) { | 5026 | if (user) { |
5003 | /* | 5027 | /* |
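The new early-out in sched_setscheduler()'s recheck path returns 0 without going through the class switch when the request would change nothing: same policy, and for RT policies the same priority. A small sketch of just that predicate (assumed policy constants, not the kernel's headers):

#include <stdbool.h>

enum { SCHED_NORMAL, SCHED_FIFO, SCHED_RR };  /* assumed values for the sketch */

static bool rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}

/* True when the request changes neither the policy nor, for RT policies,
 * the priority; the caller can then unlock and return 0 immediately. */
static bool setscheduler_is_noop(int cur_policy, int cur_rt_prio,
                                 int new_policy, int new_prio)
{
	return new_policy == cur_policy &&
	       (!rt_policy(new_policy) || new_prio == cur_rt_prio);
}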
@@ -5221,7 +5245,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
5221 | goto out_free_cpus_allowed; | 5245 | goto out_free_cpus_allowed; |
5222 | } | 5246 | } |
5223 | retval = -EPERM; | 5247 | retval = -EPERM; |
5224 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5248 | if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE)) |
5225 | goto out_unlock; | 5249 | goto out_unlock; |
5226 | 5250 | ||
5227 | retval = security_task_setscheduler(p); | 5251 | retval = security_task_setscheduler(p); |
@@ -5460,6 +5484,8 @@ EXPORT_SYMBOL(yield); | |||
5460 | * yield_to - yield the current processor to another thread in | 5484 | * yield_to - yield the current processor to another thread in |
5461 | * your thread group, or accelerate that thread toward the | 5485 | * your thread group, or accelerate that thread toward the |
5462 | * processor it's on. | 5486 | * processor it's on. |
5487 | * @p: target task | ||
5488 | * @preempt: whether task preemption is allowed or not | ||
5463 | * | 5489 | * |
5464 | * It's the caller's job to ensure that the target task struct | 5490 | * It's the caller's job to ensure that the target task struct |
5465 | * can't go away on us before we can do any checks. | 5491 | * can't go away on us before we can do any checks. |
@@ -5525,6 +5551,7 @@ void __sched io_schedule(void) | |||
5525 | 5551 | ||
5526 | delayacct_blkio_start(); | 5552 | delayacct_blkio_start(); |
5527 | atomic_inc(&rq->nr_iowait); | 5553 | atomic_inc(&rq->nr_iowait); |
5554 | blk_flush_plug(current); | ||
5528 | current->in_iowait = 1; | 5555 | current->in_iowait = 1; |
5529 | schedule(); | 5556 | schedule(); |
5530 | current->in_iowait = 0; | 5557 | current->in_iowait = 0; |
@@ -5540,6 +5567,7 @@ long __sched io_schedule_timeout(long timeout) | |||
5540 | 5567 | ||
5541 | delayacct_blkio_start(); | 5568 | delayacct_blkio_start(); |
5542 | atomic_inc(&rq->nr_iowait); | 5569 | atomic_inc(&rq->nr_iowait); |
5570 | blk_flush_plug(current); | ||
5543 | current->in_iowait = 1; | 5571 | current->in_iowait = 1; |
5544 | ret = schedule_timeout(timeout); | 5572 | ret = schedule_timeout(timeout); |
5545 | current->in_iowait = 0; | 5573 | current->in_iowait = 0; |
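The two io_schedule() hunks apply the same idea at the other place a task explicitly blocks for IO: blk_flush_plug(current) is called just before the sleep, so requests still sitting on the task's plug list are submitted rather than waited on. The shape is the one sketched after the __schedule() hunk above, except that no runqueue lock is held here, so the plug can be flushed directly.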
@@ -5688,7 +5716,7 @@ void show_state_filter(unsigned long state_filter) | |||
5688 | do_each_thread(g, p) { | 5716 | do_each_thread(g, p) { |
5689 | /* | 5717 | /* |
5690 | * reset the NMI-timeout, listing all files on a slow | 5718 | * reset the NMI-timeout, listing all files on a slow |
5691 | * console might take alot of time: | 5719 | * console might take a lot of time: |
5692 | */ | 5720 | */ |
5693 | touch_nmi_watchdog(); | 5721 | touch_nmi_watchdog(); |
5694 | if (!state_filter || (p->state & state_filter)) | 5722 | if (!state_filter || (p->state & state_filter)) |
@@ -6303,6 +6331,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6303 | break; | 6331 | break; |
6304 | #endif | 6332 | #endif |
6305 | } | 6333 | } |
6334 | |||
6335 | update_max_interval(); | ||
6336 | |||
6306 | return NOTIFY_OK; | 6337 | return NOTIFY_OK; |
6307 | } | 6338 | } |
6308 | 6339 | ||
@@ -8434,7 +8465,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8434 | { | 8465 | { |
8435 | struct cfs_rq *cfs_rq; | 8466 | struct cfs_rq *cfs_rq; |
8436 | struct sched_entity *se; | 8467 | struct sched_entity *se; |
8437 | struct rq *rq; | ||
8438 | int i; | 8468 | int i; |
8439 | 8469 | ||
8440 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); | 8470 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); |
@@ -8447,8 +8477,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8447 | tg->shares = NICE_0_LOAD; | 8477 | tg->shares = NICE_0_LOAD; |
8448 | 8478 | ||
8449 | for_each_possible_cpu(i) { | 8479 | for_each_possible_cpu(i) { |
8450 | rq = cpu_rq(i); | ||
8451 | |||
8452 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 8480 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), |
8453 | GFP_KERNEL, cpu_to_node(i)); | 8481 | GFP_KERNEL, cpu_to_node(i)); |
8454 | if (!cfs_rq) | 8482 | if (!cfs_rq) |
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 5946ac515602..429242f3c484 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
@@ -179,7 +179,7 @@ void sched_autogroup_create_attach(struct task_struct *p) | |||
179 | struct autogroup *ag = autogroup_create(); | 179 | struct autogroup *ag = autogroup_create(); |
180 | 180 | ||
181 | autogroup_move_group(p, ag); | 181 | autogroup_move_group(p, ag); |
182 | /* drop extra refrence added by autogroup_create() */ | 182 | /* drop extra reference added by autogroup_create() */ |
183 | autogroup_kref_put(ag); | 183 | autogroup_kref_put(ag); |
184 | } | 184 | } |
185 | EXPORT_SYMBOL(sched_autogroup_create_attach); | 185 | EXPORT_SYMBOL(sched_autogroup_create_attach); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3f7ec9e27ee1..6fa833ab2cb8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/cpumask.h> | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Targeted preemption latency for CPU-bound tasks: | 28 | * Targeted preemption latency for CPU-bound tasks: |
@@ -2103,21 +2104,20 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2103 | enum cpu_idle_type idle, int *all_pinned, | 2104 | enum cpu_idle_type idle, int *all_pinned, |
2104 | int *this_best_prio, struct cfs_rq *busiest_cfs_rq) | 2105 | int *this_best_prio, struct cfs_rq *busiest_cfs_rq) |
2105 | { | 2106 | { |
2106 | int loops = 0, pulled = 0, pinned = 0; | 2107 | int loops = 0, pulled = 0; |
2107 | long rem_load_move = max_load_move; | 2108 | long rem_load_move = max_load_move; |
2108 | struct task_struct *p, *n; | 2109 | struct task_struct *p, *n; |
2109 | 2110 | ||
2110 | if (max_load_move == 0) | 2111 | if (max_load_move == 0) |
2111 | goto out; | 2112 | goto out; |
2112 | 2113 | ||
2113 | pinned = 1; | ||
2114 | |||
2115 | list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { | 2114 | list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { |
2116 | if (loops++ > sysctl_sched_nr_migrate) | 2115 | if (loops++ > sysctl_sched_nr_migrate) |
2117 | break; | 2116 | break; |
2118 | 2117 | ||
2119 | if ((p->se.load.weight >> 1) > rem_load_move || | 2118 | if ((p->se.load.weight >> 1) > rem_load_move || |
2120 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) | 2119 | !can_migrate_task(p, busiest, this_cpu, sd, idle, |
2120 | all_pinned)) | ||
2121 | continue; | 2121 | continue; |
2122 | 2122 | ||
2123 | pull_task(busiest, p, this_rq, this_cpu); | 2123 | pull_task(busiest, p, this_rq, this_cpu); |
@@ -2152,9 +2152,6 @@ out: | |||
2152 | */ | 2152 | */ |
2153 | schedstat_add(sd, lb_gained[idle], pulled); | 2153 | schedstat_add(sd, lb_gained[idle], pulled); |
2154 | 2154 | ||
2155 | if (all_pinned) | ||
2156 | *all_pinned = pinned; | ||
2157 | |||
2158 | return max_load_move - rem_load_move; | 2155 | return max_load_move - rem_load_move; |
2159 | } | 2156 | } |
2160 | 2157 | ||
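The balance_tasks() change fixes the all_pinned bookkeeping: the old code primed a local pinned = 1 and copied it back to the caller only at the end, so early exits could report a stale value; now the caller's flag is handed straight to can_migrate_task(), and the later load_balance() hunk sets all_pinned = 1 before the retry loop so it is only cleared when a task really could have moved. A small sketch of that caller-owned-flag pattern (illustrative names, not the scheduler's API):

/* Caller-owned "everything was pinned" flag: the caller sets it to 1 and the
 * callee clears it as soon as any task turns out to be movable. */
static int try_migrate_one(int task_allowed_here, int *all_pinned)
{
	if (!task_allowed_here)
		return 0;          /* leaves *all_pinned untouched */
	*all_pinned = 0;           /* at least one task could move */
	return 1;
}

static int balance(int n_tasks, const int *allowed, int *all_pinned)
{
	int moved = 0;

	*all_pinned = 1;           /* mirrors load_balance() priming the flag */
	for (int i = 0; i < n_tasks; i++)
		moved += try_migrate_one(allowed[i], all_pinned);
	return moved;
}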
@@ -3061,7 +3058,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
3061 | 3058 | ||
3062 | /* | 3059 | /* |
3063 | * if *imbalance is less than the average load per runnable task | 3060 | * if *imbalance is less than the average load per runnable task |
3064 | * there is no gaurantee that any tasks will be moved so we'll have | 3061 | * there is no guarantee that any tasks will be moved so we'll have |
3065 | * a think about bumping its value to force at least one task to be | 3062 | * a think about bumping its value to force at least one task to be |
3066 | * moved | 3063 | * moved |
3067 | */ | 3064 | */ |
@@ -3126,6 +3123,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3126 | if (!sds.busiest || sds.busiest_nr_running == 0) | 3123 | if (!sds.busiest || sds.busiest_nr_running == 0) |
3127 | goto out_balanced; | 3124 | goto out_balanced; |
3128 | 3125 | ||
3126 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | ||
3127 | |||
3129 | /* | 3128 | /* |
3130 | * If the busiest group is imbalanced the below checks don't | 3129 | * If the busiest group is imbalanced the below checks don't |
3131 | * work because they assumes all things are equal, which typically | 3130 | * work because they assumes all things are equal, which typically |
@@ -3150,7 +3149,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3150 | * Don't pull any tasks if this group is already above the domain | 3149 | * Don't pull any tasks if this group is already above the domain |
3151 | * average load. | 3150 | * average load. |
3152 | */ | 3151 | */ |
3153 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | ||
3154 | if (sds.this_load >= sds.avg_load) | 3152 | if (sds.this_load >= sds.avg_load) |
3155 | goto out_balanced; | 3153 | goto out_balanced; |
3156 | 3154 | ||
@@ -3339,6 +3337,7 @@ redo: | |||
3339 | * still unbalanced. ld_moved simply stays zero, so it is | 3337 | * still unbalanced. ld_moved simply stays zero, so it is |
3340 | * correctly treated as an imbalance. | 3338 | * correctly treated as an imbalance. |
3341 | */ | 3339 | */ |
3340 | all_pinned = 1; | ||
3342 | local_irq_save(flags); | 3341 | local_irq_save(flags); |
3343 | double_rq_lock(this_rq, busiest); | 3342 | double_rq_lock(this_rq, busiest); |
3344 | ld_moved = move_tasks(this_rq, this_cpu, busiest, | 3343 | ld_moved = move_tasks(this_rq, this_cpu, busiest, |
@@ -3819,6 +3818,17 @@ void select_nohz_load_balancer(int stop_tick) | |||
3819 | 3818 | ||
3820 | static DEFINE_SPINLOCK(balancing); | 3819 | static DEFINE_SPINLOCK(balancing); |
3821 | 3820 | ||
3821 | static unsigned long __read_mostly max_load_balance_interval = HZ/10; | ||
3822 | |||
3823 | /* | ||
3824 | * Scale the max load_balance interval with the number of CPUs in the system. | ||
3825 | * This trades load-balance latency on larger machines for less cross talk. | ||
3826 | */ | ||
3827 | static void update_max_interval(void) | ||
3828 | { | ||
3829 | max_load_balance_interval = HZ*num_online_cpus()/10; | ||
3830 | } | ||
3831 | |||
3822 | /* | 3832 | /* |
3823 | * It checks each scheduling domain to see if it is due to be balanced, | 3833 | * It checks each scheduling domain to see if it is due to be balanced, |
3824 | * and initiates a balancing operation if so. | 3834 | * and initiates a balancing operation if so. |
@@ -3848,10 +3858,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3848 | 3858 | ||
3849 | /* scale ms to jiffies */ | 3859 | /* scale ms to jiffies */ |
3850 | interval = msecs_to_jiffies(interval); | 3860 | interval = msecs_to_jiffies(interval); |
3851 | if (unlikely(!interval)) | 3861 | interval = clamp(interval, 1UL, max_load_balance_interval); |
3852 | interval = 1; | ||
3853 | if (interval > HZ*NR_CPUS/10) | ||
3854 | interval = HZ*NR_CPUS/10; | ||
3855 | 3862 | ||
3856 | need_serialize = sd->flags & SD_SERIALIZE; | 3863 | need_serialize = sd->flags & SD_SERIALIZE; |
3857 | 3864 | ||
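rebalance_domains() now clamps the per-domain interval with clamp() against max_load_balance_interval, and the ceiling scales with the number of CPUs actually online (kept up to date from the hotplug notifier via update_max_interval() in the sched.c hunk above) instead of the compile-time NR_CPUS. A standalone sketch of the arithmetic, assuming HZ=1000 and 16 online CPUs:

#include <stdio.h>

#define HZ 1000UL   /* assumed tick rate for the example */

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	unsigned long online_cpus = 16;
	/* the upper bound tracks online CPUs, not the compile-time maximum */
	unsigned long max_interval = HZ * online_cpus / 10;      /* 1600 jiffies */
	unsigned long interval = 5000;                           /* requested, in jiffies */

	printf("%lu\n", clamp_ul(interval, 1UL, max_interval));  /* prints 1600 */
	return 0;
}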
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index c82f26c1b7c3..a776a6396427 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -94,6 +94,4 @@ static const struct sched_class idle_sched_class = { | |||
94 | 94 | ||
95 | .prio_changed = prio_changed_idle, | 95 | .prio_changed = prio_changed_idle, |
96 | .switched_to = switched_to_idle, | 96 | .switched_to = switched_to_idle, |
97 | |||
98 | /* no .task_new for idle tasks */ | ||
99 | }; | 97 | }; |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index db308cb08b75..e7cebdc65f82 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1378,7 +1378,7 @@ retry: | |||
1378 | task = pick_next_pushable_task(rq); | 1378 | task = pick_next_pushable_task(rq); |
1379 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | 1379 | if (task_cpu(next_task) == rq->cpu && task == next_task) { |
1380 | /* | 1380 | /* |
1381 | * If we get here, the task hasnt moved at all, but | 1381 | * If we get here, the task hasn't moved at all, but |
1382 | * it has failed to push. We will not try again, | 1382 | * it has failed to push. We will not try again, |
1383 | * since the other cpus will pull from us when they | 1383 | * since the other cpus will pull from us when they |
1384 | * are ready. | 1384 | * are ready. |
@@ -1488,7 +1488,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1488 | /* | 1488 | /* |
1489 | * We continue with the search, just in | 1489 | * We continue with the search, just in |
1490 | * case there's an even higher prio task | 1490 | * case there's an even higher prio task |
1491 | * in another runqueue. (low likelyhood | 1491 | * in another runqueue. (low likelihood |
1492 | * but possible) | 1492 | * but possible) |
1493 | */ | 1493 | */ |
1494 | } | 1494 | } |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 84ec9bcf82d9..1ba2bd40fdac 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -102,6 +102,4 @@ static const struct sched_class stop_sched_class = { | |||
102 | 102 | ||
103 | .prio_changed = prio_changed_stop, | 103 | .prio_changed = prio_changed_stop, |
104 | .switched_to = switched_to_stop, | 104 | .switched_to = switched_to_stop, |
105 | |||
106 | /* no .task_new for stop tasks */ | ||
107 | }; | 105 | }; |
diff --git a/kernel/signal.c b/kernel/signal.c index 4e3cff10fdce..7165af5f1b11 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -226,7 +226,7 @@ static inline void print_dropped_signal(int sig) | |||
226 | /* | 226 | /* |
227 | * allocate a new signal queue record | 227 | * allocate a new signal queue record |
228 | * - this may be called without locks if and only if t == current, otherwise an | 228 | * - this may be called without locks if and only if t == current, otherwise an |
229 | * appopriate lock must be held to stop the target task from exiting | 229 | * appropriate lock must be held to stop the target task from exiting |
230 | */ | 230 | */ |
231 | static struct sigqueue * | 231 | static struct sigqueue * |
232 | __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | 232 | __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) |
@@ -375,15 +375,15 @@ int unhandled_signal(struct task_struct *tsk, int sig) | |||
375 | return !tracehook_consider_fatal_signal(tsk, sig); | 375 | return !tracehook_consider_fatal_signal(tsk, sig); |
376 | } | 376 | } |
377 | 377 | ||
378 | 378 | /* | |
379 | /* Notify the system that a driver wants to block all signals for this | 379 | * Notify the system that a driver wants to block all signals for this |
380 | * process, and wants to be notified if any signals at all were to be | 380 | * process, and wants to be notified if any signals at all were to be |
381 | * sent/acted upon. If the notifier routine returns non-zero, then the | 381 | * sent/acted upon. If the notifier routine returns non-zero, then the |
382 | * signal will be acted upon after all. If the notifier routine returns 0, | 382 | * signal will be acted upon after all. If the notifier routine returns 0, |
383 | * then then signal will be blocked. Only one block per process is | 383 | * then then signal will be blocked. Only one block per process is |
384 | * allowed. priv is a pointer to private data that the notifier routine | 384 | * allowed. priv is a pointer to private data that the notifier routine |
385 | * can use to determine if the signal should be blocked or not. */ | 385 | * can use to determine if the signal should be blocked or not. |
386 | 386 | */ | |
387 | void | 387 | void |
388 | block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) | 388 | block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) |
389 | { | 389 | { |
@@ -434,9 +434,10 @@ still_pending: | |||
434 | copy_siginfo(info, &first->info); | 434 | copy_siginfo(info, &first->info); |
435 | __sigqueue_free(first); | 435 | __sigqueue_free(first); |
436 | } else { | 436 | } else { |
437 | /* Ok, it wasn't in the queue. This must be | 437 | /* |
438 | a fast-pathed signal or we must have been | 438 | * Ok, it wasn't in the queue. This must be |
439 | out of queue space. So zero out the info. | 439 | * a fast-pathed signal or we must have been |
440 | * out of queue space. So zero out the info. | ||
440 | */ | 441 | */ |
441 | info->si_signo = sig; | 442 | info->si_signo = sig; |
442 | info->si_errno = 0; | 443 | info->si_errno = 0; |
@@ -468,7 +469,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | |||
468 | } | 469 | } |
469 | 470 | ||
470 | /* | 471 | /* |
471 | * Dequeue a signal and return the element to the caller, which is | 472 | * Dequeue a signal and return the element to the caller, which is |
472 | * expected to free it. | 473 | * expected to free it. |
473 | * | 474 | * |
474 | * All callers have to hold the siglock. | 475 | * All callers have to hold the siglock. |
@@ -490,7 +491,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
490 | * itimers are process shared and we restart periodic | 491 | * itimers are process shared and we restart periodic |
491 | * itimers in the signal delivery path to prevent DoS | 492 | * itimers in the signal delivery path to prevent DoS |
492 | * attacks in the high resolution timer case. This is | 493 | * attacks in the high resolution timer case. This is |
493 | * compliant with the old way of self restarting | 494 | * compliant with the old way of self-restarting |
494 | * itimers, as the SIGALRM is a legacy signal and only | 495 | * itimers, as the SIGALRM is a legacy signal and only |
495 | * queued once. Changing the restart behaviour to | 496 | * queued once. Changing the restart behaviour to |
496 | * restart the timer in the signal dequeue path is | 497 | * restart the timer in the signal dequeue path is |
@@ -636,13 +637,33 @@ static inline bool si_fromuser(const struct siginfo *info) | |||
636 | } | 637 | } |
637 | 638 | ||
638 | /* | 639 | /* |
640 | * called with RCU read lock from check_kill_permission() | ||
641 | */ | ||
642 | static int kill_ok_by_cred(struct task_struct *t) | ||
643 | { | ||
644 | const struct cred *cred = current_cred(); | ||
645 | const struct cred *tcred = __task_cred(t); | ||
646 | |||
647 | if (cred->user->user_ns == tcred->user->user_ns && | ||
648 | (cred->euid == tcred->suid || | ||
649 | cred->euid == tcred->uid || | ||
650 | cred->uid == tcred->suid || | ||
651 | cred->uid == tcred->uid)) | ||
652 | return 1; | ||
653 | |||
654 | if (ns_capable(tcred->user->user_ns, CAP_KILL)) | ||
655 | return 1; | ||
656 | |||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | /* | ||
639 | * Bad permissions for sending the signal | 661 | * Bad permissions for sending the signal |
640 | * - the caller must hold the RCU read lock | 662 | * - the caller must hold the RCU read lock |
641 | */ | 663 | */ |
642 | static int check_kill_permission(int sig, struct siginfo *info, | 664 | static int check_kill_permission(int sig, struct siginfo *info, |
643 | struct task_struct *t) | 665 | struct task_struct *t) |
644 | { | 666 | { |
645 | const struct cred *cred, *tcred; | ||
646 | struct pid *sid; | 667 | struct pid *sid; |
647 | int error; | 668 | int error; |
648 | 669 | ||
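check_kill_permission()'s old XOR chain denied a signal only when none of the four euid/uid versus suid/uid pairs matched; kill_ok_by_cred() states the positive form (any matching pair allows it), additionally requires sender and target to share a user namespace, and swaps capable(CAP_KILL) for ns_capable() in the target's namespace. A sketch of the credential part with stand-in types (the capability fallback is omitted):

#include <stdbool.h>

struct ids { unsigned uid, euid, suid; int user_ns; };

/* Positive form of the old XOR chain: sending is credential-OK when the
 * namespaces match and any of the four id pairs is equal. */
static bool kill_ok(const struct ids *sender, const struct ids *target)
{
	return sender->user_ns == target->user_ns &&
	       (sender->euid == target->suid ||
	        sender->euid == target->uid  ||
	        sender->uid  == target->suid ||
	        sender->uid  == target->uid);
}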
@@ -656,14 +677,8 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
656 | if (error) | 677 | if (error) |
657 | return error; | 678 | return error; |
658 | 679 | ||
659 | cred = current_cred(); | ||
660 | tcred = __task_cred(t); | ||
661 | if (!same_thread_group(current, t) && | 680 | if (!same_thread_group(current, t) && |
662 | (cred->euid ^ tcred->suid) && | 681 | !kill_ok_by_cred(t)) { |
663 | (cred->euid ^ tcred->uid) && | ||
664 | (cred->uid ^ tcred->suid) && | ||
665 | (cred->uid ^ tcred->uid) && | ||
666 | !capable(CAP_KILL)) { | ||
667 | switch (sig) { | 682 | switch (sig) { |
668 | case SIGCONT: | 683 | case SIGCONT: |
669 | sid = task_session(t); | 684 | sid = task_session(t); |
@@ -909,14 +924,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
909 | if (info == SEND_SIG_FORCED) | 924 | if (info == SEND_SIG_FORCED) |
910 | goto out_set; | 925 | goto out_set; |
911 | 926 | ||
912 | /* Real-time signals must be queued if sent by sigqueue, or | 927 | /* |
913 | some other real-time mechanism. It is implementation | 928 | * Real-time signals must be queued if sent by sigqueue, or |
914 | defined whether kill() does so. We attempt to do so, on | 929 | * some other real-time mechanism. It is implementation |
915 | the principle of least surprise, but since kill is not | 930 | * defined whether kill() does so. We attempt to do so, on |
916 | allowed to fail with EAGAIN when low on memory we just | 931 | * the principle of least surprise, but since kill is not |
917 | make sure at least one signal gets delivered and don't | 932 | * allowed to fail with EAGAIN when low on memory we just |
918 | pass on the info struct. */ | 933 | * make sure at least one signal gets delivered and don't |
919 | 934 | * pass on the info struct. | |
935 | */ | ||
920 | if (sig < SIGRTMIN) | 936 | if (sig < SIGRTMIN) |
921 | override_rlimit = (is_si_special(info) || info->si_code >= 0); | 937 | override_rlimit = (is_si_special(info) || info->si_code >= 0); |
922 | else | 938 | else |
@@ -1187,8 +1203,7 @@ retry: | |||
1187 | return error; | 1203 | return error; |
1188 | } | 1204 | } |
1189 | 1205 | ||
1190 | int | 1206 | int kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
1191 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) | ||
1192 | { | 1207 | { |
1193 | int error; | 1208 | int error; |
1194 | rcu_read_lock(); | 1209 | rcu_read_lock(); |
@@ -1285,8 +1300,7 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) | |||
1285 | * These are for backward compatibility with the rest of the kernel source. | 1300 | * These are for backward compatibility with the rest of the kernel source. |
1286 | */ | 1301 | */ |
1287 | 1302 | ||
1288 | int | 1303 | int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
1289 | send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | ||
1290 | { | 1304 | { |
1291 | /* | 1305 | /* |
1292 | * Make sure legacy kernel users don't send in bad values | 1306 | * Make sure legacy kernel users don't send in bad values |
@@ -1354,7 +1368,7 @@ EXPORT_SYMBOL(kill_pid); | |||
1354 | * These functions support sending signals using preallocated sigqueue | 1368 | * These functions support sending signals using preallocated sigqueue |
1355 | * structures. This is needed "because realtime applications cannot | 1369 | * structures. This is needed "because realtime applications cannot |
1356 | * afford to lose notifications of asynchronous events, like timer | 1370 | * afford to lose notifications of asynchronous events, like timer |
1357 | * expirations or I/O completions". In the case of Posix Timers | 1371 | * expirations or I/O completions". In the case of POSIX Timers |
1358 | * we allocate the sigqueue structure from the timer_create. If this | 1372 | * we allocate the sigqueue structure from the timer_create. If this |
1359 | * allocation fails we are able to report the failure to the application | 1373 | * allocation fails we are able to report the failure to the application |
1360 | * with an EAGAIN error. | 1374 | * with an EAGAIN error. |
@@ -1539,7 +1553,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1539 | info.si_signo = SIGCHLD; | 1553 | info.si_signo = SIGCHLD; |
1540 | info.si_errno = 0; | 1554 | info.si_errno = 0; |
1541 | /* | 1555 | /* |
1542 | * see comment in do_notify_parent() abot the following 3 lines | 1556 | * see comment in do_notify_parent() about the following 4 lines |
1543 | */ | 1557 | */ |
1544 | rcu_read_lock(); | 1558 | rcu_read_lock(); |
1545 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); | 1559 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); |
@@ -1597,7 +1611,7 @@ static inline int may_ptrace_stop(void) | |||
1597 | } | 1611 | } |
1598 | 1612 | ||
1599 | /* | 1613 | /* |
1600 | * Return nonzero if there is a SIGKILL that should be waking us up. | 1614 | * Return non-zero if there is a SIGKILL that should be waking us up. |
1601 | * Called with the siglock held. | 1615 | * Called with the siglock held. |
1602 | */ | 1616 | */ |
1603 | static int sigkill_pending(struct task_struct *tsk) | 1617 | static int sigkill_pending(struct task_struct *tsk) |
@@ -1721,7 +1735,7 @@ void ptrace_notify(int exit_code) | |||
1721 | /* | 1735 | /* |
1722 | * This performs the stopping for SIGSTOP and other stop signals. | 1736 | * This performs the stopping for SIGSTOP and other stop signals. |
1723 | * We have to stop all threads in the thread group. | 1737 | * We have to stop all threads in the thread group. |
1724 | * Returns nonzero if we've actually stopped and released the siglock. | 1738 | * Returns non-zero if we've actually stopped and released the siglock. |
1725 | * Returns zero if we didn't stop and still hold the siglock. | 1739 | * Returns zero if we didn't stop and still hold the siglock. |
1726 | */ | 1740 | */ |
1727 | static int do_signal_stop(int signr) | 1741 | static int do_signal_stop(int signr) |
@@ -1809,10 +1823,12 @@ static int ptrace_signal(int signr, siginfo_t *info, | |||
1809 | 1823 | ||
1810 | current->exit_code = 0; | 1824 | current->exit_code = 0; |
1811 | 1825 | ||
1812 | /* Update the siginfo structure if the signal has | 1826 | /* |
1813 | changed. If the debugger wanted something | 1827 | * Update the siginfo structure if the signal has |
1814 | specific in the siginfo structure then it should | 1828 | * changed. If the debugger wanted something |
1815 | have updated *info via PTRACE_SETSIGINFO. */ | 1829 | * specific in the siginfo structure then it should |
1830 | * have updated *info via PTRACE_SETSIGINFO. | ||
1831 | */ | ||
1816 | if (signr != info->si_signo) { | 1832 | if (signr != info->si_signo) { |
1817 | info->si_signo = signr; | 1833 | info->si_signo = signr; |
1818 | info->si_errno = 0; | 1834 | info->si_errno = 0; |
@@ -1871,7 +1887,7 @@ relock: | |||
1871 | for (;;) { | 1887 | for (;;) { |
1872 | struct k_sigaction *ka; | 1888 | struct k_sigaction *ka; |
1873 | /* | 1889 | /* |
1874 | * Tracing can induce an artifical signal and choose sigaction. | 1890 | * Tracing can induce an artificial signal and choose sigaction. |
1875 | * The return value in @signr determines the default action, | 1891 | * The return value in @signr determines the default action, |
1876 | * but @info->si_signo is the signal number we will report. | 1892 | * but @info->si_signo is the signal number we will report. |
1877 | */ | 1893 | */ |
@@ -2020,7 +2036,8 @@ void exit_signals(struct task_struct *tsk) | |||
2020 | if (!signal_pending(tsk)) | 2036 | if (!signal_pending(tsk)) |
2021 | goto out; | 2037 | goto out; |
2022 | 2038 | ||
2023 | /* It could be that __group_complete_signal() choose us to | 2039 | /* |
2040 | * It could be that __group_complete_signal() choose us to | ||
2024 | * notify about group-wide signal. Another thread should be | 2041 | * notify about group-wide signal. Another thread should be |
2025 | * woken now to take the signal since we will not. | 2042 | * woken now to take the signal since we will not. |
2026 | */ | 2043 | */ |
@@ -2058,6 +2075,9 @@ EXPORT_SYMBOL(unblock_all_signals); | |||
2058 | * System call entry points. | 2075 | * System call entry points. |
2059 | */ | 2076 | */ |
2060 | 2077 | ||
2078 | /** | ||
2079 | * sys_restart_syscall - restart a system call | ||
2080 | */ | ||
2061 | SYSCALL_DEFINE0(restart_syscall) | 2081 | SYSCALL_DEFINE0(restart_syscall) |
2062 | { | 2082 | { |
2063 | struct restart_block *restart = &current_thread_info()->restart_block; | 2083 | struct restart_block *restart = &current_thread_info()->restart_block; |
@@ -2111,6 +2131,13 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) | |||
2111 | return error; | 2131 | return error; |
2112 | } | 2132 | } |
2113 | 2133 | ||
2134 | /** | ||
2135 | * sys_rt_sigprocmask - change the list of currently blocked signals | ||
2136 | * @how: whether to add, remove, or set signals | ||
2137 | * @set: stores pending signals | ||
2138 | * @oset: previous value of signal mask if non-null | ||
2139 | * @sigsetsize: size of sigset_t type | ||
2140 | */ | ||
2114 | SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, | 2141 | SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, |
2115 | sigset_t __user *, oset, size_t, sigsetsize) | 2142 | sigset_t __user *, oset, size_t, sigsetsize) |
2116 | { | 2143 | { |
@@ -2169,8 +2196,14 @@ long do_sigpending(void __user *set, unsigned long sigsetsize) | |||
2169 | 2196 | ||
2170 | out: | 2197 | out: |
2171 | return error; | 2198 | return error; |
2172 | } | 2199 | } |
2173 | 2200 | ||
2201 | /** | ||
2202 | * sys_rt_sigpending - examine a pending signal that has been raised | ||
2203 | * while blocked | ||
2204 | * @set: stores pending signals | ||
2205 | * @sigsetsize: size of sigset_t type or larger | ||
2206 | */ | ||
2174 | SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) | 2207 | SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) |
2175 | { | 2208 | { |
2176 | return do_sigpending(set, sigsetsize); | 2209 | return do_sigpending(set, sigsetsize); |
@@ -2219,9 +2252,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) | |||
2219 | err |= __put_user(from->si_trapno, &to->si_trapno); | 2252 | err |= __put_user(from->si_trapno, &to->si_trapno); |
2220 | #endif | 2253 | #endif |
2221 | #ifdef BUS_MCEERR_AO | 2254 | #ifdef BUS_MCEERR_AO |
2222 | /* | 2255 | /* |
2223 | * Other callers might not initialize the si_lsb field, | 2256 | * Other callers might not initialize the si_lsb field, |
2224 | * so check explicitely for the right codes here. | 2257 | * so check explicitly for the right codes here. |
2225 | */ | 2258 | */ |
2226 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) | 2259 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) |
2227 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); | 2260 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); |
@@ -2250,6 +2283,14 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) | |||
2250 | 2283 | ||
2251 | #endif | 2284 | #endif |
2252 | 2285 | ||
2286 | /** | ||
2287 | * sys_rt_sigtimedwait - synchronously wait for queued signals specified | ||
2288 | * in @uthese | ||
2289 | * @uthese: queued signals to wait for | ||
2290 | * @uinfo: if non-null, the signal's siginfo is returned here | ||
2291 | * @uts: upper bound on process time suspension | ||
2292 | * @sigsetsize: size of sigset_t type | ||
2293 | */ | ||
2253 | SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | 2294 | SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, |
2254 | siginfo_t __user *, uinfo, const struct timespec __user *, uts, | 2295 | siginfo_t __user *, uinfo, const struct timespec __user *, uts, |
2255 | size_t, sigsetsize) | 2296 | size_t, sigsetsize) |
@@ -2266,7 +2307,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2266 | 2307 | ||
2267 | if (copy_from_user(&these, uthese, sizeof(these))) | 2308 | if (copy_from_user(&these, uthese, sizeof(these))) |
2268 | return -EFAULT; | 2309 | return -EFAULT; |
2269 | 2310 | ||
2270 | /* | 2311 | /* |
2271 | * Invert the set of allowed signals to get those we | 2312 | * Invert the set of allowed signals to get those we |
2272 | * want to block. | 2313 | * want to block. |
@@ -2291,9 +2332,11 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2291 | + (ts.tv_sec || ts.tv_nsec)); | 2332 | + (ts.tv_sec || ts.tv_nsec)); |
2292 | 2333 | ||
2293 | if (timeout) { | 2334 | if (timeout) { |
2294 | /* None ready -- temporarily unblock those we're | 2335 | /* |
2336 | * None ready -- temporarily unblock those we're | ||
2295 | * interested while we are sleeping in so that we'll | 2337 | * interested while we are sleeping in so that we'll |
2296 | * be awakened when they arrive. */ | 2338 | * be awakened when they arrive. |
2339 | */ | ||
2297 | current->real_blocked = current->blocked; | 2340 | current->real_blocked = current->blocked; |
2298 | sigandsets(¤t->blocked, ¤t->blocked, &these); | 2341 | sigandsets(¤t->blocked, ¤t->blocked, &these); |
2299 | recalc_sigpending(); | 2342 | recalc_sigpending(); |
@@ -2325,6 +2368,11 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2325 | return ret; | 2368 | return ret; |
2326 | } | 2369 | } |
2327 | 2370 | ||
2371 | /** | ||
2372 | * sys_kill - send a signal to a process | ||
2373 | * @pid: the PID of the process | ||
2374 | * @sig: signal to be sent | ||
2375 | */ | ||
2328 | SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) | 2376 | SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) |
2329 | { | 2377 | { |
2330 | struct siginfo info; | 2378 | struct siginfo info; |
@@ -2400,7 +2448,11 @@ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) | |||
2400 | return do_tkill(tgid, pid, sig); | 2448 | return do_tkill(tgid, pid, sig); |
2401 | } | 2449 | } |
2402 | 2450 | ||
2403 | /* | 2451 | /** |
2452 | * sys_tkill - send signal to one specific task | ||
2453 | * @pid: the PID of the task | ||
2454 | * @sig: signal to be sent | ||
2455 | * | ||
2404 | * Send a signal to only one task, even if it's a CLONE_THREAD task. | 2456 | * Send a signal to only one task, even if it's a CLONE_THREAD task. |
2405 | */ | 2457 | */ |
2406 | SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) | 2458 | SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) |
@@ -2412,6 +2464,12 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) | |||
2412 | return do_tkill(0, pid, sig); | 2464 | return do_tkill(0, pid, sig); |
2413 | } | 2465 | } |
2414 | 2466 | ||
2467 | /** | ||
2468 | * sys_rt_sigqueueinfo - send signal information to a signal | ||
2469 | * @pid: the PID of the thread | ||
2470 | * @sig: signal to be sent | ||
2471 | * @uinfo: signal info to be sent | ||
2472 | */ | ||
2415 | SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | 2473 | SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, |
2416 | siginfo_t __user *, uinfo) | 2474 | siginfo_t __user *, uinfo) |
2417 | { | 2475 | { |
@@ -2421,9 +2479,13 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | |||
2421 | return -EFAULT; | 2479 | return -EFAULT; |
2422 | 2480 | ||
2423 | /* Not even root can pretend to send signals from the kernel. | 2481 | /* Not even root can pretend to send signals from the kernel. |
2424 | Nor can they impersonate a kill(), which adds source info. */ | 2482 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2425 | if (info.si_code >= 0) | 2483 | */ |
2484 | if (info.si_code >= 0 || info.si_code == SI_TKILL) { | ||
2485 | /* We used to allow any < 0 si_code */ | ||
2486 | WARN_ON_ONCE(info.si_code < 0); | ||
2426 | return -EPERM; | 2487 | return -EPERM; |
2488 | } | ||
2427 | info.si_signo = sig; | 2489 | info.si_signo = sig; |
2428 | 2490 | ||
2429 | /* POSIX.1b doesn't mention process groups. */ | 2491 | /* POSIX.1b doesn't mention process groups. */ |
@@ -2437,9 +2499,13 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) | |||
2437 | return -EINVAL; | 2499 | return -EINVAL; |
2438 | 2500 | ||
2439 | /* Not even root can pretend to send signals from the kernel. | 2501 | /* Not even root can pretend to send signals from the kernel. |
2440 | Nor can they impersonate a kill(), which adds source info. */ | 2502 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2441 | if (info->si_code >= 0) | 2503 | */ |
2504 | if (info->si_code >= 0 || info->si_code == SI_TKILL) { | ||
2505 | /* We used to allow any < 0 si_code */ | ||
2506 | WARN_ON_ONCE(info->si_code < 0); | ||
2442 | return -EPERM; | 2507 | return -EPERM; |
2508 | } | ||
2443 | info->si_signo = sig; | 2509 | info->si_signo = sig; |
2444 | 2510 | ||
2445 | return do_send_specific(tgid, pid, sig, info); | 2511 | return do_send_specific(tgid, pid, sig, info); |
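Both rt_sigqueueinfo paths now reject si_code == SI_TKILL as well as any non-negative code, so userspace can no longer forge siginfo that claims to come from the kernel or from tkill()/tgkill(). A small userspace illustration (raw syscall, since glibc has no wrapper); the EPERM for SI_TKILL is only seen on kernels that carry this check:

#define _GNU_SOURCE
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	siginfo_t si;

	memset(&si, 0, sizeof(si));
	si.si_signo = SIGUSR1;
	si.si_code = SI_QUEUE;          /* negative, user-originated: allowed */
	si.si_pid = getpid();
	si.si_uid = getuid();

	signal(SIGUSR1, SIG_IGN);
	if (syscall(SYS_rt_sigqueueinfo, getpid(), SIGUSR1, &si) != 0)
		perror("SI_QUEUE");

	si.si_code = SI_TKILL;          /* reserved for tkill(): now rejected */
	if (syscall(SYS_rt_sigqueueinfo, getpid(), SIGUSR1, &si) != 0)
		perror("SI_TKILL");     /* expected: Operation not permitted */

	return 0;
}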
@@ -2531,12 +2597,11 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s | |||
2531 | 2597 | ||
2532 | error = -EINVAL; | 2598 | error = -EINVAL; |
2533 | /* | 2599 | /* |
2534 | * | 2600 | * Note - this code used to test ss_flags incorrectly: |
2535 | * Note - this code used to test ss_flags incorrectly | ||
2536 | * old code may have been written using ss_flags==0 | 2601 | * old code may have been written using ss_flags==0 |
2537 | * to mean ss_flags==SS_ONSTACK (as this was the only | 2602 | * to mean ss_flags==SS_ONSTACK (as this was the only |
2538 | * way that worked) - this fix preserves that older | 2603 | * way that worked) - this fix preserves that older |
2539 | * mechanism | 2604 | * mechanism. |
2540 | */ | 2605 | */ |
2541 | if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0) | 2606 | if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0) |
2542 | goto out; | 2607 | goto out; |
@@ -2570,6 +2635,10 @@ out: | |||
2570 | 2635 | ||
2571 | #ifdef __ARCH_WANT_SYS_SIGPENDING | 2636 | #ifdef __ARCH_WANT_SYS_SIGPENDING |
2572 | 2637 | ||
2638 | /** | ||
2639 | * sys_sigpending - examine pending signals | ||
2640 | * @set: where mask of pending signal is returned | ||
2641 | */ | ||
2573 | SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) | 2642 | SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) |
2574 | { | 2643 | { |
2575 | return do_sigpending(set, sizeof(*set)); | 2644 | return do_sigpending(set, sizeof(*set)); |
@@ -2578,8 +2647,15 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) | |||
2578 | #endif | 2647 | #endif |
2579 | 2648 | ||
2580 | #ifdef __ARCH_WANT_SYS_SIGPROCMASK | 2649 | #ifdef __ARCH_WANT_SYS_SIGPROCMASK |
2581 | /* Some platforms have their own version with special arguments others | 2650 | /** |
2582 | support only sys_rt_sigprocmask. */ | 2651 | * sys_sigprocmask - examine and change blocked signals |
2652 | * @how: whether to add, remove, or set signals | ||
2653 | * @set: signals to add or remove (if non-null) | ||
2654 | * @oset: previous value of signal mask if non-null | ||
2655 | * | ||
2656 | * Some platforms have their own version with special arguments; | ||
2657 | * others support only sys_rt_sigprocmask. | ||
2658 | */ | ||
2583 | 2659 | ||
2584 | SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, | 2660 | SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, |
2585 | old_sigset_t __user *, oset) | 2661 | old_sigset_t __user *, oset) |
@@ -2632,6 +2708,13 @@ out: | |||
2632 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ | 2708 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ |
2633 | 2709 | ||
2634 | #ifdef __ARCH_WANT_SYS_RT_SIGACTION | 2710 | #ifdef __ARCH_WANT_SYS_RT_SIGACTION |
2711 | /** | ||
2712 | * sys_rt_sigaction - alter an action taken by a process | ||
2713 | * @sig: signal to be sent | ||
2714 | * @act: new sigaction | ||
2715 | * @oact: used to save the previous sigaction | ||
2716 | * @sigsetsize: size of sigset_t type | ||
2717 | */ | ||
2635 | SYSCALL_DEFINE4(rt_sigaction, int, sig, | 2718 | SYSCALL_DEFINE4(rt_sigaction, int, sig, |
2636 | const struct sigaction __user *, act, | 2719 | const struct sigaction __user *, act, |
2637 | struct sigaction __user *, oact, | 2720 | struct sigaction __user *, oact, |
@@ -2718,6 +2801,12 @@ SYSCALL_DEFINE0(pause) | |||
2718 | #endif | 2801 | #endif |
2719 | 2802 | ||
2720 | #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND | 2803 | #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND |
2804 | /** | ||
2805 | * sys_rt_sigsuspend - replace the signal mask for a value with the | ||
2806 | * @unewset value until a signal is received | ||
2807 | * @unewset: new signal mask value | ||
2808 | * @sigsetsize: size of sigset_t type | ||
2809 | */ | ||
2721 | SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) | 2810 | SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) |
2722 | { | 2811 | { |
2723 | sigset_t newset; | 2812 | sigset_t newset; |
diff --git a/kernel/smp.c b/kernel/smp.c index 7cbd0f293df4..73a195193558 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -604,6 +604,87 @@ void ipi_call_unlock_irq(void) | |||
604 | } | 604 | } |
605 | #endif /* USE_GENERIC_SMP_HELPERS */ | 605 | #endif /* USE_GENERIC_SMP_HELPERS */ |
606 | 606 | ||
607 | /* Setup configured maximum number of CPUs to activate */ | ||
608 | unsigned int setup_max_cpus = NR_CPUS; | ||
609 | EXPORT_SYMBOL(setup_max_cpus); | ||
610 | |||
611 | |||
612 | /* | ||
613 | * Setup routine for controlling SMP activation | ||
614 | * | ||
615 | * Command-line option of "nosmp" or "maxcpus=0" will disable SMP | ||
616 | * activation entirely (the MPS table probe still happens, though). | ||
617 | * | ||
618 | * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer | ||
619 | * greater than 0, limits the maximum number of CPUs activated in | ||
620 | * SMP mode to <NUM>. | ||
621 | */ | ||
622 | |||
623 | void __weak arch_disable_smp_support(void) { } | ||
624 | |||
625 | static int __init nosmp(char *str) | ||
626 | { | ||
627 | setup_max_cpus = 0; | ||
628 | arch_disable_smp_support(); | ||
629 | |||
630 | return 0; | ||
631 | } | ||
632 | |||
633 | early_param("nosmp", nosmp); | ||
634 | |||
635 | /* this is hard limit */ | ||
636 | static int __init nrcpus(char *str) | ||
637 | { | ||
638 | int nr_cpus; | ||
639 | |||
640 | get_option(&str, &nr_cpus); | ||
641 | if (nr_cpus > 0 && nr_cpus < nr_cpu_ids) | ||
642 | nr_cpu_ids = nr_cpus; | ||
643 | |||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | early_param("nr_cpus", nrcpus); | ||
648 | |||
649 | static int __init maxcpus(char *str) | ||
650 | { | ||
651 | get_option(&str, &setup_max_cpus); | ||
652 | if (setup_max_cpus == 0) | ||
653 | arch_disable_smp_support(); | ||
654 | |||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | early_param("maxcpus", maxcpus); | ||
659 | |||
660 | /* Setup number of possible processor ids */ | ||
661 | int nr_cpu_ids __read_mostly = NR_CPUS; | ||
662 | EXPORT_SYMBOL(nr_cpu_ids); | ||
663 | |||
664 | /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ | ||
665 | void __init setup_nr_cpu_ids(void) | ||
666 | { | ||
667 | nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; | ||
668 | } | ||
669 | |||
670 | /* Called by boot processor to activate the rest. */ | ||
671 | void __init smp_init(void) | ||
672 | { | ||
673 | unsigned int cpu; | ||
674 | |||
675 | /* FIXME: This should be done in userspace --RR */ | ||
676 | for_each_present_cpu(cpu) { | ||
677 | if (num_online_cpus() >= setup_max_cpus) | ||
678 | break; | ||
679 | if (!cpu_online(cpu)) | ||
680 | cpu_up(cpu); | ||
681 | } | ||
682 | |||
683 | /* Any cleanup work */ | ||
684 | printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); | ||
685 | smp_cpus_done(setup_max_cpus); | ||
686 | } | ||
687 | |||
607 | /* | 688 | /* |
608 | * Call a function on all processors. May be used during early boot while | 689 | * Call a function on all processors. May be used during early boot while |
609 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead | 690 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead |
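The smp.c hunk above is mostly code movement: setup_max_cpus, the nosmp/nr_cpus/maxcpus early parameters, setup_nr_cpu_ids() and smp_init() migrate here from init/main.c, so the boot-time bring-up loop sits next to the other SMP helpers. A standalone sketch of the loop's shape, with assumed values for what the firmware reports and for a maxcpus=4 boot:

#include <stdio.h>

int main(void)
{
	int present_cpus = 8;        /* CPUs the firmware reported (assumed) */
	int setup_max_cpus = 4;      /* e.g. booted with maxcpus=4 (assumed) */
	int online = 1;              /* the boot CPU is already up */

	for (int cpu = 1; cpu < present_cpus && online < setup_max_cpus; cpu++) {
		/* cpu_up(cpu) in the kernel; here we only count */
		online++;
	}
	printf("Brought up %d CPUs\n", online);   /* prints: Brought up 4 CPUs */
	return 0;
}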
diff --git a/kernel/softirq.c b/kernel/softirq.c index 56e5dec837f0..174f976c2874 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -567,7 +567,7 @@ static void __tasklet_hrtimer_trampoline(unsigned long data) | |||
567 | /** | 567 | /** |
568 | * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks | 568 | * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks |
569 | * @ttimer: tasklet_hrtimer which is initialized | 569 | * @ttimer: tasklet_hrtimer which is initialized |
570 | * @function: hrtimer callback funtion which gets called from softirq context | 570 | * @function: hrtimer callback function which gets called from softirq context |
571 | * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) | 571 | * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) |
572 | * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) | 572 | * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) |
573 | */ | 573 | */ |
@@ -845,7 +845,10 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
845 | switch (action) { | 845 | switch (action) { |
846 | case CPU_UP_PREPARE: | 846 | case CPU_UP_PREPARE: |
847 | case CPU_UP_PREPARE_FROZEN: | 847 | case CPU_UP_PREPARE_FROZEN: |
848 | p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | 848 | p = kthread_create_on_node(run_ksoftirqd, |
849 | hcpu, | ||
850 | cpu_to_node(hotcpu), | ||
851 | "ksoftirqd/%d", hotcpu); | ||
849 | if (IS_ERR(p)) { | 852 | if (IS_ERR(p)) { |
850 | printk("ksoftirqd for %i failed\n", hotcpu); | 853 | printk("ksoftirqd for %i failed\n", hotcpu); |
851 | return notifier_from_errno(PTR_ERR(p)); | 854 | return notifier_from_errno(PTR_ERR(p)); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2df820b03beb..e3516b29076c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -301,8 +301,10 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
301 | case CPU_UP_PREPARE: | 301 | case CPU_UP_PREPARE: |
302 | BUG_ON(stopper->thread || stopper->enabled || | 302 | BUG_ON(stopper->thread || stopper->enabled || |
303 | !list_empty(&stopper->works)); | 303 | !list_empty(&stopper->works)); |
304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | 304 | p = kthread_create_on_node(cpu_stopper_thread, |
305 | cpu); | 305 | stopper, |
306 | cpu_to_node(cpu), | ||
307 | "migration/%d", cpu); | ||
306 | if (IS_ERR(p)) | 308 | if (IS_ERR(p)) |
307 | return notifier_from_errno(PTR_ERR(p)); | 309 | return notifier_from_errno(PTR_ERR(p)); |
308 | get_task_struct(p); | 310 | get_task_struct(p); |
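Both per-CPU kernel threads created in these hunks (ksoftirqd/N and the stop-machine migration/N thread) switch to kthread_create_on_node() with cpu_to_node(cpu), so the thread's task_struct and stack are allocated on the NUMA node of the CPU it will be bound to. A sketch of that pattern as it would appear in kernel code; it is not a standalone program:

#include <linux/kthread.h>
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/err.h>

static int worker_fn(void *data)
{
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

/* Create the per-CPU worker on the CPU's own NUMA node, then bind it. */
static struct task_struct *start_percpu_worker(unsigned int cpu)
{
	struct task_struct *p;

	p = kthread_create_on_node(worker_fn, NULL, cpu_to_node(cpu),
				   "worker/%u", cpu);
	if (!IS_ERR(p))
		kthread_bind(p, cpu);
	return p;
}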
diff --git a/kernel/sys.c b/kernel/sys.c index 1ad48b3b9068..af468edf096a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -120,16 +120,33 @@ EXPORT_SYMBOL(cad_pid); | |||
120 | void (*pm_power_off_prepare)(void); | 120 | void (*pm_power_off_prepare)(void); |
121 | 121 | ||
122 | /* | 122 | /* |
123 | * Returns true if current's euid is same as p's uid or euid, | ||
124 | * or has CAP_SYS_NICE to p's user_ns. | ||
125 | * | ||
126 | * Called with rcu_read_lock, creds are safe | ||
127 | */ | ||
128 | static bool set_one_prio_perm(struct task_struct *p) | ||
129 | { | ||
130 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | ||
131 | |||
132 | if (pcred->user->user_ns == cred->user->user_ns && | ||
133 | (pcred->uid == cred->euid || | ||
134 | pcred->euid == cred->euid)) | ||
135 | return true; | ||
136 | if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE)) | ||
137 | return true; | ||
138 | return false; | ||
139 | } | ||
140 | |||
141 | /* | ||
123 | * set the priority of a task | 142 | * set the priority of a task |
124 | * - the caller must hold the RCU read lock | 143 | * - the caller must hold the RCU read lock |
125 | */ | 144 | */ |
126 | static int set_one_prio(struct task_struct *p, int niceval, int error) | 145 | static int set_one_prio(struct task_struct *p, int niceval, int error) |
127 | { | 146 | { |
128 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | ||
129 | int no_nice; | 147 | int no_nice; |
130 | 148 | ||
131 | if (pcred->uid != cred->euid && | 149 | if (!set_one_prio_perm(p)) { |
132 | pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) { | ||
133 | error = -EPERM; | 150 | error = -EPERM; |
134 | goto out; | 151 | goto out; |
135 | } | 152 | } |
@@ -506,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
506 | if (rgid != (gid_t) -1) { | 523 | if (rgid != (gid_t) -1) { |
507 | if (old->gid == rgid || | 524 | if (old->gid == rgid || |
508 | old->egid == rgid || | 525 | old->egid == rgid || |
509 | capable(CAP_SETGID)) | 526 | nsown_capable(CAP_SETGID)) |
510 | new->gid = rgid; | 527 | new->gid = rgid; |
511 | else | 528 | else |
512 | goto error; | 529 | goto error; |
@@ -515,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
515 | if (old->gid == egid || | 532 | if (old->gid == egid || |
516 | old->egid == egid || | 533 | old->egid == egid || |
517 | old->sgid == egid || | 534 | old->sgid == egid || |
518 | capable(CAP_SETGID)) | 535 | nsown_capable(CAP_SETGID)) |
519 | new->egid = egid; | 536 | new->egid = egid; |
520 | else | 537 | else |
521 | goto error; | 538 | goto error; |
@@ -550,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) | |||
550 | old = current_cred(); | 567 | old = current_cred(); |
551 | 568 | ||
552 | retval = -EPERM; | 569 | retval = -EPERM; |
553 | if (capable(CAP_SETGID)) | 570 | if (nsown_capable(CAP_SETGID)) |
554 | new->gid = new->egid = new->sgid = new->fsgid = gid; | 571 | new->gid = new->egid = new->sgid = new->fsgid = gid; |
555 | else if (gid == old->gid || gid == old->sgid) | 572 | else if (gid == old->gid || gid == old->sgid) |
556 | new->egid = new->fsgid = gid; | 573 | new->egid = new->fsgid = gid; |
@@ -617,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
617 | new->uid = ruid; | 634 | new->uid = ruid; |
618 | if (old->uid != ruid && | 635 | if (old->uid != ruid && |
619 | old->euid != ruid && | 636 | old->euid != ruid && |
620 | !capable(CAP_SETUID)) | 637 | !nsown_capable(CAP_SETUID)) |
621 | goto error; | 638 | goto error; |
622 | } | 639 | } |
623 | 640 | ||
@@ -626,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
626 | if (old->uid != euid && | 643 | if (old->uid != euid && |
627 | old->euid != euid && | 644 | old->euid != euid && |
628 | old->suid != euid && | 645 | old->suid != euid && |
629 | !capable(CAP_SETUID)) | 646 | !nsown_capable(CAP_SETUID)) |
630 | goto error; | 647 | goto error; |
631 | } | 648 | } |
632 | 649 | ||
@@ -674,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) | |||
674 | old = current_cred(); | 691 | old = current_cred(); |
675 | 692 | ||
676 | retval = -EPERM; | 693 | retval = -EPERM; |
677 | if (capable(CAP_SETUID)) { | 694 | if (nsown_capable(CAP_SETUID)) { |
678 | new->suid = new->uid = uid; | 695 | new->suid = new->uid = uid; |
679 | if (uid != old->uid) { | 696 | if (uid != old->uid) { |
680 | retval = set_user(new); | 697 | retval = set_user(new); |
@@ -716,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) | |||
716 | old = current_cred(); | 733 | old = current_cred(); |
717 | 734 | ||
718 | retval = -EPERM; | 735 | retval = -EPERM; |
719 | if (!capable(CAP_SETUID)) { | 736 | if (!nsown_capable(CAP_SETUID)) { |
720 | if (ruid != (uid_t) -1 && ruid != old->uid && | 737 | if (ruid != (uid_t) -1 && ruid != old->uid && |
721 | ruid != old->euid && ruid != old->suid) | 738 | ruid != old->euid && ruid != old->suid) |
722 | goto error; | 739 | goto error; |
@@ -780,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) | |||
780 | old = current_cred(); | 797 | old = current_cred(); |
781 | 798 | ||
782 | retval = -EPERM; | 799 | retval = -EPERM; |
783 | if (!capable(CAP_SETGID)) { | 800 | if (!nsown_capable(CAP_SETGID)) { |
784 | if (rgid != (gid_t) -1 && rgid != old->gid && | 801 | if (rgid != (gid_t) -1 && rgid != old->gid && |
785 | rgid != old->egid && rgid != old->sgid) | 802 | rgid != old->egid && rgid != old->sgid) |
786 | goto error; | 803 | goto error; |
@@ -840,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) | |||
840 | 857 | ||
841 | if (uid == old->uid || uid == old->euid || | 858 | if (uid == old->uid || uid == old->euid || |
842 | uid == old->suid || uid == old->fsuid || | 859 | uid == old->suid || uid == old->fsuid || |
843 | capable(CAP_SETUID)) { | 860 | nsown_capable(CAP_SETUID)) { |
844 | if (uid != old_fsuid) { | 861 | if (uid != old_fsuid) { |
845 | new->fsuid = uid; | 862 | new->fsuid = uid; |
846 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) | 863 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) |
@@ -873,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) | |||
873 | 890 | ||
874 | if (gid == old->gid || gid == old->egid || | 891 | if (gid == old->gid || gid == old->egid || |
875 | gid == old->sgid || gid == old->fsgid || | 892 | gid == old->sgid || gid == old->fsgid || |
876 | capable(CAP_SETGID)) { | 893 | nsown_capable(CAP_SETGID)) { |
877 | if (gid != old_fsgid) { | 894 | if (gid != old_fsgid) { |
878 | new->fsgid = gid; | 895 | new->fsgid = gid; |
879 | goto change_okay; | 896 | goto change_okay; |
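Note: the repeated capable()→nsown_capable() conversion above keeps the same capability bits but targets the caller's own user namespace instead of the initial one. A minimal sketch of the assumed helper shape (see include/linux/capability.h in this series; not reproduced here verbatim):

#include <linux/capability.h>
#include <linux/cred.h>

/* Assumed shape of the helper used by the setuid/setgid paths above:
 * check the capability against the user namespace the caller's own
 * credentials belong to, not against init_user_ns. */
static inline bool nsown_capable(int cap)
{
	return ns_capable(current_user_ns(), cap);
}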
@@ -1181,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) | |||
1181 | int errno; | 1198 | int errno; |
1182 | char tmp[__NEW_UTS_LEN]; | 1199 | char tmp[__NEW_UTS_LEN]; |
1183 | 1200 | ||
1184 | if (!capable(CAP_SYS_ADMIN)) | 1201 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1185 | return -EPERM; | 1202 | return -EPERM; |
1203 | |||
1186 | if (len < 0 || len > __NEW_UTS_LEN) | 1204 | if (len < 0 || len > __NEW_UTS_LEN) |
1187 | return -EINVAL; | 1205 | return -EINVAL; |
1188 | down_write(&uts_sem); | 1206 | down_write(&uts_sem); |
@@ -1230,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) | |||
1230 | int errno; | 1248 | int errno; |
1231 | char tmp[__NEW_UTS_LEN]; | 1249 | char tmp[__NEW_UTS_LEN]; |
1232 | 1250 | ||
1233 | if (!capable(CAP_SYS_ADMIN)) | 1251 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1234 | return -EPERM; | 1252 | return -EPERM; |
1235 | if (len < 0 || len > __NEW_UTS_LEN) | 1253 | if (len < 0 || len > __NEW_UTS_LEN) |
1236 | return -EINVAL; | 1254 | return -EINVAL; |
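Note: with sethostname()/setdomainname() now checking CAP_SYS_ADMIN against the user namespace that owns the UTS namespace, the usual container-style flow is unchanged for a privileged task. A hedged userspace illustration (hostname value is hypothetical; unshare(CLONE_NEWUTS) itself still requires CAP_SYS_ADMIN at this point in the series):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char name[64];

	if (unshare(CLONE_NEWUTS) != 0) {
		perror("unshare");		/* needs CAP_SYS_ADMIN */
		return 1;
	}
	/* Only the new UTS namespace is affected; the kernel now checks
	 * CAP_SYS_ADMIN in the user namespace that owns it. */
	if (sethostname("sandbox", strlen("sandbox")) != 0) {
		perror("sethostname");
		return 1;
	}
	gethostname(name, sizeof(name));
	printf("hostname in new UTS namespace: %s\n", name);
	return 0;
}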
@@ -1345,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, | |||
1345 | rlim = tsk->signal->rlim + resource; | 1363 | rlim = tsk->signal->rlim + resource; |
1346 | task_lock(tsk->group_leader); | 1364 | task_lock(tsk->group_leader); |
1347 | if (new_rlim) { | 1365 | if (new_rlim) { |
1366 | /* Keep the capable check against init_user_ns until | ||
1367 | cgroups can contain all limits */ | ||
1348 | if (new_rlim->rlim_max > rlim->rlim_max && | 1368 | if (new_rlim->rlim_max > rlim->rlim_max && |
1349 | !capable(CAP_SYS_RESOURCE)) | 1369 | !capable(CAP_SYS_RESOURCE)) |
1350 | retval = -EPERM; | 1370 | retval = -EPERM; |
@@ -1388,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task) | |||
1388 | { | 1408 | { |
1389 | const struct cred *cred = current_cred(), *tcred; | 1409 | const struct cred *cred = current_cred(), *tcred; |
1390 | 1410 | ||
1391 | tcred = __task_cred(task); | 1411 | if (current == task) |
1392 | if (current != task && | 1412 | return 0; |
1393 | (cred->uid != tcred->euid || | ||
1394 | cred->uid != tcred->suid || | ||
1395 | cred->uid != tcred->uid || | ||
1396 | cred->gid != tcred->egid || | ||
1397 | cred->gid != tcred->sgid || | ||
1398 | cred->gid != tcred->gid) && | ||
1399 | !capable(CAP_SYS_RESOURCE)) { | ||
1400 | return -EPERM; | ||
1401 | } | ||
1402 | 1413 | ||
1403 | return 0; | 1414 | tcred = __task_cred(task); |
1415 | if (cred->user->user_ns == tcred->user->user_ns && | ||
1416 | (cred->uid == tcred->euid && | ||
1417 | cred->uid == tcred->suid && | ||
1418 | cred->uid == tcred->uid && | ||
1419 | cred->gid == tcred->egid && | ||
1420 | cred->gid == tcred->sgid && | ||
1421 | cred->gid == tcred->gid)) | ||
1422 | return 0; | ||
1423 | if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE)) | ||
1424 | return 0; | ||
1425 | |||
1426 | return -EPERM; | ||
1404 | } | 1427 | } |
1405 | 1428 | ||
1406 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, | 1429 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, |
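Note: the rewritten check_prlimit_permission() reads as three ordered tests: the caller is the target, the caller's uids/gids fully match the target within the same user namespace, or the caller holds CAP_SYS_RESOURCE over the target's user namespace. A hedged userspace sketch of what that gates, using the glibc prlimit() wrapper in query-only mode (target pid from argv is illustrative):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/resource.h>

int main(int argc, char **argv)
{
	pid_t pid = (argc > 1) ? (pid_t)atoi(argv[1]) : getpid();
	struct rlimit old;

	/* new_limit == NULL: read the target's limit without changing it. */
	if (prlimit(pid, RLIMIT_NOFILE, NULL, &old) != 0) {
		perror("prlimit");	/* EPERM when the checks above refuse */
		return 1;
	}
	printf("pid %d RLIMIT_NOFILE: cur=%llu max=%llu\n", (int)pid,
	       (unsigned long long)old.rlim_cur,
	       (unsigned long long)old.rlim_max);
	return 0;
}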
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 40245d697602..c0bb32414b17 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -117,6 +117,7 @@ static int neg_one = -1; | |||
117 | static int zero; | 117 | static int zero; |
118 | static int __maybe_unused one = 1; | 118 | static int __maybe_unused one = 1; |
119 | static int __maybe_unused two = 2; | 119 | static int __maybe_unused two = 2; |
120 | static int __maybe_unused three = 3; | ||
120 | static unsigned long one_ul = 1; | 121 | static unsigned long one_ul = 1; |
121 | static int one_hundred = 100; | 122 | static int one_hundred = 100; |
122 | #ifdef CONFIG_PRINTK | 123 | #ifdef CONFIG_PRINTK |
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write, | |||
169 | void __user *buffer, size_t *lenp, loff_t *ppos); | 170 | void __user *buffer, size_t *lenp, loff_t *ppos); |
170 | #endif | 171 | #endif |
171 | 172 | ||
173 | #ifdef CONFIG_PRINTK | ||
174 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | ||
175 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
176 | #endif | ||
177 | |||
172 | #ifdef CONFIG_MAGIC_SYSRQ | 178 | #ifdef CONFIG_MAGIC_SYSRQ |
173 | /* Note: sysrq code uses its own private copy */ | 179 | /* Note: sysrq code uses its own private copy */ |
174 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; | 180 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; |
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = { | |||
706 | .data = &kptr_restrict, | 712 | .data = &kptr_restrict, |
707 | .maxlen = sizeof(int), | 713 | .maxlen = sizeof(int), |
708 | .mode = 0644, | 714 | .mode = 0644, |
709 | .proc_handler = proc_dointvec_minmax, | 715 | .proc_handler = proc_dmesg_restrict, |
710 | .extra1 = &zero, | 716 | .extra1 = &zero, |
711 | .extra2 = &two, | 717 | .extra2 = &two, |
712 | }, | 718 | }, |
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = { | |||
971 | .data = &sysctl_overcommit_memory, | 977 | .data = &sysctl_overcommit_memory, |
972 | .maxlen = sizeof(sysctl_overcommit_memory), | 978 | .maxlen = sizeof(sysctl_overcommit_memory), |
973 | .mode = 0644, | 979 | .mode = 0644, |
974 | .proc_handler = proc_dointvec, | 980 | .proc_handler = proc_dointvec_minmax, |
981 | .extra1 = &zero, | ||
982 | .extra2 = &two, | ||
975 | }, | 983 | }, |
976 | { | 984 | { |
977 | .procname = "panic_on_oom", | 985 | .procname = "panic_on_oom", |
978 | .data = &sysctl_panic_on_oom, | 986 | .data = &sysctl_panic_on_oom, |
979 | .maxlen = sizeof(sysctl_panic_on_oom), | 987 | .maxlen = sizeof(sysctl_panic_on_oom), |
980 | .mode = 0644, | 988 | .mode = 0644, |
981 | .proc_handler = proc_dointvec, | 989 | .proc_handler = proc_dointvec_minmax, |
990 | .extra1 = &zero, | ||
991 | .extra2 = &two, | ||
982 | }, | 992 | }, |
983 | { | 993 | { |
984 | .procname = "oom_kill_allocating_task", | 994 | .procname = "oom_kill_allocating_task", |
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = { | |||
1006 | .data = &page_cluster, | 1016 | .data = &page_cluster, |
1007 | .maxlen = sizeof(int), | 1017 | .maxlen = sizeof(int), |
1008 | .mode = 0644, | 1018 | .mode = 0644, |
1009 | .proc_handler = proc_dointvec, | 1019 | .proc_handler = proc_dointvec_minmax, |
1020 | .extra1 = &zero, | ||
1010 | }, | 1021 | }, |
1011 | { | 1022 | { |
1012 | .procname = "dirty_background_ratio", | 1023 | .procname = "dirty_background_ratio", |
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = { | |||
1054 | .data = &dirty_expire_interval, | 1065 | .data = &dirty_expire_interval, |
1055 | .maxlen = sizeof(dirty_expire_interval), | 1066 | .maxlen = sizeof(dirty_expire_interval), |
1056 | .mode = 0644, | 1067 | .mode = 0644, |
1057 | .proc_handler = proc_dointvec, | 1068 | .proc_handler = proc_dointvec_minmax, |
1069 | .extra1 = &zero, | ||
1058 | }, | 1070 | }, |
1059 | { | 1071 | { |
1060 | .procname = "nr_pdflush_threads", | 1072 | .procname = "nr_pdflush_threads", |
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = { | |||
1130 | .maxlen = sizeof(int), | 1142 | .maxlen = sizeof(int), |
1131 | .mode = 0644, | 1143 | .mode = 0644, |
1132 | .proc_handler = drop_caches_sysctl_handler, | 1144 | .proc_handler = drop_caches_sysctl_handler, |
1145 | .extra1 = &one, | ||
1146 | .extra2 = &three, | ||
1133 | }, | 1147 | }, |
1134 | #ifdef CONFIG_COMPACTION | 1148 | #ifdef CONFIG_COMPACTION |
1135 | { | 1149 | { |
@@ -2385,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write, | |||
2385 | return err; | 2399 | return err; |
2386 | } | 2400 | } |
2387 | 2401 | ||
2402 | #ifdef CONFIG_PRINTK | ||
2403 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | ||
2404 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2405 | { | ||
2406 | if (write && !capable(CAP_SYS_ADMIN)) | ||
2407 | return -EPERM; | ||
2408 | |||
2409 | return proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
2410 | } | ||
2411 | #endif | ||
2412 | |||
2388 | struct do_proc_dointvec_minmax_conv_param { | 2413 | struct do_proc_dointvec_minmax_conv_param { |
2389 | int *min; | 2414 | int *min; |
2390 | int *max; | 2415 | int *max; |
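Note: two patterns recur in the sysctl hunks above: integer knobs move from proc_dointvec to proc_dointvec_minmax with extra1/extra2 bounds, and kptr_restrict gains a handler that refuses unprivileged writes before delegating to the bounded handler. An illustrative table entry under those assumptions (knob name example_mode and the bound variables are hypothetical; in sysctl.c the existing zero/two statics serve this role):

#include <linux/sysctl.h>

static int example_mode;
static int example_zero;
static int example_two = 2;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_mode",
		.data		= &example_mode,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		/* clamp writes to [0, 2] instead of accepting any int */
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &example_zero,
		.extra2		= &example_two,
	},
	{ }
};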
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 10b90d8a03c4..4e4932a7b360 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c | |||
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
111 | const char *fail = NULL; | 111 | const char *fail = NULL; |
112 | 112 | ||
113 | if (table->parent) { | 113 | if (table->parent) { |
114 | if (table->procname && !table->parent->procname) | 114 | if (!table->parent->procname) |
115 | set_fail(&fail, table, "Parent without procname"); | 115 | set_fail(&fail, table, "Parent without procname"); |
116 | } | 116 | } |
117 | if (!table->procname) | ||
118 | set_fail(&fail, table, "No procname"); | ||
119 | if (table->child) { | 117 | if (table->child) { |
120 | if (table->data) | 118 | if (table->data) |
121 | set_fail(&fail, table, "Directory with data?"); | 119 | set_fail(&fail, table, "Directory with data?"); |
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
144 | set_fail(&fail, table, "No maxlen"); | 142 | set_fail(&fail, table, "No maxlen"); |
145 | } | 143 | } |
146 | #ifdef CONFIG_PROC_SYSCTL | 144 | #ifdef CONFIG_PROC_SYSCTL |
147 | if (table->procname && !table->proc_handler) | 145 | if (!table->proc_handler) |
148 | set_fail(&fail, table, "No proc_handler"); | 146 | set_fail(&fail, table, "No proc_handler"); |
149 | #endif | 147 | #endif |
150 | #if 0 | ||
151 | if (!table->procname && table->proc_handler) | ||
152 | set_fail(&fail, table, "proc_handler without procname"); | ||
153 | #endif | ||
154 | sysctl_check_leaf(namespaces, table, &fail); | 148 | sysctl_check_leaf(namespaces, table, &fail); |
155 | } | 149 | } |
156 | if (table->mode > 0777) | 150 | if (table->mode > 0777) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 3971c6b9d58d..9ffea360a778 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -685,7 +685,7 @@ static int __init taskstats_init(void) | |||
685 | goto err_cgroup_ops; | 685 | goto err_cgroup_ops; |
686 | 686 | ||
687 | family_registered = 1; | 687 | family_registered = 1; |
688 | printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); | 688 | pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); |
689 | return 0; | 689 | return 0; |
690 | err_cgroup_ops: | 690 | err_cgroup_ops: |
691 | genl_unregister_ops(&family, &taskstats_ops); | 691 | genl_unregister_ops(&family, &taskstats_ops); |
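Note: the taskstats hunk is a straight printk()→pr_info() conversion; pr_info() expands to printk(KERN_INFO ...) and picks up a pr_fmt() prefix if one is defined before the printk header is pulled in. A minimal sketch with an illustrative prefix and init function:

#define pr_fmt(fmt) "example: " fmt

#include <linux/printk.h>
#include <linux/init.h>

static int __init example_init(void)
{
	/* prints "example: registered, version 2" at KERN_INFO */
	pr_info("registered, version %d\n", 2);
	return 0;
}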
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index b2fa506667c0..a470154e0408 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -34,7 +34,7 @@ | |||
34 | * inaccuracies caused by missed or lost timer | 34 | * inaccuracies caused by missed or lost timer |
35 | * interrupts and the inability for the timer | 35 | * interrupts and the inability for the timer |
36 | * interrupt hardware to accurately tick at the | 36 | * interrupt hardware to accurately tick at the |
37 | * requested HZ value. It is also not reccomended | 37 | * requested HZ value. It is also not recommended |
38 | * for "tick-less" systems. | 38 | * for "tick-less" systems. |
39 | */ | 39 | */ |
40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) | 40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5f1bb8e2008f..f6117a4c7cb8 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -652,6 +652,8 @@ int do_adjtimex(struct timex *txc) | |||
652 | struct timespec delta; | 652 | struct timespec delta; |
653 | delta.tv_sec = txc->time.tv_sec; | 653 | delta.tv_sec = txc->time.tv_sec; |
654 | delta.tv_nsec = txc->time.tv_usec; | 654 | delta.tv_nsec = txc->time.tv_usec; |
655 | if (!capable(CAP_SYS_TIME)) | ||
656 | return -EPERM; | ||
655 | if (!(txc->modes & ADJ_NANO)) | 657 | if (!(txc->modes & ADJ_NANO)) |
656 | delta.tv_nsec *= 1000; | 658 | delta.tv_nsec *= 1000; |
657 | result = timekeeping_inject_offset(&delta); | 659 | result = timekeeping_inject_offset(&delta); |
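Note: the ntp.c hunk closes a gap where injecting a clock offset via ADJ_SETOFFSET skipped the usual privilege check; it now requires CAP_SYS_TIME like the other clock-setting paths. A hedged userspace illustration (assumes the libc headers expose ADJ_SETOFFSET/ADJ_NANO, which are new in this kernel series):

#include <stdio.h>
#include <string.h>
#include <sys/timex.h>

int main(void)
{
	struct timex txc;

	memset(&txc, 0, sizeof(txc));
	txc.modes = ADJ_SETOFFSET | ADJ_NANO;	/* offset given in nanoseconds */
	txc.time.tv_sec = 0;
	txc.time.tv_usec = 500000000;		/* +0.5 s */

	if (adjtimex(&txc) < 0)
		perror("adjtimex");		/* EPERM without CAP_SYS_TIME */
	return 0;
}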
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa18..c340ca658f37 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c | |||
@@ -19,7 +19,6 @@ | |||
19 | */ | 19 | */ |
20 | #include <linux/device.h> | 20 | #include <linux/device.h> |
21 | #include <linux/file.h> | 21 | #include <linux/file.h> |
22 | #include <linux/mutex.h> | ||
23 | #include <linux/posix-clock.h> | 22 | #include <linux/posix-clock.h> |
24 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
25 | #include <linux/syscalls.h> | 24 | #include <linux/syscalls.h> |
@@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) | |||
34 | { | 33 | { |
35 | struct posix_clock *clk = fp->private_data; | 34 | struct posix_clock *clk = fp->private_data; |
36 | 35 | ||
37 | mutex_lock(&clk->mutex); | 36 | down_read(&clk->rwsem); |
38 | 37 | ||
39 | if (!clk->zombie) | 38 | if (!clk->zombie) |
40 | return clk; | 39 | return clk; |
41 | 40 | ||
42 | mutex_unlock(&clk->mutex); | 41 | up_read(&clk->rwsem); |
43 | 42 | ||
44 | return NULL; | 43 | return NULL; |
45 | } | 44 | } |
46 | 45 | ||
47 | static void put_posix_clock(struct posix_clock *clk) | 46 | static void put_posix_clock(struct posix_clock *clk) |
48 | { | 47 | { |
49 | mutex_unlock(&clk->mutex); | 48 | up_read(&clk->rwsem); |
50 | } | 49 | } |
51 | 50 | ||
52 | static ssize_t posix_clock_read(struct file *fp, char __user *buf, | 51 | static ssize_t posix_clock_read(struct file *fp, char __user *buf, |
@@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) | |||
156 | struct posix_clock *clk = | 155 | struct posix_clock *clk = |
157 | container_of(inode->i_cdev, struct posix_clock, cdev); | 156 | container_of(inode->i_cdev, struct posix_clock, cdev); |
158 | 157 | ||
159 | mutex_lock(&clk->mutex); | 158 | down_read(&clk->rwsem); |
160 | 159 | ||
161 | if (clk->zombie) { | 160 | if (clk->zombie) { |
162 | err = -ENODEV; | 161 | err = -ENODEV; |
@@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) | |||
172 | fp->private_data = clk; | 171 | fp->private_data = clk; |
173 | } | 172 | } |
174 | out: | 173 | out: |
175 | mutex_unlock(&clk->mutex); | 174 | up_read(&clk->rwsem); |
176 | return err; | 175 | return err; |
177 | } | 176 | } |
178 | 177 | ||
@@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) | |||
211 | int err; | 210 | int err; |
212 | 211 | ||
213 | kref_init(&clk->kref); | 212 | kref_init(&clk->kref); |
214 | mutex_init(&clk->mutex); | 213 | init_rwsem(&clk->rwsem); |
215 | 214 | ||
216 | cdev_init(&clk->cdev, &posix_clock_file_operations); | 215 | cdev_init(&clk->cdev, &posix_clock_file_operations); |
217 | clk->cdev.owner = clk->ops.owner; | 216 | clk->cdev.owner = clk->ops.owner; |
218 | err = cdev_add(&clk->cdev, devid, 1); | 217 | err = cdev_add(&clk->cdev, devid, 1); |
219 | if (err) | ||
220 | goto no_cdev; | ||
221 | 218 | ||
222 | return err; | 219 | return err; |
223 | no_cdev: | ||
224 | mutex_destroy(&clk->mutex); | ||
225 | return err; | ||
226 | } | 220 | } |
227 | EXPORT_SYMBOL_GPL(posix_clock_register); | 221 | EXPORT_SYMBOL_GPL(posix_clock_register); |
228 | 222 | ||
229 | static void delete_clock(struct kref *kref) | 223 | static void delete_clock(struct kref *kref) |
230 | { | 224 | { |
231 | struct posix_clock *clk = container_of(kref, struct posix_clock, kref); | 225 | struct posix_clock *clk = container_of(kref, struct posix_clock, kref); |
232 | mutex_destroy(&clk->mutex); | 226 | |
233 | if (clk->release) | 227 | if (clk->release) |
234 | clk->release(clk); | 228 | clk->release(clk); |
235 | } | 229 | } |
@@ -238,9 +232,9 @@ void posix_clock_unregister(struct posix_clock *clk) | |||
238 | { | 232 | { |
239 | cdev_del(&clk->cdev); | 233 | cdev_del(&clk->cdev); |
240 | 234 | ||
241 | mutex_lock(&clk->mutex); | 235 | down_write(&clk->rwsem); |
242 | clk->zombie = true; | 236 | clk->zombie = true; |
243 | mutex_unlock(&clk->mutex); | 237 | up_write(&clk->rwsem); |
244 | 238 | ||
245 | kref_put(&clk->kref, delete_clock); | 239 | kref_put(&clk->kref, delete_clock); |
246 | } | 240 | } |
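Note: the posix-clock conversion swaps the per-clock mutex for an rwsem so multiple opens and readers can pin the clock concurrently, while unregistration takes the lock exclusively just long enough to mark it dead. A pattern sketch with illustrative function names (get_clock_locked/mark_clock_dead are not the file's real helpers):

#include <linux/rwsem.h>
#include <linux/posix-clock.h>

/* reader side: returns with clk->rwsem held for read, or NULL if dead */
static struct posix_clock *get_clock_locked(struct posix_clock *clk)
{
	down_read(&clk->rwsem);
	if (!clk->zombie)
		return clk;
	up_read(&clk->rwsem);
	return NULL;
}

/* unregister side: excludes all readers only for the flag flip */
static void mark_clock_dead(struct posix_clock *clk)
{
	down_write(&clk->rwsem);
	clk->zombie = true;
	up_write(&clk->rwsem);
}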
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3bd7e3d5c632..8ad5d576755e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/sysdev.h> | 17 | #include <linux/syscore_ops.h> |
18 | #include <linux/clocksource.h> | 18 | #include <linux/clocksource.h> |
19 | #include <linux/jiffies.h> | 19 | #include <linux/jiffies.h> |
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
@@ -597,13 +597,12 @@ static struct timespec timekeeping_suspend_time; | |||
597 | 597 | ||
598 | /** | 598 | /** |
599 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 599 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
600 | * @dev: unused | ||
601 | * | 600 | * |
602 | * This is for the generic clocksource timekeeping. | 601 | * This is for the generic clocksource timekeeping. |
603 | * xtime/wall_to_monotonic/jiffies/etc are | 602 | * xtime/wall_to_monotonic/jiffies/etc are |
604 | * still managed by arch specific suspend/resume code. | 603 | * still managed by arch specific suspend/resume code. |
605 | */ | 604 | */ |
606 | static int timekeeping_resume(struct sys_device *dev) | 605 | static void timekeeping_resume(void) |
607 | { | 606 | { |
608 | unsigned long flags; | 607 | unsigned long flags; |
609 | struct timespec ts; | 608 | struct timespec ts; |
@@ -632,11 +631,9 @@ static int timekeeping_resume(struct sys_device *dev) | |||
632 | 631 | ||
633 | /* Resume hrtimers */ | 632 | /* Resume hrtimers */ |
634 | hres_timers_resume(); | 633 | hres_timers_resume(); |
635 | |||
636 | return 0; | ||
637 | } | 634 | } |
638 | 635 | ||
639 | static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | 636 | static int timekeeping_suspend(void) |
640 | { | 637 | { |
641 | unsigned long flags; | 638 | unsigned long flags; |
642 | 639 | ||
@@ -654,26 +651,18 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
654 | } | 651 | } |
655 | 652 | ||
656 | /* sysfs resume/suspend bits for timekeeping */ | 653 | /* sysfs resume/suspend bits for timekeeping */ |
657 | static struct sysdev_class timekeeping_sysclass = { | 654 | static struct syscore_ops timekeeping_syscore_ops = { |
658 | .name = "timekeeping", | ||
659 | .resume = timekeeping_resume, | 655 | .resume = timekeeping_resume, |
660 | .suspend = timekeeping_suspend, | 656 | .suspend = timekeeping_suspend, |
661 | }; | 657 | }; |
662 | 658 | ||
663 | static struct sys_device device_timer = { | 659 | static int __init timekeeping_init_ops(void) |
664 | .id = 0, | ||
665 | .cls = &timekeeping_sysclass, | ||
666 | }; | ||
667 | |||
668 | static int __init timekeeping_init_device(void) | ||
669 | { | 660 | { |
670 | int error = sysdev_class_register(&timekeeping_sysclass); | 661 | register_syscore_ops(&timekeeping_syscore_ops); |
671 | if (!error) | 662 | return 0; |
672 | error = sysdev_register(&device_timer); | ||
673 | return error; | ||
674 | } | 663 | } |
675 | 664 | ||
676 | device_initcall(timekeeping_init_device); | 665 | device_initcall(timekeeping_init_ops); |
677 | 666 | ||
678 | /* | 667 | /* |
679 | * If the error is already larger, we look ahead even further | 668 | * If the error is already larger, we look ahead even further |
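Note: the timekeeping hunk is part of the sysdev→syscore migration: syscore_ops callbacks take no device argument, suspend returns an int, resume returns void, and one register_syscore_ops() call replaces the sysdev class/device pair. A minimal sketch with illustrative names:

#include <linux/syscore_ops.h>
#include <linux/init.h>

static int example_suspend(void)
{
	/* save state; runs late in suspend with interrupts disabled */
	return 0;
}

static void example_resume(void)
{
	/* restore state; runs early in resume */
}

static struct syscore_ops example_syscore_ops = {
	.suspend	= example_suspend,
	.resume		= example_resume,
};

static int __init example_init_ops(void)
{
	register_syscore_ops(&example_syscore_ops);
	return 0;
}
device_initcall(example_init_ops);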
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 2f3b585b8d7d..a5d0a3a85dd8 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -236,7 +236,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
236 | unsigned int timer_flag) | 236 | unsigned int timer_flag) |
237 | { | 237 | { |
238 | /* | 238 | /* |
239 | * It doesnt matter which lock we take: | 239 | * It doesn't matter which lock we take: |
240 | */ | 240 | */ |
241 | raw_spinlock_t *lock; | 241 | raw_spinlock_t *lock; |
242 | struct entry *entry, input; | 242 | struct entry *entry, input; |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cbafed7d4f38..6957aa298dfa 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q) | |||
703 | * | 703 | * |
704 | **/ | 704 | **/ |
705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | 705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, |
706 | u32 what) | 706 | u32 what) |
707 | { | 707 | { |
708 | struct blk_trace *bt = q->blk_trace; | 708 | struct blk_trace *bt = q->blk_trace; |
709 | int rw = rq->cmd_flags & 0x03; | ||
710 | 709 | ||
711 | if (likely(!bt)) | 710 | if (likely(!bt)) |
712 | return; | 711 | return; |
713 | 712 | ||
714 | if (rq->cmd_flags & REQ_DISCARD) | ||
715 | rw |= REQ_DISCARD; | ||
716 | |||
717 | if (rq->cmd_flags & REQ_SECURE) | ||
718 | rw |= REQ_SECURE; | ||
719 | |||
720 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 713 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
721 | what |= BLK_TC_ACT(BLK_TC_PC); | 714 | what |= BLK_TC_ACT(BLK_TC_PC); |
722 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, | 715 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, |
723 | what, rq->errors, rq->cmd_len, rq->cmd); | 716 | what, rq->errors, rq->cmd_len, rq->cmd); |
724 | } else { | 717 | } else { |
725 | what |= BLK_TC_ACT(BLK_TC_FS); | 718 | what |= BLK_TC_ACT(BLK_TC_FS); |
726 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, | 719 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), |
727 | what, rq->errors, 0, NULL); | 720 | rq->cmd_flags, what, rq->errors, 0, NULL); |
728 | } | 721 | } |
729 | } | 722 | } |
730 | 723 | ||
@@ -857,29 +850,21 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) | |||
857 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); | 850 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); |
858 | } | 851 | } |
859 | 852 | ||
860 | static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) | 853 | static void blk_add_trace_unplug(void *ignore, struct request_queue *q, |
854 | unsigned int depth, bool explicit) | ||
861 | { | 855 | { |
862 | struct blk_trace *bt = q->blk_trace; | 856 | struct blk_trace *bt = q->blk_trace; |
863 | 857 | ||
864 | if (bt) { | 858 | if (bt) { |
865 | unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; | 859 | __be64 rpdu = cpu_to_be64(depth); |
866 | __be64 rpdu = cpu_to_be64(pdu); | 860 | u32 what; |
867 | |||
868 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, | ||
869 | sizeof(rpdu), &rpdu); | ||
870 | } | ||
871 | } | ||
872 | 861 | ||
873 | static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) | 862 | if (explicit) |
874 | { | 863 | what = BLK_TA_UNPLUG_IO; |
875 | struct blk_trace *bt = q->blk_trace; | 864 | else |
876 | 865 | what = BLK_TA_UNPLUG_TIMER; | |
877 | if (bt) { | ||
878 | unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; | ||
879 | __be64 rpdu = cpu_to_be64(pdu); | ||
880 | 866 | ||
881 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, | 867 | __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); |
882 | sizeof(rpdu), &rpdu); | ||
883 | } | 868 | } |
884 | } | 869 | } |
885 | 870 | ||
@@ -1022,9 +1007,7 @@ static void blk_register_tracepoints(void) | |||
1022 | WARN_ON(ret); | 1007 | WARN_ON(ret); |
1023 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); | 1008 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); |
1024 | WARN_ON(ret); | 1009 | WARN_ON(ret); |
1025 | ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); | 1010 | ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); |
1026 | WARN_ON(ret); | ||
1027 | ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | ||
1028 | WARN_ON(ret); | 1011 | WARN_ON(ret); |
1029 | ret = register_trace_block_split(blk_add_trace_split, NULL); | 1012 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
1030 | WARN_ON(ret); | 1013 | WARN_ON(ret); |
@@ -1039,8 +1022,7 @@ static void blk_unregister_tracepoints(void) | |||
1039 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1022 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1040 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); | 1023 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1041 | unregister_trace_block_split(blk_add_trace_split, NULL); | 1024 | unregister_trace_block_split(blk_add_trace_split, NULL); |
1042 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | 1025 | unregister_trace_block_unplug(blk_add_trace_unplug, NULL); |
1043 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); | ||
1044 | unregister_trace_block_plug(blk_add_trace_plug, NULL); | 1026 | unregister_trace_block_plug(blk_add_trace_plug, NULL); |
1045 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); | 1027 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); |
1046 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); | 1028 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 888b611897d3..ee24fa1935ac 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -1268,7 +1268,7 @@ static int ftrace_update_code(struct module *mod) | |||
1268 | p->flags = 0L; | 1268 | p->flags = 0L; |
1269 | 1269 | ||
1270 | /* | 1270 | /* |
1271 | * Do the initial record convertion from mcount jump | 1271 | * Do the initial record conversion from mcount jump |
1272 | * to the NOP instructions. | 1272 | * to the NOP instructions. |
1273 | */ | 1273 | */ |
1274 | if (!ftrace_code_disable(mod, p)) { | 1274 | if (!ftrace_code_disable(mod, p)) { |
@@ -1467,7 +1467,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1467 | return t_hash_next(m, pos); | 1467 | return t_hash_next(m, pos); |
1468 | 1468 | ||
1469 | (*pos)++; | 1469 | (*pos)++; |
1470 | iter->pos = *pos; | 1470 | iter->pos = iter->func_pos = *pos; |
1471 | 1471 | ||
1472 | if (iter->flags & FTRACE_ITER_PRINTALL) | 1472 | if (iter->flags & FTRACE_ITER_PRINTALL) |
1473 | return t_hash_start(m, pos); | 1473 | return t_hash_start(m, pos); |
@@ -1502,7 +1502,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1502 | if (!rec) | 1502 | if (!rec) |
1503 | return t_hash_start(m, pos); | 1503 | return t_hash_start(m, pos); |
1504 | 1504 | ||
1505 | iter->func_pos = *pos; | ||
1506 | iter->func = rec; | 1505 | iter->func = rec; |
1507 | 1506 | ||
1508 | return iter; | 1507 | return iter; |
@@ -3426,7 +3425,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) | |||
3426 | atomic_set(&t->tracing_graph_pause, 0); | 3425 | atomic_set(&t->tracing_graph_pause, 0); |
3427 | atomic_set(&t->trace_overrun, 0); | 3426 | atomic_set(&t->trace_overrun, 0); |
3428 | t->ftrace_timestamp = 0; | 3427 | t->ftrace_timestamp = 0; |
3429 | /* make curr_ret_stack visable before we add the ret_stack */ | 3428 | /* make curr_ret_stack visible before we add the ret_stack */ |
3430 | smp_wmb(); | 3429 | smp_wmb(); |
3431 | t->ret_stack = ret_stack; | 3430 | t->ret_stack = ret_stack; |
3432 | } | 3431 | } |
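Note: the graph_init_task hunk only fixes the comment, but the barrier it documents follows the usual publish/consume shape: initialise everything the consumer will dereference, issue smp_wmb(), then publish the pointer, with the reader pairing that against a read barrier. A generic sketch (names are illustrative, not the ftrace structures):

#include <linux/kernel.h>

struct payload {
	int value;
};

static struct payload *shared_ptr;

/* writer: make the initialisation visible before the pointer */
static void publish(struct payload *p, int v)
{
	p->value = v;
	smp_wmb();
	shared_ptr = p;
}

/* reader: pairs with the writer's smp_wmb() */
static int consume(void)
{
	struct payload *p = shared_ptr;

	if (!p)
		return -1;
	smp_rmb();
	return p->value;
}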
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d9c8bcafb120..0ef7b4b2a1f7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -1478,7 +1478,7 @@ static inline unsigned long rb_page_entries(struct buffer_page *bpage) | |||
1478 | return local_read(&bpage->entries) & RB_WRITE_MASK; | 1478 | return local_read(&bpage->entries) & RB_WRITE_MASK; |
1479 | } | 1479 | } |
1480 | 1480 | ||
1481 | /* Size is determined by what has been commited */ | 1481 | /* Size is determined by what has been committed */ |
1482 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1482 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
1483 | { | 1483 | { |
1484 | return rb_page_commit(bpage); | 1484 | return rb_page_commit(bpage); |
@@ -2932,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2932 | /* | 2932 | /* |
2933 | * cpu_buffer->pages just needs to point to the buffer, it | 2933 | * cpu_buffer->pages just needs to point to the buffer, it |
2934 | * has no specific buffer page to point to. Lets move it out | 2934 | * has no specific buffer page to point to. Lets move it out |
2935 | * of our way so we don't accidently swap it. | 2935 | * of our way so we don't accidentally swap it. |
2936 | */ | 2936 | */ |
2937 | cpu_buffer->pages = reader->list.prev; | 2937 | cpu_buffer->pages = reader->list.prev; |
2938 | 2938 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9541c27c1cf2..d38c16a06a6f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3239,7 +3239,7 @@ waitagain: | |||
3239 | trace_seq_init(&iter->seq); | 3239 | trace_seq_init(&iter->seq); |
3240 | 3240 | ||
3241 | /* | 3241 | /* |
3242 | * If there was nothing to send to user, inspite of consuming trace | 3242 | * If there was nothing to send to user, in spite of consuming trace |
3243 | * entries, go back to wait for more entries. | 3243 | * entries, go back to wait for more entries. |
3244 | */ | 3244 | */ |
3245 | if (sret == -EBUSY) | 3245 | if (sret == -EBUSY) |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 685a67d55db0..6302747a1398 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -46,7 +46,7 @@ u64 notrace trace_clock_local(void) | |||
46 | } | 46 | } |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * trace_clock(): 'inbetween' trace clock. Not completely serialized, | 49 | * trace_clock(): 'between' trace clock. Not completely serialized, |
50 | * but not completely incorrect when crossing CPUs either. | 50 | * but not completely incorrect when crossing CPUs either. |
51 | * | 51 | * |
52 | * This is based on cpu_clock(), which will allow at most ~1 jiffy of | 52 | * This is based on cpu_clock(), which will allow at most ~1 jiffy of |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 1516cb3ec549..e32744c84d94 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -27,7 +27,7 @@ | |||
27 | * in the structure. | 27 | * in the structure. |
28 | * | 28 | * |
29 | * * for structures within structures, the format of the internal | 29 | * * for structures within structures, the format of the internal |
30 | * structure is layed out. This allows the internal structure | 30 | * structure is laid out. This allows the internal structure |
31 | * to be deciphered for the format file. Although these macros | 31 | * to be deciphered for the format file. Although these macros |
32 | * may become out of sync with the internal structure, they | 32 | * may become out of sync with the internal structure, they |
33 | * will create a compile error if it happens. Since the | 33 | * will create a compile error if it happens. Since the |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 76b05980225c..962cdb24ed81 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -905,7 +905,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
905 | * | 905 | * |
906 | * returns 1 if | 906 | * returns 1 if |
907 | * - we are inside irq code | 907 | * - we are inside irq code |
908 | * - we just extered irq code | 908 | * - we just entered irq code |
909 | * | 909 | * |
910 | * returns 0 if | 910 | * returns 0 if |
911 | * - funcgraph-interrupts option is set | 911 | * - funcgraph-interrupts option is set |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 92b6e1e12d98..a4969b47afc1 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -80,7 +80,7 @@ static struct tracer_flags tracer_flags = { | |||
80 | * skip the latency if the sequence has changed - some other section | 80 | * skip the latency if the sequence has changed - some other section |
81 | * did a maximum and could disturb our measurement with serial console | 81 | * did a maximum and could disturb our measurement with serial console |
82 | * printouts, etc. Truly coinciding maximum latencies should be rare | 82 | * printouts, etc. Truly coinciding maximum latencies should be rare |
83 | * and what happens together happens separately as well, so this doesnt | 83 | * and what happens together happens separately as well, so this doesn't |
84 | * decrease the validity of the maximum found: | 84 | * decrease the validity of the maximum found: |
85 | */ | 85 | */ |
86 | static __cacheline_aligned_in_smp unsigned long max_sequence; | 86 | static __cacheline_aligned_in_smp unsigned long max_sequence; |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8435b43b1782..35d55a386145 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1839,7 +1839,7 @@ static void unregister_probe_event(struct trace_probe *tp) | |||
1839 | kfree(tp->call.print_fmt); | 1839 | kfree(tp->call.print_fmt); |
1840 | } | 1840 | } |
1841 | 1841 | ||
1842 | /* Make a debugfs interface for controling probe points */ | 1842 | /* Make a debugfs interface for controlling probe points */ |
1843 | static __init int init_kprobe_trace(void) | 1843 | static __init int init_kprobe_trace(void) |
1844 | { | 1844 | { |
1845 | struct dentry *d_tracer; | 1845 | struct dentry *d_tracer; |
diff --git a/kernel/uid16.c b/kernel/uid16.c index 419209893d87..51c6e89e8619 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c | |||
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) | |||
189 | struct group_info *group_info; | 189 | struct group_info *group_info; |
190 | int retval; | 190 | int retval; |
191 | 191 | ||
192 | if (!capable(CAP_SETGID)) | 192 | if (!nsown_capable(CAP_SETGID)) |
193 | return -EPERM; | 193 | return -EPERM; |
194 | if ((unsigned)gidsetsize > NGROUPS_MAX) | 194 | if ((unsigned)gidsetsize > NGROUPS_MAX) |
195 | return -EINVAL; | 195 | return -EINVAL; |
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index eb27fd3430a2..92cb706c7fc8 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c | |||
@@ -20,7 +20,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); | |||
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Removes a registered user return notifier. Must be called from atomic | 22 | * Removes a registered user return notifier. Must be called from atomic |
23 | * context, and from the same cpu registration occured in. | 23 | * context, and from the same cpu registration occurred in. |
24 | */ | 24 | */ |
25 | void user_return_notifier_unregister(struct user_return_notifier *urn) | 25 | void user_return_notifier_unregister(struct user_return_notifier *urn) |
26 | { | 26 | { |
diff --git a/kernel/user.c b/kernel/user.c index 5c598ca781df..9e03e9c1df8d 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -17,9 +17,13 @@ | |||
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | 19 | ||
20 | /* | ||
21 | * userns count is 1 for root user, 1 for init_uts_ns, | ||
22 | * and 1 for... ? | ||
23 | */ | ||
20 | struct user_namespace init_user_ns = { | 24 | struct user_namespace init_user_ns = { |
21 | .kref = { | 25 | .kref = { |
22 | .refcount = ATOMIC_INIT(2), | 26 | .refcount = ATOMIC_INIT(3), |
23 | }, | 27 | }, |
24 | .creator = &root_user, | 28 | .creator = &root_user, |
25 | }; | 29 | }; |
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep; | |||
47 | */ | 51 | */ |
48 | static DEFINE_SPINLOCK(uidhash_lock); | 52 | static DEFINE_SPINLOCK(uidhash_lock); |
49 | 53 | ||
50 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ | 54 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */ |
51 | struct user_struct root_user = { | 55 | struct user_struct root_user = { |
52 | .__count = ATOMIC_INIT(2), | 56 | .__count = ATOMIC_INIT(2), |
53 | .processes = ATOMIC_INIT(1), | 57 | .processes = ATOMIC_INIT(1), |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 8a82b4b8ea52..44646179eaba 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/user_namespace.h> | ||
17 | 18 | ||
18 | static struct uts_namespace *create_uts_ns(void) | 19 | static struct uts_namespace *create_uts_ns(void) |
19 | { | 20 | { |
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void) | |||
30 | * @old_ns: namespace to clone | 31 | * @old_ns: namespace to clone |
31 | * Return NULL on error (failure to kmalloc), new ns otherwise | 32 | * Return NULL on error (failure to kmalloc), new ns otherwise |
32 | */ | 33 | */ |
33 | static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | 34 | static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, |
35 | struct uts_namespace *old_ns) | ||
34 | { | 36 | { |
35 | struct uts_namespace *ns; | 37 | struct uts_namespace *ns; |
36 | 38 | ||
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
40 | 42 | ||
41 | down_read(&uts_sem); | 43 | down_read(&uts_sem); |
42 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 44 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
45 | ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); | ||
43 | up_read(&uts_sem); | 46 | up_read(&uts_sem); |
44 | return ns; | 47 | return ns; |
45 | } | 48 | } |
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
50 | * utsname of this process won't be seen by parent, and vice | 53 | * utsname of this process won't be seen by parent, and vice |
51 | * versa. | 54 | * versa. |
52 | */ | 55 | */ |
53 | struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) | 56 | struct uts_namespace *copy_utsname(unsigned long flags, |
57 | struct task_struct *tsk) | ||
54 | { | 58 | { |
59 | struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; | ||
55 | struct uts_namespace *new_ns; | 60 | struct uts_namespace *new_ns; |
56 | 61 | ||
57 | BUG_ON(!old_ns); | 62 | BUG_ON(!old_ns); |
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol | |||
60 | if (!(flags & CLONE_NEWUTS)) | 65 | if (!(flags & CLONE_NEWUTS)) |
61 | return old_ns; | 66 | return old_ns; |
62 | 67 | ||
63 | new_ns = clone_uts_ns(old_ns); | 68 | new_ns = clone_uts_ns(tsk, old_ns); |
64 | 69 | ||
65 | put_uts_ns(old_ns); | 70 | put_uts_ns(old_ns); |
66 | return new_ns; | 71 | return new_ns; |
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref) | |||
71 | struct uts_namespace *ns; | 76 | struct uts_namespace *ns; |
72 | 77 | ||
73 | ns = container_of(kref, struct uts_namespace, kref); | 78 | ns = container_of(kref, struct uts_namespace, kref); |
79 | put_user_ns(ns->user_ns); | ||
74 | kfree(ns); | 80 | kfree(ns); |
75 | } | 81 | } |
diff --git a/kernel/wait.c b/kernel/wait.c index b0310eb6cc1e..f45ea8d2a1ce 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -142,7 +142,7 @@ EXPORT_SYMBOL(finish_wait); | |||
142 | * woken up through the queue. | 142 | * woken up through the queue. |
143 | * | 143 | * |
144 | * This prevents waiter starvation where an exclusive waiter | 144 | * This prevents waiter starvation where an exclusive waiter |
145 | * aborts and is woken up concurrently and noone wakes up | 145 | * aborts and is woken up concurrently and no one wakes up |
146 | * the next waiter. | 146 | * the next waiter. |
147 | */ | 147 | */ |
148 | void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, | 148 | void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 18bb15776c57..140dce750450 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -48,12 +48,15 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | |||
48 | * Should we panic when a soft-lockup or hard-lockup occurs: | 48 | * Should we panic when a soft-lockup or hard-lockup occurs: |
49 | */ | 49 | */ |
50 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 50 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
51 | static int hardlockup_panic; | 51 | static int hardlockup_panic = |
52 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; | ||
52 | 53 | ||
53 | static int __init hardlockup_panic_setup(char *str) | 54 | static int __init hardlockup_panic_setup(char *str) |
54 | { | 55 | { |
55 | if (!strncmp(str, "panic", 5)) | 56 | if (!strncmp(str, "panic", 5)) |
56 | hardlockup_panic = 1; | 57 | hardlockup_panic = 1; |
58 | else if (!strncmp(str, "nopanic", 7)) | ||
59 | hardlockup_panic = 0; | ||
57 | else if (!strncmp(str, "0", 1)) | 60 | else if (!strncmp(str, "0", 1)) |
58 | watchdog_enabled = 0; | 61 | watchdog_enabled = 0; |
59 | return 1; | 62 | return 1; |
@@ -415,19 +418,22 @@ static int watchdog_prepare_cpu(int cpu) | |||
415 | static int watchdog_enable(int cpu) | 418 | static int watchdog_enable(int cpu) |
416 | { | 419 | { |
417 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); | 420 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); |
418 | int err; | 421 | int err = 0; |
419 | 422 | ||
420 | /* enable the perf event */ | 423 | /* enable the perf event */ |
421 | err = watchdog_nmi_enable(cpu); | 424 | err = watchdog_nmi_enable(cpu); |
422 | if (err) | 425 | |
423 | return err; | 426 | /* Regardless of err above, fall through and start softlockup */ |
424 | 427 | ||
425 | /* create the watchdog thread */ | 428 | /* create the watchdog thread */ |
426 | if (!p) { | 429 | if (!p) { |
427 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); | 430 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); |
428 | if (IS_ERR(p)) { | 431 | if (IS_ERR(p)) { |
429 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 432 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); |
430 | return PTR_ERR(p); | 433 | if (!err) |
434 | /* if hardlockup hasn't already set this */ | ||
435 | err = PTR_ERR(p); | ||
436 | goto out; | ||
431 | } | 437 | } |
432 | kthread_bind(p, cpu); | 438 | kthread_bind(p, cpu); |
433 | per_cpu(watchdog_touch_ts, cpu) = 0; | 439 | per_cpu(watchdog_touch_ts, cpu) = 0; |
@@ -435,7 +441,8 @@ static int watchdog_enable(int cpu) | |||
435 | wake_up_process(p); | 441 | wake_up_process(p); |
436 | } | 442 | } |
437 | 443 | ||
438 | return 0; | 444 | out: |
445 | return err; | ||
439 | } | 446 | } |
440 | 447 | ||
441 | static void watchdog_disable(int cpu) | 448 | static void watchdog_disable(int cpu) |
@@ -547,7 +554,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
547 | break; | 554 | break; |
548 | #endif /* CONFIG_HOTPLUG_CPU */ | 555 | #endif /* CONFIG_HOTPLUG_CPU */ |
549 | } | 556 | } |
550 | return notifier_from_errno(err); | 557 | |
558 | /* | ||
559 | * hardlockup and softlockup are not important enough | ||
560 | * to block cpu bring up. Just always succeed and | ||
561 | * rely on printk output to flag problems. | ||
562 | */ | ||
563 | return NOTIFY_OK; | ||
551 | } | 564 | } |
552 | 565 | ||
553 | static struct notifier_block __cpuinitdata cpu_nfb = { | 566 | static struct notifier_block __cpuinitdata cpu_nfb = { |
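Note: the watchdog hunks extend the existing boot-parameter parser to accept "nopanic" and make hardlockup/softlockup setup failures non-fatal for CPU bring-up. For reference, such parsers are wired up with __setup(); a sketch with an illustrative parameter name (example_watchdog=), not the file's actual registration:

#include <linux/init.h>
#include <linux/string.h>

static int example_panic;
static int example_enabled = 1;

static int __init example_watchdog_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		example_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		example_panic = 0;
	else if (!strncmp(str, "0", 1))
		example_enabled = 0;
	return 1;
}
__setup("example_watchdog=", example_watchdog_setup);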
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5ca7ce9ce754..8859a41806dd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -1291,7 +1291,7 @@ __acquires(&gcwq->lock) | |||
1291 | return true; | 1291 | return true; |
1292 | spin_unlock_irq(&gcwq->lock); | 1292 | spin_unlock_irq(&gcwq->lock); |
1293 | 1293 | ||
1294 | /* CPU has come up inbetween, retry migration */ | 1294 | /* CPU has come up in between, retry migration */ |
1295 | cpu_relax(); | 1295 | cpu_relax(); |
1296 | } | 1296 | } |
1297 | } | 1297 | } |
@@ -1366,8 +1366,10 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) | |||
1366 | worker->id = id; | 1366 | worker->id = id; |
1367 | 1367 | ||
1368 | if (!on_unbound_cpu) | 1368 | if (!on_unbound_cpu) |
1369 | worker->task = kthread_create(worker_thread, worker, | 1369 | worker->task = kthread_create_on_node(worker_thread, |
1370 | "kworker/%u:%d", gcwq->cpu, id); | 1370 | worker, |
1371 | cpu_to_node(gcwq->cpu), | ||
1372 | "kworker/%u:%d", gcwq->cpu, id); | ||
1371 | else | 1373 | else |
1372 | worker->task = kthread_create(worker_thread, worker, | 1374 | worker->task = kthread_create(worker_thread, worker, |
1373 | "kworker/u:%d", id); | 1375 | "kworker/u:%d", id); |