-rw-r--r--  Documentation/DocBook/kgdb.tmpl                |    6
-rw-r--r--  Documentation/x86/zero-page.txt                |    2
-rw-r--r--  arch/s390/include/asm/topology.h               |   24
-rw-r--r--  arch/s390/kernel/cache.c                       |   25
-rw-r--r--  arch/s390/kernel/early.c                       |   12
-rw-r--r--  arch/s390/kernel/setup.c                       |    1
-rw-r--r--  arch/s390/kernel/smp.c                         |   54
-rw-r--r--  arch/s390/kernel/topology.c                    |  134
-rw-r--r--  arch/s390/kernel/vdso64/clock_gettime.S        |    6
-rw-r--r--  arch/s390/mm/mmap.c                            |    5
-rw-r--r--  arch/x86/Kconfig                               |   16
-rw-r--r--  arch/x86/Kconfig.debug                         |   13
-rw-r--r--  arch/x86/boot/compressed/Makefile              |    1
-rw-r--r--  arch/x86/boot/compressed/aslr.c                |   34
-rw-r--r--  arch/x86/boot/compressed/efi_stub_64.S         |   25
-rw-r--r--  arch/x86/boot/compressed/efi_thunk_64.S        |  196
-rw-r--r--  arch/x86/boot/compressed/misc.c                |    3
-rw-r--r--  arch/x86/boot/compressed/misc.h                |    6
-rw-r--r--  arch/x86/include/asm/apic.h                    |    8
-rw-r--r--  arch/x86/include/asm/imr.h                     |   60
-rw-r--r--  arch/x86/include/asm/page_types.h              |    2
-rw-r--r--  arch/x86/include/asm/spinlock.h                |   90
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h          |    1
-rw-r--r--  arch/x86/kernel/acpi/boot.c                    |    5
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c          |    5
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel_early.c    |    6
-rw-r--r--  arch/x86/kernel/irq.c                          |    3
-rw-r--r--  arch/x86/kernel/kprobes/core.c                 |    2
-rw-r--r--  arch/x86/kernel/kvm.c                          |   13
-rw-r--r--  arch/x86/kernel/module.c                       |   10
-rw-r--r--  arch/x86/kernel/setup.c                        |   22
-rw-r--r--  arch/x86/kernel/uprobes.c                      |  153
-rw-r--r--  arch/x86/mm/init.c                             |   28
-rw-r--r--  arch/x86/mm/mmap.c                             |    6
-rw-r--r--  arch/x86/platform/Makefile                     |    1
-rw-r--r--  arch/x86/platform/efi/efi_stub_64.S            |  161
-rw-r--r--  arch/x86/platform/efi/efi_thunk_64.S           |  121
-rw-r--r--  arch/x86/platform/intel-quark/Makefile         |    2
-rw-r--r--  arch/x86/platform/intel-quark/imr.c            |  661
-rw-r--r--  arch/x86/platform/intel-quark/imr_selftest.c   |  129
-rw-r--r--  arch/x86/xen/spinlock.c                        |   13
-rw-r--r--  drivers/char/ipmi/ipmi_devintf.c               |    6
-rw-r--r--  drivers/char/ipmi/ipmi_msghandler.c            |  102
-rw-r--r--  drivers/char/ipmi/ipmi_si_intf.c               |  121
-rw-r--r--  drivers/char/ipmi/ipmi_ssif.c                  |    6
-rw-r--r--  drivers/firmware/efi/libstub/efi-stub-helper.c |   16
-rw-r--r--  drivers/platform/x86/Kconfig                   |   25
-rw-r--r--  fs/binfmt_elf.c                                |    5
-rw-r--r--  include/linux/compiler.h                       |    6
-rw-r--r--  include/linux/kdb.h                            |    8
-rw-r--r--  include/linux/sched.h                          |   10
-rw-r--r--  kernel/debug/debug_core.c                      |   19
-rw-r--r--  kernel/debug/kdb/kdb_io.c                      |   46
-rw-r--r--  kernel/debug/kdb/kdb_main.c                    |   16
-rw-r--r--  kernel/debug/kdb/kdb_private.h                 |    4
-rw-r--r--  kernel/locking/rtmutex.c                       |    3
-rw-r--r--  kernel/printk/printk.c                         |    2
-rw-r--r--  kernel/rcu/tree_plugin.h                       |    1
-rw-r--r--  kernel/sched/auto_group.c                      |    6
-rw-r--r--  kernel/sched/completion.c                      |   19
-rw-r--r--  kernel/sched/core.c                            |  113
-rw-r--r--  kernel/sched/deadline.c                        |   33
-rw-r--r--  kernel/sched/sched.h                           |   76
-rw-r--r--  kernel/time/ntp.c                              |   10

64 files changed, 1978 insertions(+), 740 deletions(-)
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 2428cc04dbc8..f3abca7ec53d 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -197,6 +197,7 @@
 may be configured as a kernel built-in or a kernel loadable module.
 You can only make use of <constant>kgdbwait</constant> and early
 debugging if you build kgdboc into the kernel as a built-in.
+</para>
 <para>Optionally you can elect to activate kms (Kernel Mode
 Setting) integration. When you use kms with kgdboc and you have a
 video driver that has atomic mode setting hooks, it is possible to
@@ -206,7 +207,6 @@
 crashes or doing analysis of memory with kdb while allowing the
 full graphics console applications to run.
 </para>
-</para>
 <sect2 id="kgdbocArgs">
 <title>kgdboc arguments</title>
 <para>Usage: <constant>kgdboc=[kms][[,]kbd][[,]serial_device][,baud]</constant></para>
@@ -284,7 +284,6 @@
 </listitem>
 </orderedlist>
 </para>
-</sect3>
 <para>NOTE: Kgdboc does not support interrupting the target via the
 gdb remote protocol. You must manually send a sysrq-g unless you
 have a proxy that splits console output to a terminal program.
@@ -305,6 +304,7 @@
 as well as on the initial connect, or to use a debugger proxy that
 allows an unmodified gdb to do the debugging.
 </para>
+</sect3>
 </sect2>
 </sect1>
 <sect1 id="kgdbwait">
@@ -350,12 +350,12 @@
 </para>
 </listitem>
 </orderedlist>
+</para>
 <para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
 active system console. An example of incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
 </para>
 <para>It is possible to use this option with kgdboc on a tty that is not a system console.
 </para>
-</para>
 </sect1>
 <sect1 id="kgdbreboot">
 <title>Run time parameter: kgdbreboot</title>
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 199f453cb4de..82fbdbc1e0b0 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit
 real-mode setup code of the kernel. References/settings to it mainly
 are in:
 
- arch/x86/include/asm/bootparam.h
+ arch/x86/include/uapi/asm/bootparam.h
 
 
 Offset	Proto	Name		Meaning
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c4fbb9527c5c..b1453a2ae1ca 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -18,15 +18,15 @@ struct cpu_topology_s390 {
 	cpumask_t book_mask;
 };
 
-extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 
-#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
-#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
-#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_mask)
-#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
-#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask)
-#define topology_book_id(cpu) (cpu_topology[cpu].book_id)
-#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask)
+#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
+#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
+#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
+#define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask)
 
 #define mc_capable() 1
 
@@ -51,14 +51,6 @@ static inline void topology_expect_change(void) { }
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
-#ifdef CONFIG_SCHED_BOOK
-void s390_init_cpu_topology(void);
-#else
-static inline void s390_init_cpu_topology(void)
-{
-};
-#endif
-
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
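NOTE: the hunk above replaces an NR_CPUS-sized array with per-CPU data, so
every accessor goes through per_cpu(). A minimal sketch of the
DEFINE_PER_CPU/per_cpu pattern it relies on — struct and names here are
illustrative, not part of this patch:

	#include <linux/percpu.h>

	struct example_topology {
		int socket_id;
	};

	/* One instance per possible CPU, placed in the per-CPU data area. */
	static DEFINE_PER_CPU(struct example_topology, example_topology);

	static int example_socket_id(int cpu)
	{
		/* per_cpu() selects the named CPU's copy of the variable. */
		return per_cpu(example_topology, cpu).socket_id;
	}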
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 632fa06ea162..0969d113b3d6 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -91,12 +91,9 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
 {
 	if (level >= CACHE_MAX_LEVEL)
 		return CACHE_TYPE_NOCACHE;
-
 	ci += level;
-
 	if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
 		return CACHE_TYPE_NOCACHE;
-
 	return cache_type_map[ci->type];
 }
 
@@ -111,23 +108,19 @@ static inline unsigned long ecag(int ai, int li, int ti)
 }
 
 static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
-			 enum cache_type type, unsigned int level)
+			 enum cache_type type, unsigned int level, int cpu)
 {
 	int ti, num_sets;
-	int cpu = smp_processor_id();
 
 	if (type == CACHE_TYPE_INST)
 		ti = CACHE_TI_INSTRUCTION;
 	else
 		ti = CACHE_TI_UNIFIED;
-
 	this_leaf->level = level + 1;
 	this_leaf->type = type;
 	this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
-	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY,
-						level, ti);
+	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
 	this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
-
 	num_sets = this_leaf->size / this_leaf->coherency_line_size;
 	num_sets /= this_leaf->ways_of_associativity;
 	this_leaf->number_of_sets = num_sets;
@@ -145,7 +138,6 @@ int init_cache_level(unsigned int cpu)
 
 	if (!this_cpu_ci)
 		return -EINVAL;
-
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	do {
 		ctype = get_cache_type(&ct.ci[0], level);
@@ -154,34 +146,31 @@ int init_cache_level(unsigned int cpu)
 		/* Separate instruction and data caches */
 		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
 	} while (++level < CACHE_MAX_LEVEL);
-
 	this_cpu_ci->num_levels = level;
 	this_cpu_ci->num_leaves = leaves;
-
 	return 0;
 }
 
 int populate_cache_leaves(unsigned int cpu)
 {
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 	unsigned int level, idx, pvt;
 	union cache_topology ct;
 	enum cache_type ctype;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
 	     idx < this_cpu_ci->num_leaves; idx++, level++) {
 		if (!this_leaf)
 			return -EINVAL;
-
 		pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
 		ctype = get_cache_type(&ct.ci[0], level);
 		if (ctype == CACHE_TYPE_SEPARATE) {
-			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level);
-			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
 		} else {
-			ci_leaf_init(this_leaf++, pvt, ctype, level);
+			ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
 		}
 	}
 	return 0;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 70a329450901..4427ab7ac23a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -393,17 +393,19 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
 	if (test_facility(129))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
-	if (test_facility(128))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
 #endif
 }
 
-static int __init nocad_setup(char *str)
+static int __init cad_setup(char *str)
 {
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_CAD;
+	int val;
+
+	get_option(&str, &val);
+	if (val && test_facility(128))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
 	return 0;
 }
-early_param("nocad", nocad_setup);
+early_param("cad", cad_setup);
 
 static int __init cad_init(void)
 {
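NOTE: the hunk above replaces the old "nocad" kill switch with a "cad=<n>"
integer parameter parsed via get_option(). The same early_param pattern in
isolation — parameter and variable names are hypothetical:

	#include <linux/kernel.h>
	#include <linux/init.h>

	static bool example_enabled;

	/* Parses "example=<int>" from the early kernel command line. */
	static int __init example_setup(char *str)
	{
		int val;

		get_option(&str, &val);		/* leaves val == 0 if absent */
		example_enabled = val != 0;
		return 0;
	}
	early_param("example", example_setup);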
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index bfac77ada4f2..a5ea8bc17cb3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -909,7 +909,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_init();
-	s390_init_cpu_topology();
 
 	/*
 	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a668993ff577..db8f1115a3bf 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,14 +59,13 @@ enum {
 	CPU_STATE_CONFIGURED,
 };
 
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
 struct pcpu {
-	struct cpu *cpu;
 	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
-	unsigned long async_stack;	/* async stack for the cpu */
-	unsigned long panic_stack;	/* panic stack for the cpu */
 	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
-	int state;			/* physical cpu state */
-	int polarization;		/* physical polarization */
+	signed char state;		/* physical cpu state */
+	signed char polarization;	/* physical polarization */
 	u16 address;			/* physical cpu address */
 };
 
@@ -173,25 +172,30 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
+	unsigned long async_stack, panic_stack;
 	struct _lowcore *lc;
 
 	if (pcpu != &pcpu_devices[0]) {
 		pcpu->lowcore = (struct _lowcore *)
 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
-		pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
-		pcpu->panic_stack = __get_free_page(GFP_KERNEL);
-		if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
+		async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+		panic_stack = __get_free_page(GFP_KERNEL);
+		if (!pcpu->lowcore || !panic_stack || !async_stack)
 			goto out;
+	} else {
+		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
 	}
 	lc = pcpu->lowcore;
 	memcpy(lc, &S390_lowcore, 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
-	lc->async_stack = pcpu->async_stack + ASYNC_SIZE
-		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
-	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
-		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 #ifndef CONFIG_64BIT
@@ -212,8 +216,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	return 0;
 out:
 	if (pcpu != &pcpu_devices[0]) {
-		free_page(pcpu->panic_stack);
-		free_pages(pcpu->async_stack, ASYNC_ORDER);
+		free_page(panic_stack);
+		free_pages(async_stack, ASYNC_ORDER);
 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 	}
 	return -ENOMEM;
@@ -235,11 +239,11 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 #else
 	vdso_free_per_cpu(pcpu->lowcore);
 #endif
-	if (pcpu != &pcpu_devices[0]) {
-		free_page(pcpu->panic_stack);
-		free_pages(pcpu->async_stack, ASYNC_ORDER);
-		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
-	}
+	if (pcpu == &pcpu_devices[0])
+		return;
+	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -366,7 +370,8 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
 void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
 	pcpu_delegate(&pcpu_devices[0], func, data,
-		      pcpu_devices->panic_stack + PAGE_SIZE);
+		      pcpu_devices->lowcore->panic_stack -
+		      PANIC_FRAME_OFFSET + PAGE_SIZE);
 }
 
 int smp_find_processor_id(u16 address)
@@ -935,10 +940,6 @@ void __init smp_prepare_boot_cpu(void)
 	pcpu->state = CPU_STATE_CONFIGURED;
 	pcpu->address = stap();
 	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
-	pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
-		+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
-		+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
 	set_cpu_present(0, true);
@@ -1078,8 +1079,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-	struct cpu *c = pcpu_devices[cpu].cpu;
-	struct device *s = &c->dev;
+	struct device *s = &per_cpu(cpu_device, cpu)->dev;
 	int err = 0;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
@@ -1102,7 +1102,7 @@ static int smp_add_present_cpu(int cpu)
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return -ENOMEM;
-	pcpu_devices[cpu].cpu = c;
+	per_cpu(cpu_device, cpu) = c;
 	s = &c->dev;
 	c->hotpluggable = 1;
 	rc = register_cpu(c, cpu);
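NOTE: ASYNC_FRAME_OFFSET/PANIC_FRAME_OFFSET above encode the distance from a
stack allocation's base to the initial stack pointer stored in the lowcore,
so the base can be recomputed at free time instead of being cached in
struct pcpu. A sketch of the arithmetic — the constants are stand-ins, not
the real s390 values:

	/* Stand-ins for ASYNC_SIZE, STACK_FRAME_OVERHEAD and __PT_SIZE. */
	#define STACK_SIZE	(4UL * 4096)
	#define FRAME_OVERHEAD	96UL
	#define PT_REGS_SIZE	160UL
	#define FRAME_OFFSET	(STACK_SIZE - FRAME_OVERHEAD - PT_REGS_SIZE)

	/* What pcpu_alloc_lowcore() stores in lc->async_stack ... */
	static unsigned long initial_sp(unsigned long base)
	{
		return base + FRAME_OFFSET;
	}

	/* ... and how pcpu_free_lowcore() recovers the allocation base. */
	static unsigned long alloc_base(unsigned long sp)
	{
		return sp - FRAME_OFFSET;
	}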
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 24ee33f1af24..14da43b801d9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -7,14 +7,14 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
 #include <linux/cpuset.h>
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -42,8 +42,8 @@ static DEFINE_SPINLOCK(topology_lock);
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
-struct cpu_topology_s390 cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
+DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
 
 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 {
@@ -90,15 +90,15 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 		if (lcpu < 0)
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
-			cpu_topology[lcpu + i].book_id = book->id;
-			cpu_topology[lcpu + i].core_id = rcore;
-			cpu_topology[lcpu + i].thread_id = lcpu + i;
+			per_cpu(cpu_topology, lcpu + i).book_id = book->id;
+			per_cpu(cpu_topology, lcpu + i).core_id = rcore;
+			per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
 			if (one_socket_per_cpu)
-				cpu_topology[lcpu + i].socket_id = rcore;
+				per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
 			else
-				cpu_topology[lcpu + i].socket_id = socket->id;
+				per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 		if (one_socket_per_cpu)
@@ -249,14 +249,14 @@ static void update_cpu_masks(void)
 
 	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
-		cpu_topology[cpu].thread_mask = cpu_thread_map(cpu);
-		cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
-		cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
+		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
+		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
+		per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
-			cpu_topology[cpu].thread_id = cpu;
-			cpu_topology[cpu].core_id = cpu;
-			cpu_topology[cpu].socket_id = cpu;
-			cpu_topology[cpu].book_id = cpu;
+			per_cpu(cpu_topology, cpu).thread_id = cpu;
+			per_cpu(cpu_topology, cpu).core_id = cpu;
+			per_cpu(cpu_topology, cpu).socket_id = cpu;
+			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
 	spin_unlock_irqrestore(&topology_lock, flags);
@@ -334,50 +334,6 @@ void topology_expect_change(void)
 	set_topology_timer();
 }
 
-static int __init early_parse_topology(char *p)
-{
-	if (strncmp(p, "off", 3))
-		return 0;
-	topology_enabled = 0;
-	return 0;
-}
-early_param("topology", early_parse_topology);
-
-static void __init alloc_masks(struct sysinfo_15_1_x *info,
-			       struct mask_info *mask, int offset)
-{
-	int i, nr_masks;
-
-	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
-	for (i = 0; i < info->mnest - offset; i++)
-		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
-	nr_masks = max(nr_masks, 1);
-	for (i = 0; i < nr_masks; i++) {
-		mask->next = alloc_bootmem_align(
-			roundup_pow_of_two(sizeof(struct mask_info)),
-			roundup_pow_of_two(sizeof(struct mask_info)));
-		mask = mask->next;
-	}
-}
-
-void __init s390_init_cpu_topology(void)
-{
-	struct sysinfo_15_1_x *info;
-	int i;
-
-	if (!MACHINE_HAS_TOPOLOGY)
-		return;
-	tl_info = alloc_bootmem_pages(PAGE_SIZE);
-	info = tl_info;
-	store_topology(info);
-	pr_info("The CPU configuration topology of the machine is:");
-	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
-		printk(KERN_CONT " %d", info->mag[i]);
-	printk(KERN_CONT " / %d\n", info->mnest);
-	alloc_masks(info, &socket_info, 1);
-	alloc_masks(info, &book_info, 2);
-}
-
 static int cpu_management;
 
 static ssize_t dispatching_show(struct device *dev,
@@ -467,20 +423,29 @@ int topology_cpu_init(struct cpu *cpu)
 
 const struct cpumask *cpu_thread_mask(int cpu)
 {
-	return &cpu_topology[cpu].thread_mask;
+	return &per_cpu(cpu_topology, cpu).thread_mask;
 }
 
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	return &cpu_topology[cpu].core_mask;
+	return &per_cpu(cpu_topology, cpu).core_mask;
 }
 
 static const struct cpumask *cpu_book_mask(int cpu)
 {
-	return &cpu_topology[cpu].book_mask;
+	return &per_cpu(cpu_topology, cpu).book_mask;
 }
 
+static int __init early_parse_topology(char *p)
+{
+	if (strncmp(p, "off", 3))
+		return 0;
+	topology_enabled = 0;
+	return 0;
+}
+early_param("topology", early_parse_topology);
+
 static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
@@ -489,6 +454,42 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ NULL, },
 };
 
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+			       struct mask_info *mask, int offset)
+{
+	int i, nr_masks;
+
+	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+	for (i = 0; i < info->mnest - offset; i++)
+		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+	nr_masks = max(nr_masks, 1);
+	for (i = 0; i < nr_masks; i++) {
+		mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+		mask = mask->next;
+	}
+}
+
+static int __init s390_topology_init(void)
+{
+	struct sysinfo_15_1_x *info;
+	int i;
+
+	if (!MACHINE_HAS_TOPOLOGY)
+		return 0;
+	tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+	info = tl_info;
+	store_topology(info);
+	pr_info("The CPU configuration topology of the machine is:");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		printk(KERN_CONT " %d", info->mag[i]);
+	printk(KERN_CONT " / %d\n", info->mnest);
+	alloc_masks(info, &socket_info, 1);
+	alloc_masks(info, &book_info, 2);
+	set_sched_topology(s390_topology);
+	return 0;
+}
+early_initcall(s390_topology_init);
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
@@ -498,10 +499,3 @@ static int __init topology_init(void)
 	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 }
 device_initcall(topology_init);
-
-static int __init early_topology_init(void)
-{
-	set_sched_topology(s390_topology);
-	return 0;
-}
-early_initcall(early_topology_init);
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 7699e735ae28..61541fb93dc6 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -25,9 +25,7 @@ __kernel_clock_gettime:
 	je	4f
 	cghi	%r2,__CLOCK_REALTIME
 	je	5f
-	cghi	%r2,__CLOCK_THREAD_CPUTIME_ID
-	je	9f
-	cghi	%r2,-2		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+	cghi	%r2,-3		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
 	je	9f
 	cghi	%r2,__CLOCK_MONOTONIC_COARSE
 	je	3f
@@ -106,7 +104,7 @@ __kernel_clock_gettime:
 	aghi	%r15,16
 	br	%r14
 
-	/* CLOCK_THREAD_CPUTIME_ID for this thread */
+	/* CPUCLOCK_VIRT for this thread */
9:	icm	%r0,15,__VDSO_ECTG_OK(%r5)
 	jz	12f
 	ear	%r2,%a4
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index d008f638b2cd..179a2c20b01f 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -183,7 +183,10 @@ unsigned long randomize_et_dyn(void)
 {
 	unsigned long base;
 
-	base = (STACK_TOP / 3 * 2) & (~mmap_align_mask << PAGE_SHIFT);
+	base = STACK_TOP / 3 * 2;
+	if (!is_32bit_task())
+		/* Align to 4GB */
+		base &= ~((1UL << 32) - 1);
 	return base + mmap_rnd();
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb1cf898ed3c..c2fb8a87dccb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -488,6 +488,22 @@ config X86_INTEL_MID
 	  Intel MID platforms are based on an Intel processor and chipset which
 	  consume less power than most of the x86 derivatives.
 
+config X86_INTEL_QUARK
+	bool "Intel Quark platform support"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	depends on X86_PLATFORM_DEVICES
+	depends on X86_TSC
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select IOSF_MBI
+	select INTEL_IMR
+	---help---
+	  Select to include support for Quark X1000 SoC.
+	  Say Y here if you have a Quark based system such as the Arduino
+	  compatible Intel Galileo.
+
 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
 	depends on ACPI
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 61bd2ad94281..20028da8ae18 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -313,6 +313,19 @@ config DEBUG_NMI_SELFTEST
 
 	  If unsure, say N.
 
+config DEBUG_IMR_SELFTEST
+	bool "Isolated Memory Region self test"
+	default n
+	depends on INTEL_IMR
+	---help---
+	  This option enables automated sanity testing of the IMR code.
+	  Some simple tests are run to verify IMR bounds checking, alignment
+	  and overlapping. This option is really only useful if you are
+	  debugging an IMR memory map or are modifying the IMR code and want to
+	  test your changes.
+
+	  If unsure say N here.
+
 config X86_DEBUG_STATIC_CPU_HAS
 	bool "Debug alternatives"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 843feb3eb20b..0a291cdfaf77 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -51,6 +51,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
 vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
 	$(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb1376381985..7083c16cccba 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,6 +14,13 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
+struct kaslr_setup_data {
+	__u64 next;
+	__u32 type;
+	__u32 len;
+	__u8 data[1];
+} kaslr_setup_data;
+
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum,
 	return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
+{
+	struct setup_data *data;
+
+	kaslr_setup_data.type = SETUP_KASLR;
+	kaslr_setup_data.len = 1;
+	kaslr_setup_data.next = 0;
+	kaslr_setup_data.data[0] = enabled;
+
+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+	while (data && data->next)
+		data = (struct setup_data *)(unsigned long)data->next;
+
+	if (data)
+		data->next = (unsigned long)&kaslr_setup_data;
+	else
+		params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
+
+}
+
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
@@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input,
 #ifdef CONFIG_HIBERNATION
 	if (!cmdline_find_option_bool("kaslr")) {
 		debug_putstr("KASLR disabled by default...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #else
 	if (cmdline_find_option_bool("nokaslr")) {
 		debug_putstr("KASLR disabled by cmdline...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #endif
+	add_kaslr_setup_data(params, 1);
 
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
index 7ff3632806b1..99494dff2113 100644
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -3,28 +3,3 @@
 #include <asm/processor-flags.h>
 
 #include "../../platform/efi/efi_stub_64.S"
-
-#ifdef CONFIG_EFI_MIXED
-	.code64
-	.text
-ENTRY(efi64_thunk)
-	push	%rbp
-	push	%rbx
-
-	subq	$16, %rsp
-	leaq	efi_exit32(%rip), %rax
-	movl	%eax, 8(%rsp)
-	leaq	efi_gdt64(%rip), %rax
-	movl	%eax, 4(%rsp)
-	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
-	leaq	efi32_boot_gdt(%rip), %rax
-	movl	%eax, (%rsp)
-
-	call	__efi64_thunk
-
-	addq	$16, %rsp
-	pop	%rbx
-	pop	%rbp
-	ret
-ENDPROC(efi64_thunk)
-#endif /* CONFIG_EFI_MIXED */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000000..630384a4c14a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI service calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identity
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+	.code64
+	.text
+ENTRY(efi64_thunk)
+	push	%rbp
+	push	%rbx
+
+	subq	$8, %rsp
+	leaq	efi_exit32(%rip), %rax
+	movl	%eax, 4(%rsp)
+	leaq	efi_gdt64(%rip), %rax
+	movl	%eax, (%rsp)
+	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
+
+	movl	%ds, %eax
+	push	%rax
+	movl	%es, %eax
+	push	%rax
+	movl	%ss, %eax
+	push	%rax
+
+	/*
+	 * Convert x86-64 ABI params to i386 ABI
+	 */
+	subq	$32, %rsp
+	movl	%esi, 0x0(%rsp)
+	movl	%edx, 0x4(%rsp)
+	movl	%ecx, 0x8(%rsp)
+	movq	%r8, %rsi
+	movl	%esi, 0xc(%rsp)
+	movq	%r9, %rsi
+	movl	%esi, 0x10(%rsp)
+
+	sgdt	save_gdt(%rip)
+
+	leaq	1f(%rip), %rbx
+	movq	%rbx, func_rt_ptr(%rip)
+
+	/*
+	 * Switch to gdt with 32-bit segments. This is the firmware GDT
+	 * that was installed when the kernel started executing. This
+	 * pointer was saved at the EFI stub entry point in head_64.S.
+	 */
+	leaq	efi32_boot_gdt(%rip), %rax
+	lgdt	(%rax)
+
+	pushq	$__KERNEL_CS
+	leaq	efi_enter32(%rip), %rax
+	pushq	%rax
+	lretq
+
+1:	addq	$32, %rsp
+
+	lgdt	save_gdt(%rip)
+
+	pop	%rbx
+	movl	%ebx, %ss
+	pop	%rbx
+	movl	%ebx, %es
+	pop	%rbx
+	movl	%ebx, %ds
+
+	/*
+	 * Convert 32-bit status code into 64-bit.
+	 */
+	test	%rax, %rax
+	jz	1f
+	movl	%eax, %ecx
+	andl	$0x0fffffff, %ecx
+	andl	$0xf0000000, %eax
+	shl	$32, %rax
+	or	%rcx, %rax
+1:
+	addq	$8, %rsp
+	pop	%rbx
+	pop	%rbp
+	ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+	movq	func_rt_ptr(%rip), %rax
+	push	%rax
+	mov	%rdi, %rax
+	ret
+ENDPROC(efi_exit32)
+
+	.code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	/* Reload pgtables */
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Disable long mode via EFER */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btrl	$_EFER_LME, %eax
+	wrmsr
+
+	call	*%edi
+
+	/* We must preserve return value */
+	movl	%eax, %edi
+
+	/*
+	 * Some firmware will return with interrupts enabled. Be sure to
+	 * disable them before we switch GDTs.
+	 */
+	cli
+
+	movl	56(%esp), %eax
+	movl	%eax, 2(%eax)
+	lgdtl	(%eax)
+
+	movl	%cr4, %eax
+	btsl	$(X86_CR4_PAE_BIT), %eax
+	movl	%eax, %cr4
+
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btsl	$_EFER_LME, %eax
+	wrmsr
+
+	xorl	%eax, %eax
+	lldt	%ax
+
+	movl	60(%esp), %eax
+	pushl	$__KERNEL_CS
+	pushl	%eax
+
+	/* Enable paging */
+	movl	%cr0, %eax
+	btsl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+	lret
+ENDPROC(efi_enter32)
+
+	.data
+	.balign	8
+	.global	efi32_boot_gdt
+efi32_boot_gdt:	.word	0
+		.quad	0
+
+save_gdt:	.word	0
+		.quad	0
+func_rt_ptr:	.quad	0
+
+	.global efi_gdt64
+efi_gdt64:
+	.word	efi_gdt64_end - efi_gdt64
+	.long	0			/* Filled out by user */
+	.word	0
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x0080890000000000	/* TS descriptor */
+	.quad	0x0000000000000000	/* TS continued */
+efi_gdt64_end:
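NOTE: the "convert 32-bit status code into 64-bit" block in efi64_thunk above
splits an EFI_STATUS: the error-class nibble moves from bits 31:28 to bits
63:60 while the low code bits stay put. The same transform in C, as a sketch:

	#include <stdint.h>

	static uint64_t efi_status_32_to_64(uint32_t status)
	{
		if (status == 0)	/* EFI_SUCCESS is unchanged */
			return 0;
		return ((uint64_t)(status & 0xf0000000) << 32) |
		       (status & 0x0fffffff);
	}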
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864a64da..5903089c818f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(input_data, input_len, output,
+	output = choose_kernel_location(real_mode, input_data, input_len,
+					output,
 					output_len > run_size ? output_len
 							      : run_size);
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d68403f..ee3576b2666b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 92003f3c8a42..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
 extern int apic_force_enable(unsigned long addr);
+#endif
 
 extern int apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH		BIT(31)
+#define IMR_CPU_SNOOP		BIT(30)		/* Applicable only to write */
+#define IMR_RMU			BIT(29)
+#define IMR_VC1_SAI_ID3		BIT(15)
+#define IMR_VC1_SAI_ID2		BIT(14)
+#define IMR_VC1_SAI_ID1		BIT(13)
+#define IMR_VC1_SAI_ID0		BIT(12)
+#define IMR_VC0_SAI_ID3		BIT(11)
+#define IMR_VC0_SAI_ID2		BIT(10)
+#define IMR_VC0_SAI_ID1		BIT(9)
+#define IMR_VC0_SAI_ID0		BIT(8)
+#define IMR_CPU_0		BIT(1)		/* SMM mode */
+#define IMR_CPU			BIT(0)		/* Non SMM mode */
+#define IMR_ACCESS_NONE		0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL	0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL	0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX	0x08
+#define QUARK_X1000_IMR_REGBASE	0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN		0x400
+#define IMR_MASK		(IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+		  unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
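NOTE: the header above is the whole IMR API surface — two calls plus access
mask bits. A hedged usage sketch, reserving a 1 KiB-aligned region for
CPU-only access without locking the IMR; the function and values are
illustrative, not from this series:

	#include <asm/imr.h>
	#include <linux/errno.h>

	static int example_imr_protect(phys_addr_t base, size_t size)
	{
		/* Hardware only checks bits 31:10, so enforce alignment. */
		if ((base & IMR_MASK) || (size & IMR_MASK))
			return -EINVAL;

		/* CPU may read and write; all other agents are excluded. */
		return imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
	}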
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index f97fbe3abb67..95e11f79f123 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern bool kaslr_enabled;
+
 static inline phys_addr_t get_max_mapped(void)
 {
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 7050d864f520..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+							__ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-		    new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
 	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return !__tickets_equal(tmp.tail, tmp.head);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	tmp.head &= ~TICKET_SLOWPATH_FLAG;
+	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
@@ -191,8 +189,8 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		 * We need to check "unlocked" in a loop, tmp.head == head
 		 * can be false positive because of overflow.
 		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-		    tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) ||
+		    !__tickets_equal(tmp.head, head))
 			break;
 
 		cpu_relax();
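NOTE: the recurring idiom in the hunk above is __tickets_equal(), which
compares head and tail while ignoring TICKET_SLOWPATH_FLAG now that the flag
is kept in the head. Its behavior restated standalone with an 8-bit ticket
type — a sketch, not the kernel's definitions:

	#include <stdint.h>

	typedef uint8_t ticket_t;		/* stand-in for __ticket_t */
	#define SLOWPATH_FLAG ((ticket_t)1)	/* TICKET_SLOWPATH_FLAG stand-in */

	static int tickets_equal(ticket_t one, ticket_t two)
	{
		/* Equal when all bits except the slowpath flag match. */
		return !((one ^ two) & ~SLOWPATH_FLAG);
	}

	/*
	 * Example: head = 0x09 (ticket 0x08 with the flag set), tail = 0x08.
	 * tickets_equal(0x09, 0x08) == 1, so the comparison sees the same
	 * ticket value whether or not the flag happens to be set.
	 */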
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b0988043a..44e6dd7e36a2 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
 #define SETUP_DTB			2
 #define SETUP_PCI			3
 #define SETUP_EFI			4
+#define SETUP_KASLR			5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae97ed0873c6..3d525c6124f6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
 	int rc, irq, trigger, polarity;
 
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+		*irqp = gsi;
+		return 0;
+	}
+
 	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
 	if (rc == 0) {
 		trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index c6826d1e8082..746e7fd08aad 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
196 struct microcode_header_intel mc_header; 196 struct microcode_header_intel mc_header;
197 unsigned int mc_size; 197 unsigned int mc_size;
198 198
199 if (leftover < sizeof(mc_header)) {
200 pr_err("error! Truncated header in microcode data file\n");
201 break;
202 }
203
199 if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) 204 if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
200 break; 205 break;
201 206
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index ec9df6f9cd47..420eb933189c 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start,
321 unsigned int mc_saved_count = mc_saved_data->mc_saved_count; 321 unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
322 int i; 322 int i;
323 323
324 while (leftover) { 324 while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
325
326 if (leftover < sizeof(mc_header))
327 break;
328
325 mc_header = (struct microcode_header_intel *)ucode_ptr; 329 mc_header = (struct microcode_header_intel *)ucode_ptr;
326 330
327 mc_size = get_totalsize(mc_header); 331 mc_size = get_totalsize(mc_header);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 705ef8d48e2d..67b1cbe0093a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void)
302 irq = __this_cpu_read(vector_irq[vector]); 302 irq = __this_cpu_read(vector_irq[vector]);
303 if (irq >= 0) { 303 if (irq >= 0) {
304 desc = irq_to_desc(irq); 304 desc = irq_to_desc(irq);
305 if (!desc)
306 continue;
307
305 data = irq_desc_get_irq_data(desc); 308 data = irq_desc_get_irq_data(desc);
306 cpumask_copy(&affinity_new, data->affinity); 309 cpumask_copy(&affinity_new, data->affinity);
307 cpu_clear(this_cpu, affinity_new); 310 cpu_clear(this_cpu, affinity_new);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 98f654d466e5..6a1146ea4d4d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = {
84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
85 /* ---------------------------------------------- */ 85 /* ---------------------------------------------- */
86 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ 86 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
87 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ 87 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
88 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ 88 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
89 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ 89 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
90 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ 90 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f643484300..e354cc6446ab 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static inline void check_zero(void)
609 u8 ret; 609 u8 ret;
610 u8 old; 610 u8 old;
611 611
612 old = ACCESS_ONCE(zero_stats); 612 old = READ_ONCE(zero_stats);
613 if (unlikely(old)) { 613 if (unlikely(old)) {
614 ret = cmpxchg(&zero_stats, old, 0); 614 ret = cmpxchg(&zero_stats, old, 0);
615 /* This ensures only one fellow resets the stat */ 615 /* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
727 int cpu; 727 int cpu;
728 u64 start; 728 u64 start;
729 unsigned long flags; 729 unsigned long flags;
730 __ticket_t head;
730 731
731 if (in_nmi()) 732 if (in_nmi())
732 return; 733 return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
768 */ 769 */
769 __ticket_enter_slowpath(lock); 770 __ticket_enter_slowpath(lock);
770 771
772 /* make sure enter_slowpath, which is atomic does not cross the read */
773 smp_mb__after_atomic();
774
771 /* 775 /*
772 * check again make sure it didn't become free while 776 * check again make sure it didn't become free while
773 * we weren't looking. 777 * we weren't looking.
774 */ 778 */
775 if (ACCESS_ONCE(lock->tickets.head) == want) { 779 head = READ_ONCE(lock->tickets.head);
780 if (__tickets_equal(head, want)) {
776 add_stats(TAKEN_SLOW_PICKUP, 1); 781 add_stats(TAKEN_SLOW_PICKUP, 1);
777 goto out; 782 goto out;
778 } 783 }
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
803 add_stats(RELEASED_SLOW, 1); 808 add_stats(RELEASED_SLOW, 1);
804 for_each_cpu(cpu, &waiting_cpus) { 809 for_each_cpu(cpu, &waiting_cpus) {
805 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); 810 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
806 if (ACCESS_ONCE(w->lock) == lock && 811 if (READ_ONCE(w->lock) == lock &&
807 ACCESS_ONCE(w->want) == ticket) { 812 READ_ONCE(w->want) == ticket) {
808 add_stats(RELEASED_SLOW_KICKED, 1); 813 add_stats(RELEASED_SLOW_KICKED, 1);
809 kvm_kick_cpu(cpu); 814 kvm_kick_cpu(cpu);
810 break; 815 break;
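
The smp_mb__after_atomic() added above (mirrored in the Xen patch further down) orders the slowpath-flag store in __ticket_enter_slowpath() before the re-read of lock->tickets.head; without it the flag store and the head read could be reordered against a concurrent unlocker and a kick could be lost. A rough userspace analogy of the required store-fence-load pattern, using C11 atomics (the names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned char slowpath_flag;  /* stands in for TICKET_SLOWPATH_FLAG */
static _Atomic unsigned short head;          /* stands in for lock->tickets.head */

static bool waiter_should_block(unsigned short want)
{
	/* Publish the flag, fence (the smp_mb__after_atomic() above),
	 * then re-check the head; reordering these loses wakeups. */
	atomic_store_explicit(&slowpath_flag, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&head, memory_order_relaxed) != want;
}
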
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d1ac80b72c72..9bbb9b35c144 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -47,21 +47,13 @@ do { \
47 47
48#ifdef CONFIG_RANDOMIZE_BASE 48#ifdef CONFIG_RANDOMIZE_BASE
49static unsigned long module_load_offset; 49static unsigned long module_load_offset;
50static int randomize_modules = 1;
51 50
52/* Mutex protects the module_load_offset. */ 51/* Mutex protects the module_load_offset. */
53static DEFINE_MUTEX(module_kaslr_mutex); 52static DEFINE_MUTEX(module_kaslr_mutex);
54 53
55static int __init parse_nokaslr(char *p)
56{
57 randomize_modules = 0;
58 return 0;
59}
60early_param("nokaslr", parse_nokaslr);
61
62static unsigned long int get_module_load_offset(void) 54static unsigned long int get_module_load_offset(void)
63{ 55{
64 if (randomize_modules) { 56 if (kaslr_enabled) {
65 mutex_lock(&module_kaslr_mutex); 57 mutex_lock(&module_kaslr_mutex);
66 /* 58 /*
67 * Calculate the module_load_offset the first time this 59 * Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0a2421cca01f..98dc9317286e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,6 +122,8 @@
122unsigned long max_low_pfn_mapped; 122unsigned long max_low_pfn_mapped;
123unsigned long max_pfn_mapped; 123unsigned long max_pfn_mapped;
124 124
125bool __read_mostly kaslr_enabled = false;
126
125#ifdef CONFIG_DMI 127#ifdef CONFIG_DMI
126RESERVE_BRK(dmi_alloc, 65536); 128RESERVE_BRK(dmi_alloc, 65536);
127#endif 129#endif
@@ -425,6 +427,11 @@ static void __init reserve_initrd(void)
425} 427}
426#endif /* CONFIG_BLK_DEV_INITRD */ 428#endif /* CONFIG_BLK_DEV_INITRD */
427 429
430static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
431{
432 kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
433}
434
428static void __init parse_setup_data(void) 435static void __init parse_setup_data(void)
429{ 436{
430 struct setup_data *data; 437 struct setup_data *data;
@@ -450,6 +457,9 @@ static void __init parse_setup_data(void)
450 case SETUP_EFI: 457 case SETUP_EFI:
451 parse_efi_setup(pa_data, data_len); 458 parse_efi_setup(pa_data, data_len);
452 break; 459 break;
460 case SETUP_KASLR:
461 parse_kaslr_setup(pa_data, data_len);
462 break;
453 default: 463 default:
454 break; 464 break;
455 } 465 }
@@ -832,10 +842,14 @@ static void __init trim_low_memory_range(void)
832static int 842static int
833dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) 843dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
834{ 844{
835 pr_emerg("Kernel Offset: 0x%lx from 0x%lx " 845 if (kaslr_enabled)
836 "(relocation range: 0x%lx-0x%lx)\n", 846 pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
837 (unsigned long)&_text - __START_KERNEL, __START_KERNEL, 847 (unsigned long)&_text - __START_KERNEL,
838 __START_KERNEL_map, MODULES_VADDR-1); 848 __START_KERNEL,
849 __START_KERNEL_map,
850 MODULES_VADDR-1);
851 else
852 pr_emerg("Kernel Offset: disabled\n");
839 853
840 return 0; 854 return 0;
841} 855}
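
For context, the new SETUP_KASLR entry travels in the standard setup_data chain defined in bootparam.h, and the flag payload sits immediately after the header, which is the location the pa_data + sizeof(struct setup_data) arithmetic in parse_kaslr_setup() refers to:

/* Standard setup_data node layout from bootparam.h (shown for
 * reference); the KASLR flag byte lives in data[]. */
struct setup_data {
	__u64 next;    /* physical address of the next node, 0 ends the chain */
	__u32 type;    /* SETUP_KASLR for this entry */
	__u32 len;     /* length of data[] in bytes */
	__u8  data[0]; /* payload: non-zero when KASLR is active */
};
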
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8b96a947021f..81f8adb0679e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -66,27 +66,54 @@
66 * Good-instruction tables for 32-bit apps. This is non-const and volatile 66 * Good-instruction tables for 32-bit apps. This is non-const and volatile
67 * to keep gcc from statically optimizing it out, as variable_test_bit makes 67 * to keep gcc from statically optimizing it out, as variable_test_bit makes
68 * some versions of gcc to think only *(unsigned long*) is used. 68 * some versions of gcc to think only *(unsigned long*) is used.
69 *
70 * Opcodes we'll probably never support:
71 * 6c-6f - ins,outs. SEGVs if used in userspace
72 * e4-e7 - in,out imm. SEGVs if used in userspace
73 * ec-ef - in,out acc. SEGVs if used in userspace
74 * cc - int3. SIGTRAP if used in userspace
75 * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs
76 * (why we support bound (62) then? it's similar, and similarly unused...)
77 * f1 - int1. SIGTRAP if used in userspace
78 * f4 - hlt. SEGVs if used in userspace
79 * fa - cli. SEGVs if used in userspace
80 * fb - sti. SEGVs if used in userspace
81 *
82 * Opcodes which need some work to be supported:
83 * 07,17,1f - pop es/ss/ds
84 * Normally not used in userspace, but would execute if used.
85 * Can cause GP or stack exception if tries to load wrong segment descriptor.
86 * We hesitate to run them under single step since kernel's handling
87 * of userspace single-stepping (TF flag) is fragile.
88 * We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e)
89 * on the same grounds that they are never used.
90 * cd - int N.
91 * Used by userspace for "int 80" syscall entry. (Other "int N"
92 * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
93 * Not supported since kernel's handling of userspace single-stepping
94 * (TF flag) is fragile.
95 * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
69 */ 96 */
70#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 97#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
71static volatile u32 good_insns_32[256 / 32] = { 98static volatile u32 good_insns_32[256 / 32] = {
72 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 99 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
73 /* ---------------------------------------------- */ 100 /* ---------------------------------------------- */
74 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ 101 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */
75 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ 102 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
76 W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ 103 W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
77 W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ 104 W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
78 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ 105 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
79 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 106 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
80 W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ 107 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
81 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ 108 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
82 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 109 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
83 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 110 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
84 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ 111 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
85 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 112 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
86 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ 113 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
87 W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 114 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
88 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ 115 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
89 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ 116 W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
90 /* ---------------------------------------------- */ 117 /* ---------------------------------------------- */
91 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 118 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
92}; 119};
@@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = {
94#define good_insns_32 NULL 121#define good_insns_32 NULL
95#endif 122#endif
96 123
97/* Good-instruction tables for 64-bit apps */ 124/* Good-instruction tables for 64-bit apps.
125 *
126 * Genuinely invalid opcodes:
127 * 06,07 - formerly push/pop es
128 * 0e - formerly push cs
129 * 16,17 - formerly push/pop ss
130 * 1e,1f - formerly push/pop ds
131 * 27,2f,37,3f - formerly daa/das/aaa/aas
132 * 60,61 - formerly pusha/popa
133 * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported)
134 * 82 - formerly redundant encoding of Group1
135 * 9a - formerly call seg:ofs
136 * ce - formerly into
137 * d4,d5 - formerly aam/aad
138 * d6 - formerly undocumented salc
139 * ea - formerly jmp seg:ofs
140 *
141 * Opcodes we'll probably never support:
142 * 6c-6f - ins,outs. SEGVs if used in userspace
143 * e4-e7 - in,out imm. SEGVs if used in userspace
144 * ec-ef - in,out acc. SEGVs if used in userspace
145 * cc - int3. SIGTRAP if used in userspace
146 * f1 - int1. SIGTRAP if used in userspace
147 * f4 - hlt. SEGVs if used in userspace
148 * fa - cli. SEGVs if used in userspace
149 * fb - sti. SEGVs if used in userspace
150 *
151 * Opcodes which need some work to be supported:
152 * cd - int N.
153 * Used by userspace for "int 80" syscall entry. (Other "int N"
154 * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
155 * Not supported since kernel's handling of userspace single-stepping
156 * (TF flag) is fragile.
157 * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
158 */
98#if defined(CONFIG_X86_64) 159#if defined(CONFIG_X86_64)
99static volatile u32 good_insns_64[256 / 32] = { 160static volatile u32 good_insns_64[256 / 32] = {
100 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 161 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
101 /* ---------------------------------------------- */ 162 /* ---------------------------------------------- */
102 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ 163 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */
103 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ 164 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
104 W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ 165 W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */
105 W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ 166 W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */
106 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ 167 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
107 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 168 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
108 W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ 169 W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
109 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ 170 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
110 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 171 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
111 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 172 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */
112 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ 173 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
113 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 174 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
114 W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ 175 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
115 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 176 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
116 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ 177 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */
117 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ 178 W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
118 /* ---------------------------------------------- */ 179 /* ---------------------------------------------- */
119 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 180 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
120}; 181};
@@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = {
122#define good_insns_64 NULL 183#define good_insns_64 NULL
123#endif 184#endif
124 185
125/* Using this for both 64-bit and 32-bit apps */ 186/* Using this for both 64-bit and 32-bit apps.
187 * Opcodes we don't support:
188 * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns
189 * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group.
190 * Also encodes tons of other system insns if mod=11.
191 * Some are in fact non-system: xend, xtest, rdtscp, maybe more
192 * 0f 05 - syscall
193 * 0f 06 - clts (CPL0 insn)
194 * 0f 07 - sysret
195 * 0f 08 - invd (CPL0 insn)
196 * 0f 09 - wbinvd (CPL0 insn)
197 * 0f 0b - ud2
198 * 0f 30 - wrmsr (CPL0 insn) (then why rdmsr is allowed, it's also CPL0 insn?)
199 * 0f 34 - sysenter
200 * 0f 35 - sysexit
201 * 0f 37 - getsec
202 * 0f 78 - vmread (Intel VMX. CPL0 insn)
203 * 0f 79 - vmwrite (Intel VMX. CPL0 insn)
204 * Note: with prefixes, these two opcodes are
205 * extrq/insertq/AVX512 convert vector ops.
206 * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt],
207 * {rd,wr}{fs,gs}base,{s,l,m}fence.
208 * Why? They are all user-executable.
209 */
126static volatile u32 good_2byte_insns[256 / 32] = { 210static volatile u32 good_2byte_insns[256 / 32] = {
127 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 211 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
128 /* ---------------------------------------------- */ 212 /* ---------------------------------------------- */
129 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ 213 W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */
130 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ 214 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
131 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ 215 W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
132 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ 216 W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
133 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ 217 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
134 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 218 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
135 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ 219 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
136 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ 220 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */
137 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 221 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
138 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 222 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
139 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ 223 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
140 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 224 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
141 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ 225 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
142 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 226 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
143 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ 227 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
144 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ 228 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) /* f0 */
145 /* ---------------------------------------------- */ 229 /* ---------------------------------------------- */
146 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 230 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
147}; 231};
148#undef W 232#undef W
149 233
150/* 234/*
151 * opcodes we'll probably never support:
152 *
153 * 6c-6d, e4-e5, ec-ed - in
154 * 6e-6f, e6-e7, ee-ef - out
155 * cc, cd - int3, int
156 * cf - iret
157 * d6 - illegal instruction
158 * f1 - int1/icebp
159 * f4 - hlt
160 * fa, fb - cli, sti
161 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
162 *
163 * invalid opcodes in 64-bit mode:
164 *
165 * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
166 * 63 - we support this opcode in x86_64 but not in i386.
167 *
168 * opcodes we may need to refine support for: 235 * opcodes we may need to refine support for:
169 * 236 *
170 * 0f - 2-byte instructions: For many of these instructions, the validity 237 * 0f - 2-byte instructions: For many of these instructions, the validity
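
For context on the tables above: each W() row packs sixteen 0/1 flags into 16 bits, two rows share one u32 via the << (row % 32) shift, and each table therefore forms a 256-bit opcode bitmap. A sketch of how a consumer probes one opcode byte (the helper name is illustrative; the file's real callers do effectively this with test_bit()):

/* Sketch: treat the table's four u32 words as one 256-bit bitmap. */
static bool opcode_is_good(u8 opcode, volatile u32 *table)
{
	return test_bit(opcode, (unsigned long *)table);
}
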
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 553c094b9cd7..a110efca6d06 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
238 } 238 }
239} 239}
240 240
241static const char *page_size_string(struct map_range *mr)
242{
243 static const char str_1g[] = "1G";
244 static const char str_2m[] = "2M";
245 static const char str_4m[] = "4M";
246 static const char str_4k[] = "4k";
247
248 if (mr->page_size_mask & (1<<PG_LEVEL_1G))
249 return str_1g;
250 /*
251 * 32-bit without PAE has a 4M large page size.
252 * PG_LEVEL_2M is misnamed, but we can at least
253 * print out the right size in the string.
254 */
255 if (IS_ENABLED(CONFIG_X86_32) &&
256 !IS_ENABLED(CONFIG_X86_PAE) &&
257 mr->page_size_mask & (1<<PG_LEVEL_2M))
258 return str_4m;
259
260 if (mr->page_size_mask & (1<<PG_LEVEL_2M))
261 return str_2m;
262
263 return str_4k;
264}
265
241static int __meminit split_mem_range(struct map_range *mr, int nr_range, 266static int __meminit split_mem_range(struct map_range *mr, int nr_range,
242 unsigned long start, 267 unsigned long start,
243 unsigned long end) 268 unsigned long end)
@@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
333 for (i = 0; i < nr_range; i++) 358 for (i = 0; i < nr_range; i++)
334 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", 359 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
335 mr[i].start, mr[i].end - 1, 360 mr[i].start, mr[i].end - 1,
336 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( 361 page_size_string(&mr[i]));
337 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
338 362
339 return nr_range; 363 return nr_range;
340} 364}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 919b91205cd4..df4552bd239e 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = {
35 .flags = -1, 35 .flags = -1,
36}; 36};
37 37
38static unsigned int stack_maxrandom_size(void) 38static unsigned long stack_maxrandom_size(void)
39{ 39{
40 unsigned int max = 0; 40 unsigned long max = 0;
41 if ((current->flags & PF_RANDOMIZE) && 41 if ((current->flags & PF_RANDOMIZE) &&
42 !(current->personality & ADDR_NO_RANDOMIZE)) { 42 !(current->personality & ADDR_NO_RANDOMIZE)) {
43 max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; 43 max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
44 } 44 }
45 45
46 return max; 46 return max;
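
The int-to-long widening above matters because the shifted mask no longer fits in 32 bits on 64-bit kernels. Assuming x86_64's usual STACK_RND_MASK of 0x3fffff and a 4 KiB PAGE_SHIFT (illustrative constants; both are per-arch in reality), a standalone demonstration of the truncation the patch fixes:

#include <stdio.h>

#define STACK_RND_MASK 0x3fffff /* assumption: x86_64 value for 64-bit tasks */
#define PAGE_SHIFT 12

int main(void)
{
	/* Old arithmetic: 0x3fffff << 12 = 0x3fffff000 wraps in 32 bits. */
	unsigned int old = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
	/* Fixed arithmetic keeps the full 34-bit result. */
	unsigned long fixed = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;

	printf("old=%#x fixed=%#lx\n", old, fixed);
	return 0;
}
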
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 85afde1fa3e5..a62e0be3a2f1 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -5,6 +5,7 @@ obj-y += geode/
5obj-y += goldfish/ 5obj-y += goldfish/
6obj-y += iris/ 6obj-y += iris/
7obj-y += intel-mid/ 7obj-y += intel-mid/
8obj-y += intel-quark/
8obj-y += olpc/ 9obj-y += olpc/
9obj-y += scx200/ 10obj-y += scx200/
10obj-y += sfi/ 11obj-y += sfi/
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 5fcda7272550..86d0f9e08dd9 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -91,167 +91,6 @@ ENTRY(efi_call)
91 ret 91 ret
92ENDPROC(efi_call) 92ENDPROC(efi_call)
93 93
94#ifdef CONFIG_EFI_MIXED
95
96/*
97 * We run this function from the 1:1 mapping.
98 *
99 * This function must be invoked with a 1:1 mapped stack.
100 */
101ENTRY(__efi64_thunk)
102 movl %ds, %eax
103 push %rax
104 movl %es, %eax
105 push %rax
106 movl %ss, %eax
107 push %rax
108
109 subq $32, %rsp
110 movl %esi, 0x0(%rsp)
111 movl %edx, 0x4(%rsp)
112 movl %ecx, 0x8(%rsp)
113 movq %r8, %rsi
114 movl %esi, 0xc(%rsp)
115 movq %r9, %rsi
116 movl %esi, 0x10(%rsp)
117
118 sgdt save_gdt(%rip)
119
120 leaq 1f(%rip), %rbx
121 movq %rbx, func_rt_ptr(%rip)
122
123 /* Switch to gdt with 32-bit segments */
124 movl 64(%rsp), %eax
125 lgdt (%rax)
126
127 leaq efi_enter32(%rip), %rax
128 pushq $__KERNEL_CS
129 pushq %rax
130 lretq
131
1321: addq $32, %rsp
133
134 lgdt save_gdt(%rip)
135
136 pop %rbx
137 movl %ebx, %ss
138 pop %rbx
139 movl %ebx, %es
140 pop %rbx
141 movl %ebx, %ds
142
143 /*
144 * Convert 32-bit status code into 64-bit.
145 */
146 test %rax, %rax
147 jz 1f
148 movl %eax, %ecx
149 andl $0x0fffffff, %ecx
150 andl $0xf0000000, %eax
151 shl $32, %rax
152 or %rcx, %rax
1531:
154 ret
155ENDPROC(__efi64_thunk)
156
157ENTRY(efi_exit32)
158 movq func_rt_ptr(%rip), %rax
159 push %rax
160 mov %rdi, %rax
161 ret
162ENDPROC(efi_exit32)
163
164 .code32
165/*
166 * EFI service pointer must be in %edi.
167 *
168 * The stack should represent the 32-bit calling convention.
169 */
170ENTRY(efi_enter32)
171 movl $__KERNEL_DS, %eax
172 movl %eax, %ds
173 movl %eax, %es
174 movl %eax, %ss
175
176 /* Reload pgtables */
177 movl %cr3, %eax
178 movl %eax, %cr3
179
180 /* Disable paging */
181 movl %cr0, %eax
182 btrl $X86_CR0_PG_BIT, %eax
183 movl %eax, %cr0
184
185 /* Disable long mode via EFER */
186 movl $MSR_EFER, %ecx
187 rdmsr
188 btrl $_EFER_LME, %eax
189 wrmsr
190
191 call *%edi
192
193 /* We must preserve return value */
194 movl %eax, %edi
195
196 /*
197 * Some firmware will return with interrupts enabled. Be sure to
198 * disable them before we switch GDTs.
199 */
200 cli
201
202 movl 68(%esp), %eax
203 movl %eax, 2(%eax)
204 lgdtl (%eax)
205
206 movl %cr4, %eax
207 btsl $(X86_CR4_PAE_BIT), %eax
208 movl %eax, %cr4
209
210 movl %cr3, %eax
211 movl %eax, %cr3
212
213 movl $MSR_EFER, %ecx
214 rdmsr
215 btsl $_EFER_LME, %eax
216 wrmsr
217
218 xorl %eax, %eax
219 lldt %ax
220
221 movl 72(%esp), %eax
222 pushl $__KERNEL_CS
223 pushl %eax
224
225 /* Enable paging */
226 movl %cr0, %eax
227 btsl $X86_CR0_PG_BIT, %eax
228 movl %eax, %cr0
229 lret
230ENDPROC(efi_enter32)
231
232 .data
233 .balign 8
234 .global efi32_boot_gdt
235efi32_boot_gdt: .word 0
236 .quad 0
237
238save_gdt: .word 0
239 .quad 0
240func_rt_ptr: .quad 0
241
242 .global efi_gdt64
243efi_gdt64:
244 .word efi_gdt64_end - efi_gdt64
245 .long 0 /* Filled out by user */
246 .word 0
247 .quad 0x0000000000000000 /* NULL descriptor */
248 .quad 0x00af9a000000ffff /* __KERNEL_CS */
249 .quad 0x00cf92000000ffff /* __KERNEL_DS */
250 .quad 0x0080890000000000 /* TS descriptor */
251 .quad 0x0000000000000000 /* TS continued */
252efi_gdt64_end:
253#endif /* CONFIG_EFI_MIXED */
254
255 .data 94 .data
256ENTRY(efi_scratch) 95ENTRY(efi_scratch)
257 .fill 3,8,0 96 .fill 3,8,0
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 8806fa73e6e6..ff85d28c50f2 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -1,9 +1,26 @@
1/* 1/*
2 * Copyright (C) 2014 Intel Corporation; author Matt Fleming 2 * Copyright (C) 2014 Intel Corporation; author Matt Fleming
3 *
4 * Support for invoking 32-bit EFI runtime services from a 64-bit
5 * kernel.
6 *
7 * The below thunking functions are only used after ExitBootServices()
8 * has been called. This simplifies things considerably as compared with
9 * the early EFI thunking because we can leave all the kernel state
 10 * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime
11 * services from __KERNEL32_CS. This means we can continue to service
12 * interrupts across an EFI mixed mode call.
13 *
 14 * We do, however, need to handle the fact that we're running in a full
15 * 64-bit virtual address space. Things like the stack and instruction
16 * addresses need to be accessible by the 32-bit firmware, so we rely on
17 * using the identity mappings in the EFI page table to access the stack
18 * and kernel text (see efi_setup_page_tables()).
3 */ 19 */
4 20
5#include <linux/linkage.h> 21#include <linux/linkage.h>
6#include <asm/page_types.h> 22#include <asm/page_types.h>
23#include <asm/segment.h>
7 24
8 .text 25 .text
9 .code64 26 .code64
@@ -33,14 +50,6 @@ ENTRY(efi64_thunk)
33 leaq efi_exit32(%rip), %rbx 50 leaq efi_exit32(%rip), %rbx
34 subq %rax, %rbx 51 subq %rax, %rbx
35 movl %ebx, 8(%rsp) 52 movl %ebx, 8(%rsp)
36 leaq efi_gdt64(%rip), %rbx
37 subq %rax, %rbx
38 movl %ebx, 2(%ebx)
39 movl %ebx, 4(%rsp)
40 leaq efi_gdt32(%rip), %rbx
41 subq %rax, %rbx
42 movl %ebx, 2(%ebx)
43 movl %ebx, (%rsp)
44 53
45 leaq __efi64_thunk(%rip), %rbx 54 leaq __efi64_thunk(%rip), %rbx
46 subq %rax, %rbx 55 subq %rax, %rbx
@@ -52,14 +61,92 @@ ENTRY(efi64_thunk)
52 retq 61 retq
53ENDPROC(efi64_thunk) 62ENDPROC(efi64_thunk)
54 63
55 .data 64/*
56efi_gdt32: 65 * We run this function from the 1:1 mapping.
57 .word efi_gdt32_end - efi_gdt32 66 *
58 .long 0 /* Filled out above */ 67 * This function must be invoked with a 1:1 mapped stack.
59 .word 0 68 */
60 .quad 0x0000000000000000 /* NULL descriptor */ 69ENTRY(__efi64_thunk)
61 .quad 0x00cf9a000000ffff /* __KERNEL_CS */ 70 movl %ds, %eax
62 .quad 0x00cf93000000ffff /* __KERNEL_DS */ 71 push %rax
63efi_gdt32_end: 72 movl %es, %eax
73 push %rax
74 movl %ss, %eax
75 push %rax
76
77 subq $32, %rsp
78 movl %esi, 0x0(%rsp)
79 movl %edx, 0x4(%rsp)
80 movl %ecx, 0x8(%rsp)
81 movq %r8, %rsi
82 movl %esi, 0xc(%rsp)
83 movq %r9, %rsi
84 movl %esi, 0x10(%rsp)
85
86 leaq 1f(%rip), %rbx
87 movq %rbx, func_rt_ptr(%rip)
88
89 /* Switch to 32-bit descriptor */
90 pushq $__KERNEL32_CS
91 leaq efi_enter32(%rip), %rax
92 pushq %rax
93 lretq
94
951: addq $32, %rsp
96
97 pop %rbx
98 movl %ebx, %ss
99 pop %rbx
100 movl %ebx, %es
101 pop %rbx
102 movl %ebx, %ds
64 103
104 /*
105 * Convert 32-bit status code into 64-bit.
106 */
107 test %rax, %rax
108 jz 1f
109 movl %eax, %ecx
110 andl $0x0fffffff, %ecx
111 andl $0xf0000000, %eax
112 shl $32, %rax
113 or %rcx, %rax
1141:
115 ret
116ENDPROC(__efi64_thunk)
117
118ENTRY(efi_exit32)
119 movq func_rt_ptr(%rip), %rax
120 push %rax
121 mov %rdi, %rax
122 ret
123ENDPROC(efi_exit32)
124
125 .code32
126/*
127 * EFI service pointer must be in %edi.
128 *
129 * The stack should represent the 32-bit calling convention.
130 */
131ENTRY(efi_enter32)
132 movl $__KERNEL_DS, %eax
133 movl %eax, %ds
134 movl %eax, %es
135 movl %eax, %ss
136
137 call *%edi
138
139 /* We must preserve return value */
140 movl %eax, %edi
141
142 movl 72(%esp), %eax
143 pushl $__KERNEL_CS
144 pushl %eax
145
146 lret
147ENDPROC(efi_enter32)
148
149 .data
150 .balign 8
151func_rt_ptr: .quad 0
65efi_saved_sp: .quad 0 152efi_saved_sp: .quad 0
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile
new file mode 100644
index 000000000000..9cc57ed36022
--- /dev/null
+++ b/arch/x86/platform/intel-quark/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INTEL_IMR) += imr.o
2obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
new file mode 100644
index 000000000000..0ee619f9fcb7
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -0,0 +1,661 @@
1/**
2 * imr.c
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
6 *
7 * IMR registers define an isolated region of memory that can
8 * be masked to prohibit certain system agents from accessing memory.
 9 * When a device behind a masked port performs an access - snooped or
 10 * not - an IMR may optionally prevent that transaction from changing
11 * the state of memory or from getting correct data in response to the
12 * operation.
13 *
 14 * Write data will be dropped and reads will return 0xFFFFFFFF; the
 15 * system will reset and the system BIOS will print out an error message
 16 * to inform the user that an IMR has been violated.
17 *
18 * This code is based on the Linux MTRR code and reference code from
19 * Intel's Quark BSP EFI, Linux and grub code.
20 *
21 * See quark-x1000-datasheet.pdf for register definitions.
22 * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf
23 */
24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
27#include <asm-generic/sections.h>
28#include <asm/cpu_device_id.h>
29#include <asm/imr.h>
30#include <asm/iosf_mbi.h>
31#include <linux/debugfs.h>
32#include <linux/init.h>
33#include <linux/mm.h>
34#include <linux/module.h>
35#include <linux/types.h>
36
37struct imr_device {
38 struct dentry *file;
39 bool init;
40 struct mutex lock;
41 int max_imr;
42 int reg_base;
43};
44
45static struct imr_device imr_dev;
46
47/*
48 * IMR read/write mask control registers.
49 * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for
50 * bit definitions.
51 *
 52 * addr_lo
53 * 31 Lock bit
54 * 30:24 Reserved
55 * 23:2 1 KiB aligned lo address
56 * 1:0 Reserved
57 *
58 * addr_hi
59 * 31:24 Reserved
60 * 23:2 1 KiB aligned hi address
61 * 1:0 Reserved
62 */
63#define IMR_LOCK BIT(31)
64
65struct imr_regs {
66 u32 addr_lo;
67 u32 addr_hi;
68 u32 rmask;
69 u32 wmask;
70};
71
72#define IMR_NUM_REGS (sizeof(struct imr_regs)/sizeof(u32))
73#define IMR_SHIFT 8
74#define imr_to_phys(x) ((x) << IMR_SHIFT)
75#define phys_to_imr(x) ((x) >> IMR_SHIFT)
76
77/**
 78 * imr_is_enabled - true if an IMR is enabled, false otherwise.
79 *
80 * Determines if an IMR is enabled based on address range and read/write
81 * mask. An IMR set with an address range set to zero and a read/write
 82 * access mask set to all is considered to be disabled. An IMR in any
 83 * other state - for example, with its range set to zero but without
 84 * read/write access set to all - is considered to be enabled. This
 85 * definition of disabled is how firmware switches off an IMR and is
 86 * maintained in the kernel for consistency.
87 *
88 * @imr: pointer to IMR descriptor.
89 * @return: true if IMR enabled false if disabled.
90 */
91static inline int imr_is_enabled(struct imr_regs *imr)
92{
93 return !(imr->rmask == IMR_READ_ACCESS_ALL &&
94 imr->wmask == IMR_WRITE_ACCESS_ALL &&
95 imr_to_phys(imr->addr_lo) == 0 &&
96 imr_to_phys(imr->addr_hi) == 0);
97}
98
99/**
100 * imr_read - read an IMR at a given index.
101 *
102 * Requires caller to hold imr mutex.
103 *
104 * @idev: pointer to imr_device structure.
105 * @imr_id: IMR entry to read.
106 * @imr: IMR structure representing address and access masks.
 107 * @return: 0 on success or error code passed from iosf_mbi on failure.
108 */
109static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
110{
111 u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
112 int ret;
113
114 ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
115 reg++, &imr->addr_lo);
116 if (ret)
117 return ret;
118
119 ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
120 reg++, &imr->addr_hi);
121 if (ret)
122 return ret;
123
124 ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
125 reg++, &imr->rmask);
126 if (ret)
127 return ret;
128
129 return iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
130 reg++, &imr->wmask);
131}
132
133/**
134 * imr_write - write an IMR at a given index.
135 *
136 * Requires caller to hold imr mutex.
137 * Note lock bits need to be written independently of address bits.
138 *
139 * @idev: pointer to imr_device structure.
140 * @imr_id: IMR entry to write.
141 * @imr: IMR structure representing address and access masks.
142 * @lock: indicates if the IMR lock bit should be applied.
 143 * @return: 0 on success or error code passed from iosf_mbi on failure.
144 */
145static int imr_write(struct imr_device *idev, u32 imr_id,
146 struct imr_regs *imr, bool lock)
147{
148 unsigned long flags;
149 u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
150 int ret;
151
152 local_irq_save(flags);
153
154 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, reg++,
155 imr->addr_lo);
156 if (ret)
157 goto failed;
158
159 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
160 reg++, imr->addr_hi);
161 if (ret)
162 goto failed;
163
164 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
165 reg++, imr->rmask);
166 if (ret)
167 goto failed;
168
169 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
170 reg++, imr->wmask);
171 if (ret)
172 goto failed;
173
 174 /* Lock bit must be set separately from the addr_lo address bits. */
175 if (lock) {
176 imr->addr_lo |= IMR_LOCK;
177 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
178 reg - IMR_NUM_REGS, imr->addr_lo);
179 if (ret)
180 goto failed;
181 }
182
183 local_irq_restore(flags);
184 return 0;
185failed:
186 /*
187 * If writing to the IOSF failed then we're in an unknown state,
188 * likely a very bad state. An IMR in an invalid state will almost
189 * certainly lead to a memory access violation.
190 */
191 local_irq_restore(flags);
192 WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n",
193 imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK);
194
195 return ret;
196}
197
198/**
199 * imr_dbgfs_state_show - print state of IMR registers.
200 *
201 * @s: pointer to seq_file for output.
202 * @unused: unused parameter.
 203 * @return: 0 on success or error code passed from iosf_mbi on failure.
204 */
205static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
206{
207 phys_addr_t base;
208 phys_addr_t end;
209 int i;
210 struct imr_device *idev = s->private;
211 struct imr_regs imr;
212 size_t size;
213 int ret = -ENODEV;
214
215 mutex_lock(&idev->lock);
216
217 for (i = 0; i < idev->max_imr; i++) {
218
219 ret = imr_read(idev, i, &imr);
220 if (ret)
221 break;
222
223 /*
 224 * Remember to add IMR_ALIGN bytes to size to account for the
 225 * inherent IMR_ALIGN size bytes contained in the masked-away
 226 * lower ten bits.
227 */
228 if (imr_is_enabled(&imr)) {
229 base = imr_to_phys(imr.addr_lo);
230 end = imr_to_phys(imr.addr_hi) + IMR_MASK;
231 } else {
232 base = 0;
233 end = 0;
234 }
235 size = end - base;
236 seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
237 "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
238 &base, &end, size, imr.rmask, imr.wmask,
239 imr_is_enabled(&imr) ? "enabled " : "disabled",
240 imr.addr_lo & IMR_LOCK ? "locked" : "unlocked");
241 }
242
243 mutex_unlock(&idev->lock);
244 return ret;
245}
246
247/**
248 * imr_state_open - debugfs open callback.
249 *
250 * @inode: pointer to struct inode.
251 * @file: pointer to struct file.
252 * @return: result of single open.
253 */
254static int imr_state_open(struct inode *inode, struct file *file)
255{
256 return single_open(file, imr_dbgfs_state_show, inode->i_private);
257}
258
259static const struct file_operations imr_state_ops = {
260 .open = imr_state_open,
261 .read = seq_read,
262 .llseek = seq_lseek,
263 .release = single_release,
264};
265
266/**
267 * imr_debugfs_register - register debugfs hooks.
268 *
269 * @idev: pointer to imr_device structure.
270 * @return: 0 on success - errno on failure.
271 */
272static int imr_debugfs_register(struct imr_device *idev)
273{
274 idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL,
275 idev, &imr_state_ops);
276 return PTR_ERR_OR_ZERO(idev->file);
277}
278
279/**
280 * imr_debugfs_unregister - unregister debugfs hooks.
281 *
282 * @idev: pointer to imr_device structure.
283 * @return:
284 */
285static void imr_debugfs_unregister(struct imr_device *idev)
286{
287 debugfs_remove(idev->file);
288}
289
290/**
 291 * imr_check_params - check that an address range is IMR-aligned and of non-zero size
 292 *
 293 * @base: base address of intended IMR.
 294 * @size: size of intended IMR.
 295 * @return: zero on valid range, -EINVAL on unaligned base/size or zero size.
296 */
297static int imr_check_params(phys_addr_t base, size_t size)
298{
299 if ((base & IMR_MASK) || (size & IMR_MASK)) {
300 pr_err("base %pa size 0x%08zx must align to 1KiB\n",
301 &base, size);
302 return -EINVAL;
303 }
304 if (size == 0)
305 return -EINVAL;
306
307 return 0;
308}
309
310/**
311 * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends.
312 *
 313 * IMR addr_hi has a built-in offset of IMR_ALIGN (0x400) bytes beyond the
314 * value in the register. We need to subtract IMR_ALIGN bytes from input sizes
315 * as a result.
316 *
317 * @size: input size bytes.
318 * @return: reduced size.
319 */
320static inline size_t imr_raw_size(size_t size)
321{
322 return size - IMR_ALIGN;
323}
324
325/**
326 * imr_address_overlap - detects an address overlap.
327 *
328 * @addr: address to check against an existing IMR.
329 * @imr: imr being checked.
330 * @return: true for overlap false for no overlap.
331 */
332static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
333{
334 return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi);
335}
336
337/**
338 * imr_add_range - add an Isolated Memory Region.
339 *
 340 * @base: physical base address of region, aligned to 1 KiB.
 341 * @size: physical size of region in bytes; must be aligned to 1 KiB.
 342 * @rmask: read access mask.
 343 * @wmask: write access mask.
344 * @lock: indicates whether or not to permanently lock this region.
345 * @return: zero on success or negative value indicating error.
346 */
347int imr_add_range(phys_addr_t base, size_t size,
348 unsigned int rmask, unsigned int wmask, bool lock)
349{
350 phys_addr_t end;
351 unsigned int i;
352 struct imr_device *idev = &imr_dev;
353 struct imr_regs imr;
354 size_t raw_size;
355 int reg;
356 int ret;
357
358 if (WARN_ONCE(idev->init == false, "driver not initialized"))
359 return -ENODEV;
360
361 ret = imr_check_params(base, size);
362 if (ret)
363 return ret;
364
365 /* Tweak the size value. */
366 raw_size = imr_raw_size(size);
367 end = base + raw_size;
368
369 /*
370 * Check for reserved IMR value common to firmware, kernel and grub
371 * indicating a disabled IMR.
372 */
373 imr.addr_lo = phys_to_imr(base);
374 imr.addr_hi = phys_to_imr(end);
375 imr.rmask = rmask;
376 imr.wmask = wmask;
377 if (!imr_is_enabled(&imr))
378 return -ENOTSUPP;
379
380 mutex_lock(&idev->lock);
381
382 /*
383 * Find a free IMR while checking for an existing overlapping range.
384 * Note there's no restriction in silicon to prevent IMR overlaps.
385 * For the sake of simplicity and ease in defining/debugging an IMR
386 * memory map we exclude IMR overlaps.
387 */
388 reg = -1;
389 for (i = 0; i < idev->max_imr; i++) {
390 ret = imr_read(idev, i, &imr);
391 if (ret)
392 goto failed;
393
394 /* Find overlap @ base or end of requested range. */
395 ret = -EINVAL;
396 if (imr_is_enabled(&imr)) {
397 if (imr_address_overlap(base, &imr))
398 goto failed;
399 if (imr_address_overlap(end, &imr))
400 goto failed;
401 } else {
402 reg = i;
403 }
404 }
405
406 /* Error out if we have no free IMR entries. */
407 if (reg == -1) {
408 ret = -ENOMEM;
409 goto failed;
410 }
411
 412 pr_debug("add %d phys %pa-%pa size %zx rmask 0x%08x wmask 0x%08x\n",
413 reg, &base, &end, raw_size, rmask, wmask);
414
415 /* Enable IMR at specified range and access mask. */
416 imr.addr_lo = phys_to_imr(base);
417 imr.addr_hi = phys_to_imr(end);
418 imr.rmask = rmask;
419 imr.wmask = wmask;
420
421 ret = imr_write(idev, reg, &imr, lock);
422 if (ret < 0) {
423 /*
424 * In the highly unlikely event iosf_mbi_write failed
425 * attempt to rollback the IMR setup skipping the trapping
426 * of further IOSF write failures.
427 */
428 imr.addr_lo = 0;
429 imr.addr_hi = 0;
430 imr.rmask = IMR_READ_ACCESS_ALL;
431 imr.wmask = IMR_WRITE_ACCESS_ALL;
432 imr_write(idev, reg, &imr, false);
433 }
434failed:
435 mutex_unlock(&idev->lock);
436 return ret;
437}
438EXPORT_SYMBOL_GPL(imr_add_range);
439
440/**
441 * __imr_remove_range - delete an Isolated Memory Region.
442 *
443 * This function allows you to delete an IMR by its index specified by reg or
444 * by address range specified by base and size respectively. If you specify an
445 * index on its own the base and size parameters are ignored.
446 * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored.
447 * imr_remove_range(-1, base, size); delete IMR from base to base+size.
448 *
449 * @reg: imr index to remove.
450 * @base: physical base address of region aligned to 1 KiB.
451 * @size: physical size of region in bytes aligned to 1 KiB.
 452 * @return: -EINVAL on invalid range or out of range id
453 * -ENODEV if reg is valid but no IMR exists or is locked
454 * 0 on success.
455 */
456static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
457{
458 phys_addr_t end;
459 bool found = false;
460 unsigned int i;
461 struct imr_device *idev = &imr_dev;
462 struct imr_regs imr;
463 size_t raw_size;
464 int ret = 0;
465
466 if (WARN_ONCE(idev->init == false, "driver not initialized"))
467 return -ENODEV;
468
469 /*
470 * Validate address range if deleting by address, else we are
471 * deleting by index where base and size will be ignored.
472 */
473 if (reg == -1) {
474 ret = imr_check_params(base, size);
475 if (ret)
476 return ret;
477 }
478
479 /* Tweak the size value. */
480 raw_size = imr_raw_size(size);
481 end = base + raw_size;
482
483 mutex_lock(&idev->lock);
484
485 if (reg >= 0) {
486 /* If a specific IMR is given try to use it. */
487 ret = imr_read(idev, reg, &imr);
488 if (ret)
489 goto failed;
490
491 if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) {
492 ret = -ENODEV;
493 goto failed;
494 }
495 found = true;
496 } else {
497 /* Search for match based on address range. */
498 for (i = 0; i < idev->max_imr; i++) {
499 ret = imr_read(idev, i, &imr);
500 if (ret)
501 goto failed;
502
503 if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK)
504 continue;
505
506 if ((imr_to_phys(imr.addr_lo) == base) &&
507 (imr_to_phys(imr.addr_hi) == end)) {
508 found = true;
509 reg = i;
510 break;
511 }
512 }
513 }
514
515 if (!found) {
516 ret = -ENODEV;
517 goto failed;
518 }
519
520 pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size);
521
522 /* Tear down the IMR. */
523 imr.addr_lo = 0;
524 imr.addr_hi = 0;
525 imr.rmask = IMR_READ_ACCESS_ALL;
526 imr.wmask = IMR_WRITE_ACCESS_ALL;
527
528 ret = imr_write(idev, reg, &imr, false);
529
530failed:
531 mutex_unlock(&idev->lock);
532 return ret;
533}
534
535/**
536 * imr_remove_range - delete an Isolated Memory Region by address
537 *
538 * This function allows you to delete an IMR by an address range specified
539 * by base and size respectively.
540 * imr_remove_range(base, size); delete IMR from base to base+size.
541 *
542 * @base: physical base address of region aligned to 1 KiB.
543 * @size: physical size of region in bytes aligned to 1 KiB.
 544 * @return: -EINVAL on invalid range or out of range id
545 * -ENODEV if reg is valid but no IMR exists or is locked
546 * 0 on success.
547 */
548int imr_remove_range(phys_addr_t base, size_t size)
549{
550 return __imr_remove_range(-1, base, size);
551}
552EXPORT_SYMBOL_GPL(imr_remove_range);
553
554/**
555 * imr_clear - delete an Isolated Memory Region by index
556 *
 557 * This function allows you to delete an IMR by its index rather than
 558 * by address range. Useful for initial sanitization of the IMR
 559 * address map.
 560 * imr_clear(reg); delete IMR at index reg.
561 *
562 * @reg: imr index to remove.
 563 * @return: -EINVAL on invalid range or out of range id
564 * -ENODEV if reg is valid but no IMR exists or is locked
565 * 0 on success.
566 */
567static inline int imr_clear(int reg)
568{
569 return __imr_remove_range(reg, 0, 0);
570}
571
572/**
573 * imr_fixup_memmap - Tear down IMRs used during bootup.
574 *
 575 * BIOS and Grub both set up IMRs around the compressed kernel and
 576 * initrd memory; these need to be removed before the kernel hands out
 577 * one of the IMR-encased addresses to a downstream DMA agent such as
 578 * the SD or Ethernet controller. IMRs on Galileo are set up to reset
 579 * the system immediately on violation. As a result, if you're running
 580 * a root filesystem from SD, you'll need the boot-time IMRs torn down
 581 * or you'll find seemingly random resets when using your filesystem.
582 *
583 * @idev: pointer to imr_device structure.
584 * @return:
585 */
586static void __init imr_fixup_memmap(struct imr_device *idev)
587{
588 phys_addr_t base = virt_to_phys(&_text);
589 size_t size = virt_to_phys(&__end_rodata) - base;
590 int i;
591 int ret;
592
593 /* Tear down all existing unlocked IMRs. */
594 for (i = 0; i < idev->max_imr; i++)
595 imr_clear(i);
596
597 /*
598 * Setup a locked IMR around the physical extent of the kernel
 599 * from the beginning of the .text section to the end of the
600 * .rodata section as one physically contiguous block.
601 */
602 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
603 if (ret < 0) {
604 pr_err("unable to setup IMR for kernel: (%p - %p)\n",
605 &_text, &__end_rodata);
606 } else {
607 pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
608 size / 1024, &_text, &__end_rodata);
609 }
610
611}
612
613static const struct x86_cpu_id imr_ids[] __initconst = {
614 { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
615 {}
616};
617MODULE_DEVICE_TABLE(x86cpu, imr_ids);
618
619/**
620 * imr_init - entry point for IMR driver.
621 *
 622 * return: -ENODEV for no IMR support, 0 if good to go.
623 */
624static int __init imr_init(void)
625{
626 struct imr_device *idev = &imr_dev;
627 int ret;
628
629 if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
630 return -ENODEV;
631
632 idev->max_imr = QUARK_X1000_IMR_MAX;
633 idev->reg_base = QUARK_X1000_IMR_REGBASE;
634 idev->init = true;
635
636 mutex_init(&idev->lock);
637 ret = imr_debugfs_register(idev);
638 if (ret != 0)
639 pr_warn("debugfs register failed!\n");
640 imr_fixup_memmap(idev);
641 return 0;
642}
643
644/**
645 * imr_exit - exit point for IMR code.
646 *
 647 * Deregisters debugfs, leaves IMR state as-is.
648 *
649 * return:
650 */
651static void __exit imr_exit(void)
652{
653 imr_debugfs_unregister(&imr_dev);
654}
655
656module_init(imr_init);
657module_exit(imr_exit);
658
659MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
660MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
661MODULE_LICENSE("Dual BSD/GPL");
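
Taken together, the exported surface of the new driver is just imr_add_range() and imr_remove_range(). A usage sketch for a hypothetical caller that sets up and then tears down an unlocked, CPU-only region (my_base and my_size are placeholders; both must be 1 KiB aligned):

#include <asm/imr.h>
#include <linux/types.h>

static int __init my_imr_example(phys_addr_t my_base, size_t my_size)
{
	int ret;

	/* CPU-only read/write masks; lock=false so it can be removed. */
	ret = imr_add_range(my_base, my_size, IMR_CPU, IMR_CPU, false);
	if (ret)
		return ret;

	/* Tear the same region down again, matching by address range. */
	return imr_remove_range(my_base, my_size);
}
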
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
new file mode 100644
index 000000000000..c9a0838890e2
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -0,0 +1,129 @@
1/**
2 * imr_selftest.c
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
6 *
7 * IMR self test. The purpose of this module is to run a set of tests on the
 8 * IMR API to validate its sanity. We check for overlapping, reserved
9 * addresses and setup/teardown sanity.
10 *
11 */
12
13#include <asm-generic/sections.h>
14#include <asm/imr.h>
15#include <linux/init.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <linux/types.h>
19
20#define SELFTEST KBUILD_MODNAME ": "
21/**
22 * imr_self_test_result - Print result string for self test.
23 *
 24 * @res: result code - true if test passed, false otherwise.
25 * @fmt: format string.
26 * ... variadic argument list.
27 */
28static void __init imr_self_test_result(int res, const char *fmt, ...)
29{
30 va_list vlist;
31
32 /* Print pass/fail. */
33 if (res)
34 pr_info(SELFTEST "pass ");
35 else
36 pr_info(SELFTEST "fail ");
37
38 /* Print variable string. */
39 va_start(vlist, fmt);
40 vprintk(fmt, vlist);
41 va_end(vlist);
42
43 /* Optional warning. */
44 WARN(res == 0, "test failed");
45}
46#undef SELFTEST
47
48/**
49 * imr_self_test
50 *
 51 * Run the IMR self test: some simple checks covering overlap,
 52 * zero sized allocations and 1 KiB sized areas.
53 *
54 */
55static void __init imr_self_test(void)
56{
57 phys_addr_t base = virt_to_phys(&_text);
58 size_t size = virt_to_phys(&__end_rodata) - base;
59 const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n";
60 int ret;
61
62 /* Test zero zero. */
63 ret = imr_add_range(0, 0, 0, 0, false);
64 imr_self_test_result(ret < 0, "zero sized IMR\n");
65
66 /* Test exact overlap. */
67 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
68 imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
69
70 /* Test overlap with base inside of existing. */
71 base += size - IMR_ALIGN;
72 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
73 imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
74
75 /* Test overlap with end inside of existing. */
76 base -= size + IMR_ALIGN * 2;
77 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
78 imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
79
80 /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
81 ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
82 IMR_WRITE_ACCESS_ALL, false);
83 imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
84
85 /* Test that a 1 KiB IMR @ zero with CPU only will work. */
86 ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
87 imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
88 if (ret >= 0) {
89 ret = imr_remove_range(0, IMR_ALIGN);
90 imr_self_test_result(ret == 0, "teardown - cpu-access\n");
91 }
92
93 /* Test 2 KiB works. */
94 size = IMR_ALIGN * 2;
95 ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
96 IMR_WRITE_ACCESS_ALL, false);
97 imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
98 if (ret >= 0) {
99 ret = imr_remove_range(0, size);
100 imr_self_test_result(ret == 0, "teardown 2KiB\n");
101 }
102}
103
104/**
 105 * imr_self_test_init - entry point for the IMR self test.
106 *
 107 * return: -ENODEV for no IMR support, 0 if good to go.
108 */
109static int __init imr_self_test_init(void)
110{
111 imr_self_test();
112 return 0;
113}
114
115/**
 116 * imr_self_test_exit - exit point for the IMR self test.
117 *
118 * return:
119 */
120static void __exit imr_self_test_exit(void)
121{
122}
123
124module_init(imr_self_test_init);
125module_exit(imr_self_test_exit);
126
127MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
128MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
129MODULE_LICENSE("Dual BSD/GPL");
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23b45eb9a89c..956374c1edbc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -41,7 +41,7 @@ static u8 zero_stats;
41static inline void check_zero(void) 41static inline void check_zero(void)
42{ 42{
43 u8 ret; 43 u8 ret;
44 u8 old = ACCESS_ONCE(zero_stats); 44 u8 old = READ_ONCE(zero_stats);
45 if (unlikely(old)) { 45 if (unlikely(old)) {
46 ret = cmpxchg(&zero_stats, old, 0); 46 ret = cmpxchg(&zero_stats, old, 0);
47 /* This ensures only one fellow resets the stat */ 47 /* This ensures only one fellow resets the stat */
@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
112 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); 112 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
113 int cpu = smp_processor_id(); 113 int cpu = smp_processor_id();
114 u64 start; 114 u64 start;
115 __ticket_t head;
115 unsigned long flags; 116 unsigned long flags;
116 117
117 /* If kicker interrupts not initialized yet, just spin */ 118 /* If kicker interrupts not initialized yet, just spin */
@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
159 */ 160 */
160 __ticket_enter_slowpath(lock); 161 __ticket_enter_slowpath(lock);
161 162
163 /* make sure enter_slowpath, which is atomic, does not cross the read */
164 smp_mb__after_atomic();
165
162 /* 166 /*
163 * check again make sure it didn't become free while 167 * check again make sure it didn't become free while
164 * we weren't looking 168 * we weren't looking
165 */ 169 */
166 if (ACCESS_ONCE(lock->tickets.head) == want) { 170 head = READ_ONCE(lock->tickets.head);
171 if (__tickets_equal(head, want)) {
167 add_stats(TAKEN_SLOW_PICKUP, 1); 172 add_stats(TAKEN_SLOW_PICKUP, 1);
168 goto out; 173 goto out;
169 } 174 }
@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
204 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); 209 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
205 210
206 /* Make sure we read lock before want */ 211 /* Make sure we read lock before want */
207 if (ACCESS_ONCE(w->lock) == lock && 212 if (READ_ONCE(w->lock) == lock &&
208 ACCESS_ONCE(w->want) == next) { 213 READ_ONCE(w->want) == next) {
209 add_stats(RELEASED_SLOW_KICKED, 1); 214 add_stats(RELEASED_SLOW_KICKED, 1);
210 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 215 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
211 break; 216 break;
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index ec318bf434a6..1786574536b2 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -157,12 +157,16 @@ static int ipmi_release(struct inode *inode, struct file *file)
157{ 157{
158 struct ipmi_file_private *priv = file->private_data; 158 struct ipmi_file_private *priv = file->private_data;
159 int rv; 159 int rv;
160 struct ipmi_recv_msg *msg, *next;
160 161
161 rv = ipmi_destroy_user(priv->user); 162 rv = ipmi_destroy_user(priv->user);
162 if (rv) 163 if (rv)
163 return rv; 164 return rv;
164 165
165 /* FIXME - free the messages in the list. */ 166 list_for_each_entry_safe(msg, next, &priv->recv_msgs, link)
167 ipmi_free_recv_msg(msg);
168
169
166 kfree(priv); 170 kfree(priv);
167 171
168 return 0; 172 return 0;
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 6b65fa4e0c55..9bb592872532 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -1483,14 +1483,10 @@ static inline void format_lan_msg(struct ipmi_smi_msg *smi_msg,
1483 smi_msg->msgid = msgid; 1483 smi_msg->msgid = msgid;
1484} 1484}
1485 1485
1486static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers, 1486static struct ipmi_smi_msg *smi_add_send_msg(ipmi_smi_t intf,
1487 struct ipmi_smi_msg *smi_msg, int priority) 1487 struct ipmi_smi_msg *smi_msg,
1488 int priority)
1488{ 1489{
1489 int run_to_completion = intf->run_to_completion;
1490 unsigned long flags;
1491
1492 if (!run_to_completion)
1493 spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
1494 if (intf->curr_msg) { 1490 if (intf->curr_msg) {
1495 if (priority > 0) 1491 if (priority > 0)
1496 list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs); 1492 list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs);
@@ -1500,8 +1496,25 @@ static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
1500 } else { 1496 } else {
1501 intf->curr_msg = smi_msg; 1497 intf->curr_msg = smi_msg;
1502 } 1498 }
1503 if (!run_to_completion) 1499
1500 return smi_msg;
1501}
1502
1503
1504static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
1505 struct ipmi_smi_msg *smi_msg, int priority)
1506{
1507 int run_to_completion = intf->run_to_completion;
1508
1509 if (run_to_completion) {
1510 smi_msg = smi_add_send_msg(intf, smi_msg, priority);
1511 } else {
1512 unsigned long flags;
1513
1514 spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
1515 smi_msg = smi_add_send_msg(intf, smi_msg, priority);
1504 spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); 1516 spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
1517 }
1505 1518
1506 if (smi_msg) 1519 if (smi_msg)
1507 handlers->sender(intf->send_info, smi_msg); 1520 handlers->sender(intf->send_info, smi_msg);
@@ -1985,7 +1998,9 @@ static int smi_ipmb_proc_show(struct seq_file *m, void *v)
1985 seq_printf(m, "%x", intf->channels[0].address); 1998 seq_printf(m, "%x", intf->channels[0].address);
1986 for (i = 1; i < IPMI_MAX_CHANNELS; i++) 1999 for (i = 1; i < IPMI_MAX_CHANNELS; i++)
1987 seq_printf(m, " %x", intf->channels[i].address); 2000 seq_printf(m, " %x", intf->channels[i].address);
1988 return seq_putc(m, '\n'); 2001 seq_putc(m, '\n');
2002
2003 return seq_has_overflowed(m);
1989} 2004}
1990 2005
1991static int smi_ipmb_proc_open(struct inode *inode, struct file *file) 2006static int smi_ipmb_proc_open(struct inode *inode, struct file *file)
@@ -2004,9 +2019,11 @@ static int smi_version_proc_show(struct seq_file *m, void *v)
2004{ 2019{
2005 ipmi_smi_t intf = m->private; 2020 ipmi_smi_t intf = m->private;
2006 2021
2007 return seq_printf(m, "%u.%u\n", 2022 seq_printf(m, "%u.%u\n",
2008 ipmi_version_major(&intf->bmc->id), 2023 ipmi_version_major(&intf->bmc->id),
2009 ipmi_version_minor(&intf->bmc->id)); 2024 ipmi_version_minor(&intf->bmc->id));
2025
2026 return seq_has_overflowed(m);
2010} 2027}
2011 2028
2012static int smi_version_proc_open(struct inode *inode, struct file *file) 2029static int smi_version_proc_open(struct inode *inode, struct file *file)
@@ -2353,11 +2370,28 @@ static struct attribute *bmc_dev_attrs[] = {
2353 &dev_attr_additional_device_support.attr, 2370 &dev_attr_additional_device_support.attr,
2354 &dev_attr_manufacturer_id.attr, 2371 &dev_attr_manufacturer_id.attr,
2355 &dev_attr_product_id.attr, 2372 &dev_attr_product_id.attr,
2373 &dev_attr_aux_firmware_revision.attr,
2374 &dev_attr_guid.attr,
2356 NULL 2375 NULL
2357}; 2376};
2358 2377
2378static umode_t bmc_dev_attr_is_visible(struct kobject *kobj,
2379 struct attribute *attr, int idx)
2380{
2381 struct device *dev = kobj_to_dev(kobj);
2382 struct bmc_device *bmc = to_bmc_device(dev);
2383 umode_t mode = attr->mode;
2384
2385 if (attr == &dev_attr_aux_firmware_revision.attr)
2386 return bmc->id.aux_firmware_revision_set ? mode : 0;
2387 if (attr == &dev_attr_guid.attr)
2388 return bmc->guid_set ? mode : 0;
2389 return mode;
2390}
2391
2359static struct attribute_group bmc_dev_attr_group = { 2392static struct attribute_group bmc_dev_attr_group = {
2360 .attrs = bmc_dev_attrs, 2393 .attrs = bmc_dev_attrs,
2394 .is_visible = bmc_dev_attr_is_visible,
2361}; 2395};
2362 2396
2363static const struct attribute_group *bmc_dev_attr_groups[] = { 2397static const struct attribute_group *bmc_dev_attr_groups[] = {
@@ -2380,13 +2414,6 @@ cleanup_bmc_device(struct kref *ref)
2380{ 2414{
2381 struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount); 2415 struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount);
2382 2416
2383 if (bmc->id.aux_firmware_revision_set)
2384 device_remove_file(&bmc->pdev.dev,
2385 &dev_attr_aux_firmware_revision);
2386 if (bmc->guid_set)
2387 device_remove_file(&bmc->pdev.dev,
2388 &dev_attr_guid);
2389
2390 platform_device_unregister(&bmc->pdev); 2417 platform_device_unregister(&bmc->pdev);
2391} 2418}
2392 2419
@@ -2407,33 +2434,6 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf)
2407 mutex_unlock(&ipmidriver_mutex); 2434 mutex_unlock(&ipmidriver_mutex);
2408} 2435}
2409 2436
2410static int create_bmc_files(struct bmc_device *bmc)
2411{
2412 int err;
2413
2414 if (bmc->id.aux_firmware_revision_set) {
2415 err = device_create_file(&bmc->pdev.dev,
2416 &dev_attr_aux_firmware_revision);
2417 if (err)
2418 goto out;
2419 }
2420 if (bmc->guid_set) {
2421 err = device_create_file(&bmc->pdev.dev,
2422 &dev_attr_guid);
2423 if (err)
2424 goto out_aux_firm;
2425 }
2426
2427 return 0;
2428
2429out_aux_firm:
2430 if (bmc->id.aux_firmware_revision_set)
2431 device_remove_file(&bmc->pdev.dev,
2432 &dev_attr_aux_firmware_revision);
2433out:
2434 return err;
2435}
2436
2437static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum) 2437static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
2438{ 2438{
2439 int rv; 2439 int rv;
@@ -2522,15 +2522,6 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
2522 return rv; 2522 return rv;
2523 } 2523 }
2524 2524
2525 rv = create_bmc_files(bmc);
2526 if (rv) {
2527 mutex_lock(&ipmidriver_mutex);
2528 platform_device_unregister(&bmc->pdev);
2529 mutex_unlock(&ipmidriver_mutex);
2530
2531 return rv;
2532 }
2533
2534 dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, " 2525 dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, "
2535 "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n", 2526 "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n",
2536 bmc->id.manufacturer_id, 2527 bmc->id.manufacturer_id,
@@ -4212,7 +4203,6 @@ static void need_waiter(ipmi_smi_t intf)
4212static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0); 4203static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0);
4213static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); 4204static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0);
4214 4205
4215/* FIXME - convert these to slabs. */
4216static void free_smi_msg(struct ipmi_smi_msg *msg) 4206static void free_smi_msg(struct ipmi_smi_msg *msg)
4217{ 4207{
4218 atomic_dec(&smi_msg_inuse_count); 4208 atomic_dec(&smi_msg_inuse_count);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 967b73aa4e66..f6646ed3047e 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -321,6 +321,18 @@ static int try_smi_init(struct smi_info *smi);
321static void cleanup_one_si(struct smi_info *to_clean); 321static void cleanup_one_si(struct smi_info *to_clean);
322static void cleanup_ipmi_si(void); 322static void cleanup_ipmi_si(void);
323 323
324#ifdef DEBUG_TIMING
325void debug_timestamp(char *msg)
326{
327 struct timespec64 t;
328
329 getnstimeofday64(&t);
330 pr_debug("**%s: %lld.%9.9ld\n", msg, (long long) t.tv_sec, t.tv_nsec);
331}
332#else
333#define debug_timestamp(x)
334#endif
335
324static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list); 336static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list);
325static int register_xaction_notifier(struct notifier_block *nb) 337static int register_xaction_notifier(struct notifier_block *nb)
326{ 338{
@@ -358,9 +370,6 @@ static void return_hosed_msg(struct smi_info *smi_info, int cCode)
358static enum si_sm_result start_next_msg(struct smi_info *smi_info) 370static enum si_sm_result start_next_msg(struct smi_info *smi_info)
359{ 371{
360 int rv; 372 int rv;
361#ifdef DEBUG_TIMING
362 struct timeval t;
363#endif
364 373
365 if (!smi_info->waiting_msg) { 374 if (!smi_info->waiting_msg) {
366 smi_info->curr_msg = NULL; 375 smi_info->curr_msg = NULL;
@@ -370,10 +379,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
370 379
371 smi_info->curr_msg = smi_info->waiting_msg; 380 smi_info->curr_msg = smi_info->waiting_msg;
372 smi_info->waiting_msg = NULL; 381 smi_info->waiting_msg = NULL;
373#ifdef DEBUG_TIMING 382 debug_timestamp("Start2");
374 do_gettimeofday(&t);
375 printk(KERN_DEBUG "**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec);
376#endif
377 err = atomic_notifier_call_chain(&xaction_notifier_list, 383 err = atomic_notifier_call_chain(&xaction_notifier_list,
378 0, smi_info); 384 0, smi_info);
379 if (err & NOTIFY_STOP_MASK) { 385 if (err & NOTIFY_STOP_MASK) {
@@ -582,12 +588,8 @@ static void check_bt_irq(struct smi_info *smi_info, bool irq_on)
582static void handle_transaction_done(struct smi_info *smi_info) 588static void handle_transaction_done(struct smi_info *smi_info)
583{ 589{
584 struct ipmi_smi_msg *msg; 590 struct ipmi_smi_msg *msg;
585#ifdef DEBUG_TIMING
586 struct timeval t;
587 591
588 do_gettimeofday(&t); 592 debug_timestamp("Done");
589 printk(KERN_DEBUG "**Done: %d.%9.9d\n", t.tv_sec, t.tv_usec);
590#endif
591 switch (smi_info->si_state) { 593 switch (smi_info->si_state) {
592 case SI_NORMAL: 594 case SI_NORMAL:
593 if (!smi_info->curr_msg) 595 if (!smi_info->curr_msg)
@@ -929,24 +931,15 @@ static void sender(void *send_info,
929 struct smi_info *smi_info = send_info; 931 struct smi_info *smi_info = send_info;
930 enum si_sm_result result; 932 enum si_sm_result result;
931 unsigned long flags; 933 unsigned long flags;
932#ifdef DEBUG_TIMING
933 struct timeval t;
934#endif
935
936 BUG_ON(smi_info->waiting_msg);
937 smi_info->waiting_msg = msg;
938 934
939#ifdef DEBUG_TIMING 935 debug_timestamp("Enqueue");
940 do_gettimeofday(&t);
941 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
942#endif
943 936
944 if (smi_info->run_to_completion) { 937 if (smi_info->run_to_completion) {
945 /* 938 /*
946 * If we are running to completion, start it and run 939 * If we are running to completion, start it and run
947 * transactions until everything is clear. 940 * transactions until everything is clear.
948 */ 941 */
949 smi_info->curr_msg = smi_info->waiting_msg; 942 smi_info->curr_msg = msg;
950 smi_info->waiting_msg = NULL; 943 smi_info->waiting_msg = NULL;
951 944
952 /* 945 /*
@@ -964,6 +957,15 @@ static void sender(void *send_info,
964 } 957 }
965 958
966 spin_lock_irqsave(&smi_info->si_lock, flags); 959 spin_lock_irqsave(&smi_info->si_lock, flags);
960 /*
961 * The following two lines don't need to be under the lock for
962 * the lock's sake, but they do need SMP memory barriers to
963 * avoid getting things out of order. We are already claiming
964 * the lock, anyway, so just do it under the lock to avoid the
965 * ordering problem.
966 */
967 BUG_ON(smi_info->waiting_msg);
968 smi_info->waiting_msg = msg;
967 check_start_timer_thread(smi_info); 969 check_start_timer_thread(smi_info);
968 spin_unlock_irqrestore(&smi_info->si_lock, flags); 970 spin_unlock_irqrestore(&smi_info->si_lock, flags);
969} 971}
@@ -989,18 +991,18 @@ static void set_run_to_completion(void *send_info, bool i_run_to_completion)
989 * we are spinning in kipmid looking for something and not delaying 991 * we are spinning in kipmid looking for something and not delaying
990 * between checks 992 * between checks
991 */ 993 */
992static inline void ipmi_si_set_not_busy(struct timespec *ts) 994static inline void ipmi_si_set_not_busy(struct timespec64 *ts)
993{ 995{
994 ts->tv_nsec = -1; 996 ts->tv_nsec = -1;
995} 997}
996static inline int ipmi_si_is_busy(struct timespec *ts) 998static inline int ipmi_si_is_busy(struct timespec64 *ts)
997{ 999{
998 return ts->tv_nsec != -1; 1000 return ts->tv_nsec != -1;
999} 1001}
1000 1002
1001static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result, 1003static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result,
1002 const struct smi_info *smi_info, 1004 const struct smi_info *smi_info,
1003 struct timespec *busy_until) 1005 struct timespec64 *busy_until)
1004{ 1006{
1005 unsigned int max_busy_us = 0; 1007 unsigned int max_busy_us = 0;
1006 1008
@@ -1009,12 +1011,13 @@ static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result,
1009 if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY) 1011 if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY)
1010 ipmi_si_set_not_busy(busy_until); 1012 ipmi_si_set_not_busy(busy_until);
1011 else if (!ipmi_si_is_busy(busy_until)) { 1013 else if (!ipmi_si_is_busy(busy_until)) {
1012 getnstimeofday(busy_until); 1014 getnstimeofday64(busy_until);
1013 timespec_add_ns(busy_until, max_busy_us*NSEC_PER_USEC); 1015 timespec64_add_ns(busy_until, max_busy_us*NSEC_PER_USEC);
1014 } else { 1016 } else {
1015 struct timespec now; 1017 struct timespec64 now;
1016 getnstimeofday(&now); 1018
1017 if (unlikely(timespec_compare(&now, busy_until) > 0)) { 1019 getnstimeofday64(&now);
1020 if (unlikely(timespec64_compare(&now, busy_until) > 0)) {
1018 ipmi_si_set_not_busy(busy_until); 1021 ipmi_si_set_not_busy(busy_until);
1019 return 0; 1022 return 0;
1020 } 1023 }
@@ -1037,7 +1040,7 @@ static int ipmi_thread(void *data)
1037 struct smi_info *smi_info = data; 1040 struct smi_info *smi_info = data;
1038 unsigned long flags; 1041 unsigned long flags;
1039 enum si_sm_result smi_result; 1042 enum si_sm_result smi_result;
1040 struct timespec busy_until; 1043 struct timespec64 busy_until;
1041 1044
1042 ipmi_si_set_not_busy(&busy_until); 1045 ipmi_si_set_not_busy(&busy_until);
1043 set_user_nice(current, MAX_NICE); 1046 set_user_nice(current, MAX_NICE);
@@ -1128,15 +1131,10 @@ static void smi_timeout(unsigned long data)
1128 unsigned long jiffies_now; 1131 unsigned long jiffies_now;
1129 long time_diff; 1132 long time_diff;
1130 long timeout; 1133 long timeout;
1131#ifdef DEBUG_TIMING
1132 struct timeval t;
1133#endif
1134 1134
1135 spin_lock_irqsave(&(smi_info->si_lock), flags); 1135 spin_lock_irqsave(&(smi_info->si_lock), flags);
1136#ifdef DEBUG_TIMING 1136 debug_timestamp("Timer");
1137 do_gettimeofday(&t); 1137
1138 printk(KERN_DEBUG "**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec);
1139#endif
1140 jiffies_now = jiffies; 1138 jiffies_now = jiffies;
1141 time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) 1139 time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
1142 * SI_USEC_PER_JIFFY); 1140 * SI_USEC_PER_JIFFY);
@@ -1173,18 +1171,13 @@ static irqreturn_t si_irq_handler(int irq, void *data)
1173{ 1171{
1174 struct smi_info *smi_info = data; 1172 struct smi_info *smi_info = data;
1175 unsigned long flags; 1173 unsigned long flags;
1176#ifdef DEBUG_TIMING
1177 struct timeval t;
1178#endif
1179 1174
1180 spin_lock_irqsave(&(smi_info->si_lock), flags); 1175 spin_lock_irqsave(&(smi_info->si_lock), flags);
1181 1176
1182 smi_inc_stat(smi_info, interrupts); 1177 smi_inc_stat(smi_info, interrupts);
1183 1178
1184#ifdef DEBUG_TIMING 1179 debug_timestamp("Interrupt");
1185 do_gettimeofday(&t); 1180
1186 printk(KERN_DEBUG "**Interrupt: %d.%9.9d\n", t.tv_sec, t.tv_usec);
1187#endif
1188 smi_event_handler(smi_info, 0); 1181 smi_event_handler(smi_info, 0);
1189 spin_unlock_irqrestore(&(smi_info->si_lock), flags); 1182 spin_unlock_irqrestore(&(smi_info->si_lock), flags);
1190 return IRQ_HANDLED; 1183 return IRQ_HANDLED;
@@ -2038,18 +2031,13 @@ static u32 ipmi_acpi_gpe(acpi_handle gpe_device,
2038{ 2031{
2039 struct smi_info *smi_info = context; 2032 struct smi_info *smi_info = context;
2040 unsigned long flags; 2033 unsigned long flags;
2041#ifdef DEBUG_TIMING
2042 struct timeval t;
2043#endif
2044 2034
2045 spin_lock_irqsave(&(smi_info->si_lock), flags); 2035 spin_lock_irqsave(&(smi_info->si_lock), flags);
2046 2036
2047 smi_inc_stat(smi_info, interrupts); 2037 smi_inc_stat(smi_info, interrupts);
2048 2038
2049#ifdef DEBUG_TIMING 2039 debug_timestamp("ACPI_GPE");
2050 do_gettimeofday(&t); 2040
2051 printk("**ACPI_GPE: %d.%9.9d\n", t.tv_sec, t.tv_usec);
2052#endif
2053 smi_event_handler(smi_info, 0); 2041 smi_event_handler(smi_info, 0);
2054 spin_unlock_irqrestore(&(smi_info->si_lock), flags); 2042 spin_unlock_irqrestore(&(smi_info->si_lock), flags);
2055 2043
@@ -2071,7 +2059,6 @@ static int acpi_gpe_irq_setup(struct smi_info *info)
2071 if (!info->irq) 2059 if (!info->irq)
2072 return 0; 2060 return 0;
2073 2061
2074 /* FIXME - is level triggered right? */
2075 status = acpi_install_gpe_handler(NULL, 2062 status = acpi_install_gpe_handler(NULL,
2076 info->irq, 2063 info->irq,
2077 ACPI_GPE_LEVEL_TRIGGERED, 2064 ACPI_GPE_LEVEL_TRIGGERED,
@@ -2998,7 +2985,9 @@ static int smi_type_proc_show(struct seq_file *m, void *v)
2998{ 2985{
2999 struct smi_info *smi = m->private; 2986 struct smi_info *smi = m->private;
3000 2987
3001 return seq_printf(m, "%s\n", si_to_str[smi->si_type]); 2988 seq_printf(m, "%s\n", si_to_str[smi->si_type]);
2989
2990 return seq_has_overflowed(m);
3002} 2991}
3003 2992
3004static int smi_type_proc_open(struct inode *inode, struct file *file) 2993static int smi_type_proc_open(struct inode *inode, struct file *file)
@@ -3060,16 +3049,18 @@ static int smi_params_proc_show(struct seq_file *m, void *v)
3060{ 3049{
3061 struct smi_info *smi = m->private; 3050 struct smi_info *smi = m->private;
3062 3051
3063 return seq_printf(m, 3052 seq_printf(m,
3064 "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n", 3053 "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n",
3065 si_to_str[smi->si_type], 3054 si_to_str[smi->si_type],
3066 addr_space_to_str[smi->io.addr_type], 3055 addr_space_to_str[smi->io.addr_type],
3067 smi->io.addr_data, 3056 smi->io.addr_data,
3068 smi->io.regspacing, 3057 smi->io.regspacing,
3069 smi->io.regsize, 3058 smi->io.regsize,
3070 smi->io.regshift, 3059 smi->io.regshift,
3071 smi->irq, 3060 smi->irq,
3072 smi->slave_addr); 3061 smi->slave_addr);
3062
3063 return seq_has_overflowed(m);
3073} 3064}
3074 3065
3075static int smi_params_proc_open(struct inode *inode, struct file *file) 3066static int smi_params_proc_open(struct inode *inode, struct file *file)
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 982b96323f82..f6e378dac5f5 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1097,8 +1097,6 @@ static int ssif_remove(struct i2c_client *client)
1097 if (!ssif_info) 1097 if (!ssif_info)
1098 return 0; 1098 return 0;
1099 1099
1100 i2c_set_clientdata(client, NULL);
1101
1102 /* 1100 /*
1103 * After this point, we won't deliver anything asynchronously 1101 * After this point, we won't deliver anything asynchronously
1104 * to the message handler. We can unregister ourself. 1102 * to the message handler. We can unregister ourself.
1104 * to the message handler. We can unregister ourself. 1102 * to the message handler. We can unregister ourself.
@@ -1198,7 +1196,9 @@ static int ssif_detect(struct i2c_client *client, struct i2c_board_info *info)
1198 1196
1199static int smi_type_proc_show(struct seq_file *m, void *v) 1197static int smi_type_proc_show(struct seq_file *m, void *v)
1200{ 1198{
1201 return seq_puts(m, "ssif\n"); 1199 seq_puts(m, "ssif\n");
1200
1201 return seq_has_overflowed(m);
1202} 1202}
1203 1203
1204static int smi_type_proc_open(struct inode *inode, struct file *file) 1204static int smi_type_proc_open(struct inode *inode, struct file *file)
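The pattern repeated across these IPMI proc handlers deserves a note: seq_printf(), seq_puts() and friends no longer return a useful value, so a show routine now prints unconditionally and reports overflow separately via seq_has_overflowed(). A minimal sketch of the new idiom (foo_proc_show and the printed value are hypothetical):

#include <linux/seq_file.h>

static int foo_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "state=%d\n", 42);

	/* Report overflow; the seq_file core retries with a larger buffer. */
	return seq_has_overflowed(m);
}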
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index af5d63c7cc53..2fe195002021 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -75,29 +75,25 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
75 unsigned long key; 75 unsigned long key;
76 u32 desc_version; 76 u32 desc_version;
77 77
78 *map_size = 0; 78 *map_size = sizeof(*m) * 32;
79 *desc_size = 0; 79again:
80 key = 0;
81 status = efi_call_early(get_memory_map, map_size, NULL,
82 &key, desc_size, &desc_version);
83 if (status != EFI_BUFFER_TOO_SMALL)
84 return EFI_LOAD_ERROR;
85
86 /* 80 /*
87 * Add an additional efi_memory_desc_t because we're doing an 81 * Add an additional efi_memory_desc_t because we're doing an
88 * allocation which may be in a new descriptor region. 82 * allocation which may be in a new descriptor region.
89 */ 83 */
90 *map_size += *desc_size; 84 *map_size += sizeof(*m);
91 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, 85 status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
92 *map_size, (void **)&m); 86 *map_size, (void **)&m);
93 if (status != EFI_SUCCESS) 87 if (status != EFI_SUCCESS)
94 goto fail; 88 goto fail;
95 89
90 *desc_size = 0;
91 key = 0;
96 status = efi_call_early(get_memory_map, map_size, m, 92 status = efi_call_early(get_memory_map, map_size, m,
97 &key, desc_size, &desc_version); 93 &key, desc_size, &desc_version);
98 if (status == EFI_BUFFER_TOO_SMALL) { 94 if (status == EFI_BUFFER_TOO_SMALL) {
99 efi_call_early(free_pool, m); 95 efi_call_early(free_pool, m);
100 return EFI_LOAD_ERROR; 96 goto again;
101 } 97 }
102 98
103 if (status != EFI_SUCCESS) 99 if (status != EFI_SUCCESS)
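The rework above replaces a probe-then-allocate sequence with a grow-and-retry loop: start from a guessed size, always leave slack for the descriptor the allocation itself may add, and loop back whenever the firmware still reports EFI_BUFFER_TOO_SMALL (get_memory_map() writes the required size back through map_size, which can grow between calls). The same shape, modelled as self-contained C with a simulated firmware call — query_map() and its growth behaviour are invented for illustration:

#include <stdio.h>
#include <stdlib.h>

static size_t required = 200;		/* simulated firmware state */

/* Like get_memory_map(): fails and reports the needed size, which may
 * have grown since the last call. */
static int query_map(size_t *size, char *buf)
{
	if (*size < required) {
		*size = required;
		required += 16;		/* the map keeps changing */
		return -1;		/* EFI_BUFFER_TOO_SMALL */
	}
	return 0;
}

int main(void)
{
	size_t size = 32;		/* initial guess */
	char *buf;

	for (;;) {
		size += 16;		/* slack for the new allocation */
		buf = malloc(size);
		if (!buf)
			return 1;
		if (query_map(&size, buf) == 0)
			break;
		free(buf);		/* stale buffer; retry */
	}
	printf("map fits in %zu bytes\n", size);
	free(buf);
	return 0;
}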
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 638e797037da..97527614141b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -735,6 +735,31 @@ config INTEL_IPS
735 functionality. If in doubt, say Y here; it will only load on 735 functionality. If in doubt, say Y here; it will only load on
736 supported platforms. 736 supported platforms.
737 737
738config INTEL_IMR
739 bool "Intel Isolated Memory Region support"
740 default n
741 depends on X86_INTEL_QUARK && IOSF_MBI
742 ---help---
743 This option provides a means to manipulate Isolated Memory Regions.
744 IMRs are a set of registers that define read and write access masks
745 to prohibit certain system agents from accessing memory with 1 KiB
746 granularity.
747
748 IMRs make it possible to control read/write access to an address
749 by hardware agents inside the SoC. Read and write masks can be
750 defined for:
751 - eSRAM flush
752 - Dirty CPU snoop (write only)
753 - RMU access
754 - PCI Virtual Channel 0/Virtual Channel 1
755 - SMM mode
756 - Non-SMM mode
757
758 Quark contains a set of eight IMR registers and makes use of those
759 registers during its bootup process.
760
761 If you are running on a Galileo/Quark, say Y here.
762
738config IBM_RTL 763config IBM_RTL
739 tristate "Device driver to enable PRTL support" 764 tristate "Device driver to enable PRTL support"
740 depends on X86 && PCI 765 depends on X86 && PCI
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 02b16910f4c9..995986b8e36b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -645,11 +645,12 @@ out:
645 645
646static unsigned long randomize_stack_top(unsigned long stack_top) 646static unsigned long randomize_stack_top(unsigned long stack_top)
647{ 647{
648 unsigned int random_variable = 0; 648 unsigned long random_variable = 0;
649 649
650 if ((current->flags & PF_RANDOMIZE) && 650 if ((current->flags & PF_RANDOMIZE) &&
651 !(current->personality & ADDR_NO_RANDOMIZE)) { 651 !(current->personality & ADDR_NO_RANDOMIZE)) {
652 random_variable = get_random_int() & STACK_RND_MASK; 652 random_variable = (unsigned long) get_random_int();
653 random_variable &= STACK_RND_MASK;
653 random_variable <<= PAGE_SHIFT; 654 random_variable <<= PAGE_SHIFT;
654 } 655 }
655#ifdef CONFIG_STACK_GROWSUP 656#ifdef CONFIG_STACK_GROWSUP
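The binfmt_elf fix above is about integer width, not randomness. With x86_64 values (STACK_RND_MASK of 0x3fffff and PAGE_SHIFT of 12, both assumed here), the masked value needs 22 bits and the shift pushes the result to 34 bits, so a 32-bit intermediate silently wraps and discards the top entropy bits. A self-contained demonstration (assumes an LP64 host):

#include <stdio.h>

int main(void)
{
	unsigned int  r32 = 0x3fffff;	/* max masked random value */
	unsigned long r64 = 0x3fffff;

	/* Wraps to 0xfffff000: only the low 20 random bits survive. */
	printf("32-bit: %#lx\n", (unsigned long)(r32 << 12));
	/* Full 34-bit offset range is preserved. */
	printf("64-bit: %#lx\n", r64 << 12);
	return 0;
}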
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d1ec10a940ff..1b45e4a0519b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -202,7 +202,7 @@ static __always_inline void data_access_exceeds_word_size(void)
202{ 202{
203} 203}
204 204
205static __always_inline void __read_once_size(volatile void *p, void *res, int size) 205static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
206{ 206{
207 switch (size) { 207 switch (size) {
208 case 1: *(__u8 *)res = *(volatile __u8 *)p; break; 208 case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
@@ -259,10 +259,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
259 */ 259 */
260 260
261#define READ_ONCE(x) \ 261#define READ_ONCE(x) \
262 ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) 262 ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
263 263
264#define WRITE_ONCE(x, val) \ 264#define WRITE_ONCE(x, val) \
265 ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; }) 265 ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
266 266
267#endif /* __KERNEL__ */ 267#endif /* __KERNEL__ */
268 268
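The union in the new READ_ONCE() is worth spelling out: pairing typeof(x) with a char array gives the macro writable, byte-addressable storage even when x is const-qualified (the old "typeof(x) __val" could not legally be written to), and &(x) keeps the macro safe for arguments containing operators. A self-contained model of just that type-handling trick — GCC/Clang extensions assumed, and the volatile-access machinery of the real macro deliberately omitted:

#include <stdio.h>
#include <string.h>

#define MY_READ_ONCE(x) ({						\
	union { __typeof__(x) __val; char __c[1]; } __u;		\
	memcpy(__u.__c, (const void *)&(x), sizeof(x));			\
	__u.__val; })

int main(void)
{
	const int v = 42;	/* const source: the old macro broke here */
	printf("%d\n", MY_READ_ONCE(v));
	return 0;
}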
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 75ae2e2631fc..a19bcf9e762e 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -156,8 +156,14 @@ typedef enum {
156 KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */ 156 KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */
157} kdb_reason_t; 157} kdb_reason_t;
158 158
159enum kdb_msgsrc {
160 KDB_MSGSRC_INTERNAL, /* direct call to kdb_printf() */
161 KDB_MSGSRC_PRINTK, /* trapped from printk() */
162};
163
159extern int kdb_trap_printk; 164extern int kdb_trap_printk;
160extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args); 165extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
166 va_list args);
161extern __printf(1, 2) int kdb_printf(const char *, ...); 167extern __printf(1, 2) int kdb_printf(const char *, ...);
162typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); 168typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
163 169
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41c60e5302d7..6d77432e14ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
363 */ 363 */
364extern void show_stack(struct task_struct *task, unsigned long *sp); 364extern void show_stack(struct task_struct *task, unsigned long *sp);
365 365
366void io_schedule(void);
367long io_schedule_timeout(long timeout);
368
369extern void cpu_init (void); 366extern void cpu_init (void);
370extern void trap_init(void); 367extern void trap_init(void);
371extern void update_process_times(int user); 368extern void update_process_times(int user);
@@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
422asmlinkage void schedule(void); 419asmlinkage void schedule(void);
423extern void schedule_preempt_disabled(void); 420extern void schedule_preempt_disabled(void);
424 421
422extern long io_schedule_timeout(long timeout);
423
424static inline void io_schedule(void)
425{
426 io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
427}
428
425struct nsproxy; 429struct nsproxy;
426struct user_namespace; 430struct user_namespace;
427 431
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 07ce18ca71e0..0874e2edd275 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -604,7 +604,7 @@ return_normal:
604 online_cpus) 604 online_cpus)
605 cpu_relax(); 605 cpu_relax();
606 if (!time_left) 606 if (!time_left)
607 pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); 607 pr_crit("Timed out waiting for secondary CPUs.\n");
608 608
609 /* 609 /*
610 * At this point the primary processor is completely 610 * At this point the primary processor is completely
@@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
696 696
697 if (arch_kgdb_ops.enable_nmi) 697 if (arch_kgdb_ops.enable_nmi)
698 arch_kgdb_ops.enable_nmi(0); 698 arch_kgdb_ops.enable_nmi(0);
699 /*
700 * Avoid entering the debugger if we were triggered due to an oops
701 * but panic_timeout indicates the system should automatically
702 * reboot on panic. We don't want to get stuck waiting for input
703 * on such systems, especially if it's "just" an oops.
704 */
705 if (signo != SIGTRAP && panic_timeout)
706 return 1;
699 707
700 memset(ks, 0, sizeof(struct kgdb_state)); 708 memset(ks, 0, sizeof(struct kgdb_state));
701 ks->cpu = raw_smp_processor_id(); 709 ks->cpu = raw_smp_processor_id();
@@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self,
828 unsigned long val, 836 unsigned long val,
829 void *data) 837 void *data)
830{ 838{
839 /*
840 * Avoid entering the debugger if we were triggered due to a panic.
841 * We don't want to get stuck waiting for input from the user in such a case.
842 * panic_timeout indicates the system should automatically
843 * reboot on panic.
844 */
845 if (panic_timeout)
846 return NOTIFY_DONE;
847
831 if (dbg_kdb_mode) 848 if (dbg_kdb_mode)
832 kdb_printf("PANIC: %s\n", (char *)data); 849 kdb_printf("PANIC: %s\n", (char *)data);
833 kgdb_breakpoint(); 850 kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 7c70812caea5..fc1ef736253c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -439,7 +439,7 @@ poll_again:
439 * substituted for %d, %x or %o in the prompt. 439 * substituted for %d, %x or %o in the prompt.
440 */ 440 */
441 441
442char *kdb_getstr(char *buffer, size_t bufsize, char *prompt) 442char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
443{ 443{
444 if (prompt && kdb_prompt_str != prompt) 444 if (prompt && kdb_prompt_str != prompt)
445 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); 445 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
@@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor)
548 return 0; 548 return 0;
549} 549}
550 550
551int vkdb_printf(const char *fmt, va_list ap) 551int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
552{ 552{
553 int diag; 553 int diag;
554 int linecount; 554 int linecount;
@@ -680,6 +680,12 @@ int vkdb_printf(const char *fmt, va_list ap)
680 size_avail = sizeof(kdb_buffer) - len; 680 size_avail = sizeof(kdb_buffer) - len;
681 goto kdb_print_out; 681 goto kdb_print_out;
682 } 682 }
683 if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH)
684 /*
685 * This was an interactive search (using '/' at the more
686 * prompt) and it has completed. Clear the flag.
687 */
688 kdb_grepping_flag = 0;
683 /* 689 /*
684 * at this point the string is a full line and 690 * at this point the string is a full line and
685 * should be printed, up to the null. 691 * should be printed, up to the null.
@@ -691,19 +697,20 @@ kdb_printit:
691 * Write to all consoles. 697 * Write to all consoles.
692 */ 698 */
693 retlen = strlen(kdb_buffer); 699 retlen = strlen(kdb_buffer);
700 cp = (char *) printk_skip_level(kdb_buffer);
694 if (!dbg_kdb_mode && kgdb_connected) { 701 if (!dbg_kdb_mode && kgdb_connected) {
695 gdbstub_msg_write(kdb_buffer, retlen); 702 gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
696 } else { 703 } else {
697 if (dbg_io_ops && !dbg_io_ops->is_console) { 704 if (dbg_io_ops && !dbg_io_ops->is_console) {
698 len = retlen; 705 len = retlen - (cp - kdb_buffer);
699 cp = kdb_buffer; 706 cp2 = cp;
700 while (len--) { 707 while (len--) {
701 dbg_io_ops->write_char(*cp); 708 dbg_io_ops->write_char(*cp2);
702 cp++; 709 cp2++;
703 } 710 }
704 } 711 }
705 while (c) { 712 while (c) {
706 c->write(c, kdb_buffer, retlen); 713 c->write(c, cp, retlen - (cp - kdb_buffer));
707 touch_nmi_watchdog(); 714 touch_nmi_watchdog();
708 c = c->next; 715 c = c->next;
709 } 716 }
@@ -711,7 +718,10 @@ kdb_printit:
711 if (logging) { 718 if (logging) {
712 saved_loglevel = console_loglevel; 719 saved_loglevel = console_loglevel;
713 console_loglevel = CONSOLE_LOGLEVEL_SILENT; 720 console_loglevel = CONSOLE_LOGLEVEL_SILENT;
714 printk(KERN_INFO "%s", kdb_buffer); 721 if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
722 printk("%s", kdb_buffer);
723 else
724 pr_info("%s", kdb_buffer);
715 } 725 }
716 726
717 if (KDB_STATE(PAGER)) { 727 if (KDB_STATE(PAGER)) {
@@ -794,11 +804,23 @@ kdb_printit:
794 kdb_nextline = linecount - 1; 804 kdb_nextline = linecount - 1;
795 kdb_printf("\r"); 805 kdb_printf("\r");
796 suspend_grep = 1; /* for this recursion */ 806 suspend_grep = 1; /* for this recursion */
807 } else if (buf1[0] == '/' && !kdb_grepping_flag) {
808 kdb_printf("\r");
809 kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN,
810 kdbgetenv("SEARCHPROMPT") ?: "search> ");
811 *strchrnul(kdb_grep_string, '\n') = '\0';
812 kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH;
813 suspend_grep = 1; /* for this recursion */
797 } else if (buf1[0] && buf1[0] != '\n') { 814 } else if (buf1[0] && buf1[0] != '\n') {
798 /* user hit something other than enter */ 815 /* user hit something other than enter */
799 suspend_grep = 1; /* for this recursion */ 816 suspend_grep = 1; /* for this recursion */
800 kdb_printf("\nOnly 'q' or 'Q' are processed at more " 817 if (buf1[0] != '/')
801 "prompt, input ignored\n"); 818 kdb_printf(
819 "\nOnly 'q', 'Q' or '/' are processed at "
820 "more prompt, input ignored\n");
821 else
822 kdb_printf("\n'/' cannot be used during | "
823 "grep filtering, input ignored\n");
802 } else if (kdb_grepping_flag) { 824 } else if (kdb_grepping_flag) {
803 /* user hit enter */ 825 /* user hit enter */
804 suspend_grep = 1; /* for this recursion */ 826 suspend_grep = 1; /* for this recursion */
@@ -844,7 +866,7 @@ int kdb_printf(const char *fmt, ...)
844 int r; 866 int r;
845 867
846 va_start(ap, fmt); 868 va_start(ap, fmt);
847 r = vkdb_printf(fmt, ap); 869 r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
848 va_end(ap); 870 va_end(ap);
849 871
850 return r; 872 return r;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7b40c5f07dce..4121345498e0 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -50,8 +50,7 @@
50static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; 50static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE;
51module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); 51module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600);
52 52
53#define GREP_LEN 256 53char kdb_grep_string[KDB_GREP_STRLEN];
54char kdb_grep_string[GREP_LEN];
55int kdb_grepping_flag; 54int kdb_grepping_flag;
56EXPORT_SYMBOL(kdb_grepping_flag); 55EXPORT_SYMBOL(kdb_grepping_flag);
57int kdb_grep_leading; 56int kdb_grep_leading;
@@ -870,7 +869,7 @@ static void parse_grep(const char *str)
870 len = strlen(cp); 869 len = strlen(cp);
871 if (!len) 870 if (!len)
872 return; 871 return;
873 if (len >= GREP_LEN) { 872 if (len >= KDB_GREP_STRLEN) {
874 kdb_printf("search string too long\n"); 873 kdb_printf("search string too long\n");
875 return; 874 return;
876 } 875 }
@@ -915,13 +914,12 @@ int kdb_parse(const char *cmdstr)
915 char *cp; 914 char *cp;
916 char *cpp, quoted; 915 char *cpp, quoted;
917 kdbtab_t *tp; 916 kdbtab_t *tp;
918 int i, escaped, ignore_errors = 0, check_grep; 917 int i, escaped, ignore_errors = 0, check_grep = 0;
919 918
920 /* 919 /*
921 * First tokenize the command string. 920 * First tokenize the command string.
922 */ 921 */
923 cp = (char *)cmdstr; 922 cp = (char *)cmdstr;
924 kdb_grepping_flag = check_grep = 0;
925 923
926 if (KDB_FLAG(CMD_INTERRUPT)) { 924 if (KDB_FLAG(CMD_INTERRUPT)) {
927 /* Previous command was interrupted, newline must not 925 /* Previous command was interrupted, newline must not
@@ -1247,7 +1245,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1247 kdb_printf("due to NonMaskable Interrupt @ " 1245 kdb_printf("due to NonMaskable Interrupt @ "
1248 kdb_machreg_fmt "\n", 1246 kdb_machreg_fmt "\n",
1249 instruction_pointer(regs)); 1247 instruction_pointer(regs));
1250 kdb_dumpregs(regs);
1251 break; 1248 break;
1252 case KDB_REASON_SSTEP: 1249 case KDB_REASON_SSTEP:
1253 case KDB_REASON_BREAK: 1250 case KDB_REASON_BREAK:
@@ -1281,6 +1278,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1281 */ 1278 */
1282 kdb_nextline = 1; 1279 kdb_nextline = 1;
1283 KDB_STATE_CLEAR(SUPPRESS); 1280 KDB_STATE_CLEAR(SUPPRESS);
1281 kdb_grepping_flag = 0;
1282 /* ensure the old search does not leak into '/' commands */
1283 kdb_grep_string[0] = '\0';
1284 1284
1285 cmdbuf = cmd_cur; 1285 cmdbuf = cmd_cur;
1286 *cmdbuf = '\0'; 1286 *cmdbuf = '\0';
@@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
2256 /* 2256 /*
2257 * Validate cpunum 2257 * Validate cpunum
2258 */ 2258 */
2259 if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) 2259 if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
2260 return KDB_BADCPUNUM; 2260 return KDB_BADCPUNUM;
2261 2261
2262 dbg_switch_cpu = cpunum; 2262 dbg_switch_cpu = cpunum;
@@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv)
2583#define K(x) ((x) << (PAGE_SHIFT - 10)) 2583#define K(x) ((x) << (PAGE_SHIFT - 10))
2584 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" 2584 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n"
2585 "Buffers: %8lu kB\n", 2585 "Buffers: %8lu kB\n",
2586 val.totalram, val.freeram, val.bufferram); 2586 K(val.totalram), K(val.freeram), K(val.bufferram));
2587 return 0; 2587 return 0;
2588} 2588}
2589 2589
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index eaacd1693954..75014d7f4568 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
196 196
197/* Miscellaneous functions and data areas */ 197/* Miscellaneous functions and data areas */
198extern int kdb_grepping_flag; 198extern int kdb_grepping_flag;
199#define KDB_GREPPING_FLAG_SEARCH 0x8000
199extern char kdb_grep_string[]; 200extern char kdb_grep_string[];
201#define KDB_GREP_STRLEN 256
200extern int kdb_grep_leading; 202extern int kdb_grep_leading;
201extern int kdb_grep_trailing; 203extern int kdb_grep_trailing;
202extern char *kdb_cmds[]; 204extern char *kdb_cmds[];
@@ -209,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p);
209extern void kdb_print_nameval(const char *name, unsigned long val); 211extern void kdb_print_nameval(const char *name, unsigned long val);
210extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); 212extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
211extern void kdb_meminfo_proc_show(void); 213extern void kdb_meminfo_proc_show(void);
212extern char *kdb_getstr(char *, size_t, char *); 214extern char *kdb_getstr(char *, size_t, const char *);
213extern void kdb_gdb_state_pass(char *buf); 215extern void kdb_gdb_state_pass(char *buf);
214 216
215/* Defines for kdb_symbol_print */ 217/* Defines for kdb_symbol_print */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3059bc2f022d..e16e5542bf13 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,7 +1193,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
1193 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); 1193 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
1194 1194
1195 if (unlikely(ret)) { 1195 if (unlikely(ret)) {
1196 remove_waiter(lock, &waiter); 1196 if (rt_mutex_has_waiters(lock))
1197 remove_waiter(lock, &waiter);
1197 rt_mutex_handle_deadlock(ret, chwalk, &waiter); 1198 rt_mutex_handle_deadlock(ret, chwalk, &waiter);
1198 } 1199 }
1199 1200
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c06df7de0963..01cfd69c54c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args)
1811 1811
1812#ifdef CONFIG_KGDB_KDB 1812#ifdef CONFIG_KGDB_KDB
1813 if (unlikely(kdb_trap_printk)) { 1813 if (unlikely(kdb_trap_printk)) {
1814 r = vkdb_printf(fmt, args); 1814 r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
1815 return r; 1815 return r;
1816 } 1816 }
1817#endif 1817#endif
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d7bbe3095ad..0a571e9a0f1d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -326,6 +326,7 @@ void rcu_read_unlock_special(struct task_struct *t)
326 special = t->rcu_read_unlock_special; 326 special = t->rcu_read_unlock_special;
327 if (special.b.need_qs) { 327 if (special.b.need_qs) {
328 rcu_preempt_qs(); 328 rcu_preempt_qs();
329 t->rcu_read_unlock_special.b.need_qs = false;
329 if (!t->rcu_read_unlock_special.s) { 330 if (!t->rcu_read_unlock_special.s) {
330 local_irq_restore(flags); 331 local_irq_restore(flags);
331 return; 332 return;
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230fb86a..eae160dd669d 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
87 * so we don't have to move tasks around upon policy change, 87 * so we don't have to move tasks around upon policy change,
88 * or flail around trying to allocate bandwidth on the fly. 88 * or flail around trying to allocate bandwidth on the fly.
89 * A bandwidth exception in __sched_setscheduler() allows 89 * A bandwidth exception in __sched_setscheduler() allows
90 * the policy change to proceed. Thereafter, task_group() 90 * the policy change to proceed.
91 * returns &root_task_group, so zero bandwidth is required.
92 */ 91 */
93 free_rt_sched_group(tg); 92 free_rt_sched_group(tg);
94 tg->rt_se = root_task_group.rt_se; 93 tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
115 if (tg != &root_task_group) 114 if (tg != &root_task_group)
116 return false; 115 return false;
117 116
118 if (p->sched_class != &fair_sched_class)
119 return false;
120
121 /* 117 /*
122 * We can only assume the task group can't go away on us if 118 * We can only assume the task group can't go away on us if
123 * autogroup_move_group() can see us on ->thread_group list. 119 * autogroup_move_group() can see us on ->thread_group list.
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 7052d3fd4e7b..8d0f35debf35 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
274 * first without taking the lock so we can 274 * first without taking the lock so we can
275 * return early in the blocking case. 275 * return early in the blocking case.
276 */ 276 */
277 if (!ACCESS_ONCE(x->done)) 277 if (!READ_ONCE(x->done))
278 return 0; 278 return 0;
279 279
280 spin_lock_irqsave(&x->wait.lock, flags); 280 spin_lock_irqsave(&x->wait.lock, flags);
@@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
297 */ 297 */
298bool completion_done(struct completion *x) 298bool completion_done(struct completion *x)
299{ 299{
300 return !!ACCESS_ONCE(x->done); 300 if (!READ_ONCE(x->done))
301 return false;
302
303 /*
304 * If ->done, we need to wait for complete() to release ->wait.lock
305 * otherwise we can end up freeing the completion before complete()
306 * is done referencing it.
307 *
308 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
309 * the loads of ->done and ->wait.lock such that we cannot observe
310 * the lock before complete() acquires it while observing the ->done
311 * after it's acquired the lock.
312 */
313 smp_rmb();
314 spin_unlock_wait(&x->wait.lock);
315 return true;
301} 316}
302EXPORT_SYMBOL(completion_done); 317EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13049aac05a6..f0f831e8a345 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -307,66 +307,6 @@ __read_mostly int scheduler_running;
307int sysctl_sched_rt_runtime = 950000; 307int sysctl_sched_rt_runtime = 950000;
308 308
309/* 309/*
310 * __task_rq_lock - lock the rq @p resides on.
311 */
312static inline struct rq *__task_rq_lock(struct task_struct *p)
313 __acquires(rq->lock)
314{
315 struct rq *rq;
316
317 lockdep_assert_held(&p->pi_lock);
318
319 for (;;) {
320 rq = task_rq(p);
321 raw_spin_lock(&rq->lock);
322 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
323 return rq;
324 raw_spin_unlock(&rq->lock);
325
326 while (unlikely(task_on_rq_migrating(p)))
327 cpu_relax();
328 }
329}
330
331/*
332 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
333 */
334static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
335 __acquires(p->pi_lock)
336 __acquires(rq->lock)
337{
338 struct rq *rq;
339
340 for (;;) {
341 raw_spin_lock_irqsave(&p->pi_lock, *flags);
342 rq = task_rq(p);
343 raw_spin_lock(&rq->lock);
344 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
345 return rq;
346 raw_spin_unlock(&rq->lock);
347 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
348
349 while (unlikely(task_on_rq_migrating(p)))
350 cpu_relax();
351 }
352}
353
354static void __task_rq_unlock(struct rq *rq)
355 __releases(rq->lock)
356{
357 raw_spin_unlock(&rq->lock);
358}
359
360static inline void
361task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
362 __releases(rq->lock)
363 __releases(p->pi_lock)
364{
365 raw_spin_unlock(&rq->lock);
366 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
367}
368
369/*
370 * this_rq_lock - lock this runqueue and disable interrupts. 310 * this_rq_lock - lock this runqueue and disable interrupts.
371 */ 311 */
372static struct rq *this_rq_lock(void) 312static struct rq *this_rq_lock(void)
@@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
2899 preempt_disable(); 2839 preempt_disable();
2900} 2840}
2901 2841
2902static void preempt_schedule_common(void) 2842static void __sched notrace preempt_schedule_common(void)
2903{ 2843{
2904 do { 2844 do {
2905 __preempt_count_add(PREEMPT_ACTIVE); 2845 __preempt_count_add(PREEMPT_ACTIVE);
@@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
4418 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 4358 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
4419 * that process accounting knows that this is a task in IO wait state. 4359 * that process accounting knows that this is a task in IO wait state.
4420 */ 4360 */
4421void __sched io_schedule(void)
4422{
4423 struct rq *rq = raw_rq();
4424
4425 delayacct_blkio_start();
4426 atomic_inc(&rq->nr_iowait);
4427 blk_flush_plug(current);
4428 current->in_iowait = 1;
4429 schedule();
4430 current->in_iowait = 0;
4431 atomic_dec(&rq->nr_iowait);
4432 delayacct_blkio_end();
4433}
4434EXPORT_SYMBOL(io_schedule);
4435
4436long __sched io_schedule_timeout(long timeout) 4361long __sched io_schedule_timeout(long timeout)
4437{ 4362{
4438 struct rq *rq = raw_rq(); 4363 int old_iowait = current->in_iowait;
4364 struct rq *rq;
4439 long ret; 4365 long ret;
4440 4366
4367 current->in_iowait = 1;
4368 if (old_iowait)
4369 blk_schedule_flush_plug(current);
4370 else
4371 blk_flush_plug(current);
4372
4441 delayacct_blkio_start(); 4373 delayacct_blkio_start();
4374 rq = raw_rq();
4442 atomic_inc(&rq->nr_iowait); 4375 atomic_inc(&rq->nr_iowait);
4443 blk_flush_plug(current);
4444 current->in_iowait = 1;
4445 ret = schedule_timeout(timeout); 4376 ret = schedule_timeout(timeout);
4446 current->in_iowait = 0; 4377 current->in_iowait = old_iowait;
4447 atomic_dec(&rq->nr_iowait); 4378 atomic_dec(&rq->nr_iowait);
4448 delayacct_blkio_end(); 4379 delayacct_blkio_end();
4380
4449 return ret; 4381 return ret;
4450} 4382}
4383EXPORT_SYMBOL(io_schedule_timeout);
4451 4384
4452/** 4385/**
4453 * sys_sched_get_priority_max - return maximum RT priority. 4386 * sys_sched_get_priority_max - return maximum RT priority.
@@ -7642,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
7642{ 7575{
7643 struct task_struct *g, *p; 7576 struct task_struct *g, *p;
7644 7577
7578 /*
7579 * Autogroups do not have RT tasks; see autogroup_create().
7580 */
7581 if (task_group_is_autogroup(tg))
7582 return 0;
7583
7645 for_each_process_thread(g, p) { 7584 for_each_process_thread(g, p) {
7646 if (rt_task(p) && task_group(p) == tg) 7585 if (rt_task(p) && task_group(p) == tg)
7647 return 1; 7586 return 1;
@@ -7734,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
7734{ 7673{
7735 int i, err = 0; 7674 int i, err = 0;
7736 7675
7676 /*
7677 * Disallowing the root group RT runtime is BAD, it would disallow the
7678 * kernel creating (and or operating) RT threads.
7679 */
7680 if (tg == &root_task_group && rt_runtime == 0)
7681 return -EINVAL;
7682
7683 /* No period doesn't make any sense. */
7684 if (rt_period == 0)
7685 return -EINVAL;
7686
7737 mutex_lock(&rt_constraints_mutex); 7687 mutex_lock(&rt_constraints_mutex);
7738 read_lock(&tasklist_lock); 7688 read_lock(&tasklist_lock);
7739 err = __rt_schedulable(tg, rt_period, rt_runtime); 7689 err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7790,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
7790 rt_period = (u64)rt_period_us * NSEC_PER_USEC; 7740 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
7791 rt_runtime = tg->rt_bandwidth.rt_runtime; 7741 rt_runtime = tg->rt_bandwidth.rt_runtime;
7792 7742
7793 if (rt_period == 0)
7794 return -EINVAL;
7795
7796 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); 7743 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
7797} 7744}
7798 7745
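One detail of the io_schedule_timeout() rework above is that current->in_iowait is saved and restored rather than blindly cleared, which makes the helper safe to call while an outer io wait is already in progress. The save/restore pattern in isolation, with a plain variable standing in for current->in_iowait:

#include <stdio.h>

static int in_iowait;		/* stands in for current->in_iowait */

static void io_wait(void)
{
	int old = in_iowait;	/* save */

	in_iowait = 1;
	/* ... sleep; a nested caller sees in_iowait already set ... */
	in_iowait = old;	/* restore, don't clear */
}

int main(void)
{
	in_iowait = 1;		/* simulate an outer io wait in progress */
	io_wait();
	printf("in_iowait after nested call: %d\n", in_iowait);	/* 1 */
	return 0;
}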
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a027799ae130..3fa8fa6d9403 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
511 struct sched_dl_entity, 511 struct sched_dl_entity,
512 dl_timer); 512 dl_timer);
513 struct task_struct *p = dl_task_of(dl_se); 513 struct task_struct *p = dl_task_of(dl_se);
514 unsigned long flags;
514 struct rq *rq; 515 struct rq *rq;
515again:
516 rq = task_rq(p);
517 raw_spin_lock(&rq->lock);
518 516
519 if (rq != task_rq(p)) { 517 rq = task_rq_lock(current, &flags);
520 /* Task was moved, retrying. */
521 raw_spin_unlock(&rq->lock);
522 goto again;
523 }
524 518
525 /* 519 /*
526 * We need to take care of several possible races here: 520 * We need to take care of several possible races here:
@@ -541,6 +535,26 @@ again:
541 535
542 sched_clock_tick(); 536 sched_clock_tick();
543 update_rq_clock(rq); 537 update_rq_clock(rq);
538
539 /*
540 * If the throttle happened during sched-out; like:
541 *
542 * schedule()
543 * deactivate_task()
544 * dequeue_task_dl()
545 * update_curr_dl()
546 * start_dl_timer()
547 * __dequeue_task_dl()
548 * prev->on_rq = 0;
549 *
550 * We can be both throttled and !queued. Replenish the counter
551 * but do not enqueue -- wait for our wakeup to do that.
552 */
553 if (!task_on_rq_queued(p)) {
554 replenish_dl_entity(dl_se, dl_se);
555 goto unlock;
556 }
557
544 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 558 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
545 if (dl_task(rq->curr)) 559 if (dl_task(rq->curr))
546 check_preempt_curr_dl(rq, p, 0); 560 check_preempt_curr_dl(rq, p, 0);
@@ -555,7 +569,7 @@ again:
555 push_dl_task(rq); 569 push_dl_task(rq);
556#endif 570#endif
557unlock: 571unlock:
558 raw_spin_unlock(&rq->lock); 572 task_rq_unlock(rq, current, &flags);
559 573
560 return HRTIMER_NORESTART; 574 return HRTIMER_NORESTART;
561} 575}
@@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
898 rq->curr->dl.dl_yielded = 1; 912 rq->curr->dl.dl_yielded = 1;
899 p->dl.runtime = 0; 913 p->dl.runtime = 0;
900 } 914 }
915 update_rq_clock(rq);
901 update_curr_dl(rq); 916 update_curr_dl(rq);
902} 917}
903 918
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0870db23d79c..dc0f435a2779 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { }
1380 1380
1381extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); 1381extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
1382 1382
1383/*
1384 * __task_rq_lock - lock the rq @p resides on.
1385 */
1386static inline struct rq *__task_rq_lock(struct task_struct *p)
1387 __acquires(rq->lock)
1388{
1389 struct rq *rq;
1390
1391 lockdep_assert_held(&p->pi_lock);
1392
1393 for (;;) {
1394 rq = task_rq(p);
1395 raw_spin_lock(&rq->lock);
1396 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
1397 return rq;
1398 raw_spin_unlock(&rq->lock);
1399
1400 while (unlikely(task_on_rq_migrating(p)))
1401 cpu_relax();
1402 }
1403}
1404
1405/*
1406 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
1407 */
1408static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
1409 __acquires(p->pi_lock)
1410 __acquires(rq->lock)
1411{
1412 struct rq *rq;
1413
1414 for (;;) {
1415 raw_spin_lock_irqsave(&p->pi_lock, *flags);
1416 rq = task_rq(p);
1417 raw_spin_lock(&rq->lock);
1418 /*
1419 * move_queued_task() task_rq_lock()
1420 *
1421 * ACQUIRE (rq->lock)
1422 * [S] ->on_rq = MIGRATING [L] rq = task_rq()
1423 * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
1424 * [S] ->cpu = new_cpu [L] task_rq()
1425 * [L] ->on_rq
1426 * RELEASE (rq->lock)
1427 *
1428 * If we observe the old cpu in task_rq_lock, the acquire of
1429 * the old rq->lock will fully serialize against the stores.
1430 *
1431 * If we observe the new cpu in task_rq_lock, the acquire will
1432 * pair with the WMB to ensure we must then also see migrating.
1433 */
1434 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
1435 return rq;
1436 raw_spin_unlock(&rq->lock);
1437 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
1438
1439 while (unlikely(task_on_rq_migrating(p)))
1440 cpu_relax();
1441 }
1442}
1443
1444static inline void __task_rq_unlock(struct rq *rq)
1445 __releases(rq->lock)
1446{
1447 raw_spin_unlock(&rq->lock);
1448}
1449
1450static inline void
1451task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
1452 __releases(rq->lock)
1453 __releases(p->pi_lock)
1454{
1455 raw_spin_unlock(&rq->lock);
1456 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
1457}
1458
1383#ifdef CONFIG_SMP 1459#ifdef CONFIG_SMP
1384#ifdef CONFIG_PREEMPT 1460#ifdef CONFIG_PREEMPT
1385 1461
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4b585e0fdd22..0f60b08a4f07 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc)
633 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) 633 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
634 return -EPERM; 634 return -EPERM;
635 635
636 if (txc->modes & ADJ_FREQUENCY) { 636 /*
637 if (LONG_MIN / PPM_SCALE > txc->freq) 637 * Check for potential multiplication overflows that can
638 * only happen on 64-bit systems:
639 */
640 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
641 if (LLONG_MIN / PPM_SCALE > txc->freq)
638 return -EINVAL; 642 return -EINVAL;
639 if (LONG_MAX / PPM_SCALE < txc->freq) 643 if (LLONG_MAX / PPM_SCALE < txc->freq)
640 return -EINVAL; 644 return -EINVAL;
641 } 645 }
642 646
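The ntp change closes a 64-bit-only hole: txc->freq is a long, so on 64-bit systems the freq * PPM_SCALE product must be range-checked against long long limits before it is computed, and dividing the limit by the scale is the overflow-free way to do that. The guard in isolation — PPM_SCALE's real kernel value differs; 65536 here is an illustrative stand-in:

#include <limits.h>
#include <stdio.h>

#define PPM_SCALE 65536		/* stand-in; not the kernel's value */

static int freq_in_range(long long freq)
{
	if (LLONG_MIN / PPM_SCALE > freq)
		return 0;
	if (LLONG_MAX / PPM_SCALE < freq)
		return 0;
	return 1;		/* freq * PPM_SCALE cannot overflow */
}

int main(void)
{
	printf("%d %d\n", freq_in_range(1000),
	       freq_in_range(LLONG_MAX / 2));	/* prints: 1 0 */
	return 0;
}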