 arch/x86/xen/enlighten.c                                    |  22
 arch/x86/xen/p2m.c                                          |  20
 arch/x86/xen/setup.c                                        |  42
 arch/x86/xen/time.c                                         |  18
 block/blk-core.c                                            |  21
 block/blk-mq-tag.c                                          |  14
 block/blk-mq-tag.h                                          |   1
 block/blk-mq.c                                              |  75
 block/blk-mq.h                                              |   1
 block/blk-timeout.c                                         |   3
 drivers/acpi/int340x_thermal.c                              |  11
 drivers/block/null_blk.c                                    |   2
 drivers/block/nvme-core.c                                   | 175
 drivers/block/virtio_blk.c                                  |   2
 drivers/leds/leds-netxbig.c                                 |  12
 drivers/thermal/int340x_thermal/acpi_thermal_rel.c          |  16
 drivers/thermal/int340x_thermal/processor_thermal_device.c |   2
 include/linux/blk-mq.h                                      |   8
 include/linux/blk_types.h                                   |   2
 include/linux/compiler.h                                    |  12
 include/xen/interface/nmi.h                                 |  51
 tools/testing/selftests/exec/execveat.c                     |  19
 tools/testing/selftests/mqueue/mq_perf_tests.c              |   3
 tools/testing/selftests/vm/Makefile                         |   2
 24 files changed, 387 insertions(+), 147 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 6bf3a13e3e0f..78a881b7fc41 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -40,6 +40,7 @@
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/interface/memory.h>
+#include <xen/interface/nmi.h>
 #include <xen/interface/xen-mca.h>
 #include <xen/features.h>
 #include <xen/page.h>
@@ -66,6 +67,7 @@
 #include <asm/reboot.h>
 #include <asm/stackprotector.h>
 #include <asm/hypervisor.h>
+#include <asm/mach_traps.h>
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/pat.h>
@@ -1351,6 +1353,21 @@ static const struct machine_ops xen_machine_ops __initconst = {
 	.emergency_restart = xen_emergency_restart,
 };
 
+static unsigned char xen_get_nmi_reason(void)
+{
+	unsigned char reason = 0;
+
+	/* Construct a value which looks like it came from port 0x61. */
+	if (test_bit(_XEN_NMIREASON_io_error,
+		     &HYPERVISOR_shared_info->arch.nmi_reason))
+		reason |= NMI_REASON_IOCHK;
+	if (test_bit(_XEN_NMIREASON_pci_serr,
+		     &HYPERVISOR_shared_info->arch.nmi_reason))
+		reason |= NMI_REASON_SERR;
+
+	return reason;
+}
+
 static void __init xen_boot_params_init_edd(void)
 {
 #if IS_ENABLED(CONFIG_EDD)
@@ -1535,9 +1552,12 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
 	pv_apic_ops = xen_apic_ops;
-	if (!xen_pvh_domain())
+	if (!xen_pvh_domain()) {
 		pv_cpu_ops = xen_cpu_ops;
 
+		x86_platform.get_nmi_reason = xen_get_nmi_reason;
+	}
+
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
 	else
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index edbc7a63fd73..70fb5075c901 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -167,10 +167,13 @@ static void * __ref alloc_p2m_page(void)
 	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
 }
 
-/* Only to be called in case of a race for a page just allocated! */
-static void free_p2m_page(void *p)
+static void __ref free_p2m_page(void *p)
 {
-	BUG_ON(!slab_is_available());
+	if (unlikely(!slab_is_available())) {
+		free_bootmem((unsigned long)p, PAGE_SIZE);
+		return;
+	}
+
 	free_page((unsigned long)p);
 }
 
@@ -375,7 +378,7 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 				p2m_missing_pte : p2m_identity_pte;
 		for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
 			pmdp = populate_extra_pmd(
-				(unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
+				(unsigned long)(p2m + pfn) + i * PMD_SIZE);
 			set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
 		}
 	}
@@ -436,10 +439,9 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
  * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
  * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
  */
-static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
+static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
 {
 	pte_t *ptechk;
-	pte_t *pteret = ptep;
 	pte_t *pte_newpg[PMDS_PER_MID_PAGE];
 	pmd_t *pmdp;
 	unsigned int level;
@@ -473,8 +475,6 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
 		if (ptechk == pte_pg) {
 			set_pmd(pmdp,
 				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
-			if (vaddr == (addr & ~(PMD_SIZE - 1)))
-				pteret = pte_offset_kernel(pmdp, addr);
 			pte_newpg[i] = NULL;
 		}
 
@@ -488,7 +488,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
 		vaddr += PMD_SIZE;
 	}
 
-	return pteret;
+	return lookup_address(addr, &level);
 }
 
 /*
@@ -517,7 +517,7 @@ static bool alloc_p2m(unsigned long pfn)
 
 	if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
 		/* PMD level is missing, allocate a new one */
-		ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
+		ptep = alloc_p2m_pmd(addr, pte_pg);
 		if (!ptep)
 			return false;
 	}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index dfd77dec8e2b..865e56cea7a0 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -140,7 +140,7 @@ static void __init xen_del_extra_mem(u64 start, u64 size)
 unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
 {
 	int i;
-	unsigned long addr = PFN_PHYS(pfn);
+	phys_addr_t addr = PFN_PHYS(pfn);
 
 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
 		if (addr >= xen_extra_mem[i].start &&
@@ -160,6 +160,8 @@ void __init xen_inv_extra_mem(void)
 	int i;
 
 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		if (!xen_extra_mem[i].size)
+			continue;
 		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
 		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
 		for (pfn = pfn_s; pfn < pfn_e; pfn++)
@@ -229,15 +231,14 @@ static int __init xen_free_mfn(unsigned long mfn)
  * as a fallback if the remapping fails.
  */
 static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
-	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
-	unsigned long *released)
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long *released)
 {
-	unsigned long len = 0;
 	unsigned long pfn, end;
 	int ret;
 
 	WARN_ON(start_pfn > end_pfn);
 
+	/* Release pages first. */
 	end = min(end_pfn, nr_pages);
 	for (pfn = start_pfn; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
@@ -250,16 +251,14 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
 		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
 
 		if (ret == 1) {
+			(*released)++;
 			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
 				break;
-			len++;
 		} else
 			break;
 	}
 
-	/* Need to release pages first */
-	*released += len;
-	*identity += set_phys_range_identity(start_pfn, end_pfn);
+	set_phys_range_identity(start_pfn, end_pfn);
 }
 
 /*
@@ -287,7 +286,7 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
 	}
 
 	/* Update kernel mapping, but not for highmem. */
-	if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
+	if (pfn >= PFN_UP(__pa(high_memory - 1)))
 		return;
 
 	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
@@ -318,7 +317,6 @@ static void __init xen_do_set_identity_and_remap_chunk(
 	unsigned long ident_pfn_iter, remap_pfn_iter;
 	unsigned long ident_end_pfn = start_pfn + size;
 	unsigned long left = size;
-	unsigned long ident_cnt = 0;
 	unsigned int i, chunk;
 
 	WARN_ON(size == 0);
@@ -347,8 +345,7 @@ static void __init xen_do_set_identity_and_remap_chunk(
 		xen_remap_mfn = mfn;
 
 		/* Set identity map */
-		ident_cnt += set_phys_range_identity(ident_pfn_iter,
-			ident_pfn_iter + chunk);
+		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);
 
 		left -= chunk;
 	}
@@ -371,7 +368,7 @@ static void __init xen_do_set_identity_and_remap_chunk(
 static unsigned long __init xen_set_identity_and_remap_chunk(
 	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
 	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
-	unsigned long *identity, unsigned long *released)
+	unsigned long *released, unsigned long *remapped)
 {
 	unsigned long pfn;
 	unsigned long i = 0;
@@ -386,8 +383,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 		/* Do not remap pages beyond the current allocation */
 		if (cur_pfn >= nr_pages) {
 			/* Identity map remaining pages */
-			*identity += set_phys_range_identity(cur_pfn,
-				cur_pfn + size);
+			set_phys_range_identity(cur_pfn, cur_pfn + size);
 			break;
 		}
 		if (cur_pfn + size > nr_pages)
@@ -398,7 +394,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 		if (!remap_range_size) {
 			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
 			xen_set_identity_and_release_chunk(cur_pfn,
-				cur_pfn + left, nr_pages, identity, released);
+				cur_pfn + left, nr_pages, released);
 			break;
 		}
 		/* Adjust size to fit in current e820 RAM region */
@@ -410,7 +406,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 		/* Update variables to reflect new mappings. */
 		i += size;
 		remap_pfn += size;
-		*identity += size;
+		*remapped += size;
 	}
 
 	/*
@@ -427,13 +423,13 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 
 static void __init xen_set_identity_and_remap(
 	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
-	unsigned long *released)
+	unsigned long *released, unsigned long *remapped)
 {
 	phys_addr_t start = 0;
-	unsigned long identity = 0;
 	unsigned long last_pfn = nr_pages;
 	const struct e820entry *entry;
 	unsigned long num_released = 0;
+	unsigned long num_remapped = 0;
 	int i;
 
 	/*
@@ -460,14 +456,14 @@ static void __init xen_set_identity_and_remap(
 			last_pfn = xen_set_identity_and_remap_chunk(
 						list, map_size, start_pfn,
 						end_pfn, nr_pages, last_pfn,
-						&identity, &num_released);
+						&num_released, &num_remapped);
 			start = end;
 		}
 	}
 
 	*released = num_released;
+	*remapped = num_remapped;
 
-	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
 	pr_info("Released %ld page(s)\n", num_released);
 }
 
@@ -586,6 +582,7 @@ char * __init xen_memory_setup(void)
 	struct xen_memory_map memmap;
 	unsigned long max_pages;
 	unsigned long extra_pages = 0;
+	unsigned long remapped_pages;
 	int i;
 	int op;
 
@@ -635,9 +632,10 @@ char * __init xen_memory_setup(void)
 	 * underlying RAM.
 	 */
 	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
-				   &xen_released_pages);
+				   &xen_released_pages, &remapped_pages);
 
 	extra_pages += xen_released_pages;
+	extra_pages += remapped_pages;
 
 	/*
 	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index f473d268d387..69087341d9ae 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -391,7 +391,7 @@ static const struct clock_event_device *xen_clockevent =
 
 struct xen_clock_event_device {
 	struct clock_event_device evt;
-	char *name;
+	char name[16];
 };
 static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
 
@@ -420,46 +420,38 @@ void xen_teardown_timer(int cpu)
 	if (evt->irq >= 0) {
 		unbind_from_irqhandler(evt->irq, NULL);
 		evt->irq = -1;
-		kfree(per_cpu(xen_clock_events, cpu).name);
-		per_cpu(xen_clock_events, cpu).name = NULL;
 	}
 }
 
 void xen_setup_timer(int cpu)
 {
-	char *name;
-	struct clock_event_device *evt;
+	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
+	struct clock_event_device *evt = &xevt->evt;
 	int irq;
 
-	evt = &per_cpu(xen_clock_events, cpu).evt;
 	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
 	if (evt->irq >= 0)
 		xen_teardown_timer(cpu);
 
 	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
 
-	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
-	if (!name)
-		name = "<timer kasprintf failed>";
+	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
 
 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
 				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
-				      name, NULL);
+				      xevt->name, NULL);
 	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
 	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
-	per_cpu(xen_clock_events, cpu).name = name;
 }
 
 
 void xen_setup_cpu_clockevents(void)
 {
-	BUG_ON(preemptible());
-
 	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
 }
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 30f6153a40c2..3ad405571dcc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,6 +473,25 @@ void blk_queue_bypass_end(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
+void blk_set_queue_dying(struct request_queue *q)
+{
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+
+	if (q->mq_ops)
+		blk_mq_wake_waiters(q);
+	else {
+		struct request_list *rl;
+
+		blk_queue_for_each_rl(rl, q) {
+			if (rl->rq_pool) {
+				wake_up(&rl->wait[BLK_RW_SYNC]);
+				wake_up(&rl->wait[BLK_RW_ASYNC]);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -486,7 +505,7 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+	blk_set_queue_dying(q);
 	spin_lock_irq(lock);
 
 	/*
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32e8dbb9ad1c..60c9d4a93fe4 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -68,9 +68,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * Wakeup all potentially sleeping on normal (non-reserved) tags
+ * Wakeup all potentially sleeping on tags
  */
-static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
+void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 {
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
@@ -85,6 +85,12 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 
 		wake_index = bt_index_inc(wake_index);
 	}
+
+	if (include_reserve) {
+		bt = &tags->breserved_tags;
+		if (waitqueue_active(&bt->bs[0].wait))
+			wake_up(&bt->bs[0].wait);
+	}
 }
 
 /*
@@ -100,7 +106,7 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 
 	atomic_dec(&tags->active_queues);
 
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 }
 
 /*
@@ -584,7 +590,7 @@ int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
 	 * static and should never need resizing.
 	 */
 	bt_update_count(&tags->bitmap_tags, tdepth);
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 	return 0;
 }
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 6206ed17ef76..a6fa0fc9d41a 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -54,6 +54,7 @@ extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
 extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
+extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da1ab5641227..2f95747c287e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -107,7 +107,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
 	wake_up_all(&q->mq_freeze_wq);
 }
 
-static void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_mq_freeze_queue_start(struct request_queue *q)
 {
 	bool freeze;
 
@@ -120,6 +120,7 @@ static void blk_mq_freeze_queue_start(struct request_queue *q)
 		blk_mq_run_queues(q, false);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
 static void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
@@ -136,7 +137,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	blk_mq_freeze_queue_wait(q);
 }
 
-static void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue(struct request_queue *q)
 {
 	bool wake;
 
@@ -149,6 +150,24 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 		wake_up_all(&q->mq_freeze_wq);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+
+void blk_mq_wake_waiters(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (blk_mq_hw_queue_mapped(hctx))
+			blk_mq_tag_wakeup_all(hctx->tags, true);
+
+	/*
+	 * If we are called because the queue has now been marked as
+	 * dying, we need to ensure that processes currently waiting on
+	 * the queue are notified as well.
+	 */
+	wake_up_all(&q->mq_freeze_wq);
+}
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -258,8 +277,10 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 		ctx = alloc_data.ctx;
 	}
 	blk_mq_put_ctx(ctx);
-	if (!rq)
+	if (!rq) {
+		blk_mq_queue_exit(q);
 		return ERR_PTR(-EWOULDBLOCK);
+	}
 	return rq;
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -383,6 +404,12 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+int blk_mq_request_started(struct request *rq)
+{
+	return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_request_started);
+
 void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -500,12 +527,38 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
 }
 EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
+void blk_mq_cancel_requeue_work(struct request_queue *q)
+{
+	cancel_work_sync(&q->requeue_work);
+}
+EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
+
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
 	kblockd_schedule_work(&q->requeue_work);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_abort_requeue_list(struct request_queue *q)
+{
+	unsigned long flags;
+	LIST_HEAD(rq_list);
+
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	list_splice_init(&q->requeue_list, &rq_list);
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+	while (!list_empty(&rq_list)) {
+		struct request *rq;
+
+		rq = list_first_entry(&rq_list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		rq->errors = -EIO;
+		blk_mq_end_request(rq, rq->errors);
+	}
+}
+EXPORT_SYMBOL(blk_mq_abort_requeue_list);
+
 static inline bool is_flush_request(struct request *rq,
 		struct blk_flush_queue *fq, unsigned int tag)
 {
@@ -566,13 +619,24 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		break;
 	}
 }
 
 static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
 	struct blk_mq_timeout_data *data = priv;
 
-	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+		/*
+		 * If a request wasn't started before the queue was
+		 * marked dying, kill it here or it'll go unnoticed.
+		 */
+		if (unlikely(blk_queue_dying(rq->q))) {
+			rq->errors = -EIO;
+			blk_mq_complete_request(rq);
+		}
+		return;
+	}
+	if (rq->cmd_flags & REQ_NO_TIMEOUT)
 		return;
 
 	if (time_after_eq(jiffies, rq->deadline)) {
@@ -1601,7 +1665,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	hctx->queue = q;
 	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags;
-	hctx->cmd_size = set->cmd_size;
 
 	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 206230e64f79..4f4f943c22c3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
+void blk_mq_wake_waiters(struct request_queue *q);
 
 /*
  * CPU hotplug helpers
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 56c025894cdf..246dfb16c3d9 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -190,6 +190,9 @@ void blk_add_timer(struct request *req)
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
+	if (req->cmd_flags & REQ_NO_TIMEOUT)
+		return;
+
 	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
 	if (!q->mq_ops && !q->rq_timed_out_fn)
 		return;
diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c
index a27d31d1ba24..9dcf83682e36 100644
--- a/drivers/acpi/int340x_thermal.c
+++ b/drivers/acpi/int340x_thermal.c
@@ -14,10 +14,10 @@
 
 #include "internal.h"
 
-#define DO_ENUMERATION 0x01
+#define INT3401_DEVICE 0X01
 static const struct acpi_device_id int340x_thermal_device_ids[] = {
-	{"INT3400", DO_ENUMERATION },
-	{"INT3401"},
+	{"INT3400"},
+	{"INT3401", INT3401_DEVICE},
 	{"INT3402"},
 	{"INT3403"},
 	{"INT3404"},
@@ -34,7 +34,10 @@ static int int340x_thermal_handler_attach(struct acpi_device *adev,
 				const struct acpi_device_id *id)
 {
 #if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE)
-	if (id->driver_data == DO_ENUMERATION)
+	acpi_create_platform_device(adev);
+#elif defined(INTEL_SOC_DTS_THERMAL) || defined(INTEL_SOC_DTS_THERMAL_MODULE)
+	/* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */
+	if (id->driver_data == INT3401_DEVICE)
 		acpi_create_platform_device(adev);
 #endif
 	return 1;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ae9f615382f6..aa2224aa7caa 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -530,7 +530,7 @@ static int null_add_dev(void)
 		goto out_cleanup_queues;
 
 	nullb->q = blk_mq_init_queue(&nullb->tag_set);
-	if (!nullb->q) {
+	if (IS_ERR(nullb->q)) {
 		rv = -ENOMEM;
 		goto out_cleanup_tags;
 	}
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b1d5d8797315..cb529e9a82dd 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -215,6 +215,7 @@ static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
 	cmd->fn = handler;
 	cmd->ctx = ctx;
 	cmd->aborted = 0;
+	blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
 }
 
 /* Special values must be less than 0x1000 */
@@ -431,8 +432,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR || blk_noretry_request(req))
 			&& (jiffies - req->start_time) < req->timeout) {
+			unsigned long flags;
+
 			blk_mq_requeue_request(req);
-			blk_mq_kick_requeue_list(req->q);
+			spin_lock_irqsave(req->q->queue_lock, flags);
+			if (!blk_queue_stopped(req->q))
+				blk_mq_kick_requeue_list(req->q);
+			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
 		req->errors = nvme_error_status(status);
@@ -664,8 +670,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	blk_mq_start_request(req);
-
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
 	if (req->cmd_flags & REQ_DISCARD)
@@ -835,6 +839,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
 	nvme_set_info(cmd_info, req, async_req_completion);
 
@@ -1016,14 +1021,19 @@ static void nvme_abort_req(struct request *req)
 	struct nvme_command cmd;
 
 	if (!nvmeq->qid || cmd_rq->aborted) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev_list_lock, flags);
 		if (work_busy(&dev->reset_work))
-			return;
+			goto out;
 		list_del_init(&dev->node);
 		dev_warn(&dev->pci_dev->dev,
 			"I/O %d QID %d timeout, reset controller\n",
 							req->tag, nvmeq->qid);
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
+ out:
+		spin_unlock_irqrestore(&dev_list_lock, flags);
 		return;
 	}
 
@@ -1064,15 +1074,22 @@ static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
 	void *ctx;
 	nvme_completion_fn fn;
 	struct nvme_cmd_info *cmd;
-	static struct nvme_completion cqe = {
-		.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
-	};
+	struct nvme_completion cqe;
+
+	if (!blk_mq_request_started(req))
+		return;
 
 	cmd = blk_mq_rq_to_pdu(req);
 
 	if (cmd->ctx == CMD_CTX_CANCELLED)
 		return;
 
+	if (blk_queue_dying(req->q))
+		cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+	else
+		cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
+
+
 	dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
 						req->tag, nvmeq->qid);
 	ctx = cancel_cmd_info(cmd, &fn);
@@ -1084,17 +1101,29 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = cmd->nvmeq;
 
-	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
-							nvmeq->qid);
-	if (nvmeq->dev->initialized)
-		nvme_abort_req(req);
-
 	/*
 	 * The aborted req will be completed on receiving the abort req.
 	 * We enable the timer again. If hit twice, it'll cause a device reset,
 	 * as the device then is in a faulty state.
 	 */
-	return BLK_EH_RESET_TIMER;
+	int ret = BLK_EH_RESET_TIMER;
+
+	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
+							nvmeq->qid);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	if (!nvmeq->dev->initialized) {
+		/*
+		 * Force cancelled command frees the request, which requires we
+		 * return BLK_EH_NOT_HANDLED.
+		 */
+		nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
+		ret = BLK_EH_NOT_HANDLED;
+	} else
+		nvme_abort_req(req);
+	spin_unlock_irq(&nvmeq->q_lock);
+
+	return ret;
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1131,10 +1160,16 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
  */
 static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
-	int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
+	int vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
+	if (nvmeq->cq_vector == -1) {
+		spin_unlock_irq(&nvmeq->q_lock);
+		return 1;
+	}
+	vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
 	nvmeq->dev->online_queues--;
+	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	irq_set_affinity_hint(vector, NULL);
@@ -1169,11 +1204,13 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
+	if (!qid && dev->admin_q)
+		blk_mq_freeze_queue_start(dev->admin_q);
 	nvme_clear_queue(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-							int depth, int vector)
+							int depth)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
@@ -1199,7 +1236,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
-	nvmeq->cq_vector = vector;
 	nvmeq->qid = qid;
 	dev->queue_count++;
 	dev->queues[qid] = nvmeq;
@@ -1244,6 +1280,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
 		return result;
@@ -1355,6 +1392,14 @@ static struct blk_mq_ops nvme_mq_ops = {
 	.timeout	= nvme_timeout,
 };
 
+static void nvme_dev_remove_admin(struct nvme_dev *dev)
+{
+	if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
+		blk_cleanup_queue(dev->admin_q);
+		blk_mq_free_tag_set(&dev->admin_tagset);
+	}
+}
+
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
 	if (!dev->admin_q) {
@@ -1370,21 +1415,20 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 			return -ENOMEM;
 
 		dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-		if (!dev->admin_q) {
+		if (IS_ERR(dev->admin_q)) {
 			blk_mq_free_tag_set(&dev->admin_tagset);
 			return -ENOMEM;
 		}
-	}
+		if (!blk_get_queue(dev->admin_q)) {
+			nvme_dev_remove_admin(dev);
+			return -ENODEV;
+		}
+	} else
+		blk_mq_unfreeze_queue(dev->admin_q);
 
 	return 0;
 }
 
-static void nvme_free_admin_tags(struct nvme_dev *dev)
-{
-	if (dev->admin_q)
-		blk_mq_free_tag_set(&dev->admin_tagset);
-}
-
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1416,7 +1460,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	nvmeq = dev->queues[0];
 	if (!nvmeq) {
-		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, 0);
+		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
 		if (!nvmeq)
 			return -ENOMEM;
 	}
@@ -1439,18 +1483,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	if (result)
 		goto free_nvmeq;
 
-	result = nvme_alloc_admin_tags(dev);
-	if (result)
-		goto free_nvmeq;
-
+	nvmeq->cq_vector = 0;
 	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
 	if (result)
-		goto free_tags;
+		goto free_nvmeq;
 
 	return result;
 
- free_tags:
-	nvme_free_admin_tags(dev);
  free_nvmeq:
 	nvme_free_queues(dev, 0);
 	return result;
@@ -1944,7 +1983,7 @@ static void nvme_create_io_queues(struct nvme_dev *dev)
 	unsigned i;
 
 	for (i = dev->queue_count; i <= dev->max_qid; i++)
-		if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1))
+		if (!nvme_alloc_queue(dev, i, dev->q_depth))
 			break;
 
 	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
@@ -2235,13 +2274,18 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
 			break;
 		if (!schedule_timeout(ADMIN_TIMEOUT) ||
 					fatal_signal_pending(current)) {
+			/*
+			 * Disable the controller first since we can't trust it
+			 * at this point, but leave the admin queue enabled
+			 * until all queue deletion requests are flushed.
+			 * FIXME: This may take a while if there are more h/w
+			 * queues than admin tags.
+			 */
 			set_current_state(TASK_RUNNING);
-
 			nvme_disable_ctrl(dev, readq(&dev->bar->cap));
-			nvme_disable_queue(dev, 0);
-
-			send_sig(SIGKILL, dq->worker->task, 1);
+			nvme_clear_queue(dev->queues[0]);
 			flush_kthread_worker(dq->worker);
+			nvme_disable_queue(dev, 0);
 			return;
 		}
 	}
@@ -2318,7 +2362,6 @@ static void nvme_del_queue_start(struct kthread_work *work)
 {
 	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
 							cmdinfo.work);
-	allow_signal(SIGKILL);
 	if (nvme_delete_sq(nvmeq))
 		nvme_del_queue_end(nvmeq);
 }
@@ -2376,6 +2419,34 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
 		kthread_stop(tmp);
 }
 
+static void nvme_freeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		blk_mq_freeze_queue_start(ns->queue);
+
+		spin_lock(ns->queue->queue_lock);
+		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+		spin_unlock(ns->queue->queue_lock);
+
+		blk_mq_cancel_requeue_work(ns->queue);
+		blk_mq_stop_hw_queues(ns->queue);
+	}
+}
+
+static void nvme_unfreeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+		blk_mq_unfreeze_queue(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+		blk_mq_kick_requeue_list(ns->queue);
+	}
+}
+
 static void nvme_dev_shutdown(struct nvme_dev *dev)
 {
 	int i;
@@ -2384,8 +2455,10 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	dev->initialized = 0;
 	nvme_dev_list_remove(dev);
 
-	if (dev->bar)
+	if (dev->bar) {
+		nvme_freeze_queues(dev);
 		csts = readl(&dev->bar->csts);
+	}
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
 		for (i = dev->queue_count - 1; i >= 0; i--) {
 			struct nvme_queue *nvmeq = dev->queues[i];
@@ -2400,12 +2473,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	nvme_dev_unmap(dev);
 }
 
-static void nvme_dev_remove_admin(struct nvme_dev *dev)
-{
-	if (dev->admin_q && !blk_queue_dying(dev->admin_q))
-		blk_cleanup_queue(dev->admin_q);
-}
-
 static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
@@ -2413,8 +2480,10 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 	list_for_each_entry(ns, &dev->namespaces, list) {
 		if (ns->disk->flags & GENHD_FL_UP)
 			del_gendisk(ns->disk);
-		if (!blk_queue_dying(ns->queue))
+		if (!blk_queue_dying(ns->queue)) {
+			blk_mq_abort_requeue_list(ns->queue);
 			blk_cleanup_queue(ns->queue);
+		}
 	}
 }
2420 2489
@@ -2495,6 +2564,7 @@ static void nvme_free_dev(struct kref *kref)
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
 	blk_mq_free_tag_set(&dev->tagset);
+	blk_put_queue(dev->admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -2591,15 +2661,20 @@ static int nvme_dev_start(struct nvme_dev *dev)
 	}
 
 	nvme_init_queue(dev->queues[0], 0);
+	result = nvme_alloc_admin_tags(dev);
+	if (result)
+		goto disable;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto disable;
+		goto free_tags;
 
 	nvme_set_irq_hints(dev);
 
 	return result;
 
+ free_tags:
+	nvme_dev_remove_admin(dev);
  disable:
 	nvme_disable_queue(dev, 0);
 	nvme_dev_list_remove(dev);
@@ -2639,6 +2714,9 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		dev->reset_workfn = nvme_remove_disks;
 		queue_work(nvme_workq, &dev->reset_work);
 		spin_unlock(&dev_list_lock);
+	} else {
+		nvme_unfreeze_queues(dev);
+		nvme_set_irq_hints(dev);
 	}
 	dev->initialized = 1;
 	return 0;
@@ -2776,11 +2854,10 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
 	misc_deregister(&dev->miscdev);
-	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
+	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-	nvme_free_admin_tags(dev);
 	nvme_release_prp_pools(dev);
 	kref_put(&dev->kref, nvme_free_dev);
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7ef7c098708f..cdfbd21e3597 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -638,7 +638,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_put_disk;
 
 	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
-	if (!q) {
+	if (IS_ERR(q)) {
 		err = -ENOMEM;
 		goto out_free_tags;
 	}
diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c
index 26515c27ea8c..25e419752a7b 100644
--- a/drivers/leds/leds-netxbig.c
+++ b/drivers/leds/leds-netxbig.c
@@ -330,18 +330,18 @@ create_netxbig_led(struct platform_device *pdev,
 	led_dat->sata = 0;
 	led_dat->cdev.brightness = LED_OFF;
 	led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
-	/*
-	 * If available, expose the SATA activity blink capability through
-	 * a "sata" sysfs attribute.
-	 */
-	if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE)
-		led_dat->cdev.groups = netxbig_led_groups;
 	led_dat->mode_addr = template->mode_addr;
 	led_dat->mode_val = template->mode_val;
 	led_dat->bright_addr = template->bright_addr;
 	led_dat->bright_max = (1 << pdata->gpio_ext->num_data) - 1;
 	led_dat->timer = pdata->timer;
 	led_dat->num_timer = pdata->num_timer;
+	/*
+	 * If available, expose the SATA activity blink capability through
+	 * a "sata" sysfs attribute.
+	 */
+	if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE)
+		led_dat->cdev.groups = netxbig_led_groups;
 
 	return led_classdev_register(&pdev->dev, &led_dat->cdev);
 }
diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
index 231cabc16e16..2c2ec7666eb1 100644
--- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
@@ -119,15 +119,11 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
 			continue;
 
 		result = acpi_bus_get_device(trt->source, &adev);
-		if (!result)
-			acpi_create_platform_device(adev);
-		else
+		if (result)
 			pr_warn("Failed to get source ACPI device\n");
 
 		result = acpi_bus_get_device(trt->target, &adev);
-		if (!result)
-			acpi_create_platform_device(adev);
-		else
+		if (result)
 			pr_warn("Failed to get target ACPI device\n");
 	}
 
@@ -206,16 +202,12 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 
 		if (art->source) {
 			result = acpi_bus_get_device(art->source, &adev);
-			if (!result)
-				acpi_create_platform_device(adev);
-			else
+			if (result)
 				pr_warn("Failed to get source ACPI device\n");
 		}
 		if (art->target) {
 			result = acpi_bus_get_device(art->target, &adev);
-			if (!result)
-				acpi_create_platform_device(adev);
-			else
+			if (result)
 				pr_warn("Failed to get source ACPI device\n");
 		}
 	}
diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c
index 31bb553aac26..0fe5dbbea968 100644
--- a/drivers/thermal/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c
@@ -130,6 +130,8 @@ static int proc_thermal_add(struct device *dev,
 	int ret;
 
 	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
 
 	status = acpi_evaluate_object(adev->handle, "PPCC", NULL, &buf);
 	if (ACPI_FAILURE(status))
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8aded9ab2e4e..5735e7130d63 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -34,7 +34,6 @@ struct blk_mq_hw_ctx {
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
 	struct request_queue	*queue;
-	unsigned int		queue_num;
 	struct blk_flush_queue	*fq;
 
 	void			*driver_data;
@@ -54,7 +53,7 @@ struct blk_mq_hw_ctx {
 	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
 	unsigned int		numa_node;
-	unsigned int		cmd_size;	/* per-request extra data */
+	unsigned int		queue_num;
 
 	atomic_t		nr_active;
 
@@ -195,13 +194,16 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 
+int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, int error);
 void __blk_mq_end_request(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
+void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
+void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
@@ -212,6 +214,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
212void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 214void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
213void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, 215void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
214 void *priv); 216 void *priv);
217void blk_mq_unfreeze_queue(struct request_queue *q);
218void blk_mq_freeze_queue_start(struct request_queue *q);
215 219
216/* 220/*
217 * Driver command data is immediately after the request. So subtract request 221 * Driver command data is immediately after the request. So subtract request
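
[Editor's note on the blk-mq.h additions] Taken together, the new exports give a driver's teardown or reset path the pieces it needs: blk_mq_request_started() lets an abort/timeout handler check whether a request ever reached the hardware, while the freeze/cancel/abort helpers drain the software side. The sketch below only illustrates how the prototypes compose -- my_driver_shutdown() is a hypothetical function, and the exact ordering a real driver in this series (e.g. nvme-core.c in the diffstat) uses may differ:

#include <linux/blk-mq.h>

static void my_driver_shutdown(struct request_queue *q)	/* hypothetical */
{
	/* stop new requests from entering the queue */
	blk_mq_freeze_queue_start(q);

	/* make sure no deferred requeue work runs behind our back */
	blk_mq_cancel_requeue_work(q);

	/* fail whatever is still parked on the requeue list */
	blk_mq_abort_requeue_list(q);

	/* ... reset or power down the hardware here ... */

	/* allow I/O again once the device is usable */
	blk_mq_unfreeze_queue(q);
}
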
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 445d59231bc4..c294e3e25e37 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -190,6 +190,7 @@ enum rq_flag_bits {
190 __REQ_PM, /* runtime pm request */ 190 __REQ_PM, /* runtime pm request */
191 __REQ_HASHED, /* on IO scheduler merge hash */ 191 __REQ_HASHED, /* on IO scheduler merge hash */
192 __REQ_MQ_INFLIGHT, /* track inflight for MQ */ 192 __REQ_MQ_INFLIGHT, /* track inflight for MQ */
193 __REQ_NO_TIMEOUT, /* requests may never expire */
193 __REQ_NR_BITS, /* stops here */ 194 __REQ_NR_BITS, /* stops here */
194}; 195};
195 196
@@ -243,5 +244,6 @@ enum rq_flag_bits {
243#define REQ_PM (1ULL << __REQ_PM) 244#define REQ_PM (1ULL << __REQ_PM)
244#define REQ_HASHED (1ULL << __REQ_HASHED) 245#define REQ_HASHED (1ULL << __REQ_HASHED)
245#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) 246#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
247#define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT)
246 248
247#endif /* __LINUX_BLK_TYPES_H */ 249#endif /* __LINUX_BLK_TYPES_H */
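
[Editor's note on REQ_NO_TIMEOUT] Per the comment added above, the flag marks requests that may never expire, so the block-layer timeout path (presumably the blk-timeout.c change in the diffstat) leaves them alone. Applying it is a one-liner on a request the driver owns; the long-lived-command case below is illustrative:

	/* Sketch: 'rq' is a struct request the driver allocated for a
	 * command that legitimately stays outstanding for a long time
	 * (for example an asynchronous event request). */
	rq->cmd_flags |= REQ_NO_TIMEOUT;
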
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index a1c81f80978e..33063f872ee3 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -215,7 +215,7 @@ static __always_inline void __read_once_size(volatile void *p, void *res, int si
215 } 215 }
216} 216}
217 217
218static __always_inline void __assign_once_size(volatile void *p, void *res, int size) 218static __always_inline void __write_once_size(volatile void *p, void *res, int size)
219{ 219{
220 switch (size) { 220 switch (size) {
221 case 1: *(volatile __u8 *)p = *(__u8 *)res; break; 221 case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
@@ -235,15 +235,15 @@ static __always_inline void __assign_once_size(volatile void *p, void *res, int
235/* 235/*
236 * Prevent the compiler from merging or refetching reads or writes. The 236 * Prevent the compiler from merging or refetching reads or writes. The
237 * compiler is also forbidden from reordering successive instances of 237 * compiler is also forbidden from reordering successive instances of
238 * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the 238 * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
239 * compiler is aware of some particular ordering. One way to make the 239 * compiler is aware of some particular ordering. One way to make the
240 * compiler aware of ordering is to put the two invocations of READ_ONCE, 240 * compiler aware of ordering is to put the two invocations of READ_ONCE,
241 * ASSIGN_ONCE or ACCESS_ONCE() in different C statements. 241 * WRITE_ONCE or ACCESS_ONCE() in different C statements.
242 * 242 *
243 * In contrast to ACCESS_ONCE these two macros will also work on aggregate 243 * In contrast to ACCESS_ONCE these two macros will also work on aggregate
244 * data types like structs or unions. If the size of the accessed data 244 * data types like structs or unions. If the size of the accessed data
245 * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) 245 * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
246 * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a 246 * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
247 * compile-time warning. 247 * compile-time warning.
248 * 248 *
249 * Their two major use cases are: (1) Mediating communication between 249 * Their two major use cases are: (1) Mediating communication between
@@ -257,8 +257,8 @@ static __always_inline void __assign_once_size(volatile void *p, void *res, int
257#define READ_ONCE(x) \ 257#define READ_ONCE(x) \
258 ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) 258 ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
259 259
260#define ASSIGN_ONCE(val, x) \ 260#define WRITE_ONCE(x, val) \
261 ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; }) 261 ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; })
262 262
263#endif /* __KERNEL__ */ 263#endif /* __KERNEL__ */
264 264
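
[Editor's note on the compiler.h rename] Besides the name change, the argument order is swapped: WRITE_ONCE() takes the destination first, mirroring a plain C assignment and pairing naturally with READ_ONCE(). A small usage sketch (shared->flag and val are illustrative names, not from this file):

	/* old:  ASSIGN_ONCE(newval, shared->flag);
	 * new:  WRITE_ONCE(shared->flag, newval);   -- destination first */
	WRITE_ONCE(shared->flag, 1);
	val = READ_ONCE(shared->flag);
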
diff --git a/include/xen/interface/nmi.h b/include/xen/interface/nmi.h
new file mode 100644
index 000000000000..b47d9d06fade
--- /dev/null
+++ b/include/xen/interface/nmi.h
@@ -0,0 +1,51 @@
1/******************************************************************************
2 * nmi.h
3 *
4 * NMI callback registration and reason codes.
5 *
6 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
7 */
8
9#ifndef __XEN_PUBLIC_NMI_H__
10#define __XEN_PUBLIC_NMI_H__
11
12#include <xen/interface/xen.h>
13
14/*
15 * NMI reason codes:
16 * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
17 */
18 /* I/O-check error reported via ISA port 0x61, bit 6. */
19#define _XEN_NMIREASON_io_error 0
20#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
21 /* PCI SERR reported via ISA port 0x61, bit 7. */
22#define _XEN_NMIREASON_pci_serr 1
23#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr)
24 /* Unknown hardware-generated NMI. */
25#define _XEN_NMIREASON_unknown 2
26#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
27
28/*
29 * long nmi_op(unsigned int cmd, void *arg)
30 * NB. All ops return zero on success, else a negative error code.
31 */
32
33/*
34 * Register NMI callback for this (calling) VCPU. Currently this only makes
35 * sense for domain 0, vcpu 0. All other callers will be returned EINVAL.
36 * arg == pointer to xennmi_callback structure.
37 */
38#define XENNMI_register_callback 0
39struct xennmi_callback {
40 unsigned long handler_address;
41 unsigned long pad;
42};
43DEFINE_GUEST_HANDLE_STRUCT(xennmi_callback);
44
45/*
46 * Deregister NMI callback for this (calling) VCPU.
47 * arg == NULL.
48 */
49#define XENNMI_unregister_callback 1
50
51#endif /* __XEN_PUBLIC_NMI_H__ */
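
[Editor's note on the new nmi.h] The header only defines the ABI (command numbers, reason bits and struct xennmi_callback); it does not provide a Linux hypercall wrapper. The sketch below shows the shape of a registration call as the header comments describe it -- do_xen_nmi_op() and xen_nmi_entry() are hypothetical names invented for illustration:

#include <xen/interface/nmi.h>

extern void xen_nmi_entry(void);			/* hypothetical NMI entry stub */
extern long do_xen_nmi_op(unsigned int cmd, void *arg);	/* hypothetical nmi_op wrapper */

static long register_nmi_callback(void)
{
	struct xennmi_callback cb = {
		.handler_address = (unsigned long)xen_nmi_entry,
		.pad = 0,
	};

	/* Per the header: only dom0/vcpu0 may register; others get EINVAL. */
	return do_xen_nmi_op(XENNMI_register_callback, &cb);
}
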
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index d273624c93a6..e238c9559caf 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -62,7 +62,7 @@ static int _check_execveat_fail(int fd, const char *path, int flags,
62} 62}
63 63
64static int check_execveat_invoked_rc(int fd, const char *path, int flags, 64static int check_execveat_invoked_rc(int fd, const char *path, int flags,
65 int expected_rc) 65 int expected_rc, int expected_rc2)
66{ 66{
67 int status; 67 int status;
68 int rc; 68 int rc;
@@ -98,9 +98,10 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags,
98 child, status); 98 child, status);
99 return 1; 99 return 1;
100 } 100 }
101 if (WEXITSTATUS(status) != expected_rc) { 101 if ((WEXITSTATUS(status) != expected_rc) &&
102 printf("[FAIL] (child %d exited with %d not %d)\n", 102 (WEXITSTATUS(status) != expected_rc2)) {
103 child, WEXITSTATUS(status), expected_rc); 103 printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
104 child, WEXITSTATUS(status), expected_rc, expected_rc2);
104 return 1; 105 return 1;
105 } 106 }
106 printf("[OK]\n"); 107 printf("[OK]\n");
@@ -109,7 +110,7 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags,
109 110
110static int check_execveat(int fd, const char *path, int flags) 111static int check_execveat(int fd, const char *path, int flags)
111{ 112{
112 return check_execveat_invoked_rc(fd, path, flags, 99); 113 return check_execveat_invoked_rc(fd, path, flags, 99, 99);
113} 114}
114 115
115static char *concat(const char *left, const char *right) 116static char *concat(const char *left, const char *right)
@@ -192,9 +193,15 @@ static int check_execveat_pathmax(int dot_dfd, const char *src, int is_script)
192 * Execute as a long pathname relative to ".". If this is a script, 193 * Execute as a long pathname relative to ".". If this is a script,
193 * the interpreter will launch but fail to open the script because its 194 * the interpreter will launch but fail to open the script because its
194 * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX. 195 * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX.
196 *
197 * The failure code is usually 127 (POSIX: "If a command is not found,
198 * the exit status shall be 127."), but some systems give 126 (POSIX:
199 * "If the command name is found, but it is not an executable utility,
200 * the exit status shall be 126."), so allow either.
195 */ 201 */
196 if (is_script) 202 if (is_script)
197 fail += check_execveat_invoked_rc(dot_dfd, longpath, 0, 127); 203 fail += check_execveat_invoked_rc(dot_dfd, longpath, 0,
204 127, 126);
198 else 205 else
199 fail += check_execveat(dot_dfd, longpath, 0); 206 fail += check_execveat(dot_dfd, longpath, 0);
200 207
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 94dae65eea41..8519e9ee97e3 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -536,10 +536,9 @@ int main(int argc, char *argv[])
536{ 536{
537 struct mq_attr attr; 537 struct mq_attr attr;
538 char *option, *next_option; 538 char *option, *next_option;
539 int i, cpu; 539 int i, cpu, rc;
540 struct sigaction sa; 540 struct sigaction sa;
541 poptContext popt_context; 541 poptContext popt_context;
542 char rc;
543 void *retval; 542 void *retval;
544 543
545 main_thread = pthread_self(); 544 main_thread = pthread_self();
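
[Editor's note on the char -> int change] rc collects negative error returns (presumably from popt and pthread calls later in main()), and the signedness of plain char is implementation-defined: on ABIs where char is unsigned (ARM, s390, PowerPC), a returned -1 becomes 255, so checks like 'rc < 0' never trigger and errors go unnoticed. A standalone illustration, with api_call() as a made-up stand-in:

#include <stdio.h>

static int api_call(void)		/* stand-in for a call returning -1 on error */
{
	return -1;
}

int main(void)
{
	char narrow = api_call();	/* old declaration: char rc */
	int  wide   = api_call();	/* new declaration: int rc */

	/* prints 0 on unsigned-char ABIs, 1 elsewhere */
	printf("char sees error: %d\n", narrow < 0);
	/* always prints 1 */
	printf("int  sees error: %d\n", wide < 0);
	return 0;
}
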
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 4c4b1f631ecf..077828c889f1 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -7,7 +7,7 @@ BINARIES += transhuge-stress
7 7
8all: $(BINARIES) 8all: $(BINARIES)
9%: %.c 9%: %.c
10 $(CC) $(CFLAGS) -o $@ $^ 10 $(CC) $(CFLAGS) -o $@ $^ -lrt
11 11
12run_tests: all 12run_tests: all
13 @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) 13 @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1)
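
[Editor's note on the -lrt addition] This is the usual fix for toolchains with glibc older than 2.17, where clock_gettime() and related functions live in librt rather than libc; the exact undefined symbol that motivated it is not visible in this diff, so treat the example below as an assumption about the failure mode rather than a record of it:

/* Links only with -lrt on older glibc:
 *   cc $(CFLAGS) -o timer timer.c -lrt
 */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);	/* in librt before glibc 2.17 */
	printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}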