Merge branch 'perf/urgent' into perf/core

Conflicts:
	tools/perf/util/python.c

Merge reason: resolve the conflict with perf/urgent.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2011-06-04 06:28:05 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-06-04 06:28:05 -0400
commit: 3ce2a0bc9dfb6423491afe0afc9f099e24b8cba4 (patch)
tree: 58fbef582846fef0e777b1a552aca12e21a071b1
parent: aef29bf20bd79c73992ab23d5067e9f0448b466e (diff)
parent: aa4a221875873d2a1f9656cb7fd7e545e952b4fa (diff)
47 files changed, 844 insertions, 203 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5438a2d7907f..d9a203b058f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        With this option on every unmap_single operation will
                        result in a hardware IOTLB flush operation as opposed
                        to batching them for performance.
+                sp_off [Default Off]
+                        By default, super page will be supported if Intel IOMMU
+                        has the capability. With this option, super page will
+                        not be supported.
        intremap=       [X86-64, Intel-IOMMU]
                        Format: { on (default) | off | nosid }
                        on      enable Interrupt Remapping (default)
diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile
index bebac6b4f332..0ac34206f7a7 100644
--- a/Documentation/virtual/lguest/Makefile
+++ b/Documentation/virtual/lguest/Makefile
@@ -1,5 +1,5 @@
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-# Missing headers?  Add "-I../../include -I../../arch/x86/include"
+# Missing headers?  Add "-I../../../include -I../../../arch/x86/include"
 CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
 all: lguest
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index d9da7e148538..cd9d6af61d07 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -49,7 +49,7 @@
 #include <linux/virtio_rng.h>
 #include <linux/virtio_ring.h>
 #include <asm/bootparam.h>
-#include "../../include/linux/lguest_launcher.h"
+#include "../../../include/linux/lguest_launcher.h"
 /*L:110
 * We can ignore the 42 include files we need for this program, but I do want
 * to draw attention to the use of kernel-style types.
@@ -135,9 +135,6 @@ struct device {
        /* Is it operational */
        bool running;
-        /* Does Guest want an intrrupt on empty? */
-        bool irq_on_empty;
        /* Device-specific data. */
        void *priv;
 };
@@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq)
        /* If they don't want an interrupt, don't send one... */
        if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
-                /* ... unless they've asked us to force one on empty. */
+                return;
-                if (!vq->dev->irq_on_empty
-                    || lg_last_avail(vq) != vq->vring.avail->idx)
-                        return;
        }
        /* Send the Guest an interrupt tell them we used something up. */
@@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq)
        close(vq->eventfd);
 }
-static bool accepted_feature(struct device *dev, unsigned int bit)
-{
-        const u8 *features = get_feature_bits(dev) + dev->feature_len;
-        if (dev->feature_len < bit / CHAR_BIT)
-                return false;
-        return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
-}
 static void start_device(struct device *dev)
 {
        unsigned int i;
@@ -1079,8 +1064,6 @@ static void start_device(struct device *dev)
                verbose(" %02x", get_feature_bits(dev)
                        [dev->feature_len+i]);
-        dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
        for (vq = dev->vq; vq; vq = vq->next) {
                if (vq->service)
                        create_thread(vq);
@@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg)
        /* Set up the tun device. */
        configure_device(ipfd, tapif, ip);
-        add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
        /* Expect Guest to handle everything except UFO */
        add_feature(dev, VIRTIO_NET_F_CSUM);
        add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 1cf0f496f744..7c928da35b17 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -320,11 +320,12 @@
 #define __NR_clock_adjtime              1328
 #define __NR_syncfs                     1329
 #define __NR_setns                      1330
+#define __NR_sendmmsg                   1331
 #ifdef __KERNEL__
-#define NR_syscalls                     307 /* length of syscall table */
+#define NR_syscalls                     308 /* length of syscall table */
 /*
 * The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 9ca80193cd4e..97dd2abdeb1a 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1776,6 +1776,7 @@ sys_call_table:
        data8 sys_clock_adjtime
        data8 sys_syncfs
        data8 sys_setns                         // 1330
+        data8 sys_sendmmsg
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 9089b0421191..7667db448aa7 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = {
 static int __init init_pmacpic_syscore(void)
 {
-        register_syscore_ops(&pmacpic_syscore_ops);
+        if (pmac_irq_hw[0])
+                register_syscore_ops(&pmacpic_syscore_ops);
        return 0;
 }
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f5abe3a245b8..90b06d4daee2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
@@ -28,6 +29,7 @@ CFLAGS_paravirt.o	:= $(nostackp)
 GCOV_PROFILE_vsyscall_64.o      := n
 GCOV_PROFILE_hpet.o             := n
 GCOV_PROFILE_tsc.o              := n
+GCOV_PROFILE_vread_tsc_64.o     := n
 GCOV_PROFILE_paravirt.o         := n
 # vread_tsc_64 is hot and should be fully optimized:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 426a5b66f7e4..2e4928d45a2d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -642,7 +642,7 @@ static int __init idle_setup(char *str)
                boot_option_idle_override = IDLE_POLL;
        } else if (!strcmp(str, "mwait")) {
                boot_option_idle_override = IDLE_FORCE_MWAIT;
-                WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n");
+                WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
        } else if (!strcmp(str, "halt")) {
                /*
                 * When the boot option of idle=halt is added, halt is
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eefd96765e79..33a0c11797de 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void)
        void *mwait_ptr;
        struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
-        if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
+        if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
                return;
        if (!this_cpu_has(X86_FEATURE_CLFLSH))
                return;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e191c096ab90..db832fd65ecb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
 static void lguest_time_init(void)
 {
        /* Set up the timer interrupt (0) to go to our simple timer routine */
+        lguest_setup_irq(0);
        irq_set_handler(0, lguest_time_irq);
        clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index a0aabd904a51..46b8136c31bb 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -321,7 +321,6 @@ static void pcd_init_units(void)
                strcpy(disk->disk_name, cd->name);      /* umm... */
                disk->fops = &pcd_bdops;
                disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
-                disk->events = DISK_EVENT_MEDIA_CHANGE;
        }
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6ecf89cdf006..079c08808d8a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -6,10 +6,13 @@
 #include <linux/virtio.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
+#include <scsi/scsi_cmnd.h>
 #define PART_BITS 4
 static int major, index;
+struct workqueue_struct *virtblk_wq;
 struct virtio_blk
 {
@@ -26,6 +29,9 @@ struct virtio_blk
        mempool_t *pool;
+        /* Process context for config space updates */
+        struct work_struct config_work;
        /* What host tells us, plus 2 for header & tailer. */
        unsigned int sg_elems;
@@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
        num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
        if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
-                sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
+                sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
                sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
                           sizeof(vbr->in_hdr));
        }
@@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev,
 }
 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static void virtblk_config_changed_work(struct work_struct *work)
+{
+        struct virtio_blk *vblk =
+                container_of(work, struct virtio_blk, config_work);
+        struct virtio_device *vdev = vblk->vdev;
+        struct request_queue *q = vblk->disk->queue;
+        char cap_str_2[10], cap_str_10[10];
+        u64 capacity, size;
+        /* Host must always specify the capacity. */
+        vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
+                          &capacity, sizeof(capacity));
+        /* If capacity is too big, truncate with warning. */
+        if ((sector_t)capacity != capacity) {
+                dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+                         (unsigned long long)capacity);
+                capacity = (sector_t)-1;
+        }
+        size = capacity * queue_logical_block_size(q);
+        string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
+        string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
+        dev_notice(&vdev->dev,
+                  "new size: %llu %d-byte logical blocks (%s/%s)\n",
+                  (unsigned long long)capacity,
+                  queue_logical_block_size(q),
+                  cap_str_10, cap_str_2);
+        set_capacity(vblk->disk, capacity);
+}
+static void virtblk_config_changed(struct virtio_device *vdev)
+{
+        struct virtio_blk *vblk = vdev->priv;
+        queue_work(virtblk_wq, &vblk->config_work);
+}
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
        struct virtio_blk *vblk;
@@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
        vblk->vdev = vdev;
        vblk->sg_elems = sg_elems;
        sg_init_table(vblk->sg, vblk->sg_elems);
+        INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
        /* We expect one virtqueue, for output. */
        vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
@@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 {
        struct virtio_blk *vblk = vdev->priv;
+        flush_work(&vblk->config_work);
        /* Nothing should be pending. */
        BUG_ON(!list_empty(&vblk->reqs));
@@ -508,27 +557,47 @@ static unsigned int features[] = {
 * Use __refdata to avoid this warning.
 */
 static struct virtio_driver __refdata virtio_blk = {
-        .feature_table = features,
+        .feature_table          = features,
-        .feature_table_size = ARRAY_SIZE(features),
+        .feature_table_size     = ARRAY_SIZE(features),
-        .driver.name =  KBUILD_MODNAME,
+        .driver.name            = KBUILD_MODNAME,
-        .driver.owner = THIS_MODULE,
+        .driver.owner           = THIS_MODULE,
-        .id_table =     id_table,
+        .id_table               = id_table,
-        .probe =        virtblk_probe,
+        .probe                  = virtblk_probe,
-        .remove =       __devexit_p(virtblk_remove),
+        .remove                 = __devexit_p(virtblk_remove),
+        .config_changed         = virtblk_config_changed,
 };
 static int __init init(void)
 {
+        int error;
+        virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
+        if (!virtblk_wq)
+                return -ENOMEM;
        major = register_blkdev(0, "virtblk");
-        if (major < 0)
+        if (major < 0) {
-                return major;
+                error = major;
-        return register_virtio_driver(&virtio_blk);
+                goto out_destroy_workqueue;
+        }
+        error = register_virtio_driver(&virtio_blk);
+        if (error)
+                goto out_unregister_blkdev;
+        return 0;
+out_unregister_blkdev:
+        unregister_blkdev(major, "virtblk");
+out_destroy_workqueue:
+        destroy_workqueue(virtblk_wq);
+        return error;
 }
 static void __exit fini(void)
 {
        unregister_blkdev(major, "virtblk");
        unregister_virtio_driver(&virtio_blk);
+        destroy_workqueue(virtblk_wq);
 }
 module_init(init);
 module_exit(fini);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index ae15a4ddaa9b..7878da89d29e 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        gendisk->fops = &viocd_fops;
        gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
                         GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
-        gendisk->events = DISK_EVENT_MEDIA_CHANGE;
        set_capacity(gendisk, 0);
        gendisk->private_data = d;
        d->viocd_disk = gendisk;
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 838568a7dbf5..fb68b1295373 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
        portdev->config.max_nr_ports = 1;
        if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
                multiport = true;
-                vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
                vdev->config->get(vdev, offsetof(struct virtio_console_config,
                                                 max_nr_ports),
                                  &portdev->config.max_nr_ports,
                                  sizeof(portdev->config.max_nr_ports));
        }
-        /* Let the Host know we support multiple ports.*/
-        vdev->config->finalize_features(vdev);
        err = init_vqs(portdev);
        if (err < 0) {
                dev_err(&vdev->dev, "Error %d initializing vqs\n", err);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 6e5123b1d341..144d27261e43 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive)
        ide_cd_read_toc(drive, &sense);
        g->fops = &idecd_ops;
        g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
-        g->events = DISK_EVENT_MEDIA_CHANGE;
        add_disk(g);
        return 0;
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index b0c56313dbbb..8cebec5e85ee 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg)
                return 1;
        }
        /* Readjust the instruction pointer if needed */
-        instruction_pointer_set(&kgdbts_regs, ip + offset);
+        ip += offset;
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+        instruction_pointer_set(&kgdbts_regs, ip);
+#endif
        return 0;
 }
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0cb0b0632672..f6853247a620 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
         * before it gets out of hand.  Naturally, this wastes entries. */
        if (capacity < 2+MAX_SKB_FRAGS) {
                netif_stop_queue(dev);
-                if (unlikely(!virtqueue_enable_cb(vi->svq))) {
+                if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
                        /* More just got used, free them then recheck. */
                        capacity += free_old_xmit_skbs(vi);
                        if (capacity >= 2+MAX_SKB_FRAGS) {
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 12e02bf92c4a..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
        {
 #ifdef CONFIG_INTR_REMAP
                struct acpi_table_dmar *dmar;
-                /*
-                 * for now we will disable dma-remapping when interrupt
-                 * remapping is enabled.
-                 * When support for queued invalidation for IOTLB invalidation
-                 * is added, we will not need this any more.
-                 */
                dmar = (struct acpi_table_dmar *) dmar_tbl;
                if (ret && cpu_has_x2apic && dmar->flags & 0x1)
                        printk(KERN_INFO
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6af6b628175b..59f17acf7f68 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -47,6 +47,8 @@
 #define ROOT_SIZE               VTD_PAGE_SIZE
 #define CONTEXT_SIZE            VTD_PAGE_SIZE
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+                            ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
        return (pfn + level_size(level) - 1) & level_mask(level);
 }
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+        return  1 << ((lvl - 1) * LEVEL_STRIDE);
+}
 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
 /*
+ * set to 1 to panic kernel if can't successfully enable VT-d
+ * (used when kernel is launched w/ TXT)
+ */
+static int force_on = 0;
+/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
@@ -338,6 +351,9 @@ struct dmar_domain {
        int             iommu_coherency;/* indicate coherency of iommu access */
        int             iommu_snooping; /* indicate snooping control feature*/
        int             iommu_count;    /* reference count of iommu */
+        int             iommu_superpage;/* Level of superpages supported:
+                                           0 == 4KiB (no superpages), 1 == 2MiB,
+                                           2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
        spinlock_t      iommu_lock;     /* protect iommu set in domain */
        u64             max_addr;       /* maximum mapped address */
 };
@@ -387,6 +403,7 @@ int dmar_disabled = 1;
 static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
                        printk(KERN_INFO
                                "Intel-IOMMU: disable batched IOTLB flush\n");
                        intel_iommu_strict = 1;
+                } else if (!strncmp(str, "sp_off", 6)) {
+                        printk(KERN_INFO
+                                "Intel-IOMMU: disable supported super page\n");
+                        intel_iommu_superpage = 0;
                }
                str += strcspn(str, ",");
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
        }
 }
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+        int i, mask = 0xf;
+        if (!intel_iommu_superpage) {
+                domain->iommu_superpage = 0;
+                return;
+        }
+        domain->iommu_superpage = 4; /* 1TiB */
+        for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+                mask |= cap_super_page_val(g_iommus[i]->cap);
+                if (!mask) {
+                        break;
+                }
+        }
+        domain->iommu_superpage = fls(mask);
+}
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
        domain_update_iommu_coherency(domain);
        domain_update_iommu_snooping(domain);
+        domain_update_iommu_superpage(domain);
 }
 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
@@ -689,23 +731,31 @@ out:
 }
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-                                      unsigned long pfn)
+                                      unsigned long pfn, int large_level)
 {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        struct dma_pte *parent, *pte = NULL;
        int level = agaw_to_level(domain->agaw);
-        int offset;
+        int offset, target_level;
        BUG_ON(!domain->pgd);
        BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
        parent = domain->pgd;
+        /* Search pte */
+        if (!large_level)
+                target_level = 1;
+        else
+                target_level = large_level;
        while (level > 0) {
                void *tmp_page;
                offset = pfn_level_offset(pfn, level);
                pte = &parent[offset];
-                if (level == 1)
+                if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
+                        break;
+                if (level == target_level)
                        break;
                if (!dma_pte_present(pte)) {
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
        return pte;
 }
 /* return address's pte at specific level */
 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
                                         unsigned long pfn,
-                                         int level)
+                                         int level, int *large_page)
 {
        struct dma_pte *parent, *pte = NULL;
        int total = agaw_to_level(domain->agaw);
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
                if (level == total)
                        return pte;
-                if (!dma_pte_present(pte))
+                if (!dma_pte_present(pte)) {
+                        *large_page = total;
                        break;
+                }
+                if (pte->val & DMA_PTE_LARGE_PAGE) {
+                        *large_page = total;
+                        return pte;
+                }
                parent = phys_to_virt(dma_pte_addr(pte));
                total--;
        }
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
                                unsigned long last_pfn)
 {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+        unsigned int large_page = 1;
        struct dma_pte *first_pte, *pte;
        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
        /* we don't need lock here; nobody else touches the iova range */
        do {
-                first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+                large_page = 1;
+                first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
                if (!pte) {
-                        start_pfn = align_to_level(start_pfn + 1, 2);
+                        start_pfn = align_to_level(start_pfn + 1, large_page + 1);
                        continue;
                }
-                do { 
+                do {
                        dma_clear_pte(pte);
-                        start_pfn++;
+                        start_pfn += lvl_to_nr_pages(large_page);
                        pte++;
                } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
        int total = agaw_to_level(domain->agaw);
        int level;
        unsigned long tmp;
+        int large_page = 2;
        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
                        return;
                do {
-                        first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+                        large_page = level;
+                        first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
+                        if (large_page > level)
+                                level = large_page + 1;
                        if (!pte) {
                                tmp = align_to_level(tmp + 1, level + 1);
                                continue;
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
        else
                domain->iommu_snooping = 0;
+        domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
        domain->iommu_count = 1;
        domain->nid = iommu->node;
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
        if (!domain)
                return;
+        /* Flush any lazy unmaps that may reference this domain */
+        if (!intel_iommu_strict)
+                flush_unmaps_timeout(0);
        domain_remove_dev_info(domain);
        /* destroy iovas */
        put_iova_domain(&domain->iovad);
@@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
        return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
 }
+/* Return largest possible superpage level for a given mapping */
+static inline int hardware_largepage_caps(struct dmar_domain *domain,
+                                          unsigned long iov_pfn,
+                                          unsigned long phy_pfn,
+                                          unsigned long pages)
+{
+        int support, level = 1;
+        unsigned long pfnmerge;
+        support = domain->iommu_superpage;
+        /* To use a large page, the virtual *and* physical addresses
+           must be aligned to 2MiB/1GiB/etc. Lower bits set in either
+           of them will mean we have to use smaller pages. So just
+           merge them and check both at once. */
+        pfnmerge = iov_pfn | phy_pfn;
+        while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
+                pages >>= VTD_STRIDE_SHIFT;
+                if (!pages)
+                        break;
+                pfnmerge >>= VTD_STRIDE_SHIFT;
+                level++;
+                support--;
+        }
+        return level;
+}
 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                            struct scatterlist *sg, unsigned long phys_pfn,
                            unsigned long nr_pages, int prot)
@@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
        phys_addr_t uninitialized_var(pteval);
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        unsigned long sg_res;
+        unsigned int largepage_lvl = 0;
+        unsigned long lvl_pages = 0;
        BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
@@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
        }
-        while (nr_pages--) {
+        while (nr_pages > 0) {
                uint64_t tmp;
                if (!sg_res) {
@@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                        sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
                        sg->dma_length = sg->length;
                        pteval = page_to_phys(sg_page(sg)) | prot;
+                        phys_pfn = pteval >> VTD_PAGE_SHIFT;
                }
                if (!pte) {
-                        first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+                        largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
+                        first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
                        if (!pte)
                                return -ENOMEM;
+                        /* It is large page*/
+                        if (largepage_lvl > 1)
+                                pteval |= DMA_PTE_LARGE_PAGE;
+                        else
+                                pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
                }
                /* We don't need lock here, nobody else
                 * touches the iova range
@@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                        }
                        WARN_ON(1);
                }
+                lvl_pages = lvl_to_nr_pages(largepage_lvl);
+                BUG_ON(nr_pages < lvl_pages);
+                BUG_ON(sg_res < lvl_pages);
+                nr_pages -= lvl_pages;
+                iov_pfn += lvl_pages;
+                phys_pfn += lvl_pages;
+                pteval += lvl_pages * VTD_PAGE_SIZE;
+                sg_res -= lvl_pages;
+                /* If the next PTE would be the first in a new page, then we
+                   need to flush the cache on the entries we've just written.
+                   And then we'll need to recalculate 'pte', so clear it and
+                   let it get set again in the if (!pte) block above.
+                   If we're done (!nr_pages) we need to flush the cache too.
+                   Also if we've been setting superpages, we may need to
+                   recalculate 'pte' and switch back to smaller pages for the
+                   end of the mapping, if the trailing size is not enough to
+                   use another superpage (i.e. sg_res < lvl_pages). */
                pte++;
-                if (!nr_pages || first_pte_in_page(pte)) {
+                if (!nr_pages || first_pte_in_page(pte) ||
+                    (largepage_lvl > 1 && sg_res < lvl_pages)) {
                        domain_flush_cache(domain, first_pte,
                                           (void *)pte - (void *)first_pte);
                        pte = NULL;
                }
-                iov_pfn++;
-                pteval += VTD_PAGE_SIZE;
+                if (!sg_res && nr_pages)
-                sg_res--;
-                if (!sg_res)
                        sg = sg_next(sg);
        }
        return 0;
@@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
                return 0;
        return iommu_prepare_identity_map(pdev, rmrr->base_address,
-                rmrr->end_address + 1);
+                rmrr->end_address);
 }
 #ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
                return;
        printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
-        ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
+        ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
        if (ret)
                printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
        if (likely(!iommu_identity_mapping))
                return 0;
+        info = pdev->dev.archdata.iommu;
+        if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+                return (info->domain == si_domain);
-        list_for_each_entry(info, &si_domain->devices, link)
-                if (info->dev == pdev)
-                        return 1;
        return 0;
 }
@@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
         * Assume that they will -- if they turn out not to be, then we can 
         * take them out of the 1:1 domain later.
         */
-        if (!startup)
+        if (!startup) {
-                return pdev->dma_mask > DMA_BIT_MASK(32);
+                /*
+                 * If the device's dma_mask is less than the system's memory
+                 * size then this is not a candidate for identity mapping.
+                 */
+                u64 dma_mask = pdev->dma_mask;
+                if (pdev->dev.coherent_dma_mask &&
+                    pdev->dev.coherent_dma_mask < dma_mask)
+                        dma_mask = pdev->dev.coherent_dma_mask;
+                return dma_mask >= dma_get_required_mask(&pdev->dev);
+        }
        return 1;
 }
@@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
                return -EFAULT;
        for_each_pci_dev(pdev) {
+                /* Skip Host/PCI Bridge devices */
+                if (IS_BRIDGE_HOST_DEVICE(pdev))
+                        continue;
                if (iommu_should_identity_map(pdev, 1)) {
                        printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
                               hw ? "hardware" : "software", pci_name(pdev));
@@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
        return 0;
 }
-static int __init init_dmars(int force_on)
+static int __init init_dmars(void)
 {
        struct dmar_drhd_unit *drhd;
        struct dmar_rmrr_unit *rmrr;
@@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
        iommu = domain_get_iommu(domain);
        size = aligned_nrpages(paddr, size);
-        iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
+        iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
-                                pdev->dma_mask);
        if (!iova)
                goto error;
@@ -3118,7 +3263,17 @@ static int init_iommu_hw(void)
                if (iommu->qi)
                        dmar_reenable_qi(iommu);
-        for_each_active_iommu(iommu, drhd) {
+        for_each_iommu(iommu, drhd) {
+                if (drhd->ignored) {
+                        /*
+                         * we always have to disable PMRs or DMA may fail on
+                         * this device
+                         */
+                        if (force_on)
+                                iommu_disable_protect_mem_regions(iommu);
+                        continue;
+                }
+        
                iommu_flush_write_buffer(iommu);
                iommu_set_root_entry(iommu);
@@ -3127,7 +3282,8 @@ static int init_iommu_hw(void)
                                           DMA_CCMD_GLOBAL_INVL);
                iommu->flush.flush_iotlb(iommu, 0, 0, 0,
                                         DMA_TLB_GLOBAL_FLUSH);
-                iommu_enable_translation(iommu);
+                if (iommu_enable_translation(iommu))
+                        return 1;
                iommu_disable_protect_mem_regions(iommu);
        }
@@ -3194,7 +3350,10 @@ static void iommu_resume(void)
        unsigned long flag;
        if (init_iommu_hw()) {
-                WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+                if (force_on)
+                        panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
+                else
+                        WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
                return;
        }
@@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = {
 int __init intel_iommu_init(void)
 {
        int ret = 0;
-        int force_on = 0;
        /* VT-d is required for a TXT/tboot launch, so enforce that */
        force_on = tboot_force_iommu();
@@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void)
        init_no_remapping_devices();
-        ret = init_dmars(force_on);
+        ret = init_dmars();
        if (ret) {
                if (force_on)
                        panic("tboot: Failed to initialize DMARs\n");
@@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_safe(entry, tmp, &domain->devices) {
                info = list_entry(entry, struct device_domain_info, link);
-                /* No need to compare PCI domain; it has to be the same */
+                if (info->segment == pci_domain_nr(pdev->bus) &&
-                if (info->bus == pdev->bus->number &&
+                    info->bus == pdev->bus->number &&
                    info->devfn == pdev->devfn) {
                        list_del(&info->link);
                        list_del(&info->global);
@@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
                domain_update_iommu_cap(domain);
                spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
-                spin_lock_irqsave(&iommu->lock, tmp_flags);
+                if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
-                clear_bit(domain->id, iommu->domain_ids);
+                    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
-                iommu->domains[domain->id] = NULL;
+                        spin_lock_irqsave(&iommu->lock, tmp_flags);
-                spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+                        clear_bit(domain->id, iommu->domain_ids);
+                        iommu->domains[domain->id] = NULL;
+                        spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+                }
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
        domain->iommu_count = 0;
        domain->iommu_coherency = 0;
        domain->iommu_snooping = 0;
+        domain->iommu_superpage = 0;
        domain->max_addr = 0;
        domain->nid = -1;
@@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
        struct dma_pte *pte;
        u64 phys = 0;
-        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
+        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
        if (pte)
                phys = dma_pte_addr(pte);
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 9606e599a475..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
        curr = iovad->cached32_node;
        cached_iova = container_of(curr, struct iova, node);
-        if (free->pfn_lo >= cached_iova->pfn_lo)
+        if (free->pfn_lo >= cached_iova->pfn_lo) {
-                iovad->cached32_node = rb_next(&free->node);
+                struct rb_node *node = rb_next(&free->node);
+                struct iova *iova = container_of(node, struct iova, node);
+                /* only cache if it's below 32bit pfn */
+                if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
+                        iovad->cached32_node = node;
+                else
+                        iovad->cached32_node = NULL;
+        }
 }
 /* Computes the padding size required, to make the
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 2f7c76a85e53..e224a92baa16 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net)
        }
        mutex_lock(&vq->mutex);
-        vhost_disable_notify(vq);
+        vhost_disable_notify(&net->dev, vq);
        if (wmem < sock->sk->sk_sndbuf / 2)
                tx_poll_stop(net);
@@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net)
                                set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
                                break;
                        }
-                        if (unlikely(vhost_enable_notify(vq))) {
+                        if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-                                vhost_disable_notify(vq);
+                                vhost_disable_notify(&net->dev, vq);
                                continue;
                        }
                        break;
@@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net)
                return;
        mutex_lock(&vq->mutex);
-        vhost_disable_notify(vq);
+        vhost_disable_notify(&net->dev, vq);
        vhost_hlen = vq->vhost_hlen;
        sock_hlen = vq->sock_hlen;
@@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net)
                        break;
                /* OK, now we need to know about added descriptors. */
                if (!headcount) {
-                        if (unlikely(vhost_enable_notify(vq))) {
+                        if (unlikely(vhost_enable_notify(&net->dev, vq))) {
                                /* They have slipped one in as we were
                                 * doing that: check again. */
-                                vhost_disable_notify(vq);
+                                vhost_disable_notify(&net->dev, vq);
                                continue;
                        }
                        /* Nothing new?  Wait for eventfd to tell us
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 099f30230d06..734e1d74ad80 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n)
                return;
        mutex_lock(&vq->mutex);
-        vhost_disable_notify(vq);
+        vhost_disable_notify(&n->dev, vq);
        for (;;) {
                head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
@@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n)
                        break;
                /* Nothing new?  Wait for eventfd to tell us they refilled. */
                if (head == vq->num) {
-                        if (unlikely(vhost_enable_notify(vq))) {
+                        if (unlikely(vhost_enable_notify(&n->dev, vq))) {
-                                vhost_disable_notify(vq);
+                                vhost_disable_notify(&n->dev, vq);
                                continue;
                        }
                        break;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 7aa4eea930f1..ea966b356352 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -37,6 +37,9 @@ enum {
        VHOST_MEMORY_F_LOG = 0x1,
 };
+#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
+#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
                            poll_table *pt)
 {
@@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        vq->last_avail_idx = 0;
        vq->avail_idx = 0;
        vq->last_used_idx = 0;
+        vq->signalled_used = 0;
+        vq->signalled_used_valid = false;
        vq->used_flags = 0;
        vq->log_used = false;
        vq->log_addr = -1ull;
@@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
        return 1;
 }
-static int vq_access_ok(unsigned int num,
+static int vq_access_ok(struct vhost_dev *d, unsigned int num,
                        struct vring_desc __user *desc,
                        struct vring_avail __user *avail,
                        struct vring_used __user *used)
 {
+        size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
        return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
               access_ok(VERIFY_READ, avail,
-                         sizeof *avail + num * sizeof *avail->ring) &&
+                         sizeof *avail + num * sizeof *avail->ring + s) &&
               access_ok(VERIFY_WRITE, used,
-                        sizeof *used + num * sizeof *used->ring);
+                        sizeof *used + num * sizeof *used->ring + s);
 }
 /* Can we log writes? */
@@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev)
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
+static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+                            void __user *log_base)
 {
        struct vhost_memory *mp;
+        size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
        mp = rcu_dereference_protected(vq->dev->memory,
                                       lockdep_is_held(&vq->mutex));
@@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
                            vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
                (!vq->log_used || log_access_ok(log_base, vq->log_addr,
                                        sizeof *vq->used +
-                                        vq->num * sizeof *vq->used->ring));
+                                        vq->num * sizeof *vq->used->ring + s));
 }
 /* Can we start vq? */
 /* Caller should have vq mutex and device mutex */
 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-        return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) &&
+        return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
-                vq_log_access_ok(vq, vq->log_base);
+                vq_log_access_ok(vq->dev, vq, vq->log_base);
 }
 static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
@@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq,
        if (r)
                return r;
+        vq->signalled_used_valid = false;
        return get_user(vq->last_used_idx, &used->idx);
 }
@@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
                 * If it is not, we don't as size might not have been setup.
                 * We will verify when backend is configured. */
                if (vq->private_data) {
-                        if (!vq_access_ok(vq->num,
+                        if (!vq_access_ok(d, vq->num,
                                (void __user *)(unsigned long)a.desc_user_addr,
                                (void __user *)(unsigned long)a.avail_user_addr,
                                (void __user *)(unsigned long)a.used_user_addr)) {
@@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
                        vq = d->vqs + i;
                        mutex_lock(&vq->mutex);
                        /* If ring is inactive, will check when it's enabled. */
-                        if (vq->private_data && !vq_log_access_ok(vq, base))
+                        if (vq->private_data && !vq_log_access_ok(d, vq, base))
                                r = -EFAULT;
                        else
                                vq->log_base = base;
@@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
        /* On success, increment avail index. */
        vq->last_avail_idx++;
+        /* Assume notifications from guest are disabled at this point,
+         * if they aren't we would need to update avail_event index. */
+        BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
        return head;
 }
@@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
                        eventfd_signal(vq->log_ctx, 1);
        }
        vq->last_used_idx++;
+        /* If the driver never bothers to signal in a very long while,
+         * used index might wrap around. If that happens, invalidate
+         * signalled_used index we stored. TODO: make sure driver
+         * signals at least once in 2^16 and remove this. */
+        if (unlikely(vq->last_used_idx == vq->signalled_used))
+                vq->signalled_used_valid = false;
        return 0;
 }
@@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
                            unsigned count)
 {
        struct vring_used_elem __user *used;
+        u16 old, new;
        int start;
        start = vq->last_used_idx % vq->num;
@@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
                           ((void __user *)used - (void __user *)vq->used),
                          count * sizeof *used);
        }
-        vq->last_used_idx += count;
+        old = vq->last_used_idx;
+        new = (vq->last_used_idx += count);
+        /* If the driver never bothers to signal in a very long while,
+         * used index might wrap around. If that happens, invalidate
+         * signalled_used index we stored. TODO: make sure driver
+         * signals at least once in 2^16 and remove this. */
+        if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+                vq->signalled_used_valid = false;
        return 0;
 }
@@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
        return r;
 }
-/* This actually signals the guest, using eventfd. */
+static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
-void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
-        __u16 flags;
+        __u16 old, new, event;
+        bool v;
        /* Flush out used index updates. This is paired
         * with the barrier that the Guest executes when enabling
         * interrupts. */
        smp_mb();
-        if (__get_user(flags, &vq->avail->flags)) {
+        if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
-                vq_err(vq, "Failed to get flags");
+            unlikely(vq->avail_idx == vq->last_avail_idx))
-                return;
+                return true;
+        if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+                __u16 flags;
+                if (__get_user(flags, &vq->avail->flags)) {
+                        vq_err(vq, "Failed to get flags");
+                        return true;
+                }
+                return !(flags & VRING_AVAIL_F_NO_INTERRUPT);
        }
+        old = vq->signalled_used;
+        v = vq->signalled_used_valid;
+        new = vq->signalled_used = vq->last_used_idx;
+        vq->signalled_used_valid = true;
-        /* If they don't want an interrupt, don't signal, unless empty. */
+        if (unlikely(!v))
-        if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
+                return true;
-            (vq->avail_idx != vq->last_avail_idx ||
-             !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
-                return;
+        if (get_user(event, vhost_used_event(vq))) {
+                vq_err(vq, "Failed to get used event idx");
+                return true;
+        }
+        return vring_need_event(event, new, old);
+}
+/* This actually signals the guest, using eventfd. */
+void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
        /* Signal the Guest tell them we used something up. */
-        if (vq->call_ctx)
+        if (vq->call_ctx && vhost_notify(dev, vq))
                eventfd_signal(vq->call_ctx, 1);
 }
@@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
 }
 /* OK, now we need to know about added descriptors. */
-bool vhost_enable_notify(struct vhost_virtqueue *vq)
+bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
        u16 avail_idx;
        int r;
@@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
        if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
                return false;
        vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
-        r = put_user(vq->used_flags, &vq->used->flags);
+        if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
-        if (r) {
+                r = put_user(vq->used_flags, &vq->used->flags);
-                vq_err(vq, "Failed to enable notification at %p: %d\n",
+                if (r) {
-                       &vq->used->flags, r);
+                        vq_err(vq, "Failed to enable notification at %p: %d\n",
-                return false;
+                               &vq->used->flags, r);
+                        return false;
+                }
+        } else {
+                r = put_user(vq->avail_idx, vhost_avail_event(vq));
+                if (r) {
+                        vq_err(vq, "Failed to update avail event index at %p: %d\n",
+                               vhost_avail_event(vq), r);
+                        return false;
+                }
+        }
+        if (unlikely(vq->log_used)) {
+                void __user *used;
+                /* Make sure data is seen before log. */
+                smp_wmb();
+                used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ?
+                        &vq->used->flags : vhost_avail_event(vq);
+                /* Log used flags or event index entry write. Both are 16 bit
+                 * fields. */
+                log_write(vq->log_base, vq->log_addr +
+                           (used - (void __user *)vq->used),
+                          sizeof(u16));
+                if (vq->log_ctx)
+                        eventfd_signal(vq->log_ctx, 1);
        }
        /* They could have slipped one in as we were doing that: make
         * sure it's written, then check again. */
@@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 }
 /* We don't need to be notified again. */
-void vhost_disable_notify(struct vhost_virtqueue *vq)
+void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
        int r;
        if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
                return;
        vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-        r = put_user(vq->used_flags, &vq->used->flags);
+        if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
-        if (r)
+                r = put_user(vq->used_flags, &vq->used->flags);
-                vq_err(vq, "Failed to enable notification at %p: %d\n",
+                if (r)
-                       &vq->used->flags, r);
+                        vq_err(vq, "Failed to enable notification at %p: %d\n",
+                               &vq->used->flags, r);
+        }
 }
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index b3363ae38518..8e03379dd30f 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -84,6 +84,12 @@ struct vhost_virtqueue {
        /* Used flags */
        u16 used_flags;
+        /* Last used index value we have signalled on */
+        u16 signalled_used;
+        /* Whether the signalled_used value above is currently valid */
+        bool signalled_used_valid;
        /* Log writes to used structure. */
        bool log_used;
        u64 log_addr;
@@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
 void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
                               struct vring_used_elem *heads, unsigned count);
 void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
-void vhost_disable_notify(struct vhost_virtqueue *);
+void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
-bool vhost_enable_notify(struct vhost_virtqueue *);
+bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
                    unsigned int log_num, u64 len);
@@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
        } while (0)
 enum {
-        VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
+        VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
-                         (1 << VIRTIO_RING_F_INDIRECT_DESC) |
+                         (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
-                         (1 << VHOST_F_LOG_ALL) |
+                         (1ULL << VIRTIO_RING_F_EVENT_IDX) |
-                         (1 << VHOST_NET_F_VIRTIO_NET_HDR) |
+                         (1ULL << VHOST_F_LOG_ALL) |
-                         (1 << VIRTIO_NET_F_MRG_RXBUF),
+                         (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
+                         (1ULL << VIRTIO_NET_F_MRG_RXBUF),
 };
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0f1da45ba47d..e058ace2a4ad 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -40,9 +40,6 @@ struct virtio_balloon
        /* Waiting for host to ack the pages we released. */
        struct completion acked;
-        /* Do we have to tell Host *before* we reuse pages? */
-        bool tell_host_first;
        /* The pages we've told the Host we're not using. */
        unsigned int num_pages;
        struct list_head pages;
@@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
                vb->num_pages--;
        }
-        if (vb->tell_host_first) {
-                tell_host(vb, vb->deflate_vq);
+        /*
-                release_pages_by_pfn(vb->pfns, vb->num_pfns);
+         * Note that if
-        } else {
+         * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
-                release_pages_by_pfn(vb->pfns, vb->num_pfns);
+         * is true, we *have* to do it in this order
-                tell_host(vb, vb->deflate_vq);
+         */
-        }
+        tell_host(vb, vb->deflate_vq);
+        release_pages_by_pfn(vb->pfns, vb->num_pfns);
 }
 static inline void update_stat(struct virtio_balloon *vb, int idx,
@@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
                goto out_del_vqs;
        }
-        vb->tell_host_first
-                = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
        return 0;
 out_del_vqs:
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b0043fb26a4d..68b9136847af 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -82,6 +82,9 @@ struct vring_virtqueue
        /* Host supports indirect buffers */
        bool indirect;
+        /* Host publishes avail event idx */
+        bool event;
        /* Number of free buffers */
        unsigned int num_free;
        /* Head of free buffer list. */
@@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
 void virtqueue_kick(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
+        u16 new, old;
        START_USE(vq);
        /* Descriptors and available array need to be set before we expose the
         * new available array entries. */
        virtio_wmb();
-        vq->vring.avail->idx += vq->num_added;
+        old = vq->vring.avail->idx;
+        new = vq->vring.avail->idx = old + vq->num_added;
        vq->num_added = 0;
        /* Need to update avail index before checking if we should notify */
        virtio_mb();
-        if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
+        if (vq->event ?
+            vring_need_event(vring_avail_event(&vq->vring), new, old) :
+            !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
                /* Prod other side to tell it about changes. */
                vq->notify(&vq->vq);
@@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
        ret = vq->data[i];
        detach_buf(vq, i);
        vq->last_used_idx++;
+        /* If we expect an interrupt for the next entry, tell host
+         * by writing event index and flush out the write before
+         * the read in the next get_buf call. */
+        if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+                vring_used_event(&vq->vring) = vq->last_used_idx;
+                virtio_mb();
+        }
        END_USE(vq);
        return ret;
 }
@@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
        /* We optimistically turn back on interrupts, then check if there was
         * more to do. */
+        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+         * either clear the flags bit or point the event index at the next
+         * entry. Always do both to keep code simple. */
        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+        vring_used_event(&vq->vring) = vq->last_used_idx;
        virtio_mb();
        if (unlikely(more_used(vq))) {
                END_USE(vq);
@@ -357,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
+bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
+{
+        struct vring_virtqueue *vq = to_vvq(_vq);
+        u16 bufs;
+        START_USE(vq);
+        /* We optimistically turn back on interrupts, then check if there was
+         * more to do. */
+        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+         * either clear the flags bit or point the event index at the next
+         * entry. Always do both to keep code simple. */
+        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+        /* TODO: tune this threshold */
+        bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
+        vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
+        virtio_mb();
+        if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
+                END_USE(vq);
+                return false;
+        }
+        END_USE(vq);
+        return true;
+}
+EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
@@ -438,6 +484,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 #endif
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
        /* No callback?  Tell other side not to bother us. */
        if (!callback)
@@ -472,6 +519,8 @@ void vring_transport_features(struct virtio_device *vdev)
                switch (i) {
                case VIRTIO_RING_F_INDIRECT_DESC:
                        break;
+                case VIRTIO_RING_F_EVENT_IDX:
+                        break;
                default:
                        /* We don't understand this bit. */
                        clear_bit(i, vdev->features);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 87d95a8cddbc..f55ae23b137e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
        if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
                return -EACCES;
-        dentry_unhash(dentry);
        if (atomic_dec_and_test(&ino->count)) {
                p_ino = autofs4_dentry_ino(dentry->d_parent);
                if (p_ino && dentry->d_parent != dentry)
diff --git a/fs/namei.c b/fs/namei.c
index 1ab641f2e78e..e2e4e8d032ee 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (error)
                goto out;
+        shrink_dcache_parent(dentry);
        error = dir->i_op->rmdir(dir, dentry);
        if (error)
                goto out;
@@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
        if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
                goto out;
+        if (target)
+                shrink_dcache_parent(new_dentry);
        error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (error)
                goto out;
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 5619f8522738..bbd8661b3473 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,8 +9,12 @@
 #define VTD_PAGE_MASK           (((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)    (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
+#define VTD_STRIDE_SHIFT        (9)
+#define VTD_STRIDE_MASK         (((u64)-1) << VTD_STRIDE_SHIFT)
 #define DMA_PTE_READ (1)
 #define DMA_PTE_WRITE (2)
+#define DMA_PTE_LARGE_PAGE (1 << 7)
 #define DMA_PTE_SNP (1 << 11)
 #define CONTEXT_TT_MULTI_LEVEL  0
diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h
index d40bfa1d9c91..e5f21d293c70 100644
--- a/include/linux/mtd/physmap.h
+++ b/include/linux/mtd/physmap.h
@@ -19,6 +19,7 @@
 #include <linux/mtd/partitions.h>
 struct map_info;
+struct platform_device;
 struct physmap_flash_data {
        unsigned int            width;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index aff5b4f74041..710885749605 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -51,6 +51,13 @@ struct virtqueue {
 *      This re-enables callbacks; it returns "false" if there are pending
 *      buffers in the queue, to detect a possible race between the driver
 *      checking for more work, and enabling callbacks.
+ * virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
+ *      vq: the struct virtqueue we're talking about.
+ *      This re-enables callbacks but hints to the other side to delay
+ *      interrupts until most of the available buffers have been processed;
+ *      it returns "false" if there are many pending buffers in the queue,
+ *      to detect a possible race between the driver checking for more work,
+ *      and enabling callbacks.
 * virtqueue_detach_unused_buf: detach first unused buffer
 *      vq: the struct virtqueue we're talking about.
 *      Returns NULL or the "data" token handed to add_buf
@@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq);
 bool virtqueue_enable_cb(struct virtqueue *vq);
+bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
 void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 /**
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index e68b439b2860..277c4ad44e84 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_9P_H
 #define _LINUX_VIRTIO_9P_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index a50ecd1b81a2..652dc8bea921 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_BALLOON_H
 #define _LINUX_VIRTIO_BALLOON_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 167720d695ed..e0edb40ca7aa 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_BLK_H
 #define _LINUX_VIRTIO_BLK_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 800617b4ddd5..39c88c5ad19d 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_CONFIG_H
 #define _LINUX_VIRTIO_CONFIG_H
 /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
- * anyone can use the definitions to implement compatible drivers/servers. */
+ * anyone can use the definitions to implement compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 /* Virtio devices use a standardized configuration space to define their
 * features and pass configuration information, but each implementation can
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index e4d333543a33..bdf4b0034739 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -5,7 +5,31 @@
 #include <linux/virtio_config.h>
 /*
 * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
- * anyone can use the definitions to implement compatible drivers/servers.
+ * anyone can use the definitions to implement compatible drivers/servers:
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
 *
 * Copyright (C) Red Hat, Inc., 2009, 2010, 2011
 * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
index 06660c0a78d7..85bb0bb66ffc 100644
--- a/include/linux/virtio_ids.h
+++ b/include/linux/virtio_ids.h
@@ -5,7 +5,29 @@
 *
 * This header is BSD licensed so anyone can use the definitions to implement
 * compatible drivers/servers.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #define VIRTIO_ID_NET           1 /* virtio net */
 #define VIRTIO_ID_BLOCK         2 /* virtio block */
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 085e42298ce5..136040bba3e3 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_NET_H
 #define _LINUX_VIRTIO_NET_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index 9a3d7c48c622..ea66f3f60d63 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -11,6 +11,29 @@
 *
 * This header is BSD licensed so anyone can use the definitions to implement
 * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
 */
 #ifndef _LINUX_VIRTIO_PCI_H
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index e4d144b132b5..4a32cb6da425 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -7,6 +7,29 @@
 * This header is BSD licensed so anyone can use the definitions to implement
 * compatible drivers/servers.
 *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
 * Copyright Rusty Russell IBM Corporation 2007. */
 #include <linux/types.h>
@@ -29,6 +52,12 @@
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC     28
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX         29
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc {
        /* Address (guest-physical). */
@@ -83,6 +112,7 @@ struct vring {
 *      __u16 avail_flags;
 *      __u16 avail_idx;
 *      __u16 available[num];
+ *      __u16 used_event_idx;
 *
 *      // Padding to the next align boundary.
 *      char pad[];
@@ -91,8 +121,14 @@ struct vring {
 *      __u16 used_flags;
 *      __u16 used_idx;
 *      struct vring_used_elem used[num];
+ *      __u16 avail_event_idx;
 * };
 */
+/* We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility. */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
                              unsigned long align)
 {
@@ -107,7 +143,21 @@ static inline unsigned vring_size(unsigned int num, unsigned long align)
 {
        return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
                 + align - 1) & ~(align - 1))
-                + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
+                + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
+}
+/* The following is used with VIRTIO_RING_F_EVENT_IDX */
+/* Assuming a given event_idx value from the other side, if
+ * we have just incremented index from old to new_idx, should we trigger an
+ * event? Nonzero iff event_idx is in [old, new_idx), counted modulo 2^16. */
+static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
+{
+        /* Note: Xen has similar logic for notification hold-off
+         * in include/xen/interface/io/ring.h with req_event and req_prod
+         * corresponding to event_idx + 1 and new_idx respectively.
+         * Note also that req_event and req_prod in Xen start at 1,
+         * event indexes in virtio start at 0. */
+        return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
 }
 #ifdef __KERNEL__
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cc5d57d1d0b6..ba89f40abe6a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7388,26 +7388,12 @@ static int __perf_cgroup_move(void *info)
        return 0;
 }
-static void perf_cgroup_move(struct task_struct *task)
+static void
+perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task)
 {
        task_function_call(task, __perf_cgroup_move, task);
 }
-static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                struct cgroup *old_cgrp, struct task_struct *task,
-                bool threadgroup)
-{
-        perf_cgroup_move(task);
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                        perf_cgroup_move(c);
-                }
-                rcu_read_unlock();
-        }
-}
 static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
                struct cgroup *old_cgrp, struct task_struct *task)
 {
@@ -7419,7 +7405,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
        if (!(task->flags & PF_EXITING))
                return;
-        perf_cgroup_move(task);
+        perf_cgroup_attach_task(cgrp, task);
 }
 struct cgroup_subsys perf_subsys = {
@@ -7428,6 +7414,6 @@ struct cgroup_subsys perf_subsys = {
        .create         = perf_cgroup_create,
        .destroy        = perf_cgroup_destroy,
        .exit           = perf_cgroup_exit,
-        .attach         = perf_cgroup_attach,
+        .attach_task    = perf_cgroup_attach_task,
 };
 #endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 77a7671dd147..89419ff92e99 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
        if (IS_ERR(t))
                return PTR_ERR(t);
        kthread_bind(t, cpu);
-        set_task_state(t, TASK_INTERRUPTIBLE);
        per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
        WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
        per_cpu(rcu_cpu_kthread_task, cpu) = t;
@@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
                if (IS_ERR(t))
                        return PTR_ERR(t);
                raw_spin_lock_irqsave(&rnp->lock, flags);
-                set_task_state(t, TASK_INTERRUPTIBLE);
                rnp->node_kthread_task = t;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                sp.sched_priority = 99;
@@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
        return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
 }
+static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
 /*
 * Spawn all kthreads -- called as soon as the scheduler is running.
 */
@@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void)
 {
        int cpu;
        struct rcu_node *rnp;
+        struct task_struct *t;
        rcu_kthreads_spawnable = 1;
        for_each_possible_cpu(cpu) {
                per_cpu(rcu_cpu_has_work, cpu) = 0;
-                if (cpu_online(cpu))
+                if (cpu_online(cpu)) {
                        (void)rcu_spawn_one_cpu_kthread(cpu);
+                        t = per_cpu(rcu_cpu_kthread_task, cpu);
+                        if (t)
+                                wake_up_process(t);
+                }
        }
        rnp = rcu_get_root(rcu_state);
        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+        if (rnp->node_kthread_task)
+                wake_up_process(rnp->node_kthread_task);
        if (NUM_RCU_NODES > 1) {
-                rcu_for_each_leaf_node(rcu_state, rnp)
+                rcu_for_each_leaf_node(rcu_state, rnp) {
                        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+                        t = rnp->node_kthread_task;
+                        if (t)
+                                wake_up_process(t);
+                        rcu_wake_one_boost_kthread(rnp);
+                }
        }
        return 0;
 }
@@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
        raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
-static void __cpuinit rcu_online_cpu(int cpu)
+static void __cpuinit rcu_prepare_cpu(int cpu)
 {
        rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
        rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
        rcu_preempt_init_percpu_data(cpu);
 }
-static void __cpuinit rcu_online_kthreads(int cpu)
+static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
        struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
        struct rcu_node *rnp = rdp->mynode;
@@ -2209,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu)
 }
 /*
+ * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
+ * but the RCU threads are woken on demand, and if demand is low this
+ * could be a while, triggering the hung task watchdog.
+ *
+ * In order to avoid this, poke the per-CPU, node and boost kthreads
+ * once the CPU is fully up and running.
+ */
+static void __cpuinit rcu_online_kthreads(int cpu)
+{
+        struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+        struct rcu_node *rnp = rdp->mynode;
+        struct task_struct *t;
+        t = per_cpu(rcu_cpu_kthread_task, cpu);
+        if (t)
+                wake_up_process(t);
+        t = rnp->node_kthread_task;
+        if (t)
+                wake_up_process(t);
+        rcu_wake_one_boost_kthread(rnp);
+}
+/*
 * Handle CPU online/offline notification events.
 */
 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
@@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-                rcu_online_cpu(cpu);
+                rcu_prepare_cpu(cpu);
-                rcu_online_kthreads(cpu);
+                rcu_prepare_kthreads(cpu);
                break;
        case CPU_ONLINE:
+                rcu_online_kthreads(cpu);
        case CPU_DOWN_FAILED:
                rcu_node_kthread_setaffinity(rnp, -1);
                rcu_cpu_kthread_setrt(cpu, 1);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a767b7dac365..c8bff3099a89 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        if (IS_ERR(t))
                return PTR_ERR(t);
        raw_spin_lock_irqsave(&rnp->lock, flags);
-        set_task_state(t, TASK_INTERRUPTIBLE);
        rnp->boost_kthread_task = t;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        sp.sched_priority = RCU_KTHREAD_PRIO;
@@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        return 0;
 }
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+        if (rnp->boost_kthread_task) /* only if a boost kthread is recorded */
+                wake_up_process(rnp->boost_kthread_task);
+}
 #else /* #ifdef CONFIG_RCU_BOOST */
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        return 0;
 }
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{ /* Stub on the #else side of CONFIG_RCU_BOOST: nothing to wake. */
+}
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 #ifndef CONFIG_SMP
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4fc92445a29c..f175d98bd355 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = {
        },
 #endif
 #ifdef CONFIG_PERF_EVENTS
+        /*
+         * User-space scripts rely on the existence of this file
+         * as a feature check for perf_events being enabled.
+         *
+         * So it's an ABI, do not remove!
+         */
        {
                .procname       = "perf_event_paranoid",
                .data           = &sysctl_perf_event_paranoid,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a4e1db3f1981..4e8985acdab8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
        if (should_fail_alloc_page(gfp_mask, order))
                return NULL;
-#ifndef CONFIG_ZONE_DMA
-        if (WARN_ON_ONCE(gfp_mask & __GFP_DMA))
-                return NULL;
-#endif
        /*
         * Check the zones suitable for the gfp_mask contain at least one
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index ae3a698415e6..ec1bcecf2cda 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
                        sa.aad.op = OP_SETPROCATTR;
                        sa.aad.info = name;
                        sa.aad.error = -EINVAL;
-                        return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL,
+                        return aa_audit(AUDIT_APPARMOR_DENIED,
+                                        __aa_current_profile(), GFP_KERNEL,
                                        &sa, NULL);
                }
        } else if (strcmp(name, "exec") == 0) {
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index df0c6d2c3860..74d3331bdaf9 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -198,6 +198,14 @@ const struct option longopts[] = {
                .val = 'h',
        },
        {
+                .name = "event-idx",
+                .val = 'E',
+        },
+        {
+                .name = "no-event-idx",
+                .val = 'e',
+        },
+        {
                .name = "indirect",
                .val = 'I',
        },
@@ -211,13 +219,17 @@ const struct option longopts[] = {
 static void help()
 {
-        fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n");
+        fprintf(stderr, "Usage: virtio_test [--help]"
+                " [--no-indirect]"
+                " [--no-event-idx]"
+                "\n");
 }
 int main(int argc, char **argv)
 {
        struct vdev_info dev;
-        unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+        unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+                (1ULL << VIRTIO_RING_F_EVENT_IDX);
        int o;
        for (;;) {
@@ -228,6 +240,9 @@ int main(int argc, char **argv)
                case '?':
                        help();
                        exit(2);
+                case 'e':
+                        features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+                        break;
                case 'h':
                        help();
                        goto done;
author	Ingo Molnar <mingo@elte.hu>	2011-06-04 06:28:05 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-06-04 06:28:05 -0400
commit	3ce2a0bc9dfb6423491afe0afc9f099e24b8cba4 (patch)
tree	58fbef582846fef0e777b1a552aca12e21a071b1
parent	aef29bf20bd79c73992ab23d5067e9f0448b466e (diff)
parent	aa4a221875873d2a1f9656cb7fd7e545e952b4fa (diff)