diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-06-04 06:28:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-06-04 06:28:05 -0400 |
commit | 3ce2a0bc9dfb6423491afe0afc9f099e24b8cba4 (patch) | |
tree | 58fbef582846fef0e777b1a552aca12e21a071b1 | |
parent | aef29bf20bd79c73992ab23d5067e9f0448b466e (diff) | |
parent | aa4a221875873d2a1f9656cb7fd7e545e952b4fa (diff) |
Merge branch 'perf/urgent' into perf/core
Conflicts:
tools/perf/util/python.c
Merge reason: resolve the conflict with perf/urgent.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
47 files changed, 844 insertions, 203 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5438a2d7907f..d9a203b058f1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
999 | With this option on every unmap_single operation will | 999 | With this option on every unmap_single operation will |
1000 | result in a hardware IOTLB flush operation as opposed | 1000 | result in a hardware IOTLB flush operation as opposed |
1001 | to batching them for performance. | 1001 | to batching them for performance. |
1002 | 1002 | sp_off [Default Off] | |
1003 | By default, super pages will be supported if the Intel IOMMU | ||
1004 | has the capability. With this option, super pages will | ||
1005 | not be supported. | ||
1003 | intremap= [X86-64, Intel-IOMMU] | 1006 | intremap= [X86-64, Intel-IOMMU] |
1004 | Format: { on (default) | off | nosid } | 1007 | Format: { on (default) | off | nosid } |
1005 | on enable Interrupt Remapping (default) | 1008 | on enable Interrupt Remapping (default) |
diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile index bebac6b4f332..0ac34206f7a7 100644 --- a/Documentation/virtual/lguest/Makefile +++ b/Documentation/virtual/lguest/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | # This creates the demonstration utility "lguest" which runs a Linux guest. | 1 | # This creates the demonstration utility "lguest" which runs a Linux guest. |
2 | # Missing headers? Add "-I../../include -I../../arch/x86/include" | 2 | # Missing headers? Add "-I../../../include -I../../../arch/x86/include" |
3 | CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE | 3 | CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE |
4 | 4 | ||
5 | all: lguest | 5 | all: lguest |
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index d9da7e148538..cd9d6af61d07 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c | |||
@@ -49,7 +49,7 @@ | |||
49 | #include <linux/virtio_rng.h> | 49 | #include <linux/virtio_rng.h> |
50 | #include <linux/virtio_ring.h> | 50 | #include <linux/virtio_ring.h> |
51 | #include <asm/bootparam.h> | 51 | #include <asm/bootparam.h> |
52 | #include "../../include/linux/lguest_launcher.h" | 52 | #include "../../../include/linux/lguest_launcher.h" |
53 | /*L:110 | 53 | /*L:110 |
54 | * We can ignore the 42 include files we need for this program, but I do want | 54 | * We can ignore the 42 include files we need for this program, but I do want |
55 | * to draw attention to the use of kernel-style types. | 55 | * to draw attention to the use of kernel-style types. |
@@ -135,9 +135,6 @@ struct device { | |||
135 | /* Is it operational */ | 135 | /* Is it operational */ |
136 | bool running; | 136 | bool running; |
137 | 137 | ||
138 | /* Does Guest want an intrrupt on empty? */ | ||
139 | bool irq_on_empty; | ||
140 | |||
141 | /* Device-specific data. */ | 138 | /* Device-specific data. */ |
142 | void *priv; | 139 | void *priv; |
143 | }; | 140 | }; |
@@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq) | |||
637 | 634 | ||
638 | /* If they don't want an interrupt, don't send one... */ | 635 | /* If they don't want an interrupt, don't send one... */ |
639 | if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { | 636 | if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { |
640 | /* ... unless they've asked us to force one on empty. */ | 637 | return; |
641 | if (!vq->dev->irq_on_empty | ||
642 | || lg_last_avail(vq) != vq->vring.avail->idx) | ||
643 | return; | ||
644 | } | 638 | } |
645 | 639 | ||
646 | /* Send the Guest an interrupt to tell them we used something up. */ | 640 | /* Send the Guest an interrupt to tell them we used something up. */ |
@@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq) | |||
1057 | close(vq->eventfd); | 1051 | close(vq->eventfd); |
1058 | } | 1052 | } |
1059 | 1053 | ||
1060 | static bool accepted_feature(struct device *dev, unsigned int bit) | ||
1061 | { | ||
1062 | const u8 *features = get_feature_bits(dev) + dev->feature_len; | ||
1063 | |||
1064 | if (dev->feature_len < bit / CHAR_BIT) | ||
1065 | return false; | ||
1066 | return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); | ||
1067 | } | ||
1068 | |||
1069 | static void start_device(struct device *dev) | 1054 | static void start_device(struct device *dev) |
1070 | { | 1055 | { |
1071 | unsigned int i; | 1056 | unsigned int i; |
@@ -1079,8 +1064,6 @@ static void start_device(struct device *dev) | |||
1079 | verbose(" %02x", get_feature_bits(dev) | 1064 | verbose(" %02x", get_feature_bits(dev) |
1080 | [dev->feature_len+i]); | 1065 | [dev->feature_len+i]); |
1081 | 1066 | ||
1082 | dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); | ||
1083 | |||
1084 | for (vq = dev->vq; vq; vq = vq->next) { | 1067 | for (vq = dev->vq; vq; vq = vq->next) { |
1085 | if (vq->service) | 1068 | if (vq->service) |
1086 | create_thread(vq); | 1069 | create_thread(vq); |
@@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg) | |||
1564 | /* Set up the tun device. */ | 1547 | /* Set up the tun device. */ |
1565 | configure_device(ipfd, tapif, ip); | 1548 | configure_device(ipfd, tapif, ip); |
1566 | 1549 | ||
1567 | add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); | ||
1568 | /* Expect Guest to handle everything except UFO */ | 1550 | /* Expect Guest to handle everything except UFO */ |
1569 | add_feature(dev, VIRTIO_NET_F_CSUM); | 1551 | add_feature(dev, VIRTIO_NET_F_CSUM); |
1570 | add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); | 1552 | add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); |
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 1cf0f496f744..7c928da35b17 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h | |||
@@ -320,11 +320,12 @@ | |||
320 | #define __NR_clock_adjtime 1328 | 320 | #define __NR_clock_adjtime 1328 |
321 | #define __NR_syncfs 1329 | 321 | #define __NR_syncfs 1329 |
322 | #define __NR_setns 1330 | 322 | #define __NR_setns 1330 |
323 | #define __NR_sendmmsg 1331 | ||
323 | 324 | ||
324 | #ifdef __KERNEL__ | 325 | #ifdef __KERNEL__ |
325 | 326 | ||
326 | 327 | ||
327 | #define NR_syscalls 307 /* length of syscall table */ | 328 | #define NR_syscalls 308 /* length of syscall table */ |
328 | 329 | ||
329 | /* | 330 | /* |
330 | * The following defines stop scripts/checksyscalls.sh from complaining about | 331 | * The following defines stop scripts/checksyscalls.sh from complaining about |
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 9ca80193cd4e..97dd2abdeb1a 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S | |||
@@ -1776,6 +1776,7 @@ sys_call_table: | |||
1776 | data8 sys_clock_adjtime | 1776 | data8 sys_clock_adjtime |
1777 | data8 sys_syncfs | 1777 | data8 sys_syncfs |
1778 | data8 sys_setns // 1330 | 1778 | data8 sys_setns // 1330 |
1779 | data8 sys_sendmmsg | ||
1779 | 1780 | ||
1780 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls | 1781 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls |
1781 | #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ | 1782 | #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ |
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 9089b0421191..7667db448aa7 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c | |||
@@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = { | |||
715 | 715 | ||
716 | static int __init init_pmacpic_syscore(void) | 716 | static int __init init_pmacpic_syscore(void) |
717 | { | 717 | { |
718 | register_syscore_ops(&pmacpic_syscore_ops); | 718 | if (pmac_irq_hw[0]) |
719 | register_syscore_ops(&pmacpic_syscore_ops); | ||
719 | return 0; | 720 | return 0; |
720 | } | 721 | } |
721 | 722 | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f5abe3a245b8..90b06d4daee2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | |||
8 | 8 | ||
9 | ifdef CONFIG_FUNCTION_TRACER | 9 | ifdef CONFIG_FUNCTION_TRACER |
10 | # Do not profile debug and lowlevel utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | ||
11 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
12 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
13 | CFLAGS_REMOVE_pvclock.o = -pg | 14 | CFLAGS_REMOVE_pvclock.o = -pg |
@@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp) | |||
28 | GCOV_PROFILE_vsyscall_64.o := n | 29 | GCOV_PROFILE_vsyscall_64.o := n |
29 | GCOV_PROFILE_hpet.o := n | 30 | GCOV_PROFILE_hpet.o := n |
30 | GCOV_PROFILE_tsc.o := n | 31 | GCOV_PROFILE_tsc.o := n |
32 | GCOV_PROFILE_vread_tsc_64.o := n | ||
31 | GCOV_PROFILE_paravirt.o := n | 33 | GCOV_PROFILE_paravirt.o := n |
32 | 34 | ||
33 | # vread_tsc_64 is hot and should be fully optimized: | 35 | # vread_tsc_64 is hot and should be fully optimized: |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 426a5b66f7e4..2e4928d45a2d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -642,7 +642,7 @@ static int __init idle_setup(char *str) | |||
642 | boot_option_idle_override = IDLE_POLL; | 642 | boot_option_idle_override = IDLE_POLL; |
643 | } else if (!strcmp(str, "mwait")) { | 643 | } else if (!strcmp(str, "mwait")) { |
644 | boot_option_idle_override = IDLE_FORCE_MWAIT; | 644 | boot_option_idle_override = IDLE_FORCE_MWAIT; |
645 | WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n"); | 645 | WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); |
646 | } else if (!strcmp(str, "halt")) { | 646 | } else if (!strcmp(str, "halt")) { |
647 | /* | 647 | /* |
648 | * When the boot option of idle=halt is added, halt is | 648 | * When the boot option of idle=halt is added, halt is |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eefd96765e79..33a0c11797de 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void) | |||
1332 | void *mwait_ptr; | 1332 | void *mwait_ptr; |
1333 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); | 1333 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); |
1334 | 1334 | ||
1335 | if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) | 1335 | if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) |
1336 | return; | 1336 | return; |
1337 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) | 1337 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) |
1338 | return; | 1338 | return; |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e191c096ab90..db832fd65ecb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) | |||
993 | static void lguest_time_init(void) | 993 | static void lguest_time_init(void) |
994 | { | 994 | { |
995 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 995 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
996 | lguest_setup_irq(0); | ||
996 | irq_set_handler(0, lguest_time_irq); | 997 | irq_set_handler(0, lguest_time_irq); |
997 | 998 | ||
998 | clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); | 999 | clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); |
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a0aabd904a51..46b8136c31bb 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c | |||
@@ -321,7 +321,6 @@ static void pcd_init_units(void) | |||
321 | strcpy(disk->disk_name, cd->name); /* umm... */ | 321 | strcpy(disk->disk_name, cd->name); /* umm... */ |
322 | disk->fops = &pcd_bdops; | 322 | disk->fops = &pcd_bdops; |
323 | disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; | 323 | disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; |
324 | disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
325 | } | 324 | } |
326 | } | 325 | } |
327 | 326 | ||
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ecf89cdf006..079c08808d8a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -6,10 +6,13 @@ | |||
6 | #include <linux/virtio.h> | 6 | #include <linux/virtio.h> |
7 | #include <linux/virtio_blk.h> | 7 | #include <linux/virtio_blk.h> |
8 | #include <linux/scatterlist.h> | 8 | #include <linux/scatterlist.h> |
9 | #include <linux/string_helpers.h> | ||
10 | #include <scsi/scsi_cmnd.h> | ||
9 | 11 | ||
10 | #define PART_BITS 4 | 12 | #define PART_BITS 4 |
11 | 13 | ||
12 | static int major, index; | 14 | static int major, index; |
15 | struct workqueue_struct *virtblk_wq; | ||
13 | 16 | ||
14 | struct virtio_blk | 17 | struct virtio_blk |
15 | { | 18 | { |
@@ -26,6 +29,9 @@ struct virtio_blk | |||
26 | 29 | ||
27 | mempool_t *pool; | 30 | mempool_t *pool; |
28 | 31 | ||
32 | /* Process context for config space updates */ | ||
33 | struct work_struct config_work; | ||
34 | |||
29 | /* What host tells us, plus 2 for header & tailer. */ | 35 | /* What host tells us, plus 2 for header & tailer. */ |
30 | unsigned int sg_elems; | 36 | unsigned int sg_elems; |
31 | 37 | ||
@@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
141 | num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); | 147 | num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); |
142 | 148 | ||
143 | if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { | 149 | if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { |
144 | sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); | 150 | sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE); |
145 | sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, | 151 | sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, |
146 | sizeof(vbr->in_hdr)); | 152 | sizeof(vbr->in_hdr)); |
147 | } | 153 | } |
@@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev, | |||
291 | } | 297 | } |
292 | DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); | 298 | DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); |
293 | 299 | ||
300 | static void virtblk_config_changed_work(struct work_struct *work) | ||
301 | { | ||
302 | struct virtio_blk *vblk = | ||
303 | container_of(work, struct virtio_blk, config_work); | ||
304 | struct virtio_device *vdev = vblk->vdev; | ||
305 | struct request_queue *q = vblk->disk->queue; | ||
306 | char cap_str_2[10], cap_str_10[10]; | ||
307 | u64 capacity, size; | ||
308 | |||
309 | /* Host must always specify the capacity. */ | ||
310 | vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), | ||
311 | &capacity, sizeof(capacity)); | ||
312 | |||
313 | /* If capacity is too big, truncate with warning. */ | ||
314 | if ((sector_t)capacity != capacity) { | ||
315 | dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", | ||
316 | (unsigned long long)capacity); | ||
317 | capacity = (sector_t)-1; | ||
318 | } | ||
319 | |||
320 | size = capacity * queue_logical_block_size(q); | ||
321 | string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); | ||
322 | string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); | ||
323 | |||
324 | dev_notice(&vdev->dev, | ||
325 | "new size: %llu %d-byte logical blocks (%s/%s)\n", | ||
326 | (unsigned long long)capacity, | ||
327 | queue_logical_block_size(q), | ||
328 | cap_str_10, cap_str_2); | ||
329 | |||
330 | set_capacity(vblk->disk, capacity); | ||
331 | } | ||
332 | |||
333 | static void virtblk_config_changed(struct virtio_device *vdev) | ||
334 | { | ||
335 | struct virtio_blk *vblk = vdev->priv; | ||
336 | |||
337 | queue_work(virtblk_wq, &vblk->config_work); | ||
338 | } | ||
339 | |||
294 | static int __devinit virtblk_probe(struct virtio_device *vdev) | 340 | static int __devinit virtblk_probe(struct virtio_device *vdev) |
295 | { | 341 | { |
296 | struct virtio_blk *vblk; | 342 | struct virtio_blk *vblk; |
@@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
327 | vblk->vdev = vdev; | 373 | vblk->vdev = vdev; |
328 | vblk->sg_elems = sg_elems; | 374 | vblk->sg_elems = sg_elems; |
329 | sg_init_table(vblk->sg, vblk->sg_elems); | 375 | sg_init_table(vblk->sg, vblk->sg_elems); |
376 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); | ||
330 | 377 | ||
331 | /* We expect one virtqueue, for output. */ | 378 | /* We expect one virtqueue, for output. */ |
332 | vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); | 379 | vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); |
@@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) | |||
477 | { | 524 | { |
478 | struct virtio_blk *vblk = vdev->priv; | 525 | struct virtio_blk *vblk = vdev->priv; |
479 | 526 | ||
527 | flush_work(&vblk->config_work); | ||
528 | |||
480 | /* Nothing should be pending. */ | 529 | /* Nothing should be pending. */ |
481 | BUG_ON(!list_empty(&vblk->reqs)); | 530 | BUG_ON(!list_empty(&vblk->reqs)); |
482 | 531 | ||
@@ -508,27 +557,47 @@ static unsigned int features[] = { | |||
508 | * Use __refdata to avoid this warning. | 557 | * Use __refdata to avoid this warning. |
509 | */ | 558 | */ |
510 | static struct virtio_driver __refdata virtio_blk = { | 559 | static struct virtio_driver __refdata virtio_blk = { |
511 | .feature_table = features, | 560 | .feature_table = features, |
512 | .feature_table_size = ARRAY_SIZE(features), | 561 | .feature_table_size = ARRAY_SIZE(features), |
513 | .driver.name = KBUILD_MODNAME, | 562 | .driver.name = KBUILD_MODNAME, |
514 | .driver.owner = THIS_MODULE, | 563 | .driver.owner = THIS_MODULE, |
515 | .id_table = id_table, | 564 | .id_table = id_table, |
516 | .probe = virtblk_probe, | 565 | .probe = virtblk_probe, |
517 | .remove = __devexit_p(virtblk_remove), | 566 | .remove = __devexit_p(virtblk_remove), |
567 | .config_changed = virtblk_config_changed, | ||
518 | }; | 568 | }; |
519 | 569 | ||
520 | static int __init init(void) | 570 | static int __init init(void) |
521 | { | 571 | { |
572 | int error; | ||
573 | |||
574 | virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); | ||
575 | if (!virtblk_wq) | ||
576 | return -ENOMEM; | ||
577 | |||
522 | major = register_blkdev(0, "virtblk"); | 578 | major = register_blkdev(0, "virtblk"); |
523 | if (major < 0) | 579 | if (major < 0) { |
524 | return major; | 580 | error = major; |
525 | return register_virtio_driver(&virtio_blk); | 581 | goto out_destroy_workqueue; |
582 | } | ||
583 | |||
584 | error = register_virtio_driver(&virtio_blk); | ||
585 | if (error) | ||
586 | goto out_unregister_blkdev; | ||
587 | return 0; | ||
588 | |||
589 | out_unregister_blkdev: | ||
590 | unregister_blkdev(major, "virtblk"); | ||
591 | out_destroy_workqueue: | ||
592 | destroy_workqueue(virtblk_wq); | ||
593 | return error; | ||
526 | } | 594 | } |
527 | 595 | ||
528 | static void __exit fini(void) | 596 | static void __exit fini(void) |
529 | { | 597 | { |
530 | unregister_blkdev(major, "virtblk"); | 598 | unregister_blkdev(major, "virtblk"); |
531 | unregister_virtio_driver(&virtio_blk); | 599 | unregister_virtio_driver(&virtio_blk); |
600 | destroy_workqueue(virtblk_wq); | ||
532 | } | 601 | } |
533 | module_init(init); | 602 | module_init(init); |
534 | module_exit(fini); | 603 | module_exit(fini); |
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index ae15a4ddaa9b..7878da89d29e 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c | |||
@@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) | |||
627 | gendisk->fops = &viocd_fops; | 627 | gendisk->fops = &viocd_fops; |
628 | gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | | 628 | gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | |
629 | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; | 629 | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; |
630 | gendisk->events = DISK_EVENT_MEDIA_CHANGE; | ||
631 | set_capacity(gendisk, 0); | 630 | set_capacity(gendisk, 0); |
632 | gendisk->private_data = d; | 631 | gendisk->private_data = d; |
633 | d->viocd_disk = gendisk; | 632 | d->viocd_disk = gendisk; |
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 838568a7dbf5..fb68b1295373 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c | |||
@@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) | |||
1677 | portdev->config.max_nr_ports = 1; | 1677 | portdev->config.max_nr_ports = 1; |
1678 | if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { | 1678 | if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { |
1679 | multiport = true; | 1679 | multiport = true; |
1680 | vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT; | ||
1681 | |||
1682 | vdev->config->get(vdev, offsetof(struct virtio_console_config, | 1680 | vdev->config->get(vdev, offsetof(struct virtio_console_config, |
1683 | max_nr_ports), | 1681 | max_nr_ports), |
1684 | &portdev->config.max_nr_ports, | 1682 | &portdev->config.max_nr_ports, |
1685 | sizeof(portdev->config.max_nr_ports)); | 1683 | sizeof(portdev->config.max_nr_ports)); |
1686 | } | 1684 | } |
1687 | 1685 | ||
1688 | /* Let the Host know we support multiple ports.*/ | ||
1689 | vdev->config->finalize_features(vdev); | ||
1690 | |||
1691 | err = init_vqs(portdev); | 1686 | err = init_vqs(portdev); |
1692 | if (err < 0) { | 1687 | if (err < 0) { |
1693 | dev_err(&vdev->dev, "Error %d initializing vqs\n", err); | 1688 | dev_err(&vdev->dev, "Error %d initializing vqs\n", err); |
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6e5123b1d341..144d27261e43 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c | |||
@@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive) | |||
1782 | ide_cd_read_toc(drive, &sense); | 1782 | ide_cd_read_toc(drive, &sense); |
1783 | g->fops = &idecd_ops; | 1783 | g->fops = &idecd_ops; |
1784 | g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; | 1784 | g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; |
1785 | g->events = DISK_EVENT_MEDIA_CHANGE; | ||
1786 | add_disk(g); | 1785 | add_disk(g); |
1787 | return 0; | 1786 | return 0; |
1788 | 1787 | ||
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index b0c56313dbbb..8cebec5e85ee 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c | |||
@@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg) | |||
304 | return 1; | 304 | return 1; |
305 | } | 305 | } |
306 | /* Readjust the instruction pointer if needed */ | 306 | /* Readjust the instruction pointer if needed */ |
307 | instruction_pointer_set(&kgdbts_regs, ip + offset); | 307 | ip += offset; |
308 | #ifdef GDB_ADJUSTS_BREAK_OFFSET | ||
309 | instruction_pointer_set(&kgdbts_regs, ip); | ||
310 | #endif | ||
308 | return 0; | 311 | return 0; |
309 | } | 312 | } |
310 | 313 | ||
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0cb0b0632672..f6853247a620 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c | |||
@@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
609 | * before it gets out of hand. Naturally, this wastes entries. */ | 609 | * before it gets out of hand. Naturally, this wastes entries. */ |
610 | if (capacity < 2+MAX_SKB_FRAGS) { | 610 | if (capacity < 2+MAX_SKB_FRAGS) { |
611 | netif_stop_queue(dev); | 611 | netif_stop_queue(dev); |
612 | if (unlikely(!virtqueue_enable_cb(vi->svq))) { | 612 | if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) { |
613 | /* More just got used, free them then recheck. */ | 613 | /* More just got used, free them then recheck. */ |
614 | capacity += free_old_xmit_skbs(vi); | 614 | capacity += free_old_xmit_skbs(vi); |
615 | if (capacity >= 2+MAX_SKB_FRAGS) { | 615 | if (capacity >= 2+MAX_SKB_FRAGS) { |
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 12e02bf92c4a..3dc9befa5aec 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c | |||
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void) | |||
698 | { | 698 | { |
699 | #ifdef CONFIG_INTR_REMAP | 699 | #ifdef CONFIG_INTR_REMAP |
700 | struct acpi_table_dmar *dmar; | 700 | struct acpi_table_dmar *dmar; |
701 | /* | 701 | |
702 | * for now we will disable dma-remapping when interrupt | ||
703 | * remapping is enabled. | ||
704 | * When support for queued invalidation for IOTLB invalidation | ||
705 | * is added, we will not need this any more. | ||
706 | */ | ||
707 | dmar = (struct acpi_table_dmar *) dmar_tbl; | 702 | dmar = (struct acpi_table_dmar *) dmar_tbl; |
708 | if (ret && cpu_has_x2apic && dmar->flags & 0x1) | 703 | if (ret && cpu_has_x2apic && dmar->flags & 0x1) |
709 | printk(KERN_INFO | 704 | printk(KERN_INFO |
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6af6b628175b..59f17acf7f68 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c | |||
@@ -47,6 +47,8 @@ | |||
47 | #define ROOT_SIZE VTD_PAGE_SIZE | 47 | #define ROOT_SIZE VTD_PAGE_SIZE |
48 | #define CONTEXT_SIZE VTD_PAGE_SIZE | 48 | #define CONTEXT_SIZE VTD_PAGE_SIZE |
49 | 49 | ||
50 | #define IS_BRIDGE_HOST_DEVICE(pdev) \ | ||
51 | ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) | ||
50 | #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) | 52 | #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) |
51 | #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) | 53 | #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) |
52 | #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) | 54 | #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) |
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) | |||
116 | return (pfn + level_size(level) - 1) & level_mask(level); | 118 | return (pfn + level_size(level) - 1) & level_mask(level); |
117 | } | 119 | } |
118 | 120 | ||
121 | static inline unsigned long lvl_to_nr_pages(unsigned int lvl) | ||
122 | { | ||
123 | return 1 << ((lvl - 1) * LEVEL_STRIDE); | ||
124 | } | ||
125 | |||
119 | /* VT-d pages must always be _smaller_ than MM pages. Otherwise things | 126 | /* VT-d pages must always be _smaller_ than MM pages. Otherwise things |
120 | are never going to work. */ | 127 | are never going to work. */ |
121 | static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) | 128 | static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) |
@@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void); | |||
143 | static int rwbf_quirk; | 150 | static int rwbf_quirk; |
144 | 151 | ||
145 | /* | 152 | /* |
153 | * set to 1 to panic kernel if can't successfully enable VT-d | ||
154 | * (used when kernel is launched w/ TXT) | ||
155 | */ | ||
156 | static int force_on = 0; | ||
157 | |||
158 | /* | ||
146 | * 0: Present | 159 | * 0: Present |
147 | * 1-11: Reserved | 160 | * 1-11: Reserved |
148 | * 12-63: Context Ptr (12 - (haw-1)) | 161 | * 12-63: Context Ptr (12 - (haw-1)) |
@@ -338,6 +351,9 @@ struct dmar_domain { | |||
338 | int iommu_coherency;/* indicate coherency of iommu access */ | 351 | int iommu_coherency;/* indicate coherency of iommu access */ |
339 | int iommu_snooping; /* indicate snooping control feature*/ | 352 | int iommu_snooping; /* indicate snooping control feature*/ |
340 | int iommu_count; /* reference count of iommu */ | 353 | int iommu_count; /* reference count of iommu */ |
354 | int iommu_superpage;/* Level of superpages supported: | ||
355 | 0 == 4KiB (no superpages), 1 == 2MiB, | ||
356 | 2 == 1GiB, 3 == 512GiB, 4 == 256TiB */ | ||
341 | spinlock_t iommu_lock; /* protect iommu set in domain */ | 357 | spinlock_t iommu_lock; /* protect iommu set in domain */ |
342 | u64 max_addr; /* maximum mapped address */ | 358 | u64 max_addr; /* maximum mapped address */ |
343 | }; | 359 | }; |
@@ -387,6 +403,7 @@ int dmar_disabled = 1; | |||
387 | static int dmar_map_gfx = 1; | 403 | static int dmar_map_gfx = 1; |
388 | static int dmar_forcedac; | 404 | static int dmar_forcedac; |
389 | static int intel_iommu_strict; | 405 | static int intel_iommu_strict; |
406 | static int intel_iommu_superpage = 1; | ||
390 | 407 | ||
391 | #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) | 408 | #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) |
392 | static DEFINE_SPINLOCK(device_domain_lock); | 409 | static DEFINE_SPINLOCK(device_domain_lock); |
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str) | |||
417 | printk(KERN_INFO | 434 | printk(KERN_INFO |
418 | "Intel-IOMMU: disable batched IOTLB flush\n"); | 435 | "Intel-IOMMU: disable batched IOTLB flush\n"); |
419 | intel_iommu_strict = 1; | 436 | intel_iommu_strict = 1; |
437 | } else if (!strncmp(str, "sp_off", 6)) { | ||
438 | printk(KERN_INFO | ||
439 | "Intel-IOMMU: disable supported super page\n"); | ||
440 | intel_iommu_superpage = 0; | ||
420 | } | 441 | } |
421 | 442 | ||
422 | str += strcspn(str, ","); | 443 | str += strcspn(str, ","); |
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) | |||
555 | } | 576 | } |
556 | } | 577 | } |
557 | 578 | ||
579 | static void domain_update_iommu_superpage(struct dmar_domain *domain) | ||
580 | { | ||
581 | int i, mask = 0xf; | ||
582 | |||
583 | if (!intel_iommu_superpage) { | ||
584 | domain->iommu_superpage = 0; | ||
585 | return; | ||
586 | } | ||
587 | |||
588 | domain->iommu_superpage = 4; /* 1TiB */ | ||
589 | |||
590 | for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { | ||
591 | mask |= cap_super_page_val(g_iommus[i]->cap); | ||
592 | if (!mask) { | ||
593 | break; | ||
594 | } | ||
595 | } | ||
596 | domain->iommu_superpage = fls(mask); | ||
597 | } | ||
598 | |||
558 | /* Some capabilities may be different across iommus */ | 599 | /* Some capabilities may be different across iommus */ |
559 | static void domain_update_iommu_cap(struct dmar_domain *domain) | 600 | static void domain_update_iommu_cap(struct dmar_domain *domain) |
560 | { | 601 | { |
561 | domain_update_iommu_coherency(domain); | 602 | domain_update_iommu_coherency(domain); |
562 | domain_update_iommu_snooping(domain); | 603 | domain_update_iommu_snooping(domain); |
604 | domain_update_iommu_superpage(domain); | ||
563 | } | 605 | } |
564 | 606 | ||
565 | static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) | 607 | static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) |
@@ -689,23 +731,31 @@ out: | |||
689 | } | 731 | } |
690 | 732 | ||
691 | static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, | 733 | static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, |
692 | unsigned long pfn) | 734 | unsigned long pfn, int large_level) |
693 | { | 735 | { |
694 | int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; | 736 | int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; |
695 | struct dma_pte *parent, *pte = NULL; | 737 | struct dma_pte *parent, *pte = NULL; |
696 | int level = agaw_to_level(domain->agaw); | 738 | int level = agaw_to_level(domain->agaw); |
697 | int offset; | 739 | int offset, target_level; |
698 | 740 | ||
699 | BUG_ON(!domain->pgd); | 741 | BUG_ON(!domain->pgd); |
700 | BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); | 742 | BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); |
701 | parent = domain->pgd; | 743 | parent = domain->pgd; |
702 | 744 | ||
745 | /* Search pte */ | ||
746 | if (!large_level) | ||
747 | target_level = 1; | ||
748 | else | ||
749 | target_level = large_level; | ||
750 | |||
703 | while (level > 0) { | 751 | while (level > 0) { |
704 | void *tmp_page; | 752 | void *tmp_page; |
705 | 753 | ||
706 | offset = pfn_level_offset(pfn, level); | 754 | offset = pfn_level_offset(pfn, level); |
707 | pte = &parent[offset]; | 755 | pte = &parent[offset]; |
708 | if (level == 1) | 756 | if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) |
757 | break; | ||
758 | if (level == target_level) | ||
709 | break; | 759 | break; |
710 | 760 | ||
711 | if (!dma_pte_present(pte)) { | 761 | if (!dma_pte_present(pte)) { |
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, | |||
733 | return pte; | 783 | return pte; |
734 | } | 784 | } |
735 | 785 | ||
786 | |||
736 | /* return address's pte at specific level */ | 787 | /* return address's pte at specific level */ |
737 | static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, | 788 | static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, |
738 | unsigned long pfn, | 789 | unsigned long pfn, |
739 | int level) | 790 | int level, int *large_page) |
740 | { | 791 | { |
741 | struct dma_pte *parent, *pte = NULL; | 792 | struct dma_pte *parent, *pte = NULL; |
742 | int total = agaw_to_level(domain->agaw); | 793 | int total = agaw_to_level(domain->agaw); |
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, | |||
749 | if (level == total) | 800 | if (level == total) |
750 | return pte; | 801 | return pte; |
751 | 802 | ||
752 | if (!dma_pte_present(pte)) | 803 | if (!dma_pte_present(pte)) { |
804 | *large_page = total; | ||
753 | break; | 805 | break; |
806 | } | ||
807 | |||
808 | if (pte->val & DMA_PTE_LARGE_PAGE) { | ||
809 | *large_page = total; | ||
810 | return pte; | ||
811 | } | ||
812 | |||
754 | parent = phys_to_virt(dma_pte_addr(pte)); | 813 | parent = phys_to_virt(dma_pte_addr(pte)); |
755 | total--; | 814 | total--; |
756 | } | 815 | } |
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, | |||
763 | unsigned long last_pfn) | 822 | unsigned long last_pfn) |
764 | { | 823 | { |
765 | int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; | 824 | int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; |
825 | unsigned int large_page = 1; | ||
766 | struct dma_pte *first_pte, *pte; | 826 | struct dma_pte *first_pte, *pte; |
767 | 827 | ||
768 | BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); | 828 | BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); |
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, | |||
771 | 831 | ||
772 | /* we don't need lock here; nobody else touches the iova range */ | 832 | /* we don't need lock here; nobody else touches the iova range */ |
773 | do { | 833 | do { |
774 | first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); | 834 | large_page = 1; |
835 | first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); | ||
775 | if (!pte) { | 836 | if (!pte) { |
776 | start_pfn = align_to_level(start_pfn + 1, 2); | 837 | start_pfn = align_to_level(start_pfn + 1, large_page + 1); |
777 | continue; | 838 | continue; |
778 | } | 839 | } |
779 | do { | 840 | do { |
780 | dma_clear_pte(pte); | 841 | dma_clear_pte(pte); |
781 | start_pfn++; | 842 | start_pfn += lvl_to_nr_pages(large_page); |
782 | pte++; | 843 | pte++; |
783 | } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); | 844 | } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); |
784 | 845 | ||
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, | |||
798 | int total = agaw_to_level(domain->agaw); | 859 | int total = agaw_to_level(domain->agaw); |
799 | int level; | 860 | int level; |
800 | unsigned long tmp; | 861 | unsigned long tmp; |
862 | int large_page = 2; | ||
801 | 863 | ||
802 | BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); | 864 | BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); |
803 | BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); | 865 | BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); |
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, | |||
813 | return; | 875 | return; |
814 | 876 | ||
815 | do { | 877 | do { |
816 | first_pte = pte = dma_pfn_level_pte(domain, tmp, level); | 878 | large_page = level; |
879 | first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); | ||
880 | if (large_page > level) | ||
881 | level = large_page + 1; | ||
817 | if (!pte) { | 882 | if (!pte) { |
818 | tmp = align_to_level(tmp + 1, level + 1); | 883 | tmp = align_to_level(tmp + 1, level + 1); |
819 | continue; | 884 | continue; |
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) | |||
1397 | else | 1462 | else |
1398 | domain->iommu_snooping = 0; | 1463 | domain->iommu_snooping = 0; |
1399 | 1464 | ||
1465 | domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); | ||
1400 | domain->iommu_count = 1; | 1466 | domain->iommu_count = 1; |
1401 | domain->nid = iommu->node; | 1467 | domain->nid = iommu->node; |
1402 | 1468 | ||
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain) | |||
1417 | if (!domain) | 1483 | if (!domain) |
1418 | return; | 1484 | return; |
1419 | 1485 | ||
1486 | /* Flush any lazy unmaps that may reference this domain */ | ||
1487 | if (!intel_iommu_strict) | ||
1488 | flush_unmaps_timeout(0); | ||
1489 | |||
1420 | domain_remove_dev_info(domain); | 1490 | domain_remove_dev_info(domain); |
1421 | /* destroy iovas */ | 1491 | /* destroy iovas */ |
1422 | put_iova_domain(&domain->iovad); | 1492 | put_iova_domain(&domain->iovad); |
@@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, | |||
1648 | return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; | 1718 | return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; |
1649 | } | 1719 | } |
1650 | 1720 | ||
1721 | /* Return largest possible superpage level for a given mapping */ | ||
1722 | static inline int hardware_largepage_caps(struct dmar_domain *domain, | ||
1723 | unsigned long iov_pfn, | ||
1724 | unsigned long phy_pfn, | ||
1725 | unsigned long pages) | ||
1726 | { | ||
1727 | int support, level = 1; | ||
1728 | unsigned long pfnmerge; | ||
1729 | |||
1730 | support = domain->iommu_superpage; | ||
1731 | |||
1732 | /* To use a large page, the virtual *and* physical addresses | ||
1733 | must be aligned to 2MiB/1GiB/etc. Lower bits set in either | ||
1734 | of them will mean we have to use smaller pages. So just | ||
1735 | merge them and check both at once. */ | ||
1736 | pfnmerge = iov_pfn | phy_pfn; | ||
1737 | |||
1738 | while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { | ||
1739 | pages >>= VTD_STRIDE_SHIFT; | ||
1740 | if (!pages) | ||
1741 | break; | ||
1742 | pfnmerge >>= VTD_STRIDE_SHIFT; | ||
1743 | level++; | ||
1744 | support--; | ||
1745 | } | ||
1746 | return level; | ||
1747 | } | ||
1748 | |||
1651 | static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, | 1749 | static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, |
1652 | struct scatterlist *sg, unsigned long phys_pfn, | 1750 | struct scatterlist *sg, unsigned long phys_pfn, |
1653 | unsigned long nr_pages, int prot) | 1751 | unsigned long nr_pages, int prot) |
@@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, | |||
1656 | phys_addr_t uninitialized_var(pteval); | 1754 | phys_addr_t uninitialized_var(pteval); |
1657 | int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; | 1755 | int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; |
1658 | unsigned long sg_res; | 1756 | unsigned long sg_res; |
1757 | unsigned int largepage_lvl = 0; | ||
1758 | unsigned long lvl_pages = 0; | ||
1659 | 1759 | ||
1660 | BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); | 1760 | BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); |
1661 | 1761 | ||
@@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, | |||
1671 | pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; | 1771 | pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; |
1672 | } | 1772 | } |
1673 | 1773 | ||
1674 | while (nr_pages--) { | 1774 | while (nr_pages > 0) { |
1675 | uint64_t tmp; | 1775 | uint64_t tmp; |
1676 | 1776 | ||
1677 | if (!sg_res) { | 1777 | if (!sg_res) { |
@@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, | |||
1679 | sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; | 1779 | sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; |
1680 | sg->dma_length = sg->length; | 1780 | sg->dma_length = sg->length; |
1681 | pteval = page_to_phys(sg_page(sg)) | prot; | 1781 | pteval = page_to_phys(sg_page(sg)) | prot; |
1782 | phys_pfn = pteval >> VTD_PAGE_SHIFT; | ||
1682 | } | 1783 | } |
1784 | |||
1683 | if (!pte) { | 1785 | if (!pte) { |
1684 | first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); | 1786 | largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); |
1787 | |||
1788 | first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); | ||
1685 | if (!pte) | 1789 | if (!pte) |
1686 | return -ENOMEM; | 1790 | return -ENOMEM; |
1791 | /* It is large page*/ | ||
1792 | if (largepage_lvl > 1) | ||
1793 | pteval |= DMA_PTE_LARGE_PAGE; | ||
1794 | else | ||
1795 | pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; | ||
1796 | |||
1687 | } | 1797 | } |
1688 | /* We don't need lock here, nobody else | 1798 | /* We don't need lock here, nobody else |
1689 | * touches the iova range | 1799 | * touches the iova range |
@@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, | |||
1699 | } | 1809 | } |
1700 | WARN_ON(1); | 1810 | WARN_ON(1); |
1701 | } | 1811 | } |
1812 | |||
1813 | lvl_pages = lvl_to_nr_pages(largepage_lvl); | ||
1814 | |||
1815 | BUG_ON(nr_pages < lvl_pages); | ||
1816 | BUG_ON(sg_res < lvl_pages); | ||
1817 | |||
1818 | nr_pages -= lvl_pages; | ||
1819 | iov_pfn += lvl_pages; | ||
1820 | phys_pfn += lvl_pages; | ||
1821 | pteval += lvl_pages * VTD_PAGE_SIZE; | ||
1822 | sg_res -= lvl_pages; | ||
1823 | |||
1824 | /* If the next PTE would be the first in a new page, then we | ||
1825 | need to flush the cache on the entries we've just written. | ||
1826 | And then we'll need to recalculate 'pte', so clear it and | ||
1827 | let it get set again in the if (!pte) block above. | ||
1828 | |||
1829 | If we're done (!nr_pages) we need to flush the cache too. | ||
1830 | |||
1831 | Also if we've been setting superpages, we may need to | ||
1832 | recalculate 'pte' and switch back to smaller pages for the | ||
1833 | end of the mapping, if the trailing size is not enough to | ||
1834 | use another superpage (i.e. sg_res < lvl_pages). */ | ||
1702 | pte++; | 1835 | pte++; |
1703 | if (!nr_pages || first_pte_in_page(pte)) { | 1836 | if (!nr_pages || first_pte_in_page(pte) || |
1837 | (largepage_lvl > 1 && sg_res < lvl_pages)) { | ||
1704 | domain_flush_cache(domain, first_pte, | 1838 | domain_flush_cache(domain, first_pte, |
1705 | (void *)pte - (void *)first_pte); | 1839 | (void *)pte - (void *)first_pte); |
1706 | pte = NULL; | 1840 | pte = NULL; |
1707 | } | 1841 | } |
1708 | iov_pfn++; | 1842 | |
1709 | pteval += VTD_PAGE_SIZE; | 1843 | if (!sg_res && nr_pages) |
1710 | sg_res--; | ||
1711 | if (!sg_res) | ||
1712 | sg = sg_next(sg); | 1844 | sg = sg_next(sg); |
1713 | } | 1845 | } |
1714 | return 0; | 1846 | return 0; |
@@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, | |||
2016 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | 2148 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) |
2017 | return 0; | 2149 | return 0; |
2018 | return iommu_prepare_identity_map(pdev, rmrr->base_address, | 2150 | return iommu_prepare_identity_map(pdev, rmrr->base_address, |
2019 | rmrr->end_address + 1); | 2151 | rmrr->end_address); |
2020 | } | 2152 | } |
2021 | 2153 | ||
2022 | #ifdef CONFIG_DMAR_FLOPPY_WA | 2154 | #ifdef CONFIG_DMAR_FLOPPY_WA |
@@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void) | |||
2030 | return; | 2162 | return; |
2031 | 2163 | ||
2032 | printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); | 2164 | printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); |
2033 | ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); | 2165 | ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); |
2034 | 2166 | ||
2035 | if (ret) | 2167 | if (ret) |
2036 | printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " | 2168 | printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " |
@@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev) | |||
2106 | if (likely(!iommu_identity_mapping)) | 2238 | if (likely(!iommu_identity_mapping)) |
2107 | return 0; | 2239 | return 0; |
2108 | 2240 | ||
2241 | info = pdev->dev.archdata.iommu; | ||
2242 | if (info && info != DUMMY_DEVICE_DOMAIN_INFO) | ||
2243 | return (info->domain == si_domain); | ||
2109 | 2244 | ||
2110 | list_for_each_entry(info, &si_domain->devices, link) | ||
2111 | if (info->dev == pdev) | ||
2112 | return 1; | ||
2113 | return 0; | 2245 | return 0; |
2114 | } | 2246 | } |
2115 | 2247 | ||
@@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) | |||
2187 | * Assume that they will -- if they turn out not to be, then we can | 2319 | * Assume that they will -- if they turn out not to be, then we can |
2188 | * take them out of the 1:1 domain later. | 2320 | * take them out of the 1:1 domain later. |
2189 | */ | 2321 | */ |
2190 | if (!startup) | 2322 | if (!startup) { |
2191 | return pdev->dma_mask > DMA_BIT_MASK(32); | 2323 | /* |
2324 | * If the device's dma_mask is less than the system's memory | ||
2325 | * size then this is not a candidate for identity mapping. | ||
2326 | */ | ||
2327 | u64 dma_mask = pdev->dma_mask; | ||
2328 | |||
2329 | if (pdev->dev.coherent_dma_mask && | ||
2330 | pdev->dev.coherent_dma_mask < dma_mask) | ||
2331 | dma_mask = pdev->dev.coherent_dma_mask; | ||
2332 | |||
2333 | return dma_mask >= dma_get_required_mask(&pdev->dev); | ||
2334 | } | ||
2192 | 2335 | ||
2193 | return 1; | 2336 | return 1; |
2194 | } | 2337 | } |
@@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw) | |||
2203 | return -EFAULT; | 2346 | return -EFAULT; |
2204 | 2347 | ||
2205 | for_each_pci_dev(pdev) { | 2348 | for_each_pci_dev(pdev) { |
2349 | /* Skip Host/PCI Bridge devices */ | ||
2350 | if (IS_BRIDGE_HOST_DEVICE(pdev)) | ||
2351 | continue; | ||
2206 | if (iommu_should_identity_map(pdev, 1)) { | 2352 | if (iommu_should_identity_map(pdev, 1)) { |
2207 | printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", | 2353 | printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", |
2208 | hw ? "hardware" : "software", pci_name(pdev)); | 2354 | hw ? "hardware" : "software", pci_name(pdev)); |
@@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) | |||
2218 | return 0; | 2364 | return 0; |
2219 | } | 2365 | } |
2220 | 2366 | ||
2221 | static int __init init_dmars(int force_on) | 2367 | static int __init init_dmars(void) |
2222 | { | 2368 | { |
2223 | struct dmar_drhd_unit *drhd; | 2369 | struct dmar_drhd_unit *drhd; |
2224 | struct dmar_rmrr_unit *rmrr; | 2370 | struct dmar_rmrr_unit *rmrr; |
@@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, | |||
2592 | iommu = domain_get_iommu(domain); | 2738 | iommu = domain_get_iommu(domain); |
2593 | size = aligned_nrpages(paddr, size); | 2739 | size = aligned_nrpages(paddr, size); |
2594 | 2740 | ||
2595 | iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), | 2741 | iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); |
2596 | pdev->dma_mask); | ||
2597 | if (!iova) | 2742 | if (!iova) |
2598 | goto error; | 2743 | goto error; |
2599 | 2744 | ||
@@ -3118,7 +3263,17 @@ static int init_iommu_hw(void) | |||
3118 | if (iommu->qi) | 3263 | if (iommu->qi) |
3119 | dmar_reenable_qi(iommu); | 3264 | dmar_reenable_qi(iommu); |
3120 | 3265 | ||
3121 | for_each_active_iommu(iommu, drhd) { | 3266 | for_each_iommu(iommu, drhd) { |
3267 | if (drhd->ignored) { | ||
3268 | /* | ||
3269 | * we always have to disable PMRs or DMA may fail on | ||
3270 | * this device | ||
3271 | */ | ||
3272 | if (force_on) | ||
3273 | iommu_disable_protect_mem_regions(iommu); | ||
3274 | continue; | ||
3275 | } | ||
3276 | |||
3122 | iommu_flush_write_buffer(iommu); | 3277 | iommu_flush_write_buffer(iommu); |
3123 | 3278 | ||
3124 | iommu_set_root_entry(iommu); | 3279 | iommu_set_root_entry(iommu); |
@@ -3127,7 +3282,8 @@ static int init_iommu_hw(void) | |||
3127 | DMA_CCMD_GLOBAL_INVL); | 3282 | DMA_CCMD_GLOBAL_INVL); |
3128 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, | 3283 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, |
3129 | DMA_TLB_GLOBAL_FLUSH); | 3284 | DMA_TLB_GLOBAL_FLUSH); |
3130 | iommu_enable_translation(iommu); | 3285 | if (iommu_enable_translation(iommu)) |
3286 | return 1; | ||
3131 | iommu_disable_protect_mem_regions(iommu); | 3287 | iommu_disable_protect_mem_regions(iommu); |
3132 | } | 3288 | } |
3133 | 3289 | ||
@@ -3194,7 +3350,10 @@ static void iommu_resume(void) | |||
3194 | unsigned long flag; | 3350 | unsigned long flag; |
3195 | 3351 | ||
3196 | if (init_iommu_hw()) { | 3352 | if (init_iommu_hw()) { |
3197 | WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); | 3353 | if (force_on) |
3354 | panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); | ||
3355 | else | ||
3356 | WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); | ||
3198 | return; | 3357 | return; |
3199 | } | 3358 | } |
3200 | 3359 | ||
@@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = { | |||
3271 | int __init intel_iommu_init(void) | 3430 | int __init intel_iommu_init(void) |
3272 | { | 3431 | { |
3273 | int ret = 0; | 3432 | int ret = 0; |
3274 | int force_on = 0; | ||
3275 | 3433 | ||
3276 | /* VT-d is required for a TXT/tboot launch, so enforce that */ | 3434 | /* VT-d is required for a TXT/tboot launch, so enforce that */ |
3277 | force_on = tboot_force_iommu(); | 3435 | force_on = tboot_force_iommu(); |
@@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void) | |||
3309 | 3467 | ||
3310 | init_no_remapping_devices(); | 3468 | init_no_remapping_devices(); |
3311 | 3469 | ||
3312 | ret = init_dmars(force_on); | 3470 | ret = init_dmars(); |
3313 | if (ret) { | 3471 | if (ret) { |
3314 | if (force_on) | 3472 | if (force_on) |
3315 | panic("tboot: Failed to initialize DMARs\n"); | 3473 | panic("tboot: Failed to initialize DMARs\n"); |
@@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, | |||
3380 | spin_lock_irqsave(&device_domain_lock, flags); | 3538 | spin_lock_irqsave(&device_domain_lock, flags); |
3381 | list_for_each_safe(entry, tmp, &domain->devices) { | 3539 | list_for_each_safe(entry, tmp, &domain->devices) { |
3382 | info = list_entry(entry, struct device_domain_info, link); | 3540 | info = list_entry(entry, struct device_domain_info, link); |
3383 | /* No need to compare PCI domain; it has to be the same */ | 3541 | if (info->segment == pci_domain_nr(pdev->bus) && |
3384 | if (info->bus == pdev->bus->number && | 3542 | info->bus == pdev->bus->number && |
3385 | info->devfn == pdev->devfn) { | 3543 | info->devfn == pdev->devfn) { |
3386 | list_del(&info->link); | 3544 | list_del(&info->link); |
3387 | list_del(&info->global); | 3545 | list_del(&info->global); |
@@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, | |||
3419 | domain_update_iommu_cap(domain); | 3577 | domain_update_iommu_cap(domain); |
3420 | spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); | 3578 | spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); |
3421 | 3579 | ||
3422 | spin_lock_irqsave(&iommu->lock, tmp_flags); | 3580 | if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && |
3423 | clear_bit(domain->id, iommu->domain_ids); | 3581 | !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { |
3424 | iommu->domains[domain->id] = NULL; | 3582 | spin_lock_irqsave(&iommu->lock, tmp_flags); |
3425 | spin_unlock_irqrestore(&iommu->lock, tmp_flags); | 3583 | clear_bit(domain->id, iommu->domain_ids); |
3584 | iommu->domains[domain->id] = NULL; | ||
3585 | spin_unlock_irqrestore(&iommu->lock, tmp_flags); | ||
3586 | } | ||
3426 | } | 3587 | } |
3427 | 3588 | ||
3428 | spin_unlock_irqrestore(&device_domain_lock, flags); | 3589 | spin_unlock_irqrestore(&device_domain_lock, flags); |
@@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) | |||
3505 | domain->iommu_count = 0; | 3666 | domain->iommu_count = 0; |
3506 | domain->iommu_coherency = 0; | 3667 | domain->iommu_coherency = 0; |
3507 | domain->iommu_snooping = 0; | 3668 | domain->iommu_snooping = 0; |
3669 | domain->iommu_superpage = 0; | ||
3508 | domain->max_addr = 0; | 3670 | domain->max_addr = 0; |
3509 | domain->nid = -1; | 3671 | domain->nid = -1; |
3510 | 3672 | ||
@@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, | |||
3720 | struct dma_pte *pte; | 3882 | struct dma_pte *pte; |
3721 | u64 phys = 0; | 3883 | u64 phys = 0; |
3722 | 3884 | ||
3723 | pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); | 3885 | pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); |
3724 | if (pte) | 3886 | if (pte) |
3725 | phys = dma_pte_addr(pte); | 3887 | phys = dma_pte_addr(pte); |
3726 | 3888 | ||
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 9606e599a475..c5c274ab5c5a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c | |||
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) | |||
63 | curr = iovad->cached32_node; | 63 | curr = iovad->cached32_node; |
64 | cached_iova = container_of(curr, struct iova, node); | 64 | cached_iova = container_of(curr, struct iova, node); |
65 | 65 | ||
66 | if (free->pfn_lo >= cached_iova->pfn_lo) | 66 | if (free->pfn_lo >= cached_iova->pfn_lo) { |
67 | iovad->cached32_node = rb_next(&free->node); | 67 | struct rb_node *node = rb_next(&free->node); |
68 | struct iova *iova = container_of(node, struct iova, node); | ||
69 | |||
70 | /* only cache if it's below 32bit pfn */ | ||
71 | if (node && iova->pfn_lo < iovad->dma_32bit_pfn) | ||
72 | iovad->cached32_node = node; | ||
73 | else | ||
74 | iovad->cached32_node = NULL; | ||
75 | } | ||
68 | } | 76 | } |
69 | 77 | ||
70 | /* Computes the padding size required, to make the | 78 | /* Computes the padding size required, to make the |
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2f7c76a85e53..e224a92baa16 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net) | |||
144 | } | 144 | } |
145 | 145 | ||
146 | mutex_lock(&vq->mutex); | 146 | mutex_lock(&vq->mutex); |
147 | vhost_disable_notify(vq); | 147 | vhost_disable_notify(&net->dev, vq); |
148 | 148 | ||
149 | if (wmem < sock->sk->sk_sndbuf / 2) | 149 | if (wmem < sock->sk->sk_sndbuf / 2) |
150 | tx_poll_stop(net); | 150 | tx_poll_stop(net); |
@@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net) | |||
166 | set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | 166 | set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); |
167 | break; | 167 | break; |
168 | } | 168 | } |
169 | if (unlikely(vhost_enable_notify(vq))) { | 169 | if (unlikely(vhost_enable_notify(&net->dev, vq))) { |
170 | vhost_disable_notify(vq); | 170 | vhost_disable_notify(&net->dev, vq); |
171 | continue; | 171 | continue; |
172 | } | 172 | } |
173 | break; | 173 | break; |
@@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net) | |||
315 | return; | 315 | return; |
316 | 316 | ||
317 | mutex_lock(&vq->mutex); | 317 | mutex_lock(&vq->mutex); |
318 | vhost_disable_notify(vq); | 318 | vhost_disable_notify(&net->dev, vq); |
319 | vhost_hlen = vq->vhost_hlen; | 319 | vhost_hlen = vq->vhost_hlen; |
320 | sock_hlen = vq->sock_hlen; | 320 | sock_hlen = vq->sock_hlen; |
321 | 321 | ||
@@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net) | |||
334 | break; | 334 | break; |
335 | /* OK, now we need to know about added descriptors. */ | 335 | /* OK, now we need to know about added descriptors. */ |
336 | if (!headcount) { | 336 | if (!headcount) { |
337 | if (unlikely(vhost_enable_notify(vq))) { | 337 | if (unlikely(vhost_enable_notify(&net->dev, vq))) { |
338 | /* They have slipped one in as we were | 338 | /* They have slipped one in as we were |
339 | * doing that: check again. */ | 339 | * doing that: check again. */ |
340 | vhost_disable_notify(vq); | 340 | vhost_disable_notify(&net->dev, vq); |
341 | continue; | 341 | continue; |
342 | } | 342 | } |
343 | /* Nothing new? Wait for eventfd to tell us | 343 | /* Nothing new? Wait for eventfd to tell us |
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 099f30230d06..734e1d74ad80 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c | |||
@@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n) | |||
49 | return; | 49 | return; |
50 | 50 | ||
51 | mutex_lock(&vq->mutex); | 51 | mutex_lock(&vq->mutex); |
52 | vhost_disable_notify(vq); | 52 | vhost_disable_notify(&n->dev, vq); |
53 | 53 | ||
54 | for (;;) { | 54 | for (;;) { |
55 | head = vhost_get_vq_desc(&n->dev, vq, vq->iov, | 55 | head = vhost_get_vq_desc(&n->dev, vq, vq->iov, |
@@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n) | |||
61 | break; | 61 | break; |
62 | /* Nothing new? Wait for eventfd to tell us they refilled. */ | 62 | /* Nothing new? Wait for eventfd to tell us they refilled. */ |
63 | if (head == vq->num) { | 63 | if (head == vq->num) { |
64 | if (unlikely(vhost_enable_notify(vq))) { | 64 | if (unlikely(vhost_enable_notify(&n->dev, vq))) { |
65 | vhost_disable_notify(vq); | 65 | vhost_disable_notify(&n->dev, vq); |
66 | continue; | 66 | continue; |
67 | } | 67 | } |
68 | break; | 68 | break; |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7aa4eea930f1..ea966b356352 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
@@ -37,6 +37,9 @@ enum { | |||
37 | VHOST_MEMORY_F_LOG = 0x1, | 37 | VHOST_MEMORY_F_LOG = 0x1, |
38 | }; | 38 | }; |
39 | 39 | ||
40 | #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) | ||
41 | #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) | ||
42 | |||
40 | static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, | 43 | static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, |
41 | poll_table *pt) | 44 | poll_table *pt) |
42 | { | 45 | { |
@@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, | |||
161 | vq->last_avail_idx = 0; | 164 | vq->last_avail_idx = 0; |
162 | vq->avail_idx = 0; | 165 | vq->avail_idx = 0; |
163 | vq->last_used_idx = 0; | 166 | vq->last_used_idx = 0; |
167 | vq->signalled_used = 0; | ||
168 | vq->signalled_used_valid = false; | ||
164 | vq->used_flags = 0; | 169 | vq->used_flags = 0; |
165 | vq->log_used = false; | 170 | vq->log_used = false; |
166 | vq->log_addr = -1ull; | 171 | vq->log_addr = -1ull; |
@@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, | |||
489 | return 1; | 494 | return 1; |
490 | } | 495 | } |
491 | 496 | ||
492 | static int vq_access_ok(unsigned int num, | 497 | static int vq_access_ok(struct vhost_dev *d, unsigned int num, |
493 | struct vring_desc __user *desc, | 498 | struct vring_desc __user *desc, |
494 | struct vring_avail __user *avail, | 499 | struct vring_avail __user *avail, |
495 | struct vring_used __user *used) | 500 | struct vring_used __user *used) |
496 | { | 501 | { |
502 | size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | ||
497 | return access_ok(VERIFY_READ, desc, num * sizeof *desc) && | 503 | return access_ok(VERIFY_READ, desc, num * sizeof *desc) && |
498 | access_ok(VERIFY_READ, avail, | 504 | access_ok(VERIFY_READ, avail, |
499 | sizeof *avail + num * sizeof *avail->ring) && | 505 | sizeof *avail + num * sizeof *avail->ring + s) && |
500 | access_ok(VERIFY_WRITE, used, | 506 | access_ok(VERIFY_WRITE, used, |
501 | sizeof *used + num * sizeof *used->ring); | 507 | sizeof *used + num * sizeof *used->ring + s); |
502 | } | 508 | } |
503 | 509 | ||
504 | /* Can we log writes? */ | 510 | /* Can we log writes? */ |
@@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev) | |||
514 | 520 | ||
515 | /* Verify access for write logging. */ | 521 | /* Verify access for write logging. */ |
516 | /* Caller should have vq mutex and device mutex */ | 522 | /* Caller should have vq mutex and device mutex */ |
517 | static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) | 523 | static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, |
524 | void __user *log_base) | ||
518 | { | 525 | { |
519 | struct vhost_memory *mp; | 526 | struct vhost_memory *mp; |
527 | size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | ||
520 | 528 | ||
521 | mp = rcu_dereference_protected(vq->dev->memory, | 529 | mp = rcu_dereference_protected(vq->dev->memory, |
522 | lockdep_is_held(&vq->mutex)); | 530 | lockdep_is_held(&vq->mutex)); |
@@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) | |||
524 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && | 532 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && |
525 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, | 533 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
526 | sizeof *vq->used + | 534 | sizeof *vq->used + |
527 | vq->num * sizeof *vq->used->ring)); | 535 | vq->num * sizeof *vq->used->ring + s)); |
528 | } | 536 | } |
529 | 537 | ||
530 | /* Can we start vq? */ | 538 | /* Can we start vq? */ |
531 | /* Caller should have vq mutex and device mutex */ | 539 | /* Caller should have vq mutex and device mutex */ |
532 | int vhost_vq_access_ok(struct vhost_virtqueue *vq) | 540 | int vhost_vq_access_ok(struct vhost_virtqueue *vq) |
533 | { | 541 | { |
534 | return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && | 542 | return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && |
535 | vq_log_access_ok(vq, vq->log_base); | 543 | vq_log_access_ok(vq->dev, vq, vq->log_base); |
536 | } | 544 | } |
537 | 545 | ||
538 | static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) | 546 | static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) |
@@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq, | |||
577 | 585 | ||
578 | if (r) | 586 | if (r) |
579 | return r; | 587 | return r; |
588 | vq->signalled_used_valid = false; | ||
580 | return get_user(vq->last_used_idx, &used->idx); | 589 | return get_user(vq->last_used_idx, &used->idx); |
581 | } | 590 | } |
582 | 591 | ||
@@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) | |||
674 | * If it is not, we don't as size might not have been setup. | 683 | * If it is not, we don't as size might not have been setup. |
675 | * We will verify when backend is configured. */ | 684 | * We will verify when backend is configured. */ |
676 | if (vq->private_data) { | 685 | if (vq->private_data) { |
677 | if (!vq_access_ok(vq->num, | 686 | if (!vq_access_ok(d, vq->num, |
678 | (void __user *)(unsigned long)a.desc_user_addr, | 687 | (void __user *)(unsigned long)a.desc_user_addr, |
679 | (void __user *)(unsigned long)a.avail_user_addr, | 688 | (void __user *)(unsigned long)a.avail_user_addr, |
680 | (void __user *)(unsigned long)a.used_user_addr)) { | 689 | (void __user *)(unsigned long)a.used_user_addr)) { |
@@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) | |||
818 | vq = d->vqs + i; | 827 | vq = d->vqs + i; |
819 | mutex_lock(&vq->mutex); | 828 | mutex_lock(&vq->mutex); |
820 | /* If ring is inactive, will check when it's enabled. */ | 829 | /* If ring is inactive, will check when it's enabled. */ |
821 | if (vq->private_data && !vq_log_access_ok(vq, base)) | 830 | if (vq->private_data && !vq_log_access_ok(d, vq, base)) |
822 | r = -EFAULT; | 831 | r = -EFAULT; |
823 | else | 832 | else |
824 | vq->log_base = base; | 833 | vq->log_base = base; |
@@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, | |||
1219 | 1228 | ||
1220 | /* On success, increment avail index. */ | 1229 | /* On success, increment avail index. */ |
1221 | vq->last_avail_idx++; | 1230 | vq->last_avail_idx++; |
1231 | |||
1232 | /* Assume notifications from guest are disabled at this point, | ||
1233 | * if they aren't we would need to update avail_event index. */ | ||
1234 | BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); | ||
1222 | return head; | 1235 | return head; |
1223 | } | 1236 | } |
1224 | 1237 | ||
@@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) | |||
1267 | eventfd_signal(vq->log_ctx, 1); | 1280 | eventfd_signal(vq->log_ctx, 1); |
1268 | } | 1281 | } |
1269 | vq->last_used_idx++; | 1282 | vq->last_used_idx++; |
1283 | /* If the driver never bothers to signal in a very long while, | ||
1284 | * used index might wrap around. If that happens, invalidate | ||
1285 | * signalled_used index we stored. TODO: make sure driver | ||
1286 | * signals at least once in 2^16 and remove this. */ | ||
1287 | if (unlikely(vq->last_used_idx == vq->signalled_used)) | ||
1288 | vq->signalled_used_valid = false; | ||
1270 | return 0; | 1289 | return 0; |
1271 | } | 1290 | } |
1272 | 1291 | ||
@@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, | |||
1275 | unsigned count) | 1294 | unsigned count) |
1276 | { | 1295 | { |
1277 | struct vring_used_elem __user *used; | 1296 | struct vring_used_elem __user *used; |
1297 | u16 old, new; | ||
1278 | int start; | 1298 | int start; |
1279 | 1299 | ||
1280 | start = vq->last_used_idx % vq->num; | 1300 | start = vq->last_used_idx % vq->num; |
@@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, | |||
1292 | ((void __user *)used - (void __user *)vq->used), | 1312 | ((void __user *)used - (void __user *)vq->used), |
1293 | count * sizeof *used); | 1313 | count * sizeof *used); |
1294 | } | 1314 | } |
1295 | vq->last_used_idx += count; | 1315 | old = vq->last_used_idx; |
1316 | new = (vq->last_used_idx += count); | ||
1317 | /* If the driver never bothers to signal in a very long while, | ||
1318 | * used index might wrap around. If that happens, invalidate | ||
1319 | * signalled_used index we stored. TODO: make sure driver | ||
1320 | * signals at least once in 2^16 and remove this. */ | ||
1321 | if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) | ||
1322 | vq->signalled_used_valid = false; | ||
1296 | return 0; | 1323 | return 0; |
1297 | } | 1324 | } |
1298 | 1325 | ||
@@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, | |||
1331 | return r; | 1358 | return r; |
1332 | } | 1359 | } |
1333 | 1360 | ||
1334 | /* This actually signals the guest, using eventfd. */ | 1361 | static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) |
1335 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) | ||
1336 | { | 1362 | { |
1337 | __u16 flags; | 1363 | __u16 old, new, event; |
1338 | 1364 | bool v; | |
1339 | /* Flush out used index updates. This is paired | 1365 | /* Flush out used index updates. This is paired |
1340 | * with the barrier that the Guest executes when enabling | 1366 | * with the barrier that the Guest executes when enabling |
1341 | * interrupts. */ | 1367 | * interrupts. */ |
1342 | smp_mb(); | 1368 | smp_mb(); |
1343 | 1369 | ||
1344 | if (__get_user(flags, &vq->avail->flags)) { | 1370 | if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && |
1345 | vq_err(vq, "Failed to get flags"); | 1371 | unlikely(vq->avail_idx == vq->last_avail_idx)) |
1346 | return; | 1372 | return true; |
1373 | |||
1374 | if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | ||
1375 | __u16 flags; | ||
1376 | if (__get_user(flags, &vq->avail->flags)) { | ||
1377 | vq_err(vq, "Failed to get flags"); | ||
1378 | return true; | ||
1379 | } | ||
1380 | return !(flags & VRING_AVAIL_F_NO_INTERRUPT); | ||
1347 | } | 1381 | } |
1382 | old = vq->signalled_used; | ||
1383 | v = vq->signalled_used_valid; | ||
1384 | new = vq->signalled_used = vq->last_used_idx; | ||
1385 | vq->signalled_used_valid = true; | ||
1348 | 1386 | ||
1349 | /* If they don't want an interrupt, don't signal, unless empty. */ | 1387 | if (unlikely(!v)) |
1350 | if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && | 1388 | return true; |
1351 | (vq->avail_idx != vq->last_avail_idx || | ||
1352 | !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) | ||
1353 | return; | ||
1354 | 1389 | ||
1390 | if (get_user(event, vhost_used_event(vq))) { | ||
1391 | vq_err(vq, "Failed to get used event idx"); | ||
1392 | return true; | ||
1393 | } | ||
1394 | return vring_need_event(event, new, old); | ||
1395 | } | ||
1396 | |||
1397 | /* This actually signals the guest, using eventfd. */ | ||
1398 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) | ||
1399 | { | ||
1355 | /* Signal the Guest to tell them we used something up. */ | 1400 | /* Signal the Guest to tell them we used something up. */ |
1356 | if (vq->call_ctx) | 1401 | if (vq->call_ctx && vhost_notify(dev, vq)) |
1357 | eventfd_signal(vq->call_ctx, 1); | 1402 | eventfd_signal(vq->call_ctx, 1); |
1358 | } | 1403 | } |
1359 | 1404 | ||
@@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, | |||
1376 | } | 1421 | } |
1377 | 1422 | ||
1378 | /* OK, now we need to know about added descriptors. */ | 1423 | /* OK, now we need to know about added descriptors. */ |
1379 | bool vhost_enable_notify(struct vhost_virtqueue *vq) | 1424 | bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) |
1380 | { | 1425 | { |
1381 | u16 avail_idx; | 1426 | u16 avail_idx; |
1382 | int r; | 1427 | int r; |
@@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) | |||
1384 | if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) | 1429 | if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) |
1385 | return false; | 1430 | return false; |
1386 | vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; | 1431 | vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; |
1387 | r = put_user(vq->used_flags, &vq->used->flags); | 1432 | if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { |
1388 | if (r) { | 1433 | r = put_user(vq->used_flags, &vq->used->flags); |
1389 | vq_err(vq, "Failed to enable notification at %p: %d\n", | 1434 | if (r) { |
1390 | &vq->used->flags, r); | 1435 | vq_err(vq, "Failed to enable notification at %p: %d\n", |
1391 | return false; | 1436 | &vq->used->flags, r); |
1437 | return false; | ||
1438 | } | ||
1439 | } else { | ||
1440 | r = put_user(vq->avail_idx, vhost_avail_event(vq)); | ||
1441 | if (r) { | ||
1442 | vq_err(vq, "Failed to update avail event index at %p: %d\n", | ||
1443 | vhost_avail_event(vq), r); | ||
1444 | return false; | ||
1445 | } | ||
1446 | } | ||
1447 | if (unlikely(vq->log_used)) { | ||
1448 | void __user *used; | ||
1449 | /* Make sure data is seen before log. */ | ||
1450 | smp_wmb(); | ||
1451 | used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? | ||
1452 | &vq->used->flags : vhost_avail_event(vq); | ||
1453 | /* Log used flags or event index entry write. Both are 16 bit | ||
1454 | * fields. */ | ||
1455 | log_write(vq->log_base, vq->log_addr + | ||
1456 | (used - (void __user *)vq->used), | ||
1457 | sizeof(u16)); | ||
1458 | if (vq->log_ctx) | ||
1459 | eventfd_signal(vq->log_ctx, 1); | ||
1392 | } | 1460 | } |
1393 | /* They could have slipped one in as we were doing that: make | 1461 | /* They could have slipped one in as we were doing that: make |
1394 | * sure it's written, then check again. */ | 1462 | * sure it's written, then check again. */ |
@@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) | |||
1404 | } | 1472 | } |
1405 | 1473 | ||
1406 | /* We don't need to be notified again. */ | 1474 | /* We don't need to be notified again. */ |
1407 | void vhost_disable_notify(struct vhost_virtqueue *vq) | 1475 | void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) |
1408 | { | 1476 | { |
1409 | int r; | 1477 | int r; |
1410 | 1478 | ||
1411 | if (vq->used_flags & VRING_USED_F_NO_NOTIFY) | 1479 | if (vq->used_flags & VRING_USED_F_NO_NOTIFY) |
1412 | return; | 1480 | return; |
1413 | vq->used_flags |= VRING_USED_F_NO_NOTIFY; | 1481 | vq->used_flags |= VRING_USED_F_NO_NOTIFY; |
1414 | r = put_user(vq->used_flags, &vq->used->flags); | 1482 | if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { |
1415 | if (r) | 1483 | r = put_user(vq->used_flags, &vq->used->flags); |
1416 | vq_err(vq, "Failed to enable notification at %p: %d\n", | 1484 | if (r) |
1417 | &vq->used->flags, r); | 1485 | vq_err(vq, "Failed to enable notification at %p: %d\n", |
1486 | &vq->used->flags, r); | ||
1487 | } | ||
1418 | } | 1488 | } |
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index b3363ae38518..8e03379dd30f 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h | |||
@@ -84,6 +84,12 @@ struct vhost_virtqueue { | |||
84 | /* Used flags */ | 84 | /* Used flags */ |
85 | u16 used_flags; | 85 | u16 used_flags; |
86 | 86 | ||
87 | /* Last used index value we have signalled on */ | ||
88 | u16 signalled_used; | ||
89 | |||
90 | /* Whether signalled_used holds a valid value */ | ||
91 | bool signalled_used_valid; | ||
92 | |||
87 | /* Log writes to used structure. */ | 93 | /* Log writes to used structure. */ |
88 | bool log_used; | 94 | bool log_used; |
89 | u64 log_addr; | 95 | u64 log_addr; |
@@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, | |||
149 | void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, | 155 | void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, |
150 | struct vring_used_elem *heads, unsigned count); | 156 | struct vring_used_elem *heads, unsigned count); |
151 | void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); | 157 | void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); |
152 | void vhost_disable_notify(struct vhost_virtqueue *); | 158 | void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); |
153 | bool vhost_enable_notify(struct vhost_virtqueue *); | 159 | bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); |
154 | 160 | ||
155 | int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, | 161 | int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, |
156 | unsigned int log_num, u64 len); | 162 | unsigned int log_num, u64 len); |
@@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, | |||
162 | } while (0) | 168 | } while (0) |
163 | 169 | ||
164 | enum { | 170 | enum { |
165 | VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | | 171 | VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | |
166 | (1 << VIRTIO_RING_F_INDIRECT_DESC) | | 172 | (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | |
167 | (1 << VHOST_F_LOG_ALL) | | 173 | (1ULL << VIRTIO_RING_F_EVENT_IDX) | |
168 | (1 << VHOST_NET_F_VIRTIO_NET_HDR) | | 174 | (1ULL << VHOST_F_LOG_ALL) | |
169 | (1 << VIRTIO_NET_F_MRG_RXBUF), | 175 | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | |
176 | (1ULL << VIRTIO_NET_F_MRG_RXBUF), | ||
170 | }; | 177 | }; |
171 | 178 | ||
172 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) | 179 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) |
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0f1da45ba47d..e058ace2a4ad 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c | |||
@@ -40,9 +40,6 @@ struct virtio_balloon | |||
40 | /* Waiting for host to ack the pages we released. */ | 40 | /* Waiting for host to ack the pages we released. */ |
41 | struct completion acked; | 41 | struct completion acked; |
42 | 42 | ||
43 | /* Do we have to tell Host *before* we reuse pages? */ | ||
44 | bool tell_host_first; | ||
45 | |||
46 | /* The pages we've told the Host we're not using. */ | 43 | /* The pages we've told the Host we're not using. */ |
47 | unsigned int num_pages; | 44 | unsigned int num_pages; |
48 | struct list_head pages; | 45 | struct list_head pages; |
@@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) | |||
151 | vb->num_pages--; | 148 | vb->num_pages--; |
152 | } | 149 | } |
153 | 150 | ||
154 | if (vb->tell_host_first) { | 151 | |
155 | tell_host(vb, vb->deflate_vq); | 152 | /* |
156 | release_pages_by_pfn(vb->pfns, vb->num_pfns); | 153 | * Note that if |
157 | } else { | 154 | * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); |
158 | release_pages_by_pfn(vb->pfns, vb->num_pfns); | 155 | * is true, we *have* to do it in this order |
159 | tell_host(vb, vb->deflate_vq); | 156 | */ |
160 | } | 157 | tell_host(vb, vb->deflate_vq); |
158 | release_pages_by_pfn(vb->pfns, vb->num_pfns); | ||
161 | } | 159 | } |
162 | 160 | ||
163 | static inline void update_stat(struct virtio_balloon *vb, int idx, | 161 | static inline void update_stat(struct virtio_balloon *vb, int idx, |
@@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev) | |||
325 | goto out_del_vqs; | 323 | goto out_del_vqs; |
326 | } | 324 | } |
327 | 325 | ||
328 | vb->tell_host_first | ||
329 | = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); | ||
330 | |||
331 | return 0; | 326 | return 0; |
332 | 327 | ||
333 | out_del_vqs: | 328 | out_del_vqs: |
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b0043fb26a4d..68b9136847af 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c | |||
@@ -82,6 +82,9 @@ struct vring_virtqueue | |||
82 | /* Host supports indirect buffers */ | 82 | /* Host supports indirect buffers */ |
83 | bool indirect; | 83 | bool indirect; |
84 | 84 | ||
85 | /* Host publishes avail event idx */ | ||
86 | bool event; | ||
87 | |||
85 | /* Number of free buffers */ | 88 | /* Number of free buffers */ |
86 | unsigned int num_free; | 89 | unsigned int num_free; |
87 | /* Head of free buffer list. */ | 90 | /* Head of free buffer list. */ |
@@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); | |||
237 | void virtqueue_kick(struct virtqueue *_vq) | 240 | void virtqueue_kick(struct virtqueue *_vq) |
238 | { | 241 | { |
239 | struct vring_virtqueue *vq = to_vvq(_vq); | 242 | struct vring_virtqueue *vq = to_vvq(_vq); |
243 | u16 new, old; | ||
240 | START_USE(vq); | 244 | START_USE(vq); |
241 | /* Descriptors and available array need to be set before we expose the | 245 | /* Descriptors and available array need to be set before we expose the |
242 | * new available array entries. */ | 246 | * new available array entries. */ |
243 | virtio_wmb(); | 247 | virtio_wmb(); |
244 | 248 | ||
245 | vq->vring.avail->idx += vq->num_added; | 249 | old = vq->vring.avail->idx; |
250 | new = vq->vring.avail->idx = old + vq->num_added; | ||
246 | vq->num_added = 0; | 251 | vq->num_added = 0; |
247 | 252 | ||
248 | /* Need to update avail index before checking if we should notify */ | 253 | /* Need to update avail index before checking if we should notify */ |
249 | virtio_mb(); | 254 | virtio_mb(); |
250 | 255 | ||
251 | if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) | 256 | if (vq->event ? |
257 | vring_need_event(vring_avail_event(&vq->vring), new, old) : | ||
258 | !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) | ||
252 | /* Prod other side to tell it about changes. */ | 259 | /* Prod other side to tell it about changes. */ |
253 | vq->notify(&vq->vq); | 260 | vq->notify(&vq->vq); |
254 | 261 | ||
@@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) | |||
324 | ret = vq->data[i]; | 331 | ret = vq->data[i]; |
325 | detach_buf(vq, i); | 332 | detach_buf(vq, i); |
326 | vq->last_used_idx++; | 333 | vq->last_used_idx++; |
334 | /* If we expect an interrupt for the next entry, tell host | ||
335 | * by writing event index and flush out the write before | ||
336 | * the read in the next get_buf call. */ | ||
337 | if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { | ||
338 | vring_used_event(&vq->vring) = vq->last_used_idx; | ||
339 | virtio_mb(); | ||
340 | } | ||
341 | |||
327 | END_USE(vq); | 342 | END_USE(vq); |
328 | return ret; | 343 | return ret; |
329 | } | 344 | } |
@@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) | |||
345 | 360 | ||
346 | /* We optimistically turn back on interrupts, then check if there was | 361 | /* We optimistically turn back on interrupts, then check if there was |
347 | * more to do. */ | 362 | * more to do. */ |
363 | /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to | ||
364 | * either clear the flags bit or point the event index at the next | ||
365 | * entry. Always do both to keep code simple. */ | ||
348 | vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; | 366 | vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; |
367 | vring_used_event(&vq->vring) = vq->last_used_idx; | ||
349 | virtio_mb(); | 368 | virtio_mb(); |
350 | if (unlikely(more_used(vq))) { | 369 | if (unlikely(more_used(vq))) { |
351 | END_USE(vq); | 370 | END_USE(vq); |
@@ -357,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) | |||
357 | } | 376 | } |
358 | EXPORT_SYMBOL_GPL(virtqueue_enable_cb); | 377 | EXPORT_SYMBOL_GPL(virtqueue_enable_cb); |
359 | 378 | ||
379 | bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) | ||
380 | { | ||
381 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
382 | u16 bufs; | ||
383 | |||
384 | START_USE(vq); | ||
385 | |||
386 | /* We optimistically turn back on interrupts, then check if there was | ||
387 | * more to do. */ | ||
388 | /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to | ||
389 | * either clear the flags bit or point the event index at the next | ||
390 | * entry. Always do both to keep code simple. */ | ||
391 | vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; | ||
392 | /* TODO: tune this threshold */ | ||
393 | bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; | ||
394 | vring_used_event(&vq->vring) = vq->last_used_idx + bufs; | ||
395 | virtio_mb(); | ||
396 | if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { | ||
397 | END_USE(vq); | ||
398 | return false; | ||
399 | } | ||
400 | |||
401 | END_USE(vq); | ||
402 | return true; | ||
403 | } | ||
404 | EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); | ||
405 | |||
360 | void *virtqueue_detach_unused_buf(struct virtqueue *_vq) | 406 | void *virtqueue_detach_unused_buf(struct virtqueue *_vq) |
361 | { | 407 | { |
362 | struct vring_virtqueue *vq = to_vvq(_vq); | 408 | struct vring_virtqueue *vq = to_vvq(_vq); |
@@ -438,6 +484,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, | |||
438 | #endif | 484 | #endif |
439 | 485 | ||
440 | vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); | 486 | vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); |
487 | vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); | ||
441 | 488 | ||
442 | /* No callback? Tell other side not to bother us. */ | 489 | /* No callback? Tell other side not to bother us. */ |
443 | if (!callback) | 490 | if (!callback) |
@@ -472,6 +519,8 @@ void vring_transport_features(struct virtio_device *vdev) | |||
472 | switch (i) { | 519 | switch (i) { |
473 | case VIRTIO_RING_F_INDIRECT_DESC: | 520 | case VIRTIO_RING_F_INDIRECT_DESC: |
474 | break; | 521 | break; |
522 | case VIRTIO_RING_F_EVENT_IDX: | ||
523 | break; | ||
475 | default: | 524 | default: |
476 | /* We don't understand this bit. */ | 525 | /* We don't understand this bit. */ |
477 | clear_bit(i, vdev->features); | 526 | clear_bit(i, vdev->features); |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 87d95a8cddbc..f55ae23b137e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
583 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 583 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
584 | return -EACCES; | 584 | return -EACCES; |
585 | 585 | ||
586 | dentry_unhash(dentry); | ||
587 | |||
588 | if (atomic_dec_and_test(&ino->count)) { | 586 | if (atomic_dec_and_test(&ino->count)) { |
589 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 587 | p_ino = autofs4_dentry_ino(dentry->d_parent); |
590 | if (p_ino && dentry->d_parent != dentry) | 588 | if (p_ino && dentry->d_parent != dentry) |
diff --git a/fs/namei.c b/fs/namei.c index 1ab641f2e78e..e2e4e8d032ee 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2579 | if (error) | 2579 | if (error) |
2580 | goto out; | 2580 | goto out; |
2581 | 2581 | ||
2582 | shrink_dcache_parent(dentry); | ||
2582 | error = dir->i_op->rmdir(dir, dentry); | 2583 | error = dir->i_op->rmdir(dir, dentry); |
2583 | if (error) | 2584 | if (error) |
2584 | goto out; | 2585 | goto out; |
@@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2993 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) | 2994 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) |
2994 | goto out; | 2995 | goto out; |
2995 | 2996 | ||
2997 | if (target) | ||
2998 | shrink_dcache_parent(new_dentry); | ||
2996 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2999 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
2997 | if (error) | 3000 | if (error) |
2998 | goto out; | 3001 | goto out; |
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f8522738..bbd8661b3473 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h | |||
@@ -9,8 +9,12 @@ | |||
9 | #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) | 9 | #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) |
10 | #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) | 10 | #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) |
11 | 11 | ||
12 | #define VTD_STRIDE_SHIFT (9) | ||
13 | #define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) | ||
14 | |||
12 | #define DMA_PTE_READ (1) | 15 | #define DMA_PTE_READ (1) |
13 | #define DMA_PTE_WRITE (2) | 16 | #define DMA_PTE_WRITE (2) |
17 | #define DMA_PTE_LARGE_PAGE (1 << 7) | ||
14 | #define DMA_PTE_SNP (1 << 11) | 18 | #define DMA_PTE_SNP (1 << 11) |
15 | 19 | ||
16 | #define CONTEXT_TT_MULTI_LEVEL 0 | 20 | #define CONTEXT_TT_MULTI_LEVEL 0 |
diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1d9c91..e5f21d293c70 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/mtd/partitions.h> | 19 | #include <linux/mtd/partitions.h> |
20 | 20 | ||
21 | struct map_info; | 21 | struct map_info; |
22 | struct platform_device; | ||
22 | 23 | ||
23 | struct physmap_flash_data { | 24 | struct physmap_flash_data { |
24 | unsigned int width; | 25 | unsigned int width; |
diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f74041..710885749605 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h | |||
@@ -51,6 +51,13 @@ struct virtqueue { | |||
51 | * This re-enables callbacks; it returns "false" if there are pending | 51 | * This re-enables callbacks; it returns "false" if there are pending |
52 | * buffers in the queue, to detect a possible race between the driver | 52 | * buffers in the queue, to detect a possible race between the driver |
53 | * checking for more work, and enabling callbacks. | 53 | * checking for more work, and enabling callbacks. |
54 | * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. | ||
55 | * vq: the struct virtqueue we're talking about. | ||
56 | * This re-enables callbacks but hints to the other side to delay | ||
57 | * interrupts until most of the available buffers have been processed; | ||
58 | * it returns "false" if there are many pending buffers in the queue, | ||
59 | * to detect a possible race between the driver checking for more work, | ||
60 | * and enabling callbacks. | ||
54 | * virtqueue_detach_unused_buf: detach first unused buffer | 61 | * virtqueue_detach_unused_buf: detach first unused buffer |
55 | * vq: the struct virtqueue we're talking about. | 62 | * vq: the struct virtqueue we're talking about. |
56 | * Returns NULL or the "data" token handed to add_buf | 63 | * Returns NULL or the "data" token handed to add_buf |
@@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); | |||
86 | 93 | ||
87 | bool virtqueue_enable_cb(struct virtqueue *vq); | 94 | bool virtqueue_enable_cb(struct virtqueue *vq); |
88 | 95 | ||
96 | bool virtqueue_enable_cb_delayed(struct virtqueue *vq); | ||
97 | |||
89 | void *virtqueue_detach_unused_buf(struct virtqueue *vq); | 98 | void *virtqueue_detach_unused_buf(struct virtqueue *vq); |
90 | 99 | ||
91 | /** | 100 | /** |
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439b2860..277c4ad44e84 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h | |||
@@ -1,7 +1,30 @@ | |||
1 | #ifndef _LINUX_VIRTIO_9P_H | 1 | #ifndef _LINUX_VIRTIO_9P_H |
2 | #define _LINUX_VIRTIO_9P_H | 2 | #define _LINUX_VIRTIO_9P_H |
3 | /* This header is BSD licensed so anyone can use the definitions to implement | 3 | /* This header is BSD licensed so anyone can use the definitions to implement |
4 | * compatible drivers/servers. */ | 4 | * compatible drivers/servers. |
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * 3. Neither the name of IBM nor the names of its contributors | ||
15 | * may be used to endorse or promote products derived from this software | ||
16 | * without specific prior written permission. | ||
17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
20 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
27 | * SUCH DAMAGE. */ | ||
5 | #include <linux/types.h> | 28 | #include <linux/types.h> |
6 | #include <linux/virtio_ids.h> | 29 | #include <linux/virtio_ids.h> |
7 | #include <linux/virtio_config.h> | 30 | #include <linux/virtio_config.h> |
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1b81a2..652dc8bea921 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h | |||
@@ -1,7 +1,30 @@ | |||
1 | #ifndef _LINUX_VIRTIO_BALLOON_H | 1 | #ifndef _LINUX_VIRTIO_BALLOON_H |
2 | #define _LINUX_VIRTIO_BALLOON_H | 2 | #define _LINUX_VIRTIO_BALLOON_H |
3 | /* This header is BSD licensed so anyone can use the definitions to implement | 3 | /* This header is BSD licensed so anyone can use the definitions to implement |
4 | * compatible drivers/servers. */ | 4 | * compatible drivers/servers. |
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * 3. Neither the name of IBM nor the names of its contributors | ||
15 | * may be used to endorse or promote products derived from this software | ||
16 | * without specific prior written permission. | ||
17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
20 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
27 | * SUCH DAMAGE. */ | ||
5 | #include <linux/virtio_ids.h> | 28 | #include <linux/virtio_ids.h> |
6 | #include <linux/virtio_config.h> | 29 | #include <linux/virtio_config.h> |
7 | 30 | ||
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d695ed..e0edb40ca7aa 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h | |||
@@ -1,7 +1,30 @@ | |||
1 | #ifndef _LINUX_VIRTIO_BLK_H | 1 | #ifndef _LINUX_VIRTIO_BLK_H |
2 | #define _LINUX_VIRTIO_BLK_H | 2 | #define _LINUX_VIRTIO_BLK_H |
3 | /* This header is BSD licensed so anyone can use the definitions to implement | 3 | /* This header is BSD licensed so anyone can use the definitions to implement |
4 | * compatible drivers/servers. */ | 4 | * compatible drivers/servers. |
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * 3. Neither the name of IBM nor the names of its contributors | ||
15 | * may be used to endorse or promote products derived from this software | ||
16 | * without specific prior written permission. | ||
17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
20 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
27 | * SUCH DAMAGE. */ | ||
5 | #include <linux/types.h> | 28 | #include <linux/types.h> |
6 | #include <linux/virtio_ids.h> | 29 | #include <linux/virtio_ids.h> |
7 | #include <linux/virtio_config.h> | 30 | #include <linux/virtio_config.h> |
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b4ddd5..39c88c5ad19d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h | |||
@@ -1,7 +1,30 @@ | |||
1 | #ifndef _LINUX_VIRTIO_CONFIG_H | 1 | #ifndef _LINUX_VIRTIO_CONFIG_H |
2 | #define _LINUX_VIRTIO_CONFIG_H | 2 | #define _LINUX_VIRTIO_CONFIG_H |
3 | /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so | 3 | /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so |
4 | * anyone can use the definitions to implement compatible drivers/servers. */ | 4 | * anyone can use the definitions to implement compatible drivers/servers. |
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * 3. Neither the name of IBM nor the names of its contributors | ||
15 | * may be used to endorse or promote products derived from this software | ||
16 | * without specific prior written permission. | ||
17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
20 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
27 | * SUCH DAMAGE. */ | ||
5 | 28 | ||
6 | /* Virtio devices use a standardized configuration space to define their | 29 | /* Virtio devices use a standardized configuration space to define their |
7 | * features and pass configuration information, but each implementation can | 30 | * features and pass configuration information, but each implementation can |
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d333543a33..bdf4b0034739 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h | |||
@@ -5,7 +5,31 @@ | |||
5 | #include <linux/virtio_config.h> | 5 | #include <linux/virtio_config.h> |
6 | /* | 6 | /* |
7 | * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so | 7 | * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so |
8 | * anyone can use the definitions to implement compatible drivers/servers. | 8 | * anyone can use the definitions to implement compatible drivers/servers: |
9 | * | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without | ||
12 | * modification, are permitted provided that the following conditions | ||
13 | * are met: | ||
14 | * 1. Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * 2. Redistributions in binary form must reproduce the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer in the | ||
18 | * documentation and/or other materials provided with the distribution. | ||
19 | * 3. Neither the name of IBM nor the names of its contributors | ||
20 | * may be used to endorse or promote products derived from this software | ||
21 | * without specific prior written permission. | ||
22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
25 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
32 | * SUCH DAMAGE. | ||
9 | * | 33 | * |
10 | * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 | 34 | * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 |
11 | * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011 | 35 | * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011 |
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0a78d7..85bb0bb66ffc 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h | |||
@@ -5,7 +5,29 @@ | |||
5 | * | 5 | * |
6 | * This header is BSD licensed so anyone can use the definitions to implement | 6 | * This header is BSD licensed so anyone can use the definitions to implement |
7 | * compatible drivers/servers. | 7 | * compatible drivers/servers. |
8 | */ | 8 | * |
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * 1. Redistributions of source code must retain the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer. | ||
14 | * 2. Redistributions in binary form must reproduce the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer in the | ||
16 | * documentation and/or other materials provided with the distribution. | ||
17 | * 3. Neither the name of IBM nor the names of its contributors | ||
18 | * may be used to endorse or promote products derived from this software | ||
19 | * without specific prior written permission. | ||
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
23 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
30 | * SUCH DAMAGE. */ | ||
9 | 31 | ||
10 | #define VIRTIO_ID_NET 1 /* virtio net */ | 32 | #define VIRTIO_ID_NET 1 /* virtio net */ |
11 | #define VIRTIO_ID_BLOCK 2 /* virtio block */ | 33 | #define VIRTIO_ID_BLOCK 2 /* virtio block */ |
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e42298ce5..136040bba3e3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h | |||
@@ -1,7 +1,30 @@ | |||
1 | #ifndef _LINUX_VIRTIO_NET_H | 1 | #ifndef _LINUX_VIRTIO_NET_H |
2 | #define _LINUX_VIRTIO_NET_H | 2 | #define _LINUX_VIRTIO_NET_H |
3 | /* This header is BSD licensed so anyone can use the definitions to implement | 3 | /* This header is BSD licensed so anyone can use the definitions to implement |
4 | * compatible drivers/servers. */ | 4 | * compatible drivers/servers. |
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * 3. Neither the name of IBM nor the names of its contributors | ||
15 | * may be used to endorse or promote products derived from this software | ||
16 | * without specific prior written permission. | ||
17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
20 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
27 | * SUCH DAMAGE. */ | ||
5 | #include <linux/types.h> | 28 | #include <linux/types.h> |
6 | #include <linux/virtio_ids.h> | 29 | #include <linux/virtio_ids.h> |
7 | #include <linux/virtio_config.h> | 30 | #include <linux/virtio_config.h> |
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c48c622..ea66f3f60d63 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h | |||
@@ -11,6 +11,29 @@ | |||
11 | * | 11 | * |
12 | * This header is BSD licensed so anyone can use the definitions to implement | 12 | * This header is BSD licensed so anyone can use the definitions to implement |
13 | * compatible drivers/servers. | 13 | * compatible drivers/servers. |
14 | * | ||
15 | * Redistribution and use in source and binary forms, with or without | ||
16 | * modification, are permitted provided that the following conditions | ||
17 | * are met: | ||
18 | * 1. Redistributions of source code must retain the above copyright | ||
19 | * notice, this list of conditions and the following disclaimer. | ||
20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
21 | * notice, this list of conditions and the following disclaimer in the | ||
22 | * documentation and/or other materials provided with the distribution. | ||
23 | * 3. Neither the name of IBM nor the names of its contributors | ||
24 | * may be used to endorse or promote products derived from this software | ||
25 | * without specific prior written permission. | ||
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
29 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
36 | * SUCH DAMAGE. | ||
14 | */ | 37 | */ |
15 | 38 | ||
16 | #ifndef _LINUX_VIRTIO_PCI_H | 39 | #ifndef _LINUX_VIRTIO_PCI_H |
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b132b5..4a32cb6da425 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h | |||
@@ -7,6 +7,29 @@ | |||
7 | * This header is BSD licensed so anyone can use the definitions to implement | 7 | * This header is BSD licensed so anyone can use the definitions to implement |
8 | * compatible drivers/servers. | 8 | * compatible drivers/servers. |
9 | * | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of IBM nor the names of its contributors | ||
19 | * may be used to endorse or promote products derived from this software | ||
20 | * without specific prior written permission. | ||
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND | ||
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
24 | * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE | ||
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
31 | * SUCH DAMAGE. | ||
32 | * | ||
10 | * Copyright Rusty Russell IBM Corporation 2007. */ | 33 | * Copyright Rusty Russell IBM Corporation 2007. */ |
11 | #include <linux/types.h> | 34 | #include <linux/types.h> |
12 | 35 | ||
@@ -29,6 +52,12 @@ | |||
29 | /* We support indirect buffer descriptors */ | 52 | /* We support indirect buffer descriptors */ |
30 | #define VIRTIO_RING_F_INDIRECT_DESC 28 | 53 | #define VIRTIO_RING_F_INDIRECT_DESC 28 |
31 | 54 | ||
55 | /* The Guest publishes the used index for which it expects an interrupt | ||
56 | * at the end of the avail ring. Host should ignore the avail->flags field. */ | ||
57 | /* The Host publishes the avail index for which it expects a kick | ||
58 | * at the end of the used ring. Guest should ignore the used->flags field. */ | ||
59 | #define VIRTIO_RING_F_EVENT_IDX 29 | ||
60 | |||
32 | /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ | 61 | /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ |
33 | struct vring_desc { | 62 | struct vring_desc { |
34 | /* Address (guest-physical). */ | 63 | /* Address (guest-physical). */ |
@@ -83,6 +112,7 @@ struct vring { | |||
83 | * __u16 avail_flags; | 112 | * __u16 avail_flags; |
84 | * __u16 avail_idx; | 113 | * __u16 avail_idx; |
85 | * __u16 available[num]; | 114 | * __u16 available[num]; |
115 | * __u16 used_event_idx; | ||
86 | * | 116 | * |
87 | * // Padding to the next align boundary. | 117 | * // Padding to the next align boundary. |
88 | * char pad[]; | 118 | * char pad[]; |
@@ -91,8 +121,14 @@ struct vring { | |||
91 | * __u16 used_flags; | 121 | * __u16 used_flags; |
92 | * __u16 used_idx; | 122 | * __u16 used_idx; |
93 | * struct vring_used_elem used[num]; | 123 | * struct vring_used_elem used[num]; |
124 | * __u16 avail_event_idx; | ||
94 | * }; | 125 | * }; |
95 | */ | 126 | */ |
127 | /* We publish the used event index at the end of the available ring, and vice | ||
128 | * versa. They are at the end for backwards compatibility. */ | ||
129 | #define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) | ||
130 | #define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) | ||
131 | |||
96 | static inline void vring_init(struct vring *vr, unsigned int num, void *p, | 132 | static inline void vring_init(struct vring *vr, unsigned int num, void *p, |
97 | unsigned long align) | 133 | unsigned long align) |
98 | { | 134 | { |
@@ -107,7 +143,21 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) | |||
107 | { | 143 | { |
108 | return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) | 144 | return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) |
109 | + align - 1) & ~(align - 1)) | 145 | + align - 1) & ~(align - 1)) |
110 | + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; | 146 | + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; |
147 | } | ||
148 | |||
149 | /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ | ||
150 | * Assuming a given event_idx value from the other side, if | ||
151 | * we have just incremented index from old to new_idx, | ||
152 | * should we trigger an event? */ | ||
153 | static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) | ||
154 | { | ||
155 | /* Note: Xen has similar logic for notification hold-off | ||
156 | * in include/xen/interface/io/ring.h with req_event and req_prod | ||
157 | * corresponding to event_idx + 1 and new_idx respectively. | ||
158 | * Note also that req_event and req_prod in Xen start at 1, | ||
159 | * event indexes in virtio start at 0. */ | ||
160 | return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); | ||
111 | } | 161 | } |
112 | 162 | ||
113 | #ifdef __KERNEL__ | 163 | #ifdef __KERNEL__ |
diff --git a/kernel/events/core.c b/kernel/events/core.c index cc5d57d1d0b6..ba89f40abe6a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -7388,26 +7388,12 @@ static int __perf_cgroup_move(void *info) | |||
7388 | return 0; | 7388 | return 0; |
7389 | } | 7389 | } |
7390 | 7390 | ||
7391 | static void perf_cgroup_move(struct task_struct *task) | 7391 | static void |
7392 | perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) | ||
7392 | { | 7393 | { |
7393 | task_function_call(task, __perf_cgroup_move, task); | 7394 | task_function_call(task, __perf_cgroup_move, task); |
7394 | } | 7395 | } |
7395 | 7396 | ||
7396 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
7397 | struct cgroup *old_cgrp, struct task_struct *task, | ||
7398 | bool threadgroup) | ||
7399 | { | ||
7400 | perf_cgroup_move(task); | ||
7401 | if (threadgroup) { | ||
7402 | struct task_struct *c; | ||
7403 | rcu_read_lock(); | ||
7404 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
7405 | perf_cgroup_move(c); | ||
7406 | } | ||
7407 | rcu_read_unlock(); | ||
7408 | } | ||
7409 | } | ||
7410 | |||
7411 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7397 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7412 | struct cgroup *old_cgrp, struct task_struct *task) | 7398 | struct cgroup *old_cgrp, struct task_struct *task) |
7413 | { | 7399 | { |
@@ -7419,7 +7405,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7419 | if (!(task->flags & PF_EXITING)) | 7405 | if (!(task->flags & PF_EXITING)) |
7420 | return; | 7406 | return; |
7421 | 7407 | ||
7422 | perf_cgroup_move(task); | 7408 | perf_cgroup_attach_task(cgrp, task); |
7423 | } | 7409 | } |
7424 | 7410 | ||
7425 | struct cgroup_subsys perf_subsys = { | 7411 | struct cgroup_subsys perf_subsys = { |
@@ -7428,6 +7414,6 @@ struct cgroup_subsys perf_subsys = { | |||
7428 | .create = perf_cgroup_create, | 7414 | .create = perf_cgroup_create, |
7429 | .destroy = perf_cgroup_destroy, | 7415 | .destroy = perf_cgroup_destroy, |
7430 | .exit = perf_cgroup_exit, | 7416 | .exit = perf_cgroup_exit, |
7431 | .attach = perf_cgroup_attach, | 7417 | .attach_task = perf_cgroup_attach_task, |
7432 | }; | 7418 | }; |
7433 | #endif /* CONFIG_CGROUP_PERF */ | 7419 | #endif /* CONFIG_CGROUP_PERF */ |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 77a7671dd147..89419ff92e99 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | |||
1648 | if (IS_ERR(t)) | 1648 | if (IS_ERR(t)) |
1649 | return PTR_ERR(t); | 1649 | return PTR_ERR(t); |
1650 | kthread_bind(t, cpu); | 1650 | kthread_bind(t, cpu); |
1651 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1652 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | 1651 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; |
1653 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | 1652 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); |
1654 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | 1653 | per_cpu(rcu_cpu_kthread_task, cpu) = t; |
@@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1756 | if (IS_ERR(t)) | 1755 | if (IS_ERR(t)) |
1757 | return PTR_ERR(t); | 1756 | return PTR_ERR(t); |
1758 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1757 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1759 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1760 | rnp->node_kthread_task = t; | 1758 | rnp->node_kthread_task = t; |
1761 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1759 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1762 | sp.sched_priority = 99; | 1760 | sp.sched_priority = 99; |
@@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1765 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | 1763 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); |
1766 | } | 1764 | } |
1767 | 1765 | ||
1766 | static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); | ||
1767 | |||
1768 | /* | 1768 | /* |
1769 | * Spawn all kthreads -- called as soon as the scheduler is running. | 1769 | * Spawn all kthreads -- called as soon as the scheduler is running. |
1770 | */ | 1770 | */ |
@@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void) | |||
1772 | { | 1772 | { |
1773 | int cpu; | 1773 | int cpu; |
1774 | struct rcu_node *rnp; | 1774 | struct rcu_node *rnp; |
1775 | struct task_struct *t; | ||
1775 | 1776 | ||
1776 | rcu_kthreads_spawnable = 1; | 1777 | rcu_kthreads_spawnable = 1; |
1777 | for_each_possible_cpu(cpu) { | 1778 | for_each_possible_cpu(cpu) { |
1778 | per_cpu(rcu_cpu_has_work, cpu) = 0; | 1779 | per_cpu(rcu_cpu_has_work, cpu) = 0; |
1779 | if (cpu_online(cpu)) | 1780 | if (cpu_online(cpu)) { |
1780 | (void)rcu_spawn_one_cpu_kthread(cpu); | 1781 | (void)rcu_spawn_one_cpu_kthread(cpu); |
1782 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1783 | if (t) | ||
1784 | wake_up_process(t); | ||
1785 | } | ||
1781 | } | 1786 | } |
1782 | rnp = rcu_get_root(rcu_state); | 1787 | rnp = rcu_get_root(rcu_state); |
1783 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1788 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1789 | if (rnp->node_kthread_task) | ||
1790 | wake_up_process(rnp->node_kthread_task); | ||
1784 | if (NUM_RCU_NODES > 1) { | 1791 | if (NUM_RCU_NODES > 1) { |
1785 | rcu_for_each_leaf_node(rcu_state, rnp) | 1792 | rcu_for_each_leaf_node(rcu_state, rnp) { |
1786 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1793 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1794 | t = rnp->node_kthread_task; | ||
1795 | if (t) | ||
1796 | wake_up_process(t); | ||
1797 | rcu_wake_one_boost_kthread(rnp); | ||
1798 | } | ||
1787 | } | 1799 | } |
1788 | return 0; | 1800 | return 0; |
1789 | } | 1801 | } |
@@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2188 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2200 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
2189 | } | 2201 | } |
2190 | 2202 | ||
2191 | static void __cpuinit rcu_online_cpu(int cpu) | 2203 | static void __cpuinit rcu_prepare_cpu(int cpu) |
2192 | { | 2204 | { |
2193 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); | 2205 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); |
2194 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); | 2206 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); |
2195 | rcu_preempt_init_percpu_data(cpu); | 2207 | rcu_preempt_init_percpu_data(cpu); |
2196 | } | 2208 | } |
2197 | 2209 | ||
2198 | static void __cpuinit rcu_online_kthreads(int cpu) | 2210 | static void __cpuinit rcu_prepare_kthreads(int cpu) |
2199 | { | 2211 | { |
2200 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 2212 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
2201 | struct rcu_node *rnp = rdp->mynode; | 2213 | struct rcu_node *rnp = rdp->mynode; |
@@ -2209,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) | |||
2209 | } | 2221 | } |
2210 | 2222 | ||
2211 | /* | 2223 | /* |
2224 | * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, | ||
2225 | * but the RCU threads are woken on demand, and if demand is low this | ||
2226 | * could take a while, triggering the hung task watchdog. | ||
2227 | * | ||
2228 | * In order to avoid this, poke all tasks once the CPU is fully | ||
2229 | * up and running. | ||
2230 | */ | ||
2231 | static void __cpuinit rcu_online_kthreads(int cpu) | ||
2232 | { | ||
2233 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
2234 | struct rcu_node *rnp = rdp->mynode; | ||
2235 | struct task_struct *t; | ||
2236 | |||
2237 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
2238 | if (t) | ||
2239 | wake_up_process(t); | ||
2240 | |||
2241 | t = rnp->node_kthread_task; | ||
2242 | if (t) | ||
2243 | wake_up_process(t); | ||
2244 | |||
2245 | rcu_wake_one_boost_kthread(rnp); | ||
2246 | } | ||
2247 | |||
2248 | /* | ||
2212 | * Handle CPU online/offline notification events. | 2249 | * Handle CPU online/offline notification events. |
2213 | */ | 2250 | */ |
2214 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | 2251 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, |
@@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2221 | switch (action) { | 2258 | switch (action) { |
2222 | case CPU_UP_PREPARE: | 2259 | case CPU_UP_PREPARE: |
2223 | case CPU_UP_PREPARE_FROZEN: | 2260 | case CPU_UP_PREPARE_FROZEN: |
2224 | rcu_online_cpu(cpu); | 2261 | rcu_prepare_cpu(cpu); |
2225 | rcu_online_kthreads(cpu); | 2262 | rcu_prepare_kthreads(cpu); |
2226 | break; | 2263 | break; |
2227 | case CPU_ONLINE: | 2264 | case CPU_ONLINE: |
2265 | rcu_online_kthreads(cpu); | ||
2228 | case CPU_DOWN_FAILED: | 2266 | case CPU_DOWN_FAILED: |
2229 | rcu_node_kthread_setaffinity(rnp, -1); | 2267 | rcu_node_kthread_setaffinity(rnp, -1); |
2230 | rcu_cpu_kthread_setrt(cpu, 1); | 2268 | rcu_cpu_kthread_setrt(cpu, 1); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a767b7dac365..c8bff3099a89 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1295 | if (IS_ERR(t)) | 1295 | if (IS_ERR(t)) |
1296 | return PTR_ERR(t); | 1296 | return PTR_ERR(t); |
1297 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1297 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1298 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1299 | rnp->boost_kthread_task = t; | 1298 | rnp->boost_kthread_task = t; |
1300 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1299 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1301 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1300 | sp.sched_priority = RCU_KTHREAD_PRIO; |
@@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1303 | return 0; | 1302 | return 0; |
1304 | } | 1303 | } |
1305 | 1304 | ||
1305 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | ||
1306 | { | ||
1307 | if (rnp->boost_kthread_task) | ||
1308 | wake_up_process(rnp->boost_kthread_task); | ||
1309 | } | ||
1310 | |||
1306 | #else /* #ifdef CONFIG_RCU_BOOST */ | 1311 | #else /* #ifdef CONFIG_RCU_BOOST */ |
1307 | 1312 | ||
1308 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | 1313 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
@@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1326 | return 0; | 1331 | return 0; |
1327 | } | 1332 | } |
1328 | 1333 | ||
1334 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | ||
1335 | { | ||
1336 | } | ||
1337 | |||
1329 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | 1338 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
1330 | 1339 | ||
1331 | #ifndef CONFIG_SMP | 1340 | #ifndef CONFIG_SMP |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4fc92445a29c..f175d98bd355 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = { | |||
938 | }, | 938 | }, |
939 | #endif | 939 | #endif |
940 | #ifdef CONFIG_PERF_EVENTS | 940 | #ifdef CONFIG_PERF_EVENTS |
941 | /* | ||
942 | * User-space scripts rely on the existence of this file | ||
943 | * as a feature check for perf_events being enabled. | ||
944 | * | ||
945 | * So it's an ABI, do not remove! | ||
946 | */ | ||
941 | { | 947 | { |
942 | .procname = "perf_event_paranoid", | 948 | .procname = "perf_event_paranoid", |
943 | .data = &sysctl_perf_event_paranoid, | 949 | .data = &sysctl_perf_event_paranoid, |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a4e1db3f1981..4e8985acdab8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2247 | 2247 | ||
2248 | if (should_fail_alloc_page(gfp_mask, order)) | 2248 | if (should_fail_alloc_page(gfp_mask, order)) |
2249 | return NULL; | 2249 | return NULL; |
2250 | #ifndef CONFIG_ZONE_DMA | ||
2251 | if (WARN_ON_ONCE(gfp_mask & __GFP_DMA)) | ||
2252 | return NULL; | ||
2253 | #endif | ||
2254 | 2250 | ||
2255 | /* | 2251 | /* |
2256 | * Check the zones suitable for the gfp_mask contain at least one | 2252 | * Check the zones suitable for the gfp_mask contain at least one |
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index ae3a698415e6..ec1bcecf2cda 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c | |||
@@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, | |||
593 | sa.aad.op = OP_SETPROCATTR; | 593 | sa.aad.op = OP_SETPROCATTR; |
594 | sa.aad.info = name; | 594 | sa.aad.info = name; |
595 | sa.aad.error = -EINVAL; | 595 | sa.aad.error = -EINVAL; |
596 | return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL, | 596 | return aa_audit(AUDIT_APPARMOR_DENIED, |
597 | __aa_current_profile(), GFP_KERNEL, | ||
597 | &sa, NULL); | 598 | &sa, NULL); |
598 | } | 599 | } |
599 | } else if (strcmp(name, "exec") == 0) { | 600 | } else if (strcmp(name, "exec") == 0) { |
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index df0c6d2c3860..74d3331bdaf9 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c | |||
@@ -198,6 +198,14 @@ const struct option longopts[] = { | |||
198 | .val = 'h', | 198 | .val = 'h', |
199 | }, | 199 | }, |
200 | { | 200 | { |
201 | .name = "event-idx", | ||
202 | .val = 'E', | ||
203 | }, | ||
204 | { | ||
205 | .name = "no-event-idx", | ||
206 | .val = 'e', | ||
207 | }, | ||
208 | { | ||
201 | .name = "indirect", | 209 | .name = "indirect", |
202 | .val = 'I', | 210 | .val = 'I', |
203 | }, | 211 | }, |
@@ -211,13 +219,17 @@ const struct option longopts[] = { | |||
211 | 219 | ||
212 | static void help() | 220 | static void help() |
213 | { | 221 | { |
214 | fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); | 222 | fprintf(stderr, "Usage: virtio_test [--help]" |
223 | " [--no-indirect]" | ||
224 | " [--no-event-idx]" | ||
225 | "\n"); | ||
215 | } | 226 | } |
216 | 227 | ||
217 | int main(int argc, char **argv) | 228 | int main(int argc, char **argv) |
218 | { | 229 | { |
219 | struct vdev_info dev; | 230 | struct vdev_info dev; |
220 | unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; | 231 | unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | |
232 | (1ULL << VIRTIO_RING_F_EVENT_IDX); | ||
221 | int o; | 233 | int o; |
222 | 234 | ||
223 | for (;;) { | 235 | for (;;) { |
@@ -228,6 +240,9 @@ int main(int argc, char **argv) | |||
228 | case '?': | 240 | case '?': |
229 | help(); | 241 | help(); |
230 | exit(2); | 242 | exit(2); |
243 | case 'e': | ||
244 | features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); | ||
245 | break; | ||
231 | case 'h': | 246 | case 'h': |
232 | help(); | 247 | help(); |
233 | goto done; | 248 | goto done; |