diff options
| -rw-r--r-- | Documentation/virtual/kvm/api.txt | 13 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 4 | ||||
| -rw-r--r-- | include/linux/kvm.h | 12 | ||||
| -rw-r--r-- | include/linux/kvm_host.h | 5 | ||||
| -rw-r--r-- | virt/kvm/eventfd.c | 150 | ||||
| -rw-r--r-- | virt/kvm/irq_comm.c | 6 |
6 files changed, 184 insertions, 6 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 36befa775fdb..f6ec3a92e621 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -1950,6 +1950,19 @@ the guest using the specified gsi pin. The irqfd is removed using | |||
| 1950 | the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd | 1950 | the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd |
| 1951 | and kvm_irqfd.gsi. | 1951 | and kvm_irqfd.gsi. |
| 1952 | 1952 | ||
| 1953 | With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify | ||
| 1954 | mechanism allowing emulation of level-triggered, irqfd-based | ||
| 1955 | interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an | ||
| 1956 | additional eventfd in the kvm_irqfd.resamplefd field. When operating | ||
| 1957 | in resample mode, posting of an interrupt through kvm_irqfd.fd asserts | ||
| 1958 | the specified gsi in the irqchip. When the irqchip is resampled, such | ||
| 1959 | as from an EOI, the gsi is de-asserted and the user is notified via | ||
| 1960 | kvm_irqfd.resamplefd. It is the user's responsibility to re-queue | ||
| 1961 | the interrupt if the device making use of it still requires service. | ||
| 1962 | Note that closing the resamplefd is not sufficient to disable the | ||
| 1963 | irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment | ||
| 1964 | and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. | ||
| 1965 | |||
| 1953 | 4.76 KVM_PPC_ALLOCATE_HTAB | 1966 | 4.76 KVM_PPC_ALLOCATE_HTAB |
| 1954 | 1967 | ||
| 1955 | Capability: KVM_CAP_PPC_ALLOC_HTAB | 1968 | Capability: KVM_CAP_PPC_ALLOC_HTAB |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc2a0a132e4b..7d44204c6041 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -2176,6 +2176,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 2176 | case KVM_CAP_PCI_2_3: | 2176 | case KVM_CAP_PCI_2_3: |
| 2177 | case KVM_CAP_KVMCLOCK_CTRL: | 2177 | case KVM_CAP_KVMCLOCK_CTRL: |
| 2178 | case KVM_CAP_READONLY_MEM: | 2178 | case KVM_CAP_READONLY_MEM: |
| 2179 | case KVM_CAP_IRQFD_RESAMPLE: | ||
| 2179 | r = 1; | 2180 | r = 1; |
| 2180 | break; | 2181 | break; |
| 2181 | case KVM_CAP_COALESCED_MMIO: | 2182 | case KVM_CAP_COALESCED_MMIO: |
| @@ -6268,6 +6269,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 6268 | 6269 | ||
| 6269 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6270 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
| 6270 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 6271 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
| 6272 | /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */ | ||
| 6273 | set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 6274 | &kvm->arch.irq_sources_bitmap); | ||
| 6271 | 6275 | ||
| 6272 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); | 6276 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
| 6273 | mutex_init(&kvm->arch.apic_map_lock); | 6277 | mutex_init(&kvm->arch.apic_map_lock); |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d808694673f9..0a6d6ba44c85 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -625,6 +625,7 @@ struct kvm_ppc_smmu_info { | |||
| 625 | #ifdef __KVM_HAVE_READONLY_MEM | 625 | #ifdef __KVM_HAVE_READONLY_MEM |
| 626 | #define KVM_CAP_READONLY_MEM 81 | 626 | #define KVM_CAP_READONLY_MEM 81 |
| 627 | #endif | 627 | #endif |
| 628 | #define KVM_CAP_IRQFD_RESAMPLE 82 | ||
| 628 | 629 | ||
| 629 | #ifdef KVM_CAP_IRQ_ROUTING | 630 | #ifdef KVM_CAP_IRQ_ROUTING |
| 630 | 631 | ||
| @@ -690,12 +691,21 @@ struct kvm_xen_hvm_config { | |||
| 690 | #endif | 691 | #endif |
| 691 | 692 | ||
| 692 | #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) | 693 | #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) |
| 694 | /* | ||
| 695 | * Available with KVM_CAP_IRQFD_RESAMPLE | ||
| 696 | * | ||
| 697 | * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies | ||
| 698 | * the irqfd to operate in resampling mode for level triggered interrupt | ||
| 699 | * emulation. See Documentation/virtual/kvm/api.txt. | ||
| 700 | */ | ||
| 701 | #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) | ||
| 693 | 702 | ||
| 694 | struct kvm_irqfd { | 703 | struct kvm_irqfd { |
| 695 | __u32 fd; | 704 | __u32 fd; |
| 696 | __u32 gsi; | 705 | __u32 gsi; |
| 697 | __u32 flags; | 706 | __u32 flags; |
| 698 | __u8 pad[20]; | 707 | __u32 resamplefd; |
| 708 | __u8 pad[16]; | ||
| 699 | }; | 709 | }; |
| 700 | 710 | ||
| 701 | struct kvm_clock_data { | 711 | struct kvm_clock_data { |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 80bfc880921e..2850656e2e96 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -119,7 +119,8 @@ static inline bool is_error_page(struct page *page) | |||
| 119 | #define KVM_REQ_PMU 16 | 119 | #define KVM_REQ_PMU 16 |
| 120 | #define KVM_REQ_PMI 17 | 120 | #define KVM_REQ_PMI 17 |
| 121 | 121 | ||
| 122 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 122 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
| 123 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | ||
| 123 | 124 | ||
| 124 | struct kvm; | 125 | struct kvm; |
| 125 | struct kvm_vcpu; | 126 | struct kvm_vcpu; |
| @@ -343,6 +344,8 @@ struct kvm { | |||
| 343 | struct { | 344 | struct { |
| 344 | spinlock_t lock; | 345 | spinlock_t lock; |
| 345 | struct list_head items; | 346 | struct list_head items; |
| 347 | struct list_head resampler_list; | ||
| 348 | struct mutex resampler_lock; | ||
| 346 | } irqfds; | 349 | } irqfds; |
| 347 | struct list_head ioeventfds; | 350 | struct list_head ioeventfds; |
| 348 | #endif | 351 | #endif |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7d7e2aaffece..356965c9d107 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -43,6 +43,31 @@ | |||
| 43 | * -------------------------------------------------------------------- | 43 | * -------------------------------------------------------------------- |
| 44 | */ | 44 | */ |
| 45 | 45 | ||
| 46 | /* | ||
| 47 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
| 48 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
| 49 | * trigger. On acknowledgement through the irq ack notifier, the | ||
| 50 | * interrupt is de-asserted and userspace is notified through the | ||
| 51 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
| 52 | * together, so we don't need to track the state of each individual | ||
| 53 | * user. We can also therefore share the same irq source ID. | ||
| 54 | */ | ||
| 55 | struct _irqfd_resampler { | ||
| 56 | struct kvm *kvm; | ||
| 57 | /* | ||
| 58 | * List of resampling struct _irqfd objects sharing this gsi. | ||
| 59 | * RCU list modified under kvm->irqfds.resampler_lock | ||
| 60 | */ | ||
| 61 | struct list_head list; | ||
| 62 | struct kvm_irq_ack_notifier notifier; | ||
| 63 | /* | ||
| 64 | * Entry in list of kvm->irqfds.resampler_list. Used for sharing | ||
| 65 | * resamplers among irqfds on the same gsi. | ||
| 66 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
| 67 | */ | ||
| 68 | struct list_head link; | ||
| 69 | }; | ||
| 70 | |||
| 46 | struct _irqfd { | 71 | struct _irqfd { |
| 47 | /* Used for MSI fast-path */ | 72 | /* Used for MSI fast-path */ |
| 48 | struct kvm *kvm; | 73 | struct kvm *kvm; |
| @@ -52,6 +77,12 @@ struct _irqfd { | |||
| 52 | /* Used for level IRQ fast-path */ | 77 | /* Used for level IRQ fast-path */ |
| 53 | int gsi; | 78 | int gsi; |
| 54 | struct work_struct inject; | 79 | struct work_struct inject; |
| 80 | /* The resampler used by this irqfd (resampler-only) */ | ||
| 81 | struct _irqfd_resampler *resampler; | ||
| 82 | /* Eventfd notified on resample (resampler-only) */ | ||
| 83 | struct eventfd_ctx *resamplefd; | ||
| 84 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
| 85 | struct list_head resampler_link; | ||
| 55 | /* Used for setup/shutdown */ | 86 | /* Used for setup/shutdown */ |
| 56 | struct eventfd_ctx *eventfd; | 87 | struct eventfd_ctx *eventfd; |
| 57 | struct list_head list; | 88 | struct list_head list; |
| @@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work) | |||
| 67 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 98 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
| 68 | struct kvm *kvm = irqfd->kvm; | 99 | struct kvm *kvm = irqfd->kvm; |
| 69 | 100 | ||
| 70 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 101 | if (!irqfd->resampler) { |
| 71 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 102 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
| 103 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | ||
| 104 | } else | ||
| 105 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 106 | irqfd->gsi, 1); | ||
| 107 | } | ||
| 108 | |||
| 109 | /* | ||
| 110 | * Since resampler irqfds share an IRQ source ID, we de-assert once | ||
| 111 | * then notify all of the resampler irqfds using this GSI. We can't | ||
| 112 | * do multiple de-asserts or we risk racing with incoming re-asserts. | ||
| 113 | */ | ||
| 114 | static void | ||
| 115 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | ||
| 116 | { | ||
| 117 | struct _irqfd_resampler *resampler; | ||
| 118 | struct _irqfd *irqfd; | ||
| 119 | |||
| 120 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | ||
| 121 | |||
| 122 | kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 123 | resampler->notifier.gsi, 0); | ||
| 124 | |||
| 125 | rcu_read_lock(); | ||
| 126 | |||
| 127 | list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) | ||
| 128 | eventfd_signal(irqfd->resamplefd, 1); | ||
| 129 | |||
| 130 | rcu_read_unlock(); | ||
| 131 | } | ||
| 132 | |||
| 133 | static void | ||
| 134 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | ||
| 135 | { | ||
| 136 | struct _irqfd_resampler *resampler = irqfd->resampler; | ||
| 137 | struct kvm *kvm = resampler->kvm; | ||
| 138 | |||
| 139 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
| 140 | |||
| 141 | list_del_rcu(&irqfd->resampler_link); | ||
| 142 | synchronize_rcu(); | ||
| 143 | |||
| 144 | if (list_empty(&resampler->list)) { | ||
| 145 | list_del(&resampler->link); | ||
| 146 | kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); | ||
| 147 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 148 | resampler->notifier.gsi, 0); | ||
| 149 | kfree(resampler); | ||
| 150 | } | ||
| 151 | |||
| 152 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
| 72 | } | 153 | } |
| 73 | 154 | ||
| 74 | /* | 155 | /* |
| @@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work) | |||
| 92 | */ | 173 | */ |
| 93 | flush_work_sync(&irqfd->inject); | 174 | flush_work_sync(&irqfd->inject); |
| 94 | 175 | ||
| 176 | if (irqfd->resampler) { | ||
| 177 | irqfd_resampler_shutdown(irqfd); | ||
| 178 | eventfd_ctx_put(irqfd->resamplefd); | ||
| 179 | } | ||
| 180 | |||
| 95 | /* | 181 | /* |
| 96 | * It is now safe to release the object's resources | 182 | * It is now safe to release the object's resources |
| 97 | */ | 183 | */ |
| @@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 203 | struct kvm_irq_routing_table *irq_rt; | 289 | struct kvm_irq_routing_table *irq_rt; |
| 204 | struct _irqfd *irqfd, *tmp; | 290 | struct _irqfd *irqfd, *tmp; |
| 205 | struct file *file = NULL; | 291 | struct file *file = NULL; |
| 206 | struct eventfd_ctx *eventfd = NULL; | 292 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
| 207 | int ret; | 293 | int ret; |
| 208 | unsigned int events; | 294 | unsigned int events; |
| 209 | 295 | ||
| @@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 231 | 317 | ||
| 232 | irqfd->eventfd = eventfd; | 318 | irqfd->eventfd = eventfd; |
| 233 | 319 | ||
| 320 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | ||
| 321 | struct _irqfd_resampler *resampler; | ||
| 322 | |||
| 323 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | ||
| 324 | if (IS_ERR(resamplefd)) { | ||
| 325 | ret = PTR_ERR(resamplefd); | ||
| 326 | goto fail; | ||
| 327 | } | ||
| 328 | |||
| 329 | irqfd->resamplefd = resamplefd; | ||
| 330 | INIT_LIST_HEAD(&irqfd->resampler_link); | ||
| 331 | |||
| 332 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
| 333 | |||
| 334 | list_for_each_entry(resampler, | ||
| 335 | &kvm->irqfds.resampler_list, link) { | ||
| 336 | if (resampler->notifier.gsi == irqfd->gsi) { | ||
| 337 | irqfd->resampler = resampler; | ||
| 338 | break; | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | if (!irqfd->resampler) { | ||
| 343 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | ||
| 344 | if (!resampler) { | ||
| 345 | ret = -ENOMEM; | ||
| 346 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
| 347 | goto fail; | ||
| 348 | } | ||
| 349 | |||
| 350 | resampler->kvm = kvm; | ||
| 351 | INIT_LIST_HEAD(&resampler->list); | ||
| 352 | resampler->notifier.gsi = irqfd->gsi; | ||
| 353 | resampler->notifier.irq_acked = irqfd_resampler_ack; | ||
| 354 | INIT_LIST_HEAD(&resampler->link); | ||
| 355 | |||
| 356 | list_add(&resampler->link, &kvm->irqfds.resampler_list); | ||
| 357 | kvm_register_irq_ack_notifier(kvm, | ||
| 358 | &resampler->notifier); | ||
| 359 | irqfd->resampler = resampler; | ||
| 360 | } | ||
| 361 | |||
| 362 | list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); | ||
| 363 | synchronize_rcu(); | ||
| 364 | |||
| 365 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
| 366 | } | ||
| 367 | |||
| 234 | /* | 368 | /* |
| 235 | * Install our own custom wake-up handling so we are notified via | 369 | * Install our own custom wake-up handling so we are notified via |
| 236 | * a callback whenever someone signals the underlying eventfd | 370 | * a callback whenever someone signals the underlying eventfd |
| @@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 276 | return 0; | 410 | return 0; |
| 277 | 411 | ||
| 278 | fail: | 412 | fail: |
| 413 | if (irqfd->resampler) | ||
| 414 | irqfd_resampler_shutdown(irqfd); | ||
| 415 | |||
| 416 | if (resamplefd && !IS_ERR(resamplefd)) | ||
| 417 | eventfd_ctx_put(resamplefd); | ||
| 418 | |||
| 279 | if (eventfd && !IS_ERR(eventfd)) | 419 | if (eventfd && !IS_ERR(eventfd)) |
| 280 | eventfd_ctx_put(eventfd); | 420 | eventfd_ctx_put(eventfd); |
| 281 | 421 | ||
| @@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm) | |||
| 291 | { | 431 | { |
| 292 | spin_lock_init(&kvm->irqfds.lock); | 432 | spin_lock_init(&kvm->irqfds.lock); |
| 293 | INIT_LIST_HEAD(&kvm->irqfds.items); | 433 | INIT_LIST_HEAD(&kvm->irqfds.items); |
| 434 | INIT_LIST_HEAD(&kvm->irqfds.resampler_list); | ||
| 435 | mutex_init(&kvm->irqfds.resampler_lock); | ||
| 294 | INIT_LIST_HEAD(&kvm->ioeventfds); | 436 | INIT_LIST_HEAD(&kvm->ioeventfds); |
| 295 | } | 437 | } |
| 296 | 438 | ||
| @@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 340 | int | 482 | int |
| 341 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | 483 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) |
| 342 | { | 484 | { |
| 343 | if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN) | 485 | if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) |
| 344 | return -EINVAL; | 486 | return -EINVAL; |
| 345 | 487 | ||
| 346 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) | 488 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 3ca89c451d6b..2eb58af7ee99 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -228,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 230 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
| 231 | #ifdef CONFIG_X86 | ||
| 232 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
| 233 | #endif | ||
| 231 | set_bit(irq_source_id, bitmap); | 234 | set_bit(irq_source_id, bitmap); |
| 232 | unlock: | 235 | unlock: |
| 233 | mutex_unlock(&kvm->irq_lock); | 236 | mutex_unlock(&kvm->irq_lock); |
| @@ -238,6 +241,9 @@ unlock: | |||
| 238 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | 241 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) |
| 239 | { | 242 | { |
| 240 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 243 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
| 244 | #ifdef CONFIG_X86 | ||
| 245 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
| 246 | #endif | ||
| 241 | 247 | ||
| 242 | mutex_lock(&kvm->irq_lock); | 248 | mutex_lock(&kvm->irq_lock); |
| 243 | if (irq_source_id < 0 || | 249 | if (irq_source_id < 0 || |
