Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--   virt/kvm/kvm_main.c | 470
1 file changed, 375 insertions, 95 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a87f45edfae8..3a5a08298aab 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_X86
+#include <asm/msidef.h>
+#endif
+
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
 #endif
@@ -60,10 +64,13 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+static int msi2intx = 1;
+module_param(msi2intx, bool, 0);
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
-static cpumask_t cpus_hardware_enabled;
+static cpumask_var_t cpus_hardware_enabled;
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -75,9 +82,60 @@ struct dentry *kvm_debugfs_dir;
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                            unsigned long arg);
 
-bool kvm_rebooting;
+static bool kvm_rebooting;
 
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
+
+#ifdef CONFIG_X86
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
+{
+        int vcpu_id;
+        struct kvm_vcpu *vcpu;
+        struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
+        int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+                        >> MSI_ADDR_DEST_ID_SHIFT;
+        int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
+                        >> MSI_DATA_VECTOR_SHIFT;
+        int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+                                (unsigned long *)&dev->guest_msi.address_lo);
+        int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+                                (unsigned long *)&dev->guest_msi.data);
+        int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+                                (unsigned long *)&dev->guest_msi.data);
+        u32 deliver_bitmask;
+
+        BUG_ON(!ioapic);
+
+        deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+                                dest_id, dest_mode);
+        /* IOAPIC delivery mode value is the same as MSI here */
+        switch (delivery_mode) {
+        case IOAPIC_LOWEST_PRIORITY:
+                vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+                                deliver_bitmask);
+                if (vcpu != NULL)
+                        kvm_apic_set_irq(vcpu, vector, trig_mode);
+                else
+                        printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+                break;
+        case IOAPIC_FIXED:
+                for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+                        if (!(deliver_bitmask & (1 << vcpu_id)))
+                                continue;
+                        deliver_bitmask &= ~(1 << vcpu_id);
+                        vcpu = ioapic->kvm->vcpus[vcpu_id];
+                        if (vcpu)
+                                kvm_apic_set_irq(vcpu, vector, trig_mode);
+                }
+                break;
+        default:
+                printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
+        }
+}
+#else
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
+#endif
+
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                       int assigned_dev_id)
 {
@@ -104,9 +162,16 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
          * finer-grained lock, update this
          */
         mutex_lock(&assigned_dev->kvm->lock);
-        kvm_set_irq(assigned_dev->kvm,
-                    assigned_dev->irq_source_id,
-                    assigned_dev->guest_irq, 1);
+        if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
+                kvm_set_irq(assigned_dev->kvm,
+                            assigned_dev->irq_source_id,
+                            assigned_dev->guest_irq, 1);
+        else if (assigned_dev->irq_requested_type &
+                                KVM_ASSIGNED_DEV_GUEST_MSI) {
+                assigned_device_msi_dispatch(assigned_dev);
+                enable_irq(assigned_dev->host_irq);
+                assigned_dev->host_irq_disabled = false;
+        }
         mutex_unlock(&assigned_dev->kvm->lock);
         kvm_put_kvm(assigned_dev->kvm);
 }
@@ -117,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
                 (struct kvm_assigned_dev_kernel *) dev_id;
 
         kvm_get_kvm(assigned_dev->kvm);
+
         schedule_work(&assigned_dev->interrupt_work);
+
         disable_irq_nosync(irq);
+        assigned_dev->host_irq_disabled = true;
+
         return IRQ_HANDLED;
 }
 
@@ -132,19 +201,32 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 
         dev = container_of(kian, struct kvm_assigned_dev_kernel,
                            ack_notifier);
+
         kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
-        enable_irq(dev->host_irq);
+
+        /* The guest irq may be shared so this ack may be
+         * from another device.
+         */
+        if (dev->host_irq_disabled) {
+                enable_irq(dev->host_irq);
+                dev->host_irq_disabled = false;
+        }
 }
 
-static void kvm_free_assigned_device(struct kvm *kvm,
-                                     struct kvm_assigned_dev_kernel
-                                     *assigned_dev)
+static void kvm_free_assigned_irq(struct kvm *kvm,
+                                  struct kvm_assigned_dev_kernel *assigned_dev)
 {
-        if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
-                free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+        if (!irqchip_in_kernel(kvm))
+                return;
+
+        kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 
-        kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
-        kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+        if (assigned_dev->irq_source_id != -1)
+                kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+        assigned_dev->irq_source_id = -1;
+
+        if (!assigned_dev->irq_requested_type)
+                return;
 
         if (cancel_work_sync(&assigned_dev->interrupt_work))
                 /* We had pending work. That means we will have to take
@@ -152,6 +234,23 @@ static void kvm_free_assigned_device(struct kvm *kvm,
                  */
                 kvm_put_kvm(kvm);
 
+        free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+        if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+                pci_disable_msi(assigned_dev->dev);
+
+        assigned_dev->irq_requested_type = 0;
+}
+
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+                                     struct kvm_assigned_dev_kernel
+                                     *assigned_dev)
+{
+        kvm_free_assigned_irq(kvm, assigned_dev);
+
+        pci_reset_function(assigned_dev->dev);
+
         pci_release_regions(assigned_dev->dev);
         pci_disable_device(assigned_dev->dev);
         pci_dev_put(assigned_dev->dev);
@@ -174,6 +273,95 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
         }
 }
 
+static int assigned_device_update_intx(struct kvm *kvm,
+                        struct kvm_assigned_dev_kernel *adev,
+                        struct kvm_assigned_irq *airq)
+{
+        adev->guest_irq = airq->guest_irq;
+        adev->ack_notifier.gsi = airq->guest_irq;
+
+        if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
+                return 0;
+
+        if (irqchip_in_kernel(kvm)) {
+                if (!msi2intx &&
+                    adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
+                        free_irq(adev->host_irq, (void *)kvm);
+                        pci_disable_msi(adev->dev);
+                }
+
+                if (!capable(CAP_SYS_RAWIO))
+                        return -EPERM;
+
+                if (airq->host_irq)
+                        adev->host_irq = airq->host_irq;
+                else
+                        adev->host_irq = adev->dev->irq;
+
+                /* Even though this is PCI, we don't want to use shared
+                 * interrupts. Sharing host devices with guest-assigned devices
+                 * on the same interrupt line is not a happy situation: there
+                 * are going to be long delays in accepting, acking, etc.
+                 */
+                if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
+                                0, "kvm_assigned_intx_device", (void *)adev))
+                        return -EIO;
+        }
+
+        adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
+                                   KVM_ASSIGNED_DEV_HOST_INTX;
+        return 0;
+}
+
+#ifdef CONFIG_X86
+static int assigned_device_update_msi(struct kvm *kvm,
+                        struct kvm_assigned_dev_kernel *adev,
+                        struct kvm_assigned_irq *airq)
+{
+        int r;
+
+        if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+                /* x86 doesn't care about the upper address of the guest MSI message */
+                adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
+                adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
+                adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
+                adev->guest_msi.data = airq->guest_msi.data;
+                adev->ack_notifier.gsi = -1;
+        } else if (msi2intx) {
+                adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
+                adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
+                adev->guest_irq = airq->guest_irq;
+                adev->ack_notifier.gsi = airq->guest_irq;
+        }
+
+        if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+                return 0;
+
+        if (irqchip_in_kernel(kvm)) {
+                if (!msi2intx) {
+                        if (adev->irq_requested_type &
+                                        KVM_ASSIGNED_DEV_HOST_INTX)
+                                free_irq(adev->host_irq, (void *)adev);
+
+                        r = pci_enable_msi(adev->dev);
+                        if (r)
+                                return r;
+                }
+
+                adev->host_irq = adev->dev->irq;
+                if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
+                                "kvm_assigned_msi_device", (void *)adev))
+                        return -EIO;
+        }
+
+        if (!msi2intx)
+                adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+
+        adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
+        return 0;
+}
+#endif
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                                    struct kvm_assigned_irq
                                    *assigned_irq)
@@ -190,49 +378,68 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                 return -EINVAL;
         }
 
-        if (match->irq_requested) {
-                match->guest_irq = assigned_irq->guest_irq;
-                match->ack_notifier.gsi = assigned_irq->guest_irq;
-                mutex_unlock(&kvm->lock);
-                return 0;
-        }
+        if (!match->irq_requested_type) {
+                INIT_WORK(&match->interrupt_work,
+                          kvm_assigned_dev_interrupt_work_handler);
+                if (irqchip_in_kernel(kvm)) {
+                        /* Register ack notifier */
+                        match->ack_notifier.gsi = -1;
+                        match->ack_notifier.irq_acked =
+                                        kvm_assigned_dev_ack_irq;
+                        kvm_register_irq_ack_notifier(kvm,
+                                        &match->ack_notifier);
+
+                        /* Request IRQ source ID */
+                        r = kvm_request_irq_source_id(kvm);
+                        if (r < 0)
+                                goto out_release;
+                        else
+                                match->irq_source_id = r;
 
-        INIT_WORK(&match->interrupt_work,
-                  kvm_assigned_dev_interrupt_work_handler);
+#ifdef CONFIG_X86
+                        /* Determine host device irq type, we can know the
+                         * result from dev->msi_enabled */
+                        if (msi2intx)
+                                pci_enable_msi(match->dev);
+#endif
+                }
+        }
 
-        if (irqchip_in_kernel(kvm)) {
-                if (!capable(CAP_SYS_RAWIO)) {
-                        r = -EPERM;
+        if ((!msi2intx &&
+             (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+            (msi2intx && match->dev->msi_enabled)) {
+#ifdef CONFIG_X86
+                r = assigned_device_update_msi(kvm, match, assigned_irq);
+                if (r) {
+                        printk(KERN_WARNING "kvm: failed to enable "
+                                        "MSI device!\n");
                         goto out_release;
                 }
-
-                if (assigned_irq->host_irq)
-                        match->host_irq = assigned_irq->host_irq;
-                else
-                        match->host_irq = match->dev->irq;
-                match->guest_irq = assigned_irq->guest_irq;
-                match->ack_notifier.gsi = assigned_irq->guest_irq;
-                match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-                kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
-                r = kvm_request_irq_source_id(kvm);
-                if (r < 0)
+#else
+                r = -ENOTTY;
+#endif
+        } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
+                /* Host device IRQ 0 means the device does not support INTx */
+                if (!msi2intx) {
+                        printk(KERN_WARNING
+                               "kvm: wait device to enable MSI!\n");
+                        r = 0;
+                } else {
+                        printk(KERN_WARNING
+                               "kvm: failed to enable MSI device!\n");
+                        r = -ENOTTY;
                         goto out_release;
-                else
-                        match->irq_source_id = r;
-
-                /* Even though this is PCI, we don't want to use shared
-                 * interrupts. Sharing host devices with guest-assigned devices
-                 * on the same interrupt line is not a happy situation: there
-                 * are going to be long delays in accepting, acking, etc.
-                 */
-                if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
-                                "kvm_assigned_device", (void *)match)) {
-                        r = -EIO;
+                }
+        } else {
+                /* Non-sharing INTx mode */
+                r = assigned_device_update_intx(kvm, match, assigned_irq);
+                if (r) {
+                        printk(KERN_WARNING "kvm: failed to enable "
+                                        "INTx device!\n");
                         goto out_release;
                 }
         }
 
-        match->irq_requested = true;
         mutex_unlock(&kvm->lock);
         return r;
 out_release:
@@ -283,17 +490,26 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                        __func__);
                 goto out_disable;
         }
+
+        pci_reset_function(dev);
+
         match->assigned_dev_id = assigned_dev->assigned_dev_id;
         match->host_busnr = assigned_dev->busnr;
         match->host_devfn = assigned_dev->devfn;
+        match->flags = assigned_dev->flags;
         match->dev = dev;
-
+        match->irq_source_id = -1;
         match->kvm = kvm;
 
         list_add(&match->list, &kvm->arch.assigned_dev_head);
 
         if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-                r = kvm_iommu_map_guest(kvm, match);
+                if (!kvm->arch.iommu_domain) {
+                        r = kvm_iommu_map_guest(kvm);
+                        if (r)
+                                goto out_list_del;
+                }
+                r = kvm_assign_device(kvm, match);
                 if (r)
                         goto out_list_del;
         }
@@ -315,6 +531,35 @@ out_free:
 }
 #endif
 
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+                struct kvm_assigned_pci_dev *assigned_dev)
+{
+        int r = 0;
+        struct kvm_assigned_dev_kernel *match;
+
+        mutex_lock(&kvm->lock);
+
+        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                      assigned_dev->assigned_dev_id);
+        if (!match) {
+                printk(KERN_INFO "%s: device hasn't been assigned before, "
+                  "so cannot be deassigned\n", __func__);
+                r = -EINVAL;
+                goto out;
+        }
+
+        if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+                kvm_deassign_device(kvm, match);
+
+        kvm_free_assigned_device(kvm, match);
+
+out:
+        mutex_unlock(&kvm->lock);
+        return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
         return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -355,57 +600,48 @@ static void ack_flush(void *_completed)
 {
 }
 
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
         int i, cpu, me;
-        cpumask_t cpus;
+        cpumask_var_t cpus;
+        bool called = true;
         struct kvm_vcpu *vcpu;
 
+        if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
+                cpumask_clear(cpus);
+
         me = get_cpu();
-        cpus_clear(cpus);
         for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                 vcpu = kvm->vcpus[i];
                 if (!vcpu)
                         continue;
-                if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+                if (test_and_set_bit(req, &vcpu->requests))
                         continue;
                 cpu = vcpu->cpu;
-                if (cpu != -1 && cpu != me)
-                        cpu_set(cpu, cpus);
+                if (cpus != NULL && cpu != -1 && cpu != me)
+                        cpumask_set_cpu(cpu, cpus);
         }
-        if (cpus_empty(cpus))
-                goto out;
-        ++kvm->stat.remote_tlb_flush;
-        smp_call_function_mask(cpus, ack_flush, NULL, 1);
-out:
+        if (unlikely(cpus == NULL))
+                smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
+        else if (!cpumask_empty(cpus))
+                smp_call_function_many(cpus, ack_flush, NULL, 1);
+        else
+                called = false;
         put_cpu();
+        free_cpumask_var(cpus);
+        return called;
 }
 
-void kvm_reload_remote_mmus(struct kvm *kvm)
+void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-        int i, cpu, me;
-        cpumask_t cpus;
-        struct kvm_vcpu *vcpu;
-
-        me = get_cpu();
-        cpus_clear(cpus);
-        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                vcpu = kvm->vcpus[i];
-                if (!vcpu)
-                        continue;
-                if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
-                        continue;
-                cpu = vcpu->cpu;
-                if (cpu != -1 && cpu != me)
-                        cpu_set(cpu, cpus);
-        }
-        if (cpus_empty(cpus))
-                goto out;
-        smp_call_function_mask(cpus, ack_flush, NULL, 1);
-out:
-        put_cpu();
+        if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+                ++kvm->stat.remote_tlb_flush;
 }
 
+void kvm_reload_remote_mmus(struct kvm *kvm)
+{
+        make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+}
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
@@ -710,6 +946,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 goto out;
         if (mem->guest_phys_addr & (PAGE_SIZE - 1))
                 goto out;
+        if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
+                goto out;
         if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
                 goto out;
         if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
@@ -821,7 +1059,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 goto out_free;
         }
 
-        kvm_free_physmem_slot(&old, &new);
+        kvm_free_physmem_slot(&old, npages ? &new : NULL);
+        /* Slot deletion case: we have to update the current slot */
+        if (!npages)
+                *memslot = old;
 #ifdef CONFIG_DMAR
         /* map the pages in iommu page table */
         r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -918,7 +1159,7 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
         int i;
 
@@ -931,11 +1172,12 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
         }
         return NULL;
 }
+EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
         gfn = unalias_gfn(kvm, gfn);
-        return __gfn_to_memslot(kvm, gfn);
+        return gfn_to_memslot_unaliased(kvm, gfn);
 }
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -959,7 +1201,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
         struct kvm_memory_slot *slot;
 
         gfn = unalias_gfn(kvm, gfn);
-        slot = __gfn_to_memslot(kvm, gfn);
+        slot = gfn_to_memslot_unaliased(kvm, gfn);
         if (!slot)
                 return bad_hva();
         return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1210,7 +1452,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
         struct kvm_memory_slot *memslot;
 
         gfn = unalias_gfn(kvm, gfn);
-        memslot = __gfn_to_memslot(kvm, gfn);
+        memslot = gfn_to_memslot_unaliased(kvm, gfn);
         if (memslot && memslot->dirty_bitmap) {
                 unsigned long rel_gfn = gfn - memslot->base_gfn;
 
@@ -1295,7 +1537,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
         return 0;
 }
 
-static const struct file_operations kvm_vcpu_fops = {
+static struct file_operations kvm_vcpu_fops = {
         .release        = kvm_vcpu_release,
         .unlocked_ioctl = kvm_vcpu_ioctl,
         .compat_ioctl   = kvm_vcpu_ioctl,
@@ -1651,6 +1893,19 @@ static long kvm_vm_ioctl(struct file *filp,
                 break;
         }
 #endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+        case KVM_DEASSIGN_PCI_DEVICE: {
+                struct kvm_assigned_pci_dev assigned_dev;
+
+                r = -EFAULT;
+                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+                        goto out;
+                r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+                if (r)
+                        goto out;
+                break;
+        }
+#endif
         default:
                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
         }
@@ -1689,7 +1944,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
         return 0;
 }
 
-static const struct file_operations kvm_vm_fops = {
+static struct file_operations kvm_vm_fops = {
         .release        = kvm_vm_release,
         .unlocked_ioctl = kvm_vm_ioctl,
         .compat_ioctl   = kvm_vm_ioctl,
@@ -1711,6 +1966,18 @@ static int kvm_dev_ioctl_create_vm(void)
         return fd;
 }
 
+static long kvm_dev_ioctl_check_extension_generic(long arg)
+{
+        switch (arg) {
+        case KVM_CAP_USER_MEMORY:
+        case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+                return 1;
+        default:
+                break;
+        }
+        return kvm_dev_ioctl_check_extension(arg);
+}
+
 static long kvm_dev_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
@@ -1730,7 +1997,7 @@ static long kvm_dev_ioctl(struct file *filp,
                 r = kvm_dev_ioctl_create_vm();
                 break;
         case KVM_CHECK_EXTENSION:
-                r = kvm_dev_ioctl_check_extension(arg);
+                r = kvm_dev_ioctl_check_extension_generic(arg);
                 break;
         case KVM_GET_VCPU_MMAP_SIZE:
                 r = -EINVAL;
@@ -1771,9 +2038,9 @@ static void hardware_enable(void *junk)
 {
         int cpu = raw_smp_processor_id();
 
-        if (cpu_isset(cpu, cpus_hardware_enabled))
+        if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
                 return;
-        cpu_set(cpu, cpus_hardware_enabled);
+        cpumask_set_cpu(cpu, cpus_hardware_enabled);
         kvm_arch_hardware_enable(NULL);
 }
 
@@ -1781,9 +2048,9 @@ static void hardware_disable(void *junk)
 {
         int cpu = raw_smp_processor_id();
 
-        if (!cpu_isset(cpu, cpus_hardware_enabled))
+        if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
                 return;
-        cpu_clear(cpu, cpus_hardware_enabled);
+        cpumask_clear_cpu(cpu, cpus_hardware_enabled);
         kvm_arch_hardware_disable(NULL);
 }
 
@@ -2017,9 +2284,14 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
         bad_pfn = page_to_pfn(bad_page);
 
+        if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+                r = -ENOMEM;
+                goto out_free_0;
+        }
+
         r = kvm_arch_hardware_setup();
         if (r < 0)
-                goto out_free_0;
+                goto out_free_0a;
 
         for_each_online_cpu(cpu) {
                 smp_call_function_single(cpu,
@@ -2053,6 +2325,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
         }
 
         kvm_chardev_ops.owner = module;
+        kvm_vm_fops.owner = module;
+        kvm_vcpu_fops.owner = module;
 
         r = misc_register(&kvm_dev);
         if (r) {
@@ -2062,6 +2336,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
         kvm_preempt_ops.sched_in = kvm_sched_in;
         kvm_preempt_ops.sched_out = kvm_sched_out;
+#ifndef CONFIG_X86
+        msi2intx = 0;
+#endif
 
         return 0;
 
@@ -2078,6 +2355,8 @@ out_free_2:
         on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
         kvm_arch_hardware_unsetup();
+out_free_0a:
+        free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
         __free_page(bad_page);
 out:
@@ -2101,6 +2380,7 @@ void kvm_exit(void)
         kvm_arch_hardware_unsetup();
         kvm_arch_exit();
         kvm_exit_debug();
+        free_cpumask_var(cpus_hardware_enabled);
         __free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
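
For readers tracing assigned_device_msi_dispatch() above, here is a standalone sketch of how the routing fields it extracts are packed into an MSI address/data pair. The constants follow the x86 MSI register layout (the kernel's authoritative definitions live in <asm/msidef.h>); the sample message values are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* x86 MSI layout: address bits 19:12 hold the destination APIC ID and
 * address bit 2 selects physical vs. logical destination mode; data
 * bits 7:0 hold the vector, bits 10:8 the delivery mode, and bit 15
 * the trigger mode. The dispatch code above only needs bit 8 of the
 * delivery mode to tell fixed (000) from lowest-priority (001). */
#define MSI_ADDR_DEST_ID_SHIFT  12
#define MSI_ADDR_DEST_ID_MASK   0x000ff000
#define MSI_ADDR_DEST_MODE_BIT  2
#define MSI_DATA_VECTOR_MASK    0x000000ff
#define MSI_DATA_DELIVERY_BIT   8
#define MSI_DATA_TRIGGER_BIT    15

int main(void)
{
        uint32_t address_lo = 0xfee01004; /* dest id 1, logical mode */
        uint32_t data = 0x000000a1;       /* hypothetical: vector 0xa1,
                                             fixed delivery, edge */

        printf("dest_id=%u dest_mode=%u vector=0x%02x delivery=%u trig=%u\n",
               (unsigned)((address_lo & MSI_ADDR_DEST_ID_MASK)
                          >> MSI_ADDR_DEST_ID_SHIFT),
               (unsigned)((address_lo >> MSI_ADDR_DEST_MODE_BIT) & 1),
               (unsigned)(data & MSI_DATA_VECTOR_MASK),
               (unsigned)((data >> MSI_DATA_DELIVERY_BIT) & 1),
               (unsigned)((data >> MSI_DATA_TRIGGER_BIT) & 1));
        return 0;
}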
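
The new KVM_DEASSIGN_PCI_DEVICE ioctl is driven from userspace on a VM fd. A minimal sketch of a caller, assuming <linux/kvm.h> from a kernel of this era; the helper name is illustrative, and the id passed in must be the same assigned_dev_id userspace chose when it issued KVM_ASSIGN_PCI_DEVICE, since kvm_find_assigned_dev() matches on it.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: deassign a device previously attached with
 * KVM_ASSIGN_PCI_DEVICE. Passing KVM_DEV_ASSIGN_ENABLE_IOMMU mirrors
 * the flag used at assignment time and additionally drops the IOMMU
 * mapping via kvm_deassign_device() before the device is freed. */
static int kvm_deassign(int vm_fd, __u32 dev_id)
{
        struct kvm_assigned_pci_dev dev;

        memset(&dev, 0, sizeof(dev));
        dev.assigned_dev_id = dev_id;
        dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;

        return ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev);
}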
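
The cpumask changes above (cpumask_t by value → cpumask_var_t) follow the kernel's offstack-cpumask pattern, where the mask may be heap-allocated and allocation can fail. A minimal sketch of that pattern, assuming a context where GFP_KERNEL is safe; make_all_cpus_request() itself uses GFP_ATOMIC and tolerates failure by treating a NULL mask as "send to all online CPUs".

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/smp.h>

static void example_noop(void *info)
{
}

/* Illustrative only, not part of the patch. With
 * CONFIG_CPUMASK_OFFSTACK=y a cpumask_var_t lives on the heap, so
 * alloc_cpumask_var() can fail and free_cpumask_var() must pair with
 * it; with the option off, both collapse to a stack mask and no-ops. */
static int example_cross_call(void)
{
        cpumask_var_t mask;
        int cpu, me;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;
        cpumask_clear(mask);

        me = get_cpu();        /* disable preemption while sending IPIs */
        for_each_online_cpu(cpu) {
                if (cpu != me)
                        cpumask_set_cpu(cpu, mask);
        }

        if (!cpumask_empty(mask))
                smp_call_function_many(mask, example_noop, NULL, 1);
        put_cpu();

        free_cpumask_var(mask);
        return 0;
}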
