 -rw-r--r--  Documentation/kernel-parameters.txt  |   5
 -rw-r--r--  MAINTAINERS                          |   1
 -rw-r--r--  arch/ia64/include/asm/dma-mapping.h  |   4
 -rw-r--r--  arch/x86/Kconfig                     |   1
 -rw-r--r--  arch/x86/kernel/amd_iommu.c          | 328
 -rw-r--r--  arch/x86/kernel/amd_iommu_init.c     | 194
 -rw-r--r--  arch/x86/kernel/early-quirks.c       |  18
 -rw-r--r--  arch/x86/kernel/k8.c                 |   5
 -rw-r--r--  arch/x86/kernel/pci-calgary_64.c     |  18
 -rw-r--r--  arch/x86/kernel/pci-dma.c            | 177
 -rw-r--r--  arch/x86/kernel/pci-gart_64.c        | 125
 -rw-r--r--  arch/x86/kernel/pci-nommu.c          |  10
 -rw-r--r--  drivers/pci/intel-iommu.c            |   2
 -rw-r--r--  include/asm-x86/amd_iommu.h          |   3
 -rw-r--r--  include/asm-x86/amd_iommu_types.h    |  64
 -rw-r--r--  include/asm-x86/bitops.h             |  10
 -rw-r--r--  include/asm-x86/dma-mapping.h        |  87
 -rw-r--r--  include/asm-x86/gart.h               |   2
 -rw-r--r--  include/asm-x86/iommu.h              |   1
 -rw-r--r--  include/linux/dma-mapping.h          |  12
 -rw-r--r--  include/linux/iommu-helper.h         |  16
 -rw-r--r--  include/linux/pci_ids.h              |  10
 -rw-r--r--  kernel/dma-coherent.c                |   2
 -rw-r--r--  lib/iommu-helper.c                   |   5
 -rw-r--r--  lib/swiotlb.c                        |  49

 25 files changed, 803 insertions, 346 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 329dcabe4c5e..a2701cb6b37a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -284,6 +284,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 284 | isolate - enable device isolation (each device, as far | 284 | isolate - enable device isolation (each device, as far |
| 285 | as possible, will get its own protection | 285 | as possible, will get its own protection |
| 286 | domain) | 286 | domain) |
| 287 | fullflush - enable flushing of IO/TLB entries when | ||
| 288 | they are unmapped. Otherwise they are | ||
| 289 | flushed before they are reused, which | ||
| 290 | is a lot faster | ||
| 291 | |||
| 287 | amd_iommu_size= [HW,X86-64] | 292 | amd_iommu_size= [HW,X86-64] |
| 288 | Define the size of the aperture for the AMD IOMMU | 293 | Define the size of the aperture for the AMD IOMMU |
| 289 | driver. Possible values are: | 294 | driver. Possible values are: |
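
To make the trade-off behind the new fullflush option concrete: with fullflush every unmap invalidates its IO/TLB entries immediately, while the default lazy mode defers flushing until the address allocator wraps around and old entries are about to be reused. The standalone toy program below is an editorial sketch, not part of the patch (all names in it are invented); it simply counts how many flushes each policy would issue for the same workload.

#include <stdbool.h>
#include <stdio.h>

#define APERTURE_PAGES 16384			/* 64 MB aperture, 4 KB pages */

int main(void)
{
        unsigned long ops = 1000000, next_bit = 0;
        unsigned long flushes_full = 0, flushes_lazy = 0;
        bool need_flush = false;
        unsigned long i;

        for (i = 0; i < ops; i++) {
                /* one-page map followed by an unmap */
                if (next_bit + 1 >= APERTURE_PAGES) {	/* allocator wrapped */
                        next_bit = 0;
                        need_flush = true;
                }
                next_bit += 1;

                flushes_full++;			/* fullflush: flush on every unmap */
                if (need_flush) {		/* lazy: flush once per wrap-around */
                        flushes_lazy++;
                        need_flush = false;
                }
        }

        printf("fullflush policy: %lu flushes\n", flushes_full);
        printf("lazy policy:      %lu flushes\n", flushes_lazy);
        return 0;
}

For a million single-page mappings the lazy policy issues roughly one flush per aperture wrap-around (61 here) instead of one per unmap, which is the speedup the documentation text refers to.
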
diff --git a/MAINTAINERS b/MAINTAINERS index 8dae4555f10e..3c124d7989e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -387,6 +387,7 @@ AMD IOMMU (AMD-VI) | |||
| 387 | P: Joerg Roedel | 387 | P: Joerg Roedel |
| 388 | M: joerg.roedel@amd.com | 388 | M: joerg.roedel@amd.com |
| 389 | L: iommu@lists.linux-foundation.org | 389 | L: iommu@lists.linux-foundation.org |
| 390 | T: git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git | ||
| 390 | S: Supported | 391 | S: Supported |
| 391 | 392 | ||
| 392 | AMS (Apple Motion Sensor) DRIVER | 393 | AMS (Apple Motion Sensor) DRIVER |
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 9f0df9bd46b7..06ff1ba21465 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h | |||
| @@ -8,7 +8,9 @@ | |||
| 8 | #include <asm/machvec.h> | 8 | #include <asm/machvec.h> |
| 9 | #include <linux/scatterlist.h> | 9 | #include <linux/scatterlist.h> |
| 10 | 10 | ||
| 11 | #define dma_alloc_coherent platform_dma_alloc_coherent | 11 | #define dma_alloc_coherent(dev, size, handle, gfp) \ |
| 12 | platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA) | ||
| 13 | |||
| 12 | /* coherent mem. is cheap */ | 14 | /* coherent mem. is cheap */ |
| 13 | static inline void * | 15 | static inline void * |
| 14 | dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | 16 | dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 97f0d2b6dc0c..0d7cdbbfc1ee 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -554,6 +554,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT | |||
| 554 | config AMD_IOMMU | 554 | config AMD_IOMMU |
| 555 | bool "AMD IOMMU support" | 555 | bool "AMD IOMMU support" |
| 556 | select SWIOTLB | 556 | select SWIOTLB |
| 557 | select PCI_MSI | ||
| 557 | depends on X86_64 && PCI && ACPI | 558 | depends on X86_64 && PCI && ACPI |
| 558 | help | 559 | help |
| 559 | With this option you can enable support for AMD IOMMU hardware in | 560 | With this option you can enable support for AMD IOMMU hardware in |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 042fdc27bc92..34e4d112b1ef 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -33,6 +33,10 @@ | |||
| 33 | 33 | ||
| 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
| 35 | 35 | ||
| 36 | /* A list of preallocated protection domains */ | ||
| 37 | static LIST_HEAD(iommu_pd_list); | ||
| 38 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | ||
| 39 | |||
| 36 | /* | 40 | /* |
| 37 | * general struct to manage commands sent to an IOMMU | 41 | * general struct to manage commands sent to an IOMMU |
| 38 | */ | 42 | */ |
| @@ -51,6 +55,102 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
| 51 | 55 | ||
| 52 | /**************************************************************************** | 56 | /**************************************************************************** |
| 53 | * | 57 | * |
| 58 | * Interrupt handling functions | ||
| 59 | * | ||
| 60 | ****************************************************************************/ | ||
| 61 | |||
| 62 | static void iommu_print_event(void *__evt) | ||
| 63 | { | ||
| 64 | u32 *event = __evt; | ||
| 65 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | ||
| 66 | int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; | ||
| 67 | int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; | ||
| 68 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | ||
| 69 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | ||
| 70 | |||
| 71 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | ||
| 72 | |||
| 73 | switch (type) { | ||
| 74 | case EVENT_TYPE_ILL_DEV: | ||
| 75 | printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " | ||
| 76 | "address=0x%016llx flags=0x%04x]\n", | ||
| 77 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 78 | address, flags); | ||
| 79 | break; | ||
| 80 | case EVENT_TYPE_IO_FAULT: | ||
| 81 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | ||
| 82 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
| 83 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 84 | domid, address, flags); | ||
| 85 | break; | ||
| 86 | case EVENT_TYPE_DEV_TAB_ERR: | ||
| 87 | printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
| 88 | "address=0x%016llx flags=0x%04x]\n", | ||
| 89 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 90 | address, flags); | ||
| 91 | break; | ||
| 92 | case EVENT_TYPE_PAGE_TAB_ERR: | ||
| 93 | printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
| 94 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
| 95 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 96 | domid, address, flags); | ||
| 97 | break; | ||
| 98 | case EVENT_TYPE_ILL_CMD: | ||
| 99 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | ||
| 100 | break; | ||
| 101 | case EVENT_TYPE_CMD_HARD_ERR: | ||
| 102 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | ||
| 103 | "flags=0x%04x]\n", address, flags); | ||
| 104 | break; | ||
| 105 | case EVENT_TYPE_IOTLB_INV_TO: | ||
| 106 | printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " | ||
| 107 | "address=0x%016llx]\n", | ||
| 108 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 109 | address); | ||
| 110 | break; | ||
| 111 | case EVENT_TYPE_INV_DEV_REQ: | ||
| 112 | printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " | ||
| 113 | "address=0x%016llx flags=0x%04x]\n", | ||
| 114 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 115 | address, flags); | ||
| 116 | break; | ||
| 117 | default: | ||
| 118 | printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static void iommu_poll_events(struct amd_iommu *iommu) | ||
| 123 | { | ||
| 124 | u32 head, tail; | ||
| 125 | unsigned long flags; | ||
| 126 | |||
| 127 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 128 | |||
| 129 | head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
| 130 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
| 131 | |||
| 132 | while (head != tail) { | ||
| 133 | iommu_print_event(iommu->evt_buf + head); | ||
| 134 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | ||
| 135 | } | ||
| 136 | |||
| 137 | writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
| 138 | |||
| 139 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 140 | } | ||
| 141 | |||
| 142 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
| 143 | { | ||
| 144 | struct amd_iommu *iommu; | ||
| 145 | |||
| 146 | list_for_each_entry(iommu, &amd_iommu_list, list) | ||
| 147 | iommu_poll_events(iommu); | ||
| 148 | |||
| 149 | return IRQ_HANDLED; | ||
| 150 | } | ||
| 151 | |||
| 152 | /**************************************************************************** | ||
| 153 | * | ||
| 54 | * IOMMU command queuing functions | 154 | * IOMMU command queuing functions |
| 55 | * | 155 | * |
| 56 | ****************************************************************************/ | 156 | ****************************************************************************/ |
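
The event decoder above reports faulting devices as bus:slot.function, all recovered from the 16-bit device id carried in the event entry. The standalone snippet below restates that encoding; the macro bodies are local copies for illustration (CALC_DEVID mirrors the driver's calc_devid(), PCI_BUS() is the driver's own macro, visible in the amd_iommu_init.c hunk further down, and PCI_SLOT()/PCI_FUNC() come from <linux/pci.h>), so treat it as a sketch rather than the authoritative definitions.

#include <stdio.h>

/* local restatements of the kernel macros, for illustration only */
#define CALC_DEVID(bus, devfn)	(((unsigned)(bus) << 8) | (devfn))
#define PCI_BUS(x)		(((x) >> 8) & 0xff)
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
        /* device 02:08.1 -> devfn packs slot 8 and function 1 */
        unsigned devid = CALC_DEVID(0x02, (0x08 << 3) | 0x1);

        /*
         * Passing the full 16-bit devid to PCI_SLOT()/PCI_FUNC() works
         * because both macros mask away the bus bits, which is why
         * iommu_print_event() can use devid directly.
         */
        printf("devid=0x%04x -> %02x:%02x.%x\n", devid,
               PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid));
        return 0;	/* prints: devid=0x0241 -> 02:08.1 */
}
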
| @@ -213,6 +313,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
| 213 | return 0; | 313 | return 0; |
| 214 | } | 314 | } |
| 215 | 315 | ||
| 316 | /* Flush the whole IO/TLB for a given protection domain */ | ||
| 317 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | ||
| 318 | { | ||
| 319 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
| 320 | |||
| 321 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | ||
| 322 | } | ||
| 323 | |||
| 216 | /**************************************************************************** | 324 | /**************************************************************************** |
| 217 | * | 325 | * |
| 218 | * The functions below are used to create the page table mappings for | 326 | * The functions below are used to create the page table mappings for |
| @@ -372,11 +480,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
| 372 | * efficient allocator. | 480 | * efficient allocator. |
| 373 | * | 481 | * |
| 374 | ****************************************************************************/ | 482 | ****************************************************************************/ |
| 375 | static unsigned long dma_mask_to_pages(unsigned long mask) | ||
| 376 | { | ||
| 377 | return (mask >> PAGE_SHIFT) + | ||
| 378 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); | ||
| 379 | } | ||
| 380 | 483 | ||
| 381 | /* | 484 | /* |
| 382 | * The address allocator core function. | 485 | * The address allocator core function. |
| @@ -385,25 +488,31 @@ static unsigned long dma_mask_to_pages(unsigned long mask) | |||
| 385 | */ | 488 | */ |
| 386 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 489 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
| 387 | struct dma_ops_domain *dom, | 490 | struct dma_ops_domain *dom, |
| 388 | unsigned int pages) | 491 | unsigned int pages, |
| 492 | unsigned long align_mask, | ||
| 493 | u64 dma_mask) | ||
| 389 | { | 494 | { |
| 390 | unsigned long limit = dma_mask_to_pages(*dev->dma_mask); | 495 | unsigned long limit; |
| 391 | unsigned long address; | 496 | unsigned long address; |
| 392 | unsigned long size = dom->aperture_size >> PAGE_SHIFT; | ||
| 393 | unsigned long boundary_size; | 497 | unsigned long boundary_size; |
| 394 | 498 | ||
| 395 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 499 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, |
| 396 | PAGE_SIZE) >> PAGE_SHIFT; | 500 | PAGE_SIZE) >> PAGE_SHIFT; |
| 397 | limit = limit < size ? limit : size; | 501 | limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, |
| 502 | dma_mask >> PAGE_SHIFT); | ||
| 398 | 503 | ||
| 399 | if (dom->next_bit >= limit) | 504 | if (dom->next_bit >= limit) { |
| 400 | dom->next_bit = 0; | 505 | dom->next_bit = 0; |
| 506 | dom->need_flush = true; | ||
| 507 | } | ||
| 401 | 508 | ||
| 402 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, | 509 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, |
| 403 | 0 , boundary_size, 0); | 510 | 0 , boundary_size, align_mask); |
| 404 | if (address == -1) | 511 | if (address == -1) { |
| 405 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, | 512 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, |
| 406 | 0, boundary_size, 0); | 513 | 0, boundary_size, align_mask); |
| 514 | dom->need_flush = true; | ||
| 515 | } | ||
| 407 | 516 | ||
| 408 | if (likely(address != -1)) { | 517 | if (likely(address != -1)) { |
| 409 | dom->next_bit = address + pages; | 518 | dom->next_bit = address + pages; |
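
The address allocator no longer converts the DMA mask to a page count by hand; it asks iommu_device_max_index() for the highest bitmap index that is both inside the aperture and reachable under the device's mask. The sketch below models that clamping under the assumption that the helper behaves essentially like min(aperture pages, pages addressable by the mask); check lib/iommu-helper.c in this series for the exact semantics before relying on it.

#include <stdio.h>

#define PAGE_SHIFT	12
#define APERTURE_PAGES	(1UL << (26 - PAGE_SHIFT))	/* default 64 MB aperture */

/* simplified stand-in for iommu_device_max_index() */
static unsigned long max_index(unsigned long size, unsigned long long mask_pages)
{
        if (mask_pages + 1 < size)
                return (unsigned long)(mask_pages + 1);
        return size;
}

int main(void)
{
        unsigned long long isa_mask = 0xffffffULL;	/* 24-bit mask */
        unsigned long long pci_mask = 0xffffffffULL;	/* 32-bit mask */

        printf("aperture: %lu pages\n", APERTURE_PAGES);
        printf("limit, 24-bit mask: %lu pages\n",
               max_index(APERTURE_PAGES, isa_mask >> PAGE_SHIFT));
        printf("limit, 32-bit mask: %lu pages\n",
               max_index(APERTURE_PAGES, pci_mask >> PAGE_SHIFT));
        return 0;	/* prints 16384, 4096 and 16384 */
}

An ISA-style device therefore competes only for the first 4096 aperture pages, while a 32-bit capable device can use the whole aperture.
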
| @@ -469,7 +578,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
| 469 | if (start_page + pages > last_page) | 578 | if (start_page + pages > last_page) |
| 470 | pages = last_page - start_page; | 579 | pages = last_page - start_page; |
| 471 | 580 | ||
| 472 | set_bit_string(dom->bitmap, start_page, pages); | 581 | iommu_area_reserve(dom->bitmap, start_page, pages); |
| 473 | } | 582 | } |
| 474 | 583 | ||
| 475 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | 584 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) |
| @@ -563,6 +672,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
| 563 | dma_dom->bitmap[0] = 1; | 672 | dma_dom->bitmap[0] = 1; |
| 564 | dma_dom->next_bit = 0; | 673 | dma_dom->next_bit = 0; |
| 565 | 674 | ||
| 675 | dma_dom->need_flush = false; | ||
| 676 | dma_dom->target_dev = 0xffff; | ||
| 677 | |||
| 566 | /* Initialize the exclusion range if necessary */ | 678 | /* Initialize the exclusion range if necessary */ |
| 567 | if (iommu->exclusion_start && | 679 | if (iommu->exclusion_start && |
| 568 | iommu->exclusion_start < dma_dom->aperture_size) { | 680 | iommu->exclusion_start < dma_dom->aperture_size) { |
| @@ -633,12 +745,13 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
| 633 | 745 | ||
| 634 | u64 pte_root = virt_to_phys(domain->pt_root); | 746 | u64 pte_root = virt_to_phys(domain->pt_root); |
| 635 | 747 | ||
| 636 | pte_root |= (domain->mode & 0x07) << 9; | 748 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
| 637 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; | 749 | << DEV_ENTRY_MODE_SHIFT; |
| 750 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
| 638 | 751 | ||
| 639 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 752 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
| 640 | amd_iommu_dev_table[devid].data[0] = pte_root; | 753 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
| 641 | amd_iommu_dev_table[devid].data[1] = pte_root >> 32; | 754 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
| 642 | amd_iommu_dev_table[devid].data[2] = domain->id; | 755 | amd_iommu_dev_table[devid].data[2] = domain->id; |
| 643 | 756 | ||
| 644 | amd_iommu_pd_table[devid] = domain; | 757 | amd_iommu_pd_table[devid] = domain; |
| @@ -656,6 +769,45 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
| 656 | *****************************************************************************/ | 769 | *****************************************************************************/ |
| 657 | 770 | ||
| 658 | /* | 771 | /* |
| 772 | * This function checks if the driver got a valid device from the caller to | ||
| 773 | * avoid dereferencing invalid pointers. | ||
| 774 | */ | ||
| 775 | static bool check_device(struct device *dev) | ||
| 776 | { | ||
| 777 | if (!dev || !dev->dma_mask) | ||
| 778 | return false; | ||
| 779 | |||
| 780 | return true; | ||
| 781 | } | ||
| 782 | |||
| 783 | /* | ||
| 784 | * In this function the list of preallocated protection domains is traversed to | ||
| 785 | * find the domain for a specific device | ||
| 786 | */ | ||
| 787 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
| 788 | { | ||
| 789 | struct dma_ops_domain *entry, *ret = NULL; | ||
| 790 | unsigned long flags; | ||
| 791 | |||
| 792 | if (list_empty(&iommu_pd_list)) | ||
| 793 | return NULL; | ||
| 794 | |||
| 795 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
| 796 | |||
| 797 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
| 798 | if (entry->target_dev == devid) { | ||
| 799 | ret = entry; | ||
| 800 | list_del(&ret->list); | ||
| 801 | break; | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
| 805 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
| 806 | |||
| 807 | return ret; | ||
| 808 | } | ||
| 809 | |||
| 810 | /* | ||
| 659 | * In the dma_ops path we only have the struct device. This function | 811 | * In the dma_ops path we only have the struct device. This function |
| 660 | * finds the corresponding IOMMU, the protection domain and the | 812 | * finds the corresponding IOMMU, the protection domain and the |
| 661 | * requestor id for a given device. | 813 | * requestor id for a given device. |
| @@ -671,27 +823,30 @@ static int get_device_resources(struct device *dev, | |||
| 671 | struct pci_dev *pcidev; | 823 | struct pci_dev *pcidev; |
| 672 | u16 _bdf; | 824 | u16 _bdf; |
| 673 | 825 | ||
| 674 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); | 826 | *iommu = NULL; |
| 827 | *domain = NULL; | ||
| 828 | *bdf = 0xffff; | ||
| 829 | |||
| 830 | if (dev->bus != &pci_bus_type) | ||
| 831 | return 0; | ||
| 675 | 832 | ||
| 676 | pcidev = to_pci_dev(dev); | 833 | pcidev = to_pci_dev(dev); |
| 677 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | 834 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
| 678 | 835 | ||
| 679 | /* device not translated by any IOMMU in the system? */ | 836 | /* device not translated by any IOMMU in the system? */ |
| 680 | if (_bdf > amd_iommu_last_bdf) { | 837 | if (_bdf > amd_iommu_last_bdf) |
| 681 | *iommu = NULL; | ||
| 682 | *domain = NULL; | ||
| 683 | *bdf = 0xffff; | ||
| 684 | return 0; | 838 | return 0; |
| 685 | } | ||
| 686 | 839 | ||
| 687 | *bdf = amd_iommu_alias_table[_bdf]; | 840 | *bdf = amd_iommu_alias_table[_bdf]; |
| 688 | 841 | ||
| 689 | *iommu = amd_iommu_rlookup_table[*bdf]; | 842 | *iommu = amd_iommu_rlookup_table[*bdf]; |
| 690 | if (*iommu == NULL) | 843 | if (*iommu == NULL) |
| 691 | return 0; | 844 | return 0; |
| 692 | dma_dom = (*iommu)->default_dom; | ||
| 693 | *domain = domain_for_device(*bdf); | 845 | *domain = domain_for_device(*bdf); |
| 694 | if (*domain == NULL) { | 846 | if (*domain == NULL) { |
| 847 | dma_dom = find_protection_domain(*bdf); | ||
| 848 | if (!dma_dom) | ||
| 849 | dma_dom = (*iommu)->default_dom; | ||
| 695 | *domain = &dma_dom->domain; | 850 | *domain = &dma_dom->domain; |
| 696 | set_device_domain(*iommu, *domain, *bdf); | 851 | set_device_domain(*iommu, *domain, *bdf); |
| 697 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 852 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " |
| @@ -770,17 +925,24 @@ static dma_addr_t __map_single(struct device *dev, | |||
| 770 | struct dma_ops_domain *dma_dom, | 925 | struct dma_ops_domain *dma_dom, |
| 771 | phys_addr_t paddr, | 926 | phys_addr_t paddr, |
| 772 | size_t size, | 927 | size_t size, |
| 773 | int dir) | 928 | int dir, |
| 929 | bool align, | ||
| 930 | u64 dma_mask) | ||
| 774 | { | 931 | { |
| 775 | dma_addr_t offset = paddr & ~PAGE_MASK; | 932 | dma_addr_t offset = paddr & ~PAGE_MASK; |
| 776 | dma_addr_t address, start; | 933 | dma_addr_t address, start; |
| 777 | unsigned int pages; | 934 | unsigned int pages; |
| 935 | unsigned long align_mask = 0; | ||
| 778 | int i; | 936 | int i; |
| 779 | 937 | ||
| 780 | pages = iommu_num_pages(paddr, size); | 938 | pages = iommu_num_pages(paddr, size); |
| 781 | paddr &= PAGE_MASK; | 939 | paddr &= PAGE_MASK; |
| 782 | 940 | ||
| 783 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); | 941 | if (align) |
| 942 | align_mask = (1UL << get_order(size)) - 1; | ||
| 943 | |||
| 944 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | ||
| 945 | dma_mask); | ||
| 784 | if (unlikely(address == bad_dma_address)) | 946 | if (unlikely(address == bad_dma_address)) |
| 785 | goto out; | 947 | goto out; |
| 786 | 948 | ||
| @@ -792,6 +954,12 @@ static dma_addr_t __map_single(struct device *dev, | |||
| 792 | } | 954 | } |
| 793 | address += offset; | 955 | address += offset; |
| 794 | 956 | ||
| 957 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | ||
| 958 | iommu_flush_tlb(iommu, dma_dom->domain.id); | ||
| 959 | dma_dom->need_flush = false; | ||
| 960 | } else if (unlikely(iommu_has_npcache(iommu))) | ||
| 961 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | ||
| 962 | |||
| 795 | out: | 963 | out: |
| 796 | return address; | 964 | return address; |
| 797 | } | 965 | } |
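
The new align flag makes __map_single() place coherent buffers at their natural alignment: align_mask has the low get_order(size) bits set, so iommu_area_alloc() may only return indices that are multiples of the buffer's size in pages. A quick standalone check of that arithmetic follows; get_order() is re-implemented locally purely for the demo and is meant to mirror the kernel helper.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* local stand-in for the kernel's get_order() */
static int get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        unsigned long sizes[] = { PAGE_SIZE, 4 * PAGE_SIZE, 5 * PAGE_SIZE };
        int i;

        for (i = 0; i < 3; i++) {
                unsigned long align_mask = (1UL << get_order(sizes[i])) - 1;

                printf("size %6lu -> order %d, align_mask 0x%lx "
                       "(start is a multiple of %lu pages)\n",
                       sizes[i], get_order(sizes[i]), align_mask,
                       align_mask + 1);
        }
        return 0;
}

So a 20 KB coherent allocation, for instance, lands in an 8-page-aligned slot, matching the alignment guarantee dma_alloc_coherent() gives its callers.
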
| @@ -822,6 +990,9 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
| 822 | } | 990 | } |
| 823 | 991 | ||
| 824 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 992 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
| 993 | |||
| 994 | if (amd_iommu_unmap_flush) | ||
| 995 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | ||
| 825 | } | 996 | } |
| 826 | 997 | ||
| 827 | /* | 998 | /* |
| @@ -835,6 +1006,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
| 835 | struct protection_domain *domain; | 1006 | struct protection_domain *domain; |
| 836 | u16 devid; | 1007 | u16 devid; |
| 837 | dma_addr_t addr; | 1008 | dma_addr_t addr; |
| 1009 | u64 dma_mask; | ||
| 1010 | |||
| 1011 | if (!check_device(dev)) | ||
| 1012 | return bad_dma_address; | ||
| 1013 | |||
| 1014 | dma_mask = *dev->dma_mask; | ||
| 838 | 1015 | ||
| 839 | get_device_resources(dev, &iommu, &domain, &devid); | 1016 | get_device_resources(dev, &iommu, &domain, &devid); |
| 840 | 1017 | ||
| @@ -843,14 +1020,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
| 843 | return (dma_addr_t)paddr; | 1020 | return (dma_addr_t)paddr; |
| 844 | 1021 | ||
| 845 | spin_lock_irqsave(&domain->lock, flags); | 1022 | spin_lock_irqsave(&domain->lock, flags); |
| 846 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); | 1023 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, |
| 1024 | dma_mask); | ||
| 847 | if (addr == bad_dma_address) | 1025 | if (addr == bad_dma_address) |
| 848 | goto out; | 1026 | goto out; |
| 849 | 1027 | ||
| 850 | if (iommu_has_npcache(iommu)) | 1028 | if (unlikely(iommu->need_sync)) |
| 851 | iommu_flush_pages(iommu, domain->id, addr, size); | ||
| 852 | |||
| 853 | if (iommu->need_sync) | ||
| 854 | iommu_completion_wait(iommu); | 1029 | iommu_completion_wait(iommu); |
| 855 | 1030 | ||
| 856 | out: | 1031 | out: |
| @@ -870,7 +1045,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 870 | struct protection_domain *domain; | 1045 | struct protection_domain *domain; |
| 871 | u16 devid; | 1046 | u16 devid; |
| 872 | 1047 | ||
| 873 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1048 | if (!check_device(dev) || |
| 1049 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 874 | /* device not handled by any AMD IOMMU */ | 1050 | /* device not handled by any AMD IOMMU */ |
| 875 | return; | 1051 | return; |
| 876 | 1052 | ||
| @@ -878,9 +1054,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 878 | 1054 | ||
| 879 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1055 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); |
| 880 | 1056 | ||
| 881 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | 1057 | if (unlikely(iommu->need_sync)) |
| 882 | |||
| 883 | if (iommu->need_sync) | ||
| 884 | iommu_completion_wait(iommu); | 1058 | iommu_completion_wait(iommu); |
| 885 | 1059 | ||
| 886 | spin_unlock_irqrestore(&domain->lock, flags); | 1060 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -919,6 +1093,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
| 919 | struct scatterlist *s; | 1093 | struct scatterlist *s; |
| 920 | phys_addr_t paddr; | 1094 | phys_addr_t paddr; |
| 921 | int mapped_elems = 0; | 1095 | int mapped_elems = 0; |
| 1096 | u64 dma_mask; | ||
| 1097 | |||
| 1098 | if (!check_device(dev)) | ||
| 1099 | return 0; | ||
| 1100 | |||
| 1101 | dma_mask = *dev->dma_mask; | ||
| 922 | 1102 | ||
| 923 | get_device_resources(dev, &iommu, &domain, &devid); | 1103 | get_device_resources(dev, &iommu, &domain, &devid); |
| 924 | 1104 | ||
| @@ -931,19 +1111,17 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
| 931 | paddr = sg_phys(s); | 1111 | paddr = sg_phys(s); |
| 932 | 1112 | ||
| 933 | s->dma_address = __map_single(dev, iommu, domain->priv, | 1113 | s->dma_address = __map_single(dev, iommu, domain->priv, |
| 934 | paddr, s->length, dir); | 1114 | paddr, s->length, dir, false, |
| 1115 | dma_mask); | ||
| 935 | 1116 | ||
| 936 | if (s->dma_address) { | 1117 | if (s->dma_address) { |
| 937 | s->dma_length = s->length; | 1118 | s->dma_length = s->length; |
| 938 | mapped_elems++; | 1119 | mapped_elems++; |
| 939 | } else | 1120 | } else |
| 940 | goto unmap; | 1121 | goto unmap; |
| 941 | if (iommu_has_npcache(iommu)) | ||
| 942 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
| 943 | s->dma_length); | ||
| 944 | } | 1122 | } |
| 945 | 1123 | ||
| 946 | if (iommu->need_sync) | 1124 | if (unlikely(iommu->need_sync)) |
| 947 | iommu_completion_wait(iommu); | 1125 | iommu_completion_wait(iommu); |
| 948 | 1126 | ||
| 949 | out: | 1127 | out: |
| @@ -977,7 +1155,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
| 977 | u16 devid; | 1155 | u16 devid; |
| 978 | int i; | 1156 | int i; |
| 979 | 1157 | ||
| 980 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1158 | if (!check_device(dev) || |
| 1159 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 981 | return; | 1160 | return; |
| 982 | 1161 | ||
| 983 | spin_lock_irqsave(&domain->lock, flags); | 1162 | spin_lock_irqsave(&domain->lock, flags); |
| @@ -985,12 +1164,10 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
| 985 | for_each_sg(sglist, s, nelems, i) { | 1164 | for_each_sg(sglist, s, nelems, i) { |
| 986 | __unmap_single(iommu, domain->priv, s->dma_address, | 1165 | __unmap_single(iommu, domain->priv, s->dma_address, |
| 987 | s->dma_length, dir); | 1166 | s->dma_length, dir); |
| 988 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
| 989 | s->dma_length); | ||
| 990 | s->dma_address = s->dma_length = 0; | 1167 | s->dma_address = s->dma_length = 0; |
| 991 | } | 1168 | } |
| 992 | 1169 | ||
| 993 | if (iommu->need_sync) | 1170 | if (unlikely(iommu->need_sync)) |
| 994 | iommu_completion_wait(iommu); | 1171 | iommu_completion_wait(iommu); |
| 995 | 1172 | ||
| 996 | spin_unlock_irqrestore(&domain->lock, flags); | 1173 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -1008,25 +1185,33 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
| 1008 | struct protection_domain *domain; | 1185 | struct protection_domain *domain; |
| 1009 | u16 devid; | 1186 | u16 devid; |
| 1010 | phys_addr_t paddr; | 1187 | phys_addr_t paddr; |
| 1188 | u64 dma_mask = dev->coherent_dma_mask; | ||
| 1189 | |||
| 1190 | if (!check_device(dev)) | ||
| 1191 | return NULL; | ||
| 1192 | |||
| 1193 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 1194 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 1011 | 1195 | ||
| 1196 | flag |= __GFP_ZERO; | ||
| 1012 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 1197 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
| 1013 | if (!virt_addr) | 1198 | if (!virt_addr) |
| 1014 | return 0; | 1199 | return 0; |
| 1015 | 1200 | ||
| 1016 | memset(virt_addr, 0, size); | ||
| 1017 | paddr = virt_to_phys(virt_addr); | 1201 | paddr = virt_to_phys(virt_addr); |
| 1018 | 1202 | ||
| 1019 | get_device_resources(dev, &iommu, &domain, &devid); | ||
| 1020 | |||
| 1021 | if (!iommu || !domain) { | 1203 | if (!iommu || !domain) { |
| 1022 | *dma_addr = (dma_addr_t)paddr; | 1204 | *dma_addr = (dma_addr_t)paddr; |
| 1023 | return virt_addr; | 1205 | return virt_addr; |
| 1024 | } | 1206 | } |
| 1025 | 1207 | ||
| 1208 | if (!dma_mask) | ||
| 1209 | dma_mask = *dev->dma_mask; | ||
| 1210 | |||
| 1026 | spin_lock_irqsave(&domain->lock, flags); | 1211 | spin_lock_irqsave(&domain->lock, flags); |
| 1027 | 1212 | ||
| 1028 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1213 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
| 1029 | size, DMA_BIDIRECTIONAL); | 1214 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
| 1030 | 1215 | ||
| 1031 | if (*dma_addr == bad_dma_address) { | 1216 | if (*dma_addr == bad_dma_address) { |
| 1032 | free_pages((unsigned long)virt_addr, get_order(size)); | 1217 | free_pages((unsigned long)virt_addr, get_order(size)); |
| @@ -1034,10 +1219,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
| 1034 | goto out; | 1219 | goto out; |
| 1035 | } | 1220 | } |
| 1036 | 1221 | ||
| 1037 | if (iommu_has_npcache(iommu)) | 1222 | if (unlikely(iommu->need_sync)) |
| 1038 | iommu_flush_pages(iommu, domain->id, *dma_addr, size); | ||
| 1039 | |||
| 1040 | if (iommu->need_sync) | ||
| 1041 | iommu_completion_wait(iommu); | 1223 | iommu_completion_wait(iommu); |
| 1042 | 1224 | ||
| 1043 | out: | 1225 | out: |
| @@ -1048,8 +1230,6 @@ out: | |||
| 1048 | 1230 | ||
| 1049 | /* | 1231 | /* |
| 1050 | * The exported free_coherent function for dma_ops. | 1232 | * The exported free_coherent function for dma_ops. |
| 1051 | * FIXME: fix the generic x86 DMA layer so that it actually calls that | ||
| 1052 | * function. | ||
| 1053 | */ | 1233 | */ |
| 1054 | static void free_coherent(struct device *dev, size_t size, | 1234 | static void free_coherent(struct device *dev, size_t size, |
| 1055 | void *virt_addr, dma_addr_t dma_addr) | 1235 | void *virt_addr, dma_addr_t dma_addr) |
| @@ -1059,6 +1239,9 @@ static void free_coherent(struct device *dev, size_t size, | |||
| 1059 | struct protection_domain *domain; | 1239 | struct protection_domain *domain; |
| 1060 | u16 devid; | 1240 | u16 devid; |
| 1061 | 1241 | ||
| 1242 | if (!check_device(dev)) | ||
| 1243 | return; | ||
| 1244 | |||
| 1062 | get_device_resources(dev, &iommu, &domain, &devid); | 1245 | get_device_resources(dev, &iommu, &domain, &devid); |
| 1063 | 1246 | ||
| 1064 | if (!iommu || !domain) | 1247 | if (!iommu || !domain) |
| @@ -1067,9 +1250,8 @@ static void free_coherent(struct device *dev, size_t size, | |||
| 1067 | spin_lock_irqsave(&domain->lock, flags); | 1250 | spin_lock_irqsave(&domain->lock, flags); |
| 1068 | 1251 | ||
| 1069 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 1252 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
| 1070 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | ||
| 1071 | 1253 | ||
| 1072 | if (iommu->need_sync) | 1254 | if (unlikely(iommu->need_sync)) |
| 1073 | iommu_completion_wait(iommu); | 1255 | iommu_completion_wait(iommu); |
| 1074 | 1256 | ||
| 1075 | spin_unlock_irqrestore(&domain->lock, flags); | 1257 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -1079,6 +1261,30 @@ free_mem: | |||
| 1079 | } | 1261 | } |
| 1080 | 1262 | ||
| 1081 | /* | 1263 | /* |
| 1264 | * This function is called by the DMA layer to find out if we can handle a | ||
| 1265 | * particular device. It is part of the dma_ops. | ||
| 1266 | */ | ||
| 1267 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | ||
| 1268 | { | ||
| 1269 | u16 bdf; | ||
| 1270 | struct pci_dev *pcidev; | ||
| 1271 | |||
| 1272 | /* No device or no PCI device */ | ||
| 1273 | if (!dev || dev->bus != &pci_bus_type) | ||
| 1274 | return 0; | ||
| 1275 | |||
| 1276 | pcidev = to_pci_dev(dev); | ||
| 1277 | |||
| 1278 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
| 1279 | |||
| 1280 | /* Out of our scope? */ | ||
| 1281 | if (bdf > amd_iommu_last_bdf) | ||
| 1282 | return 0; | ||
| 1283 | |||
| 1284 | return 1; | ||
| 1285 | } | ||
| 1286 | |||
| 1287 | /* | ||
| 1082 | * The function for pre-allocating protection domains. | 1288 | * The function for pre-allocating protection domains. |
| 1083 | * | 1289 | * |
| 1084 | * If the driver core informs the DMA layer when a driver grabs a device | 1290 | * If the driver core informs the DMA layer when a driver grabs a device |
| @@ -1107,10 +1313,9 @@ void prealloc_protection_domains(void) | |||
| 1107 | if (!dma_dom) | 1313 | if (!dma_dom) |
| 1108 | continue; | 1314 | continue; |
| 1109 | init_unity_mappings_for_device(dma_dom, devid); | 1315 | init_unity_mappings_for_device(dma_dom, devid); |
| 1110 | set_device_domain(iommu, &dma_dom->domain, devid); | 1316 | dma_dom->target_dev = devid; |
| 1111 | printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", | 1317 | |
| 1112 | dma_dom->domain.id); | 1318 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
| 1113 | print_devid(devid, 1); | ||
| 1114 | } | 1319 | } |
| 1115 | } | 1320 | } |
| 1116 | 1321 | ||
| @@ -1121,6 +1326,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { | |||
| 1121 | .unmap_single = unmap_single, | 1326 | .unmap_single = unmap_single, |
| 1122 | .map_sg = map_sg, | 1327 | .map_sg = map_sg, |
| 1123 | .unmap_sg = unmap_sg, | 1328 | .unmap_sg = unmap_sg, |
| 1329 | .dma_supported = amd_iommu_dma_supported, | ||
| 1124 | }; | 1330 | }; |
| 1125 | 1331 | ||
| 1126 | /* | 1332 | /* |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a69cc0f52042..148fcfe22f17 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #include <linux/gfp.h> | 22 | #include <linux/gfp.h> |
| 23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
| 24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
| 25 | #include <linux/interrupt.h> | ||
| 26 | #include <linux/msi.h> | ||
| 25 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
| 26 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
| 27 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
| @@ -30,7 +32,6 @@ | |||
| 30 | /* | 32 | /* |
| 31 | * definitions for the ACPI scanning code | 33 | * definitions for the ACPI scanning code |
| 32 | */ | 34 | */ |
| 33 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | ||
| 34 | #define IVRS_HEADER_LENGTH 48 | 35 | #define IVRS_HEADER_LENGTH 48 |
| 35 | 36 | ||
| 36 | #define ACPI_IVHD_TYPE 0x10 | 37 | #define ACPI_IVHD_TYPE 0x10 |
| @@ -121,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | |||
| 121 | we find in ACPI */ | 122 | we find in ACPI */ |
| 122 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | 123 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ |
| 123 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | 124 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ |
| 125 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | ||
| 124 | 126 | ||
| 125 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 127 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
| 126 | system */ | 128 | system */ |
| @@ -234,7 +236,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
| 234 | { | 236 | { |
| 235 | u32 ctrl; | 237 | u32 ctrl; |
| 236 | 238 | ||
| 237 | ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); | 239 | ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); |
| 238 | ctrl &= ~(1 << bit); | 240 | ctrl &= ~(1 << bit); |
| 239 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 241 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
| 240 | } | 242 | } |
| @@ -242,13 +244,23 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
| 242 | /* Function to enable the hardware */ | 244 | /* Function to enable the hardware */ |
| 243 | void __init iommu_enable(struct amd_iommu *iommu) | 245 | void __init iommu_enable(struct amd_iommu *iommu) |
| 244 | { | 246 | { |
| 245 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); | 247 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " |
| 246 | print_devid(iommu->devid, 0); | 248 | "at %02x:%02x.%x cap 0x%hx\n", |
| 247 | printk(" cap 0x%hx\n", iommu->cap_ptr); | 249 | iommu->dev->bus->number, |
| 250 | PCI_SLOT(iommu->dev->devfn), | ||
| 251 | PCI_FUNC(iommu->dev->devfn), | ||
| 252 | iommu->cap_ptr); | ||
| 248 | 253 | ||
| 249 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 254 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
| 250 | } | 255 | } |
| 251 | 256 | ||
| 257 | /* Function to enable IOMMU event logging and event interrupts */ | ||
| 258 | void __init iommu_enable_event_logging(struct amd_iommu *iommu) | ||
| 259 | { | ||
| 260 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | ||
| 261 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | ||
| 262 | } | ||
| 263 | |||
| 252 | /* | 264 | /* |
| 253 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | 265 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in |
| 254 | * the system has one. | 266 | * the system has one. |
| @@ -286,6 +298,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | |||
| 286 | ****************************************************************************/ | 298 | ****************************************************************************/ |
| 287 | 299 | ||
| 288 | /* | 300 | /* |
| 301 | * This function calculates the length of a given IVHD entry | ||
| 302 | */ | ||
| 303 | static inline int ivhd_entry_length(u8 *ivhd) | ||
| 304 | { | ||
| 305 | return 0x04 << (*ivhd >> 6); | ||
| 306 | } | ||
| 307 | |||
| 308 | /* | ||
| 289 | * This function reads the last device id the IOMMU has to handle from the PCI | 309 | * This function reads the last device id the IOMMU has to handle from the PCI |
| 290 | * capability header for this IOMMU | 310 | * capability header for this IOMMU |
| 291 | */ | 311 | */ |
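
ivhd_entry_length() captures the IVHD rule that the top two bits of an entry's type byte select its size, which is exactly the 0x04 << (*p >> 6) expression it replaces elsewhere in this file. A tiny standalone demo of the computation (editorial, not part of the patch):

#include <stdio.h>

/* same computation as ivhd_entry_length(): 0x04 << (type >> 6) */
static int entry_length(unsigned char type)
{
        return 0x04 << (type >> 6);
}

int main(void)
{
        /* type bytes whose top two bits are 00, 01, 10 and 11 */
        unsigned char samples[] = { 0x02, 0x42, 0x80, 0xf0 };
        int i;

        for (i = 0; i < 4; i++)
                printf("type 0x%02x -> %2d byte entry\n",
                       samples[i], entry_length(samples[i]));
        return 0;	/* prints 4, 8, 16 and 32 */
}
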
| @@ -329,7 +349,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
| 329 | default: | 349 | default: |
| 330 | break; | 350 | break; |
| 331 | } | 351 | } |
| 332 | p += 0x04 << (*p >> 6); | 352 | p += ivhd_entry_length(p); |
| 333 | } | 353 | } |
| 334 | 354 | ||
| 335 | WARN_ON(p != end); | 355 | WARN_ON(p != end); |
| @@ -414,7 +434,32 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
| 414 | 434 | ||
| 415 | static void __init free_command_buffer(struct amd_iommu *iommu) | 435 | static void __init free_command_buffer(struct amd_iommu *iommu) |
| 416 | { | 436 | { |
| 417 | free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); | 437 | free_pages((unsigned long)iommu->cmd_buf, |
| 438 | get_order(iommu->cmd_buf_size)); | ||
| 439 | } | ||
| 440 | |||
| 441 | /* allocates the memory where the IOMMU will log its events to */ | ||
| 442 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | ||
| 443 | { | ||
| 444 | u64 entry; | ||
| 445 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
| 446 | get_order(EVT_BUFFER_SIZE)); | ||
| 447 | |||
| 448 | if (iommu->evt_buf == NULL) | ||
| 449 | return NULL; | ||
| 450 | |||
| 451 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | ||
| 452 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | ||
| 453 | &entry, sizeof(entry)); | ||
| 454 | |||
| 455 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | ||
| 456 | |||
| 457 | return iommu->evt_buf; | ||
| 458 | } | ||
| 459 | |||
| 460 | static void __init free_event_buffer(struct amd_iommu *iommu) | ||
| 461 | { | ||
| 462 | free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); | ||
| 418 | } | 463 | } |
| 419 | 464 | ||
| 420 | /* sets a specific bit in the device table entry. */ | 465 | /* sets a specific bit in the device table entry. */ |
| @@ -487,19 +532,21 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
| 487 | */ | 532 | */ |
| 488 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 533 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
| 489 | { | 534 | { |
| 490 | int bus = PCI_BUS(iommu->devid); | ||
| 491 | int dev = PCI_SLOT(iommu->devid); | ||
| 492 | int fn = PCI_FUNC(iommu->devid); | ||
| 493 | int cap_ptr = iommu->cap_ptr; | 535 | int cap_ptr = iommu->cap_ptr; |
| 494 | u32 range; | 536 | u32 range, misc; |
| 495 | 537 | ||
| 496 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); | 538 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
| 539 | &iommu->cap); | ||
| 540 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, | ||
| 541 | &range); | ||
| 542 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, | ||
| 543 | &misc); | ||
| 497 | 544 | ||
| 498 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | ||
| 499 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), | 545 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), |
| 500 | MMIO_GET_FD(range)); | 546 | MMIO_GET_FD(range)); |
| 501 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | 547 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), |
| 502 | MMIO_GET_LD(range)); | 548 | MMIO_GET_LD(range)); |
| 549 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | ||
| 503 | } | 550 | } |
| 504 | 551 | ||
| 505 | /* | 552 | /* |
| @@ -604,7 +651,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 604 | break; | 651 | break; |
| 605 | } | 652 | } |
| 606 | 653 | ||
| 607 | p += 0x04 << (e->type >> 6); | 654 | p += ivhd_entry_length(p); |
| 608 | } | 655 | } |
| 609 | } | 656 | } |
| 610 | 657 | ||
| @@ -622,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) | |||
| 622 | static void __init free_iommu_one(struct amd_iommu *iommu) | 669 | static void __init free_iommu_one(struct amd_iommu *iommu) |
| 623 | { | 670 | { |
| 624 | free_command_buffer(iommu); | 671 | free_command_buffer(iommu); |
| 672 | free_event_buffer(iommu); | ||
| 625 | iommu_unmap_mmio_space(iommu); | 673 | iommu_unmap_mmio_space(iommu); |
| 626 | } | 674 | } |
| 627 | 675 | ||
| @@ -649,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
| 649 | /* | 697 | /* |
| 650 | * Copy data from ACPI table entry to the iommu struct | 698 | * Copy data from ACPI table entry to the iommu struct |
| 651 | */ | 699 | */ |
| 652 | iommu->devid = h->devid; | 700 | iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); |
| 701 | if (!iommu->dev) | ||
| 702 | return 1; | ||
| 703 | |||
| 653 | iommu->cap_ptr = h->cap_ptr; | 704 | iommu->cap_ptr = h->cap_ptr; |
| 705 | iommu->pci_seg = h->pci_seg; | ||
| 654 | iommu->mmio_phys = h->mmio_phys; | 706 | iommu->mmio_phys = h->mmio_phys; |
| 655 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); | 707 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); |
| 656 | if (!iommu->mmio_base) | 708 | if (!iommu->mmio_base) |
| @@ -661,10 +713,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
| 661 | if (!iommu->cmd_buf) | 713 | if (!iommu->cmd_buf) |
| 662 | return -ENOMEM; | 714 | return -ENOMEM; |
| 663 | 715 | ||
| 716 | iommu->evt_buf = alloc_event_buffer(iommu); | ||
| 717 | if (!iommu->evt_buf) | ||
| 718 | return -ENOMEM; | ||
| 719 | |||
| 720 | iommu->int_enabled = false; | ||
| 721 | |||
| 664 | init_iommu_from_pci(iommu); | 722 | init_iommu_from_pci(iommu); |
| 665 | init_iommu_from_acpi(iommu, h); | 723 | init_iommu_from_acpi(iommu, h); |
| 666 | init_iommu_devices(iommu); | 724 | init_iommu_devices(iommu); |
| 667 | 725 | ||
| 726 | pci_enable_device(iommu->dev); | ||
| 727 | |||
| 668 | return 0; | 728 | return 0; |
| 669 | } | 729 | } |
| 670 | 730 | ||
| @@ -706,6 +766,95 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
| 706 | 766 | ||
| 707 | /**************************************************************************** | 767 | /**************************************************************************** |
| 708 | * | 768 | * |
| 769 | * The following functions initialize the MSI interrupts for all IOMMUs | ||
| 770 | * in the system. It's a bit challenging because there could be multiple ||
| 771 | * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per | ||
| 772 | * pci_dev. | ||
| 773 | * | ||
| 774 | ****************************************************************************/ | ||
| 775 | |||
| 776 | static int __init iommu_setup_msix(struct amd_iommu *iommu) | ||
| 777 | { | ||
| 778 | struct amd_iommu *curr; | ||
| 779 | struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ | ||
| 780 | int nvec = 0, i; | ||
| 781 | |||
| 782 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
| 783 | if (curr->dev == iommu->dev) { | ||
| 784 | entries[nvec].entry = curr->evt_msi_num; | ||
| 785 | entries[nvec].vector = 0; | ||
| 786 | curr->int_enabled = true; | ||
| 787 | nvec++; | ||
| 788 | } | ||
| 789 | } | ||
| 790 | |||
| 791 | if (pci_enable_msix(iommu->dev, entries, nvec)) { | ||
| 792 | pci_disable_msix(iommu->dev); | ||
| 793 | return 1; | ||
| 794 | } | ||
| 795 | |||
| 796 | for (i = 0; i < nvec; ++i) { | ||
| 797 | int r = request_irq(entries->vector, amd_iommu_int_handler, | ||
| 798 | IRQF_SAMPLE_RANDOM, | ||
| 799 | "AMD IOMMU", | ||
| 800 | NULL); | ||
| 801 | if (r) | ||
| 802 | goto out_free; | ||
| 803 | } | ||
| 804 | |||
| 805 | return 0; | ||
| 806 | |||
| 807 | out_free: | ||
| 808 | for (i -= 1; i >= 0; --i) | ||
| 809 | free_irq(entries->vector, NULL); | ||
| 810 | |||
| 811 | pci_disable_msix(iommu->dev); | ||
| 812 | |||
| 813 | return 1; | ||
| 814 | } | ||
| 815 | |||
| 816 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | ||
| 817 | { | ||
| 818 | int r; | ||
| 819 | struct amd_iommu *curr; | ||
| 820 | |||
| 821 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
| 822 | if (curr->dev == iommu->dev) | ||
| 823 | curr->int_enabled = true; | ||
| 824 | } | ||
| 825 | |||
| 826 | |||
| 827 | if (pci_enable_msi(iommu->dev)) | ||
| 828 | return 1; | ||
| 829 | |||
| 830 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | ||
| 831 | IRQF_SAMPLE_RANDOM, | ||
| 832 | "AMD IOMMU", | ||
| 833 | NULL); | ||
| 834 | |||
| 835 | if (r) { | ||
| 836 | pci_disable_msi(iommu->dev); | ||
| 837 | return 1; | ||
| 838 | } | ||
| 839 | |||
| 840 | return 0; | ||
| 841 | } | ||
| 842 | |||
| 843 | static int __init iommu_init_msi(struct amd_iommu *iommu) | ||
| 844 | { | ||
| 845 | if (iommu->int_enabled) | ||
| 846 | return 0; | ||
| 847 | |||
| 848 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) | ||
| 849 | return iommu_setup_msix(iommu); | ||
| 850 | else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) | ||
| 851 | return iommu_setup_msi(iommu); | ||
| 852 | |||
| 853 | return 1; | ||
| 854 | } | ||
| 855 | |||
| 856 | /**************************************************************************** | ||
| 857 | * | ||
| 709 | * The next functions belong to the third pass of parsing the ACPI | 858 | * The next functions belong to the third pass of parsing the ACPI |
| 710 | * table. In this last pass the memory mapping requirements are | 859 | * table. In this last pass the memory mapping requirements are |
| 711 | * gathered (like exclusion and unity mapping ranges). | 860 | * gathered (like exclusion and unity mapping ranges). |
| @@ -811,7 +960,6 @@ static void init_device_table(void) | |||
| 811 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { | 960 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { |
| 812 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); | 961 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); |
| 813 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); | 962 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); |
| 814 | set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); | ||
| 815 | } | 963 | } |
| 816 | } | 964 | } |
| 817 | 965 | ||
| @@ -825,6 +973,8 @@ static void __init enable_iommus(void) | |||
| 825 | 973 | ||
| 826 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 974 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
| 827 | iommu_set_exclusion_range(iommu); | 975 | iommu_set_exclusion_range(iommu); |
| 976 | iommu_init_msi(iommu); | ||
| 977 | iommu_enable_event_logging(iommu); | ||
| 828 | iommu_enable(iommu); | 978 | iommu_enable(iommu); |
| 829 | } | 979 | } |
| 830 | } | 980 | } |
| @@ -995,11 +1145,17 @@ int __init amd_iommu_init(void) | |||
| 995 | else | 1145 | else |
| 996 | printk("disabled\n"); | 1146 | printk("disabled\n"); |
| 997 | 1147 | ||
| 1148 | if (amd_iommu_unmap_flush) | ||
| 1149 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | ||
| 1150 | else | ||
| 1151 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | ||
| 1152 | |||
| 998 | out: | 1153 | out: |
| 999 | return ret; | 1154 | return ret; |
| 1000 | 1155 | ||
| 1001 | free: | 1156 | free: |
| 1002 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); | 1157 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
| 1158 | get_order(MAX_DOMAIN_ID/8)); | ||
| 1003 | 1159 | ||
| 1004 | free_pages((unsigned long)amd_iommu_pd_table, | 1160 | free_pages((unsigned long)amd_iommu_pd_table, |
| 1005 | get_order(rlookup_table_size)); | 1161 | get_order(rlookup_table_size)); |
| @@ -1057,8 +1213,10 @@ void __init amd_iommu_detect(void) | |||
| 1057 | static int __init parse_amd_iommu_options(char *str) | 1213 | static int __init parse_amd_iommu_options(char *str) |
| 1058 | { | 1214 | { |
| 1059 | for (; *str; ++str) { | 1215 | for (; *str; ++str) { |
| 1060 | if (strcmp(str, "isolate") == 0) | 1216 | if (strncmp(str, "isolate", 7) == 0) |
| 1061 | amd_iommu_isolate = 1; | 1217 | amd_iommu_isolate = 1; |
| 1218 | if (strncmp(str, "fullflush", 11) == 0) | ||
| 1219 | amd_iommu_unmap_flush = true; | ||
| 1062 | } | 1220 | } |
| 1063 | 1221 | ||
| 1064 | return 1; | 1222 | return 1; |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fac..24bb5faf5efa 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
| 95 | 95 | ||
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | #ifdef CONFIG_DMAR | ||
| 99 | static void __init intel_g33_dmar(int num, int slot, int func) | ||
| 100 | { | ||
| 101 | struct acpi_table_header *dmar_tbl; | ||
| 102 | acpi_status status; | ||
| 103 | |||
| 104 | status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); | ||
| 105 | if (ACPI_SUCCESS(status)) { | ||
| 106 | printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); | ||
| 107 | dmar_disabled = 1; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | #endif | ||
| 111 | |||
| 98 | #define QFLAG_APPLY_ONCE 0x1 | 112 | #define QFLAG_APPLY_ONCE 0x1 |
| 99 | #define QFLAG_APPLIED 0x2 | 113 | #define QFLAG_APPLIED 0x2 |
| 100 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 114 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
| @@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { | |||
| 114 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, | 128 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, |
| 115 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 129 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
| 116 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, | 130 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, |
| 131 | #ifdef CONFIG_DMAR | ||
| 132 | { PCI_VENDOR_ID_INTEL, 0x29c0, | ||
| 133 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, | ||
| 134 | #endif | ||
| 117 | {} | 135 | {} |
| 118 | }; | 136 | }; |
| 119 | 137 | ||
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb21335..304d8bad6559 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c | |||
| @@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); | |||
| 16 | static u32 *flush_words; | 16 | static u32 *flush_words; |
| 17 | 17 | ||
| 18 | struct pci_device_id k8_nb_ids[] = { | 18 | struct pci_device_id k8_nb_ids[] = { |
| 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
| 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
| 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, | ||
| 21 | {} | 22 | {} |
| 22 | }; | 23 | }; |
| 23 | EXPORT_SYMBOL(k8_nb_ids); | 24 | EXPORT_SYMBOL(k8_nb_ids); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index dcdac6c826e9..080d1d27f37a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
| @@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
| 261 | badbit, tbl, start_addr, npages); | 261 | badbit, tbl, start_addr, npages); |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | set_bit_string(tbl->it_map, index, npages); | 264 | iommu_area_reserve(tbl->it_map, index, npages); |
| 265 | 265 | ||
| 266 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 266 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
| 267 | } | 267 | } |
| @@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
| 491 | npages = size >> PAGE_SHIFT; | 491 | npages = size >> PAGE_SHIFT; |
| 492 | order = get_order(size); | 492 | order = get_order(size); |
| 493 | 493 | ||
| 494 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 495 | |||
| 494 | /* alloc enough pages (and possibly more) */ | 496 | /* alloc enough pages (and possibly more) */ |
| 495 | ret = (void *)__get_free_pages(flag, order); | 497 | ret = (void *)__get_free_pages(flag, order); |
| 496 | if (!ret) | 498 | if (!ret) |
| @@ -510,8 +512,22 @@ error: | |||
| 510 | return ret; | 512 | return ret; |
| 511 | } | 513 | } |
| 512 | 514 | ||
| 515 | static void calgary_free_coherent(struct device *dev, size_t size, | ||
| 516 | void *vaddr, dma_addr_t dma_handle) | ||
| 517 | { | ||
| 518 | unsigned int npages; | ||
| 519 | struct iommu_table *tbl = find_iommu_table(dev); | ||
| 520 | |||
| 521 | size = PAGE_ALIGN(size); | ||
| 522 | npages = size >> PAGE_SHIFT; | ||
| 523 | |||
| 524 | iommu_free(tbl, dma_handle, npages); | ||
| 525 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 526 | } | ||
| 527 | |||
| 513 | static struct dma_mapping_ops calgary_dma_ops = { | 528 | static struct dma_mapping_ops calgary_dma_ops = { |
| 514 | .alloc_coherent = calgary_alloc_coherent, | 529 | .alloc_coherent = calgary_alloc_coherent, |
| 530 | .free_coherent = calgary_free_coherent, | ||
| 515 | .map_single = calgary_map_single, | 531 | .map_single = calgary_map_single, |
| 516 | .unmap_single = calgary_unmap_single, | 532 | .unmap_single = calgary_unmap_single, |
| 517 | .map_sg = calgary_map_sg, | 533 | .map_sg = calgary_map_sg, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f704cb51ff82..0a3824e837b4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); | |||
| 41 | /* Dummy device used for NULL arguments (normally ISA). Better would | 41 | /* Dummy device used for NULL arguments (normally ISA). Better would |
| 42 | probably be a smaller DMA mask, but this is bug-to-bug compatible | 42 | probably be a smaller DMA mask, but this is bug-to-bug compatible |
| 43 | to older i386. */ | 43 | to older i386. */ |
| 44 | struct device fallback_dev = { | 44 | struct device x86_dma_fallback_dev = { |
| 45 | .bus_id = "fallback device", | 45 | .bus_id = "fallback device", |
| 46 | .coherent_dma_mask = DMA_32BIT_MASK, | 46 | .coherent_dma_mask = DMA_32BIT_MASK, |
| 47 | .dma_mask = &fallback_dev.coherent_dma_mask, | 47 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
| 48 | }; | 48 | }; |
| 49 | EXPORT_SYMBOL(x86_dma_fallback_dev); | ||
| 49 | 50 | ||
| 50 | int dma_set_mask(struct device *dev, u64 mask) | 51 | int dma_set_mask(struct device *dev, u64 mask) |
| 51 | { | 52 | { |
| @@ -133,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | |||
| 133 | EXPORT_SYMBOL(iommu_num_pages); | 134 | EXPORT_SYMBOL(iommu_num_pages); |
| 134 | #endif | 135 | #endif |
| 135 | 136 | ||
| 137 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | ||
| 138 | dma_addr_t *dma_addr, gfp_t flag) | ||
| 139 | { | ||
| 140 | unsigned long dma_mask; | ||
| 141 | struct page *page; | ||
| 142 | dma_addr_t addr; | ||
| 143 | |||
| 144 | dma_mask = dma_alloc_coherent_mask(dev, flag); | ||
| 145 | |||
| 146 | flag |= __GFP_ZERO; | ||
| 147 | again: | ||
| 148 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
| 149 | if (!page) | ||
| 150 | return NULL; | ||
| 151 | |||
| 152 | addr = page_to_phys(page); | ||
| 153 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | ||
| 154 | __free_pages(page, get_order(size)); | ||
| 155 | |||
| 156 | if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { | ||
| 157 | flag = (flag & ~GFP_DMA32) | GFP_DMA; | ||
| 158 | goto again; | ||
| 159 | } | ||
| 160 | |||
| 161 | return NULL; | ||
| 162 | } | ||
| 163 | |||
| 164 | *dma_addr = addr; | ||
| 165 | return page_address(page); | ||
| 166 | } | ||
| 167 | |||
| 136 | /* | 168 | /* |
| 137 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | 169 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter |
| 138 | * documentation. | 170 | * documentation. |
| @@ -241,147 +273,6 @@ int dma_supported(struct device *dev, u64 mask) | |||
| 241 | } | 273 | } |
| 242 | EXPORT_SYMBOL(dma_supported); | 274 | EXPORT_SYMBOL(dma_supported); |
| 243 | 275 | ||
| 244 | /* Allocate DMA memory on node near device */ | ||
| 245 | static noinline struct page * | ||
| 246 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
| 247 | { | ||
| 248 | int node; | ||
| 249 | |||
| 250 | node = dev_to_node(dev); | ||
| 251 | |||
| 252 | return alloc_pages_node(node, gfp, order); | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Allocate memory for a coherent mapping. | ||
| 257 | */ | ||
| 258 | void * | ||
| 259 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
| 260 | gfp_t gfp) | ||
| 261 | { | ||
| 262 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 263 | void *memory = NULL; | ||
| 264 | struct page *page; | ||
| 265 | unsigned long dma_mask = 0; | ||
| 266 | dma_addr_t bus; | ||
| 267 | int noretry = 0; | ||
| 268 | |||
| 269 | /* ignore region specifiers */ | ||
| 270 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 271 | |||
| 272 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | ||
| 273 | return memory; | ||
| 274 | |||
| 275 | if (!dev) { | ||
| 276 | dev = &fallback_dev; | ||
| 277 | gfp |= GFP_DMA; | ||
| 278 | } | ||
| 279 | dma_mask = dev->coherent_dma_mask; | ||
| 280 | if (dma_mask == 0) | ||
| 281 | dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; | ||
| 282 | |||
| 283 | /* Device not DMA able */ | ||
| 284 | if (dev->dma_mask == NULL) | ||
| 285 | return NULL; | ||
| 286 | |||
| 287 | /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ | ||
| 288 | if (gfp & __GFP_DMA) | ||
| 289 | noretry = 1; | ||
| 290 | |||
| 291 | #ifdef CONFIG_X86_64 | ||
| 292 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
| 293 | larger than 16MB and in this case we have a chance of | ||
| 294 | finding fitting memory in the next higher zone first. If | ||
| 295 | not retry with true GFP_DMA. -AK */ | ||
| 296 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
| 297 | gfp |= GFP_DMA32; | ||
| 298 | if (dma_mask < DMA_32BIT_MASK) | ||
| 299 | noretry = 1; | ||
| 300 | } | ||
| 301 | #endif | ||
| 302 | |||
| 303 | again: | ||
| 304 | page = dma_alloc_pages(dev, | ||
| 305 | noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); | ||
| 306 | if (page == NULL) | ||
| 307 | return NULL; | ||
| 308 | |||
| 309 | { | ||
| 310 | int high, mmu; | ||
| 311 | bus = page_to_phys(page); | ||
| 312 | memory = page_address(page); | ||
| 313 | high = (bus + size) >= dma_mask; | ||
| 314 | mmu = high; | ||
| 315 | if (force_iommu && !(gfp & GFP_DMA)) | ||
| 316 | mmu = 1; | ||
| 317 | else if (high) { | ||
| 318 | free_pages((unsigned long)memory, | ||
| 319 | get_order(size)); | ||
| 320 | |||
| 321 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
| 322 | needed. It's better to use remapping first. */ | ||
| 323 | if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
| 324 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
| 325 | goto again; | ||
| 326 | } | ||
| 327 | |||
| 328 | /* Let low level make its own zone decisions */ | ||
| 329 | gfp &= ~(GFP_DMA32|GFP_DMA); | ||
| 330 | |||
| 331 | if (ops->alloc_coherent) | ||
| 332 | return ops->alloc_coherent(dev, size, | ||
| 333 | dma_handle, gfp); | ||
| 334 | return NULL; | ||
| 335 | } | ||
| 336 | |||
| 337 | memset(memory, 0, size); | ||
| 338 | if (!mmu) { | ||
| 339 | *dma_handle = bus; | ||
| 340 | return memory; | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | if (ops->alloc_coherent) { | ||
| 345 | free_pages((unsigned long)memory, get_order(size)); | ||
| 346 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
| 347 | return ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
| 348 | } | ||
| 349 | |||
| 350 | if (ops->map_simple) { | ||
| 351 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), | ||
| 352 | size, | ||
| 353 | PCI_DMA_BIDIRECTIONAL); | ||
| 354 | if (*dma_handle != bad_dma_address) | ||
| 355 | return memory; | ||
| 356 | } | ||
| 357 | |||
| 358 | if (panic_on_overflow) | ||
| 359 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", | ||
| 360 | (unsigned long)size); | ||
| 361 | free_pages((unsigned long)memory, get_order(size)); | ||
| 362 | return NULL; | ||
| 363 | } | ||
| 364 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
| 365 | |||
| 366 | /* | ||
| 367 | * Unmap coherent memory. | ||
| 368 | * The caller must ensure that the device has finished accessing the mapping. | ||
| 369 | */ | ||
| 370 | void dma_free_coherent(struct device *dev, size_t size, | ||
| 371 | void *vaddr, dma_addr_t bus) | ||
| 372 | { | ||
| 373 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 374 | |||
| 375 | int order = get_order(size); | ||
| 376 | WARN_ON(irqs_disabled()); /* for portability */ | ||
| 377 | if (dma_release_from_coherent(dev, order, vaddr)) | ||
| 378 | return; | ||
| 379 | if (ops->unmap_single) | ||
| 380 | ops->unmap_single(dev, bus, size, 0); | ||
| 381 | free_pages((unsigned long)vaddr, order); | ||
| 382 | } | ||
| 383 | EXPORT_SYMBOL(dma_free_coherent); | ||
| 384 | |||
| 385 | static int __init pci_iommu_init(void) | 276 | static int __init pci_iommu_init(void) |
| 386 | { | 277 | { |
| 387 | calgary_iommu_init(); | 278 | calgary_iommu_init(); |
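[Editor's note] dma_generic_alloc_coherent() replaces the long open-coded dma_alloc_coherent() removed above: it allocates near the device's node, checks the result against the coherent mask with is_buffer_dma_capable(), and retries once in the 16MB ZONE_DMA when the mask is narrower than 32 bits. A standalone restatement of that retry decision; the helper below is illustrative, not part of the patch:

    #include <linux/dma-mapping.h>
    #include <linux/gfp.h>

    /* Illustrative: decide how to retry after an allocation landed above
     * the device's coherent mask (mirrors the goto-again path above). */
    static bool want_gfp_dma_retry(u64 dma_mask, gfp_t *flag)
    {
            if (dma_mask < DMA_32BIT_MASK && !(*flag & GFP_DMA)) {
                    /* mask below 4GB and ZONE_DMA not tried yet:
                     * drop GFP_DMA32, force the 16MB ZONE_DMA, retry */
                    *flag = (*flag & ~GFP_DMA32) | GFP_DMA;
                    return true;
            }
            return false;   /* give up; caller returns NULL */
    }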
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1a895a582534..145f1c83369f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -27,8 +27,8 @@ | |||
| 27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
| 28 | #include <linux/iommu-helper.h> | 28 | #include <linux/iommu-helper.h> |
| 29 | #include <linux/sysdev.h> | 29 | #include <linux/sysdev.h> |
| 30 | #include <linux/io.h> | ||
| 30 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
| 31 | #include <asm/io.h> | ||
| 32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
| 33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
| 34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
| @@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved; | |||
| 80 | AGPEXTERN __u32 *agp_gatt_table; | 80 | AGPEXTERN __u32 *agp_gatt_table; |
| 81 | 81 | ||
| 82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
| 83 | static int need_flush; /* global flush state. set for each gart wrap */ | 83 | static bool need_flush; /* global flush state. set for each gart wrap */ |
| 84 | 84 | ||
| 85 | static unsigned long alloc_iommu(struct device *dev, int size, | 85 | static unsigned long alloc_iommu(struct device *dev, int size, |
| 86 | unsigned long align_mask) | 86 | unsigned long align_mask) |
| @@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, | |||
| 98 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, | 98 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, |
| 99 | size, base_index, boundary_size, align_mask); | 99 | size, base_index, boundary_size, align_mask); |
| 100 | if (offset == -1) { | 100 | if (offset == -1) { |
| 101 | need_flush = 1; | 101 | need_flush = true; |
| 102 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, | 102 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, |
| 103 | size, base_index, boundary_size, | 103 | size, base_index, boundary_size, |
| 104 | align_mask); | 104 | align_mask); |
| @@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size, | |||
| 107 | next_bit = offset+size; | 107 | next_bit = offset+size; |
| 108 | if (next_bit >= iommu_pages) { | 108 | if (next_bit >= iommu_pages) { |
| 109 | next_bit = 0; | 109 | next_bit = 0; |
| 110 | need_flush = 1; | 110 | need_flush = true; |
| 111 | } | 111 | } |
| 112 | } | 112 | } |
| 113 | if (iommu_fullflush) | 113 | if (iommu_fullflush) |
| 114 | need_flush = 1; | 114 | need_flush = true; |
| 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
| 116 | 116 | ||
| 117 | return offset; | 117 | return offset; |
| @@ -136,7 +136,7 @@ static void flush_gart(void) | |||
| 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
| 137 | if (need_flush) { | 137 | if (need_flush) { |
| 138 | k8_flush_garts(); | 138 | k8_flush_garts(); |
| 139 | need_flush = 0; | 139 | need_flush = false; |
| 140 | } | 140 | } |
| 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
| 142 | } | 142 | } |
| @@ -175,7 +175,8 @@ static void dump_leak(void) | |||
| 175 | iommu_leak_pages); | 175 | iommu_leak_pages); |
| 176 | for (i = 0; i < iommu_leak_pages; i += 2) { | 176 | for (i = 0; i < iommu_leak_pages; i += 2) { |
| 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); | 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); |
| 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); | 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], |
| 179 | 0); | ||
| 179 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); | 180 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); |
| 180 | } | 181 | } |
| 181 | printk(KERN_DEBUG "\n"); | 182 | printk(KERN_DEBUG "\n"); |
| @@ -214,24 +215,14 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
| 214 | static inline int | 215 | static inline int |
| 215 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 216 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
| 216 | { | 217 | { |
| 217 | u64 mask = *dev->dma_mask; | 218 | return force_iommu || |
| 218 | int high = addr + size > mask; | 219 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
| 219 | int mmu = high; | ||
| 220 | |||
| 221 | if (force_iommu) | ||
| 222 | mmu = 1; | ||
| 223 | |||
| 224 | return mmu; | ||
| 225 | } | 220 | } |
| 226 | 221 | ||
| 227 | static inline int | 222 | static inline int |
| 228 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 223 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
| 229 | { | 224 | { |
| 230 | u64 mask = *dev->dma_mask; | 225 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
| 231 | int high = addr + size > mask; | ||
| 232 | int mmu = high; | ||
| 233 | |||
| 234 | return mmu; | ||
| 235 | } | 226 | } |
| 236 | 227 | ||
| 237 | /* Map a single continuous physical area into the IOMMU. | 228 | /* Map a single continuous physical area into the IOMMU. |
| @@ -261,20 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
| 261 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 252 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
| 262 | } | 253 | } |
| 263 | 254 | ||
| 264 | static dma_addr_t | ||
| 265 | gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) | ||
| 266 | { | ||
| 267 | dma_addr_t map; | ||
| 268 | unsigned long align_mask; | ||
| 269 | |||
| 270 | align_mask = (1UL << get_order(size)) - 1; | ||
| 271 | map = dma_map_area(dev, paddr, size, dir, align_mask); | ||
| 272 | |||
| 273 | flush_gart(); | ||
| 274 | |||
| 275 | return map; | ||
| 276 | } | ||
| 277 | |||
| 278 | /* Map a single area into the IOMMU */ | 255 | /* Map a single area into the IOMMU */ |
| 279 | static dma_addr_t | 256 | static dma_addr_t |
| 280 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | 257 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) |
| @@ -282,7 +259,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | |||
| 282 | unsigned long bus; | 259 | unsigned long bus; |
| 283 | 260 | ||
| 284 | if (!dev) | 261 | if (!dev) |
| 285 | dev = &fallback_dev; | 262 | dev = &x86_dma_fallback_dev; |
| 286 | 263 | ||
| 287 | if (!need_iommu(dev, paddr, size)) | 264 | if (!need_iommu(dev, paddr, size)) |
| 288 | return paddr; | 265 | return paddr; |
| @@ -434,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
| 434 | return 0; | 411 | return 0; |
| 435 | 412 | ||
| 436 | if (!dev) | 413 | if (!dev) |
| 437 | dev = &fallback_dev; | 414 | dev = &x86_dma_fallback_dev; |
| 438 | 415 | ||
| 439 | out = 0; | 416 | out = 0; |
| 440 | start = 0; | 417 | start = 0; |
| @@ -506,6 +483,46 @@ error: | |||
| 506 | return 0; | 483 | return 0; |
| 507 | } | 484 | } |
| 508 | 485 | ||
| 486 | /* allocate and map a coherent mapping */ | ||
| 487 | static void * | ||
| 488 | gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | ||
| 489 | gfp_t flag) | ||
| 490 | { | ||
| 491 | dma_addr_t paddr; | ||
| 492 | unsigned long align_mask; | ||
| 493 | struct page *page; | ||
| 494 | |||
| 495 | if (force_iommu && !(flag & GFP_DMA)) { | ||
| 496 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 497 | page = alloc_pages(flag | __GFP_ZERO, get_order(size)); | ||
| 498 | if (!page) | ||
| 499 | return NULL; | ||
| 500 | |||
| 501 | align_mask = (1UL << get_order(size)) - 1; | ||
| 502 | paddr = dma_map_area(dev, page_to_phys(page), size, | ||
| 503 | DMA_BIDIRECTIONAL, align_mask); | ||
| 504 | |||
| 505 | flush_gart(); | ||
| 506 | if (paddr != bad_dma_address) { | ||
| 507 | *dma_addr = paddr; | ||
| 508 | return page_address(page); | ||
| 509 | } | ||
| 510 | __free_pages(page, get_order(size)); | ||
| 511 | } else | ||
| 512 | return dma_generic_alloc_coherent(dev, size, dma_addr, flag); | ||
| 513 | |||
| 514 | return NULL; | ||
| 515 | } | ||
| 516 | |||
| 517 | /* free a coherent mapping */ | ||
| 518 | static void | ||
| 519 | gart_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 520 | dma_addr_t dma_addr) | ||
| 521 | { | ||
| 522 | gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); | ||
| 523 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 524 | } | ||
| 525 | |||
| 509 | static int no_agp; | 526 | static int no_agp; |
| 510 | 527 | ||
| 511 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 528 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
| @@ -656,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 656 | info->aper_size = aper_size >> 20; | 673 | info->aper_size = aper_size >> 20; |
| 657 | 674 | ||
| 658 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | 675 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); |
| 659 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | 676 | gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 677 | get_order(gatt_size)); | ||
| 660 | if (!gatt) | 678 | if (!gatt) |
| 661 | panic("Cannot allocate GATT table"); | 679 | panic("Cannot allocate GATT table"); |
| 662 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) | 680 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) |
| 663 | panic("Could not set GART PTEs to uncacheable pages"); | 681 | panic("Could not set GART PTEs to uncacheable pages"); |
| 664 | 682 | ||
| 665 | memset(gatt, 0, gatt_size); | ||
| 666 | agp_gatt_table = gatt; | 683 | agp_gatt_table = gatt; |
| 667 | 684 | ||
| 668 | enable_gart_translations(); | 685 | enable_gart_translations(); |
| @@ -671,7 +688,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 671 | if (!error) | 688 | if (!error) |
| 672 | error = sysdev_register(&device_gart); | 689 | error = sysdev_register(&device_gart); |
| 673 | if (error) | 690 | if (error) |
| 674 | panic("Could not register gart_sysdev -- would corrupt data on next suspend"); | 691 | panic("Could not register gart_sysdev -- " |
| 692 | "would corrupt data on next suspend"); | ||
| 675 | 693 | ||
| 676 | flush_gart(); | 694 | flush_gart(); |
| 677 | 695 | ||
| @@ -687,20 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 687 | return -1; | 705 | return -1; |
| 688 | } | 706 | } |
| 689 | 707 | ||
| 690 | extern int agp_amd64_init(void); | ||
| 691 | |||
| 692 | static struct dma_mapping_ops gart_dma_ops = { | 708 | static struct dma_mapping_ops gart_dma_ops = { |
| 693 | .map_single = gart_map_single, | 709 | .map_single = gart_map_single, |
| 694 | .map_simple = gart_map_simple, | ||
| 695 | .unmap_single = gart_unmap_single, | 710 | .unmap_single = gart_unmap_single, |
| 696 | .sync_single_for_cpu = NULL, | ||
| 697 | .sync_single_for_device = NULL, | ||
| 698 | .sync_single_range_for_cpu = NULL, | ||
| 699 | .sync_single_range_for_device = NULL, | ||
| 700 | .sync_sg_for_cpu = NULL, | ||
| 701 | .sync_sg_for_device = NULL, | ||
| 702 | .map_sg = gart_map_sg, | 711 | .map_sg = gart_map_sg, |
| 703 | .unmap_sg = gart_unmap_sg, | 712 | .unmap_sg = gart_unmap_sg, |
| 713 | .alloc_coherent = gart_alloc_coherent, | ||
| 714 | .free_coherent = gart_free_coherent, | ||
| 704 | }; | 715 | }; |
| 705 | 716 | ||
| 706 | void gart_iommu_shutdown(void) | 717 | void gart_iommu_shutdown(void) |
| @@ -760,8 +771,8 @@ void __init gart_iommu_init(void) | |||
| 760 | (no_agp && init_k8_gatt(&info) < 0)) { | 771 | (no_agp && init_k8_gatt(&info) < 0)) { |
| 761 | if (max_pfn > MAX_DMA32_PFN) { | 772 | if (max_pfn > MAX_DMA32_PFN) { |
| 762 | printk(KERN_WARNING "More than 4GB of memory " | 773 | printk(KERN_WARNING "More than 4GB of memory " |
| 763 | "but GART IOMMU not available.\n" | 774 | "but GART IOMMU not available.\n"); |
| 764 | KERN_WARNING "falling back to iommu=soft.\n"); | 775 | printk(KERN_WARNING "falling back to iommu=soft.\n"); |
| 765 | } | 776 | } |
| 766 | return; | 777 | return; |
| 767 | } | 778 | } |
| @@ -779,19 +790,16 @@ void __init gart_iommu_init(void) | |||
| 779 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 790 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
| 780 | iommu_pages = iommu_size >> PAGE_SHIFT; | 791 | iommu_pages = iommu_size >> PAGE_SHIFT; |
| 781 | 792 | ||
| 782 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, | 793 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 783 | get_order(iommu_pages/8)); | 794 | get_order(iommu_pages/8)); |
| 784 | if (!iommu_gart_bitmap) | 795 | if (!iommu_gart_bitmap) |
| 785 | panic("Cannot allocate iommu bitmap\n"); | 796 | panic("Cannot allocate iommu bitmap\n"); |
| 786 | memset(iommu_gart_bitmap, 0, iommu_pages/8); | ||
| 787 | 797 | ||
| 788 | #ifdef CONFIG_IOMMU_LEAK | 798 | #ifdef CONFIG_IOMMU_LEAK |
| 789 | if (leak_trace) { | 799 | if (leak_trace) { |
| 790 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, | 800 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, |
| 791 | get_order(iommu_pages*sizeof(void *))); | 801 | get_order(iommu_pages*sizeof(void *))); |
| 792 | if (iommu_leak_tab) | 802 | if (!iommu_leak_tab) |
| 793 | memset(iommu_leak_tab, 0, iommu_pages * 8); | ||
| 794 | else | ||
| 795 | printk(KERN_DEBUG | 803 | printk(KERN_DEBUG |
| 796 | "PCI-DMA: Cannot allocate leak trace area\n"); | 804 | "PCI-DMA: Cannot allocate leak trace area\n"); |
| 797 | } | 805 | } |
| @@ -801,7 +809,7 @@ void __init gart_iommu_init(void) | |||
| 801 | * Out of IOMMU space handling. | 809 | * Out of IOMMU space handling. |
| 802 | * Reserve some invalid pages at the beginning of the GART. | 810 | * Reserve some invalid pages at the beginning of the GART. |
| 803 | */ | 811 | */ |
| 804 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 812 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
| 805 | 813 | ||
| 806 | agp_memory_reserved = iommu_size; | 814 | agp_memory_reserved = iommu_size; |
| 807 | printk(KERN_INFO | 815 | printk(KERN_INFO |
| @@ -859,7 +867,8 @@ void __init gart_parse_options(char *p) | |||
| 859 | if (!strncmp(p, "leak", 4)) { | 867 | if (!strncmp(p, "leak", 4)) { |
| 860 | leak_trace = 1; | 868 | leak_trace = 1; |
| 861 | p += 4; | 869 | p += 4; |
| 862 | if (*p == '=') ++p; | 870 | if (*p == '=') |
| 871 | ++p; | ||
| 863 | if (isdigit(*p) && get_option(&p, &arg)) | 872 | if (isdigit(*p) && get_option(&p, &arg)) |
| 864 | iommu_leak_pages = arg; | 873 | iommu_leak_pages = arg; |
| 865 | } | 874 | } |
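[Editor's note] gart_map_simple() is folded into the new gart_alloc_coherent(), which derives an alignment mask from the allocation order so the GART mapping is naturally aligned. A short worked example of that mask arithmetic; the helper name and sizes are illustrative:

    #include <asm/page.h>   /* get_order() */

    /* Illustrative: for a 16KB coherent buffer with 4KB pages,
     * get_order(16384) == 2, so the mapping must start on a
     * 4-page boundary inside the aperture (mask 0x3). */
    static unsigned long gart_align_mask(size_t size)
    {
            return (1UL << get_order(size)) - 1;
    }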
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3f91f71cdc3e..c70ab5a5d4c8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | static int | 14 | static int |
| 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
| 16 | { | 16 | { |
| 17 | if (hwdev && bus + size > *hwdev->dma_mask) { | 17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { |
| 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) | 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) |
| 19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
| 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
| @@ -72,7 +72,15 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
| 72 | return nents; | 72 | return nents; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 76 | dma_addr_t dma_addr) | ||
| 77 | { | ||
| 78 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 79 | } | ||
| 80 | |||
| 75 | struct dma_mapping_ops nommu_dma_ops = { | 81 | struct dma_mapping_ops nommu_dma_ops = { |
| 82 | .alloc_coherent = dma_generic_alloc_coherent, | ||
| 83 | .free_coherent = nommu_free_coherent, | ||
| 76 | .map_single = nommu_map_single, | 84 | .map_single = nommu_map_single, |
| 77 | .map_sg = nommu_map_sg, | 85 | .map_sg = nommu_map_sg, |
| 78 | .is_phys = 1, | 86 | .is_phys = 1, |
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c3edcdc08e72..6c4c1c3c50ee 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c | |||
| @@ -80,7 +80,7 @@ static long list_size; | |||
| 80 | 80 | ||
| 81 | static void domain_remove_dev_info(struct dmar_domain *domain); | 81 | static void domain_remove_dev_info(struct dmar_domain *domain); |
| 82 | 82 | ||
| 83 | static int dmar_disabled; | 83 | int dmar_disabled; |
| 84 | static int __initdata dmar_map_gfx = 1; | 84 | static int __initdata dmar_map_gfx = 1; |
| 85 | static int dmar_forcedac; | 85 | static int dmar_forcedac; |
| 86 | static int intel_iommu_strict; | 86 | static int intel_iommu_strict; |
diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h index 783f43e58052..041d0db7da27 100644 --- a/include/asm-x86/amd_iommu.h +++ b/include/asm-x86/amd_iommu.h | |||
| @@ -20,10 +20,13 @@ | |||
| 20 | #ifndef ASM_X86__AMD_IOMMU_H | 20 | #ifndef ASM_X86__AMD_IOMMU_H |
| 21 | #define ASM_X86__AMD_IOMMU_H | 21 | #define ASM_X86__AMD_IOMMU_H |
| 22 | 22 | ||
| 23 | #include <linux/irqreturn.h> | ||
| 24 | |||
| 23 | #ifdef CONFIG_AMD_IOMMU | 25 | #ifdef CONFIG_AMD_IOMMU |
| 24 | extern int amd_iommu_init(void); | 26 | extern int amd_iommu_init(void); |
| 25 | extern int amd_iommu_init_dma_ops(void); | 27 | extern int amd_iommu_init_dma_ops(void); |
| 26 | extern void amd_iommu_detect(void); | 28 | extern void amd_iommu_detect(void); |
| 29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | ||
| 27 | #else | 30 | #else |
| 28 | static inline int amd_iommu_init(void) { return -ENODEV; } | 31 | static inline int amd_iommu_init(void) { return -ENODEV; } |
| 29 | static inline void amd_iommu_detect(void) { } | 32 | static inline void amd_iommu_detect(void) { } |
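[Editor's note] The header now exposes an interrupt handler with the standard irqreturn_t signature so the init code can hook the IOMMU's event interrupt. A hedged sketch of how a handler with this signature is wired up; the irq number, flags, name and cookie below are assumptions, not taken from the patch:

    #include <linux/interrupt.h>
    #include <asm/amd_iommu.h>

    /* Illustrative registration only; the real driver sets up its own
     * MSI vector and chooses its own flags and cookie. */
    static int example_hook_iommu_irq(int irq, void *cookie)
    {
            return request_irq(irq, amd_iommu_int_handler, 0,
                               "AMD-Vi example", cookie);
    }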
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 1ffa4e53c989..b3085869a17b 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | /* Capability offsets used by the driver */ | 37 | /* Capability offsets used by the driver */ |
| 38 | #define MMIO_CAP_HDR_OFFSET 0x00 | 38 | #define MMIO_CAP_HDR_OFFSET 0x00 |
| 39 | #define MMIO_RANGE_OFFSET 0x0c | 39 | #define MMIO_RANGE_OFFSET 0x0c |
| 40 | #define MMIO_MISC_OFFSET 0x10 | ||
| 40 | 41 | ||
| 41 | /* Masks, shifts and macros to parse the device range capability */ | 42 | /* Masks, shifts and macros to parse the device range capability */ |
| 42 | #define MMIO_RANGE_LD_MASK 0xff000000 | 43 | #define MMIO_RANGE_LD_MASK 0xff000000 |
| @@ -48,6 +49,7 @@ | |||
| 48 | #define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) | 49 | #define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) |
| 49 | #define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) | 50 | #define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) |
| 50 | #define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) | 51 | #define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) |
| 52 | #define MMIO_MSI_NUM(x) ((x) & 0x1f) | ||
| 51 | 53 | ||
| 52 | /* Flag masks for the AMD IOMMU exclusion range */ | 54 | /* Flag masks for the AMD IOMMU exclusion range */ |
| 53 | #define MMIO_EXCL_ENABLE_MASK 0x01ULL | 55 | #define MMIO_EXCL_ENABLE_MASK 0x01ULL |
| @@ -69,6 +71,25 @@ | |||
| 69 | /* MMIO status bits */ | 71 | /* MMIO status bits */ |
| 70 | #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 | 72 | #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 |
| 71 | 73 | ||
| 74 | /* event logging constants */ | ||
| 75 | #define EVENT_ENTRY_SIZE 0x10 | ||
| 76 | #define EVENT_TYPE_SHIFT 28 | ||
| 77 | #define EVENT_TYPE_MASK 0xf | ||
| 78 | #define EVENT_TYPE_ILL_DEV 0x1 | ||
| 79 | #define EVENT_TYPE_IO_FAULT 0x2 | ||
| 80 | #define EVENT_TYPE_DEV_TAB_ERR 0x3 | ||
| 81 | #define EVENT_TYPE_PAGE_TAB_ERR 0x4 | ||
| 82 | #define EVENT_TYPE_ILL_CMD 0x5 | ||
| 83 | #define EVENT_TYPE_CMD_HARD_ERR 0x6 | ||
| 84 | #define EVENT_TYPE_IOTLB_INV_TO 0x7 | ||
| 85 | #define EVENT_TYPE_INV_DEV_REQ 0x8 | ||
| 86 | #define EVENT_DEVID_MASK 0xffff | ||
| 87 | #define EVENT_DEVID_SHIFT 0 | ||
| 88 | #define EVENT_DOMID_MASK 0xffff | ||
| 89 | #define EVENT_DOMID_SHIFT 0 | ||
| 90 | #define EVENT_FLAGS_MASK 0xfff | ||
| 91 | #define EVENT_FLAGS_SHIFT 0x10 | ||
| 92 | |||
| 72 | /* feature control bits */ | 93 | /* feature control bits */ |
| 73 | #define CONTROL_IOMMU_EN 0x00ULL | 94 | #define CONTROL_IOMMU_EN 0x00ULL |
| 74 | #define CONTROL_HT_TUN_EN 0x01ULL | 95 | #define CONTROL_HT_TUN_EN 0x01ULL |
| @@ -109,6 +130,8 @@ | |||
| 109 | #define DEV_ENTRY_NMI_PASS 0xba | 130 | #define DEV_ENTRY_NMI_PASS 0xba |
| 110 | #define DEV_ENTRY_LINT0_PASS 0xbe | 131 | #define DEV_ENTRY_LINT0_PASS 0xbe |
| 111 | #define DEV_ENTRY_LINT1_PASS 0xbf | 132 | #define DEV_ENTRY_LINT1_PASS 0xbf |
| 133 | #define DEV_ENTRY_MODE_MASK 0x07 | ||
| 134 | #define DEV_ENTRY_MODE_SHIFT 0x09 | ||
| 112 | 135 | ||
| 113 | /* constants to configure the command buffer */ | 136 | /* constants to configure the command buffer */ |
| 114 | #define CMD_BUFFER_SIZE 8192 | 137 | #define CMD_BUFFER_SIZE 8192 |
| @@ -116,6 +139,10 @@ | |||
| 116 | #define MMIO_CMD_SIZE_SHIFT 56 | 139 | #define MMIO_CMD_SIZE_SHIFT 56 |
| 117 | #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) | 140 | #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) |
| 118 | 141 | ||
| 142 | /* constants for event buffer handling */ | ||
| 143 | #define EVT_BUFFER_SIZE 8192 /* 512 entries */ | ||
| 144 | #define EVT_LEN_MASK (0x9ULL << 56) | ||
| 145 | |||
| 119 | #define PAGE_MODE_1_LEVEL 0x01 | 146 | #define PAGE_MODE_1_LEVEL 0x01 |
| 120 | #define PAGE_MODE_2_LEVEL 0x02 | 147 | #define PAGE_MODE_2_LEVEL 0x02 |
| 121 | #define PAGE_MODE_3_LEVEL 0x03 | 148 | #define PAGE_MODE_3_LEVEL 0x03 |
| @@ -134,6 +161,7 @@ | |||
| 134 | #define IOMMU_MAP_SIZE_L3 (1ULL << 39) | 161 | #define IOMMU_MAP_SIZE_L3 (1ULL << 39) |
| 135 | 162 | ||
| 136 | #define IOMMU_PTE_P (1ULL << 0) | 163 | #define IOMMU_PTE_P (1ULL << 0) |
| 164 | #define IOMMU_PTE_TV (1ULL << 1) | ||
| 137 | #define IOMMU_PTE_U (1ULL << 59) | 165 | #define IOMMU_PTE_U (1ULL << 59) |
| 138 | #define IOMMU_PTE_FC (1ULL << 60) | 166 | #define IOMMU_PTE_FC (1ULL << 60) |
| 139 | #define IOMMU_PTE_IR (1ULL << 61) | 167 | #define IOMMU_PTE_IR (1ULL << 61) |
| @@ -159,6 +187,9 @@ | |||
| 159 | 187 | ||
| 160 | #define MAX_DOMAIN_ID 65536 | 188 | #define MAX_DOMAIN_ID 65536 |
| 161 | 189 | ||
| 190 | /* FIXME: move this macro to <linux/pci.h> */ | ||
| 191 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | ||
| 192 | |||
| 162 | /* | 193 | /* |
| 163 | * This structure contains generic data for IOMMU protection domains | 194 | * This structure contains generic data for IOMMU protection domains |
| 164 | * independent of their use. | 195 | * independent of their use. |
| @@ -196,6 +227,15 @@ struct dma_ops_domain { | |||
| 196 | * just calculate its address in constant time. | 227 | * just calculate its address in constant time. |
| 197 | */ | 228 | */ |
| 198 | u64 **pte_pages; | 229 | u64 **pte_pages; |
| 230 | |||
| 231 | /* This will be set to true when TLB needs to be flushed */ | ||
| 232 | bool need_flush; | ||
| 233 | |||
| 234 | /* | ||
| 235 | * if this is a preallocated domain, keep the device for which it was | ||
| 236 | * preallocated in this variable | ||
| 237 | */ | ||
| 238 | u16 target_dev; | ||
| 199 | }; | 239 | }; |
| 200 | 240 | ||
| 201 | /* | 241 | /* |
| @@ -208,8 +248,9 @@ struct amd_iommu { | |||
| 208 | /* locks the accesses to the hardware */ | 248 | /* locks the accesses to the hardware */ |
| 209 | spinlock_t lock; | 249 | spinlock_t lock; |
| 210 | 250 | ||
| 211 | /* device id of this IOMMU */ | 251 | /* Pointer to PCI device of this IOMMU */ |
| 212 | u16 devid; | 252 | struct pci_dev *dev; |
| 253 | |||
| 213 | /* | 254 | /* |
| 214 | * Capability pointer. There could be more than one IOMMU per PCI | 255 | * Capability pointer. There could be more than one IOMMU per PCI |
| 215 | * device function if there are more than one AMD IOMMU capability | 256 | * device function if there are more than one AMD IOMMU capability |
| @@ -225,6 +266,9 @@ struct amd_iommu { | |||
| 225 | /* capabilities of that IOMMU read from ACPI */ | 266 | /* capabilities of that IOMMU read from ACPI */ |
| 226 | u32 cap; | 267 | u32 cap; |
| 227 | 268 | ||
| 269 | /* pci domain of this IOMMU */ | ||
| 270 | u16 pci_seg; | ||
| 271 | |||
| 228 | /* first device this IOMMU handles. read from PCI */ | 272 | /* first device this IOMMU handles. read from PCI */ |
| 229 | u16 first_device; | 273 | u16 first_device; |
| 230 | /* last device this IOMMU handles. read from PCI */ | 274 | /* last device this IOMMU handles. read from PCI */ |
| @@ -240,9 +284,19 @@ struct amd_iommu { | |||
| 240 | /* size of command buffer */ | 284 | /* size of command buffer */ |
| 241 | u32 cmd_buf_size; | 285 | u32 cmd_buf_size; |
| 242 | 286 | ||
| 287 | /* event buffer virtual address */ | ||
| 288 | u8 *evt_buf; | ||
| 289 | /* size of event buffer */ | ||
| 290 | u32 evt_buf_size; | ||
| 291 | /* MSI number for event interrupt */ | ||
| 292 | u16 evt_msi_num; | ||
| 293 | |||
| 243 | /* if one, we need to send a completion wait command */ | 294 | /* if one, we need to send a completion wait command */ |
| 244 | int need_sync; | 295 | int need_sync; |
| 245 | 296 | ||
| 297 | /* true if interrupts for this IOMMU are already enabled */ | ||
| 298 | bool int_enabled; | ||
| 299 | |||
| 246 | /* default dma_ops domain for that IOMMU */ | 300 | /* default dma_ops domain for that IOMMU */ |
| 247 | struct dma_ops_domain *default_dom; | 301 | struct dma_ops_domain *default_dom; |
| 248 | }; | 302 | }; |
| @@ -322,6 +376,12 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; | |||
| 322 | /* will be 1 if device isolation is enabled */ | 376 | /* will be 1 if device isolation is enabled */ |
| 323 | extern int amd_iommu_isolate; | 377 | extern int amd_iommu_isolate; |
| 324 | 378 | ||
| 379 | /* | ||
| 380 | * If true, the addresses will be flushed on unmap time, not when | ||
| 381 | * they are reused | ||
| 382 | */ | ||
| 383 | extern bool amd_iommu_unmap_flush; | ||
| 384 | |||
| 325 | /* takes a PCI device id and prints it out in a readable form */ | 385 | /* takes a PCI device id and prints it out in a readable form */ |
| 326 | static inline void print_devid(u16 devid, int nl) | 386 | static inline void print_devid(u16 devid, int nl) |
| 327 | { | 387 | { |
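[Editor's note] The new EVENT_* constants describe how entries read from the event log buffer are decoded. A minimal decoding sketch under the assumption that an entry is four 32-bit words with the type and flags in the second word and the device id in the first; that word layout is an assumption for illustration, the driver source is authoritative:

    #include <linux/types.h>
    #include <asm/amd_iommu_types.h>

    struct example_iommu_event {   /* hypothetical container */
            u32 type;
            u16 devid;
            u16 flags;
    };

    /* Sketch: pull the type, device id and flag fields out of one
     * event-log entry using the masks and shifts defined above. */
    static struct example_iommu_event decode_event(const u32 *e)
    {
            struct example_iommu_event ev = {
                    .type  = (e[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK,
                    .devid = (e[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK,
                    .flags = (e[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK,
            };
            return ev;
    }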
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 61989b93b475..451a74762bd4 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h | |||
| @@ -424,16 +424,6 @@ static inline int fls(int x) | |||
| 424 | 424 | ||
| 425 | #undef ADDR | 425 | #undef ADDR |
| 426 | 426 | ||
| 427 | static inline void set_bit_string(unsigned long *bitmap, | ||
| 428 | unsigned long i, int len) | ||
| 429 | { | ||
| 430 | unsigned long end = i + len; | ||
| 431 | while (i < end) { | ||
| 432 | __set_bit(i, bitmap); | ||
| 433 | i++; | ||
| 434 | } | ||
| 435 | } | ||
| 436 | |||
| 437 | #ifdef __KERNEL__ | 427 | #ifdef __KERNEL__ |
| 438 | 428 | ||
| 439 | #include <asm-generic/bitops/sched.h> | 429 | #include <asm-generic/bitops/sched.h> |
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 5d200e78bd81..219c33d6361c 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h | |||
| @@ -9,12 +9,12 @@ | |||
| 9 | #include <linux/scatterlist.h> | 9 | #include <linux/scatterlist.h> |
| 10 | #include <asm/io.h> | 10 | #include <asm/io.h> |
| 11 | #include <asm/swiotlb.h> | 11 | #include <asm/swiotlb.h> |
| 12 | #include <asm-generic/dma-coherent.h> | ||
| 12 | 13 | ||
| 13 | extern dma_addr_t bad_dma_address; | 14 | extern dma_addr_t bad_dma_address; |
| 14 | extern int iommu_merge; | 15 | extern int iommu_merge; |
| 15 | extern struct device fallback_dev; | 16 | extern struct device x86_dma_fallback_dev; |
| 16 | extern int panic_on_overflow; | 17 | extern int panic_on_overflow; |
| 17 | extern int force_iommu; | ||
| 18 | 18 | ||
| 19 | struct dma_mapping_ops { | 19 | struct dma_mapping_ops { |
| 20 | int (*mapping_error)(struct device *dev, | 20 | int (*mapping_error)(struct device *dev, |
| @@ -25,9 +25,6 @@ struct dma_mapping_ops { | |||
| 25 | void *vaddr, dma_addr_t dma_handle); | 25 | void *vaddr, dma_addr_t dma_handle); |
| 26 | dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, | 26 | dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, |
| 27 | size_t size, int direction); | 27 | size_t size, int direction); |
| 28 | /* like map_single, but doesn't check the device mask */ | ||
| 29 | dma_addr_t (*map_simple)(struct device *hwdev, phys_addr_t ptr, | ||
| 30 | size_t size, int direction); | ||
| 31 | void (*unmap_single)(struct device *dev, dma_addr_t addr, | 28 | void (*unmap_single)(struct device *dev, dma_addr_t addr, |
| 32 | size_t size, int direction); | 29 | size_t size, int direction); |
| 33 | void (*sync_single_for_cpu)(struct device *hwdev, | 30 | void (*sync_single_for_cpu)(struct device *hwdev, |
| @@ -68,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) | |||
| 68 | return dma_ops; | 65 | return dma_ops; |
| 69 | else | 66 | else |
| 70 | return dev->archdata.dma_ops; | 67 | return dev->archdata.dma_ops; |
| 71 | #endif | 68 | #endif /* ASM_X86__DMA_MAPPING_H */ |
| 72 | } | 69 | } |
| 73 | 70 | ||
| 74 | /* Make sure we keep the same behaviour */ | 71 | /* Make sure we keep the same behaviour */ |
| @@ -87,17 +84,14 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | |||
| 87 | 84 | ||
| 88 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | 85 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
| 89 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | 86 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) |
| 90 | 87 | #define dma_is_consistent(d, h) (1) | |
| 91 | void *dma_alloc_coherent(struct device *dev, size_t size, | ||
| 92 | dma_addr_t *dma_handle, gfp_t flag); | ||
| 93 | |||
| 94 | void dma_free_coherent(struct device *dev, size_t size, | ||
| 95 | void *vaddr, dma_addr_t dma_handle); | ||
| 96 | |||
| 97 | 88 | ||
| 98 | extern int dma_supported(struct device *hwdev, u64 mask); | 89 | extern int dma_supported(struct device *hwdev, u64 mask); |
| 99 | extern int dma_set_mask(struct device *dev, u64 mask); | 90 | extern int dma_set_mask(struct device *dev, u64 mask); |
| 100 | 91 | ||
| 92 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | ||
| 93 | dma_addr_t *dma_addr, gfp_t flag); | ||
| 94 | |||
| 101 | static inline dma_addr_t | 95 | static inline dma_addr_t |
| 102 | dma_map_single(struct device *hwdev, void *ptr, size_t size, | 96 | dma_map_single(struct device *hwdev, void *ptr, size_t size, |
| 103 | int direction) | 97 | int direction) |
| @@ -247,7 +241,68 @@ static inline int dma_get_cache_alignment(void) | |||
| 247 | return boot_cpu_data.x86_clflush_size; | 241 | return boot_cpu_data.x86_clflush_size; |
| 248 | } | 242 | } |
| 249 | 243 | ||
| 250 | #define dma_is_consistent(d, h) (1) | 244 | static inline unsigned long dma_alloc_coherent_mask(struct device *dev, |
| 245 | gfp_t gfp) | ||
| 246 | { | ||
| 247 | unsigned long dma_mask = 0; | ||
| 251 | 248 | ||
| 252 | #include <asm-generic/dma-coherent.h> | 249 | dma_mask = dev->coherent_dma_mask; |
| 253 | #endif /* ASM_X86__DMA_MAPPING_H */ | 250 | if (!dma_mask) |
| 251 | dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; | ||
| 252 | |||
| 253 | return dma_mask; | ||
| 254 | } | ||
| 255 | |||
| 256 | static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) | ||
| 257 | { | ||
| 258 | #ifdef CONFIG_X86_64 | ||
| 259 | unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); | ||
| 260 | |||
| 261 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) | ||
| 262 | gfp |= GFP_DMA32; | ||
| 263 | #endif | ||
| 264 | return gfp; | ||
| 265 | } | ||
| 266 | |||
| 267 | static inline void * | ||
| 268 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
| 269 | gfp_t gfp) | ||
| 270 | { | ||
| 271 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 272 | void *memory; | ||
| 273 | |||
| 274 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 275 | |||
| 276 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | ||
| 277 | return memory; | ||
| 278 | |||
| 279 | if (!dev) { | ||
| 280 | dev = &x86_dma_fallback_dev; | ||
| 281 | gfp |= GFP_DMA; | ||
| 282 | } | ||
| 283 | |||
| 284 | if (!is_device_dma_capable(dev)) | ||
| 285 | return NULL; | ||
| 286 | |||
| 287 | if (!ops->alloc_coherent) | ||
| 288 | return NULL; | ||
| 289 | |||
| 290 | return ops->alloc_coherent(dev, size, dma_handle, | ||
| 291 | dma_alloc_coherent_gfp_flags(dev, gfp)); | ||
| 292 | } | ||
| 293 | |||
| 294 | static inline void dma_free_coherent(struct device *dev, size_t size, | ||
| 295 | void *vaddr, dma_addr_t bus) | ||
| 296 | { | ||
| 297 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 298 | |||
| 299 | WARN_ON(irqs_disabled()); /* for portability */ | ||
| 300 | |||
| 301 | if (dma_release_from_coherent(dev, get_order(size), vaddr)) | ||
| 302 | return; | ||
| 303 | |||
| 304 | if (ops->free_coherent) | ||
| 305 | ops->free_coherent(dev, size, vaddr, bus); | ||
| 306 | } | ||
| 307 | |||
| 308 | #endif | ||
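[Editor's note] With dma_alloc_coherent()/dma_free_coherent() turned into inlines, every coherent allocation now funnels through the per-bus dma_mapping_ops (gart, calgary, nommu, swiotlb). From a driver's point of view nothing changes; a minimal hypothetical caller still looks like this:

    #include <linux/dma-mapping.h>
    #include <linux/errno.h>
    #include <linux/pci.h>

    /* Hypothetical driver snippet: allocate and release one page of
     * coherent memory; the inline wrappers dispatch to the registered
     * dma_mapping_ops behind the scenes. */
    static int example_alloc_ring(struct pci_dev *pdev)
    {
            dma_addr_t ring_dma;
            void *ring;

            ring = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &ring_dma,
                                      GFP_KERNEL);
            if (!ring)
                    return -ENOMEM;

            /* ... hand ring_dma to the hardware ... */

            dma_free_coherent(&pdev->dev, PAGE_SIZE, ring, ring_dma);
            return 0;
    }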
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index baa54faba892..605edb39ef9e 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h | |||
| @@ -29,6 +29,8 @@ extern int fix_aperture; | |||
| 29 | #define AMD64_GARTCACHECTL 0x9c | 29 | #define AMD64_GARTCACHECTL 0x9c |
| 30 | #define AMD64_GARTEN (1<<0) | 30 | #define AMD64_GARTEN (1<<0) |
| 31 | 31 | ||
| 32 | extern int agp_amd64_init(void); | ||
| 33 | |||
| 32 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) | 34 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) |
| 33 | { | 35 | { |
| 34 | u32 tmp, ctl; | 36 | u32 tmp, ctl; |
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index e86f44148c66..546ad3110fea 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h | |||
| @@ -6,6 +6,7 @@ extern void no_iommu_init(void); | |||
| 6 | extern struct dma_mapping_ops nommu_dma_ops; | 6 | extern struct dma_mapping_ops nommu_dma_ops; |
| 7 | extern int force_iommu, no_iommu; | 7 | extern int force_iommu, no_iommu; |
| 8 | extern int iommu_detected; | 8 | extern int iommu_detected; |
| 9 | extern int dmar_disabled; | ||
| 9 | 10 | ||
| 10 | extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); | 11 | extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); |
| 11 | 12 | ||
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 952e0f857ac9..ba9114ec5d3a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h | |||
| @@ -48,6 +48,11 @@ static inline int is_device_dma_capable(struct device *dev) | |||
| 48 | return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; | 48 | return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) | ||
| 52 | { | ||
| 53 | return addr + size <= mask; | ||
| 54 | } | ||
| 55 | |||
| 51 | #ifdef CONFIG_HAS_DMA | 56 | #ifdef CONFIG_HAS_DMA |
| 52 | #include <asm/dma-mapping.h> | 57 | #include <asm/dma-mapping.h> |
| 53 | #else | 58 | #else |
| @@ -58,6 +63,13 @@ static inline int is_device_dma_capable(struct device *dev) | |||
| 58 | #define dma_sync_single dma_sync_single_for_cpu | 63 | #define dma_sync_single dma_sync_single_for_cpu |
| 59 | #define dma_sync_sg dma_sync_sg_for_cpu | 64 | #define dma_sync_sg dma_sync_sg_for_cpu |
| 60 | 65 | ||
| 66 | static inline u64 dma_get_mask(struct device *dev) | ||
| 67 | { | ||
| 68 | if (dev && dev->dma_mask && *dev->dma_mask) | ||
| 69 | return *dev->dma_mask; | ||
| 70 | return DMA_32BIT_MASK; | ||
| 71 | } | ||
| 72 | |||
| 61 | extern u64 dma_get_required_mask(struct device *dev); | 73 | extern u64 dma_get_required_mask(struct device *dev); |
| 62 | 74 | ||
| 63 | static inline unsigned int dma_get_max_seg_size(struct device *dev) | 75 | static inline unsigned int dma_get_max_seg_size(struct device *dev) |
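[Editor's note] is_buffer_dma_capable() and dma_get_mask() centralize the "does this bus address fit the device's mask" test that swiotlb, GART and nommu previously open-coded. A short illustration with made-up numbers:

    #include <linux/dma-mapping.h>

    /* Illustration: a 64KB buffer whose bus address starts just below
     * 4GB does NOT fit a 32-bit mask, because addr + size must stay
     * within the mask. */
    static bool example_fits_32bit(void)
    {
            u64 mask        = DMA_32BIT_MASK;   /* 0xffffffff */
            dma_addr_t addr = 0xffff0000;       /* hypothetical */
            size_t size     = 0x10000;          /* 64KB */

            return is_buffer_dma_capable(mask, addr, size);  /* false */
    }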
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index c975caf75385..a6d0586e2bf7 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h | |||
| @@ -1,6 +1,20 @@ | |||
| 1 | #ifndef _LINUX_IOMMU_HELPER_H | ||
| 2 | #define _LINUX_IOMMU_HELPER_H | ||
| 3 | |||
| 4 | static inline unsigned long iommu_device_max_index(unsigned long size, | ||
| 5 | unsigned long offset, | ||
| 6 | u64 dma_mask) | ||
| 7 | { | ||
| 8 | if (size + offset > dma_mask) | ||
| 9 | return dma_mask - offset + 1; | ||
| 10 | else | ||
| 11 | return size; | ||
| 12 | } | ||
| 13 | |||
| 1 | extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, | 14 | extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, |
| 2 | unsigned long shift, | 15 | unsigned long shift, |
| 3 | unsigned long boundary_size); | 16 | unsigned long boundary_size); |
| 17 | extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len); | ||
| 4 | extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, | 18 | extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, |
| 5 | unsigned long start, unsigned int nr, | 19 | unsigned long start, unsigned int nr, |
| 6 | unsigned long shift, | 20 | unsigned long shift, |
| @@ -8,3 +22,5 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, | |||
| 8 | unsigned long align_mask); | 22 | unsigned long align_mask); |
| 9 | extern void iommu_area_free(unsigned long *map, unsigned long start, | 23 | extern void iommu_area_free(unsigned long *map, unsigned long start, |
| 10 | unsigned int nr); | 24 | unsigned int nr); |
| 25 | |||
| 26 | #endif | ||
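[Editor's note] iommu_device_max_index() clamps how far into an IOMMU area an allocation may reach for a given DMA mask. A worked numeric example; the units and values are purely illustrative:

    #include <linux/iommu-helper.h>

    /* Worked example: an area of 0x1000000 entries based at offset
     * 0x800000 for a device with a 24-bit mask.  Since
     * 0x1000000 + 0x800000 > 0xffffff, only the first
     * 0xffffff - 0x800000 + 1 = 0x800000 indices are usable. */
    static unsigned long example_max_index(void)
    {
            return iommu_device_max_index(0x1000000, 0x800000, 0xffffff);
    }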
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f1624b396754..c114103af987 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h | |||
| @@ -497,6 +497,16 @@ | |||
| 497 | #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 | 497 | #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 |
| 498 | #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 | 498 | #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 |
| 499 | #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 | 499 | #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 |
| 500 | #define PCI_DEVICE_ID_AMD_10H_NB_HT 0x1200 | ||
| 501 | #define PCI_DEVICE_ID_AMD_10H_NB_MAP 0x1201 | ||
| 502 | #define PCI_DEVICE_ID_AMD_10H_NB_DRAM 0x1202 | ||
| 503 | #define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 | ||
| 504 | #define PCI_DEVICE_ID_AMD_10H_NB_LINK 0x1204 | ||
| 505 | #define PCI_DEVICE_ID_AMD_11H_NB_HT 0x1300 | ||
| 506 | #define PCI_DEVICE_ID_AMD_11H_NB_MAP 0x1301 | ||
| 507 | #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 | ||
| 508 | #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 | ||
| 509 | #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 | ||
| 500 | #define PCI_DEVICE_ID_AMD_LANCE 0x2000 | 510 | #define PCI_DEVICE_ID_AMD_LANCE 0x2000 |
| 501 | #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 | 511 | #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 |
| 502 | #define PCI_DEVICE_ID_AMD_SCSI 0x2020 | 512 | #define PCI_DEVICE_ID_AMD_SCSI 0x2020 |
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c index c1d4d5b4c61c..f013a0c2e111 100644 --- a/kernel/dma-coherent.c +++ b/kernel/dma-coherent.c | |||
| @@ -124,6 +124,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size, | |||
| 124 | } | 124 | } |
| 125 | return (mem != NULL); | 125 | return (mem != NULL); |
| 126 | } | 126 | } |
| 127 | EXPORT_SYMBOL(dma_alloc_from_coherent); | ||
| 127 | 128 | ||
| 128 | /** | 129 | /** |
| 129 | * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool | 130 | * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool |
| @@ -151,3 +152,4 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) | |||
| 151 | } | 152 | } |
| 152 | return 0; | 153 | return 0; |
| 153 | } | 154 | } |
| 155 | EXPORT_SYMBOL(dma_release_from_coherent); | ||
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index a3b8d4c3f77a..5d90074dca75 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c | |||
| @@ -30,8 +30,7 @@ again: | |||
| 30 | return index; | 30 | return index; |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | static inline void set_bit_area(unsigned long *map, unsigned long i, | 33 | void iommu_area_reserve(unsigned long *map, unsigned long i, int len) |
| 34 | int len) | ||
| 35 | { | 34 | { |
| 36 | unsigned long end = i + len; | 35 | unsigned long end = i + len; |
| 37 | while (i < end) { | 36 | while (i < end) { |
| @@ -64,7 +63,7 @@ again: | |||
| 64 | start = index + 1; | 63 | start = index + 1; |
| 65 | goto again; | 64 | goto again; |
| 66 | } | 65 | } |
| 67 | set_bit_area(map, index, nr); | 66 | iommu_area_reserve(map, index, nr); |
| 68 | } | 67 | } |
| 69 | return index; | 68 | return index; |
| 70 | } | 69 | } |
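[Editor's note] The private set_bit_area() helper is renamed and exported as iommu_area_reserve(), replacing the set_bit_string() that was dropped from bitops.h; Calgary's reserved ranges and the GART's emergency pages now call it directly. A small usage sketch with an illustrative bitmap and count:

    #include <linux/iommu-helper.h>

    /* Sketch: permanently mark the first 32 slots of an allocation
     * bitmap as in use so iommu_area_alloc() never hands them out,
     * in the same way the GART reserves its EMERGENCY_PAGES above. */
    static void example_reserve_guard_slots(unsigned long *bitmap)
    {
            iommu_area_reserve(bitmap, 0, 32);
    }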
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 8826fdf0f180..f8eebd489149 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
| @@ -274,13 +274,14 @@ cleanup1: | |||
| 274 | } | 274 | } |
| 275 | 275 | ||
| 276 | static int | 276 | static int |
| 277 | address_needs_mapping(struct device *hwdev, dma_addr_t addr) | 277 | address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) |
| 278 | { | 278 | { |
| 279 | dma_addr_t mask = 0xffffffff; | 279 | return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); |
| 280 | /* If the device has a mask, use it, otherwise default to 32 bits */ | 280 | } |
| 281 | if (hwdev && hwdev->dma_mask) | 281 | |
| 282 | mask = *hwdev->dma_mask; | 282 | static int is_swiotlb_buffer(char *addr) |
| 283 | return (addr & ~mask) != 0; | 283 | { |
| 284 | return addr >= io_tlb_start && addr < io_tlb_end; | ||
| 284 | } | 285 | } |
| 285 | 286 | ||
| 286 | /* | 287 | /* |
| @@ -467,15 +468,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
| 467 | void *ret; | 468 | void *ret; |
| 468 | int order = get_order(size); | 469 | int order = get_order(size); |
| 469 | 470 | ||
| 470 | /* | ||
| 471 | * XXX fix me: the DMA API should pass us an explicit DMA mask | ||
| 472 | * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32 | ||
| 473 | * bit range instead of a 16MB one). | ||
| 474 | */ | ||
| 475 | flags |= GFP_DMA; | ||
| 476 | |||
| 477 | ret = (void *)__get_free_pages(flags, order); | 471 | ret = (void *)__get_free_pages(flags, order); |
| 478 | if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) { | 472 | if (ret && address_needs_mapping(hwdev, virt_to_bus(ret), size)) { |
| 479 | /* | 473 | /* |
| 480 | * The allocated memory isn't reachable by the device. | 474 | * The allocated memory isn't reachable by the device. |
| 481 | * Fall back on swiotlb_map_single(). | 475 | * Fall back on swiotlb_map_single(). |
| @@ -490,19 +484,16 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
| 490 | * swiotlb_map_single(), which will grab memory from | 484 | * swiotlb_map_single(), which will grab memory from |
| 491 | * the lowest available address range. | 485 | * the lowest available address range. |
| 492 | */ | 486 | */ |
| 493 | dma_addr_t handle; | 487 | ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE); |
| 494 | handle = swiotlb_map_single(hwdev, NULL, size, DMA_FROM_DEVICE); | 488 | if (!ret) |
| 495 | if (swiotlb_dma_mapping_error(hwdev, handle)) | ||
| 496 | return NULL; | 489 | return NULL; |
| 497 | |||
| 498 | ret = bus_to_virt(handle); | ||
| 499 | } | 490 | } |
| 500 | 491 | ||
| 501 | memset(ret, 0, size); | 492 | memset(ret, 0, size); |
| 502 | dev_addr = virt_to_bus(ret); | 493 | dev_addr = virt_to_bus(ret); |
| 503 | 494 | ||
| 504 | /* Confirm address can be DMA'd by device */ | 495 | /* Confirm address can be DMA'd by device */ |
| 505 | if (address_needs_mapping(hwdev, dev_addr)) { | 496 | if (address_needs_mapping(hwdev, dev_addr, size)) { |
| 506 | printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", | 497 | printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", |
| 507 | (unsigned long long)*hwdev->dma_mask, | 498 | (unsigned long long)*hwdev->dma_mask, |
| 508 | (unsigned long long)dev_addr); | 499 | (unsigned long long)dev_addr); |
| @@ -518,12 +509,11 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, | |||
| 518 | dma_addr_t dma_handle) | 509 | dma_addr_t dma_handle) |
| 519 | { | 510 | { |
| 520 | WARN_ON(irqs_disabled()); | 511 | WARN_ON(irqs_disabled()); |
| 521 | if (!(vaddr >= (void *)io_tlb_start | 512 | if (!is_swiotlb_buffer(vaddr)) |
| 522 | && vaddr < (void *)io_tlb_end)) | ||
| 523 | free_pages((unsigned long) vaddr, get_order(size)); | 513 | free_pages((unsigned long) vaddr, get_order(size)); |
| 524 | else | 514 | else |
| 525 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ | 515 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ |
| 526 | swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); | 516 | unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); |
| 527 | } | 517 | } |
| 528 | 518 | ||
| 529 | static void | 519 | static void |
| @@ -567,7 +557,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, | |||
| 567 | * we can safely return the device addr and not worry about bounce | 557 | * we can safely return the device addr and not worry about bounce |
| 568 | * buffering it. | 558 | * buffering it. |
| 569 | */ | 559 | */ |
| 570 | if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) | 560 | if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force) |
| 571 | return dev_addr; | 561 | return dev_addr; |
| 572 | 562 | ||
| 573 | /* | 563 | /* |
| @@ -584,7 +574,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, | |||
| 584 | /* | 574 | /* |
| 585 | * Ensure that the address returned is DMA'ble | 575 | * Ensure that the address returned is DMA'ble |
| 586 | */ | 576 | */ |
| 587 | if (address_needs_mapping(hwdev, dev_addr)) | 577 | if (address_needs_mapping(hwdev, dev_addr, size)) |
| 588 | panic("map_single: bounce buffer is not DMA'ble"); | 578 | panic("map_single: bounce buffer is not DMA'ble"); |
| 589 | 579 | ||
| 590 | return dev_addr; | 580 | return dev_addr; |
| @@ -612,7 +602,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, | |||
| 612 | char *dma_addr = bus_to_virt(dev_addr); | 602 | char *dma_addr = bus_to_virt(dev_addr); |
| 613 | 603 | ||
| 614 | BUG_ON(dir == DMA_NONE); | 604 | BUG_ON(dir == DMA_NONE); |
| 615 | if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) | 605 | if (is_swiotlb_buffer(dma_addr)) |
| 616 | unmap_single(hwdev, dma_addr, size, dir); | 606 | unmap_single(hwdev, dma_addr, size, dir); |
| 617 | else if (dir == DMA_FROM_DEVICE) | 607 | else if (dir == DMA_FROM_DEVICE) |
| 618 | dma_mark_clean(dma_addr, size); | 608 | dma_mark_clean(dma_addr, size); |
| @@ -642,7 +632,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, | |||
| 642 | char *dma_addr = bus_to_virt(dev_addr); | 632 | char *dma_addr = bus_to_virt(dev_addr); |
| 643 | 633 | ||
| 644 | BUG_ON(dir == DMA_NONE); | 634 | BUG_ON(dir == DMA_NONE); |
| 645 | if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) | 635 | if (is_swiotlb_buffer(dma_addr)) |
| 646 | sync_single(hwdev, dma_addr, size, dir, target); | 636 | sync_single(hwdev, dma_addr, size, dir, target); |
| 647 | else if (dir == DMA_FROM_DEVICE) | 637 | else if (dir == DMA_FROM_DEVICE) |
| 648 | dma_mark_clean(dma_addr, size); | 638 | dma_mark_clean(dma_addr, size); |
| @@ -673,7 +663,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, | |||
| 673 | char *dma_addr = bus_to_virt(dev_addr) + offset; | 663 | char *dma_addr = bus_to_virt(dev_addr) + offset; |
| 674 | 664 | ||
| 675 | BUG_ON(dir == DMA_NONE); | 665 | BUG_ON(dir == DMA_NONE); |
| 676 | if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) | 666 | if (is_swiotlb_buffer(dma_addr)) |
| 677 | sync_single(hwdev, dma_addr, size, dir, target); | 667 | sync_single(hwdev, dma_addr, size, dir, target); |
| 678 | else if (dir == DMA_FROM_DEVICE) | 668 | else if (dir == DMA_FROM_DEVICE) |
| 679 | dma_mark_clean(dma_addr, size); | 669 | dma_mark_clean(dma_addr, size); |
| @@ -727,7 +717,8 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
| 727 | for_each_sg(sgl, sg, nelems, i) { | 717 | for_each_sg(sgl, sg, nelems, i) { |
| 728 | addr = SG_ENT_VIRT_ADDRESS(sg); | 718 | addr = SG_ENT_VIRT_ADDRESS(sg); |
| 729 | dev_addr = virt_to_bus(addr); | 719 | dev_addr = virt_to_bus(addr); |
| 730 | if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { | 720 | if (swiotlb_force || |
| 721 | address_needs_mapping(hwdev, dev_addr, sg->length)) { | ||
| 731 | void *map = map_single(hwdev, addr, sg->length, dir); | 722 | void *map = map_single(hwdev, addr, sg->length, dir); |
| 732 | if (!map) { | 723 | if (!map) { |
| 733 | /* Don't panic here, we expect map_sg users | 724 | /* Don't panic here, we expect map_sg users |
