Diffstat (limited to 'arch/x86/kernel/amd_iommu.c')
-rw-r--r--	arch/x86/kernel/amd_iommu.c	1247
1 file changed, 669 insertions(+), 578 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0285521e0a9..32fb09102a1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
  * Author: Joerg Roedel <joerg.roedel@amd.com>
  * Leo Duran <leo.duran@amd.com>
  *
@@ -28,6 +28,7 @@
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
+#include <asm/amd_iommu_proto.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>

@@ -56,20 +57,115 @@ struct iommu_cmd {
  u32 data[4];
 };

-static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
- struct unity_map_entry *e);
-static struct dma_ops_domain *find_protection_domain(u16 devid);
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address, int end_lvl,
- u64 **pte_page, gfp_t gfp);
-static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
- unsigned long start_page,
- unsigned int pages);
 static void reset_iommu_command_buffer(struct amd_iommu *iommu);
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address, int map_size);
 static void update_domain(struct protection_domain *domain);

+/****************************************************************************
+ *
+ * Helper functions
+ *
+ ****************************************************************************/
+
+static inline u16 get_device_id(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return calc_devid(pdev->bus->number, pdev->devfn);
+}
+
+static struct iommu_dev_data *get_dev_data(struct device *dev)
+{
+ return dev->archdata.iommu;
+}
+
+/*
+ * In this function the list of preallocated protection domains is traversed to
+ * find the domain for a specific device
+ */
+static struct dma_ops_domain *find_protection_domain(u16 devid)
+{
+ struct dma_ops_domain *entry, *ret = NULL;
+ unsigned long flags;
+ u16 alias = amd_iommu_alias_table[devid];
+
+ if (list_empty(&iommu_pd_list))
+ return NULL;
+
+ spin_lock_irqsave(&iommu_pd_list_lock, flags);
+
+ list_for_each_entry(entry, &iommu_pd_list, list) {
+ if (entry->target_dev == devid ||
+ entry->target_dev == alias) {
+ ret = entry;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+ return ret;
+}
+
+/*
+ * This function checks if the driver got a valid device from the caller to
+ * avoid dereferencing invalid pointers.
+ */
+static bool check_device(struct device *dev)
+{
+ u16 devid;
+
+ if (!dev || !dev->dma_mask)
+ return false;
+
+ /* No device or no PCI device */
+ if (!dev || dev->bus != &pci_bus_type)
+ return false;
+
+ devid = get_device_id(dev);
+
+ /* Out of our scope? */
+ if (devid > amd_iommu_last_bdf)
+ return false;
+
+ if (amd_iommu_rlookup_table[devid] == NULL)
+ return false;
+
+ return true;
+}
+
+static int iommu_init_device(struct device *dev)
+{
+ struct iommu_dev_data *dev_data;
+ struct pci_dev *pdev;
+ u16 devid, alias;
+
+ if (dev->archdata.iommu)
+ return 0;
+
+ dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
+ if (!dev_data)
+ return -ENOMEM;
+
+ dev_data->dev = dev;
+
+ devid = get_device_id(dev);
+ alias = amd_iommu_alias_table[devid];
+ pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff);
+ if (pdev)
+ dev_data->alias = &pdev->dev;
+
+ atomic_set(&dev_data->bind, 0);
+
+ dev->archdata.iommu = dev_data;
+
+
+ return 0;
+}
+
+static void iommu_uninit_device(struct device *dev)
+{
+ kfree(dev->archdata.iommu);
+}
 #ifdef CONFIG_AMD_IOMMU_STATS

 /*
@@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem);
 DECLARE_STATS_COUNTER(total_map_requests);

 static struct dentry *stats_dir;
-static struct dentry *de_isolate;
 static struct dentry *de_fflush;

 static void amd_iommu_stats_add(struct __iommu_counter *cnt)
@@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void)
  if (stats_dir == NULL)
  return;

- de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
- (u32 *)&amd_iommu_isolate);
-
  de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
  (u32 *)&amd_iommu_unmap_flush);

@@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void)

 #endif

-/* returns !0 if the IOMMU is caching non-present entries in its TLB */
-static int iommu_has_npcache(struct amd_iommu *iommu)
-{
- return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
-}
-
 /****************************************************************************
  *
  * Interrupt handling functions
@@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
  break;
  case EVENT_TYPE_ILL_CMD:
  printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ iommu->reset_in_progress = true;
  reset_iommu_command_buffer(iommu);
  dump_command(address);
  break;
@@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
  status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
  writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);

- if (unlikely(i == EXIT_LOOP_COUNT)) {
- spin_unlock(&iommu->lock);
- reset_iommu_command_buffer(iommu);
- spin_lock(&iommu->lock);
- }
+ if (unlikely(i == EXIT_LOOP_COUNT))
+ iommu->reset_in_progress = true;
 }

 /*
@@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 out:
  spin_unlock_irqrestore(&iommu->lock, flags);

+ if (iommu->reset_in_progress)
+ reset_iommu_command_buffer(iommu);
+
  return 0;
 }

+static void iommu_flush_complete(struct protection_domain *domain)
+{
+ int i;
+
+ for (i = 0; i < amd_iommus_present; ++i) {
+ if (!domain->dev_iommu[i])
+ continue;
+
+ /*
+ * Devices of this domain are behind this IOMMU
+ * We need to wait for completion of all commands.
+ */
+ iommu_completion_wait(amd_iommus[i]);
+ }
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
-static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
+static int iommu_flush_device(struct device *dev)
 {
+ struct amd_iommu *iommu;
  struct iommu_cmd cmd;
- int ret;
+ u16 devid;

- BUG_ON(iommu == NULL);
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];

+ /* Build command */
  memset(&cmd, 0, sizeof(cmd));
  CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
  cmd.data[0] = devid;

- ret = iommu_queue_command(iommu, &cmd);
-
- return ret;
+ return iommu_queue_command(iommu, &cmd);
 }

 static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
@@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
  * It invalidates a single PTE if the range to flush is within a single
  * page. Otherwise it flushes the whole TLB of the IOMMU.
  */
-static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
- u64 address, size_t size)
+static void __iommu_flush_pages(struct protection_domain *domain,
+ u64 address, size_t size, int pde)
 {
- int s = 0;
- unsigned pages = iommu_num_pages(address, size, PAGE_SIZE);
+ int s = 0, i;
+ unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);

  address &= PAGE_MASK;

@@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
  s = 1;
  }

- iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);

- return 0;
+ for (i = 0; i < amd_iommus_present; ++i) {
+ if (!domain->dev_iommu[i])
+ continue;
+
+ /*
+ * Devices of this domain are behind this IOMMU
+ * We need a TLB flush
+ */
+ iommu_queue_inv_iommu_pages(amd_iommus[i], address,
+ domain->id, pde, s);
+ }
+
+ return;
 }

-/* Flush the whole IO/TLB for a given protection domain */
-static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
+static void iommu_flush_pages(struct protection_domain *domain,
+ u64 address, size_t size)
 {
- u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-
- INC_STATS_COUNTER(domain_flush_single);
+ __iommu_flush_pages(domain, address, size, 0);
+}

- iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
+/* Flush the whole IO/TLB for a given protection domain */
+static void iommu_flush_tlb(struct protection_domain *domain)
+{
+ __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
 }

 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
+static void iommu_flush_tlb_pde(struct protection_domain *domain)
 {
- u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-
- INC_STATS_COUNTER(domain_flush_single);
-
- iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
+ __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }

+
 /*
- * This function flushes one domain on one IOMMU
+ * This function flushes the DTEs for all devices in domain
  */
-static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid)
+static void iommu_flush_domain_devices(struct protection_domain *domain)
 {
- struct iommu_cmd cmd;
+ struct iommu_dev_data *dev_data;
  unsigned long flags;

- __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
- domid, 1, 1);
+ spin_lock_irqsave(&domain->lock, flags);

- spin_lock_irqsave(&iommu->lock, flags);
- __iommu_queue_command(iommu, &cmd);
- __iommu_completion_wait(iommu);
- __iommu_wait_for_completion(iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ list_for_each_entry(dev_data, &domain->dev_list, list)
+ iommu_flush_device(dev_data->dev);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
 }

-static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
+static void iommu_flush_all_domain_devices(void)
 {
- int i;
+ struct protection_domain *domain;
+ unsigned long flags;

- for (i = 1; i < MAX_DOMAIN_ID; ++i) {
- if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
- continue;
- flush_domain_on_iommu(iommu, i);
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+
+ list_for_each_entry(domain, &amd_iommu_pd_list, list) {
+ iommu_flush_domain_devices(domain);
+ iommu_flush_complete(domain);
  }

+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+ iommu_flush_all_domain_devices();
 }

 /*
- * This function is used to flush the IO/TLB for a given protection domain
- * on every IOMMU in the system
+ * This function uses heavy locking and may disable irqs for some time. But
+ * this is no issue because it is only called during resume.
  */
-static void iommu_flush_domain(u16 domid)
+void amd_iommu_flush_all_domains(void)
 {
- struct amd_iommu *iommu;
+ struct protection_domain *domain;
+ unsigned long flags;

- INC_STATS_COUNTER(domain_flush_all);
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);

- for_each_iommu(iommu)
- flush_domain_on_iommu(iommu, domid);
+ list_for_each_entry(domain, &amd_iommu_pd_list, list) {
+ spin_lock(&domain->lock);
+ iommu_flush_tlb_pde(domain);
+ iommu_flush_complete(domain);
+ spin_unlock(&domain->lock);
+ }
+
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
 }

-void amd_iommu_flush_all_domains(void)
+static void reset_iommu_command_buffer(struct amd_iommu *iommu)
 {
- struct amd_iommu *iommu;
+ pr_err("AMD-Vi: Resetting IOMMU command buffer\n");

- for_each_iommu(iommu)
- flush_all_domains_on_iommu(iommu);
+ if (iommu->reset_in_progress)
+ panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+
+ amd_iommu_reset_cmd_buffer(iommu);
+ amd_iommu_flush_all_devices();
+ amd_iommu_flush_all_domains();
+
+ iommu->reset_in_progress = false;
 }

-static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
+/****************************************************************************
+ *
+ * The functions below are used the create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+/*
+ * This function is used to add another level to an IO page table. Adding
+ * another level increases the size of the address space by 9 bits to a size up
+ * to 64 bits.
+ */
+static bool increase_address_space(struct protection_domain *domain,
+ gfp_t gfp)
 {
- int i;
+ u64 *pte;

- for (i = 0; i <= amd_iommu_last_bdf; ++i) {
- if (iommu != amd_iommu_rlookup_table[i])
- continue;
+ if (domain->mode == PAGE_MODE_6_LEVEL)
+ /* address space already 64 bit large */
+ return false;

- iommu_queue_inv_dev_entry(iommu, i);
- iommu_completion_wait(iommu);
- }
+ pte = (void *)get_zeroed_page(gfp);
+ if (!pte)
+ return false;
+
+ *pte = PM_LEVEL_PDE(domain->mode,
+ virt_to_phys(domain->pt_root));
+ domain->pt_root = pte;
+ domain->mode += 1;
+ domain->updated = true;
+
+ return true;
 }

-static void flush_devices_by_domain(struct protection_domain *domain)
+static u64 *alloc_pte(struct protection_domain *domain,
+ unsigned long address,
+ int end_lvl,
+ u64 **pte_page,
+ gfp_t gfp)
 {
- struct amd_iommu *iommu;
- int i;
+ u64 *pte, *page;
+ int level;

- for (i = 0; i <= amd_iommu_last_bdf; ++i) {
- if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
- (amd_iommu_pd_table[i] != domain))
- continue;
+ while (address > PM_LEVEL_SIZE(domain->mode))
+ increase_address_space(domain, gfp);

- iommu = amd_iommu_rlookup_table[i];
- if (!iommu)
- continue;
+ level = domain->mode - 1;
+ pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];

- iommu_queue_inv_dev_entry(iommu, i);
- iommu_completion_wait(iommu);
+ while (level > end_lvl) {
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(gfp);
+ if (!page)
+ return NULL;
+ *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
+ }
+
+ level -= 1;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+
+ if (pte_page && level == end_lvl)
+ *pte_page = pte;
+
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
  }
+
+ return pte;
 }

-static void reset_iommu_command_buffer(struct amd_iommu *iommu)
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64 *fetch_pte(struct protection_domain *domain,
+ unsigned long address, int map_size)
 {
- pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
+ int level;
+ u64 *pte;

- if (iommu->reset_in_progress)
- panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+ level = domain->mode - 1;
+ pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];

- iommu->reset_in_progress = true;
+ while (level > map_size) {
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;

- amd_iommu_reset_cmd_buffer(iommu);
- flush_all_devices_for_iommu(iommu);
- flush_all_domains_on_iommu(iommu);
+ level -= 1;

- iommu->reset_in_progress = false;
-}
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[PM_LEVEL_INDEX(level, address)];

-void amd_iommu_flush_all_devices(void)
-{
- flush_devices_by_domain(NULL);
-}
+ if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
+ pte = NULL;
+ break;
+ }
+ }

-/****************************************************************************
- *
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
+ return pte;
+}

 /*
  * Generic mapping functions. It maps a physical address into a DMA
@@ -654,28 +828,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
 }

 /*
- * Init the unity mappings for a specific IOMMU in the system
- *
- * Basically iterates over all unity mapping entries and applies them to
- * the default domain DMA of that IOMMU if necessary.
- */
-static int iommu_init_unity_mappings(struct amd_iommu *iommu)
-{
- struct unity_map_entry *entry;
- int ret;
-
- list_for_each_entry(entry, &amd_iommu_unity_map, list) {
- if (!iommu_for_unity_map(iommu, entry))
- continue;
- ret = dma_ops_unity_map(iommu->default_dom, entry);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/*
  * This function actually applies the mapping to the page table of the
  * dma_ops domain.
  */
@@ -704,6 +856,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 }

 /*
+ * Init the unity mappings for a specific IOMMU in the system
+ *
+ * Basically iterates over all unity mapping entries and applies them to
+ * the default domain DMA of that IOMMU if necessary.
+ */
+static int iommu_init_unity_mappings(struct amd_iommu *iommu)
+{
+ struct unity_map_entry *entry;
+ int ret;
+
+ list_for_each_entry(entry, &amd_iommu_unity_map, list) {
+ if (!iommu_for_unity_map(iommu, entry))
+ continue;
+ ret = dma_ops_unity_map(iommu->default_dom, entry);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
  * Inits the unity mappings required for a specific device
  */
 static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
@@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  */

 /*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
+ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ * ranges.
  */
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address, int map_size)
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+ unsigned long start_page,
+ unsigned int pages)
 {
- int level;
- u64 *pte;
-
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
-
- while (level > map_size) {
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- level -= 1;
+ unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;

- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[PM_LEVEL_INDEX(level, address)];
+ if (start_page + pages > last_page)
+ pages = last_page - start_page;

- if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
- pte = NULL;
- break;
- }
+ for (i = start_page; i < start_page + pages; ++i) {
+ int index = i / APERTURE_RANGE_PAGES;
+ int page = i % APERTURE_RANGE_PAGES;
+ __set_bit(page, dom->aperture[index]->bitmap);
  }
-
- return pte;
 }

 /*
@@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain,
  * aperture in case of dma_ops domain allocation or address allocation
  * failure.
  */
-static int alloc_new_range(struct amd_iommu *iommu,
- struct dma_ops_domain *dma_dom,
+static int alloc_new_range(struct dma_ops_domain *dma_dom,
  bool populate, gfp_t gfp)
 {
  int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+ struct amd_iommu *iommu;
  int i;

 #ifdef CONFIG_IOMMU_STRESS
@@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu,
  dma_dom->aperture_size += APERTURE_RANGE_SIZE;

  /* Intialize the exclusion range if necessary */
- if (iommu->exclusion_start &&
- iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
- iommu->exclusion_start < dma_dom->aperture_size) {
- unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = iommu_num_pages(iommu->exclusion_start,
- iommu->exclusion_length,
- PAGE_SIZE);
- dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ for_each_iommu(iommu) {
+ if (iommu->exclusion_start &&
+ iommu->exclusion_start >= dma_dom->aperture[index]->offset
+ && iommu->exclusion_start < dma_dom->aperture_size) {
+ unsigned long startpage;
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length,
+ PAGE_SIZE);
+ startpage = iommu->exclusion_start >> PAGE_SHIFT;
+ dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ }
  }

 /*
@@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
  }

  if (unlikely(address == -1))
- address = bad_dma_address;
+ address = DMA_ERROR_CODE;

  WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);

@@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
  *
  ****************************************************************************/

+/*
+ * This function adds a protection domain to the global protection domain list
+ */
+static void add_domain_to_list(struct protection_domain *domain)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+ list_add(&domain->list, &amd_iommu_pd_list);
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+}
+
+/*
+ * This function removes a protection domain to the global
+ * protection domain list
+ */
+static void del_domain_from_list(struct protection_domain *domain)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+ list_del(&domain->list);
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+}
+
 static u16 domain_id_alloc(void)
 {
  unsigned long flags;
@@ -1000,26 +1191,6 @@ static void domain_id_free(int id)
  write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }

-/*
- * Used to reserve address ranges in the aperture (e.g. for exclusion
- * ranges.
- */
-static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
- unsigned long start_page,
- unsigned int pages)
-{
- unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
-
- if (start_page + pages > last_page)
- pages = last_page - start_page;
-
- for (i = start_page; i < start_page + pages; ++i) {
- int index = i / APERTURE_RANGE_PAGES;
- int page = i % APERTURE_RANGE_PAGES;
- __set_bit(page, dom->aperture[index]->bitmap);
- }
-}
-
 static void free_pagetable(struct protection_domain *domain)
 {
  int i, j;
@@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  if (!dom)
  return;

+ del_domain_from_list(&dom->domain);
+
  free_pagetable(&dom->domain);

  for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
@@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  * It also intializes the page table and the address allocator data
  * structures required for the dma_ops interface
  */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
+static struct dma_ops_domain *dma_ops_domain_alloc(void)
 {
  struct dma_ops_domain *dma_dom;

@@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
  dma_dom->domain.id = domain_id_alloc();
  if (dma_dom->domain.id == 0)
  goto free_dma_dom;
+ INIT_LIST_HEAD(&dma_dom->domain.dev_list);
  dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
  dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
  dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
  dma_dom->need_flush = false;
  dma_dom->target_dev = 0xffff;

- if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+ add_domain_to_list(&dma_dom->domain);
+
+ if (alloc_new_range(dma_dom, true, GFP_KERNEL))
  goto free_dma_dom;

  /*
@@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain)
  return domain->flags & PD_DMA_OPS_MASK;
 }

-/*
- * Find out the protection domain structure for a given PCI device. This
- * will give us the pointer to the page table root for example.
- */
-static struct protection_domain *domain_for_device(u16 devid)
-{
- struct protection_domain *dom;
- unsigned long flags;
-
- read_lock_irqsave(&amd_iommu_devtable_lock, flags);
- dom = amd_iommu_pd_table[devid];
- read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
-
- return dom;
-}
-
 static void set_dte_entry(u16 devid, struct protection_domain *domain)
 {
  u64 pte_root = virt_to_phys(domain->pt_root);
@@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain)
  amd_iommu_dev_table[devid].data[2] = domain->id;
  amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
  amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+}
+
+static void clear_dte_entry(u16 devid)
+{
+ /* remove entry from the device table seen by the hardware */
+ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+ amd_iommu_dev_table[devid].data[1] = 0;
+ amd_iommu_dev_table[devid].data[2] = 0;

- amd_iommu_pd_table[devid] = domain;
+ amd_iommu_apply_erratum_63(devid);
+}
+
+static void do_attach(struct device *dev, struct protection_domain *domain)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ u16 devid;
+
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ dev_data = get_dev_data(dev);
+
+ /* Update data structures */
+ dev_data->domain = domain;
+ list_add(&dev_data->list, &domain->dev_list);
+ set_dte_entry(devid, domain);
+
+ /* Do reference counting */
+ domain->dev_iommu[iommu->index] += 1;
+ domain->dev_cnt += 1;
+
+ /* Flush the DTE entry */
+ iommu_flush_device(dev);
+}
+
+static void do_detach(struct device *dev)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ u16 devid;
+
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ dev_data = get_dev_data(dev);
+
+ /* decrease reference counters */
+ dev_data->domain->dev_iommu[iommu->index] -= 1;
+ dev_data->domain->dev_cnt -= 1;
+
+ /* Update data structures */
+ dev_data->domain = NULL;
+ list_del(&dev_data->list);
+ clear_dte_entry(devid);
+
+ /* Flush the DTE entry */
+ iommu_flush_device(dev);
 }

 /*
  * If a device is not yet associated with a domain, this function does
  * assigns it visible for the hardware
  */
-static void __attach_device(struct amd_iommu *iommu,
- struct protection_domain *domain,
- u16 devid)
+static int __attach_device(struct device *dev,
+ struct protection_domain *domain)
 {
+ struct iommu_dev_data *dev_data, *alias_data;
+
+ dev_data = get_dev_data(dev);
+ alias_data = get_dev_data(dev_data->alias);
+
+ if (!alias_data)
+ return -EINVAL;
+
  /* lock domain */
  spin_lock(&domain->lock);

- /* update DTE entry */
- set_dte_entry(devid, domain);
+ /* Some sanity checks */
+ if (alias_data->domain != NULL &&
+ alias_data->domain != domain)
+ return -EBUSY;

- domain->dev_cnt += 1;
+ if (dev_data->domain != NULL &&
+ dev_data->domain != domain)
+ return -EBUSY;
+
+ /* Do real assignment */
+ if (dev_data->alias != dev) {
+ alias_data = get_dev_data(dev_data->alias);
+ if (alias_data->domain == NULL)
+ do_attach(dev_data->alias, domain);
+
+ atomic_inc(&alias_data->bind);
+ }
+
+ if (dev_data->domain == NULL)
+ do_attach(dev, domain);
+
+ atomic_inc(&dev_data->bind);

  /* ready */
  spin_unlock(&domain->lock);
+
+ return 0;
 }

 /*
  * If a device is not yet associated with a domain, this function does
  * assigns it visible for the hardware
  */
-static void attach_device(struct amd_iommu *iommu,
- struct protection_domain *domain,
- u16 devid)
+static int attach_device(struct device *dev,
+ struct protection_domain *domain)
 {
  unsigned long flags;
+ int ret;

  write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __attach_device(iommu, domain, devid);
+ ret = __attach_device(dev, domain);
  write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

  /*
@@ -1199,98 +1440,125 @@ static void attach_device(struct amd_iommu *iommu,
  * left the caches in the IOMMU dirty. So we have to flush
  * here to evict all dirty stuff.
  */
- iommu_queue_inv_dev_entry(iommu, devid);
- iommu_flush_tlb_pde(iommu, domain->id);
+ iommu_flush_tlb_pde(domain);
+
+ return ret;
 }

 /*
  * Removes a device from a protection domain (unlocked)
  */
-static void __detach_device(struct protection_domain *domain, u16 devid)
+static void __detach_device(struct device *dev)
 {
+ struct iommu_dev_data *dev_data = get_dev_data(dev);
+ struct iommu_dev_data *alias_data;
+ unsigned long flags;

- /* lock domain */
- spin_lock(&domain->lock);
-
- /* remove domain from the lookup table */
- amd_iommu_pd_table[devid] = NULL;
+ BUG_ON(!dev_data->domain);

- /* remove entry from the device table seen by the hardware */
- amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
- amd_iommu_dev_table[devid].data[1] = 0;
- amd_iommu_dev_table[devid].data[2] = 0;
+ spin_lock_irqsave(&dev_data->domain->lock, flags);

- amd_iommu_apply_erratum_63(devid);
+ if (dev_data->alias != dev) {
+ alias_data = get_dev_data(dev_data->alias);
+ if (atomic_dec_and_test(&alias_data->bind))
+ do_detach(dev_data->alias);
+ }

- /* decrease reference counter */
- domain->dev_cnt -= 1;
+ if (atomic_dec_and_test(&dev_data->bind))
+ do_detach(dev);

- /* ready */
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&dev_data->domain->lock, flags);

  /*
  * If we run in passthrough mode the device must be assigned to the
  * passthrough domain if it is detached from any other domain
  */
- if (iommu_pass_through) {
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
- __attach_device(iommu, pt_domain, devid);
- }
+ if (iommu_pass_through && dev_data->domain == NULL)
+ __attach_device(dev, pt_domain);
 }

 /*
  * Removes a device from a protection domain (with devtable_lock held)
  */
-static void detach_device(struct protection_domain *domain, u16 devid)
+static void detach_device(struct device *dev)
 {
  unsigned long flags;

  /* lock device table */
  write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __detach_device(domain, devid);
+ __detach_device(dev);
  write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }

+/*
+ * Find out the protection domain structure for a given PCI device. This
+ * will give us the pointer to the page table root for example.
+ */
+static struct protection_domain *domain_for_device(struct device *dev)
+{
+ struct protection_domain *dom;
+ struct iommu_dev_data *dev_data, *alias_data;
+ unsigned long flags;
+ u16 devid, alias;
+
+ devid = get_device_id(dev);
+ alias = amd_iommu_alias_table[devid];
+ dev_data = get_dev_data(dev);
+ alias_data = get_dev_data(dev_data->alias);
+ if (!alias_data)
+ return NULL;
+
+ read_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ dom = dev_data->domain;
+ if (dom == NULL &&
+ alias_data->domain != NULL) {
+ __attach_device(dev, alias_data->domain);
+ dom = alias_data->domain;
+ }
+
+ read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+ return dom;
+}
+
 static int device_change_notifier(struct notifier_block *nb,
  unsigned long action, void *data)
 {
  struct device *dev = data;
- struct pci_dev *pdev = to_pci_dev(dev);
- u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+ u16 devid;
  struct protection_domain *domain;
  struct dma_ops_domain *dma_domain;
  struct amd_iommu *iommu;
  unsigned long flags;

- if (devid > amd_iommu_last_bdf)
- goto out;
-
- devid = amd_iommu_alias_table[devid];
-
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu == NULL)
- goto out;
-
- domain = domain_for_device(devid);
+ if (!check_device(dev))
+ return 0;

- if (domain && !dma_ops_domain(domain))
- WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
- "to a non-dma-ops domain\n", dev_name(dev));
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];

  switch (action) {
  case BUS_NOTIFY_UNBOUND_DRIVER:
+
+ domain = domain_for_device(dev);
+
  if (!domain)
  goto out;
  if (iommu_pass_through)
  break;
- detach_device(domain, devid);
+ detach_device(dev);
  break;
  case BUS_NOTIFY_ADD_DEVICE:
+
+ iommu_init_device(dev);
+
+ domain = domain_for_device(dev);
+
  /* allocate a protection domain if a device is added */
  dma_domain = find_protection_domain(devid);
  if (dma_domain)
  goto out;
- dma_domain = dma_ops_domain_alloc(iommu);
+ dma_domain = dma_ops_domain_alloc();
  if (!dma_domain)
  goto out;
  dma_domain->target_dev = devid;
@@ -1300,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb,
  spin_unlock_irqrestore(&iommu_pd_list_lock, flags);

  break;
+ case BUS_NOTIFY_DEL_DEVICE:
+
+ iommu_uninit_device(dev);
+
  default:
  goto out;
  }

- iommu_queue_inv_dev_entry(iommu, devid);
+ iommu_flush_device(dev);
  iommu_completion_wait(iommu);

 out:
@@ -1322,106 +1594,46 @@ static struct notifier_block device_nb = {
 *****************************************************************************/

 /*
- * This function checks if the driver got a valid device from the caller to
- * avoid dereferencing invalid pointers.
- */
-static bool check_device(struct device *dev)
-{
- if (!dev || !dev->dma_mask)
- return false;
-
- return true;
-}
-
-/*
- * In this function the list of preallocated protection domains is traversed to
- * find the domain for a specific device
- */
-static struct dma_ops_domain *find_protection_domain(u16 devid)
-{
- struct dma_ops_domain *entry, *ret = NULL;
- unsigned long flags;
-
- if (list_empty(&iommu_pd_list))
- return NULL;
-
- spin_lock_irqsave(&iommu_pd_list_lock, flags);
-
- list_for_each_entry(entry, &iommu_pd_list, list) {
- if (entry->target_dev == devid) {
- ret = entry;
- break;
- }
- }
-
- spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
-
- return ret;
-}
-
-/*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
  * requestor id for a given device.
  * If the device is not yet associated with a domain this is also done
  * in this function.
  */
-static int get_device_resources(struct device *dev,
- struct amd_iommu **iommu,
- struct protection_domain **domain,
- u16 *bdf)
+static struct protection_domain *get_domain(struct device *dev)
 {
+ struct protection_domain *domain;
  struct dma_ops_domain *dma_dom;
- struct pci_dev *pcidev;
- u16 _bdf;
-
- *iommu = NULL;
- *domain = NULL;
- *bdf = 0xffff;
-
- if (dev->bus != &pci_bus_type)
- return 0;
+ u16 devid = get_device_id(dev);

- pcidev = to_pci_dev(dev);
- _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
+ if (!check_device(dev))
+ return ERR_PTR(-EINVAL);

- /* device not translated by any IOMMU in the system? */
- if (_bdf > amd_iommu_last_bdf)
- return 0;
+ domain = domain_for_device(dev);
+ if (domain != NULL && !dma_ops_domain(domain))
+ return ERR_PTR(-EBUSY);

- *bdf = amd_iommu_alias_table[_bdf];
+ if (domain != NULL)
+ return domain;

- *iommu = amd_iommu_rlookup_table[*bdf];
- if (*iommu == NULL)
- return 0;
- *domain = domain_for_device(*bdf);
- if (*domain == NULL) {
- dma_dom = find_protection_domain(*bdf);
- if (!dma_dom)
- dma_dom = (*iommu)->default_dom;
- *domain = &dma_dom->domain;
- attach_device(*iommu, *domain, *bdf);
- DUMP_printk("Using protection domain %d for device %s\n",
- (*domain)->id, dev_name(dev));
- }
-
- if (domain_for_device(_bdf) == NULL)
- attach_device(*iommu, *domain, _bdf);
+ /* Device not bount yet - bind it */
+ dma_dom = find_protection_domain(devid);
+ if (!dma_dom)
+ dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
+ attach_device(dev, &dma_dom->domain);
+ DUMP_printk("Using protection domain %d for device %s\n",
+ dma_dom->domain.id, dev_name(dev));

- return 1;
+ return &dma_dom->domain;
 }

 static void update_device_table(struct protection_domain *domain)
 {
- unsigned long flags;
- int i;
+ struct iommu_dev_data *dev_data;

- for (i = 0; i <= amd_iommu_last_bdf; ++i) {
- if (amd_iommu_pd_table[i] != domain)
- continue;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- set_dte_entry(i, domain);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ list_for_each_entry(dev_data, &domain->dev_list, list) {
+ u16 devid = get_device_id(dev_data->dev);
+ set_dte_entry(devid, domain);
  }
 }

@@ -1431,76 +1643,13 @@ static void update_domain(struct protection_domain *domain)
  return;

  update_device_table(domain);
- flush_devices_by_domain(domain);
- iommu_flush_domain(domain->id);
+ iommu_flush_domain_devices(domain);
+ iommu_flush_tlb_pde(domain);

  domain->updated = false;
 }

 /*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct protection_domain *domain,
- gfp_t gfp)
-{
- u64 *pte;
-
- if (domain->mode == PAGE_MODE_6_LEVEL)
- /* address space already 64 bit large */
- return false;
-
- pte = (void *)get_zeroed_page(gfp);
- if (!pte)
- return false;
-
- *pte = PM_LEVEL_PDE(domain->mode,
- virt_to_phys(domain->pt_root));
- domain->pt_root = pte;
- domain->mode += 1;
- domain->updated = true;
-
- return true;
-}
-
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address,
- int end_lvl,
- u64 **pte_page,
- gfp_t gfp)
-{
- u64 *pte, *page;
- int level;
-
- while (address > PM_LEVEL_SIZE(domain->mode))
- increase_address_space(domain, gfp);
-
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
-
- while (level > end_lvl) {
- if (!IOMMU_PTE_PRESENT(*pte)) {
- page = (u64 *)get_zeroed_page(gfp);
- if (!page)
- return NULL;
- *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
- }
-
- level -= 1;
-
- pte = IOMMU_PTE_PAGE(*pte);
-
- if (pte_page && level == end_lvl)
- *pte_page = pte;
-
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- }
-
- return pte;
-}
-
-/*
  * This function fetches the PTE for a given address in the aperture
  */
 static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
@@ -1530,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
  * This is the generic map function. It maps one 4kb page at paddr to
  * the given address in the DMA address space for the domain.
  */
-static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
- struct dma_ops_domain *dom,
+static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
  unsigned long address,
  phys_addr_t paddr,
  int direction)
@@ -1544,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,

  pte = dma_ops_get_pte(dom, address);
  if (!pte)
- return bad_dma_address;
+ return DMA_ERROR_CODE;

  __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;

@@ -1565,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 /*
  * The generic unmapping function for on page in the DMA address space.
  */
-static void dma_ops_domain_unmap(struct amd_iommu *iommu,
- struct dma_ops_domain *dom,
+static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
  unsigned long address)
 {
  struct aperture_range *aperture;
@@ -1597,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
  * Must be called with the domain lock held.
  */
 static dma_addr_t __map_single(struct device *dev,
- struct amd_iommu *iommu,
  struct dma_ops_domain *dma_dom,
  phys_addr_t paddr,
  size_t size,
@@ -1625,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev,
 retry:
  address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
  dma_mask);
- if (unlikely(address == bad_dma_address)) {
+ if (unlikely(address == DMA_ERROR_CODE)) {
  /*
  * setting next_address here will let the address
  * allocator only scan the new allocated range in the
@@ -1633,7 +1779,7 @@ retry:
  */
  dma_dom->next_address = dma_dom->aperture_size;

- if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+ if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
  goto out;

  /*
@@ -1645,8 +1791,8 @@ retry:

  start = address;
  for (i = 0; i < pages; ++i) {
- ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
- if (ret == bad_dma_address)
+ ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
+ if (ret == DMA_ERROR_CODE)
  goto out_unmap;

  paddr += PAGE_SIZE;
@@ -1657,10 +1803,10 @@ retry:
  ADD_STATS_COUNTER(alloced_io_mem, size);

  if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
- iommu_flush_tlb(iommu, dma_dom->domain.id);
+ iommu_flush_tlb(&dma_dom->domain);
  dma_dom->need_flush = false;
- } else if (unlikely(iommu_has_npcache(iommu)))
- iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
+ } else if (unlikely(amd_iommu_np_cache))
+ iommu_flush_pages(&dma_dom->domain, address, size);

 out:
  return address;
@@ -1669,20 +1815,19 @@ out_unmap:

  for (--i; i >= 0; --i) {
  start -= PAGE_SIZE;
- dma_ops_domain_unmap(iommu, dma_dom, start);
+ dma_ops_domain_unmap(dma_dom, start);
  }

  dma_ops_free_addresses(dma_dom, address, pages);

- return bad_dma_address;
+ return DMA_ERROR_CODE;
 }

 /*
  * Does the reverse of the __map_single function. Must be called with
  * the domain lock held too
  */
-static void __unmap_single(struct amd_iommu *iommu,
- struct dma_ops_domain *dma_dom,
+static void __unmap_single(struct dma_ops_domain *dma_dom,
  dma_addr_t dma_addr,
  size_t size,
  int dir)
@@ -1690,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu,
  dma_addr_t i, start;
  unsigned int pages;

- if ((dma_addr == bad_dma_address) ||
+ if ((dma_addr == DMA_ERROR_CODE) ||
  (dma_addr + size > dma_dom->aperture_size))
  return;

@@ -1699,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu,
  start = dma_addr;

  for (i = 0; i < pages; ++i) {
- dma_ops_domain_unmap(iommu, dma_dom, start);
+ dma_ops_domain_unmap(dma_dom, start);
  start += PAGE_SIZE;
  }

@@ -1708,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu,
  dma_ops_free_addresses(dma_dom, dma_addr, pages);

  if (amd_iommu_unmap_flush || dma_dom->need_flush) {
- iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+ iommu_flush_pages(&dma_dom->domain, dma_addr, size);
  dma_dom->need_flush = false;
  }
 }
@@ -1722,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
  struct dma_attrs *attrs)
 {
  unsigned long flags;
- struct amd_iommu *iommu;
  struct protection_domain *domain;
- u16 devid;
  dma_addr_t addr;
  u64 dma_mask;
  phys_addr_t paddr = page_to_phys(page) + offset;

  INC_STATS_COUNTER(cnt_map_single);

- if (!check_device(dev))
- return bad_dma_address;
-
- dma_mask = *dev->dma_mask;
-
- get_device_resources(dev, &iommu, &domain, &devid);
-
- if (iommu == NULL || domain == NULL)
- /* device not handled by any AMD IOMMU */
+ domain = get_domain(dev);
+ if (PTR_ERR(domain) == -EINVAL)
  return (dma_addr_t)paddr;
+ else if (IS_ERR(domain))
+ return DMA_ERROR_CODE;

- if (!dma_ops_domain(domain))
- return bad_dma_address;
+ dma_mask = *dev->dma_mask;

  spin_lock_irqsave(&domain->lock, flags);
- addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
+
+ addr = __map_single(dev, domain->priv, paddr, size, dir, false,
  dma_mask);
- if (addr == bad_dma_address)
+ if (addr == DMA_ERROR_CODE)
  goto out;

- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);

 out:
  spin_unlock_irqrestore(&domain->lock, flags);
@@ -1766,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1766 enum dma_data_direction dir, struct dma_attrs *attrs) 1904 enum dma_data_direction dir, struct dma_attrs *attrs)
1767{ 1905{
1768 unsigned long flags; 1906 unsigned long flags;
1769 struct amd_iommu *iommu;
1770 struct protection_domain *domain; 1907 struct protection_domain *domain;
1771 u16 devid;
1772 1908
1773 INC_STATS_COUNTER(cnt_unmap_single); 1909 INC_STATS_COUNTER(cnt_unmap_single);
1774 1910
1775 if (!check_device(dev) || 1911 domain = get_domain(dev);
1776 !get_device_resources(dev, &iommu, &domain, &devid)) 1912 if (IS_ERR(domain))
1777 /* device not handled by any AMD IOMMU */
1778 return;
1779
1780 if (!dma_ops_domain(domain))
1781 return; 1913 return;
1782 1914
1783 spin_lock_irqsave(&domain->lock, flags); 1915 spin_lock_irqsave(&domain->lock, flags);
1784 1916
1785 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1917 __unmap_single(domain->priv, dma_addr, size, dir);
1786 1918
1787 iommu_completion_wait(iommu); 1919 iommu_flush_complete(domain);
1788 1920
1789 spin_unlock_irqrestore(&domain->lock, flags); 1921 spin_unlock_irqrestore(&domain->lock, flags);
1790} 1922}
@@ -1816,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1816 struct dma_attrs *attrs) 1948 struct dma_attrs *attrs)
1817{ 1949{
1818 unsigned long flags; 1950 unsigned long flags;
1819 struct amd_iommu *iommu;
1820 struct protection_domain *domain; 1951 struct protection_domain *domain;
1821 u16 devid;
1822 int i; 1952 int i;
1823 struct scatterlist *s; 1953 struct scatterlist *s;
1824 phys_addr_t paddr; 1954 phys_addr_t paddr;
@@ -1827,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1827 1957
1828 INC_STATS_COUNTER(cnt_map_sg); 1958 INC_STATS_COUNTER(cnt_map_sg);
1829 1959
1830 if (!check_device(dev)) 1960 domain = get_domain(dev);
1961 if (PTR_ERR(domain) == -EINVAL)
1962 return map_sg_no_iommu(dev, sglist, nelems, dir);
1963 else if (IS_ERR(domain))
1831 return 0; 1964 return 0;
1832 1965
1833 dma_mask = *dev->dma_mask; 1966 dma_mask = *dev->dma_mask;
1834 1967
1835 get_device_resources(dev, &iommu, &domain, &devid);
1836
1837 if (!iommu || !domain)
1838 return map_sg_no_iommu(dev, sglist, nelems, dir);
1839
1840 if (!dma_ops_domain(domain))
1841 return 0;
1842
1843 spin_lock_irqsave(&domain->lock, flags); 1968 spin_lock_irqsave(&domain->lock, flags);
1844 1969
1845 for_each_sg(sglist, s, nelems, i) { 1970 for_each_sg(sglist, s, nelems, i) {
1846 paddr = sg_phys(s); 1971 paddr = sg_phys(s);
1847 1972
1848 s->dma_address = __map_single(dev, iommu, domain->priv, 1973 s->dma_address = __map_single(dev, domain->priv,
1849 paddr, s->length, dir, false, 1974 paddr, s->length, dir, false,
1850 dma_mask); 1975 dma_mask);
1851 1976
@@ -1856,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1856 goto unmap; 1981 goto unmap;
1857 } 1982 }
1858 1983
1859 iommu_completion_wait(iommu); 1984 iommu_flush_complete(domain);
1860 1985
1861out: 1986out:
1862 spin_unlock_irqrestore(&domain->lock, flags); 1987 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1865,7 +1990,7 @@ out:
1865unmap: 1990unmap:
1866 for_each_sg(sglist, s, mapped_elems, i) { 1991 for_each_sg(sglist, s, mapped_elems, i) {
1867 if (s->dma_address) 1992 if (s->dma_address)
1868 __unmap_single(iommu, domain->priv, s->dma_address, 1993 __unmap_single(domain->priv, s->dma_address,
1869 s->dma_length, dir); 1994 s->dma_length, dir);
1870 s->dma_address = s->dma_length = 0; 1995 s->dma_address = s->dma_length = 0;
1871 } 1996 }
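map_sg() keeps its rollback structure through the rework: each scatterlist element is mapped in turn and, if one mapping fails, everything mapped so far is unmapped again before returning 0. Below is a small self-contained model of that unwind pattern, with the scatterlist reduced to a plain array and fake map/unmap helpers; all *_model names and SG_ERROR are invented for the example.

/*
 * Illustrative only: map each element, and on failure walk back over the
 * already-mapped ones, mirroring the out/unmap labels in map_sg().
 */
#include <stddef.h>
#include <stdio.h>

#define SG_ERROR 0UL

struct sg_model {
        unsigned long paddr;
        unsigned long dma_address;
        size_t        length;
};

static unsigned long map_one(unsigned long paddr, size_t len, int fail)
{
        return fail ? SG_ERROR : (0x80000000UL | paddr);
}

static void unmap_one(unsigned long dma, size_t len)
{
        printf("rollback: unmap iova 0x%lx (%zu bytes)\n", dma, len);
}

static int map_sg_model(struct sg_model *sg, int nelems, int fail_at)
{
        int i, mapped_elems = 0;

        for (i = 0; i < nelems; i++) {
                sg[i].dma_address = map_one(sg[i].paddr, sg[i].length,
                                            i == fail_at);
                if (sg[i].dma_address == SG_ERROR)
                        goto unmap;
                mapped_elems++;
        }
        return mapped_elems;

unmap:
        for (i = 0; i < mapped_elems; i++) {
                unmap_one(sg[i].dma_address, sg[i].length);
                sg[i].dma_address = sg[i].length = 0;
        }
        return 0;
}

int main(void)
{
        struct sg_model sg[3] = {
                { .paddr = 0x1000, .length = 4096 },
                { .paddr = 0x3000, .length = 4096 },
                { .paddr = 0x5000, .length = 4096 },
        };

        printf("mapped %d elements\n", map_sg_model(sg, 3, 2));
        return 0;
}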
@@ -1884,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1884 struct dma_attrs *attrs) 2009 struct dma_attrs *attrs)
1885{ 2010{
1886 unsigned long flags; 2011 unsigned long flags;
1887 struct amd_iommu *iommu;
1888 struct protection_domain *domain; 2012 struct protection_domain *domain;
1889 struct scatterlist *s; 2013 struct scatterlist *s;
1890 u16 devid;
1891 int i; 2014 int i;
1892 2015
1893 INC_STATS_COUNTER(cnt_unmap_sg); 2016 INC_STATS_COUNTER(cnt_unmap_sg);
1894 2017
1895 if (!check_device(dev) || 2018 domain = get_domain(dev);
1896 !get_device_resources(dev, &iommu, &domain, &devid)) 2019 if (IS_ERR(domain))
1897 return;
1898
1899 if (!dma_ops_domain(domain))
1900 return; 2020 return;
1901 2021
1902 spin_lock_irqsave(&domain->lock, flags); 2022 spin_lock_irqsave(&domain->lock, flags);
1903 2023
1904 for_each_sg(sglist, s, nelems, i) { 2024 for_each_sg(sglist, s, nelems, i) {
1905 __unmap_single(iommu, domain->priv, s->dma_address, 2025 __unmap_single(domain->priv, s->dma_address,
1906 s->dma_length, dir); 2026 s->dma_length, dir);
1907 s->dma_address = s->dma_length = 0; 2027 s->dma_address = s->dma_length = 0;
1908 } 2028 }
1909 2029
1910 iommu_completion_wait(iommu); 2030 iommu_flush_complete(domain);
1911 2031
1912 spin_unlock_irqrestore(&domain->lock, flags); 2032 spin_unlock_irqrestore(&domain->lock, flags);
1913} 2033}
@@ -1920,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size,
1920{ 2040{
1921 unsigned long flags; 2041 unsigned long flags;
1922 void *virt_addr; 2042 void *virt_addr;
1923 struct amd_iommu *iommu;
1924 struct protection_domain *domain; 2043 struct protection_domain *domain;
1925 u16 devid;
1926 phys_addr_t paddr; 2044 phys_addr_t paddr;
1927 u64 dma_mask = dev->coherent_dma_mask; 2045 u64 dma_mask = dev->coherent_dma_mask;
1928 2046
1929 INC_STATS_COUNTER(cnt_alloc_coherent); 2047 INC_STATS_COUNTER(cnt_alloc_coherent);
1930 2048
1931 if (!check_device(dev)) 2049 domain = get_domain(dev);
2050 if (PTR_ERR(domain) == -EINVAL) {
2051 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2052 *dma_addr = __pa(virt_addr);
2053 return virt_addr;
2054 } else if (IS_ERR(domain))
1932 return NULL; 2055 return NULL;
1933 2056
1934 if (!get_device_resources(dev, &iommu, &domain, &devid)) 2057 dma_mask = dev->coherent_dma_mask;
1935 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 2058 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2059 flag |= __GFP_ZERO;
1936 2060
1937 flag |= __GFP_ZERO;
1938 virt_addr = (void *)__get_free_pages(flag, get_order(size)); 2061 virt_addr = (void *)__get_free_pages(flag, get_order(size));
1939 if (!virt_addr) 2062 if (!virt_addr)
1940 return NULL; 2063 return NULL;
1941 2064
1942 paddr = virt_to_phys(virt_addr); 2065 paddr = virt_to_phys(virt_addr);
1943 2066
1944 if (!iommu || !domain) {
1945 *dma_addr = (dma_addr_t)paddr;
1946 return virt_addr;
1947 }
1948
1949 if (!dma_ops_domain(domain))
1950 goto out_free;
1951
1952 if (!dma_mask) 2067 if (!dma_mask)
1953 dma_mask = *dev->dma_mask; 2068 dma_mask = *dev->dma_mask;
1954 2069
1955 spin_lock_irqsave(&domain->lock, flags); 2070 spin_lock_irqsave(&domain->lock, flags);
1956 2071
1957 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 2072 *dma_addr = __map_single(dev, domain->priv, paddr,
1958 size, DMA_BIDIRECTIONAL, true, dma_mask); 2073 size, DMA_BIDIRECTIONAL, true, dma_mask);
1959 2074
1960 if (*dma_addr == bad_dma_address) { 2075 if (*dma_addr == DMA_ERROR_CODE) {
1961 spin_unlock_irqrestore(&domain->lock, flags); 2076 spin_unlock_irqrestore(&domain->lock, flags);
1962 goto out_free; 2077 goto out_free;
1963 } 2078 }
1964 2079
1965 iommu_completion_wait(iommu); 2080 iommu_flush_complete(domain);
1966 2081
1967 spin_unlock_irqrestore(&domain->lock, flags); 2082 spin_unlock_irqrestore(&domain->lock, flags);
1968 2083
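alloc_coherent() strips the zone-restricting GFP flags and forces __GFP_ZERO before allocating, presumably because the IOMMU can remap whatever page it gets to an address the device can reach, and it falls back to the streaming DMA mask when no coherent mask is set. The toy below mirrors only that flag and mask handling; the flag values and helper names are arbitrary stand-ins, not the real GFP bits.

/*
 * Illustrative only: GFP cleanup and coherent-mask fallback as done at the
 * top of alloc_coherent() once a real IOMMU domain is in play.
 */
#include <stdint.h>
#include <stdio.h>

#define GFP_DMA_MODEL     0x01u
#define GFP_HIGHMEM_MODEL 0x02u
#define GFP_DMA32_MODEL   0x04u
#define GFP_ZERO_MODEL    0x08u

static unsigned int fixup_gfp_model(unsigned int flag)
{
        /* no zone restriction needed, but the buffer must come back zeroed */
        flag &= ~(GFP_DMA_MODEL | GFP_HIGHMEM_MODEL | GFP_DMA32_MODEL);
        flag |= GFP_ZERO_MODEL;
        return flag;
}

static uint64_t pick_dma_mask_model(uint64_t coherent_mask, uint64_t streaming_mask)
{
        /* models: if (!dma_mask) dma_mask = *dev->dma_mask; */
        return coherent_mask ? coherent_mask : streaming_mask;
}

int main(void)
{
        printf("gfp:  0x%x\n", fixup_gfp_model(GFP_DMA32_MODEL | GFP_DMA_MODEL));
        printf("mask: 0x%llx\n",
               (unsigned long long)pick_dma_mask_model(0, 0xffffffffULL));
        return 0;
}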
@@ -1982,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size,
1982 void *virt_addr, dma_addr_t dma_addr) 2097 void *virt_addr, dma_addr_t dma_addr)
1983{ 2098{
1984 unsigned long flags; 2099 unsigned long flags;
1985 struct amd_iommu *iommu;
1986 struct protection_domain *domain; 2100 struct protection_domain *domain;
1987 u16 devid;
1988 2101
1989 INC_STATS_COUNTER(cnt_free_coherent); 2102 INC_STATS_COUNTER(cnt_free_coherent);
1990 2103
1991 if (!check_device(dev)) 2104 domain = get_domain(dev);
1992 return; 2105 if (IS_ERR(domain))
1993
1994 get_device_resources(dev, &iommu, &domain, &devid);
1995
1996 if (!iommu || !domain)
1997 goto free_mem;
1998
1999 if (!dma_ops_domain(domain))
2000 goto free_mem; 2106 goto free_mem;
2001 2107
2002 spin_lock_irqsave(&domain->lock, flags); 2108 spin_lock_irqsave(&domain->lock, flags);
2003 2109
2004 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2110 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2005 2111
2006 iommu_completion_wait(iommu); 2112 iommu_flush_complete(domain);
2007 2113
2008 spin_unlock_irqrestore(&domain->lock, flags); 2114 spin_unlock_irqrestore(&domain->lock, flags);
2009 2115
@@ -2017,22 +2123,7 @@ free_mem:
2017 */ 2123 */
2018static int amd_iommu_dma_supported(struct device *dev, u64 mask) 2124static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2019{ 2125{
2020 u16 bdf; 2126 return check_device(dev);
2021 struct pci_dev *pcidev;
2022
2023 /* No device or no PCI device */
2024 if (!dev || dev->bus != &pci_bus_type)
2025 return 0;
2026
2027 pcidev = to_pci_dev(dev);
2028
2029 bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
2030
2031 /* Out of our scope? */
2032 if (bdf > amd_iommu_last_bdf)
2033 return 0;
2034
2035 return 1;
2036} 2127}
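The open-coded body of amd_iommu_dma_supported() collapses to check_device(dev), which effectively performs the same test the removed lines did: fold bus and devfn into a 16-bit device ID and refuse anything beyond amd_iommu_last_bdf. A small model of that BDF computation and bounds check follows; last_bdf_model and the helper names are invented for the example.

/*
 * Illustrative only: the bus/devfn -> 16-bit BDF folding and the
 * amd_iommu_last_bdf bounds check.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t last_bdf_model = 0x3fff;        /* stand-in for amd_iommu_last_bdf */

static inline uint16_t calc_devid_model(uint8_t bus, uint8_t devfn)
{
        return ((uint16_t)bus << 8) | devfn;    /* bus in bits 15..8, devfn in 7..0 */
}

static int dma_supported_model(uint8_t bus, uint8_t devfn)
{
        uint16_t bdf = calc_devid_model(bus, devfn);

        return bdf <= last_bdf_model;           /* out of scope -> not supported */
}

int main(void)
{
        printf("00:1f.3 -> %d\n", dma_supported_model(0x00, (0x1f << 3) | 3));
        printf("7f:00.0 -> %d\n", dma_supported_model(0x7f, 0));
        return 0;
}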
2037 2128
2038/* 2129/*
@@ -2046,25 +2137,30 @@ static void prealloc_protection_domains(void)
2046{ 2137{
2047 struct pci_dev *dev = NULL; 2138 struct pci_dev *dev = NULL;
2048 struct dma_ops_domain *dma_dom; 2139 struct dma_ops_domain *dma_dom;
2049 struct amd_iommu *iommu;
2050 u16 devid; 2140 u16 devid;
2051 2141
2052 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2142 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2053 devid = calc_devid(dev->bus->number, dev->devfn); 2143
2054 if (devid > amd_iommu_last_bdf) 2144 /* Do we handle this device? */
2055 continue; 2145 if (!check_device(&dev->dev))
2056 devid = amd_iommu_alias_table[devid];
2057 if (domain_for_device(devid))
2058 continue; 2146 continue;
2059 iommu = amd_iommu_rlookup_table[devid]; 2147
2060 if (!iommu) 2148 iommu_init_device(&dev->dev);
2149
2150 /* Is there already any domain for it? */
2151 if (domain_for_device(&dev->dev))
2061 continue; 2152 continue;
2062 dma_dom = dma_ops_domain_alloc(iommu); 2153
2154 devid = get_device_id(&dev->dev);
2155
2156 dma_dom = dma_ops_domain_alloc();
2063 if (!dma_dom) 2157 if (!dma_dom)
2064 continue; 2158 continue;
2065 init_unity_mappings_for_device(dma_dom, devid); 2159 init_unity_mappings_for_device(dma_dom, devid);
2066 dma_dom->target_dev = devid; 2160 dma_dom->target_dev = devid;
2067 2161
2162 attach_device(&dev->dev, &dma_dom->domain);
2163
2068 list_add_tail(&dma_dom->list, &iommu_pd_list); 2164 list_add_tail(&dma_dom->list, &iommu_pd_list);
2069 } 2165 }
2070} 2166}
@@ -2093,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void)
2093 * protection domain will be assigned to the default one. 2189 * protection domain will be assigned to the default one.
2094 */ 2190 */
2095 for_each_iommu(iommu) { 2191 for_each_iommu(iommu) {
2096 iommu->default_dom = dma_ops_domain_alloc(iommu); 2192 iommu->default_dom = dma_ops_domain_alloc();
2097 if (iommu->default_dom == NULL) 2193 if (iommu->default_dom == NULL)
2098 return -ENOMEM; 2194 return -ENOMEM;
2099 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 2195 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -2103,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void)
2103 } 2199 }
2104 2200
2105 /* 2201 /*
2106 * If device isolation is enabled, pre-allocate the protection 2202 * Pre-allocate the protection domains for each device.
2107 * domains for each device.
2108 */ 2203 */
2109 if (amd_iommu_isolate) 2204 prealloc_protection_domains();
2110 prealloc_protection_domains();
2111 2205
2112 iommu_detected = 1; 2206 iommu_detected = 1;
2113 force_iommu = 1; 2207 swiotlb = 0;
2114 bad_dma_address = 0;
2115#ifdef CONFIG_GART_IOMMU 2208#ifdef CONFIG_GART_IOMMU
2116 gart_iommu_aperture_disabled = 1; 2209 gart_iommu_aperture_disabled = 1;
2117 gart_iommu_aperture = 0; 2210 gart_iommu_aperture = 0;
@@ -2150,14 +2243,17 @@ free_domains:
2150 2243
2151static void cleanup_domain(struct protection_domain *domain) 2244static void cleanup_domain(struct protection_domain *domain)
2152{ 2245{
2246 struct iommu_dev_data *dev_data, *next;
2153 unsigned long flags; 2247 unsigned long flags;
2154 u16 devid;
2155 2248
2156 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2249 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2157 2250
2158 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 2251 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2159 if (amd_iommu_pd_table[devid] == domain) 2252 struct device *dev = dev_data->dev;
2160 __detach_device(domain, devid); 2253
2254 do_detach(dev);
2255 atomic_set(&dev_data->bind, 0);
2256 }
2161 2257
2162 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2258 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2163} 2259}
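cleanup_domain() now walks the domain's dev_list with list_for_each_entry_safe() and detaches each device; the _safe variant is needed because do_detach() presumably unlinks the entry currently being visited. The sketch below strips the kernel's intrusive list and container_of() trick down to a runnable userspace example of that "remember next before deleting" pattern; all names are invented for the illustration.

/*
 * Illustrative only: safe traversal of an intrusive list while every visited
 * entry is removed, mirroring list_for_each_entry_safe() in cleanup_domain().
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_del(struct list_head *e)
{
        e->prev->next = e->next;
        e->next->prev = e->prev;
}

struct dev_data_model {
        int id;
        struct list_head list;
};

int main(void)
{
        struct list_head dev_list = { &dev_list, &dev_list };
        struct dev_data_model d[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
        struct list_head *pos, *n;

        /* build the per-domain device list (insert at head, keep 0,1,2 order) */
        for (int i = 2; i >= 0; i--) {
                d[i].list.next = dev_list.next;
                d[i].list.prev = &dev_list;
                dev_list.next->prev = &d[i].list;
                dev_list.next = &d[i].list;
        }

        /* "safe" traversal: fetch n before the current entry is unlinked */
        for (pos = dev_list.next, n = pos->next; pos != &dev_list;
             pos = n, n = pos->next) {
                struct dev_data_model *dd =
                        container_of(pos, struct dev_data_model, list);

                printf("detach device %d\n", dd->id);
                list_del(pos);          /* models do_detach() unlinking the entry */
        }
        return 0;
}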
@@ -2167,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain)
2167 if (!domain) 2263 if (!domain)
2168 return; 2264 return;
2169 2265
2266 del_domain_from_list(domain);
2267
2170 if (domain->id) 2268 if (domain->id)
2171 domain_id_free(domain->id); 2269 domain_id_free(domain->id);
2172 2270
@@ -2185,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void)
2185 domain->id = domain_id_alloc(); 2283 domain->id = domain_id_alloc();
2186 if (!domain->id) 2284 if (!domain->id)
2187 goto out_err; 2285 goto out_err;
2286 INIT_LIST_HEAD(&domain->dev_list);
2287
2288 add_domain_to_list(domain);
2188 2289
2189 return domain; 2290 return domain;
2190 2291
@@ -2241,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2241static void amd_iommu_detach_device(struct iommu_domain *dom, 2342static void amd_iommu_detach_device(struct iommu_domain *dom,
2242 struct device *dev) 2343 struct device *dev)
2243{ 2344{
2244 struct protection_domain *domain = dom->priv; 2345 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2245 struct amd_iommu *iommu; 2346 struct amd_iommu *iommu;
2246 struct pci_dev *pdev;
2247 u16 devid; 2347 u16 devid;
2248 2348
2249 if (dev->bus != &pci_bus_type) 2349 if (!check_device(dev))
2250 return; 2350 return;
2251 2351
2252 pdev = to_pci_dev(dev); 2352 devid = get_device_id(dev);
2253
2254 devid = calc_devid(pdev->bus->number, pdev->devfn);
2255 2353
2256 if (devid > 0) 2354 if (dev_data->domain != NULL)
2257 detach_device(domain, devid); 2355 detach_device(dev);
2258 2356
2259 iommu = amd_iommu_rlookup_table[devid]; 2357 iommu = amd_iommu_rlookup_table[devid];
2260 if (!iommu) 2358 if (!iommu)
2261 return; 2359 return;
2262 2360
2263 iommu_queue_inv_dev_entry(iommu, devid); 2361 iommu_flush_device(dev);
2264 iommu_completion_wait(iommu); 2362 iommu_completion_wait(iommu);
2265} 2363}
2266 2364
@@ -2268,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
2268 struct device *dev) 2366 struct device *dev)
2269{ 2367{
2270 struct protection_domain *domain = dom->priv; 2368 struct protection_domain *domain = dom->priv;
2271 struct protection_domain *old_domain; 2369 struct iommu_dev_data *dev_data;
2272 struct amd_iommu *iommu; 2370 struct amd_iommu *iommu;
2273 struct pci_dev *pdev; 2371 int ret;
2274 u16 devid; 2372 u16 devid;
2275 2373
2276 if (dev->bus != &pci_bus_type) 2374 if (!check_device(dev))
2277 return -EINVAL; 2375 return -EINVAL;
2278 2376
2279 pdev = to_pci_dev(dev); 2377 dev_data = dev->archdata.iommu;
2280 2378
2281 devid = calc_devid(pdev->bus->number, pdev->devfn); 2379 devid = get_device_id(dev);
2282
2283 if (devid >= amd_iommu_last_bdf ||
2284 devid != amd_iommu_alias_table[devid])
2285 return -EINVAL;
2286 2380
2287 iommu = amd_iommu_rlookup_table[devid]; 2381 iommu = amd_iommu_rlookup_table[devid];
2288 if (!iommu) 2382 if (!iommu)
2289 return -EINVAL; 2383 return -EINVAL;
2290 2384
2291 old_domain = domain_for_device(devid); 2385 if (dev_data->domain)
2292 if (old_domain) 2386 detach_device(dev);
2293 detach_device(old_domain, devid);
2294 2387
2295 attach_device(iommu, domain, devid); 2388 ret = attach_device(dev, domain);
2296 2389
2297 iommu_completion_wait(iommu); 2390 iommu_completion_wait(iommu);
2298 2391
2299 return 0; 2392 return ret;
2300} 2393}
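amd_iommu_attach_device() above shows the new shape of the attach path: domain membership is read from the per-device iommu_dev_data hanging off dev->archdata.iommu, and a device already bound to a domain is detached before being attached to the new one. The toy below models only that per-device bookkeeping; every *_model type and the printed messages are invented for the example.

/*
 * Illustrative only: per-device domain tracking with detach-before-attach,
 * mirroring the dev_data->domain check in amd_iommu_attach_device().
 */
#include <stddef.h>
#include <stdio.h>

struct domain_model { int id; };

struct dev_data_model {
        struct domain_model *domain;    /* models iommu_dev_data->domain */
        int bind;                       /* models the bind refcount      */
};

struct device_model {
        const char *name;
        struct dev_data_model *iommu;   /* models dev->archdata.iommu    */
};

static void detach_model(struct device_model *dev)
{
        printf("%s: detached from domain %d\n", dev->name,
               dev->iommu->domain->id);
        dev->iommu->domain = NULL;
        dev->iommu->bind = 0;
}

static int attach_model(struct device_model *dev, struct domain_model *dom)
{
        if (dev->iommu->domain)         /* already owned? detach first */
                detach_model(dev);

        dev->iommu->domain = dom;
        dev->iommu->bind = 1;
        printf("%s: attached to domain %d\n", dev->name, dom->id);
        return 0;
}

int main(void)
{
        struct dev_data_model data = { 0 };
        struct device_model dev = { .name = "0000:00:1f.3", .iommu = &data };
        struct domain_model d1 = { .id = 1 }, d2 = { .id = 2 };

        attach_model(&dev, &d1);
        attach_model(&dev, &d2);        /* implicitly detaches from d1 */
        return 0;
}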
2301 2394
2302static int amd_iommu_map_range(struct iommu_domain *dom, 2395static int amd_iommu_map_range(struct iommu_domain *dom,
@@ -2342,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
2342 iova += PAGE_SIZE; 2435 iova += PAGE_SIZE;
2343 } 2436 }
2344 2437
2345 iommu_flush_domain(domain->id); 2438 iommu_flush_tlb_pde(domain);
2346} 2439}
2347 2440
2348static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2441static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2393,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = {
2393 2486
2394int __init amd_iommu_init_passthrough(void) 2487int __init amd_iommu_init_passthrough(void)
2395{ 2488{
2489 struct amd_iommu *iommu;
2396 struct pci_dev *dev = NULL; 2490 struct pci_dev *dev = NULL;
2397 u16 devid, devid2; 2491 u16 devid;
2398 2492
2399 /* allocate passthrough domain */ 2493 /* allocate passthrough domain */
2400 pt_domain = protection_domain_alloc(); 2494 pt_domain = protection_domain_alloc();
@@ -2404,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void)
2404 pt_domain->mode |= PAGE_MODE_NONE; 2498 pt_domain->mode |= PAGE_MODE_NONE;
2405 2499
2406 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2500 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2407 struct amd_iommu *iommu;
2408 2501
2409 devid = calc_devid(dev->bus->number, dev->devfn); 2502 if (!check_device(&dev->dev))
2410 if (devid > amd_iommu_last_bdf)
2411 continue; 2503 continue;
2412 2504
2413 devid2 = amd_iommu_alias_table[devid]; 2505 devid = get_device_id(&dev->dev);
2414 2506
2415 iommu = amd_iommu_rlookup_table[devid2]; 2507 iommu = amd_iommu_rlookup_table[devid];
2416 if (!iommu) 2508 if (!iommu)
2417 continue; 2509 continue;
2418 2510
2419 __attach_device(iommu, pt_domain, devid); 2511 attach_device(&dev->dev, pt_domain);
2420 __attach_device(iommu, pt_domain, devid2);
2421 } 2512 }
2422 2513
2423 pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); 2514 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");