Diffstat (limited to 'arch/x86/kernel/amd_iommu.c')
-rw-r--r--  arch/x86/kernel/amd_iommu.c  1295
1 file changed, 713 insertions(+), 582 deletions(-)
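For orientation before the diff: this patch moves the AMD IOMMU driver from per-devid lookups to per-device state (struct iommu_dev_data hung off dev->archdata.iommu), keeps a device list per protection domain, and uses per-IOMMU reference counts so that __iommu_flush_pages()/iommu_flush_complete() only touch IOMMUs that actually serve the domain. The sketch below is a minimal, self-contained illustration of that bookkeeping pattern only; the simplified types, names, and the printf stand-ins are assumptions for illustration and are not the kernel implementation.

/*
 * Minimal sketch (not kernel code) of the attach/detach bookkeeping
 * this patch introduces: per-device data, a per-domain device list,
 * and a per-IOMMU device count that limits which IOMMUs get flushed.
 */
#include <stdio.h>

#define MAX_IOMMUS 4

struct dev_data;

struct domain {
	int id;
	int dev_iommu[MAX_IOMMUS];   /* devices behind each IOMMU */
	int dev_cnt;
	struct dev_data *dev_list;   /* attached devices */
};

struct dev_data {
	const char *name;
	int iommu_index;             /* which IOMMU handles this device */
	struct domain *domain;
	struct dev_data *next;       /* link in domain->dev_list */
};

static void do_attach(struct dev_data *d, struct domain *dom)
{
	/* Update data structures, then bump the reference counts. */
	d->domain  = dom;
	d->next    = dom->dev_list;
	dom->dev_list = d;
	dom->dev_iommu[d->iommu_index] += 1;
	dom->dev_cnt += 1;
}

static void do_detach(struct dev_data *d)
{
	struct domain *dom = d->domain;
	struct dev_data **p;

	/* Unlink the device from the domain list. */
	for (p = &dom->dev_list; *p; p = &(*p)->next) {
		if (*p == d) {
			*p = d->next;
			break;
		}
	}
	dom->dev_iommu[d->iommu_index] -= 1;
	dom->dev_cnt -= 1;
	d->domain = NULL;
}

/* Flush only the IOMMUs that actually have devices in this domain. */
static void flush_domain(struct domain *dom)
{
	int i;

	for (i = 0; i < MAX_IOMMUS; i++)
		if (dom->dev_iommu[i])
			printf("flush TLB of IOMMU %d for domain %d\n",
			       i, dom->id);
}

int main(void)
{
	struct domain dom = { .id = 1 };
	struct dev_data nic = { .name = "nic", .iommu_index = 0 };
	struct dev_data gpu = { .name = "gpu", .iommu_index = 2 };

	do_attach(&nic, &dom);
	do_attach(&gpu, &dom);
	flush_domain(&dom);	/* flushes IOMMU 0 and IOMMU 2 only */

	do_detach(&gpu);
	flush_domain(&dom);	/* now only IOMMU 0 */
	return 0;
}

The per-IOMMU count is the reason the patch can drop the old "flush every IOMMU for every domain" helpers: a flush walks only the IOMMUs with a non-zero count for that domain.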
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 98f230f6a28d..23824fef789c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -19,7 +19,7 @@
19 19
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/gfp.h> 21#include <linux/gfp.h>
22#include <linux/bitops.h> 22#include <linux/bitmap.h>
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
25#include <linux/dma-mapping.h> 25#include <linux/dma-mapping.h>
@@ -28,6 +28,7 @@
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <asm/iommu.h> 29#include <asm/iommu.h>
30#include <asm/gart.h> 30#include <asm/gart.h>
31#include <asm/amd_iommu_proto.h>
31#include <asm/amd_iommu_types.h> 32#include <asm/amd_iommu_types.h>
32#include <asm/amd_iommu.h> 33#include <asm/amd_iommu.h>
33 34
@@ -56,20 +57,152 @@ struct iommu_cmd {
56 u32 data[4]; 57 u32 data[4];
57}; 58};
58 59
59static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
60 struct unity_map_entry *e);
61static struct dma_ops_domain *find_protection_domain(u16 devid);
62static u64 *alloc_pte(struct protection_domain *domain,
63 unsigned long address, int end_lvl,
64 u64 **pte_page, gfp_t gfp);
65static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
66 unsigned long start_page,
67 unsigned int pages);
68static void reset_iommu_command_buffer(struct amd_iommu *iommu); 60static void reset_iommu_command_buffer(struct amd_iommu *iommu);
69static u64 *fetch_pte(struct protection_domain *domain,
70 unsigned long address, int map_size);
71static void update_domain(struct protection_domain *domain); 61static void update_domain(struct protection_domain *domain);
72 62
63/****************************************************************************
64 *
65 * Helper functions
66 *
67 ****************************************************************************/
68
69static inline u16 get_device_id(struct device *dev)
70{
71 struct pci_dev *pdev = to_pci_dev(dev);
72
73 return calc_devid(pdev->bus->number, pdev->devfn);
74}
75
76static struct iommu_dev_data *get_dev_data(struct device *dev)
77{
78 return dev->archdata.iommu;
79}
80
81/*
82 * In this function the list of preallocated protection domains is traversed to
83 * find the domain for a specific device
84 */
85static struct dma_ops_domain *find_protection_domain(u16 devid)
86{
87 struct dma_ops_domain *entry, *ret = NULL;
88 unsigned long flags;
89 u16 alias = amd_iommu_alias_table[devid];
90
91 if (list_empty(&iommu_pd_list))
92 return NULL;
93
94 spin_lock_irqsave(&iommu_pd_list_lock, flags);
95
96 list_for_each_entry(entry, &iommu_pd_list, list) {
97 if (entry->target_dev == devid ||
98 entry->target_dev == alias) {
99 ret = entry;
100 break;
101 }
102 }
103
104 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
105
106 return ret;
107}
108
109/*
110 * This function checks if the driver got a valid device from the caller to
111 * avoid dereferencing invalid pointers.
112 */
113static bool check_device(struct device *dev)
114{
115 u16 devid;
116
117 if (!dev || !dev->dma_mask)
118 return false;
119
120 /* No device or no PCI device */
121 if (!dev || dev->bus != &pci_bus_type)
122 return false;
123
124 devid = get_device_id(dev);
125
126 /* Out of our scope? */
127 if (devid > amd_iommu_last_bdf)
128 return false;
129
130 if (amd_iommu_rlookup_table[devid] == NULL)
131 return false;
132
133 return true;
134}
135
136static int iommu_init_device(struct device *dev)
137{
138 struct iommu_dev_data *dev_data;
139 struct pci_dev *pdev;
140 u16 devid, alias;
141
142 if (dev->archdata.iommu)
143 return 0;
144
145 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
146 if (!dev_data)
147 return -ENOMEM;
148
149 dev_data->dev = dev;
150
151 devid = get_device_id(dev);
152 alias = amd_iommu_alias_table[devid];
153 pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff);
154 if (pdev)
155 dev_data->alias = &pdev->dev;
156
157 atomic_set(&dev_data->bind, 0);
158
159 dev->archdata.iommu = dev_data;
160
161
162 return 0;
163}
164
165static void iommu_uninit_device(struct device *dev)
166{
167 kfree(dev->archdata.iommu);
168}
169
170void __init amd_iommu_uninit_devices(void)
171{
172 struct pci_dev *pdev = NULL;
173
174 for_each_pci_dev(pdev) {
175
176 if (!check_device(&pdev->dev))
177 continue;
178
179 iommu_uninit_device(&pdev->dev);
180 }
181}
182
183int __init amd_iommu_init_devices(void)
184{
185 struct pci_dev *pdev = NULL;
186 int ret = 0;
187
188 for_each_pci_dev(pdev) {
189
190 if (!check_device(&pdev->dev))
191 continue;
192
193 ret = iommu_init_device(&pdev->dev);
194 if (ret)
195 goto out_free;
196 }
197
198 return 0;
199
200out_free:
201
202 amd_iommu_uninit_devices();
203
204 return ret;
205}
73#ifdef CONFIG_AMD_IOMMU_STATS 206#ifdef CONFIG_AMD_IOMMU_STATS
74 207
75/* 208/*
@@ -90,7 +223,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem);
90DECLARE_STATS_COUNTER(total_map_requests); 223DECLARE_STATS_COUNTER(total_map_requests);
91 224
92static struct dentry *stats_dir; 225static struct dentry *stats_dir;
93static struct dentry *de_isolate;
94static struct dentry *de_fflush; 226static struct dentry *de_fflush;
95 227
96static void amd_iommu_stats_add(struct __iommu_counter *cnt) 228static void amd_iommu_stats_add(struct __iommu_counter *cnt)
@@ -108,9 +240,6 @@ static void amd_iommu_stats_init(void)
108 if (stats_dir == NULL) 240 if (stats_dir == NULL)
109 return; 241 return;
110 242
111 de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
112 (u32 *)&amd_iommu_isolate);
113
114 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, 243 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
115 (u32 *)&amd_iommu_unmap_flush); 244 (u32 *)&amd_iommu_unmap_flush);
116 245
@@ -130,12 +259,6 @@ static void amd_iommu_stats_init(void)
130 259
131#endif 260#endif
132 261
133/* returns !0 if the IOMMU is caching non-present entries in its TLB */
134static int iommu_has_npcache(struct amd_iommu *iommu)
135{
136 return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
137}
138
139/**************************************************************************** 262/****************************************************************************
140 * 263 *
141 * Interrupt handling functions 264 * Interrupt handling functions
@@ -199,6 +322,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
199 break; 322 break;
200 case EVENT_TYPE_ILL_CMD: 323 case EVENT_TYPE_ILL_CMD:
201 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); 324 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
325 iommu->reset_in_progress = true;
202 reset_iommu_command_buffer(iommu); 326 reset_iommu_command_buffer(iommu);
203 dump_command(address); 327 dump_command(address);
204 break; 328 break;
@@ -321,11 +445,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
321 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; 445 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
322 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); 446 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
323 447
324 if (unlikely(i == EXIT_LOOP_COUNT)) { 448 if (unlikely(i == EXIT_LOOP_COUNT))
325 spin_unlock(&iommu->lock); 449 iommu->reset_in_progress = true;
326 reset_iommu_command_buffer(iommu);
327 spin_lock(&iommu->lock);
328 }
329} 450}
330 451
331/* 452/*
@@ -372,26 +493,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
372out: 493out:
373 spin_unlock_irqrestore(&iommu->lock, flags); 494 spin_unlock_irqrestore(&iommu->lock, flags);
374 495
496 if (iommu->reset_in_progress)
497 reset_iommu_command_buffer(iommu);
498
375 return 0; 499 return 0;
376} 500}
377 501
502static void iommu_flush_complete(struct protection_domain *domain)
503{
504 int i;
505
506 for (i = 0; i < amd_iommus_present; ++i) {
507 if (!domain->dev_iommu[i])
508 continue;
509
510 /*
511 * Devices of this domain are behind this IOMMU
512 * We need to wait for completion of all commands.
513 */
514 iommu_completion_wait(amd_iommus[i]);
515 }
516}
517
378/* 518/*
379 * Command send function for invalidating a device table entry 519 * Command send function for invalidating a device table entry
380 */ 520 */
381static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) 521static int iommu_flush_device(struct device *dev)
382{ 522{
523 struct amd_iommu *iommu;
383 struct iommu_cmd cmd; 524 struct iommu_cmd cmd;
384 int ret; 525 u16 devid;
385 526
386 BUG_ON(iommu == NULL); 527 devid = get_device_id(dev);
528 iommu = amd_iommu_rlookup_table[devid];
387 529
530 /* Build command */
388 memset(&cmd, 0, sizeof(cmd)); 531 memset(&cmd, 0, sizeof(cmd));
389 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); 532 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
390 cmd.data[0] = devid; 533 cmd.data[0] = devid;
391 534
392 ret = iommu_queue_command(iommu, &cmd); 535 return iommu_queue_command(iommu, &cmd);
393
394 return ret;
395} 536}
396 537
397static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, 538static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
@@ -430,11 +571,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
430 * It invalidates a single PTE if the range to flush is within a single 571 * It invalidates a single PTE if the range to flush is within a single
431 * page. Otherwise it flushes the whole TLB of the IOMMU. 572 * page. Otherwise it flushes the whole TLB of the IOMMU.
432 */ 573 */
433static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, 574static void __iommu_flush_pages(struct protection_domain *domain,
434 u64 address, size_t size) 575 u64 address, size_t size, int pde)
435{ 576{
436 int s = 0; 577 int s = 0, i;
437 unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); 578 unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
438 579
439 address &= PAGE_MASK; 580 address &= PAGE_MASK;
440 581
@@ -447,142 +588,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
447 s = 1; 588 s = 1;
448 } 589 }
449 590
450 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
451 591
452 return 0; 592 for (i = 0; i < amd_iommus_present; ++i) {
593 if (!domain->dev_iommu[i])
594 continue;
595
596 /*
597 * Devices of this domain are behind this IOMMU
598 * We need a TLB flush
599 */
600 iommu_queue_inv_iommu_pages(amd_iommus[i], address,
601 domain->id, pde, s);
602 }
603
604 return;
453} 605}
454 606
455/* Flush the whole IO/TLB for a given protection domain */ 607static void iommu_flush_pages(struct protection_domain *domain,
456static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) 608 u64 address, size_t size)
457{ 609{
458 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 610 __iommu_flush_pages(domain, address, size, 0);
459 611}
460 INC_STATS_COUNTER(domain_flush_single);
461 612
462 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 613/* Flush the whole IO/TLB for a given protection domain */
614static void iommu_flush_tlb(struct protection_domain *domain)
615{
616 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
463} 617}
464 618
465/* Flush the whole IO/TLB for a given protection domain - including PDE */ 619/* Flush the whole IO/TLB for a given protection domain - including PDE */
466static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) 620static void iommu_flush_tlb_pde(struct protection_domain *domain)
467{ 621{
468 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 622 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
469
470 INC_STATS_COUNTER(domain_flush_single);
471
472 iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
473} 623}
474 624
625
475/* 626/*
476 * This function flushes one domain on one IOMMU 627 * This function flushes the DTEs for all devices in domain
477 */ 628 */
478static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) 629static void iommu_flush_domain_devices(struct protection_domain *domain)
479{ 630{
480 struct iommu_cmd cmd; 631 struct iommu_dev_data *dev_data;
481 unsigned long flags; 632 unsigned long flags;
482 633
483 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 634 spin_lock_irqsave(&domain->lock, flags);
484 domid, 1, 1);
485 635
486 spin_lock_irqsave(&iommu->lock, flags); 636 list_for_each_entry(dev_data, &domain->dev_list, list)
487 __iommu_queue_command(iommu, &cmd); 637 iommu_flush_device(dev_data->dev);
488 __iommu_completion_wait(iommu); 638
489 __iommu_wait_for_completion(iommu); 639 spin_unlock_irqrestore(&domain->lock, flags);
490 spin_unlock_irqrestore(&iommu->lock, flags);
491} 640}
492 641
493static void flush_all_domains_on_iommu(struct amd_iommu *iommu) 642static void iommu_flush_all_domain_devices(void)
494{ 643{
495 int i; 644 struct protection_domain *domain;
645 unsigned long flags;
496 646
497 for (i = 1; i < MAX_DOMAIN_ID; ++i) { 647 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
498 if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) 648
499 continue; 649 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
500 flush_domain_on_iommu(iommu, i); 650 iommu_flush_domain_devices(domain);
651 iommu_flush_complete(domain);
501 } 652 }
502 653
654 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
655}
656
657void amd_iommu_flush_all_devices(void)
658{
659 iommu_flush_all_domain_devices();
503} 660}
504 661
505/* 662/*
506 * This function is used to flush the IO/TLB for a given protection domain 663 * This function uses heavy locking and may disable irqs for some time. But
507 * on every IOMMU in the system 664 * this is no issue because it is only called during resume.
508 */ 665 */
509static void iommu_flush_domain(u16 domid) 666void amd_iommu_flush_all_domains(void)
510{ 667{
511 struct amd_iommu *iommu; 668 struct protection_domain *domain;
669 unsigned long flags;
512 670
513 INC_STATS_COUNTER(domain_flush_all); 671 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
514 672
515 for_each_iommu(iommu) 673 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
516 flush_domain_on_iommu(iommu, domid); 674 spin_lock(&domain->lock);
675 iommu_flush_tlb_pde(domain);
676 iommu_flush_complete(domain);
677 spin_unlock(&domain->lock);
678 }
679
680 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
517} 681}
518 682
519void amd_iommu_flush_all_domains(void) 683static void reset_iommu_command_buffer(struct amd_iommu *iommu)
520{ 684{
521 struct amd_iommu *iommu; 685 pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
522 686
523 for_each_iommu(iommu) 687 if (iommu->reset_in_progress)
524 flush_all_domains_on_iommu(iommu); 688 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
689
690 amd_iommu_reset_cmd_buffer(iommu);
691 amd_iommu_flush_all_devices();
692 amd_iommu_flush_all_domains();
693
694 iommu->reset_in_progress = false;
525} 695}
526 696
527static void flush_all_devices_for_iommu(struct amd_iommu *iommu) 697/****************************************************************************
698 *
699 * The functions below are used the create the page table mappings for
700 * unity mapped regions.
701 *
702 ****************************************************************************/
703
704/*
705 * This function is used to add another level to an IO page table. Adding
706 * another level increases the size of the address space by 9 bits to a size up
707 * to 64 bits.
708 */
709static bool increase_address_space(struct protection_domain *domain,
710 gfp_t gfp)
528{ 711{
529 int i; 712 u64 *pte;
530 713
531 for (i = 0; i <= amd_iommu_last_bdf; ++i) { 714 if (domain->mode == PAGE_MODE_6_LEVEL)
532 if (iommu != amd_iommu_rlookup_table[i]) 715 /* address space already 64 bit large */
533 continue; 716 return false;
534 717
535 iommu_queue_inv_dev_entry(iommu, i); 718 pte = (void *)get_zeroed_page(gfp);
536 iommu_completion_wait(iommu); 719 if (!pte)
537 } 720 return false;
721
722 *pte = PM_LEVEL_PDE(domain->mode,
723 virt_to_phys(domain->pt_root));
724 domain->pt_root = pte;
725 domain->mode += 1;
726 domain->updated = true;
727
728 return true;
538} 729}
539 730
540static void flush_devices_by_domain(struct protection_domain *domain) 731static u64 *alloc_pte(struct protection_domain *domain,
732 unsigned long address,
733 int end_lvl,
734 u64 **pte_page,
735 gfp_t gfp)
541{ 736{
542 struct amd_iommu *iommu; 737 u64 *pte, *page;
543 int i; 738 int level;
544 739
545 for (i = 0; i <= amd_iommu_last_bdf; ++i) { 740 while (address > PM_LEVEL_SIZE(domain->mode))
546 if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || 741 increase_address_space(domain, gfp);
547 (amd_iommu_pd_table[i] != domain))
548 continue;
549 742
550 iommu = amd_iommu_rlookup_table[i]; 743 level = domain->mode - 1;
551 if (!iommu) 744 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
552 continue; 745
746 while (level > end_lvl) {
747 if (!IOMMU_PTE_PRESENT(*pte)) {
748 page = (u64 *)get_zeroed_page(gfp);
749 if (!page)
750 return NULL;
751 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
752 }
753
754 level -= 1;
755
756 pte = IOMMU_PTE_PAGE(*pte);
757
758 if (pte_page && level == end_lvl)
759 *pte_page = pte;
553 760
554 iommu_queue_inv_dev_entry(iommu, i); 761 pte = &pte[PM_LEVEL_INDEX(level, address)];
555 iommu_completion_wait(iommu);
556 } 762 }
763
764 return pte;
557} 765}
558 766
559static void reset_iommu_command_buffer(struct amd_iommu *iommu) 767/*
768 * This function checks if there is a PTE for a given dma address. If
769 * there is one, it returns the pointer to it.
770 */
771static u64 *fetch_pte(struct protection_domain *domain,
772 unsigned long address, int map_size)
560{ 773{
561 pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); 774 int level;
775 u64 *pte;
562 776
563 if (iommu->reset_in_progress) 777 level = domain->mode - 1;
564 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); 778 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
565 779
566 iommu->reset_in_progress = true; 780 while (level > map_size) {
781 if (!IOMMU_PTE_PRESENT(*pte))
782 return NULL;
567 783
568 amd_iommu_reset_cmd_buffer(iommu); 784 level -= 1;
569 flush_all_devices_for_iommu(iommu);
570 flush_all_domains_on_iommu(iommu);
571 785
572 iommu->reset_in_progress = false; 786 pte = IOMMU_PTE_PAGE(*pte);
573} 787 pte = &pte[PM_LEVEL_INDEX(level, address)];
574 788
575void amd_iommu_flush_all_devices(void) 789 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
576{ 790 pte = NULL;
577 flush_devices_by_domain(NULL); 791 break;
578} 792 }
793 }
579 794
580/**************************************************************************** 795 return pte;
581 * 796}
582 * The functions below are used the create the page table mappings for
583 * unity mapped regions.
584 *
585 ****************************************************************************/
586 797
587/* 798/*
588 * Generic mapping functions. It maps a physical address into a DMA 799 * Generic mapping functions. It maps a physical address into a DMA
@@ -654,28 +865,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
654} 865}
655 866
656/* 867/*
657 * Init the unity mappings for a specific IOMMU in the system
658 *
659 * Basically iterates over all unity mapping entries and applies them to
660 * the default domain DMA of that IOMMU if necessary.
661 */
662static int iommu_init_unity_mappings(struct amd_iommu *iommu)
663{
664 struct unity_map_entry *entry;
665 int ret;
666
667 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
668 if (!iommu_for_unity_map(iommu, entry))
669 continue;
670 ret = dma_ops_unity_map(iommu->default_dom, entry);
671 if (ret)
672 return ret;
673 }
674
675 return 0;
676}
677
678/*
679 * This function actually applies the mapping to the page table of the 868 * This function actually applies the mapping to the page table of the
680 * dma_ops domain. 869 * dma_ops domain.
681 */ 870 */
@@ -704,6 +893,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
704} 893}
705 894
706/* 895/*
896 * Init the unity mappings for a specific IOMMU in the system
897 *
898 * Basically iterates over all unity mapping entries and applies them to
899 * the default domain DMA of that IOMMU if necessary.
900 */
901static int iommu_init_unity_mappings(struct amd_iommu *iommu)
902{
903 struct unity_map_entry *entry;
904 int ret;
905
906 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
907 if (!iommu_for_unity_map(iommu, entry))
908 continue;
909 ret = dma_ops_unity_map(iommu->default_dom, entry);
910 if (ret)
911 return ret;
912 }
913
914 return 0;
915}
916
917/*
707 * Inits the unity mappings required for a specific device 918 * Inits the unity mappings required for a specific device
708 */ 919 */
709static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, 920static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
@@ -740,34 +951,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
740 */ 951 */
741 952
742/* 953/*
743 * This function checks if there is a PTE for a given dma address. If 954 * Used to reserve address ranges in the aperture (e.g. for exclusion
744 * there is one, it returns the pointer to it. 955 * ranges.
745 */ 956 */
746static u64 *fetch_pte(struct protection_domain *domain, 957static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
747 unsigned long address, int map_size) 958 unsigned long start_page,
959 unsigned int pages)
748{ 960{
749 int level; 961 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
750 u64 *pte;
751
752 level = domain->mode - 1;
753 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
754
755 while (level > map_size) {
756 if (!IOMMU_PTE_PRESENT(*pte))
757 return NULL;
758
759 level -= 1;
760 962
761 pte = IOMMU_PTE_PAGE(*pte); 963 if (start_page + pages > last_page)
762 pte = &pte[PM_LEVEL_INDEX(level, address)]; 964 pages = last_page - start_page;
763 965
764 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { 966 for (i = start_page; i < start_page + pages; ++i) {
765 pte = NULL; 967 int index = i / APERTURE_RANGE_PAGES;
766 break; 968 int page = i % APERTURE_RANGE_PAGES;
767 } 969 __set_bit(page, dom->aperture[index]->bitmap);
768 } 970 }
769
770 return pte;
771} 971}
772 972
773/* 973/*
@@ -775,11 +975,11 @@ static u64 *fetch_pte(struct protection_domain *domain,
775 * aperture in case of dma_ops domain allocation or address allocation 975 * aperture in case of dma_ops domain allocation or address allocation
776 * failure. 976 * failure.
777 */ 977 */
778static int alloc_new_range(struct amd_iommu *iommu, 978static int alloc_new_range(struct dma_ops_domain *dma_dom,
779 struct dma_ops_domain *dma_dom,
780 bool populate, gfp_t gfp) 979 bool populate, gfp_t gfp)
781{ 980{
782 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; 981 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
982 struct amd_iommu *iommu;
783 int i; 983 int i;
784 984
785#ifdef CONFIG_IOMMU_STRESS 985#ifdef CONFIG_IOMMU_STRESS
@@ -819,14 +1019,17 @@ static int alloc_new_range(struct amd_iommu *iommu,
819 dma_dom->aperture_size += APERTURE_RANGE_SIZE; 1019 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
820 1020
821 /* Intialize the exclusion range if necessary */ 1021 /* Intialize the exclusion range if necessary */
822 if (iommu->exclusion_start && 1022 for_each_iommu(iommu) {
823 iommu->exclusion_start >= dma_dom->aperture[index]->offset && 1023 if (iommu->exclusion_start &&
824 iommu->exclusion_start < dma_dom->aperture_size) { 1024 iommu->exclusion_start >= dma_dom->aperture[index]->offset
825 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; 1025 && iommu->exclusion_start < dma_dom->aperture_size) {
826 int pages = iommu_num_pages(iommu->exclusion_start, 1026 unsigned long startpage;
827 iommu->exclusion_length, 1027 int pages = iommu_num_pages(iommu->exclusion_start,
828 PAGE_SIZE); 1028 iommu->exclusion_length,
829 dma_ops_reserve_addresses(dma_dom, startpage, pages); 1029 PAGE_SIZE);
1030 startpage = iommu->exclusion_start >> PAGE_SHIFT;
1031 dma_ops_reserve_addresses(dma_dom, startpage, pages);
1032 }
830 } 1033 }
831 1034
832 /* 1035 /*
@@ -928,7 +1131,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
928 } 1131 }
929 1132
930 if (unlikely(address == -1)) 1133 if (unlikely(address == -1))
931 address = bad_dma_address; 1134 address = DMA_ERROR_CODE;
932 1135
933 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); 1136 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
934 1137
@@ -959,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
959 1162
960 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; 1163 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
961 1164
962 iommu_area_free(range->bitmap, address, pages); 1165 bitmap_clear(range->bitmap, address, pages);
963 1166
964} 1167}
965 1168
@@ -973,6 +1176,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
973 * 1176 *
974 ****************************************************************************/ 1177 ****************************************************************************/
975 1178
1179/*
1180 * This function adds a protection domain to the global protection domain list
1181 */
1182static void add_domain_to_list(struct protection_domain *domain)
1183{
1184 unsigned long flags;
1185
1186 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1187 list_add(&domain->list, &amd_iommu_pd_list);
1188 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1189}
1190
1191/*
1192 * This function removes a protection domain to the global
1193 * protection domain list
1194 */
1195static void del_domain_from_list(struct protection_domain *domain)
1196{
1197 unsigned long flags;
1198
1199 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1200 list_del(&domain->list);
1201 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1202}
1203
976static u16 domain_id_alloc(void) 1204static u16 domain_id_alloc(void)
977{ 1205{
978 unsigned long flags; 1206 unsigned long flags;
@@ -1000,26 +1228,6 @@ static void domain_id_free(int id)
1000 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1228 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1001} 1229}
1002 1230
1003/*
1004 * Used to reserve address ranges in the aperture (e.g. for exclusion
1005 * ranges.
1006 */
1007static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1008 unsigned long start_page,
1009 unsigned int pages)
1010{
1011 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1012
1013 if (start_page + pages > last_page)
1014 pages = last_page - start_page;
1015
1016 for (i = start_page; i < start_page + pages; ++i) {
1017 int index = i / APERTURE_RANGE_PAGES;
1018 int page = i % APERTURE_RANGE_PAGES;
1019 __set_bit(page, dom->aperture[index]->bitmap);
1020 }
1021}
1022
1023static void free_pagetable(struct protection_domain *domain) 1231static void free_pagetable(struct protection_domain *domain)
1024{ 1232{
1025 int i, j; 1233 int i, j;
@@ -1061,6 +1269,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
1061 if (!dom) 1269 if (!dom)
1062 return; 1270 return;
1063 1271
1272 del_domain_from_list(&dom->domain);
1273
1064 free_pagetable(&dom->domain); 1274 free_pagetable(&dom->domain);
1065 1275
1066 for (i = 0; i < APERTURE_MAX_RANGES; ++i) { 1276 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
@@ -1078,7 +1288,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
1078 * It also intializes the page table and the address allocator data 1288 * It also intializes the page table and the address allocator data
1079 * structures required for the dma_ops interface 1289 * structures required for the dma_ops interface
1080 */ 1290 */
1081static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) 1291static struct dma_ops_domain *dma_ops_domain_alloc(void)
1082{ 1292{
1083 struct dma_ops_domain *dma_dom; 1293 struct dma_ops_domain *dma_dom;
1084 1294
@@ -1091,6 +1301,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
1091 dma_dom->domain.id = domain_id_alloc(); 1301 dma_dom->domain.id = domain_id_alloc();
1092 if (dma_dom->domain.id == 0) 1302 if (dma_dom->domain.id == 0)
1093 goto free_dma_dom; 1303 goto free_dma_dom;
1304 INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1094 dma_dom->domain.mode = PAGE_MODE_2_LEVEL; 1305 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1095 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1306 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1096 dma_dom->domain.flags = PD_DMA_OPS_MASK; 1307 dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1101,7 +1312,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
1101 dma_dom->need_flush = false; 1312 dma_dom->need_flush = false;
1102 dma_dom->target_dev = 0xffff; 1313 dma_dom->target_dev = 0xffff;
1103 1314
1104 if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) 1315 add_domain_to_list(&dma_dom->domain);
1316
1317 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1105 goto free_dma_dom; 1318 goto free_dma_dom;
1106 1319
1107 /* 1320 /*
@@ -1129,22 +1342,6 @@ static bool dma_ops_domain(struct protection_domain *domain)
1129 return domain->flags & PD_DMA_OPS_MASK; 1342 return domain->flags & PD_DMA_OPS_MASK;
1130} 1343}
1131 1344
1132/*
1133 * Find out the protection domain structure for a given PCI device. This
1134 * will give us the pointer to the page table root for example.
1135 */
1136static struct protection_domain *domain_for_device(u16 devid)
1137{
1138 struct protection_domain *dom;
1139 unsigned long flags;
1140
1141 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1142 dom = amd_iommu_pd_table[devid];
1143 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1144
1145 return dom;
1146}
1147
1148static void set_dte_entry(u16 devid, struct protection_domain *domain) 1345static void set_dte_entry(u16 devid, struct protection_domain *domain)
1149{ 1346{
1150 u64 pte_root = virt_to_phys(domain->pt_root); 1347 u64 pte_root = virt_to_phys(domain->pt_root);
@@ -1156,42 +1353,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain)
1156 amd_iommu_dev_table[devid].data[2] = domain->id; 1353 amd_iommu_dev_table[devid].data[2] = domain->id;
1157 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); 1354 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1158 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); 1355 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1356}
1159 1357
1160 amd_iommu_pd_table[devid] = domain; 1358static void clear_dte_entry(u16 devid)
1359{
1360 /* remove entry from the device table seen by the hardware */
1361 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1362 amd_iommu_dev_table[devid].data[1] = 0;
1363 amd_iommu_dev_table[devid].data[2] = 0;
1364
1365 amd_iommu_apply_erratum_63(devid);
1366}
1367
1368static void do_attach(struct device *dev, struct protection_domain *domain)
1369{
1370 struct iommu_dev_data *dev_data;
1371 struct amd_iommu *iommu;
1372 u16 devid;
1373
1374 devid = get_device_id(dev);
1375 iommu = amd_iommu_rlookup_table[devid];
1376 dev_data = get_dev_data(dev);
1377
1378 /* Update data structures */
1379 dev_data->domain = domain;
1380 list_add(&dev_data->list, &domain->dev_list);
1381 set_dte_entry(devid, domain);
1382
1383 /* Do reference counting */
1384 domain->dev_iommu[iommu->index] += 1;
1385 domain->dev_cnt += 1;
1386
1387 /* Flush the DTE entry */
1388 iommu_flush_device(dev);
1389}
1390
1391static void do_detach(struct device *dev)
1392{
1393 struct iommu_dev_data *dev_data;
1394 struct amd_iommu *iommu;
1395 u16 devid;
1396
1397 devid = get_device_id(dev);
1398 iommu = amd_iommu_rlookup_table[devid];
1399 dev_data = get_dev_data(dev);
1400
1401 /* decrease reference counters */
1402 dev_data->domain->dev_iommu[iommu->index] -= 1;
1403 dev_data->domain->dev_cnt -= 1;
1404
1405 /* Update data structures */
1406 dev_data->domain = NULL;
1407 list_del(&dev_data->list);
1408 clear_dte_entry(devid);
1409
1410 /* Flush the DTE entry */
1411 iommu_flush_device(dev);
1161} 1412}
1162 1413
1163/* 1414/*
1164 * If a device is not yet associated with a domain, this function does 1415 * If a device is not yet associated with a domain, this function does
1165 * assigns it visible for the hardware 1416 * assigns it visible for the hardware
1166 */ 1417 */
1167static void __attach_device(struct amd_iommu *iommu, 1418static int __attach_device(struct device *dev,
1168 struct protection_domain *domain, 1419 struct protection_domain *domain)
1169 u16 devid)
1170{ 1420{
1421 struct iommu_dev_data *dev_data, *alias_data;
1422
1423 dev_data = get_dev_data(dev);
1424 alias_data = get_dev_data(dev_data->alias);
1425
1426 if (!alias_data)
1427 return -EINVAL;
1428
1171 /* lock domain */ 1429 /* lock domain */
1172 spin_lock(&domain->lock); 1430 spin_lock(&domain->lock);
1173 1431
1174 /* update DTE entry */ 1432 /* Some sanity checks */
1175 set_dte_entry(devid, domain); 1433 if (alias_data->domain != NULL &&
1434 alias_data->domain != domain)
1435 return -EBUSY;
1436
1437 if (dev_data->domain != NULL &&
1438 dev_data->domain != domain)
1439 return -EBUSY;
1176 1440
1177 domain->dev_cnt += 1; 1441 /* Do real assignment */
1442 if (dev_data->alias != dev) {
1443 alias_data = get_dev_data(dev_data->alias);
1444 if (alias_data->domain == NULL)
1445 do_attach(dev_data->alias, domain);
1446
1447 atomic_inc(&alias_data->bind);
1448 }
1449
1450 if (dev_data->domain == NULL)
1451 do_attach(dev, domain);
1452
1453 atomic_inc(&dev_data->bind);
1178 1454
1179 /* ready */ 1455 /* ready */
1180 spin_unlock(&domain->lock); 1456 spin_unlock(&domain->lock);
1457
1458 return 0;
1181} 1459}
1182 1460
1183/* 1461/*
1184 * If a device is not yet associated with a domain, this function does 1462 * If a device is not yet associated with a domain, this function does
1185 * assigns it visible for the hardware 1463 * assigns it visible for the hardware
1186 */ 1464 */
1187static void attach_device(struct amd_iommu *iommu, 1465static int attach_device(struct device *dev,
1188 struct protection_domain *domain, 1466 struct protection_domain *domain)
1189 u16 devid)
1190{ 1467{
1191 unsigned long flags; 1468 unsigned long flags;
1469 int ret;
1192 1470
1193 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1471 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1194 __attach_device(iommu, domain, devid); 1472 ret = __attach_device(dev, domain);
1195 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1473 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1196 1474
1197 /* 1475 /*
@@ -1199,96 +1477,125 @@ static void attach_device(struct amd_iommu *iommu,
1199 * left the caches in the IOMMU dirty. So we have to flush 1477 * left the caches in the IOMMU dirty. So we have to flush
1200 * here to evict all dirty stuff. 1478 * here to evict all dirty stuff.
1201 */ 1479 */
1202 iommu_queue_inv_dev_entry(iommu, devid); 1480 iommu_flush_tlb_pde(domain);
1203 iommu_flush_tlb_pde(iommu, domain->id); 1481
1482 return ret;
1204} 1483}
1205 1484
1206/* 1485/*
1207 * Removes a device from a protection domain (unlocked) 1486 * Removes a device from a protection domain (unlocked)
1208 */ 1487 */
1209static void __detach_device(struct protection_domain *domain, u16 devid) 1488static void __detach_device(struct device *dev)
1210{ 1489{
1490 struct iommu_dev_data *dev_data = get_dev_data(dev);
1491 struct iommu_dev_data *alias_data;
1492 unsigned long flags;
1211 1493
1212 /* lock domain */ 1494 BUG_ON(!dev_data->domain);
1213 spin_lock(&domain->lock);
1214 1495
1215 /* remove domain from the lookup table */ 1496 spin_lock_irqsave(&dev_data->domain->lock, flags);
1216 amd_iommu_pd_table[devid] = NULL;
1217 1497
1218 /* remove entry from the device table seen by the hardware */ 1498 if (dev_data->alias != dev) {
1219 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; 1499 alias_data = get_dev_data(dev_data->alias);
1220 amd_iommu_dev_table[devid].data[1] = 0; 1500 if (atomic_dec_and_test(&alias_data->bind))
1221 amd_iommu_dev_table[devid].data[2] = 0; 1501 do_detach(dev_data->alias);
1502 }
1222 1503
1223 /* decrease reference counter */ 1504 if (atomic_dec_and_test(&dev_data->bind))
1224 domain->dev_cnt -= 1; 1505 do_detach(dev);
1225 1506
1226 /* ready */ 1507 spin_unlock_irqrestore(&dev_data->domain->lock, flags);
1227 spin_unlock(&domain->lock);
1228 1508
1229 /* 1509 /*
1230 * If we run in passthrough mode the device must be assigned to the 1510 * If we run in passthrough mode the device must be assigned to the
1231 * passthrough domain if it is detached from any other domain 1511 * passthrough domain if it is detached from any other domain
1232 */ 1512 */
1233 if (iommu_pass_through) { 1513 if (iommu_pass_through && dev_data->domain == NULL)
1234 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; 1514 __attach_device(dev, pt_domain);
1235 __attach_device(iommu, pt_domain, devid);
1236 }
1237} 1515}
1238 1516
1239/* 1517/*
1240 * Removes a device from a protection domain (with devtable_lock held) 1518 * Removes a device from a protection domain (with devtable_lock held)
1241 */ 1519 */
1242static void detach_device(struct protection_domain *domain, u16 devid) 1520static void detach_device(struct device *dev)
1243{ 1521{
1244 unsigned long flags; 1522 unsigned long flags;
1245 1523
1246 /* lock device table */ 1524 /* lock device table */
1247 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1525 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1248 __detach_device(domain, devid); 1526 __detach_device(dev);
1249 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1527 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1250} 1528}
1251 1529
1530/*
1531 * Find out the protection domain structure for a given PCI device. This
1532 * will give us the pointer to the page table root for example.
1533 */
1534static struct protection_domain *domain_for_device(struct device *dev)
1535{
1536 struct protection_domain *dom;
1537 struct iommu_dev_data *dev_data, *alias_data;
1538 unsigned long flags;
1539 u16 devid, alias;
1540
1541 devid = get_device_id(dev);
1542 alias = amd_iommu_alias_table[devid];
1543 dev_data = get_dev_data(dev);
1544 alias_data = get_dev_data(dev_data->alias);
1545 if (!alias_data)
1546 return NULL;
1547
1548 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1549 dom = dev_data->domain;
1550 if (dom == NULL &&
1551 alias_data->domain != NULL) {
1552 __attach_device(dev, alias_data->domain);
1553 dom = alias_data->domain;
1554 }
1555
1556 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1557
1558 return dom;
1559}
1560
1252static int device_change_notifier(struct notifier_block *nb, 1561static int device_change_notifier(struct notifier_block *nb,
1253 unsigned long action, void *data) 1562 unsigned long action, void *data)
1254{ 1563{
1255 struct device *dev = data; 1564 struct device *dev = data;
1256 struct pci_dev *pdev = to_pci_dev(dev); 1565 u16 devid;
1257 u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
1258 struct protection_domain *domain; 1566 struct protection_domain *domain;
1259 struct dma_ops_domain *dma_domain; 1567 struct dma_ops_domain *dma_domain;
1260 struct amd_iommu *iommu; 1568 struct amd_iommu *iommu;
1261 unsigned long flags; 1569 unsigned long flags;
1262 1570
1263 if (devid > amd_iommu_last_bdf) 1571 if (!check_device(dev))
1264 goto out; 1572 return 0;
1265
1266 devid = amd_iommu_alias_table[devid];
1267
1268 iommu = amd_iommu_rlookup_table[devid];
1269 if (iommu == NULL)
1270 goto out;
1271
1272 domain = domain_for_device(devid);
1273 1573
1274 if (domain && !dma_ops_domain(domain)) 1574 devid = get_device_id(dev);
1275 WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " 1575 iommu = amd_iommu_rlookup_table[devid];
1276 "to a non-dma-ops domain\n", dev_name(dev));
1277 1576
1278 switch (action) { 1577 switch (action) {
1279 case BUS_NOTIFY_UNBOUND_DRIVER: 1578 case BUS_NOTIFY_UNBOUND_DRIVER:
1579
1580 domain = domain_for_device(dev);
1581
1280 if (!domain) 1582 if (!domain)
1281 goto out; 1583 goto out;
1282 if (iommu_pass_through) 1584 if (iommu_pass_through)
1283 break; 1585 break;
1284 detach_device(domain, devid); 1586 detach_device(dev);
1285 break; 1587 break;
1286 case BUS_NOTIFY_ADD_DEVICE: 1588 case BUS_NOTIFY_ADD_DEVICE:
1589
1590 iommu_init_device(dev);
1591
1592 domain = domain_for_device(dev);
1593
1287 /* allocate a protection domain if a device is added */ 1594 /* allocate a protection domain if a device is added */
1288 dma_domain = find_protection_domain(devid); 1595 dma_domain = find_protection_domain(devid);
1289 if (dma_domain) 1596 if (dma_domain)
1290 goto out; 1597 goto out;
1291 dma_domain = dma_ops_domain_alloc(iommu); 1598 dma_domain = dma_ops_domain_alloc();
1292 if (!dma_domain) 1599 if (!dma_domain)
1293 goto out; 1600 goto out;
1294 dma_domain->target_dev = devid; 1601 dma_domain->target_dev = devid;
@@ -1298,11 +1605,15 @@ static int device_change_notifier(struct notifier_block *nb,
1298 spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 1605 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1299 1606
1300 break; 1607 break;
1608 case BUS_NOTIFY_DEL_DEVICE:
1609
1610 iommu_uninit_device(dev);
1611
1301 default: 1612 default:
1302 goto out; 1613 goto out;
1303 } 1614 }
1304 1615
1305 iommu_queue_inv_dev_entry(iommu, devid); 1616 iommu_flush_device(dev);
1306 iommu_completion_wait(iommu); 1617 iommu_completion_wait(iommu);
1307 1618
1308out: 1619out:
@@ -1313,6 +1624,11 @@ static struct notifier_block device_nb = {
1313 .notifier_call = device_change_notifier, 1624 .notifier_call = device_change_notifier,
1314}; 1625};
1315 1626
1627void amd_iommu_init_notifier(void)
1628{
1629 bus_register_notifier(&pci_bus_type, &device_nb);
1630}
1631
1316/***************************************************************************** 1632/*****************************************************************************
1317 * 1633 *
1318 * The next functions belong to the dma_ops mapping/unmapping code. 1634 * The next functions belong to the dma_ops mapping/unmapping code.
@@ -1320,106 +1636,46 @@ static struct notifier_block device_nb = {
1320 *****************************************************************************/ 1636 *****************************************************************************/
1321 1637
1322/* 1638/*
1323 * This function checks if the driver got a valid device from the caller to
1324 * avoid dereferencing invalid pointers.
1325 */
1326static bool check_device(struct device *dev)
1327{
1328 if (!dev || !dev->dma_mask)
1329 return false;
1330
1331 return true;
1332}
1333
1334/*
1335 * In this function the list of preallocated protection domains is traversed to
1336 * find the domain for a specific device
1337 */
1338static struct dma_ops_domain *find_protection_domain(u16 devid)
1339{
1340 struct dma_ops_domain *entry, *ret = NULL;
1341 unsigned long flags;
1342
1343 if (list_empty(&iommu_pd_list))
1344 return NULL;
1345
1346 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1347
1348 list_for_each_entry(entry, &iommu_pd_list, list) {
1349 if (entry->target_dev == devid) {
1350 ret = entry;
1351 break;
1352 }
1353 }
1354
1355 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1356
1357 return ret;
1358}
1359
1360/*
1361 * In the dma_ops path we only have the struct device. This function 1639 * In the dma_ops path we only have the struct device. This function
1362 * finds the corresponding IOMMU, the protection domain and the 1640 * finds the corresponding IOMMU, the protection domain and the
1363 * requestor id for a given device. 1641 * requestor id for a given device.
1364 * If the device is not yet associated with a domain this is also done 1642 * If the device is not yet associated with a domain this is also done
1365 * in this function. 1643 * in this function.
1366 */ 1644 */
1367static int get_device_resources(struct device *dev, 1645static struct protection_domain *get_domain(struct device *dev)
1368 struct amd_iommu **iommu,
1369 struct protection_domain **domain,
1370 u16 *bdf)
1371{ 1646{
1647 struct protection_domain *domain;
1372 struct dma_ops_domain *dma_dom; 1648 struct dma_ops_domain *dma_dom;
1373 struct pci_dev *pcidev; 1649 u16 devid = get_device_id(dev);
1374 u16 _bdf;
1375
1376 *iommu = NULL;
1377 *domain = NULL;
1378 *bdf = 0xffff;
1379
1380 if (dev->bus != &pci_bus_type)
1381 return 0;
1382
1383 pcidev = to_pci_dev(dev);
1384 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
1385 1650
1386 /* device not translated by any IOMMU in the system? */ 1651 if (!check_device(dev))
1387 if (_bdf > amd_iommu_last_bdf) 1652 return ERR_PTR(-EINVAL);
1388 return 0;
1389 1653
1390 *bdf = amd_iommu_alias_table[_bdf]; 1654 domain = domain_for_device(dev);
1655 if (domain != NULL && !dma_ops_domain(domain))
1656 return ERR_PTR(-EBUSY);
1391 1657
1392 *iommu = amd_iommu_rlookup_table[*bdf]; 1658 if (domain != NULL)
1393 if (*iommu == NULL) 1659 return domain;
1394 return 0;
1395 *domain = domain_for_device(*bdf);
1396 if (*domain == NULL) {
1397 dma_dom = find_protection_domain(*bdf);
1398 if (!dma_dom)
1399 dma_dom = (*iommu)->default_dom;
1400 *domain = &dma_dom->domain;
1401 attach_device(*iommu, *domain, *bdf);
1402 DUMP_printk("Using protection domain %d for device %s\n",
1403 (*domain)->id, dev_name(dev));
1404 }
1405 1660
1406 if (domain_for_device(_bdf) == NULL) 1661 /* Device not bount yet - bind it */
1407 attach_device(*iommu, *domain, _bdf); 1662 dma_dom = find_protection_domain(devid);
1663 if (!dma_dom)
1664 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1665 attach_device(dev, &dma_dom->domain);
1666 DUMP_printk("Using protection domain %d for device %s\n",
1667 dma_dom->domain.id, dev_name(dev));
1408 1668
1409 return 1; 1669 return &dma_dom->domain;
1410} 1670}
1411 1671
1412static void update_device_table(struct protection_domain *domain) 1672static void update_device_table(struct protection_domain *domain)
1413{ 1673{
1414 unsigned long flags; 1674 struct iommu_dev_data *dev_data;
1415 int i;
1416 1675
1417 for (i = 0; i <= amd_iommu_last_bdf; ++i) { 1676 list_for_each_entry(dev_data, &domain->dev_list, list) {
1418 if (amd_iommu_pd_table[i] != domain) 1677 u16 devid = get_device_id(dev_data->dev);
1419 continue; 1678 set_dte_entry(devid, domain);
1420 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1421 set_dte_entry(i, domain);
1422 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1423 } 1679 }
1424} 1680}
1425 1681
@@ -1429,76 +1685,13 @@ static void update_domain(struct protection_domain *domain)
1429 return; 1685 return;
1430 1686
1431 update_device_table(domain); 1687 update_device_table(domain);
1432 flush_devices_by_domain(domain); 1688 iommu_flush_domain_devices(domain);
1433 iommu_flush_domain(domain->id); 1689 iommu_flush_tlb_pde(domain);
1434 1690
1435 domain->updated = false; 1691 domain->updated = false;
1436} 1692}
1437 1693
1438/* 1694/*
1439 * This function is used to add another level to an IO page table. Adding
1440 * another level increases the size of the address space by 9 bits to a size up
1441 * to 64 bits.
1442 */
1443static bool increase_address_space(struct protection_domain *domain,
1444 gfp_t gfp)
1445{
1446 u64 *pte;
1447
1448 if (domain->mode == PAGE_MODE_6_LEVEL)
1449 /* address space already 64 bit large */
1450 return false;
1451
1452 pte = (void *)get_zeroed_page(gfp);
1453 if (!pte)
1454 return false;
1455
1456 *pte = PM_LEVEL_PDE(domain->mode,
1457 virt_to_phys(domain->pt_root));
1458 domain->pt_root = pte;
1459 domain->mode += 1;
1460 domain->updated = true;
1461
1462 return true;
1463}
1464
1465static u64 *alloc_pte(struct protection_domain *domain,
1466 unsigned long address,
1467 int end_lvl,
1468 u64 **pte_page,
1469 gfp_t gfp)
1470{
1471 u64 *pte, *page;
1472 int level;
1473
1474 while (address > PM_LEVEL_SIZE(domain->mode))
1475 increase_address_space(domain, gfp);
1476
1477 level = domain->mode - 1;
1478 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
1479
1480 while (level > end_lvl) {
1481 if (!IOMMU_PTE_PRESENT(*pte)) {
1482 page = (u64 *)get_zeroed_page(gfp);
1483 if (!page)
1484 return NULL;
1485 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
1486 }
1487
1488 level -= 1;
1489
1490 pte = IOMMU_PTE_PAGE(*pte);
1491
1492 if (pte_page && level == end_lvl)
1493 *pte_page = pte;
1494
1495 pte = &pte[PM_LEVEL_INDEX(level, address)];
1496 }
1497
1498 return pte;
1499}
1500
1501/*
1502 * This function fetches the PTE for a given address in the aperture 1695 * This function fetches the PTE for a given address in the aperture
1503 */ 1696 */
1504static u64* dma_ops_get_pte(struct dma_ops_domain *dom, 1697static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
@@ -1528,8 +1721,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1528 * This is the generic map function. It maps one 4kb page at paddr to 1721 * This is the generic map function. It maps one 4kb page at paddr to
1529 * the given address in the DMA address space for the domain. 1722 * the given address in the DMA address space for the domain.
1530 */ 1723 */
1531static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, 1724static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1532 struct dma_ops_domain *dom,
1533 unsigned long address, 1725 unsigned long address,
1534 phys_addr_t paddr, 1726 phys_addr_t paddr,
1535 int direction) 1727 int direction)
@@ -1542,7 +1734,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1542 1734
1543 pte = dma_ops_get_pte(dom, address); 1735 pte = dma_ops_get_pte(dom, address);
1544 if (!pte) 1736 if (!pte)
1545 return bad_dma_address; 1737 return DMA_ERROR_CODE;
1546 1738
1547 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; 1739 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1548 1740
@@ -1563,8 +1755,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1563/* 1755/*
1564 * The generic unmapping function for on page in the DMA address space. 1756 * The generic unmapping function for on page in the DMA address space.
1565 */ 1757 */
1566static void dma_ops_domain_unmap(struct amd_iommu *iommu, 1758static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
1567 struct dma_ops_domain *dom,
1568 unsigned long address) 1759 unsigned long address)
1569{ 1760{
1570 struct aperture_range *aperture; 1761 struct aperture_range *aperture;
@@ -1595,7 +1786,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
1595 * Must be called with the domain lock held. 1786 * Must be called with the domain lock held.
1596 */ 1787 */
1597static dma_addr_t __map_single(struct device *dev, 1788static dma_addr_t __map_single(struct device *dev,
1598 struct amd_iommu *iommu,
1599 struct dma_ops_domain *dma_dom, 1789 struct dma_ops_domain *dma_dom,
1600 phys_addr_t paddr, 1790 phys_addr_t paddr,
1601 size_t size, 1791 size_t size,
@@ -1623,7 +1813,7 @@ static dma_addr_t __map_single(struct device *dev,
1623retry: 1813retry:
1624 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 1814 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1625 dma_mask); 1815 dma_mask);
1626 if (unlikely(address == bad_dma_address)) { 1816 if (unlikely(address == DMA_ERROR_CODE)) {
1627 /* 1817 /*
1628 * setting next_address here will let the address 1818 * setting next_address here will let the address
1629 * allocator only scan the new allocated range in the 1819 * allocator only scan the new allocated range in the
@@ -1631,11 +1821,11 @@ retry:
1631 */ 1821 */
1632 dma_dom->next_address = dma_dom->aperture_size; 1822 dma_dom->next_address = dma_dom->aperture_size;
1633 1823
1634 if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) 1824 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
1635 goto out; 1825 goto out;
1636 1826
1637 /* 1827 /*
1638 * aperture was sucessfully enlarged by 128 MB, try 1828 * aperture was successfully enlarged by 128 MB, try
1639 * allocation again 1829 * allocation again
1640 */ 1830 */
1641 goto retry; 1831 goto retry;
@@ -1643,8 +1833,8 @@ retry:
1643 1833
1644 start = address; 1834 start = address;
1645 for (i = 0; i < pages; ++i) { 1835 for (i = 0; i < pages; ++i) {
1646 ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); 1836 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
1647 if (ret == bad_dma_address) 1837 if (ret == DMA_ERROR_CODE)
1648 goto out_unmap; 1838 goto out_unmap;
1649 1839
1650 paddr += PAGE_SIZE; 1840 paddr += PAGE_SIZE;
@@ -1655,10 +1845,10 @@ retry:
1655 ADD_STATS_COUNTER(alloced_io_mem, size); 1845 ADD_STATS_COUNTER(alloced_io_mem, size);
1656 1846
1657 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1847 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
1658 iommu_flush_tlb(iommu, dma_dom->domain.id); 1848 iommu_flush_tlb(&dma_dom->domain);
1659 dma_dom->need_flush = false; 1849 dma_dom->need_flush = false;
1660 } else if (unlikely(iommu_has_npcache(iommu))) 1850 } else if (unlikely(amd_iommu_np_cache))
1661 iommu_flush_pages(iommu, dma_dom->domain.id, address, size); 1851 iommu_flush_pages(&dma_dom->domain, address, size);
1662 1852
1663out: 1853out:
1664 return address; 1854 return address;
@@ -1667,20 +1857,19 @@ out_unmap:
1667 1857
1668 for (--i; i >= 0; --i) { 1858 for (--i; i >= 0; --i) {
1669 start -= PAGE_SIZE; 1859 start -= PAGE_SIZE;
1670 dma_ops_domain_unmap(iommu, dma_dom, start); 1860 dma_ops_domain_unmap(dma_dom, start);
1671 } 1861 }
1672 1862
1673 dma_ops_free_addresses(dma_dom, address, pages); 1863 dma_ops_free_addresses(dma_dom, address, pages);
1674 1864
1675 return bad_dma_address; 1865 return DMA_ERROR_CODE;
1676} 1866}
1677 1867
1678/* 1868/*
1679 * Does the reverse of the __map_single function. Must be called with 1869 * Does the reverse of the __map_single function. Must be called with
1680 * the domain lock held too 1870 * the domain lock held too
1681 */ 1871 */
1682static void __unmap_single(struct amd_iommu *iommu, 1872static void __unmap_single(struct dma_ops_domain *dma_dom,
1683 struct dma_ops_domain *dma_dom,
1684 dma_addr_t dma_addr, 1873 dma_addr_t dma_addr,
1685 size_t size, 1874 size_t size,
1686 int dir) 1875 int dir)
@@ -1688,7 +1877,7 @@ static void __unmap_single(struct amd_iommu *iommu,
1688 dma_addr_t i, start; 1877 dma_addr_t i, start;
1689 unsigned int pages; 1878 unsigned int pages;
1690 1879
1691 if ((dma_addr == bad_dma_address) || 1880 if ((dma_addr == DMA_ERROR_CODE) ||
1692 (dma_addr + size > dma_dom->aperture_size)) 1881 (dma_addr + size > dma_dom->aperture_size))
1693 return; 1882 return;
1694 1883
@@ -1697,7 +1886,7 @@ static void __unmap_single(struct amd_iommu *iommu,
1697 start = dma_addr; 1886 start = dma_addr;
1698 1887
1699 for (i = 0; i < pages; ++i) { 1888 for (i = 0; i < pages; ++i) {
1700 dma_ops_domain_unmap(iommu, dma_dom, start); 1889 dma_ops_domain_unmap(dma_dom, start);
1701 start += PAGE_SIZE; 1890 start += PAGE_SIZE;
1702 } 1891 }
1703 1892
@@ -1706,7 +1895,7 @@ static void __unmap_single(struct amd_iommu *iommu,
1706 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1895 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1707 1896
1708 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 1897 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
1709 iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); 1898 iommu_flush_pages(&dma_dom->domain, dma_addr, size);
1710 dma_dom->need_flush = false; 1899 dma_dom->need_flush = false;
1711 } 1900 }
1712} 1901}
@@ -1720,36 +1909,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
1720 struct dma_attrs *attrs) 1909 struct dma_attrs *attrs)
1721{ 1910{
1722 unsigned long flags; 1911 unsigned long flags;
1723 struct amd_iommu *iommu;
1724 struct protection_domain *domain; 1912 struct protection_domain *domain;
1725 u16 devid;
1726 dma_addr_t addr; 1913 dma_addr_t addr;
1727 u64 dma_mask; 1914 u64 dma_mask;
1728 phys_addr_t paddr = page_to_phys(page) + offset; 1915 phys_addr_t paddr = page_to_phys(page) + offset;
1729 1916
1730 INC_STATS_COUNTER(cnt_map_single); 1917 INC_STATS_COUNTER(cnt_map_single);
1731 1918
1732 if (!check_device(dev)) 1919 domain = get_domain(dev);
1733 return bad_dma_address; 1920 if (PTR_ERR(domain) == -EINVAL)
1734
1735 dma_mask = *dev->dma_mask;
1736
1737 get_device_resources(dev, &iommu, &domain, &devid);
1738
1739 if (iommu == NULL || domain == NULL)
1740 /* device not handled by any AMD IOMMU */
1741 return (dma_addr_t)paddr; 1921 return (dma_addr_t)paddr;
1922 else if (IS_ERR(domain))
1923 return DMA_ERROR_CODE;
1742 1924
1743 if (!dma_ops_domain(domain)) 1925 dma_mask = *dev->dma_mask;
1744 return bad_dma_address;
1745 1926
1746 spin_lock_irqsave(&domain->lock, flags); 1927 spin_lock_irqsave(&domain->lock, flags);
1747 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, 1928
1929 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
1748 dma_mask); 1930 dma_mask);
1749 if (addr == bad_dma_address) 1931 if (addr == DMA_ERROR_CODE)
1750 goto out; 1932 goto out;
1751 1933
1752 iommu_completion_wait(iommu); 1934 iommu_flush_complete(domain);
1753 1935
1754out: 1936out:
1755 spin_unlock_irqrestore(&domain->lock, flags); 1937 spin_unlock_irqrestore(&domain->lock, flags);
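
The rewritten map_page() relies on the ERR_PTR convention: get_domain() hands back a real domain, ERR_PTR(-EINVAL) when the device is simply not translated by an AMD IOMMU (in which case the physical address is returned as the DMA address), or some other error pointer that maps to DMA_ERROR_CODE. Below is a compact userspace model of that three-way contract; the toy_err_ptr()/toy_is_err() helpers are modelled on, but are not, the kernel's ERR_PTR()/IS_ERR(), and all toy_* names are invented for the sketch.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal ERR_PTR-style helpers, modelled on the kernel convention. */
#define TOY_MAX_ERRNO 4095
static inline void *toy_err_ptr(long err)       { return (void *)err; }
static inline long  toy_ptr_err(const void *p)  { return (long)p; }
static inline int   toy_is_err(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-TOY_MAX_ERRNO;
}

struct toy_domain { int id; };

#define TOY_DMA_ERROR ((uint64_t)0)   /* stand-in for DMA_ERROR_CODE */

/* Pretend lookup: dev_id 1 is behind the IOMMU, 2 is not, others are broken. */
static struct toy_domain *toy_get_domain(int dev_id)
{
	static struct toy_domain dom = { .id = 42 };

	if (dev_id == 1)
		return &dom;
	if (dev_id == 2)
		return toy_err_ptr(-EINVAL);   /* device not handled by the IOMMU */
	return toy_err_ptr(-EBUSY);            /* some other failure              */
}

static uint64_t toy_map_page(int dev_id, uint64_t paddr)
{
	struct toy_domain *domain = toy_get_domain(dev_id);

	if (toy_ptr_err(domain) == -EINVAL)
		return paddr;                  /* no translation: pass physical address through */
	if (toy_is_err(domain))
		return TOY_DMA_ERROR;          /* hard error: report a bad DMA address          */

	printf("mapping %#llx in domain %d\n", (unsigned long long)paddr, domain->id);
	return 0x100000;                       /* pretend IOVA from the allocator               */
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)toy_map_page(1, 0xdead000));
	printf("%#llx\n", (unsigned long long)toy_map_page(2, 0xdead000));
	printf("%#llx\n", (unsigned long long)toy_map_page(3, 0xdead000));
	return 0;
}
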
@@ -1764,25 +1946,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1764 enum dma_data_direction dir, struct dma_attrs *attrs) 1946 enum dma_data_direction dir, struct dma_attrs *attrs)
1765{ 1947{
1766 unsigned long flags; 1948 unsigned long flags;
1767 struct amd_iommu *iommu;
1768 struct protection_domain *domain; 1949 struct protection_domain *domain;
1769 u16 devid;
1770 1950
1771 INC_STATS_COUNTER(cnt_unmap_single); 1951 INC_STATS_COUNTER(cnt_unmap_single);
1772 1952
1773 if (!check_device(dev) || 1953 domain = get_domain(dev);
1774 !get_device_resources(dev, &iommu, &domain, &devid)) 1954 if (IS_ERR(domain))
1775 /* device not handled by any AMD IOMMU */
1776 return;
1777
1778 if (!dma_ops_domain(domain))
1779 return; 1955 return;
1780 1956
1781 spin_lock_irqsave(&domain->lock, flags); 1957 spin_lock_irqsave(&domain->lock, flags);
1782 1958
1783 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1959 __unmap_single(domain->priv, dma_addr, size, dir);
1784 1960
1785 iommu_completion_wait(iommu); 1961 iommu_flush_complete(domain);
1786 1962
1787 spin_unlock_irqrestore(&domain->lock, flags); 1963 spin_unlock_irqrestore(&domain->lock, flags);
1788} 1964}
@@ -1814,9 +1990,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1814 struct dma_attrs *attrs) 1990 struct dma_attrs *attrs)
1815{ 1991{
1816 unsigned long flags; 1992 unsigned long flags;
1817 struct amd_iommu *iommu;
1818 struct protection_domain *domain; 1993 struct protection_domain *domain;
1819 u16 devid;
1820 int i; 1994 int i;
1821 struct scatterlist *s; 1995 struct scatterlist *s;
1822 phys_addr_t paddr; 1996 phys_addr_t paddr;
@@ -1825,25 +1999,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1825 1999
1826 INC_STATS_COUNTER(cnt_map_sg); 2000 INC_STATS_COUNTER(cnt_map_sg);
1827 2001
1828 if (!check_device(dev)) 2002 domain = get_domain(dev);
2003 if (PTR_ERR(domain) == -EINVAL)
2004 return map_sg_no_iommu(dev, sglist, nelems, dir);
2005 else if (IS_ERR(domain))
1829 return 0; 2006 return 0;
1830 2007
1831 dma_mask = *dev->dma_mask; 2008 dma_mask = *dev->dma_mask;
1832 2009
1833 get_device_resources(dev, &iommu, &domain, &devid);
1834
1835 if (!iommu || !domain)
1836 return map_sg_no_iommu(dev, sglist, nelems, dir);
1837
1838 if (!dma_ops_domain(domain))
1839 return 0;
1840
1841 spin_lock_irqsave(&domain->lock, flags); 2010 spin_lock_irqsave(&domain->lock, flags);
1842 2011
1843 for_each_sg(sglist, s, nelems, i) { 2012 for_each_sg(sglist, s, nelems, i) {
1844 paddr = sg_phys(s); 2013 paddr = sg_phys(s);
1845 2014
1846 s->dma_address = __map_single(dev, iommu, domain->priv, 2015 s->dma_address = __map_single(dev, domain->priv,
1847 paddr, s->length, dir, false, 2016 paddr, s->length, dir, false,
1848 dma_mask); 2017 dma_mask);
1849 2018
@@ -1854,7 +2023,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1854 goto unmap; 2023 goto unmap;
1855 } 2024 }
1856 2025
1857 iommu_completion_wait(iommu); 2026 iommu_flush_complete(domain);
1858 2027
1859out: 2028out:
1860 spin_unlock_irqrestore(&domain->lock, flags); 2029 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1863,7 +2032,7 @@ out:
1863unmap: 2032unmap:
1864 for_each_sg(sglist, s, mapped_elems, i) { 2033 for_each_sg(sglist, s, mapped_elems, i) {
1865 if (s->dma_address) 2034 if (s->dma_address)
1866 __unmap_single(iommu, domain->priv, s->dma_address, 2035 __unmap_single(domain->priv, s->dma_address,
1867 s->dma_length, dir); 2036 s->dma_length, dir);
1868 s->dma_address = s->dma_length = 0; 2037 s->dma_address = s->dma_length = 0;
1869 } 2038 }
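
The unmap: label in map_sg() above makes the operation all-or-nothing: if any scatterlist element fails to map, every element mapped so far is unmapped again and 0 is returned to the caller. Here is a standalone sketch of that rollback pattern; the toy_* names are invented for the example and a plain array stands in for the kernel scatterlist.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_sg { unsigned long paddr; size_t len; unsigned long dma_addr; };

/* Pretend mapping: fail once the aperture is "full". */
static bool toy_map_one(struct toy_sg *s, int idx)
{
	if (idx >= 3)
		return false;                  /* simulate allocator exhaustion */
	s->dma_addr = 0x100000UL + (unsigned long)idx * 0x1000;
	return true;
}

static void toy_unmap_one(struct toy_sg *s)
{
	printf("unmapping IOVA %#lx\n", s->dma_addr);
	s->dma_addr = 0;
}

/* Map every element or none: roll back mapped elements on the first failure. */
static int toy_map_sg(struct toy_sg *sg, int nelems)
{
	int i, mapped = 0;

	for (i = 0; i < nelems; i++) {
		if (!toy_map_one(&sg[i], i))
			goto unmap;
		mapped++;
	}
	return mapped;

unmap:
	for (i = 0; i < mapped; i++)
		toy_unmap_one(&sg[i]);
	return 0;                              /* 0 elements mapped signals failure */
}

int main(void)
{
	struct toy_sg sg[5] = { {0x1000, 4096, 0}, {0x2000, 4096, 0},
				{0x3000, 4096, 0}, {0x4000, 4096, 0},
				{0x5000, 4096, 0} };

	printf("mapped %d of 5 elements\n", toy_map_sg(sg, 5));
	return 0;
}
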
@@ -1882,30 +2051,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1882 struct dma_attrs *attrs) 2051 struct dma_attrs *attrs)
1883{ 2052{
1884 unsigned long flags; 2053 unsigned long flags;
1885 struct amd_iommu *iommu;
1886 struct protection_domain *domain; 2054 struct protection_domain *domain;
1887 struct scatterlist *s; 2055 struct scatterlist *s;
1888 u16 devid;
1889 int i; 2056 int i;
1890 2057
1891 INC_STATS_COUNTER(cnt_unmap_sg); 2058 INC_STATS_COUNTER(cnt_unmap_sg);
1892 2059
1893 if (!check_device(dev) || 2060 domain = get_domain(dev);
1894 !get_device_resources(dev, &iommu, &domain, &devid)) 2061 if (IS_ERR(domain))
1895 return;
1896
1897 if (!dma_ops_domain(domain))
1898 return; 2062 return;
1899 2063
1900 spin_lock_irqsave(&domain->lock, flags); 2064 spin_lock_irqsave(&domain->lock, flags);
1901 2065
1902 for_each_sg(sglist, s, nelems, i) { 2066 for_each_sg(sglist, s, nelems, i) {
1903 __unmap_single(iommu, domain->priv, s->dma_address, 2067 __unmap_single(domain->priv, s->dma_address,
1904 s->dma_length, dir); 2068 s->dma_length, dir);
1905 s->dma_address = s->dma_length = 0; 2069 s->dma_address = s->dma_length = 0;
1906 } 2070 }
1907 2071
1908 iommu_completion_wait(iommu); 2072 iommu_flush_complete(domain);
1909 2073
1910 spin_unlock_irqrestore(&domain->lock, flags); 2074 spin_unlock_irqrestore(&domain->lock, flags);
1911} 2075}
@@ -1918,49 +2082,44 @@ static void *alloc_coherent(struct device *dev, size_t size,
1918{ 2082{
1919 unsigned long flags; 2083 unsigned long flags;
1920 void *virt_addr; 2084 void *virt_addr;
1921 struct amd_iommu *iommu;
1922 struct protection_domain *domain; 2085 struct protection_domain *domain;
1923 u16 devid;
1924 phys_addr_t paddr; 2086 phys_addr_t paddr;
1925 u64 dma_mask = dev->coherent_dma_mask; 2087 u64 dma_mask = dev->coherent_dma_mask;
1926 2088
1927 INC_STATS_COUNTER(cnt_alloc_coherent); 2089 INC_STATS_COUNTER(cnt_alloc_coherent);
1928 2090
1929 if (!check_device(dev)) 2091 domain = get_domain(dev);
2092 if (PTR_ERR(domain) == -EINVAL) {
2093 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2094 *dma_addr = __pa(virt_addr);
2095 return virt_addr;
2096 } else if (IS_ERR(domain))
1930 return NULL; 2097 return NULL;
1931 2098
1932 if (!get_device_resources(dev, &iommu, &domain, &devid)) 2099 dma_mask = dev->coherent_dma_mask;
1933 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 2100 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2101 flag |= __GFP_ZERO;
1934 2102
1935 flag |= __GFP_ZERO;
1936 virt_addr = (void *)__get_free_pages(flag, get_order(size)); 2103 virt_addr = (void *)__get_free_pages(flag, get_order(size));
1937 if (!virt_addr) 2104 if (!virt_addr)
1938 return NULL; 2105 return NULL;
1939 2106
1940 paddr = virt_to_phys(virt_addr); 2107 paddr = virt_to_phys(virt_addr);
1941 2108
1942 if (!iommu || !domain) {
1943 *dma_addr = (dma_addr_t)paddr;
1944 return virt_addr;
1945 }
1946
1947 if (!dma_ops_domain(domain))
1948 goto out_free;
1949
1950 if (!dma_mask) 2109 if (!dma_mask)
1951 dma_mask = *dev->dma_mask; 2110 dma_mask = *dev->dma_mask;
1952 2111
1953 spin_lock_irqsave(&domain->lock, flags); 2112 spin_lock_irqsave(&domain->lock, flags);
1954 2113
1955 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 2114 *dma_addr = __map_single(dev, domain->priv, paddr,
1956 size, DMA_BIDIRECTIONAL, true, dma_mask); 2115 size, DMA_BIDIRECTIONAL, true, dma_mask);
1957 2116
1958 if (*dma_addr == bad_dma_address) { 2117 if (*dma_addr == DMA_ERROR_CODE) {
1959 spin_unlock_irqrestore(&domain->lock, flags); 2118 spin_unlock_irqrestore(&domain->lock, flags);
1960 goto out_free; 2119 goto out_free;
1961 } 2120 }
1962 2121
1963 iommu_completion_wait(iommu); 2122 iommu_flush_complete(domain);
1964 2123
1965 spin_unlock_irqrestore(&domain->lock, flags); 2124 spin_unlock_irqrestore(&domain->lock, flags);
1966 2125
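
alloc_coherent() above also shows a strict unwind order: backing pages are allocated first, the IOVA mapping is attempted second, and a mapping failure releases the pages again via out_free. The sketch below models that allocate-then-map-or-free ordering, with calloc() standing in for __get_free_pages() with __GFP_ZERO and an invented toy_map_single() in place of __map_single(); it is an illustration, not the kernel implementation.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_DMA_ERROR ((uint64_t)0)        /* stand-in for DMA_ERROR_CODE */

/* Pretend IOVA allocator/mapper: fails when asked to map "too much". */
static uint64_t toy_map_single(void *cpu_addr, size_t size)
{
	(void)cpu_addr;
	return size <= (1UL << 20) ? 0x300000 : TOY_DMA_ERROR;
}

static void *toy_alloc_coherent(size_t size, uint64_t *dma_addr)
{
	/* 1. Allocate zeroed backing memory (models __get_free_pages + __GFP_ZERO). */
	void *virt = calloc(1, size);
	if (!virt)
		return NULL;

	/* 2. Map it through the IOMMU; on failure undo step 1 (the out_free path). */
	*dma_addr = toy_map_single(virt, size);
	if (*dma_addr == TOY_DMA_ERROR) {
		free(virt);
		return NULL;
	}
	return virt;
}

int main(void)
{
	uint64_t dma;
	void *ok  = toy_alloc_coherent(4096, &dma);
	void *bad = toy_alloc_coherent(4UL << 20, &dma);

	printf("small: %s, large: %s\n", ok ? "mapped" : "failed",
	       bad ? "mapped" : "failed");
	free(ok);
	return 0;
}
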
@@ -1980,28 +2139,19 @@ static void free_coherent(struct device *dev, size_t size,
1980 void *virt_addr, dma_addr_t dma_addr) 2139 void *virt_addr, dma_addr_t dma_addr)
1981{ 2140{
1982 unsigned long flags; 2141 unsigned long flags;
1983 struct amd_iommu *iommu;
1984 struct protection_domain *domain; 2142 struct protection_domain *domain;
1985 u16 devid;
1986 2143
1987 INC_STATS_COUNTER(cnt_free_coherent); 2144 INC_STATS_COUNTER(cnt_free_coherent);
1988 2145
1989 if (!check_device(dev)) 2146 domain = get_domain(dev);
1990 return; 2147 if (IS_ERR(domain))
1991
1992 get_device_resources(dev, &iommu, &domain, &devid);
1993
1994 if (!iommu || !domain)
1995 goto free_mem;
1996
1997 if (!dma_ops_domain(domain))
1998 goto free_mem; 2148 goto free_mem;
1999 2149
2000 spin_lock_irqsave(&domain->lock, flags); 2150 spin_lock_irqsave(&domain->lock, flags);
2001 2151
2002 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2152 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2003 2153
2004 iommu_completion_wait(iommu); 2154 iommu_flush_complete(domain);
2005 2155
2006 spin_unlock_irqrestore(&domain->lock, flags); 2156 spin_unlock_irqrestore(&domain->lock, flags);
2007 2157
@@ -2015,22 +2165,7 @@ free_mem:
2015 */ 2165 */
2016static int amd_iommu_dma_supported(struct device *dev, u64 mask) 2166static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2017{ 2167{
2018 u16 bdf; 2168 return check_device(dev);
2019 struct pci_dev *pcidev;
2020
2021 /* No device or no PCI device */
2022 if (!dev || dev->bus != &pci_bus_type)
2023 return 0;
2024
2025 pcidev = to_pci_dev(dev);
2026
2027 bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
2028
2029 /* Out of our scope? */
2030 if (bdf > amd_iommu_last_bdf)
2031 return 0;
2032
2033 return 1;
2034} 2169}
2035 2170
2036/* 2171/*
@@ -2044,25 +2179,28 @@ static void prealloc_protection_domains(void)
2044{ 2179{
2045 struct pci_dev *dev = NULL; 2180 struct pci_dev *dev = NULL;
2046 struct dma_ops_domain *dma_dom; 2181 struct dma_ops_domain *dma_dom;
2047 struct amd_iommu *iommu;
2048 u16 devid; 2182 u16 devid;
2049 2183
2050 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2184 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2051 devid = calc_devid(dev->bus->number, dev->devfn); 2185
2052 if (devid > amd_iommu_last_bdf) 2186 /* Do we handle this device? */
2053 continue; 2187 if (!check_device(&dev->dev))
2054 devid = amd_iommu_alias_table[devid];
2055 if (domain_for_device(devid))
2056 continue; 2188 continue;
2057 iommu = amd_iommu_rlookup_table[devid]; 2189
2058 if (!iommu) 2190 /* Is there already any domain for it? */
2191 if (domain_for_device(&dev->dev))
2059 continue; 2192 continue;
2060 dma_dom = dma_ops_domain_alloc(iommu); 2193
2194 devid = get_device_id(&dev->dev);
2195
2196 dma_dom = dma_ops_domain_alloc();
2061 if (!dma_dom) 2197 if (!dma_dom)
2062 continue; 2198 continue;
2063 init_unity_mappings_for_device(dma_dom, devid); 2199 init_unity_mappings_for_device(dma_dom, devid);
2064 dma_dom->target_dev = devid; 2200 dma_dom->target_dev = devid;
2065 2201
2202 attach_device(&dev->dev, &dma_dom->domain);
2203
2066 list_add_tail(&dma_dom->list, &iommu_pd_list); 2204 list_add_tail(&dma_dom->list, &iommu_pd_list);
2067 } 2205 }
2068} 2206}
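
prealloc_protection_domains() now boils down to a filter-allocate-attach loop: skip devices that check_device() rejects or that already own a domain, allocate a dma_ops domain for the rest, attach it, and queue it on iommu_pd_list. The following is a compact model of that loop over a toy device table; every toy_* name is invented for the sketch and a hand-rolled list stands in for the kernel's iommu_pd_list.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_dev { int devid; bool handled; bool has_domain; };
struct toy_dom { int target_dev; struct toy_dom *next; };

static struct toy_dom *preallocated;       /* models iommu_pd_list */

static struct toy_dom *toy_domain_alloc(void)
{
	return calloc(1, sizeof(struct toy_dom));
}

/* Walk all devices and pre-allocate a DMA domain for each eligible one. */
static void toy_prealloc_domains(struct toy_dev *devs, int n)
{
	for (int i = 0; i < n; i++) {
		struct toy_dev *dev = &devs[i];
		struct toy_dom *dom;

		if (!dev->handled)              /* not behind an IOMMU: skip       */
			continue;
		if (dev->has_domain)            /* already has a domain: skip      */
			continue;

		dom = toy_domain_alloc();
		if (!dom)
			continue;               /* allocation failure is not fatal */

		dom->target_dev = dev->devid;
		dev->has_domain = true;         /* models attach_device()          */

		dom->next = preallocated;       /* prepend to the global list      */
		preallocated = dom;
	}
}

int main(void)
{
	struct toy_dev devs[] = {
		{ .devid = 0x08, .handled = true  },
		{ .devid = 0x10, .handled = false },
		{ .devid = 0x18, .handled = true, .has_domain = true },
	};

	toy_prealloc_domains(devs, 3);
	for (struct toy_dom *d = preallocated; d; d = d->next)
		printf("preallocated domain for devid %#x\n", d->target_dev);
	return 0;
}
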
@@ -2091,7 +2229,7 @@ int __init amd_iommu_init_dma_ops(void)
2091 * protection domain will be assigned to the default one. 2229 * protection domain will be assigned to the default one.
2092 */ 2230 */
2093 for_each_iommu(iommu) { 2231 for_each_iommu(iommu) {
2094 iommu->default_dom = dma_ops_domain_alloc(iommu); 2232 iommu->default_dom = dma_ops_domain_alloc();
2095 if (iommu->default_dom == NULL) 2233 if (iommu->default_dom == NULL)
2096 return -ENOMEM; 2234 return -ENOMEM;
2097 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 2235 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -2101,15 +2239,12 @@ int __init amd_iommu_init_dma_ops(void)
2101 } 2239 }
2102 2240
2103 /* 2241 /*
2104 * If device isolation is enabled, pre-allocate the protection 2242 * Pre-allocate the protection domains for each device.
2105 * domains for each device.
2106 */ 2243 */
2107 if (amd_iommu_isolate) 2244 prealloc_protection_domains();
2108 prealloc_protection_domains();
2109 2245
2110 iommu_detected = 1; 2246 iommu_detected = 1;
2111 force_iommu = 1; 2247 swiotlb = 0;
2112 bad_dma_address = 0;
2113#ifdef CONFIG_GART_IOMMU 2248#ifdef CONFIG_GART_IOMMU
2114 gart_iommu_aperture_disabled = 1; 2249 gart_iommu_aperture_disabled = 1;
2115 gart_iommu_aperture = 0; 2250 gart_iommu_aperture = 0;
@@ -2120,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void)
2120 2255
2121 register_iommu(&amd_iommu_ops); 2256 register_iommu(&amd_iommu_ops);
2122 2257
2123 bus_register_notifier(&pci_bus_type, &device_nb);
2124
2125 amd_iommu_stats_init(); 2258 amd_iommu_stats_init();
2126 2259
2127 return 0; 2260 return 0;
@@ -2148,14 +2281,17 @@ free_domains:
2148 2281
2149static void cleanup_domain(struct protection_domain *domain) 2282static void cleanup_domain(struct protection_domain *domain)
2150{ 2283{
2284 struct iommu_dev_data *dev_data, *next;
2151 unsigned long flags; 2285 unsigned long flags;
2152 u16 devid;
2153 2286
2154 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2287 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2155 2288
2156 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 2289 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2157 if (amd_iommu_pd_table[devid] == domain) 2290 struct device *dev = dev_data->dev;
2158 __detach_device(domain, devid); 2291
2292 do_detach(dev);
2293 atomic_set(&dev_data->bind, 0);
2294 }
2159 2295
2160 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2296 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2161} 2297}
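
cleanup_domain() now iterates the domain's own dev_list with the _safe list walker because do_detach() unlinks the entry currently being visited. The standalone example below shows the same idea with a hand-rolled singly linked list instead of the kernel's list_head: the successor is remembered before the current node is removed and freed. All names are invented for the illustration.

#include <stdio.h>
#include <stdlib.h>

struct toy_dev_data { int devid; struct toy_dev_data *next; };

/* Unlink and free one entry: this is what makes naive iteration unsafe. */
static void toy_detach(struct toy_dev_data **head, struct toy_dev_data *victim)
{
	for (struct toy_dev_data **pp = head; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;
			printf("detached devid %#x\n", victim->devid);
			free(victim);
			return;
		}
	}
}

/* Detach every device: grab `next` before the current node is freed. */
static void toy_cleanup_domain(struct toy_dev_data **head)
{
	struct toy_dev_data *cur = *head, *next;

	while (cur) {
		next = cur->next;      /* remember the successor first */
		toy_detach(head, cur); /* cur is gone after this call  */
		cur = next;
	}
}

int main(void)
{
	struct toy_dev_data *head = NULL;

	for (int id = 1; id <= 3; id++) {
		struct toy_dev_data *d = malloc(sizeof(*d));
		d->devid = id;
		d->next = head;
		head = d;
	}
	toy_cleanup_domain(&head);
	printf("list empty: %s\n", head ? "no" : "yes");
	return 0;
}
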
@@ -2165,6 +2301,8 @@ static void protection_domain_free(struct protection_domain *domain)
2165 if (!domain) 2301 if (!domain)
2166 return; 2302 return;
2167 2303
2304 del_domain_from_list(domain);
2305
2168 if (domain->id) 2306 if (domain->id)
2169 domain_id_free(domain->id); 2307 domain_id_free(domain->id);
2170 2308
@@ -2183,6 +2321,9 @@ static struct protection_domain *protection_domain_alloc(void)
2183 domain->id = domain_id_alloc(); 2321 domain->id = domain_id_alloc();
2184 if (!domain->id) 2322 if (!domain->id)
2185 goto out_err; 2323 goto out_err;
2324 INIT_LIST_HEAD(&domain->dev_list);
2325
2326 add_domain_to_list(domain);
2186 2327
2187 return domain; 2328 return domain;
2188 2329
@@ -2239,26 +2380,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2239static void amd_iommu_detach_device(struct iommu_domain *dom, 2380static void amd_iommu_detach_device(struct iommu_domain *dom,
2240 struct device *dev) 2381 struct device *dev)
2241{ 2382{
2242 struct protection_domain *domain = dom->priv; 2383 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2243 struct amd_iommu *iommu; 2384 struct amd_iommu *iommu;
2244 struct pci_dev *pdev;
2245 u16 devid; 2385 u16 devid;
2246 2386
2247 if (dev->bus != &pci_bus_type) 2387 if (!check_device(dev))
2248 return; 2388 return;
2249 2389
2250 pdev = to_pci_dev(dev); 2390 devid = get_device_id(dev);
2251 2391
2252 devid = calc_devid(pdev->bus->number, pdev->devfn); 2392 if (dev_data->domain != NULL)
2253 2393 detach_device(dev);
2254 if (devid > 0)
2255 detach_device(domain, devid);
2256 2394
2257 iommu = amd_iommu_rlookup_table[devid]; 2395 iommu = amd_iommu_rlookup_table[devid];
2258 if (!iommu) 2396 if (!iommu)
2259 return; 2397 return;
2260 2398
2261 iommu_queue_inv_dev_entry(iommu, devid); 2399 iommu_flush_device(dev);
2262 iommu_completion_wait(iommu); 2400 iommu_completion_wait(iommu);
2263} 2401}
2264 2402
@@ -2266,35 +2404,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
2266 struct device *dev) 2404 struct device *dev)
2267{ 2405{
2268 struct protection_domain *domain = dom->priv; 2406 struct protection_domain *domain = dom->priv;
2269 struct protection_domain *old_domain; 2407 struct iommu_dev_data *dev_data;
2270 struct amd_iommu *iommu; 2408 struct amd_iommu *iommu;
2271 struct pci_dev *pdev; 2409 int ret;
2272 u16 devid; 2410 u16 devid;
2273 2411
2274 if (dev->bus != &pci_bus_type) 2412 if (!check_device(dev))
2275 return -EINVAL; 2413 return -EINVAL;
2276 2414
2277 pdev = to_pci_dev(dev); 2415 dev_data = dev->archdata.iommu;
2278
2279 devid = calc_devid(pdev->bus->number, pdev->devfn);
2280 2416
2281 if (devid >= amd_iommu_last_bdf || 2417 devid = get_device_id(dev);
2282 devid != amd_iommu_alias_table[devid])
2283 return -EINVAL;
2284 2418
2285 iommu = amd_iommu_rlookup_table[devid]; 2419 iommu = amd_iommu_rlookup_table[devid];
2286 if (!iommu) 2420 if (!iommu)
2287 return -EINVAL; 2421 return -EINVAL;
2288 2422
2289 old_domain = domain_for_device(devid); 2423 if (dev_data->domain)
2290 if (old_domain) 2424 detach_device(dev);
2291 detach_device(old_domain, devid);
2292 2425
2293 attach_device(iommu, domain, devid); 2426 ret = attach_device(dev, domain);
2294 2427
2295 iommu_completion_wait(iommu); 2428 iommu_completion_wait(iommu);
2296 2429
2297 return 0; 2430 return ret;
2298} 2431}
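
amd_iommu_attach_device() above follows a displace-then-bind pattern: if the device is already attached to some domain it is detached first, and only then attached to the requested one. A short illustrative sketch of that pattern with invented toy_* types follows; it models the ordering only, not the kernel's locking or device-table updates.

#include <stdio.h>

struct toy_domain { int id; };
struct toy_dev    { int devid; struct toy_domain *domain; };

static void toy_detach(struct toy_dev *dev)
{
	printf("devid %#x leaves domain %d\n", dev->devid, dev->domain->id);
	dev->domain = NULL;
}

/* Bind a device to a domain, displacing any previous binding first. */
static int toy_attach(struct toy_dev *dev, struct toy_domain *domain)
{
	if (dev->domain)                /* already bound somewhere else?   */
		toy_detach(dev);        /* ...then detach before rebinding */

	dev->domain = domain;
	printf("devid %#x joins domain %d\n", dev->devid, domain->id);
	return 0;
}

int main(void)
{
	struct toy_domain a = { 1 }, b = { 2 };
	struct toy_dev dev = { .devid = 0x20, .domain = NULL };

	toy_attach(&dev, &a);
	toy_attach(&dev, &b);          /* triggers the detach-first path */
	return 0;
}
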
2299 2432
2300static int amd_iommu_map_range(struct iommu_domain *dom, 2433static int amd_iommu_map_range(struct iommu_domain *dom,
@@ -2340,7 +2473,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
2340 iova += PAGE_SIZE; 2473 iova += PAGE_SIZE;
2341 } 2474 }
2342 2475
2343 iommu_flush_domain(domain->id); 2476 iommu_flush_tlb_pde(domain);
2344} 2477}
2345 2478
2346static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2479static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2391,10 +2524,11 @@ static struct iommu_ops amd_iommu_ops = {
2391 2524
2392int __init amd_iommu_init_passthrough(void) 2525int __init amd_iommu_init_passthrough(void)
2393{ 2526{
2527 struct amd_iommu *iommu;
2394 struct pci_dev *dev = NULL; 2528 struct pci_dev *dev = NULL;
2395 u16 devid, devid2; 2529 u16 devid;
2396 2530
2397 /* allocate passthroug domain */ 2531 /* allocate passthrough domain */
2398 pt_domain = protection_domain_alloc(); 2532 pt_domain = protection_domain_alloc();
2399 if (!pt_domain) 2533 if (!pt_domain)
2400 return -ENOMEM; 2534 return -ENOMEM;
@@ -2402,20 +2536,17 @@ int __init amd_iommu_init_passthrough(void)
2402 pt_domain->mode |= PAGE_MODE_NONE; 2536 pt_domain->mode |= PAGE_MODE_NONE;
2403 2537
2404 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2538 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2405 struct amd_iommu *iommu;
2406 2539
2407 devid = calc_devid(dev->bus->number, dev->devfn); 2540 if (!check_device(&dev->dev))
2408 if (devid > amd_iommu_last_bdf)
2409 continue; 2541 continue;
2410 2542
2411 devid2 = amd_iommu_alias_table[devid]; 2543 devid = get_device_id(&dev->dev);
2412 2544
2413 iommu = amd_iommu_rlookup_table[devid2]; 2545 iommu = amd_iommu_rlookup_table[devid];
2414 if (!iommu) 2546 if (!iommu)
2415 continue; 2547 continue;
2416 2548
2417 __attach_device(iommu, pt_domain, devid); 2549 attach_device(&dev->dev, pt_domain);
2418 __attach_device(iommu, pt_domain, devid2);
2419 } 2550 }
2420 2551
2421 pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); 2552 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");