aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Kardashevskiy <aik@ozlabs.ru>2013-05-20 23:33:09 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-06-20 02:55:14 -0400
commit4e13c1ac6baa1d6c2b650d66ca89e1e12727ec19 (patch)
treefad36e88c2b10b52fbfa24f5dc1ce46170392aa6
parentab9a4183fddf232a46b6255e0d3da5a09f85ecbd (diff)
powerpc/vfio: Enable on PowerNV platform
This initializes IOMMU groups based on the IOMMU configuration discovered during the PCI scan on POWERNV (POWER non virtualized) platform. The IOMMU groups are to be used later by the VFIO driver, which is used for PCI pass through. It also implements an API for mapping/unmapping pages for guest PCI drivers and providing DMA window properties. This API is going to be used later by QEMU-VFIO to handle h_put_tce hypercalls from the KVM guest. The iommu_put_tce_user_mode() does only a single page mapping as an API for adding many mappings at once is going to be added later. Although this driver has been tested only on the POWERNV platform, it should work on any platform which supports TCE tables. As h_put_tce hypercall is received by the host kernel and processed by the QEMU (what involves calling the host kernel again), performance is not the best - circa 220MB/s on 10Gb ethernet network. To enable VFIO on POWER, enable SPAPR_TCE_IOMMU config option and configure VFIO as required. Cc: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/iommu.h26
-rw-r--r--arch/powerpc/kernel/iommu.c323
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c1
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c5
-rw-r--r--arch/powerpc/platforms/powernv/pci.c2
-rw-r--r--drivers/iommu/Kconfig8
6 files changed, 364 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index cbfe678e3dbe..98d14229f893 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -76,6 +76,9 @@ struct iommu_table {
76 struct iommu_pool large_pool; 76 struct iommu_pool large_pool;
77 struct iommu_pool pools[IOMMU_NR_POOLS]; 77 struct iommu_pool pools[IOMMU_NR_POOLS];
78 unsigned long *it_map; /* A simple allocation bitmap for now */ 78 unsigned long *it_map; /* A simple allocation bitmap for now */
79#ifdef CONFIG_IOMMU_API
80 struct iommu_group *it_group;
81#endif
79}; 82};
80 83
81struct scatterlist; 84struct scatterlist;
@@ -98,6 +101,8 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
98 */ 101 */
99extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, 102extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
100 int nid); 103 int nid);
104extern void iommu_register_group(struct iommu_table *tbl,
105 int pci_domain_number, unsigned long pe_num);
101 106
102extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, 107extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
103 struct scatterlist *sglist, int nelems, 108 struct scatterlist *sglist, int nelems,
@@ -147,5 +152,26 @@ static inline void iommu_restore(void)
147} 152}
148#endif 153#endif
149 154
155/* The API to support IOMMU operations for VFIO */
156extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
157 unsigned long ioba, unsigned long tce_value,
158 unsigned long npages);
159extern int iommu_tce_put_param_check(struct iommu_table *tbl,
160 unsigned long ioba, unsigned long tce);
161extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
162 unsigned long hwaddr, enum dma_data_direction direction);
163extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
164 unsigned long entry);
165extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
166 unsigned long entry, unsigned long pages);
167extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
168 unsigned long entry, unsigned long tce);
169
170extern void iommu_flush_tce(struct iommu_table *tbl);
171extern int iommu_take_ownership(struct iommu_table *tbl);
172extern void iommu_release_ownership(struct iommu_table *tbl);
173
174extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
175
150#endif /* __KERNEL__ */ 176#endif /* __KERNEL__ */
151#endif /* _ASM_IOMMU_H */ 177#endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c0d0dbddfba1..b20ff173a671 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -36,6 +36,8 @@
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/fault-inject.h> 37#include <linux/fault-inject.h>
38#include <linux/pci.h> 38#include <linux/pci.h>
39#include <linux/iommu.h>
40#include <linux/sched.h>
39#include <asm/io.h> 41#include <asm/io.h>
40#include <asm/prom.h> 42#include <asm/prom.h>
41#include <asm/iommu.h> 43#include <asm/iommu.h>
@@ -44,6 +46,7 @@
44#include <asm/kdump.h> 46#include <asm/kdump.h>
45#include <asm/fadump.h> 47#include <asm/fadump.h>
46#include <asm/vio.h> 48#include <asm/vio.h>
49#include <asm/tce.h>
47 50
48#define DBG(...) 51#define DBG(...)
49 52
@@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
724 if (tbl->it_offset == 0) 727 if (tbl->it_offset == 0)
725 clear_bit(0, tbl->it_map); 728 clear_bit(0, tbl->it_map);
726 729
730#ifdef CONFIG_IOMMU_API
731 if (tbl->it_group) {
732 iommu_group_put(tbl->it_group);
733 BUG_ON(tbl->it_group);
734 }
735#endif
736
727 /* verify that table contains no entries */ 737 /* verify that table contains no entries */
728 if (!bitmap_empty(tbl->it_map, tbl->it_size)) 738 if (!bitmap_empty(tbl->it_map, tbl->it_size))
729 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); 739 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
860 free_pages((unsigned long)vaddr, get_order(size)); 870 free_pages((unsigned long)vaddr, get_order(size));
861 } 871 }
862} 872}
873
874#ifdef CONFIG_IOMMU_API
875/*
876 * SPAPR TCE API
877 */
878static void group_release(void *iommu_data)
879{
880 struct iommu_table *tbl = iommu_data;
881 tbl->it_group = NULL;
882}
883
884void iommu_register_group(struct iommu_table *tbl,
885 int pci_domain_number, unsigned long pe_num)
886{
887 struct iommu_group *grp;
888 char *name;
889
890 grp = iommu_group_alloc();
891 if (IS_ERR(grp)) {
892 pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
893 PTR_ERR(grp));
894 return;
895 }
896 tbl->it_group = grp;
897 iommu_group_set_iommudata(grp, tbl, group_release);
898 name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
899 pci_domain_number, pe_num);
900 if (!name)
901 return;
902 iommu_group_set_name(grp, name);
903 kfree(name);
904}
905
906enum dma_data_direction iommu_tce_direction(unsigned long tce)
907{
908 if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
909 return DMA_BIDIRECTIONAL;
910 else if (tce & TCE_PCI_READ)
911 return DMA_TO_DEVICE;
912 else if (tce & TCE_PCI_WRITE)
913 return DMA_FROM_DEVICE;
914 else
915 return DMA_NONE;
916}
917EXPORT_SYMBOL_GPL(iommu_tce_direction);
918
919void iommu_flush_tce(struct iommu_table *tbl)
920{
921 /* Flush/invalidate TLB caches if necessary */
922 if (ppc_md.tce_flush)
923 ppc_md.tce_flush(tbl);
924
925 /* Make sure updates are seen by hardware */
926 mb();
927}
928EXPORT_SYMBOL_GPL(iommu_flush_tce);
929
930int iommu_tce_clear_param_check(struct iommu_table *tbl,
931 unsigned long ioba, unsigned long tce_value,
932 unsigned long npages)
933{
934 /* ppc_md.tce_free() does not support any value but 0 */
935 if (tce_value)
936 return -EINVAL;
937
938 if (ioba & ~IOMMU_PAGE_MASK)
939 return -EINVAL;
940
941 ioba >>= IOMMU_PAGE_SHIFT;
942 if (ioba < tbl->it_offset)
943 return -EINVAL;
944
945 if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
946 return -EINVAL;
947
948 return 0;
949}
950EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
951
952int iommu_tce_put_param_check(struct iommu_table *tbl,
953 unsigned long ioba, unsigned long tce)
954{
955 if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
956 return -EINVAL;
957
958 if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
959 return -EINVAL;
960
961 if (ioba & ~IOMMU_PAGE_MASK)
962 return -EINVAL;
963
964 ioba >>= IOMMU_PAGE_SHIFT;
965 if (ioba < tbl->it_offset)
966 return -EINVAL;
967
968 if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
969 return -EINVAL;
970
971 return 0;
972}
973EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
974
975unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
976{
977 unsigned long oldtce;
978 struct iommu_pool *pool = get_pool(tbl, entry);
979
980 spin_lock(&(pool->lock));
981
982 oldtce = ppc_md.tce_get(tbl, entry);
983 if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
984 ppc_md.tce_free(tbl, entry, 1);
985 else
986 oldtce = 0;
987
988 spin_unlock(&(pool->lock));
989
990 return oldtce;
991}
992EXPORT_SYMBOL_GPL(iommu_clear_tce);
993
994int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
995 unsigned long entry, unsigned long pages)
996{
997 unsigned long oldtce;
998 struct page *page;
999
1000 for ( ; pages; --pages, ++entry) {
1001 oldtce = iommu_clear_tce(tbl, entry);
1002 if (!oldtce)
1003 continue;
1004
1005 page = pfn_to_page(oldtce >> PAGE_SHIFT);
1006 WARN_ON(!page);
1007 if (page) {
1008 if (oldtce & TCE_PCI_WRITE)
1009 SetPageDirty(page);
1010 put_page(page);
1011 }
1012 }
1013
1014 return 0;
1015}
1016EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
1017
1018/*
1019 * hwaddr is a kernel virtual address here (0xc... bazillion),
1020 * tce_build converts it to a physical address.
1021 */
1022int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
1023 unsigned long hwaddr, enum dma_data_direction direction)
1024{
1025 int ret = -EBUSY;
1026 unsigned long oldtce;
1027 struct iommu_pool *pool = get_pool(tbl, entry);
1028
1029 spin_lock(&(pool->lock));
1030
1031 oldtce = ppc_md.tce_get(tbl, entry);
1032 /* Add new entry if it is not busy */
1033 if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
1034 ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
1035
1036 spin_unlock(&(pool->lock));
1037
1038 /* if (unlikely(ret))
1039 pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
1040 __func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
1041 hwaddr, ret); */
1042
1043 return ret;
1044}
1045EXPORT_SYMBOL_GPL(iommu_tce_build);
1046
1047int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
1048 unsigned long tce)
1049{
1050 int ret;
1051 struct page *page = NULL;
1052 unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
1053 enum dma_data_direction direction = iommu_tce_direction(tce);
1054
1055 ret = get_user_pages_fast(tce & PAGE_MASK, 1,
1056 direction != DMA_TO_DEVICE, &page);
1057 if (unlikely(ret != 1)) {
1058 /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
1059 tce, entry << IOMMU_PAGE_SHIFT, ret); */
1060 return -EFAULT;
1061 }
1062 hwaddr = (unsigned long) page_address(page) + offset;
1063
1064 ret = iommu_tce_build(tbl, entry, hwaddr, direction);
1065 if (ret)
1066 put_page(page);
1067
1068 if (ret < 0)
1069 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
1070 __func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
1071
1072 return ret;
1073}
1074EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
1075
1076int iommu_take_ownership(struct iommu_table *tbl)
1077{
1078 unsigned long sz = (tbl->it_size + 7) >> 3;
1079
1080 if (tbl->it_offset == 0)
1081 clear_bit(0, tbl->it_map);
1082
1083 if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
1084 pr_err("iommu_tce: it_map is not empty");
1085 return -EBUSY;
1086 }
1087
1088 memset(tbl->it_map, 0xff, sz);
1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1090
1091 return 0;
1092}
1093EXPORT_SYMBOL_GPL(iommu_take_ownership);
1094
1095void iommu_release_ownership(struct iommu_table *tbl)
1096{
1097 unsigned long sz = (tbl->it_size + 7) >> 3;
1098
1099 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1100 memset(tbl->it_map, 0, sz);
1101
1102 /* Restore bit#0 set by iommu_init_table() */
1103 if (tbl->it_offset == 0)
1104 set_bit(0, tbl->it_map);
1105}
1106EXPORT_SYMBOL_GPL(iommu_release_ownership);
1107
1108static int iommu_add_device(struct device *dev)
1109{
1110 struct iommu_table *tbl;
1111 int ret = 0;
1112
1113 if (WARN_ON(dev->iommu_group)) {
1114 pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
1115 dev_name(dev),
1116 iommu_group_id(dev->iommu_group));
1117 return -EBUSY;
1118 }
1119
1120 tbl = get_iommu_table_base(dev);
1121 if (!tbl || !tbl->it_group) {
1122 pr_debug("iommu_tce: skipping device %s with no tbl\n",
1123 dev_name(dev));
1124 return 0;
1125 }
1126
1127 pr_debug("iommu_tce: adding %s to iommu group %d\n",
1128 dev_name(dev), iommu_group_id(tbl->it_group));
1129
1130 ret = iommu_group_add_device(tbl->it_group, dev);
1131 if (ret < 0)
1132 pr_err("iommu_tce: %s has not been added, ret=%d\n",
1133 dev_name(dev), ret);
1134
1135 return ret;
1136}
1137
1138static void iommu_del_device(struct device *dev)
1139{
1140 iommu_group_remove_device(dev);
1141}
1142
1143static int iommu_bus_notifier(struct notifier_block *nb,
1144 unsigned long action, void *data)
1145{
1146 struct device *dev = data;
1147
1148 switch (action) {
1149 case BUS_NOTIFY_ADD_DEVICE:
1150 return iommu_add_device(dev);
1151 case BUS_NOTIFY_DEL_DEVICE:
1152 iommu_del_device(dev);
1153 return 0;
1154 default:
1155 return 0;
1156 }
1157}
1158
1159static struct notifier_block tce_iommu_bus_nb = {
1160 .notifier_call = iommu_bus_notifier,
1161};
1162
1163static int __init tce_iommu_init(void)
1164{
1165 struct pci_dev *pdev = NULL;
1166
1167 BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
1168
1169 for_each_pci_dev(pdev)
1170 iommu_add_device(&pdev->dev);
1171
1172 bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1173 return 0;
1174}
1175
1176subsys_initcall_sync(tce_iommu_init);
1177
1178#else
1179
1180void iommu_register_group(struct iommu_table *tbl,
1181 int pci_domain_number, unsigned long pe_num)
1182{
1183}
1184
1185#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9c9d15e4cdf2..2931d97baa56 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -595,6 +595,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
595 TCE_PCI_SWINV_PAIR; 595 TCE_PCI_SWINV_PAIR;
596 } 596 }
597 iommu_init_table(tbl, phb->hose->node); 597 iommu_init_table(tbl, phb->hose->node);
598 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
598 599
599 return; 600 return;
600 fail: 601 fail:
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 92b37a0186c9..5d378f2d9e26 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -86,8 +86,11 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, 86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
87 struct pci_dev *pdev) 87 struct pci_dev *pdev)
88{ 88{
89 if (phb->p5ioc2.iommu_table.it_map == NULL) 89 if (phb->p5ioc2.iommu_table.it_map == NULL) {
90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); 90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
91 iommu_register_group(&phb->p5ioc2.iommu_table,
92 pci_domain_nr(phb->hose->bus), phb->opal_id);
93 }
91 94
92 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); 95 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
93} 96}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 277343cc6a3d..e16b729f46f9 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,6 +20,7 @@
20#include <linux/irq.h> 20#include <linux/irq.h>
21#include <linux/io.h> 21#include <linux/io.h>
22#include <linux/msi.h> 22#include <linux/msi.h>
23#include <linux/iommu.h>
23 24
24#include <asm/sections.h> 25#include <asm/sections.h>
25#include <asm/io.h> 26#include <asm/io.h>
@@ -412,6 +413,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
412 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), 413 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
413 be32_to_cpup(sizep), 0); 414 be32_to_cpup(sizep), 0);
414 iommu_init_table(tbl, hose->node); 415 iommu_init_table(tbl, hose->node);
416 iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
415 417
416 /* Deal with SW invalidated TCEs when needed (BML way) */ 418 /* Deal with SW invalidated TCEs when needed (BML way) */
417 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", 419 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c332fb98480d..3f3abde8a7f9 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -261,4 +261,12 @@ config SHMOBILE_IOMMU_L1SIZE
261 default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB 261 default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB
262 default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB 262 default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB
263 263
264config SPAPR_TCE_IOMMU
265 bool "sPAPR TCE IOMMU Support"
266 depends on PPC_POWERNV
267 select IOMMU_API
268 help
269 Enables bits of IOMMU API required by VFIO. The iommu_ops
270 is not implemented as it is not necessary for VFIO.
271
264endif # IOMMU_SUPPORT 272endif # IOMMU_SUPPORT