aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/iommu/Kconfig29
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd_iommu.c2764
3 files changed, 2794 insertions, 0 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 21a80bfbdb52..9246c5bf25af 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -17,3 +17,32 @@ config MSM_IOMMU
17config IOMMU_PGTABLES_L2 17config IOMMU_PGTABLES_L2
18 def_bool y 18 def_bool y
19 depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n 19 depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n
20
21# AMD IOMMU support
22config AMD_IOMMU
23 bool "AMD IOMMU support"
24 select SWIOTLB
25 select PCI_MSI
26 select PCI_IOV
27 select IOMMU_API
28 depends on X86_64 && PCI && ACPI
29 ---help---
30 With this option you can enable support for AMD IOMMU hardware in
31 your system. An IOMMU is a hardware component which provides
32 remapping of DMA memory accesses from devices. With an AMD IOMMU you
33 can isolate the DMA memory of different devices and protect the
34 system from misbehaving device drivers or hardware.
35
36 You can find out if your system has an AMD IOMMU if you look into
37 your BIOS for an option to enable it or if you have an IVRS ACPI
38 table.
39
40config AMD_IOMMU_STATS
41 bool "Export AMD IOMMU statistics to debugfs"
42 depends on AMD_IOMMU
43 select DEBUG_FS
44 ---help---
45 This option enables code in the AMD IOMMU driver to collect various
46 statistics about what's happening in the driver and export that
47 information to userspace via debugfs.
48 If unsure, say N.
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1a71c82b1af2..4237eaf84609 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,2 +1,3 @@
1obj-$(CONFIG_IOMMU_API) += iommu.o 1obj-$(CONFIG_IOMMU_API) += iommu.o
2obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o 2obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
3obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
new file mode 100644
index 000000000000..7c3a95e54ec5
--- /dev/null
+++ b/drivers/iommu/amd_iommu.c
@@ -0,0 +1,2764 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/pci-ats.h>
22#include <linux/bitmap.h>
23#include <linux/slab.h>
24#include <linux/debugfs.h>
25#include <linux/scatterlist.h>
26#include <linux/dma-mapping.h>
27#include <linux/iommu-helper.h>
28#include <linux/iommu.h>
29#include <linux/delay.h>
30#include <asm/proto.h>
31#include <asm/iommu.h>
32#include <asm/gart.h>
33#include <asm/dma.h>
34#include <asm/amd_iommu_proto.h>
35#include <asm/amd_iommu_types.h>
36#include <asm/amd_iommu.h>
37
/*
 * Store the command opcode @t in bits 31:28 of the second command word.
 * The cast keeps the shift in unsigned arithmetic, so an opcode with
 * bit 3 set is not shifted into the sign bit of a plain int.
 */
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((u32)(t) << 28))

/* Maximum number of 1us polling steps wait_on_sem() performs */
#define LOOP_TIMEOUT	100000
41
42static DEFINE_RWLOCK(amd_iommu_devtable_lock);
43
44/* A list of preallocated protection domains */
45static LIST_HEAD(iommu_pd_list);
46static DEFINE_SPINLOCK(iommu_pd_list_lock);
47
48/*
49 * Domain for untranslated devices - only allocated
50 * if iommu=pt passed on kernel cmd line.
51 */
52static struct protection_domain *pt_domain;
53
54static struct iommu_ops amd_iommu_ops;
55
56/*
57 * general struct to manage commands send to an IOMMU
58 */
59struct iommu_cmd {
60 u32 data[4];
61};
62
63static void update_domain(struct protection_domain *domain);
64
65/****************************************************************************
66 *
67 * Helper functions
68 *
69 ****************************************************************************/
70
71static inline u16 get_device_id(struct device *dev)
72{
73 struct pci_dev *pdev = to_pci_dev(dev);
74
75 return calc_devid(pdev->bus->number, pdev->devfn);
76}
77
78static struct iommu_dev_data *get_dev_data(struct device *dev)
79{
80 return dev->archdata.iommu;
81}
82
83/*
84 * In this function the list of preallocated protection domains is traversed to
85 * find the domain for a specific device
86 */
87static struct dma_ops_domain *find_protection_domain(u16 devid)
88{
89 struct dma_ops_domain *entry, *ret = NULL;
90 unsigned long flags;
91 u16 alias = amd_iommu_alias_table[devid];
92
93 if (list_empty(&iommu_pd_list))
94 return NULL;
95
96 spin_lock_irqsave(&iommu_pd_list_lock, flags);
97
98 list_for_each_entry(entry, &iommu_pd_list, list) {
99 if (entry->target_dev == devid ||
100 entry->target_dev == alias) {
101 ret = entry;
102 break;
103 }
104 }
105
106 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
107
108 return ret;
109}
110
111/*
112 * This function checks if the driver got a valid device from the caller to
113 * avoid dereferencing invalid pointers.
114 */
115static bool check_device(struct device *dev)
116{
117 u16 devid;
118
119 if (!dev || !dev->dma_mask)
120 return false;
121
122 /* No device or no PCI device */
123 if (dev->bus != &pci_bus_type)
124 return false;
125
126 devid = get_device_id(dev);
127
128 /* Out of our scope? */
129 if (devid > amd_iommu_last_bdf)
130 return false;
131
132 if (amd_iommu_rlookup_table[devid] == NULL)
133 return false;
134
135 return true;
136}
137
138static int iommu_init_device(struct device *dev)
139{
140 struct iommu_dev_data *dev_data;
141 struct pci_dev *pdev;
142 u16 devid, alias;
143
144 if (dev->archdata.iommu)
145 return 0;
146
147 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
148 if (!dev_data)
149 return -ENOMEM;
150
151 dev_data->dev = dev;
152
153 devid = get_device_id(dev);
154 alias = amd_iommu_alias_table[devid];
155 pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff);
156 if (pdev)
157 dev_data->alias = &pdev->dev;
158 else {
159 kfree(dev_data);
160 return -ENOTSUPP;
161 }
162
163 atomic_set(&dev_data->bind, 0);
164
165 dev->archdata.iommu = dev_data;
166
167
168 return 0;
169}
170
171static void iommu_ignore_device(struct device *dev)
172{
173 u16 devid, alias;
174
175 devid = get_device_id(dev);
176 alias = amd_iommu_alias_table[devid];
177
178 memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
179 memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
180
181 amd_iommu_rlookup_table[devid] = NULL;
182 amd_iommu_rlookup_table[alias] = NULL;
183}
184
185static void iommu_uninit_device(struct device *dev)
186{
187 kfree(dev->archdata.iommu);
188}
189
190void __init amd_iommu_uninit_devices(void)
191{
192 struct pci_dev *pdev = NULL;
193
194 for_each_pci_dev(pdev) {
195
196 if (!check_device(&pdev->dev))
197 continue;
198
199 iommu_uninit_device(&pdev->dev);
200 }
201}
202
203int __init amd_iommu_init_devices(void)
204{
205 struct pci_dev *pdev = NULL;
206 int ret = 0;
207
208 for_each_pci_dev(pdev) {
209
210 if (!check_device(&pdev->dev))
211 continue;
212
213 ret = iommu_init_device(&pdev->dev);
214 if (ret == -ENOTSUPP)
215 iommu_ignore_device(&pdev->dev);
216 else if (ret)
217 goto out_free;
218 }
219
220 return 0;
221
222out_free:
223
224 amd_iommu_uninit_devices();
225
226 return ret;
227}
228#ifdef CONFIG_AMD_IOMMU_STATS
229
/*
 * Statistics collection: one counter per tracked event, plus the
 * debugfs entries they are exported through.
 */

DECLARE_STATS_COUNTER(compl_wait);
DECLARE_STATS_COUNTER(cnt_map_single);
DECLARE_STATS_COUNTER(cnt_unmap_single);
DECLARE_STATS_COUNTER(cnt_map_sg);
DECLARE_STATS_COUNTER(cnt_unmap_sg);
DECLARE_STATS_COUNTER(cnt_alloc_coherent);
DECLARE_STATS_COUNTER(cnt_free_coherent);
DECLARE_STATS_COUNTER(cross_page);
DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);

/* debugfs directory holding the counters, and the "fullflush" entry */
static struct dentry *stats_dir;
static struct dentry *de_fflush;
249
250static void amd_iommu_stats_add(struct __iommu_counter *cnt)
251{
252 if (stats_dir == NULL)
253 return;
254
255 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
256 &cnt->value);
257}
258
259static void amd_iommu_stats_init(void)
260{
261 stats_dir = debugfs_create_dir("amd-iommu", NULL);
262 if (stats_dir == NULL)
263 return;
264
265 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
266 (u32 *)&amd_iommu_unmap_flush);
267
268 amd_iommu_stats_add(&compl_wait);
269 amd_iommu_stats_add(&cnt_map_single);
270 amd_iommu_stats_add(&cnt_unmap_single);
271 amd_iommu_stats_add(&cnt_map_sg);
272 amd_iommu_stats_add(&cnt_unmap_sg);
273 amd_iommu_stats_add(&cnt_alloc_coherent);
274 amd_iommu_stats_add(&cnt_free_coherent);
275 amd_iommu_stats_add(&cross_page);
276 amd_iommu_stats_add(&domain_flush_single);
277 amd_iommu_stats_add(&domain_flush_all);
278 amd_iommu_stats_add(&alloced_io_mem);
279 amd_iommu_stats_add(&total_map_requests);
280}
281
282#endif
283
284/****************************************************************************
285 *
286 * Interrupt handling functions
287 *
288 ****************************************************************************/
289
290static void dump_dte_entry(u16 devid)
291{
292 int i;
293
294 for (i = 0; i < 8; ++i)
295 pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
296 amd_iommu_dev_table[devid].data[i]);
297}
298
299static void dump_command(unsigned long phys_addr)
300{
301 struct iommu_cmd *cmd = phys_to_virt(phys_addr);
302 int i;
303
304 for (i = 0; i < 4; ++i)
305 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
306}
307
308static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
309{
310 u32 *event = __evt;
311 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
312 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
313 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
314 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
315 u64 address = (u64)(((u64)event[3]) << 32) | event[2];
316
317 printk(KERN_ERR "AMD-Vi: Event logged [");
318
319 switch (type) {
320 case EVENT_TYPE_ILL_DEV:
321 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
322 "address=0x%016llx flags=0x%04x]\n",
323 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
324 address, flags);
325 dump_dte_entry(devid);
326 break;
327 case EVENT_TYPE_IO_FAULT:
328 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
329 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
330 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
331 domid, address, flags);
332 break;
333 case EVENT_TYPE_DEV_TAB_ERR:
334 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
335 "address=0x%016llx flags=0x%04x]\n",
336 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
337 address, flags);
338 break;
339 case EVENT_TYPE_PAGE_TAB_ERR:
340 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
341 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
342 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
343 domid, address, flags);
344 break;
345 case EVENT_TYPE_ILL_CMD:
346 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
347 dump_command(address);
348 break;
349 case EVENT_TYPE_CMD_HARD_ERR:
350 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
351 "flags=0x%04x]\n", address, flags);
352 break;
353 case EVENT_TYPE_IOTLB_INV_TO:
354 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
355 "address=0x%016llx]\n",
356 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
357 address);
358 break;
359 case EVENT_TYPE_INV_DEV_REQ:
360 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
361 "address=0x%016llx flags=0x%04x]\n",
362 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
363 address, flags);
364 break;
365 default:
366 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
367 }
368}
369
370static void iommu_poll_events(struct amd_iommu *iommu)
371{
372 u32 head, tail;
373 unsigned long flags;
374
375 spin_lock_irqsave(&iommu->lock, flags);
376
377 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
378 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
379
380 while (head != tail) {
381 iommu_print_event(iommu, iommu->evt_buf + head);
382 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
383 }
384
385 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
386
387 spin_unlock_irqrestore(&iommu->lock, flags);
388}
389
390irqreturn_t amd_iommu_int_thread(int irq, void *data)
391{
392 struct amd_iommu *iommu;
393
394 for_each_iommu(iommu)
395 iommu_poll_events(iommu);
396
397 return IRQ_HANDLED;
398}
399
400irqreturn_t amd_iommu_int_handler(int irq, void *data)
401{
402 return IRQ_WAKE_THREAD;
403}
404
405/****************************************************************************
406 *
407 * IOMMU command queuing functions
408 *
409 ****************************************************************************/
410
411static int wait_on_sem(volatile u64 *sem)
412{
413 int i = 0;
414
415 while (*sem == 0 && i < LOOP_TIMEOUT) {
416 udelay(1);
417 i += 1;
418 }
419
420 if (i == LOOP_TIMEOUT) {
421 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
422 return -EIO;
423 }
424
425 return 0;
426}
427
428static void copy_cmd_to_buffer(struct amd_iommu *iommu,
429 struct iommu_cmd *cmd,
430 u32 tail)
431{
432 u8 *target;
433
434 target = iommu->cmd_buf + tail;
435 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
436
437 /* Copy command to buffer */
438 memcpy(target, cmd, sizeof(*cmd));
439
440 /* Tell the IOMMU about it */
441 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
442}
443
444static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
445{
446 WARN_ON(address & 0x7ULL);
447
448 memset(cmd, 0, sizeof(*cmd));
449 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
450 cmd->data[1] = upper_32_bits(__pa(address));
451 cmd->data[2] = 1;
452 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
453}
454
455static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
456{
457 memset(cmd, 0, sizeof(*cmd));
458 cmd->data[0] = devid;
459 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
460}
461
462static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
463 size_t size, u16 domid, int pde)
464{
465 u64 pages;
466 int s;
467
468 pages = iommu_num_pages(address, size, PAGE_SIZE);
469 s = 0;
470
471 if (pages > 1) {
472 /*
473 * If we have to flush more than one page, flush all
474 * TLB entries for this domain
475 */
476 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
477 s = 1;
478 }
479
480 address &= PAGE_MASK;
481
482 memset(cmd, 0, sizeof(*cmd));
483 cmd->data[1] |= domid;
484 cmd->data[2] = lower_32_bits(address);
485 cmd->data[3] = upper_32_bits(address);
486 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
487 if (s) /* size bit - we flush more than one 4kb page */
488 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
489 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
490 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
491}
492
493static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
494 u64 address, size_t size)
495{
496 u64 pages;
497 int s;
498
499 pages = iommu_num_pages(address, size, PAGE_SIZE);
500 s = 0;
501
502 if (pages > 1) {
503 /*
504 * If we have to flush more than one page, flush all
505 * TLB entries for this domain
506 */
507 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
508 s = 1;
509 }
510
511 address &= PAGE_MASK;
512
513 memset(cmd, 0, sizeof(*cmd));
514 cmd->data[0] = devid;
515 cmd->data[0] |= (qdep & 0xff) << 24;
516 cmd->data[1] = devid;
517 cmd->data[2] = lower_32_bits(address);
518 cmd->data[3] = upper_32_bits(address);
519 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
520 if (s)
521 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
522}
523
524static void build_inv_all(struct iommu_cmd *cmd)
525{
526 memset(cmd, 0, sizeof(*cmd));
527 CMD_SET_TYPE(cmd, CMD_INV_ALL);
528}
529
530/*
531 * Writes the command to the IOMMUs command buffer and informs the
532 * hardware about the new command.
533 */
534static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
535{
536 u32 left, tail, head, next_tail;
537 unsigned long flags;
538
539 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
540
541again:
542 spin_lock_irqsave(&iommu->lock, flags);
543
544 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
545 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
546 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
547 left = (head - next_tail) % iommu->cmd_buf_size;
548
549 if (left <= 2) {
550 struct iommu_cmd sync_cmd;
551 volatile u64 sem = 0;
552 int ret;
553
554 build_completion_wait(&sync_cmd, (u64)&sem);
555 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
556
557 spin_unlock_irqrestore(&iommu->lock, flags);
558
559 if ((ret = wait_on_sem(&sem)) != 0)
560 return ret;
561
562 goto again;
563 }
564
565 copy_cmd_to_buffer(iommu, cmd, tail);
566
567 /* We need to sync now to make sure all commands are processed */
568 iommu->need_sync = true;
569
570 spin_unlock_irqrestore(&iommu->lock, flags);
571
572 return 0;
573}
574
575/*
576 * This function queues a completion wait command into the command
577 * buffer of an IOMMU
578 */
579static int iommu_completion_wait(struct amd_iommu *iommu)
580{
581 struct iommu_cmd cmd;
582 volatile u64 sem = 0;
583 int ret;
584
585 if (!iommu->need_sync)
586 return 0;
587
588 build_completion_wait(&cmd, (u64)&sem);
589
590 ret = iommu_queue_command(iommu, &cmd);
591 if (ret)
592 return ret;
593
594 return wait_on_sem(&sem);
595}
596
597static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
598{
599 struct iommu_cmd cmd;
600
601 build_inv_dte(&cmd, devid);
602
603 return iommu_queue_command(iommu, &cmd);
604}
605
606static void iommu_flush_dte_all(struct amd_iommu *iommu)
607{
608 u32 devid;
609
610 for (devid = 0; devid <= 0xffff; ++devid)
611 iommu_flush_dte(iommu, devid);
612
613 iommu_completion_wait(iommu);
614}
615
616/*
617 * This function uses heavy locking and may disable irqs for some time. But
618 * this is no issue because it is only called during resume.
619 */
620static void iommu_flush_tlb_all(struct amd_iommu *iommu)
621{
622 u32 dom_id;
623
624 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
625 struct iommu_cmd cmd;
626 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
627 dom_id, 1);
628 iommu_queue_command(iommu, &cmd);
629 }
630
631 iommu_completion_wait(iommu);
632}
633
634static void iommu_flush_all(struct amd_iommu *iommu)
635{
636 struct iommu_cmd cmd;
637
638 build_inv_all(&cmd);
639
640 iommu_queue_command(iommu, &cmd);
641 iommu_completion_wait(iommu);
642}
643
644void iommu_flush_all_caches(struct amd_iommu *iommu)
645{
646 if (iommu_feature(iommu, FEATURE_IA)) {
647 iommu_flush_all(iommu);
648 } else {
649 iommu_flush_dte_all(iommu);
650 iommu_flush_tlb_all(iommu);
651 }
652}
653
654/*
655 * Command send function for flushing on-device TLB
656 */
657static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
658{
659 struct pci_dev *pdev = to_pci_dev(dev);
660 struct amd_iommu *iommu;
661 struct iommu_cmd cmd;
662 u16 devid;
663 int qdep;
664
665 qdep = pci_ats_queue_depth(pdev);
666 devid = get_device_id(dev);
667 iommu = amd_iommu_rlookup_table[devid];
668
669 build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
670
671 return iommu_queue_command(iommu, &cmd);
672}
673
674/*
675 * Command send function for invalidating a device table entry
676 */
677static int device_flush_dte(struct device *dev)
678{
679 struct amd_iommu *iommu;
680 struct pci_dev *pdev;
681 u16 devid;
682 int ret;
683
684 pdev = to_pci_dev(dev);
685 devid = get_device_id(dev);
686 iommu = amd_iommu_rlookup_table[devid];
687
688 ret = iommu_flush_dte(iommu, devid);
689 if (ret)
690 return ret;
691
692 if (pci_ats_enabled(pdev))
693 ret = device_flush_iotlb(dev, 0, ~0UL);
694
695 return ret;
696}
697
698/*
699 * TLB invalidation function which is called from the mapping functions.
700 * It invalidates a single PTE if the range to flush is within a single
701 * page. Otherwise it flushes the whole TLB of the IOMMU.
702 */
703static void __domain_flush_pages(struct protection_domain *domain,
704 u64 address, size_t size, int pde)
705{
706 struct iommu_dev_data *dev_data;
707 struct iommu_cmd cmd;
708 int ret = 0, i;
709
710 build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
711
712 for (i = 0; i < amd_iommus_present; ++i) {
713 if (!domain->dev_iommu[i])
714 continue;
715
716 /*
717 * Devices of this domain are behind this IOMMU
718 * We need a TLB flush
719 */
720 ret |= iommu_queue_command(amd_iommus[i], &cmd);
721 }
722
723 list_for_each_entry(dev_data, &domain->dev_list, list) {
724 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
725
726 if (!pci_ats_enabled(pdev))
727 continue;
728
729 ret |= device_flush_iotlb(dev_data->dev, address, size);
730 }
731
732 WARN_ON(ret);
733}
734
735static void domain_flush_pages(struct protection_domain *domain,
736 u64 address, size_t size)
737{
738 __domain_flush_pages(domain, address, size, 0);
739}
740
741/* Flush the whole IO/TLB for a given protection domain */
742static void domain_flush_tlb(struct protection_domain *domain)
743{
744 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
745}
746
747/* Flush the whole IO/TLB for a given protection domain - including PDE */
748static void domain_flush_tlb_pde(struct protection_domain *domain)
749{
750 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
751}
752
753static void domain_flush_complete(struct protection_domain *domain)
754{
755 int i;
756
757 for (i = 0; i < amd_iommus_present; ++i) {
758 if (!domain->dev_iommu[i])
759 continue;
760
761 /*
762 * Devices of this domain are behind this IOMMU
763 * We need to wait for completion of all commands.
764 */
765 iommu_completion_wait(amd_iommus[i]);
766 }
767}
768
769
770/*
771 * This function flushes the DTEs for all devices in domain
772 */
773static void domain_flush_devices(struct protection_domain *domain)
774{
775 struct iommu_dev_data *dev_data;
776 unsigned long flags;
777
778 spin_lock_irqsave(&domain->lock, flags);
779
780 list_for_each_entry(dev_data, &domain->dev_list, list)
781 device_flush_dte(dev_data->dev);
782
783 spin_unlock_irqrestore(&domain->lock, flags);
784}
785
786/****************************************************************************
787 *
788 * The functions below are used to create the page table mappings for
789 * unity mapped regions.
790 *
791 ****************************************************************************/
792
793/*
794 * This function is used to add another level to an IO page table. Adding
795 * another level increases the size of the address space by 9 bits to a size up
796 * to 64 bits.
797 */
798static bool increase_address_space(struct protection_domain *domain,
799 gfp_t gfp)
800{
801 u64 *pte;
802
803 if (domain->mode == PAGE_MODE_6_LEVEL)
804 /* address space already 64 bit large */
805 return false;
806
807 pte = (void *)get_zeroed_page(gfp);
808 if (!pte)
809 return false;
810
811 *pte = PM_LEVEL_PDE(domain->mode,
812 virt_to_phys(domain->pt_root));
813 domain->pt_root = pte;
814 domain->mode += 1;
815 domain->updated = true;
816
817 return true;
818}
819
820static u64 *alloc_pte(struct protection_domain *domain,
821 unsigned long address,
822 unsigned long page_size,
823 u64 **pte_page,
824 gfp_t gfp)
825{
826 int level, end_lvl;
827 u64 *pte, *page;
828
829 BUG_ON(!is_power_of_2(page_size));
830
831 while (address > PM_LEVEL_SIZE(domain->mode))
832 increase_address_space(domain, gfp);
833
834 level = domain->mode - 1;
835 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
836 address = PAGE_SIZE_ALIGN(address, page_size);
837 end_lvl = PAGE_SIZE_LEVEL(page_size);
838
839 while (level > end_lvl) {
840 if (!IOMMU_PTE_PRESENT(*pte)) {
841 page = (u64 *)get_zeroed_page(gfp);
842 if (!page)
843 return NULL;
844 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
845 }
846
847 /* No level skipping support yet */
848 if (PM_PTE_LEVEL(*pte) != level)
849 return NULL;
850
851 level -= 1;
852
853 pte = IOMMU_PTE_PAGE(*pte);
854
855 if (pte_page && level == end_lvl)
856 *pte_page = pte;
857
858 pte = &pte[PM_LEVEL_INDEX(level, address)];
859 }
860
861 return pte;
862}
863
864/*
865 * This function checks if there is a PTE for a given dma address. If
866 * there is one, it returns the pointer to it.
867 */
868static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
869{
870 int level;
871 u64 *pte;
872
873 if (address > PM_LEVEL_SIZE(domain->mode))
874 return NULL;
875
876 level = domain->mode - 1;
877 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
878
879 while (level > 0) {
880
881 /* Not Present */
882 if (!IOMMU_PTE_PRESENT(*pte))
883 return NULL;
884
885 /* Large PTE */
886 if (PM_PTE_LEVEL(*pte) == 0x07) {
887 unsigned long pte_mask, __pte;
888
889 /*
890 * If we have a series of large PTEs, make
891 * sure to return a pointer to the first one.
892 */
893 pte_mask = PTE_PAGE_SIZE(*pte);
894 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
895 __pte = ((unsigned long)pte) & pte_mask;
896
897 return (u64 *)__pte;
898 }
899
900 /* No level skipping support yet */
901 if (PM_PTE_LEVEL(*pte) != level)
902 return NULL;
903
904 level -= 1;
905
906 /* Walk to the next level */
907 pte = IOMMU_PTE_PAGE(*pte);
908 pte = &pte[PM_LEVEL_INDEX(level, address)];
909 }
910
911 return pte;
912}
913
914/*
915 * Generic mapping functions. It maps a physical address into a DMA
916 * address space. It allocates the page table pages if necessary.
917 * In the future it can be extended to a generic mapping function
918 * supporting all features of AMD IOMMU page tables like level skipping
919 * and full 64 bit address spaces.
920 */
921static int iommu_map_page(struct protection_domain *dom,
922 unsigned long bus_addr,
923 unsigned long phys_addr,
924 int prot,
925 unsigned long page_size)
926{
927 u64 __pte, *pte;
928 int i, count;
929
930 if (!(prot & IOMMU_PROT_MASK))
931 return -EINVAL;
932
933 bus_addr = PAGE_ALIGN(bus_addr);
934 phys_addr = PAGE_ALIGN(phys_addr);
935 count = PAGE_SIZE_PTE_COUNT(page_size);
936 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
937
938 for (i = 0; i < count; ++i)
939 if (IOMMU_PTE_PRESENT(pte[i]))
940 return -EBUSY;
941
942 if (page_size > PAGE_SIZE) {
943 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
944 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
945 } else
946 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
947
948 if (prot & IOMMU_PROT_IR)
949 __pte |= IOMMU_PTE_IR;
950 if (prot & IOMMU_PROT_IW)
951 __pte |= IOMMU_PTE_IW;
952
953 for (i = 0; i < count; ++i)
954 pte[i] = __pte;
955
956 update_domain(dom);
957
958 return 0;
959}
960
961static unsigned long iommu_unmap_page(struct protection_domain *dom,
962 unsigned long bus_addr,
963 unsigned long page_size)
964{
965 unsigned long long unmap_size, unmapped;
966 u64 *pte;
967
968 BUG_ON(!is_power_of_2(page_size));
969
970 unmapped = 0;
971
972 while (unmapped < page_size) {
973
974 pte = fetch_pte(dom, bus_addr);
975
976 if (!pte) {
977 /*
978 * No PTE for this address
979 * move forward in 4kb steps
980 */
981 unmap_size = PAGE_SIZE;
982 } else if (PM_PTE_LEVEL(*pte) == 0) {
983 /* 4kb PTE found for this address */
984 unmap_size = PAGE_SIZE;
985 *pte = 0ULL;
986 } else {
987 int count, i;
988
989 /* Large PTE found which maps this address */
990 unmap_size = PTE_PAGE_SIZE(*pte);
991 count = PAGE_SIZE_PTE_COUNT(unmap_size);
992 for (i = 0; i < count; i++)
993 pte[i] = 0ULL;
994 }
995
996 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
997 unmapped += unmap_size;
998 }
999
1000 BUG_ON(!is_power_of_2(unmapped));
1001
1002 return unmapped;
1003}
1004
1005/*
1006 * This function checks if a specific unity mapping entry is needed for
1007 * this specific IOMMU.
1008 */
1009static int iommu_for_unity_map(struct amd_iommu *iommu,
1010 struct unity_map_entry *entry)
1011{
1012 u16 bdf, i;
1013
1014 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1015 bdf = amd_iommu_alias_table[i];
1016 if (amd_iommu_rlookup_table[bdf] == iommu)
1017 return 1;
1018 }
1019
1020 return 0;
1021}
1022
1023/*
1024 * This function actually applies the mapping to the page table of the
1025 * dma_ops domain.
1026 */
1027static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1028 struct unity_map_entry *e)
1029{
1030 u64 addr;
1031 int ret;
1032
1033 for (addr = e->address_start; addr < e->address_end;
1034 addr += PAGE_SIZE) {
1035 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1036 PAGE_SIZE);
1037 if (ret)
1038 return ret;
1039 /*
1040 * if unity mapping is in aperture range mark the page
1041 * as allocated in the aperture
1042 */
1043 if (addr < dma_dom->aperture_size)
1044 __set_bit(addr >> PAGE_SHIFT,
1045 dma_dom->aperture[0]->bitmap);
1046 }
1047
1048 return 0;
1049}
1050
1051/*
1052 * Init the unity mappings for a specific IOMMU in the system
1053 *
1054 * Basically iterates over all unity mapping entries and applies them to
1055 * the default domain DMA of that IOMMU if necessary.
1056 */
1057static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1058{
1059 struct unity_map_entry *entry;
1060 int ret;
1061
1062 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1063 if (!iommu_for_unity_map(iommu, entry))
1064 continue;
1065 ret = dma_ops_unity_map(iommu->default_dom, entry);
1066 if (ret)
1067 return ret;
1068 }
1069
1070 return 0;
1071}
1072
1073/*
1074 * Inits the unity mappings required for a specific device
1075 */
1076static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1077 u16 devid)
1078{
1079 struct unity_map_entry *e;
1080 int ret;
1081
1082 list_for_each_entry(e, &amd_iommu_unity_map, list) {
1083 if (!(devid >= e->devid_start && devid <= e->devid_end))
1084 continue;
1085 ret = dma_ops_unity_map(dma_dom, e);
1086 if (ret)
1087 return ret;
1088 }
1089
1090 return 0;
1091}
1092
1093/****************************************************************************
1094 *
1095 * The next functions belong to the address allocator for the dma_ops
1096 * interface functions. They work like the allocators in the other IOMMU
1097 * drivers. It's basically a bitmap which marks the allocated pages in
1098 * the aperture. Maybe it could be enhanced in the future to a more
1099 * efficient allocator.
1100 *
1101 ****************************************************************************/
1102
1103/*
1104 * The address allocator core functions.
1105 *
1106 * called with domain->lock held
1107 */
1108
1109/*
1110 * Used to reserve address ranges in the aperture (e.g. for exclusion
1111 * ranges.
1112 */
1113static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1114 unsigned long start_page,
1115 unsigned int pages)
1116{
1117 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1118
1119 if (start_page + pages > last_page)
1120 pages = last_page - start_page;
1121
1122 for (i = start_page; i < start_page + pages; ++i) {
1123 int index = i / APERTURE_RANGE_PAGES;
1124 int page = i % APERTURE_RANGE_PAGES;
1125 __set_bit(page, dom->aperture[index]->bitmap);
1126 }
1127}
1128
1129/*
1130 * This function is used to add a new aperture range to an existing
1131 * aperture in case of dma_ops domain allocation or address allocation
1132 * failure.
1133 */
1134static int alloc_new_range(struct dma_ops_domain *dma_dom,
1135 bool populate, gfp_t gfp)
1136{
1137 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
1138 struct amd_iommu *iommu;
1139 unsigned long i;
1140
1141#ifdef CONFIG_IOMMU_STRESS
1142 populate = false;
1143#endif
1144
1145 if (index >= APERTURE_MAX_RANGES)
1146 return -ENOMEM;
1147
1148 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
1149 if (!dma_dom->aperture[index])
1150 return -ENOMEM;
1151
1152 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
1153 if (!dma_dom->aperture[index]->bitmap)
1154 goto out_free;
1155
1156 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
1157
1158 if (populate) {
1159 unsigned long address = dma_dom->aperture_size;
1160 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
1161 u64 *pte, *pte_page;
1162
1163 for (i = 0; i < num_ptes; ++i) {
1164 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1165 &pte_page, gfp);
1166 if (!pte)
1167 goto out_free;
1168
1169 dma_dom->aperture[index]->pte_pages[i] = pte_page;
1170
1171 address += APERTURE_RANGE_SIZE / 64;
1172 }
1173 }
1174
1175 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
1176
1177 /* Initialize the exclusion range if necessary */
1178 for_each_iommu(iommu) {
1179 if (iommu->exclusion_start &&
1180 iommu->exclusion_start >= dma_dom->aperture[index]->offset
1181 && iommu->exclusion_start < dma_dom->aperture_size) {
1182 unsigned long startpage;
1183 int pages = iommu_num_pages(iommu->exclusion_start,
1184 iommu->exclusion_length,
1185 PAGE_SIZE);
1186 startpage = iommu->exclusion_start >> PAGE_SHIFT;
1187 dma_ops_reserve_addresses(dma_dom, startpage, pages);
1188 }
1189 }
1190
1191 /*
1192 * Check for areas already mapped as present in the new aperture
1193 * range and mark those pages as reserved in the allocator. Such
1194 * mappings may already exist as a result of requested unity
1195 * mappings for devices.
1196 */
1197 for (i = dma_dom->aperture[index]->offset;
1198 i < dma_dom->aperture_size;
1199 i += PAGE_SIZE) {
1200 u64 *pte = fetch_pte(&dma_dom->domain, i);
1201 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1202 continue;
1203
1204 dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
1205 }
1206
1207 update_domain(&dma_dom->domain);
1208
1209 return 0;
1210
1211out_free:
1212 update_domain(&dma_dom->domain);
1213
1214 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
1215
1216 kfree(dma_dom->aperture[index]);
1217 dma_dom->aperture[index] = NULL;
1218
1219 return -ENOMEM;
1220}
1221
1222static unsigned long dma_ops_area_alloc(struct device *dev,
1223 struct dma_ops_domain *dom,
1224 unsigned int pages,
1225 unsigned long align_mask,
1226 u64 dma_mask,
1227 unsigned long start)
1228{
1229 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1230 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1231 int i = start >> APERTURE_RANGE_SHIFT;
1232 unsigned long boundary_size;
1233 unsigned long address = -1;
1234 unsigned long limit;
1235
1236 next_bit >>= PAGE_SHIFT;
1237
1238 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1239 PAGE_SIZE) >> PAGE_SHIFT;
1240
1241 for (;i < max_index; ++i) {
1242 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
1243
1244 if (dom->aperture[i]->offset >= dma_mask)
1245 break;
1246
1247 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
1248 dma_mask >> PAGE_SHIFT);
1249
1250 address = iommu_area_alloc(dom->aperture[i]->bitmap,
1251 limit, next_bit, pages, 0,
1252 boundary_size, align_mask);
1253 if (address != -1) {
1254 address = dom->aperture[i]->offset +
1255 (address << PAGE_SHIFT);
1256 dom->next_address = address + (pages << PAGE_SHIFT);
1257 break;
1258 }
1259
1260 next_bit = 0;
1261 }
1262
1263 return address;
1264}
1265
1266static unsigned long dma_ops_alloc_addresses(struct device *dev,
1267 struct dma_ops_domain *dom,
1268 unsigned int pages,
1269 unsigned long align_mask,
1270 u64 dma_mask)
1271{
1272 unsigned long address;
1273
1274#ifdef CONFIG_IOMMU_STRESS
1275 dom->next_address = 0;
1276 dom->need_flush = true;
1277#endif
1278
1279 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1280 dma_mask, dom->next_address);
1281
1282 if (address == -1) {
1283 dom->next_address = 0;
1284 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1285 dma_mask, 0);
1286 dom->need_flush = true;
1287 }
1288
1289 if (unlikely(address == -1))
1290 address = DMA_ERROR_CODE;
1291
1292 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1293
1294 return address;
1295}
1296
1297/*
1298 * The address free function.
1299 *
1300 * called with domain->lock held
1301 */
1302static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1303 unsigned long address,
1304 unsigned int pages)
1305{
1306 unsigned i = address >> APERTURE_RANGE_SHIFT;
1307 struct aperture_range *range = dom->aperture[i];
1308
1309 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1310
1311#ifdef CONFIG_IOMMU_STRESS
1312 if (i < 4)
1313 return;
1314#endif
1315
1316 if (address >= dom->next_address)
1317 dom->need_flush = true;
1318
1319 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
1320
1321 bitmap_clear(range->bitmap, address, pages);
1322
1323}
1324
1325/****************************************************************************
1326 *
1327 * The next functions belong to the domain allocation. A domain is
1328 * allocated for every IOMMU as the default domain. If device isolation
1329 * is enabled, every device gets its own domain. The most important thing
1330 * about domains is the page table mapping the DMA address space they
1331 * contain.
1332 *
1333 ****************************************************************************/
1334
1335/*
1336 * This function adds a protection domain to the global protection domain list
1337 */
1338static void add_domain_to_list(struct protection_domain *domain)
1339{
1340 unsigned long flags;
1341
1342 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1343 list_add(&domain->list, &amd_iommu_pd_list);
1344 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1345}
1346
1347/*
1348 * This function removes a protection domain to the global
1349 * protection domain list
1350 */
1351static void del_domain_from_list(struct protection_domain *domain)
1352{
1353 unsigned long flags;
1354
1355 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1356 list_del(&domain->list);
1357 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1358}
1359
1360static u16 domain_id_alloc(void)
1361{
1362 unsigned long flags;
1363 int id;
1364
1365 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1366 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1367 BUG_ON(id == 0);
1368 if (id > 0 && id < MAX_DOMAIN_ID)
1369 __set_bit(id, amd_iommu_pd_alloc_bitmap);
1370 else
1371 id = 0;
1372 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1373
1374 return id;
1375}
1376
1377static void domain_id_free(int id)
1378{
1379 unsigned long flags;
1380
1381 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1382 if (id > 0 && id < MAX_DOMAIN_ID)
1383 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1384 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1385}
1386
1387static void free_pagetable(struct protection_domain *domain)
1388{
1389 int i, j;
1390 u64 *p1, *p2, *p3;
1391
1392 p1 = domain->pt_root;
1393
1394 if (!p1)
1395 return;
1396
1397 for (i = 0; i < 512; ++i) {
1398 if (!IOMMU_PTE_PRESENT(p1[i]))
1399 continue;
1400
1401 p2 = IOMMU_PTE_PAGE(p1[i]);
1402 for (j = 0; j < 512; ++j) {
1403 if (!IOMMU_PTE_PRESENT(p2[j]))
1404 continue;
1405 p3 = IOMMU_PTE_PAGE(p2[j]);
1406 free_page((unsigned long)p3);
1407 }
1408
1409 free_page((unsigned long)p2);
1410 }
1411
1412 free_page((unsigned long)p1);
1413
1414 domain->pt_root = NULL;
1415}
1416
1417/*
1418 * Free a domain, only used if something went wrong in the
1419 * allocation path and we need to free an already allocated page table
1420 */
1421static void dma_ops_domain_free(struct dma_ops_domain *dom)
1422{
1423 int i;
1424
1425 if (!dom)
1426 return;
1427
1428 del_domain_from_list(&dom->domain);
1429
1430 free_pagetable(&dom->domain);
1431
1432 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1433 if (!dom->aperture[i])
1434 continue;
1435 free_page((unsigned long)dom->aperture[i]->bitmap);
1436 kfree(dom->aperture[i]);
1437 }
1438
1439 kfree(dom);
1440}
1441
1442/*
1443 * Allocates a new protection domain usable for the dma_ops functions.
1444 * It also initializes the page table and the address allocator data
1445 * structures required for the dma_ops interface
1446 */
1447static struct dma_ops_domain *dma_ops_domain_alloc(void)
1448{
1449 struct dma_ops_domain *dma_dom;
1450
1451 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1452 if (!dma_dom)
1453 return NULL;
1454
1455 spin_lock_init(&dma_dom->domain.lock);
1456
1457 dma_dom->domain.id = domain_id_alloc();
1458 if (dma_dom->domain.id == 0)
1459 goto free_dma_dom;
1460 INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1461 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1462 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1463 dma_dom->domain.flags = PD_DMA_OPS_MASK;
1464 dma_dom->domain.priv = dma_dom;
1465 if (!dma_dom->domain.pt_root)
1466 goto free_dma_dom;
1467
1468 dma_dom->need_flush = false;
1469 dma_dom->target_dev = 0xffff;
1470
1471 add_domain_to_list(&dma_dom->domain);
1472
1473 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1474 goto free_dma_dom;
1475
1476 /*
1477 * mark the first page as allocated so we never return 0 as
1478 * a valid dma-address. So we can use 0 as error value
1479 */
1480 dma_dom->aperture[0]->bitmap[0] = 1;
1481 dma_dom->next_address = 0;
1482
1483
1484 return dma_dom;
1485
1486free_dma_dom:
1487 dma_ops_domain_free(dma_dom);
1488
1489 return NULL;
1490}
1491
1492/*
1493 * little helper function to check whether a given protection domain is a
1494 * dma_ops domain
1495 */
1496static bool dma_ops_domain(struct protection_domain *domain)
1497{
1498 return domain->flags & PD_DMA_OPS_MASK;
1499}
1500
1501static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1502{
1503 u64 pte_root = virt_to_phys(domain->pt_root);
1504 u32 flags = 0;
1505
1506 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1507 << DEV_ENTRY_MODE_SHIFT;
1508 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1509
1510 if (ats)
1511 flags |= DTE_FLAG_IOTLB;
1512
1513 amd_iommu_dev_table[devid].data[3] |= flags;
1514 amd_iommu_dev_table[devid].data[2] = domain->id;
1515 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1516 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1517}
1518
1519static void clear_dte_entry(u16 devid)
1520{
1521 /* remove entry from the device table seen by the hardware */
1522 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1523 amd_iommu_dev_table[devid].data[1] = 0;
1524 amd_iommu_dev_table[devid].data[2] = 0;
1525
1526 amd_iommu_apply_erratum_63(devid);
1527}
1528
1529static void do_attach(struct device *dev, struct protection_domain *domain)
1530{
1531 struct iommu_dev_data *dev_data;
1532 struct amd_iommu *iommu;
1533 struct pci_dev *pdev;
1534 bool ats = false;
1535 u16 devid;
1536
1537 devid = get_device_id(dev);
1538 iommu = amd_iommu_rlookup_table[devid];
1539 dev_data = get_dev_data(dev);
1540 pdev = to_pci_dev(dev);
1541
1542 if (amd_iommu_iotlb_sup)
1543 ats = pci_ats_enabled(pdev);
1544
1545 /* Update data structures */
1546 dev_data->domain = domain;
1547 list_add(&dev_data->list, &domain->dev_list);
1548 set_dte_entry(devid, domain, ats);
1549
1550 /* Do reference counting */
1551 domain->dev_iommu[iommu->index] += 1;
1552 domain->dev_cnt += 1;
1553
1554 /* Flush the DTE entry */
1555 device_flush_dte(dev);
1556}
1557
1558static void do_detach(struct device *dev)
1559{
1560 struct iommu_dev_data *dev_data;
1561 struct amd_iommu *iommu;
1562 u16 devid;
1563
1564 devid = get_device_id(dev);
1565 iommu = amd_iommu_rlookup_table[devid];
1566 dev_data = get_dev_data(dev);
1567
1568 /* decrease reference counters */
1569 dev_data->domain->dev_iommu[iommu->index] -= 1;
1570 dev_data->domain->dev_cnt -= 1;
1571
1572 /* Update data structures */
1573 dev_data->domain = NULL;
1574 list_del(&dev_data->list);
1575 clear_dte_entry(devid);
1576
1577 /* Flush the DTE entry */
1578 device_flush_dte(dev);
1579}
1580
1581/*
1582 * If a device is not yet associated with a domain, this function does
1583 * assigns it visible for the hardware
1584 */
1585static int __attach_device(struct device *dev,
1586 struct protection_domain *domain)
1587{
1588 struct iommu_dev_data *dev_data, *alias_data;
1589 int ret;
1590
1591 dev_data = get_dev_data(dev);
1592 alias_data = get_dev_data(dev_data->alias);
1593
1594 if (!alias_data)
1595 return -EINVAL;
1596
1597 /* lock domain */
1598 spin_lock(&domain->lock);
1599
1600 /* Some sanity checks */
1601 ret = -EBUSY;
1602 if (alias_data->domain != NULL &&
1603 alias_data->domain != domain)
1604 goto out_unlock;
1605
1606 if (dev_data->domain != NULL &&
1607 dev_data->domain != domain)
1608 goto out_unlock;
1609
1610 /* Do real assignment */
1611 if (dev_data->alias != dev) {
1612 alias_data = get_dev_data(dev_data->alias);
1613 if (alias_data->domain == NULL)
1614 do_attach(dev_data->alias, domain);
1615
1616 atomic_inc(&alias_data->bind);
1617 }
1618
1619 if (dev_data->domain == NULL)
1620 do_attach(dev, domain);
1621
1622 atomic_inc(&dev_data->bind);
1623
1624 ret = 0;
1625
1626out_unlock:
1627
1628 /* ready */
1629 spin_unlock(&domain->lock);
1630
1631 return ret;
1632}
1633
1634/*
1635 * If a device is not yet associated with a domain, this function does
1636 * assigns it visible for the hardware
1637 */
1638static int attach_device(struct device *dev,
1639 struct protection_domain *domain)
1640{
1641 struct pci_dev *pdev = to_pci_dev(dev);
1642 unsigned long flags;
1643 int ret;
1644
1645 if (amd_iommu_iotlb_sup)
1646 pci_enable_ats(pdev, PAGE_SHIFT);
1647
1648 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1649 ret = __attach_device(dev, domain);
1650 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1651
1652 /*
1653 * We might boot into a crash-kernel here. The crashed kernel
1654 * left the caches in the IOMMU dirty. So we have to flush
1655 * here to evict all dirty stuff.
1656 */
1657 domain_flush_tlb_pde(domain);
1658
1659 return ret;
1660}
1661
1662/*
1663 * Removes a device from a protection domain (unlocked)
1664 */
1665static void __detach_device(struct device *dev)
1666{
1667 struct iommu_dev_data *dev_data = get_dev_data(dev);
1668 struct iommu_dev_data *alias_data;
1669 struct protection_domain *domain;
1670 unsigned long flags;
1671
1672 BUG_ON(!dev_data->domain);
1673
1674 domain = dev_data->domain;
1675
1676 spin_lock_irqsave(&domain->lock, flags);
1677
1678 if (dev_data->alias != dev) {
1679 alias_data = get_dev_data(dev_data->alias);
1680 if (atomic_dec_and_test(&alias_data->bind))
1681 do_detach(dev_data->alias);
1682 }
1683
1684 if (atomic_dec_and_test(&dev_data->bind))
1685 do_detach(dev);
1686
1687 spin_unlock_irqrestore(&domain->lock, flags);
1688
1689 /*
1690 * If we run in passthrough mode the device must be assigned to the
1691 * passthrough domain if it is detached from any other domain.
1692 * Make sure we can deassign from the pt_domain itself.
1693 */
1694 if (iommu_pass_through &&
1695 (dev_data->domain == NULL && domain != pt_domain))
1696 __attach_device(dev, pt_domain);
1697}
1698
1699/*
1700 * Removes a device from a protection domain (with devtable_lock held)
1701 */
1702static void detach_device(struct device *dev)
1703{
1704 struct pci_dev *pdev = to_pci_dev(dev);
1705 unsigned long flags;
1706
1707 /* lock device table */
1708 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1709 __detach_device(dev);
1710 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1711
1712 if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
1713 pci_disable_ats(pdev);
1714}
1715
1716/*
1717 * Find out the protection domain structure for a given PCI device. This
1718 * will give us the pointer to the page table root for example.
1719 */
1720static struct protection_domain *domain_for_device(struct device *dev)
1721{
1722 struct protection_domain *dom;
1723 struct iommu_dev_data *dev_data, *alias_data;
1724 unsigned long flags;
1725 u16 devid;
1726
1727 devid = get_device_id(dev);
1728 dev_data = get_dev_data(dev);
1729 alias_data = get_dev_data(dev_data->alias);
1730 if (!alias_data)
1731 return NULL;
1732
1733 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1734 dom = dev_data->domain;
1735 if (dom == NULL &&
1736 alias_data->domain != NULL) {
1737 __attach_device(dev, alias_data->domain);
1738 dom = alias_data->domain;
1739 }
1740
1741 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1742
1743 return dom;
1744}
1745
1746static int device_change_notifier(struct notifier_block *nb,
1747 unsigned long action, void *data)
1748{
1749 struct device *dev = data;
1750 u16 devid;
1751 struct protection_domain *domain;
1752 struct dma_ops_domain *dma_domain;
1753 struct amd_iommu *iommu;
1754 unsigned long flags;
1755
1756 if (!check_device(dev))
1757 return 0;
1758
1759 devid = get_device_id(dev);
1760 iommu = amd_iommu_rlookup_table[devid];
1761
1762 switch (action) {
1763 case BUS_NOTIFY_UNBOUND_DRIVER:
1764
1765 domain = domain_for_device(dev);
1766
1767 if (!domain)
1768 goto out;
1769 if (iommu_pass_through)
1770 break;
1771 detach_device(dev);
1772 break;
1773 case BUS_NOTIFY_ADD_DEVICE:
1774
1775 iommu_init_device(dev);
1776
1777 domain = domain_for_device(dev);
1778
1779 /* allocate a protection domain if a device is added */
1780 dma_domain = find_protection_domain(devid);
1781 if (dma_domain)
1782 goto out;
1783 dma_domain = dma_ops_domain_alloc();
1784 if (!dma_domain)
1785 goto out;
1786 dma_domain->target_dev = devid;
1787
1788 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1789 list_add_tail(&dma_domain->list, &iommu_pd_list);
1790 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1791
1792 break;
1793 case BUS_NOTIFY_DEL_DEVICE:
1794
1795 iommu_uninit_device(dev);
1796
1797 default:
1798 goto out;
1799 }
1800
1801 device_flush_dte(dev);
1802 iommu_completion_wait(iommu);
1803
1804out:
1805 return 0;
1806}
1807
1808static struct notifier_block device_nb = {
1809 .notifier_call = device_change_notifier,
1810};
1811
1812void amd_iommu_init_notifier(void)
1813{
1814 bus_register_notifier(&pci_bus_type, &device_nb);
1815}
1816
1817/*****************************************************************************
1818 *
1819 * The next functions belong to the dma_ops mapping/unmapping code.
1820 *
1821 *****************************************************************************/
1822
1823/*
1824 * In the dma_ops path we only have the struct device. This function
1825 * finds the corresponding IOMMU, the protection domain and the
1826 * requestor id for a given device.
1827 * If the device is not yet associated with a domain this is also done
1828 * in this function.
1829 */
1830static struct protection_domain *get_domain(struct device *dev)
1831{
1832 struct protection_domain *domain;
1833 struct dma_ops_domain *dma_dom;
1834 u16 devid = get_device_id(dev);
1835
1836 if (!check_device(dev))
1837 return ERR_PTR(-EINVAL);
1838
1839 domain = domain_for_device(dev);
1840 if (domain != NULL && !dma_ops_domain(domain))
1841 return ERR_PTR(-EBUSY);
1842
1843 if (domain != NULL)
1844 return domain;
1845
1846 /* Device not bount yet - bind it */
1847 dma_dom = find_protection_domain(devid);
1848 if (!dma_dom)
1849 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1850 attach_device(dev, &dma_dom->domain);
1851 DUMP_printk("Using protection domain %d for device %s\n",
1852 dma_dom->domain.id, dev_name(dev));
1853
1854 return &dma_dom->domain;
1855}
1856
1857static void update_device_table(struct protection_domain *domain)
1858{
1859 struct iommu_dev_data *dev_data;
1860
1861 list_for_each_entry(dev_data, &domain->dev_list, list) {
1862 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
1863 u16 devid = get_device_id(dev_data->dev);
1864 set_dte_entry(devid, domain, pci_ats_enabled(pdev));
1865 }
1866}
1867
1868static void update_domain(struct protection_domain *domain)
1869{
1870 if (!domain->updated)
1871 return;
1872
1873 update_device_table(domain);
1874
1875 domain_flush_devices(domain);
1876 domain_flush_tlb_pde(domain);
1877
1878 domain->updated = false;
1879}
1880
1881/*
1882 * This function fetches the PTE for a given address in the aperture
1883 */
1884static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1885 unsigned long address)
1886{
1887 struct aperture_range *aperture;
1888 u64 *pte, *pte_page;
1889
1890 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1891 if (!aperture)
1892 return NULL;
1893
1894 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1895 if (!pte) {
1896 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1897 GFP_ATOMIC);
1898 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1899 } else
1900 pte += PM_LEVEL_INDEX(0, address);
1901
1902 update_domain(&dom->domain);
1903
1904 return pte;
1905}
1906
1907/*
1908 * This is the generic map function. It maps one 4kb page at paddr to
1909 * the given address in the DMA address space for the domain.
1910 */
1911static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1912 unsigned long address,
1913 phys_addr_t paddr,
1914 int direction)
1915{
1916 u64 *pte, __pte;
1917
1918 WARN_ON(address > dom->aperture_size);
1919
1920 paddr &= PAGE_MASK;
1921
1922 pte = dma_ops_get_pte(dom, address);
1923 if (!pte)
1924 return DMA_ERROR_CODE;
1925
1926 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1927
1928 if (direction == DMA_TO_DEVICE)
1929 __pte |= IOMMU_PTE_IR;
1930 else if (direction == DMA_FROM_DEVICE)
1931 __pte |= IOMMU_PTE_IW;
1932 else if (direction == DMA_BIDIRECTIONAL)
1933 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
1934
1935 WARN_ON(*pte);
1936
1937 *pte = __pte;
1938
1939 return (dma_addr_t)address;
1940}
1941
1942/*
1943 * The generic unmapping function for on page in the DMA address space.
1944 */
1945static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
1946 unsigned long address)
1947{
1948 struct aperture_range *aperture;
1949 u64 *pte;
1950
1951 if (address >= dom->aperture_size)
1952 return;
1953
1954 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1955 if (!aperture)
1956 return;
1957
1958 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1959 if (!pte)
1960 return;
1961
1962 pte += PM_LEVEL_INDEX(0, address);
1963
1964 WARN_ON(!*pte);
1965
1966 *pte = 0ULL;
1967}
1968
1969/*
1970 * This function contains common code for mapping of a physically
1971 * contiguous memory region into DMA address space. It is used by all
1972 * mapping functions provided with this IOMMU driver.
1973 * Must be called with the domain lock held.
1974 */
1975static dma_addr_t __map_single(struct device *dev,
1976 struct dma_ops_domain *dma_dom,
1977 phys_addr_t paddr,
1978 size_t size,
1979 int dir,
1980 bool align,
1981 u64 dma_mask)
1982{
1983 dma_addr_t offset = paddr & ~PAGE_MASK;
1984 dma_addr_t address, start, ret;
1985 unsigned int pages;
1986 unsigned long align_mask = 0;
1987 int i;
1988
1989 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
1990 paddr &= PAGE_MASK;
1991
1992 INC_STATS_COUNTER(total_map_requests);
1993
1994 if (pages > 1)
1995 INC_STATS_COUNTER(cross_page);
1996
1997 if (align)
1998 align_mask = (1UL << get_order(size)) - 1;
1999
2000retry:
2001 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
2002 dma_mask);
2003 if (unlikely(address == DMA_ERROR_CODE)) {
2004 /*
2005 * setting next_address here will let the address
2006 * allocator only scan the new allocated range in the
2007 * first run. This is a small optimization.
2008 */
2009 dma_dom->next_address = dma_dom->aperture_size;
2010
2011 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
2012 goto out;
2013
2014 /*
2015 * aperture was successfully enlarged by 128 MB, try
2016 * allocation again
2017 */
2018 goto retry;
2019 }
2020
2021 start = address;
2022 for (i = 0; i < pages; ++i) {
2023 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
2024 if (ret == DMA_ERROR_CODE)
2025 goto out_unmap;
2026
2027 paddr += PAGE_SIZE;
2028 start += PAGE_SIZE;
2029 }
2030 address += offset;
2031
2032 ADD_STATS_COUNTER(alloced_io_mem, size);
2033
2034 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
2035 domain_flush_tlb(&dma_dom->domain);
2036 dma_dom->need_flush = false;
2037 } else if (unlikely(amd_iommu_np_cache))
2038 domain_flush_pages(&dma_dom->domain, address, size);
2039
2040out:
2041 return address;
2042
2043out_unmap:
2044
2045 for (--i; i >= 0; --i) {
2046 start -= PAGE_SIZE;
2047 dma_ops_domain_unmap(dma_dom, start);
2048 }
2049
2050 dma_ops_free_addresses(dma_dom, address, pages);
2051
2052 return DMA_ERROR_CODE;
2053}
2054
2055/*
2056 * Does the reverse of the __map_single function. Must be called with
2057 * the domain lock held too
2058 */
2059static void __unmap_single(struct dma_ops_domain *dma_dom,
2060 dma_addr_t dma_addr,
2061 size_t size,
2062 int dir)
2063{
2064 dma_addr_t flush_addr;
2065 dma_addr_t i, start;
2066 unsigned int pages;
2067
2068 if ((dma_addr == DMA_ERROR_CODE) ||
2069 (dma_addr + size > dma_dom->aperture_size))
2070 return;
2071
2072 flush_addr = dma_addr;
2073 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
2074 dma_addr &= PAGE_MASK;
2075 start = dma_addr;
2076
2077 for (i = 0; i < pages; ++i) {
2078 dma_ops_domain_unmap(dma_dom, start);
2079 start += PAGE_SIZE;
2080 }
2081
2082 SUB_STATS_COUNTER(alloced_io_mem, size);
2083
2084 dma_ops_free_addresses(dma_dom, dma_addr, pages);
2085
2086 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
2087 domain_flush_pages(&dma_dom->domain, flush_addr, size);
2088 dma_dom->need_flush = false;
2089 }
2090}
2091
2092/*
2093 * The exported map_single function for dma_ops.
2094 */
2095static dma_addr_t map_page(struct device *dev, struct page *page,
2096 unsigned long offset, size_t size,
2097 enum dma_data_direction dir,
2098 struct dma_attrs *attrs)
2099{
2100 unsigned long flags;
2101 struct protection_domain *domain;
2102 dma_addr_t addr;
2103 u64 dma_mask;
2104 phys_addr_t paddr = page_to_phys(page) + offset;
2105
2106 INC_STATS_COUNTER(cnt_map_single);
2107
2108 domain = get_domain(dev);
2109 if (PTR_ERR(domain) == -EINVAL)
2110 return (dma_addr_t)paddr;
2111 else if (IS_ERR(domain))
2112 return DMA_ERROR_CODE;
2113
2114 dma_mask = *dev->dma_mask;
2115
2116 spin_lock_irqsave(&domain->lock, flags);
2117
2118 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
2119 dma_mask);
2120 if (addr == DMA_ERROR_CODE)
2121 goto out;
2122
2123 domain_flush_complete(domain);
2124
2125out:
2126 spin_unlock_irqrestore(&domain->lock, flags);
2127
2128 return addr;
2129}
2130
2131/*
2132 * The exported unmap_single function for dma_ops.
2133 */
2134static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2135 enum dma_data_direction dir, struct dma_attrs *attrs)
2136{
2137 unsigned long flags;
2138 struct protection_domain *domain;
2139
2140 INC_STATS_COUNTER(cnt_unmap_single);
2141
2142 domain = get_domain(dev);
2143 if (IS_ERR(domain))
2144 return;
2145
2146 spin_lock_irqsave(&domain->lock, flags);
2147
2148 __unmap_single(domain->priv, dma_addr, size, dir);
2149
2150 domain_flush_complete(domain);
2151
2152 spin_unlock_irqrestore(&domain->lock, flags);
2153}
2154
2155/*
2156 * This is a special map_sg function which is used if we should map a
2157 * device which is not handled by an AMD IOMMU in the system.
2158 */
2159static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
2160 int nelems, int dir)
2161{
2162 struct scatterlist *s;
2163 int i;
2164
2165 for_each_sg(sglist, s, nelems, i) {
2166 s->dma_address = (dma_addr_t)sg_phys(s);
2167 s->dma_length = s->length;
2168 }
2169
2170 return nelems;
2171}
2172
2173/*
2174 * The exported map_sg function for dma_ops (handles scatter-gather
2175 * lists).
2176 */
2177static int map_sg(struct device *dev, struct scatterlist *sglist,
2178 int nelems, enum dma_data_direction dir,
2179 struct dma_attrs *attrs)
2180{
2181 unsigned long flags;
2182 struct protection_domain *domain;
2183 int i;
2184 struct scatterlist *s;
2185 phys_addr_t paddr;
2186 int mapped_elems = 0;
2187 u64 dma_mask;
2188
2189 INC_STATS_COUNTER(cnt_map_sg);
2190
2191 domain = get_domain(dev);
2192 if (PTR_ERR(domain) == -EINVAL)
2193 return map_sg_no_iommu(dev, sglist, nelems, dir);
2194 else if (IS_ERR(domain))
2195 return 0;
2196
2197 dma_mask = *dev->dma_mask;
2198
2199 spin_lock_irqsave(&domain->lock, flags);
2200
2201 for_each_sg(sglist, s, nelems, i) {
2202 paddr = sg_phys(s);
2203
2204 s->dma_address = __map_single(dev, domain->priv,
2205 paddr, s->length, dir, false,
2206 dma_mask);
2207
2208 if (s->dma_address) {
2209 s->dma_length = s->length;
2210 mapped_elems++;
2211 } else
2212 goto unmap;
2213 }
2214
2215 domain_flush_complete(domain);
2216
2217out:
2218 spin_unlock_irqrestore(&domain->lock, flags);
2219
2220 return mapped_elems;
2221unmap:
2222 for_each_sg(sglist, s, mapped_elems, i) {
2223 if (s->dma_address)
2224 __unmap_single(domain->priv, s->dma_address,
2225 s->dma_length, dir);
2226 s->dma_address = s->dma_length = 0;
2227 }
2228
2229 mapped_elems = 0;
2230
2231 goto out;
2232}
2233
2234/*
2235 * The exported map_sg function for dma_ops (handles scatter-gather
2236 * lists).
2237 */
2238static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2239 int nelems, enum dma_data_direction dir,
2240 struct dma_attrs *attrs)
2241{
2242 unsigned long flags;
2243 struct protection_domain *domain;
2244 struct scatterlist *s;
2245 int i;
2246
2247 INC_STATS_COUNTER(cnt_unmap_sg);
2248
2249 domain = get_domain(dev);
2250 if (IS_ERR(domain))
2251 return;
2252
2253 spin_lock_irqsave(&domain->lock, flags);
2254
2255 for_each_sg(sglist, s, nelems, i) {
2256 __unmap_single(domain->priv, s->dma_address,
2257 s->dma_length, dir);
2258 s->dma_address = s->dma_length = 0;
2259 }
2260
2261 domain_flush_complete(domain);
2262
2263 spin_unlock_irqrestore(&domain->lock, flags);
2264}
2265
2266/*
2267 * The exported alloc_coherent function for dma_ops.
2268 */
2269static void *alloc_coherent(struct device *dev, size_t size,
2270 dma_addr_t *dma_addr, gfp_t flag)
2271{
2272 unsigned long flags;
2273 void *virt_addr;
2274 struct protection_domain *domain;
2275 phys_addr_t paddr;
2276 u64 dma_mask = dev->coherent_dma_mask;
2277
2278 INC_STATS_COUNTER(cnt_alloc_coherent);
2279
2280 domain = get_domain(dev);
2281 if (PTR_ERR(domain) == -EINVAL) {
2282 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2283 *dma_addr = __pa(virt_addr);
2284 return virt_addr;
2285 } else if (IS_ERR(domain))
2286 return NULL;
2287
2288 dma_mask = dev->coherent_dma_mask;
2289 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2290 flag |= __GFP_ZERO;
2291
2292 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2293 if (!virt_addr)
2294 return NULL;
2295
2296 paddr = virt_to_phys(virt_addr);
2297
2298 if (!dma_mask)
2299 dma_mask = *dev->dma_mask;
2300
2301 spin_lock_irqsave(&domain->lock, flags);
2302
2303 *dma_addr = __map_single(dev, domain->priv, paddr,
2304 size, DMA_BIDIRECTIONAL, true, dma_mask);
2305
2306 if (*dma_addr == DMA_ERROR_CODE) {
2307 spin_unlock_irqrestore(&domain->lock, flags);
2308 goto out_free;
2309 }
2310
2311 domain_flush_complete(domain);
2312
2313 spin_unlock_irqrestore(&domain->lock, flags);
2314
2315 return virt_addr;
2316
2317out_free:
2318
2319 free_pages((unsigned long)virt_addr, get_order(size));
2320
2321 return NULL;
2322}
2323
2324/*
2325 * The exported free_coherent function for dma_ops.
2326 */
2327static void free_coherent(struct device *dev, size_t size,
2328 void *virt_addr, dma_addr_t dma_addr)
2329{
2330 unsigned long flags;
2331 struct protection_domain *domain;
2332
2333 INC_STATS_COUNTER(cnt_free_coherent);
2334
2335 domain = get_domain(dev);
2336 if (IS_ERR(domain))
2337 goto free_mem;
2338
2339 spin_lock_irqsave(&domain->lock, flags);
2340
2341 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2342
2343 domain_flush_complete(domain);
2344
2345 spin_unlock_irqrestore(&domain->lock, flags);
2346
2347free_mem:
2348 free_pages((unsigned long)virt_addr, get_order(size));
2349}
2350
2351/*
2352 * This function is called by the DMA layer to find out if we can handle a
2353 * particular device. It is part of the dma_ops.
2354 */
2355static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2356{
2357 return check_device(dev);
2358}
2359
2360/*
2361 * The function for pre-allocating protection domains.
2362 *
2363 * If the driver core informs the DMA layer if a driver grabs a device
2364 * we don't need to preallocate the protection domains anymore.
2365 * For now we have to.
2366 */
2367static void prealloc_protection_domains(void)
2368{
2369 struct pci_dev *dev = NULL;
2370 struct dma_ops_domain *dma_dom;
2371 u16 devid;
2372
2373 for_each_pci_dev(dev) {
2374
2375 /* Do we handle this device? */
2376 if (!check_device(&dev->dev))
2377 continue;
2378
2379 /* Is there already any domain for it? */
2380 if (domain_for_device(&dev->dev))
2381 continue;
2382
2383 devid = get_device_id(&dev->dev);
2384
2385 dma_dom = dma_ops_domain_alloc();
2386 if (!dma_dom)
2387 continue;
2388 init_unity_mappings_for_device(dma_dom, devid);
2389 dma_dom->target_dev = devid;
2390
2391 attach_device(&dev->dev, &dma_dom->domain);
2392
2393 list_add_tail(&dma_dom->list, &iommu_pd_list);
2394 }
2395}
2396
2397static struct dma_map_ops amd_iommu_dma_ops = {
2398 .alloc_coherent = alloc_coherent,
2399 .free_coherent = free_coherent,
2400 .map_page = map_page,
2401 .unmap_page = unmap_page,
2402 .map_sg = map_sg,
2403 .unmap_sg = unmap_sg,
2404 .dma_supported = amd_iommu_dma_supported,
2405};
2406
2407static unsigned device_dma_ops_init(void)
2408{
2409 struct pci_dev *pdev = NULL;
2410 unsigned unhandled = 0;
2411
2412 for_each_pci_dev(pdev) {
2413 if (!check_device(&pdev->dev)) {
2414 unhandled += 1;
2415 continue;
2416 }
2417
2418 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
2419 }
2420
2421 return unhandled;
2422}
2423
2424/*
2425 * The function which clues the AMD IOMMU driver into dma_ops.
2426 */
2427
2428void __init amd_iommu_init_api(void)
2429{
2430 register_iommu(&amd_iommu_ops);
2431}
2432
2433int __init amd_iommu_init_dma_ops(void)
2434{
2435 struct amd_iommu *iommu;
2436 int ret, unhandled;
2437
2438 /*
2439 * first allocate a default protection domain for every IOMMU we
2440 * found in the system. Devices not assigned to any other
2441 * protection domain will be assigned to the default one.
2442 */
2443 for_each_iommu(iommu) {
2444 iommu->default_dom = dma_ops_domain_alloc();
2445 if (iommu->default_dom == NULL)
2446 return -ENOMEM;
2447 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2448 ret = iommu_init_unity_mappings(iommu);
2449 if (ret)
2450 goto free_domains;
2451 }
2452
2453 /*
2454 * Pre-allocate the protection domains for each device.
2455 */
2456 prealloc_protection_domains();
2457
2458 iommu_detected = 1;
2459 swiotlb = 0;
2460
2461 /* Make the driver finally visible to the drivers */
2462 unhandled = device_dma_ops_init();
2463 if (unhandled && max_pfn > MAX_DMA32_PFN) {
2464 /* There are unhandled devices - initialize swiotlb for them */
2465 swiotlb = 1;
2466 }
2467
2468 amd_iommu_stats_init();
2469
2470 return 0;
2471
2472free_domains:
2473
2474 for_each_iommu(iommu) {
2475 if (iommu->default_dom)
2476 dma_ops_domain_free(iommu->default_dom);
2477 }
2478
2479 return ret;
2480}
2481
2482/*****************************************************************************
2483 *
2484 * The following functions belong to the exported interface of AMD IOMMU
2485 *
2486 * This interface allows access to lower level functions of the IOMMU
2487 * like protection domain handling and assignment of devices to domains
2488 * which is not possible with the dma_ops interface.
2489 *
2490 *****************************************************************************/
2491
2492static void cleanup_domain(struct protection_domain *domain)
2493{
2494 struct iommu_dev_data *dev_data, *next;
2495 unsigned long flags;
2496
2497 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2498
2499 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2500 struct device *dev = dev_data->dev;
2501
2502 __detach_device(dev);
2503 atomic_set(&dev_data->bind, 0);
2504 }
2505
2506 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2507}
2508
2509static void protection_domain_free(struct protection_domain *domain)
2510{
2511 if (!domain)
2512 return;
2513
2514 del_domain_from_list(domain);
2515
2516 if (domain->id)
2517 domain_id_free(domain->id);
2518
2519 kfree(domain);
2520}
2521
2522static struct protection_domain *protection_domain_alloc(void)
2523{
2524 struct protection_domain *domain;
2525
2526 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2527 if (!domain)
2528 return NULL;
2529
2530 spin_lock_init(&domain->lock);
2531 mutex_init(&domain->api_lock);
2532 domain->id = domain_id_alloc();
2533 if (!domain->id)
2534 goto out_err;
2535 INIT_LIST_HEAD(&domain->dev_list);
2536
2537 add_domain_to_list(domain);
2538
2539 return domain;
2540
2541out_err:
2542 kfree(domain);
2543
2544 return NULL;
2545}
2546
2547static int amd_iommu_domain_init(struct iommu_domain *dom)
2548{
2549 struct protection_domain *domain;
2550
2551 domain = protection_domain_alloc();
2552 if (!domain)
2553 goto out_free;
2554
2555 domain->mode = PAGE_MODE_3_LEVEL;
2556 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2557 if (!domain->pt_root)
2558 goto out_free;
2559
2560 dom->priv = domain;
2561
2562 return 0;
2563
2564out_free:
2565 protection_domain_free(domain);
2566
2567 return -ENOMEM;
2568}
2569
2570static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2571{
2572 struct protection_domain *domain = dom->priv;
2573
2574 if (!domain)
2575 return;
2576
2577 if (domain->dev_cnt > 0)
2578 cleanup_domain(domain);
2579
2580 BUG_ON(domain->dev_cnt != 0);
2581
2582 free_pagetable(domain);
2583
2584 protection_domain_free(domain);
2585
2586 dom->priv = NULL;
2587}
2588
2589static void amd_iommu_detach_device(struct iommu_domain *dom,
2590 struct device *dev)
2591{
2592 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2593 struct amd_iommu *iommu;
2594 u16 devid;
2595
2596 if (!check_device(dev))
2597 return;
2598
2599 devid = get_device_id(dev);
2600
2601 if (dev_data->domain != NULL)
2602 detach_device(dev);
2603
2604 iommu = amd_iommu_rlookup_table[devid];
2605 if (!iommu)
2606 return;
2607
2608 device_flush_dte(dev);
2609 iommu_completion_wait(iommu);
2610}
2611
2612static int amd_iommu_attach_device(struct iommu_domain *dom,
2613 struct device *dev)
2614{
2615 struct protection_domain *domain = dom->priv;
2616 struct iommu_dev_data *dev_data;
2617 struct amd_iommu *iommu;
2618 int ret;
2619 u16 devid;
2620
2621 if (!check_device(dev))
2622 return -EINVAL;
2623
2624 dev_data = dev->archdata.iommu;
2625
2626 devid = get_device_id(dev);
2627
2628 iommu = amd_iommu_rlookup_table[devid];
2629 if (!iommu)
2630 return -EINVAL;
2631
2632 if (dev_data->domain)
2633 detach_device(dev);
2634
2635 ret = attach_device(dev, domain);
2636
2637 iommu_completion_wait(iommu);
2638
2639 return ret;
2640}
2641
2642static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2643 phys_addr_t paddr, int gfp_order, int iommu_prot)
2644{
2645 unsigned long page_size = 0x1000UL << gfp_order;
2646 struct protection_domain *domain = dom->priv;
2647 int prot = 0;
2648 int ret;
2649
2650 if (iommu_prot & IOMMU_READ)
2651 prot |= IOMMU_PROT_IR;
2652 if (iommu_prot & IOMMU_WRITE)
2653 prot |= IOMMU_PROT_IW;
2654
2655 mutex_lock(&domain->api_lock);
2656 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2657 mutex_unlock(&domain->api_lock);
2658
2659 return ret;
2660}
2661
2662static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2663 int gfp_order)
2664{
2665 struct protection_domain *domain = dom->priv;
2666 unsigned long page_size, unmap_size;
2667
2668 page_size = 0x1000UL << gfp_order;
2669
2670 mutex_lock(&domain->api_lock);
2671 unmap_size = iommu_unmap_page(domain, iova, page_size);
2672 mutex_unlock(&domain->api_lock);
2673
2674 domain_flush_tlb_pde(domain);
2675
2676 return get_order(unmap_size);
2677}
2678
2679static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2680 unsigned long iova)
2681{
2682 struct protection_domain *domain = dom->priv;
2683 unsigned long offset_mask;
2684 phys_addr_t paddr;
2685 u64 *pte, __pte;
2686
2687 pte = fetch_pte(domain, iova);
2688
2689 if (!pte || !IOMMU_PTE_PRESENT(*pte))
2690 return 0;
2691
2692 if (PM_PTE_LEVEL(*pte) == 0)
2693 offset_mask = PAGE_SIZE - 1;
2694 else
2695 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2696
2697 __pte = *pte & PM_ADDR_MASK;
2698 paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2699
2700 return paddr;
2701}
2702
2703static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2704 unsigned long cap)
2705{
2706 switch (cap) {
2707 case IOMMU_CAP_CACHE_COHERENCY:
2708 return 1;
2709 }
2710
2711 return 0;
2712}
2713
2714static struct iommu_ops amd_iommu_ops = {
2715 .domain_init = amd_iommu_domain_init,
2716 .domain_destroy = amd_iommu_domain_destroy,
2717 .attach_dev = amd_iommu_attach_device,
2718 .detach_dev = amd_iommu_detach_device,
2719 .map = amd_iommu_map,
2720 .unmap = amd_iommu_unmap,
2721 .iova_to_phys = amd_iommu_iova_to_phys,
2722 .domain_has_cap = amd_iommu_domain_has_cap,
2723};
2724
2725/*****************************************************************************
2726 *
2727 * The next functions do a basic initialization of IOMMU for pass through
2728 * mode
2729 *
2730 * In passthrough mode the IOMMU is initialized and enabled but not used for
2731 * DMA-API translation.
2732 *
2733 *****************************************************************************/
2734
2735int __init amd_iommu_init_passthrough(void)
2736{
2737 struct amd_iommu *iommu;
2738 struct pci_dev *dev = NULL;
2739 u16 devid;
2740
2741 /* allocate passthrough domain */
2742 pt_domain = protection_domain_alloc();
2743 if (!pt_domain)
2744 return -ENOMEM;
2745
2746 pt_domain->mode |= PAGE_MODE_NONE;
2747
2748 for_each_pci_dev(dev) {
2749 if (!check_device(&dev->dev))
2750 continue;
2751
2752 devid = get_device_id(&dev->dev);
2753
2754 iommu = amd_iommu_rlookup_table[devid];
2755 if (!iommu)
2756 continue;
2757
2758 attach_device(&dev->dev, pt_domain);
2759 }
2760
2761 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2762
2763 return 0;
2764}