author    Linus Torvalds <torvalds@linux-foundation.org>  2009-06-23 00:38:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-06-23 00:38:22 -0400
commit    687d680985b1438360a9ba470ece8b57cd205c3b (patch)
tree      ae253608531e5c3e823600974c610e722e7de759 /drivers
parent    1053414068bad659479e6efa62a67403b8b1ec0a (diff)
parent    008fe148cb0fb51d266baabe2c09997b21cf90c6 (diff)
Merge git://git.infradead.org/~dwmw2/iommu-2.6.31
* git://git.infradead.org/~dwmw2/iommu-2.6.31:
  intel-iommu: Fix one last ia64 build problem in Pass Through Support
  VT-d: support the device IOTLB
  VT-d: cleanup iommu_flush_iotlb_psi and flush_unmaps
  VT-d: add device IOTLB invalidation support
  VT-d: parse ATSR in DMA Remapping Reporting Structure
  PCI: handle Virtual Function ATS enabling
  PCI: support the ATS capability
  intel-iommu: dmar_set_interrupt return error value
  intel-iommu: Tidy up iommu->gcmd handling
  intel-iommu: Fix tiny theoretical race in write-buffer flush.
  intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing.
  intel-iommu: Clean up handling of "caching mode" vs. context flushing.
  VT-d: fix invalid domain id for KVM context flush
  Fix !CONFIG_DMAR build failure introduced by Intel IOMMU Pass Through Support
  Intel IOMMU Pass Through Support

Fix up trivial conflicts in drivers/pci/{intel-iommu.c,intr_remapping.c}
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/pci/dmar.c             235
-rw-r--r--  drivers/pci/intel-iommu.c      449
-rw-r--r--  drivers/pci/intr_remapping.c     8
-rw-r--r--  drivers/pci/iov.c              155
-rw-r--r--  drivers/pci/pci.h               39
5 files changed, 681 insertions(+), 205 deletions(-)
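
For orientation before the diff: the series adds a small caller-facing ATS API in drivers/pci (pci_enable_ats(), pci_disable_ats(), pci_ats_queue_depth(), pci_ats_enabled()), which the VT-d driver then uses to keep device IOTLBs coherent via queued DEV_IOTLB invalidations. The sketch below is illustrative only and not part of the merge: example_dev_iotlb_cycle() is a made-up helper, and qi_flush_dev_iotlb() plus VTD_PAGE_SHIFT are assumed to come from the VT-d headers touched elsewhere in this series.

/*
 * Illustrative sketch, not part of the merge.  Condensed from
 * iommu_enable_dev_iotlb(), iommu_flush_dev_iotlb() and
 * iommu_disable_dev_iotlb() in the intel-iommu.c hunks below.
 * pci_enable_ats() and friends are declared in drivers/pci/pci.h by this
 * series; qi_flush_dev_iotlb() and VTD_PAGE_SHIFT come from the VT-d headers.
 */
#include <linux/pci.h>
#include <linux/intel-iommu.h>
#include "pci.h"		/* drivers/pci/pci.h: pci_enable_ats() etc. */

static void example_dev_iotlb_cycle(struct intel_iommu *iommu,
				    struct pci_dev *pdev,
				    u64 addr, unsigned int mask)
{
	u16 sid, qdep;

	/* Enable ATS with the IOMMU page shift as the smallest translation unit. */
	if (pci_enable_ats(pdev, VTD_PAGE_SHIFT))
		return;

	/* Source-id and invalidate queue depth feed the DEV_IOTLB descriptor. */
	sid = pdev->bus->number << 8 | pdev->devfn;
	qdep = pci_ats_queue_depth(pdev);
	qi_flush_dev_iotlb(iommu, sid, qdep, addr, mask);

	if (pci_ats_enabled(pdev))
		pci_disable_ats(pdev);
}
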
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index fa3a11365ec3..7b287cb38b7a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -267,6 +267,84 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
267 } 267 }
268 return ret; 268 return ret;
269} 269}
270
271static LIST_HEAD(dmar_atsr_units);
272
273static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
274{
275 struct acpi_dmar_atsr *atsr;
276 struct dmar_atsr_unit *atsru;
277
278 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
279 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
280 if (!atsru)
281 return -ENOMEM;
282
283 atsru->hdr = hdr;
284 atsru->include_all = atsr->flags & 0x1;
285
286 list_add(&atsru->list, &dmar_atsr_units);
287
288 return 0;
289}
290
291static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
292{
293 int rc;
294 struct acpi_dmar_atsr *atsr;
295
296 if (atsru->include_all)
297 return 0;
298
299 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
300 rc = dmar_parse_dev_scope((void *)(atsr + 1),
301 (void *)atsr + atsr->header.length,
302 &atsru->devices_cnt, &atsru->devices,
303 atsr->segment);
304 if (rc || !atsru->devices_cnt) {
305 list_del(&atsru->list);
306 kfree(atsru);
307 }
308
309 return rc;
310}
311
312int dmar_find_matched_atsr_unit(struct pci_dev *dev)
313{
314 int i;
315 struct pci_bus *bus;
316 struct acpi_dmar_atsr *atsr;
317 struct dmar_atsr_unit *atsru;
318
319 list_for_each_entry(atsru, &dmar_atsr_units, list) {
320 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
321 if (atsr->segment == pci_domain_nr(dev->bus))
322 goto found;
323 }
324
325 return 0;
326
327found:
328 for (bus = dev->bus; bus; bus = bus->parent) {
329 struct pci_dev *bridge = bus->self;
330
331 if (!bridge || !bridge->is_pcie ||
332 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
333 return 0;
334
335 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
336 for (i = 0; i < atsru->devices_cnt; i++)
337 if (atsru->devices[i] == bridge)
338 return 1;
339 break;
340 }
341 }
342
343 if (atsru->include_all)
344 return 1;
345
346 return 0;
347}
270#endif 348#endif
271 349
272static void __init 350static void __init
@@ -274,22 +352,28 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
274{ 352{
275 struct acpi_dmar_hardware_unit *drhd; 353 struct acpi_dmar_hardware_unit *drhd;
276 struct acpi_dmar_reserved_memory *rmrr; 354 struct acpi_dmar_reserved_memory *rmrr;
355 struct acpi_dmar_atsr *atsr;
277 356
278 switch (header->type) { 357 switch (header->type) {
279 case ACPI_DMAR_TYPE_HARDWARE_UNIT: 358 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
280 drhd = (struct acpi_dmar_hardware_unit *)header; 359 drhd = container_of(header, struct acpi_dmar_hardware_unit,
360 header);
281 printk (KERN_INFO PREFIX 361 printk (KERN_INFO PREFIX
282 "DRHD (flags: 0x%08x)base: 0x%016Lx\n", 362 "DRHD base: %#016Lx flags: %#x\n",
283 drhd->flags, (unsigned long long)drhd->address); 363 (unsigned long long)drhd->address, drhd->flags);
284 break; 364 break;
285 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 365 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
286 rmrr = (struct acpi_dmar_reserved_memory *)header; 366 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
287 367 header);
288 printk (KERN_INFO PREFIX 368 printk (KERN_INFO PREFIX
289 "RMRR base: 0x%016Lx end: 0x%016Lx\n", 369 "RMRR base: %#016Lx end: %#016Lx\n",
290 (unsigned long long)rmrr->base_address, 370 (unsigned long long)rmrr->base_address,
291 (unsigned long long)rmrr->end_address); 371 (unsigned long long)rmrr->end_address);
292 break; 372 break;
373 case ACPI_DMAR_TYPE_ATSR:
374 atsr = container_of(header, struct acpi_dmar_atsr, header);
375 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
376 break;
293 } 377 }
294} 378}
295 379
@@ -363,6 +447,11 @@ parse_dmar_table(void)
363 ret = dmar_parse_one_rmrr(entry_header); 447 ret = dmar_parse_one_rmrr(entry_header);
364#endif 448#endif
365 break; 449 break;
450 case ACPI_DMAR_TYPE_ATSR:
451#ifdef CONFIG_DMAR
452 ret = dmar_parse_one_atsr(entry_header);
453#endif
454 break;
366 default: 455 default:
367 printk(KERN_WARNING PREFIX 456 printk(KERN_WARNING PREFIX
368 "Unknown DMAR structure type\n"); 457 "Unknown DMAR structure type\n");
@@ -431,11 +520,19 @@ int __init dmar_dev_scope_init(void)
431#ifdef CONFIG_DMAR 520#ifdef CONFIG_DMAR
432 { 521 {
433 struct dmar_rmrr_unit *rmrr, *rmrr_n; 522 struct dmar_rmrr_unit *rmrr, *rmrr_n;
523 struct dmar_atsr_unit *atsr, *atsr_n;
524
434 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { 525 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
435 ret = rmrr_parse_dev(rmrr); 526 ret = rmrr_parse_dev(rmrr);
436 if (ret) 527 if (ret)
437 return ret; 528 return ret;
438 } 529 }
530
531 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
532 ret = atsr_parse_dev(atsr);
533 if (ret)
534 return ret;
535 }
439 } 536 }
440#endif 537#endif
441 538
@@ -468,6 +565,9 @@ int __init dmar_table_init(void)
468#ifdef CONFIG_DMAR 565#ifdef CONFIG_DMAR
469 if (list_empty(&dmar_rmrr_units)) 566 if (list_empty(&dmar_rmrr_units))
470 printk(KERN_INFO PREFIX "No RMRR found\n"); 567 printk(KERN_INFO PREFIX "No RMRR found\n");
568
569 if (list_empty(&dmar_atsr_units))
570 printk(KERN_INFO PREFIX "No ATSR found\n");
471#endif 571#endif
472 572
473#ifdef CONFIG_INTR_REMAP 573#ifdef CONFIG_INTR_REMAP
@@ -515,6 +615,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
515 u32 ver; 615 u32 ver;
516 static int iommu_allocated = 0; 616 static int iommu_allocated = 0;
517 int agaw = 0; 617 int agaw = 0;
618 int msagaw = 0;
518 619
519 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); 620 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
520 if (!iommu) 621 if (!iommu)
@@ -535,12 +636,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
535 agaw = iommu_calculate_agaw(iommu); 636 agaw = iommu_calculate_agaw(iommu);
536 if (agaw < 0) { 637 if (agaw < 0) {
537 printk(KERN_ERR 638 printk(KERN_ERR
538 "Cannot get a valid agaw for iommu (seq_id = %d)\n", 639 "Cannot get a valid agaw for iommu (seq_id = %d)\n",
640 iommu->seq_id);
641 goto error;
642 }
643 msagaw = iommu_calculate_max_sagaw(iommu);
644 if (msagaw < 0) {
645 printk(KERN_ERR
646 "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
539 iommu->seq_id); 647 iommu->seq_id);
540 goto error; 648 goto error;
541 } 649 }
542#endif 650#endif
543 iommu->agaw = agaw; 651 iommu->agaw = agaw;
652 iommu->msagaw = msagaw;
544 653
545 /* the registers might be more than one page */ 654 /* the registers might be more than one page */
546 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), 655 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
@@ -590,7 +699,8 @@ void free_iommu(struct intel_iommu *iommu)
590 */ 699 */
591static inline void reclaim_free_desc(struct q_inval *qi) 700static inline void reclaim_free_desc(struct q_inval *qi)
592{ 701{
593 while (qi->desc_status[qi->free_tail] == QI_DONE) { 702 while (qi->desc_status[qi->free_tail] == QI_DONE ||
703 qi->desc_status[qi->free_tail] == QI_ABORT) {
594 qi->desc_status[qi->free_tail] = QI_FREE; 704 qi->desc_status[qi->free_tail] = QI_FREE;
595 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; 705 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
596 qi->free_cnt++; 706 qi->free_cnt++;
@@ -600,10 +710,13 @@ static inline void reclaim_free_desc(struct q_inval *qi)
600static int qi_check_fault(struct intel_iommu *iommu, int index) 710static int qi_check_fault(struct intel_iommu *iommu, int index)
601{ 711{
602 u32 fault; 712 u32 fault;
603 int head; 713 int head, tail;
604 struct q_inval *qi = iommu->qi; 714 struct q_inval *qi = iommu->qi;
605 int wait_index = (index + 1) % QI_LENGTH; 715 int wait_index = (index + 1) % QI_LENGTH;
606 716
717 if (qi->desc_status[wait_index] == QI_ABORT)
718 return -EAGAIN;
719
607 fault = readl(iommu->reg + DMAR_FSTS_REG); 720 fault = readl(iommu->reg + DMAR_FSTS_REG);
608 721
609 /* 722 /*
@@ -613,7 +726,11 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
613 */ 726 */
614 if (fault & DMA_FSTS_IQE) { 727 if (fault & DMA_FSTS_IQE) {
615 head = readl(iommu->reg + DMAR_IQH_REG); 728 head = readl(iommu->reg + DMAR_IQH_REG);
616 if ((head >> 4) == index) { 729 if ((head >> DMAR_IQ_SHIFT) == index) {
730 printk(KERN_ERR "VT-d detected invalid descriptor: "
731 "low=%llx, high=%llx\n",
732 (unsigned long long)qi->desc[index].low,
733 (unsigned long long)qi->desc[index].high);
617 memcpy(&qi->desc[index], &qi->desc[wait_index], 734 memcpy(&qi->desc[index], &qi->desc[wait_index],
618 sizeof(struct qi_desc)); 735 sizeof(struct qi_desc));
619 __iommu_flush_cache(iommu, &qi->desc[index], 736 __iommu_flush_cache(iommu, &qi->desc[index],
@@ -623,6 +740,32 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
623 } 740 }
624 } 741 }
625 742
743 /*
744 * If ITE happens, all pending wait_desc commands are aborted.
745 * No new descriptors are fetched until the ITE is cleared.
746 */
747 if (fault & DMA_FSTS_ITE) {
748 head = readl(iommu->reg + DMAR_IQH_REG);
749 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
750 head |= 1;
751 tail = readl(iommu->reg + DMAR_IQT_REG);
752 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
753
754 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
755
756 do {
757 if (qi->desc_status[head] == QI_IN_USE)
758 qi->desc_status[head] = QI_ABORT;
759 head = (head - 2 + QI_LENGTH) % QI_LENGTH;
760 } while (head != tail);
761
762 if (qi->desc_status[wait_index] == QI_ABORT)
763 return -EAGAIN;
764 }
765
766 if (fault & DMA_FSTS_ICE)
767 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
768
626 return 0; 769 return 0;
627} 770}
628 771
@@ -632,7 +775,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
632 */ 775 */
633int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) 776int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
634{ 777{
635 int rc = 0; 778 int rc;
636 struct q_inval *qi = iommu->qi; 779 struct q_inval *qi = iommu->qi;
637 struct qi_desc *hw, wait_desc; 780 struct qi_desc *hw, wait_desc;
638 int wait_index, index; 781 int wait_index, index;
@@ -643,6 +786,9 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
643 786
644 hw = qi->desc; 787 hw = qi->desc;
645 788
789restart:
790 rc = 0;
791
646 spin_lock_irqsave(&qi->q_lock, flags); 792 spin_lock_irqsave(&qi->q_lock, flags);
647 while (qi->free_cnt < 3) { 793 while (qi->free_cnt < 3) {
648 spin_unlock_irqrestore(&qi->q_lock, flags); 794 spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -673,7 +819,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
673 * update the HW tail register indicating the presence of 819 * update the HW tail register indicating the presence of
674 * new descriptors. 820 * new descriptors.
675 */ 821 */
676 writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); 822 writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
677 823
678 while (qi->desc_status[wait_index] != QI_DONE) { 824 while (qi->desc_status[wait_index] != QI_DONE) {
679 /* 825 /*
@@ -685,18 +831,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
685 */ 831 */
686 rc = qi_check_fault(iommu, index); 832 rc = qi_check_fault(iommu, index);
687 if (rc) 833 if (rc)
688 goto out; 834 break;
689 835
690 spin_unlock(&qi->q_lock); 836 spin_unlock(&qi->q_lock);
691 cpu_relax(); 837 cpu_relax();
692 spin_lock(&qi->q_lock); 838 spin_lock(&qi->q_lock);
693 } 839 }
694out: 840
695 qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE; 841 qi->desc_status[index] = QI_DONE;
696 842
697 reclaim_free_desc(qi); 843 reclaim_free_desc(qi);
698 spin_unlock_irqrestore(&qi->q_lock, flags); 844 spin_unlock_irqrestore(&qi->q_lock, flags);
699 845
846 if (rc == -EAGAIN)
847 goto restart;
848
700 return rc; 849 return rc;
701} 850}
702 851
@@ -714,41 +863,26 @@ void qi_global_iec(struct intel_iommu *iommu)
714 qi_submit_sync(&desc, iommu); 863 qi_submit_sync(&desc, iommu);
715} 864}
716 865
717int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, 866void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
718 u64 type, int non_present_entry_flush) 867 u64 type)
719{ 868{
720 struct qi_desc desc; 869 struct qi_desc desc;
721 870
722 if (non_present_entry_flush) {
723 if (!cap_caching_mode(iommu->cap))
724 return 1;
725 else
726 did = 0;
727 }
728
729 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) 871 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
730 | QI_CC_GRAN(type) | QI_CC_TYPE; 872 | QI_CC_GRAN(type) | QI_CC_TYPE;
731 desc.high = 0; 873 desc.high = 0;
732 874
733 return qi_submit_sync(&desc, iommu); 875 qi_submit_sync(&desc, iommu);
734} 876}
735 877
736int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 878void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
737 unsigned int size_order, u64 type, 879 unsigned int size_order, u64 type)
738 int non_present_entry_flush)
739{ 880{
740 u8 dw = 0, dr = 0; 881 u8 dw = 0, dr = 0;
741 882
742 struct qi_desc desc; 883 struct qi_desc desc;
743 int ih = 0; 884 int ih = 0;
744 885
745 if (non_present_entry_flush) {
746 if (!cap_caching_mode(iommu->cap))
747 return 1;
748 else
749 did = 0;
750 }
751
752 if (cap_write_drain(iommu->cap)) 886 if (cap_write_drain(iommu->cap))
753 dw = 1; 887 dw = 1;
754 888
@@ -760,7 +894,28 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
760 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) 894 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
761 | QI_IOTLB_AM(size_order); 895 | QI_IOTLB_AM(size_order);
762 896
763 return qi_submit_sync(&desc, iommu); 897 qi_submit_sync(&desc, iommu);
898}
899
900void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
901 u64 addr, unsigned mask)
902{
903 struct qi_desc desc;
904
905 if (mask) {
906 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
907 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
908 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
909 } else
910 desc.high = QI_DEV_IOTLB_ADDR(addr);
911
912 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
913 qdep = 0;
914
915 desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
916 QI_DIOTLB_TYPE;
917
918 qi_submit_sync(&desc, iommu);
764} 919}
765 920
766/* 921/*
@@ -790,7 +945,6 @@ void dmar_disable_qi(struct intel_iommu *iommu)
790 cpu_relax(); 945 cpu_relax();
791 946
792 iommu->gcmd &= ~DMA_GCMD_QIE; 947 iommu->gcmd &= ~DMA_GCMD_QIE;
793
794 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 948 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
795 949
796 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, 950 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
@@ -804,7 +958,7 @@ end:
804 */ 958 */
805static void __dmar_enable_qi(struct intel_iommu *iommu) 959static void __dmar_enable_qi(struct intel_iommu *iommu)
806{ 960{
807 u32 cmd, sts; 961 u32 sts;
808 unsigned long flags; 962 unsigned long flags;
809 struct q_inval *qi = iommu->qi; 963 struct q_inval *qi = iommu->qi;
810 964
@@ -818,9 +972,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
818 972
819 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); 973 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
820 974
821 cmd = iommu->gcmd | DMA_GCMD_QIE;
822 iommu->gcmd |= DMA_GCMD_QIE; 975 iommu->gcmd |= DMA_GCMD_QIE;
823 writel(cmd, iommu->reg + DMAR_GCMD_REG); 976 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
824 977
825 /* Make sure hardware complete it */ 978 /* Make sure hardware complete it */
826 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); 979 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
@@ -1096,7 +1249,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
1096 set_irq_data(irq, NULL); 1249 set_irq_data(irq, NULL);
1097 iommu->irq = 0; 1250 iommu->irq = 0;
1098 destroy_irq(irq); 1251 destroy_irq(irq);
1099 return 0; 1252 return ret;
1100 } 1253 }
1101 1254
1102 ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); 1255 ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
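
The qi_flush_dev_iotlb() hunk above encodes the invalidation size into the address word: the caller passes an address aligned to 2^(VTD_PAGE_SHIFT + mask) bytes, and the bits below bit (VTD_PAGE_SHIFT + mask - 1) are filled with ones so the hardware can derive the range from the position of the lowest clear bit. A self-contained sketch of just that arithmetic follows (plain C; encode_dev_iotlb_addr() is a hypothetical name and a 4KiB page shift is assumed):

#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT 12	/* assumed: 4KiB IOMMU pages, as in the VT-d code */

/*
 * Mirror the address/size encoding in qi_flush_dev_iotlb(): for a 2^mask-page
 * invalidation the address must be aligned to the range, and the bits below
 * bit (VTD_PAGE_SHIFT + mask - 1) are set so the lowest clear bit marks the
 * range size.  mask == 0 means a single-page invalidation, address unchanged.
 */
static uint64_t encode_dev_iotlb_addr(uint64_t addr, unsigned int mask)
{
	if (!mask)
		return addr;
	return addr | ((UINT64_C(1) << (VTD_PAGE_SHIFT + mask - 1)) - 1);
}

int main(void)
{
	/* 16 pages (mask = 4) starting at 0x100000 -> 0x107fff */
	printf("%#llx\n",
	       (unsigned long long)encode_dev_iotlb_addr(UINT64_C(0x100000), 4));
	return 0;
}
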
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index cd389162735f..178853a07440 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,8 @@
53 53
54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
55 55
56#define MAX_AGAW_WIDTH 64
57
56#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
57 59
58#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 60#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -131,8 +133,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
131 context->lo &= (((u64)-1) << 2) | 1; 133 context->lo &= (((u64)-1) << 2) | 1;
132} 134}
133 135
134#define CONTEXT_TT_MULTI_LEVEL 0
135
136static inline void context_set_translation_type(struct context_entry *context, 136static inline void context_set_translation_type(struct context_entry *context,
137 unsigned long value) 137 unsigned long value)
138{ 138{
@@ -256,6 +256,7 @@ struct device_domain_info {
256 u8 bus; /* PCI bus number */ 256 u8 bus; /* PCI bus number */
257 u8 devfn; /* PCI devfn number */ 257 u8 devfn; /* PCI devfn number */
258 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 258 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
259 struct intel_iommu *iommu; /* IOMMU used by this device */
259 struct dmar_domain *domain; /* pointer to domain */ 260 struct dmar_domain *domain; /* pointer to domain */
260}; 261};
261 262
@@ -401,17 +402,13 @@ void free_iova_mem(struct iova *iova)
401 402
402static inline int width_to_agaw(int width); 403static inline int width_to_agaw(int width);
403 404
404/* calculate agaw for each iommu. 405static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
405 * "SAGAW" may be different across iommus, use a default agaw, and
406 * get a supported less agaw for iommus that don't support the default agaw.
407 */
408int iommu_calculate_agaw(struct intel_iommu *iommu)
409{ 406{
410 unsigned long sagaw; 407 unsigned long sagaw;
411 int agaw = -1; 408 int agaw = -1;
412 409
413 sagaw = cap_sagaw(iommu->cap); 410 sagaw = cap_sagaw(iommu->cap);
414 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); 411 for (agaw = width_to_agaw(max_gaw);
415 agaw >= 0; agaw--) { 412 agaw >= 0; agaw--) {
416 if (test_bit(agaw, &sagaw)) 413 if (test_bit(agaw, &sagaw))
417 break; 414 break;
@@ -420,6 +417,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
420 return agaw; 417 return agaw;
421} 418}
422 419
420/*
421 * Calculate max SAGAW for each iommu.
422 */
423int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
424{
425 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
426}
427
428/*
429 * calculate agaw for each iommu.
430 * "SAGAW" may be different across iommus, use a default agaw, and
431 * get a supported less agaw for iommus that don't support the default agaw.
432 */
433int iommu_calculate_agaw(struct intel_iommu *iommu)
434{
435 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
436}
437
423/* in native case, each domain is related to only one iommu */ 438/* in native case, each domain is related to only one iommu */
424static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 439static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
425{ 440{
@@ -809,7 +824,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
809static void iommu_set_root_entry(struct intel_iommu *iommu) 824static void iommu_set_root_entry(struct intel_iommu *iommu)
810{ 825{
811 void *addr; 826 void *addr;
812 u32 cmd, sts; 827 u32 sts;
813 unsigned long flag; 828 unsigned long flag;
814 829
815 addr = iommu->root_entry; 830 addr = iommu->root_entry;
@@ -817,12 +832,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
817 spin_lock_irqsave(&iommu->register_lock, flag); 832 spin_lock_irqsave(&iommu->register_lock, flag);
818 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); 833 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
819 834
820 cmd = iommu->gcmd | DMA_GCMD_SRTP; 835 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
821 writel(cmd, iommu->reg + DMAR_GCMD_REG);
822 836
823 /* Make sure hardware complete it */ 837 /* Make sure hardware complete it */
824 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 838 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
825 readl, (sts & DMA_GSTS_RTPS), sts); 839 readl, (sts & DMA_GSTS_RTPS), sts);
826 840
827 spin_unlock_irqrestore(&iommu->register_lock, flag); 841 spin_unlock_irqrestore(&iommu->register_lock, flag);
828} 842}
@@ -834,39 +848,25 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
834 848
835 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 849 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
836 return; 850 return;
837 val = iommu->gcmd | DMA_GCMD_WBF;
838 851
839 spin_lock_irqsave(&iommu->register_lock, flag); 852 spin_lock_irqsave(&iommu->register_lock, flag);
840 writel(val, iommu->reg + DMAR_GCMD_REG); 853 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
841 854
842 /* Make sure hardware complete it */ 855 /* Make sure hardware complete it */
843 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 856 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
844 readl, (!(val & DMA_GSTS_WBFS)), val); 857 readl, (!(val & DMA_GSTS_WBFS)), val);
845 858
846 spin_unlock_irqrestore(&iommu->register_lock, flag); 859 spin_unlock_irqrestore(&iommu->register_lock, flag);
847} 860}
848 861
849/* return value determine if we need a write buffer flush */ 862/* return value determine if we need a write buffer flush */
850static int __iommu_flush_context(struct intel_iommu *iommu, 863static void __iommu_flush_context(struct intel_iommu *iommu,
851 u16 did, u16 source_id, u8 function_mask, u64 type, 864 u16 did, u16 source_id, u8 function_mask,
852 int non_present_entry_flush) 865 u64 type)
853{ 866{
854 u64 val = 0; 867 u64 val = 0;
855 unsigned long flag; 868 unsigned long flag;
856 869
857 /*
858 * In the non-present entry flush case, if hardware doesn't cache
859 * non-present entry we do nothing and if hardware cache non-present
860 * entry, we flush entries of domain 0 (the domain id is used to cache
861 * any non-present entries)
862 */
863 if (non_present_entry_flush) {
864 if (!cap_caching_mode(iommu->cap))
865 return 1;
866 else
867 did = 0;
868 }
869
870 switch (type) { 870 switch (type) {
871 case DMA_CCMD_GLOBAL_INVL: 871 case DMA_CCMD_GLOBAL_INVL:
872 val = DMA_CCMD_GLOBAL_INVL; 872 val = DMA_CCMD_GLOBAL_INVL;
@@ -891,33 +891,16 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
891 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 891 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
892 892
893 spin_unlock_irqrestore(&iommu->register_lock, flag); 893 spin_unlock_irqrestore(&iommu->register_lock, flag);
894
895 /* flush context entry will implicitly flush write buffer */
896 return 0;
897} 894}
898 895
899/* return value determine if we need a write buffer flush */ 896/* return value determine if we need a write buffer flush */
900static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 897static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
901 u64 addr, unsigned int size_order, u64 type, 898 u64 addr, unsigned int size_order, u64 type)
902 int non_present_entry_flush)
903{ 899{
904 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 900 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
905 u64 val = 0, val_iva = 0; 901 u64 val = 0, val_iva = 0;
906 unsigned long flag; 902 unsigned long flag;
907 903
908 /*
909 * In the non-present entry flush case, if hardware doesn't cache
910 * non-present entry we do nothing and if hardware cache non-present
911 * entry, we flush entries of domain 0 (the domain id is used to cache
912 * any non-present entries)
913 */
914 if (non_present_entry_flush) {
915 if (!cap_caching_mode(iommu->cap))
916 return 1;
917 else
918 did = 0;
919 }
920
921 switch (type) { 904 switch (type) {
922 case DMA_TLB_GLOBAL_FLUSH: 905 case DMA_TLB_GLOBAL_FLUSH:
923 /* global flush doesn't need set IVA_REG */ 906 /* global flush doesn't need set IVA_REG */
@@ -965,37 +948,101 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
965 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 948 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
966 (unsigned long long)DMA_TLB_IIRG(type), 949 (unsigned long long)DMA_TLB_IIRG(type),
967 (unsigned long long)DMA_TLB_IAIG(val)); 950 (unsigned long long)DMA_TLB_IAIG(val));
968 /* flush iotlb entry will implicitly flush write buffer */
969 return 0;
970} 951}
971 952
972static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 953static struct device_domain_info *iommu_support_dev_iotlb(
973 u64 addr, unsigned int pages, int non_present_entry_flush) 954 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
955{
956 int found = 0;
957 unsigned long flags;
958 struct device_domain_info *info;
959 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
960
961 if (!ecap_dev_iotlb_support(iommu->ecap))
962 return NULL;
963
964 if (!iommu->qi)
965 return NULL;
966
967 spin_lock_irqsave(&device_domain_lock, flags);
968 list_for_each_entry(info, &domain->devices, link)
969 if (info->bus == bus && info->devfn == devfn) {
970 found = 1;
971 break;
972 }
973 spin_unlock_irqrestore(&device_domain_lock, flags);
974
975 if (!found || !info->dev)
976 return NULL;
977
978 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
979 return NULL;
980
981 if (!dmar_find_matched_atsr_unit(info->dev))
982 return NULL;
983
984 info->iommu = iommu;
985
986 return info;
987}
988
989static void iommu_enable_dev_iotlb(struct device_domain_info *info)
974{ 990{
975 unsigned int mask; 991 if (!info)
992 return;
993
994 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
995}
996
997static void iommu_disable_dev_iotlb(struct device_domain_info *info)
998{
999 if (!info->dev || !pci_ats_enabled(info->dev))
1000 return;
1001
1002 pci_disable_ats(info->dev);
1003}
1004
1005static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1006 u64 addr, unsigned mask)
1007{
1008 u16 sid, qdep;
1009 unsigned long flags;
1010 struct device_domain_info *info;
1011
1012 spin_lock_irqsave(&device_domain_lock, flags);
1013 list_for_each_entry(info, &domain->devices, link) {
1014 if (!info->dev || !pci_ats_enabled(info->dev))
1015 continue;
1016
1017 sid = info->bus << 8 | info->devfn;
1018 qdep = pci_ats_queue_depth(info->dev);
1019 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1020 }
1021 spin_unlock_irqrestore(&device_domain_lock, flags);
1022}
1023
1024static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1025 u64 addr, unsigned int pages)
1026{
1027 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
976 1028
977 BUG_ON(addr & (~VTD_PAGE_MASK)); 1029 BUG_ON(addr & (~VTD_PAGE_MASK));
978 BUG_ON(pages == 0); 1030 BUG_ON(pages == 0);
979 1031
980 /* Fallback to domain selective flush if no PSI support */
981 if (!cap_pgsel_inv(iommu->cap))
982 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
983 DMA_TLB_DSI_FLUSH,
984 non_present_entry_flush);
985
986 /* 1032 /*
1033 * Fallback to domain selective flush if no PSI support or the size is
1034 * too big.
987 * PSI requires page size to be 2 ^ x, and the base address is naturally 1035 * PSI requires page size to be 2 ^ x, and the base address is naturally
988 * aligned to the size 1036 * aligned to the size
989 */ 1037 */
990 mask = ilog2(__roundup_pow_of_two(pages)); 1038 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
991 /* Fallback to domain selective flush if size is too big */ 1039 iommu->flush.flush_iotlb(iommu, did, 0, 0,
992 if (mask > cap_max_amask_val(iommu->cap)) 1040 DMA_TLB_DSI_FLUSH);
993 return iommu->flush.flush_iotlb(iommu, did, 0, 0, 1041 else
994 DMA_TLB_DSI_FLUSH, non_present_entry_flush); 1042 iommu->flush.flush_iotlb(iommu, did, addr, mask,
995 1043 DMA_TLB_PSI_FLUSH);
996 return iommu->flush.flush_iotlb(iommu, did, addr, mask, 1044 if (did)
997 DMA_TLB_PSI_FLUSH, 1045 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
998 non_present_entry_flush);
999} 1046}
1000 1047
1001static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1048static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1021,13 +1068,13 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
1021 unsigned long flags; 1068 unsigned long flags;
1022 1069
1023 spin_lock_irqsave(&iommu->register_lock, flags); 1070 spin_lock_irqsave(&iommu->register_lock, flags);
1024 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); 1071 iommu->gcmd |= DMA_GCMD_TE;
1072 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1025 1073
1026 /* Make sure hardware complete it */ 1074 /* Make sure hardware complete it */
1027 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1075 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1028 readl, (sts & DMA_GSTS_TES), sts); 1076 readl, (sts & DMA_GSTS_TES), sts);
1029 1077
1030 iommu->gcmd |= DMA_GCMD_TE;
1031 spin_unlock_irqrestore(&iommu->register_lock, flags); 1078 spin_unlock_irqrestore(&iommu->register_lock, flags);
1032 return 0; 1079 return 0;
1033} 1080}
@@ -1043,7 +1090,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
1043 1090
1044 /* Make sure hardware complete it */ 1091 /* Make sure hardware complete it */
1045 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1092 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1046 readl, (!(sts & DMA_GSTS_TES)), sts); 1093 readl, (!(sts & DMA_GSTS_TES)), sts);
1047 1094
1048 spin_unlock_irqrestore(&iommu->register_lock, flag); 1095 spin_unlock_irqrestore(&iommu->register_lock, flag);
1049 return 0; 1096 return 0;
@@ -1325,8 +1372,8 @@ static void domain_exit(struct dmar_domain *domain)
1325 free_domain_mem(domain); 1372 free_domain_mem(domain);
1326} 1373}
1327 1374
1328static int domain_context_mapping_one(struct dmar_domain *domain, 1375static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1329 int segment, u8 bus, u8 devfn) 1376 u8 bus, u8 devfn, int translation)
1330{ 1377{
1331 struct context_entry *context; 1378 struct context_entry *context;
1332 unsigned long flags; 1379 unsigned long flags;
@@ -1336,10 +1383,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1336 unsigned long ndomains; 1383 unsigned long ndomains;
1337 int id; 1384 int id;
1338 int agaw; 1385 int agaw;
1386 struct device_domain_info *info = NULL;
1339 1387
1340 pr_debug("Set context mapping for %02x:%02x.%d\n", 1388 pr_debug("Set context mapping for %02x:%02x.%d\n",
1341 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1389 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1390
1342 BUG_ON(!domain->pgd); 1391 BUG_ON(!domain->pgd);
1392 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1393 translation != CONTEXT_TT_MULTI_LEVEL);
1343 1394
1344 iommu = device_to_iommu(segment, bus, devfn); 1395 iommu = device_to_iommu(segment, bus, devfn);
1345 if (!iommu) 1396 if (!iommu)
@@ -1399,21 +1450,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1399 } 1450 }
1400 1451
1401 context_set_domain_id(context, id); 1452 context_set_domain_id(context, id);
1402 context_set_address_width(context, iommu->agaw); 1453
1403 context_set_address_root(context, virt_to_phys(pgd)); 1454 if (translation != CONTEXT_TT_PASS_THROUGH) {
1404 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); 1455 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1456 translation = info ? CONTEXT_TT_DEV_IOTLB :
1457 CONTEXT_TT_MULTI_LEVEL;
1458 }
1459 /*
1460 * In pass through mode, AW must be programmed to indicate the largest
1461 * AGAW value supported by hardware. And ASR is ignored by hardware.
1462 */
1463 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1464 context_set_address_width(context, iommu->msagaw);
1465 else {
1466 context_set_address_root(context, virt_to_phys(pgd));
1467 context_set_address_width(context, iommu->agaw);
1468 }
1469
1470 context_set_translation_type(context, translation);
1405 context_set_fault_enable(context); 1471 context_set_fault_enable(context);
1406 context_set_present(context); 1472 context_set_present(context);
1407 domain_flush_cache(domain, context, sizeof(*context)); 1473 domain_flush_cache(domain, context, sizeof(*context));
1408 1474
1409 /* it's a non-present to present mapping */ 1475 /*
1410 if (iommu->flush.flush_context(iommu, domain->id, 1476 * It's a non-present to present mapping. If hardware doesn't cache
1411 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1477 * non-present entry we only need to flush the write-buffer. If the
1412 DMA_CCMD_DEVICE_INVL, 1)) 1478 * _does_ cache non-present entries, then it does so in the special
1479 * domain #0, which we have to flush:
1480 */
1481 if (cap_caching_mode(iommu->cap)) {
1482 iommu->flush.flush_context(iommu, 0,
1483 (((u16)bus) << 8) | devfn,
1484 DMA_CCMD_MASK_NOBIT,
1485 DMA_CCMD_DEVICE_INVL);
1486 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
1487 } else {
1413 iommu_flush_write_buffer(iommu); 1488 iommu_flush_write_buffer(iommu);
1414 else 1489 }
1415 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1490 iommu_enable_dev_iotlb(info);
1416
1417 spin_unlock_irqrestore(&iommu->lock, flags); 1491 spin_unlock_irqrestore(&iommu->lock, flags);
1418 1492
1419 spin_lock_irqsave(&domain->iommu_lock, flags); 1493 spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1426,13 +1500,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1426} 1500}
1427 1501
1428static int 1502static int
1429domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) 1503domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1504 int translation)
1430{ 1505{
1431 int ret; 1506 int ret;
1432 struct pci_dev *tmp, *parent; 1507 struct pci_dev *tmp, *parent;
1433 1508
1434 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), 1509 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1435 pdev->bus->number, pdev->devfn); 1510 pdev->bus->number, pdev->devfn,
1511 translation);
1436 if (ret) 1512 if (ret)
1437 return ret; 1513 return ret;
1438 1514
@@ -1446,7 +1522,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1446 ret = domain_context_mapping_one(domain, 1522 ret = domain_context_mapping_one(domain,
1447 pci_domain_nr(parent->bus), 1523 pci_domain_nr(parent->bus),
1448 parent->bus->number, 1524 parent->bus->number,
1449 parent->devfn); 1525 parent->devfn, translation);
1450 if (ret) 1526 if (ret)
1451 return ret; 1527 return ret;
1452 parent = parent->bus->self; 1528 parent = parent->bus->self;
@@ -1454,12 +1530,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1454 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ 1530 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1455 return domain_context_mapping_one(domain, 1531 return domain_context_mapping_one(domain,
1456 pci_domain_nr(tmp->subordinate), 1532 pci_domain_nr(tmp->subordinate),
1457 tmp->subordinate->number, 0); 1533 tmp->subordinate->number, 0,
1534 translation);
1458 else /* this is a legacy PCI bridge */ 1535 else /* this is a legacy PCI bridge */
1459 return domain_context_mapping_one(domain, 1536 return domain_context_mapping_one(domain,
1460 pci_domain_nr(tmp->bus), 1537 pci_domain_nr(tmp->bus),
1461 tmp->bus->number, 1538 tmp->bus->number,
1462 tmp->devfn); 1539 tmp->devfn,
1540 translation);
1463} 1541}
1464 1542
1465static int domain_context_mapped(struct pci_dev *pdev) 1543static int domain_context_mapped(struct pci_dev *pdev)
@@ -1540,9 +1618,8 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1540 1618
1541 clear_context_table(iommu, bus, devfn); 1619 clear_context_table(iommu, bus, devfn);
1542 iommu->flush.flush_context(iommu, 0, 0, 0, 1620 iommu->flush.flush_context(iommu, 0, 0, 0,
1543 DMA_CCMD_GLOBAL_INVL, 0); 1621 DMA_CCMD_GLOBAL_INVL);
1544 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1622 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1545 DMA_TLB_GLOBAL_FLUSH, 0);
1546} 1623}
1547 1624
1548static void domain_remove_dev_info(struct dmar_domain *domain) 1625static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -1561,6 +1638,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
1561 info->dev->dev.archdata.iommu = NULL; 1638 info->dev->dev.archdata.iommu = NULL;
1562 spin_unlock_irqrestore(&device_domain_lock, flags); 1639 spin_unlock_irqrestore(&device_domain_lock, flags);
1563 1640
1641 iommu_disable_dev_iotlb(info);
1564 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 1642 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1565 iommu_detach_dev(iommu, info->bus, info->devfn); 1643 iommu_detach_dev(iommu, info->bus, info->devfn);
1566 free_devinfo_mem(info); 1644 free_devinfo_mem(info);
@@ -1756,7 +1834,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
1756 goto error; 1834 goto error;
1757 1835
1758 /* context entry init */ 1836 /* context entry init */
1759 ret = domain_context_mapping(domain, pdev); 1837 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1760 if (!ret) 1838 if (!ret)
1761 return 0; 1839 return 0;
1762error: 1840error:
@@ -1857,6 +1935,23 @@ static inline void iommu_prepare_isa(void)
1857} 1935}
1858#endif /* !CONFIG_DMAR_FLPY_WA */ 1936#endif /* !CONFIG_DMAR_FLPY_WA */
1859 1937
1938/* Initialize each context entry as pass through.*/
1939static int __init init_context_pass_through(void)
1940{
1941 struct pci_dev *pdev = NULL;
1942 struct dmar_domain *domain;
1943 int ret;
1944
1945 for_each_pci_dev(pdev) {
1946 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1947 ret = domain_context_mapping(domain, pdev,
1948 CONTEXT_TT_PASS_THROUGH);
1949 if (ret)
1950 return ret;
1951 }
1952 return 0;
1953}
1954
1860static int __init init_dmars(void) 1955static int __init init_dmars(void)
1861{ 1956{
1862 struct dmar_drhd_unit *drhd; 1957 struct dmar_drhd_unit *drhd;
@@ -1864,6 +1959,7 @@ static int __init init_dmars(void)
1864 struct pci_dev *pdev; 1959 struct pci_dev *pdev;
1865 struct intel_iommu *iommu; 1960 struct intel_iommu *iommu;
1866 int i, ret; 1961 int i, ret;
1962 int pass_through = 1;
1867 1963
1868 /* 1964 /*
1869 * for each drhd 1965 * for each drhd
@@ -1917,7 +2013,15 @@ static int __init init_dmars(void)
1917 printk(KERN_ERR "IOMMU: allocate root entry failed\n"); 2013 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1918 goto error; 2014 goto error;
1919 } 2015 }
2016 if (!ecap_pass_through(iommu->ecap))
2017 pass_through = 0;
1920 } 2018 }
2019 if (iommu_pass_through)
2020 if (!pass_through) {
2021 printk(KERN_INFO
2022 "Pass Through is not supported by hardware.\n");
2023 iommu_pass_through = 0;
2024 }
1921 2025
1922 /* 2026 /*
1923 * Start from the sane iommu hardware state. 2027 * Start from the sane iommu hardware state.
@@ -1973,35 +2077,56 @@ static int __init init_dmars(void)
1973 } 2077 }
1974 2078
1975 /* 2079 /*
1976 * For each rmrr 2080 * If pass through is set and enabled, context entries of all pci
1977 * for each dev attached to rmrr 2081 * devices are intialized by pass through translation type.
1978 * do
1979 * locate drhd for dev, alloc domain for dev
1980 * allocate free domain
1981 * allocate page table entries for rmrr
1982 * if context not allocated for bus
1983 * allocate and init context
1984 * set present in root table for this bus
1985 * init context with domain, translation etc
1986 * endfor
1987 * endfor
1988 */ 2082 */
1989 for_each_rmrr_units(rmrr) { 2083 if (iommu_pass_through) {
1990 for (i = 0; i < rmrr->devices_cnt; i++) { 2084 ret = init_context_pass_through();
1991 pdev = rmrr->devices[i]; 2085 if (ret) {
1992 /* some BIOS lists non-exist devices in DMAR table */ 2086 printk(KERN_ERR "IOMMU: Pass through init failed.\n");
1993 if (!pdev) 2087 iommu_pass_through = 0;
1994 continue;
1995 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1996 if (ret)
1997 printk(KERN_ERR
1998 "IOMMU: mapping reserved region failed\n");
1999 } 2088 }
2000 } 2089 }
2001 2090
2002 iommu_prepare_gfx_mapping(); 2091 /*
2092 * If pass through is not set or not enabled, setup context entries for
2093 * identity mappings for rmrr, gfx, and isa.
2094 */
2095 if (!iommu_pass_through) {
2096 /*
2097 * For each rmrr
2098 * for each dev attached to rmrr
2099 * do
2100 * locate drhd for dev, alloc domain for dev
2101 * allocate free domain
2102 * allocate page table entries for rmrr
2103 * if context not allocated for bus
2104 * allocate and init context
2105 * set present in root table for this bus
2106 * init context with domain, translation etc
2107 * endfor
2108 * endfor
2109 */
2110 for_each_rmrr_units(rmrr) {
2111 for (i = 0; i < rmrr->devices_cnt; i++) {
2112 pdev = rmrr->devices[i];
2113 /*
2114 * some BIOS lists non-exist devices in DMAR
2115 * table.
2116 */
2117 if (!pdev)
2118 continue;
2119 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2120 if (ret)
2121 printk(KERN_ERR
2122 "IOMMU: mapping reserved region failed\n");
2123 }
2124 }
2125
2126 iommu_prepare_gfx_mapping();
2003 2127
2004 iommu_prepare_isa(); 2128 iommu_prepare_isa();
2129 }
2005 2130
2006 /* 2131 /*
2007 * for each drhd 2132 * for each drhd
@@ -2023,10 +2148,8 @@ static int __init init_dmars(void)
2023 2148
2024 iommu_set_root_entry(iommu); 2149 iommu_set_root_entry(iommu);
2025 2150
2026 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, 2151 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2027 0); 2152 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2028 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2029 0);
2030 iommu_disable_protect_mem_regions(iommu); 2153 iommu_disable_protect_mem_regions(iommu);
2031 2154
2032 ret = iommu_enable_translation(iommu); 2155 ret = iommu_enable_translation(iommu);
@@ -2112,7 +2235,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2112 2235
2113 /* make sure context mapping is ok */ 2236 /* make sure context mapping is ok */
2114 if (unlikely(!domain_context_mapped(pdev))) { 2237 if (unlikely(!domain_context_mapped(pdev))) {
2115 ret = domain_context_mapping(domain, pdev); 2238 ret = domain_context_mapping(domain, pdev,
2239 CONTEXT_TT_MULTI_LEVEL);
2116 if (ret) { 2240 if (ret) {
2117 printk(KERN_ERR 2241 printk(KERN_ERR
2118 "Domain context map for %s failed", 2242 "Domain context map for %s failed",
@@ -2173,10 +2297,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2173 if (ret) 2297 if (ret)
2174 goto error; 2298 goto error;
2175 2299
2176 /* it's a non-present to present mapping */ 2300 /* it's a non-present to present mapping. Only flush if caching mode */
2177 ret = iommu_flush_iotlb_psi(iommu, domain->id, 2301 if (cap_caching_mode(iommu->cap))
2178 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2302 iommu_flush_iotlb_psi(iommu, 0, start_paddr,
2179 if (ret) 2303 size >> VTD_PAGE_SHIFT);
2304 else
2180 iommu_flush_write_buffer(iommu); 2305 iommu_flush_write_buffer(iommu);
2181 2306
2182 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2307 return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -2210,15 +2335,22 @@ static void flush_unmaps(void)
2210 if (!iommu) 2335 if (!iommu)
2211 continue; 2336 continue;
2212 2337
2213 if (deferred_flush[i].next) { 2338 if (!deferred_flush[i].next)
2214 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2339 continue;
2215 DMA_TLB_GLOBAL_FLUSH, 0); 2340
2216 for (j = 0; j < deferred_flush[i].next; j++) { 2341 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2217 __free_iova(&deferred_flush[i].domain[j]->iovad, 2342 DMA_TLB_GLOBAL_FLUSH);
2218 deferred_flush[i].iova[j]); 2343 for (j = 0; j < deferred_flush[i].next; j++) {
2219 } 2344 unsigned long mask;
2220 deferred_flush[i].next = 0; 2345 struct iova *iova = deferred_flush[i].iova[j];
2346
2347 mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
2348 mask = ilog2(mask >> VTD_PAGE_SHIFT);
2349 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2350 iova->pfn_lo << PAGE_SHIFT, mask);
2351 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2221 } 2352 }
2353 deferred_flush[i].next = 0;
2222 } 2354 }
2223 2355
2224 list_size = 0; 2356 list_size = 0;
@@ -2291,9 +2423,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2291 /* free page tables */ 2423 /* free page tables */
2292 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2424 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2293 if (intel_iommu_strict) { 2425 if (intel_iommu_strict) {
2294 if (iommu_flush_iotlb_psi(iommu, 2426 iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2295 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2427 size >> VTD_PAGE_SHIFT);
2296 iommu_flush_write_buffer(iommu);
2297 /* free iova */ 2428 /* free iova */
2298 __free_iova(&domain->iovad, iova); 2429 __free_iova(&domain->iovad, iova);
2299 } else { 2430 } else {
@@ -2384,9 +2515,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2384 /* free page tables */ 2515 /* free page tables */
2385 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2516 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2386 2517
2387 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2518 iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2388 size >> VTD_PAGE_SHIFT, 0)) 2519 size >> VTD_PAGE_SHIFT);
2389 iommu_flush_write_buffer(iommu);
2390 2520
2391 /* free iova */ 2521 /* free iova */
2392 __free_iova(&domain->iovad, iova); 2522 __free_iova(&domain->iovad, iova);
@@ -2478,10 +2608,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2478 offset += size; 2608 offset += size;
2479 } 2609 }
2480 2610
2481 /* it's a non-present to present mapping */ 2611 /* it's a non-present to present mapping. Only flush if caching mode */
2482 if (iommu_flush_iotlb_psi(iommu, domain->id, 2612 if (cap_caching_mode(iommu->cap))
2483 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2613 iommu_flush_iotlb_psi(iommu, 0, start_addr,
2614 offset >> VTD_PAGE_SHIFT);
2615 else
2484 iommu_flush_write_buffer(iommu); 2616 iommu_flush_write_buffer(iommu);
2617
2485 return nelems; 2618 return nelems;
2486} 2619}
2487 2620
@@ -2640,9 +2773,9 @@ static int init_iommu_hw(void)
2640 iommu_set_root_entry(iommu); 2773 iommu_set_root_entry(iommu);
2641 2774
2642 iommu->flush.flush_context(iommu, 0, 0, 0, 2775 iommu->flush.flush_context(iommu, 0, 0, 0,
2643 DMA_CCMD_GLOBAL_INVL, 0); 2776 DMA_CCMD_GLOBAL_INVL);
2644 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2777 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2645 DMA_TLB_GLOBAL_FLUSH, 0); 2778 DMA_TLB_GLOBAL_FLUSH);
2646 iommu_disable_protect_mem_regions(iommu); 2779 iommu_disable_protect_mem_regions(iommu);
2647 iommu_enable_translation(iommu); 2780 iommu_enable_translation(iommu);
2648 } 2781 }
@@ -2657,9 +2790,9 @@ static void iommu_flush_all(void)
2657 2790
2658 for_each_active_iommu(iommu, drhd) { 2791 for_each_active_iommu(iommu, drhd) {
2659 iommu->flush.flush_context(iommu, 0, 0, 0, 2792 iommu->flush.flush_context(iommu, 0, 0, 0,
2660 DMA_CCMD_GLOBAL_INVL, 0); 2793 DMA_CCMD_GLOBAL_INVL);
2661 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2794 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2662 DMA_TLB_GLOBAL_FLUSH, 0); 2795 DMA_TLB_GLOBAL_FLUSH);
2663 } 2796 }
2664} 2797}
2665 2798
@@ -2782,7 +2915,7 @@ int __init intel_iommu_init(void)
2782 * Check the need for DMA-remapping initialization now. 2915 * Check the need for DMA-remapping initialization now.
2783 * Above initialization will also be used by Interrupt-remapping. 2916 * Above initialization will also be used by Interrupt-remapping.
2784 */ 2917 */
2785 if (no_iommu || swiotlb || dmar_disabled) 2918 if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
2786 return -ENODEV; 2919 return -ENODEV;
2787 2920
2788 iommu_init_mempool(); 2921 iommu_init_mempool();
@@ -2802,7 +2935,15 @@ int __init intel_iommu_init(void)
2802 2935
2803 init_timer(&unmap_timer); 2936 init_timer(&unmap_timer);
2804 force_iommu = 1; 2937 force_iommu = 1;
2805 dma_ops = &intel_dma_ops; 2938
2939 if (!iommu_pass_through) {
2940 printk(KERN_INFO
2941 "Multi-level page-table translation for DMAR.\n");
2942 dma_ops = &intel_dma_ops;
2943 } else
2944 printk(KERN_INFO
2945 "DMAR: Pass through translation for DMAR.\n");
2946
2806 init_iommu_sysfs(); 2947 init_iommu_sysfs();
2807 2948
2808 register_iommu(&intel_iommu_ops); 2949 register_iommu(&intel_iommu_ops);
@@ -2888,6 +3029,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2888 info->dev->dev.archdata.iommu = NULL; 3029 info->dev->dev.archdata.iommu = NULL;
2889 spin_unlock_irqrestore(&device_domain_lock, flags); 3030 spin_unlock_irqrestore(&device_domain_lock, flags);
2890 3031
3032 iommu_disable_dev_iotlb(info);
2891 iommu_detach_dev(iommu, info->bus, info->devfn); 3033 iommu_detach_dev(iommu, info->bus, info->devfn);
2892 iommu_detach_dependent_devices(iommu, pdev); 3034 iommu_detach_dependent_devices(iommu, pdev);
2893 free_devinfo_mem(info); 3035 free_devinfo_mem(info);
@@ -2938,6 +3080,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2938 3080
2939 spin_unlock_irqrestore(&device_domain_lock, flags1); 3081 spin_unlock_irqrestore(&device_domain_lock, flags1);
2940 3082
3083 iommu_disable_dev_iotlb(info);
2941 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 3084 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
2942 iommu_detach_dev(iommu, info->bus, info->devfn); 3085 iommu_detach_dev(iommu, info->bus, info->devfn);
2943 iommu_detach_dependent_devices(iommu, info->dev); 3086 iommu_detach_dependent_devices(iommu, info->dev);
@@ -3142,11 +3285,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3142 return -EFAULT; 3285 return -EFAULT;
3143 } 3286 }
3144 3287
3145 ret = domain_context_mapping(dmar_domain, pdev); 3288 ret = vm_domain_add_dev_info(dmar_domain, pdev);
3146 if (ret) 3289 if (ret)
3147 return ret; 3290 return ret;
3148 3291
3149 ret = vm_domain_add_dev_info(dmar_domain, pdev); 3292 ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3150 return ret; 3293 return ret;
3151} 3294}
3152 3295
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 3a0cb0bb0593..1e83c8c5f985 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -409,7 +409,7 @@ int free_irte(int irq)
409static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) 409static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
410{ 410{
411 u64 addr; 411 u64 addr;
412 u32 cmd, sts; 412 u32 sts;
413 unsigned long flags; 413 unsigned long flags;
414 414
415 addr = virt_to_phys((void *)iommu->ir_table->base); 415 addr = virt_to_phys((void *)iommu->ir_table->base);
@@ -420,9 +420,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
420 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); 420 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
421 421
422 /* Set interrupt-remapping table pointer */ 422 /* Set interrupt-remapping table pointer */
423 cmd = iommu->gcmd | DMA_GCMD_SIRTP;
424 iommu->gcmd |= DMA_GCMD_SIRTP; 423 iommu->gcmd |= DMA_GCMD_SIRTP;
425 writel(cmd, iommu->reg + DMAR_GCMD_REG); 424 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
426 425
427 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 426 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
428 readl, (sts & DMA_GSTS_IRTPS), sts); 427 readl, (sts & DMA_GSTS_IRTPS), sts);
@@ -437,9 +436,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
437 spin_lock_irqsave(&iommu->register_lock, flags); 436 spin_lock_irqsave(&iommu->register_lock, flags);
438 437
439 /* Enable interrupt-remapping */ 438 /* Enable interrupt-remapping */
440 cmd = iommu->gcmd | DMA_GCMD_IRE;
441 iommu->gcmd |= DMA_GCMD_IRE; 439 iommu->gcmd |= DMA_GCMD_IRE;
442 writel(cmd, iommu->reg + DMAR_GCMD_REG); 440 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
443 441
444 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 442 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
445 readl, (sts & DMA_GSTS_IRES), sts); 443 readl, (sts & DMA_GSTS_IRES), sts);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 03c7706c0a09..e3a87210e947 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -5,6 +5,7 @@
5 * 5 *
6 * PCI Express I/O Virtualization (IOV) support. 6 * PCI Express I/O Virtualization (IOV) support.
7 * Single Root IOV 1.0 7 * Single Root IOV 1.0
8 * Address Translation Service 1.0
8 */ 9 */
9 10
10#include <linux/pci.h> 11#include <linux/pci.h>
@@ -492,10 +493,10 @@ found:
492 493
493 if (pdev) 494 if (pdev)
494 iov->dev = pci_dev_get(pdev); 495 iov->dev = pci_dev_get(pdev);
495 else { 496 else
496 iov->dev = dev; 497 iov->dev = dev;
497 mutex_init(&iov->lock); 498
498 } 499 mutex_init(&iov->lock);
499 500
500 dev->sriov = iov; 501 dev->sriov = iov;
501 dev->is_physfn = 1; 502 dev->is_physfn = 1;
@@ -515,11 +516,11 @@ static void sriov_release(struct pci_dev *dev)
515{ 516{
516 BUG_ON(dev->sriov->nr_virtfn); 517 BUG_ON(dev->sriov->nr_virtfn);
517 518
518 if (dev == dev->sriov->dev) 519 if (dev != dev->sriov->dev)
519 mutex_destroy(&dev->sriov->lock);
520 else
521 pci_dev_put(dev->sriov->dev); 520 pci_dev_put(dev->sriov->dev);
522 521
522 mutex_destroy(&dev->sriov->lock);
523
523 kfree(dev->sriov); 524 kfree(dev->sriov);
524 dev->sriov = NULL; 525 dev->sriov = NULL;
525} 526}
@@ -681,3 +682,145 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev)
681 return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; 682 return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
682} 683}
683EXPORT_SYMBOL_GPL(pci_sriov_migration); 684EXPORT_SYMBOL_GPL(pci_sriov_migration);
685
686static int ats_alloc_one(struct pci_dev *dev, int ps)
687{
688 int pos;
689 u16 cap;
690 struct pci_ats *ats;
691
692 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
693 if (!pos)
694 return -ENODEV;
695
696 ats = kzalloc(sizeof(*ats), GFP_KERNEL);
697 if (!ats)
698 return -ENOMEM;
699
700 ats->pos = pos;
701 ats->stu = ps;
702 pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
703 ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
704 PCI_ATS_MAX_QDEP;
705 dev->ats = ats;
706
707 return 0;
708}
709
710static void ats_free_one(struct pci_dev *dev)
711{
712 kfree(dev->ats);
713 dev->ats = NULL;
714}
715
716/**
717 * pci_enable_ats - enable the ATS capability
718 * @dev: the PCI device
719 * @ps: the IOMMU page shift
720 *
721 * Returns 0 on success, or negative on failure.
722 */
723int pci_enable_ats(struct pci_dev *dev, int ps)
724{
725 int rc;
726 u16 ctrl;
727
728 BUG_ON(dev->ats && dev->ats->is_enabled);
729
730 if (ps < PCI_ATS_MIN_STU)
731 return -EINVAL;
732
733 if (dev->is_physfn || dev->is_virtfn) {
734 struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;
735
736 mutex_lock(&pdev->sriov->lock);
737 if (pdev->ats)
738 rc = pdev->ats->stu == ps ? 0 : -EINVAL;
739 else
740 rc = ats_alloc_one(pdev, ps);
741
742 if (!rc)
743 pdev->ats->ref_cnt++;
744 mutex_unlock(&pdev->sriov->lock);
745 if (rc)
746 return rc;
747 }
748
749 if (!dev->is_physfn) {
750 rc = ats_alloc_one(dev, ps);
751 if (rc)
752 return rc;
753 }
754
755 ctrl = PCI_ATS_CTRL_ENABLE;
756 if (!dev->is_virtfn)
757 ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU);
758 pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
759
760 dev->ats->is_enabled = 1;
761
762 return 0;
763}
764
765/**
766 * pci_disable_ats - disable the ATS capability
767 * @dev: the PCI device
768 */
769void pci_disable_ats(struct pci_dev *dev)
770{
771 u16 ctrl;
772
773 BUG_ON(!dev->ats || !dev->ats->is_enabled);
774
775 pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl);
776 ctrl &= ~PCI_ATS_CTRL_ENABLE;
777 pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
778
779 dev->ats->is_enabled = 0;
780
781 if (dev->is_physfn || dev->is_virtfn) {
782 struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;
783
784 mutex_lock(&pdev->sriov->lock);
785 pdev->ats->ref_cnt--;
786 if (!pdev->ats->ref_cnt)
787 ats_free_one(pdev);
788 mutex_unlock(&pdev->sriov->lock);
789 }
790
791 if (!dev->is_physfn)
792 ats_free_one(dev);
793}
794
795/**
796 * pci_ats_queue_depth - query the ATS Invalidate Queue Depth
797 * @dev: the PCI device
798 *
799 * Returns the queue depth on success, or negative on failure.
800 *
801 * The ATS spec uses 0 in the Invalidate Queue Depth field to
802 * indicate that the function can accept 32 Invalidate Request.
803 * But here we use the `real' values (i.e. 1~32) for the Queue
804 * Depth; and 0 indicates the function shares the Queue with
805 * other functions (doesn't exclusively own a Queue).
806 */
807int pci_ats_queue_depth(struct pci_dev *dev)
808{
809 int pos;
810 u16 cap;
811
812 if (dev->is_virtfn)
813 return 0;
814
815 if (dev->ats)
816 return dev->ats->qdep;
817
818 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
819 if (!pos)
820 return -ENODEV;
821
822 pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
823
824 return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
825 PCI_ATS_MAX_QDEP;
826}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d03f6b99f292..f73bcbedf37c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -229,6 +229,15 @@ struct pci_sriov {
229 u8 __iomem *mstate; /* VF Migration State Array */ 229 u8 __iomem *mstate; /* VF Migration State Array */
230}; 230};
231 231
232/* Address Translation Service */
233struct pci_ats {
234 int pos; /* capability position */
235 int stu; /* Smallest Translation Unit */
236 int qdep; /* Invalidate Queue Depth */
237 int ref_cnt; /* Physical Function reference count */
238 unsigned int is_enabled:1; /* Enable bit is set */
239};
240
232#ifdef CONFIG_PCI_IOV 241#ifdef CONFIG_PCI_IOV
233extern int pci_iov_init(struct pci_dev *dev); 242extern int pci_iov_init(struct pci_dev *dev);
234extern void pci_iov_release(struct pci_dev *dev); 243extern void pci_iov_release(struct pci_dev *dev);
@@ -236,6 +245,20 @@ extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
236 enum pci_bar_type *type); 245 enum pci_bar_type *type);
237extern void pci_restore_iov_state(struct pci_dev *dev); 246extern void pci_restore_iov_state(struct pci_dev *dev);
238extern int pci_iov_bus_range(struct pci_bus *bus); 247extern int pci_iov_bus_range(struct pci_bus *bus);
248
249extern int pci_enable_ats(struct pci_dev *dev, int ps);
250extern void pci_disable_ats(struct pci_dev *dev);
251extern int pci_ats_queue_depth(struct pci_dev *dev);
252/**
253 * pci_ats_enabled - query the ATS status
254 * @dev: the PCI device
255 *
256 * Returns 1 if ATS capability is enabled, or 0 if not.
257 */
258static inline int pci_ats_enabled(struct pci_dev *dev)
259{
260 return dev->ats && dev->ats->is_enabled;
261}
239#else 262#else
240static inline int pci_iov_init(struct pci_dev *dev) 263static inline int pci_iov_init(struct pci_dev *dev)
241{ 264{
@@ -257,6 +280,22 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
257{ 280{
258 return 0; 281 return 0;
259} 282}
283
284static inline int pci_enable_ats(struct pci_dev *dev, int ps)
285{
286 return -ENODEV;
287}
288static inline void pci_disable_ats(struct pci_dev *dev)
289{
290}
291static inline int pci_ats_queue_depth(struct pci_dev *dev)
292{
293 return -ENODEV;
294}
295static inline int pci_ats_enabled(struct pci_dev *dev)
296{
297 return 0;
298}
260#endif /* CONFIG_PCI_IOV */ 299#endif /* CONFIG_PCI_IOV */
261 300
262#endif /* DRIVERS_PCI_H */ 301#endif /* DRIVERS_PCI_H */
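
A closing note on the CONFIG_PCI_IOV=n stubs added just above: pci_enable_ats() degrades to -ENODEV and pci_ats_enabled() to 0, so ATS-aware callers can be written unconditionally. A minimal usage sketch (kernel-style; example_try_ats() is a made-up name and the page shift of 12 is an assumption):

#include <linux/pci.h>
#include "pci.h"	/* drivers/pci/pci.h: ATS helpers or their !CONFIG_PCI_IOV stubs */

static void example_try_ats(struct pci_dev *pdev)
{
	/* With CONFIG_PCI_IOV=n this is the -ENODEV stub and we simply fall through. */
	if (pci_enable_ats(pdev, 12))
		return;

	/* ... issue ATS invalidations while enabled ... */

	if (pci_ats_enabled(pdev))
		pci_disable_ats(pdev);
}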