author     Alex Williamson <alex.williamson@redhat.com>   2013-06-15 12:27:19 -0400
committer  Joerg Roedel <joro@8bytes.org>                 2013-08-14 16:21:04 -0400
commit     3269ee0bd6686baf86630300d528500ac5b516d7 (patch)
tree       de651f5e9631b3258ff01b3fc97d91b260d29a17  /drivers/iommu
parent     d4e4ab86bcba5a72779c43dc1459f71fea3d89c8 (diff)
intel-iommu: Fix leaks in pagetable freeing
At best the current code only seems to free the leaf pagetables and the root. If you're unlucky enough to have a large gap (like any QEMU guest with more than 3G of memory), only the first chunk of leaf pagetables is freed (plus the root). This is a massive memory leak.

This patch rewrites the pagetable freeing function to use a recursive algorithm, and it not only frees all the pagetables but does so without any apparent performance loss versus the current broken version.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
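The new dma_pte_free_level() below is a post-order walk: it recurses into every present, non-superpage entry first, and only then frees the child table once the requested pfn range fully covers it. Here is a minimal userspace sketch of that pattern; the names (pt_table, slot, free_level, ENTRIES_PER_TABLE) are hypothetical and not the kernel code, and the start_pfn/last_pfn range check and cache flushing of the real function are omitted for brevity.

/*
 * Minimal userspace sketch of the recursive freeing pattern
 * (hypothetical names; not the kernel code).  Each table holds
 * pointers to next-level tables; freeing recurses into a child
 * before releasing it, so no intermediate table is leaked.
 */
#include <stdlib.h>

#define ENTRIES_PER_TABLE 512

struct pt_table {
	struct pt_table *slot[ENTRIES_PER_TABLE];	/* NULL = not present */
};

/* Free everything reachable from 'table'; level 1 is the leaf level. */
static void free_level(struct pt_table *table, int level)
{
	for (int i = 0; i < ENTRIES_PER_TABLE; i++) {
		if (!table->slot[i])
			continue;
		if (level > 2)		/* the child is itself a table of tables */
			free_level(table->slot[i], level - 1);
		free(table->slot[i]);	/* release the child table */
		table->slot[i] = NULL;
	}
}

int main(void)
{
	/* Sparse 3-level tree: root -> one mid-level table -> one leaf table. */
	struct pt_table *root = calloc(1, sizeof(*root));
	root->slot[0] = calloc(1, sizeof(*root));
	root->slot[0]->slot[7] = calloc(1, sizeof(*root));

	free_level(root, 3);	/* frees the leaf and the mid-level table */
	free(root);		/* the caller frees the root, as the patch does for the pgd */
	return 0;
}

Recursing into a child before freeing it guarantees that every intermediate table is visited exactly once, which is the property the old level-by-level loop lost across large gaps in the address space.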
Diffstat (limited to 'drivers/iommu')
-rw-r--r--  drivers/iommu/intel-iommu.c | 72
1 file changed, 35 insertions(+), 37 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index eec0d3e04bf5..15e9b57e9cf0 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
 	return order;
 }
 
+static void dma_pte_free_level(struct dmar_domain *domain, int level,
+			       struct dma_pte *pte, unsigned long pfn,
+			       unsigned long start_pfn, unsigned long last_pfn)
+{
+	pfn = max(start_pfn, pfn);
+	pte = &pte[pfn_level_offset(pfn, level)];
+
+	do {
+		unsigned long level_pfn;
+		struct dma_pte *level_pte;
+
+		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
+			goto next;
+
+		level_pfn = pfn & level_mask(level - 1);
+		level_pte = phys_to_virt(dma_pte_addr(pte));
+
+		if (level > 2)
+			dma_pte_free_level(domain, level - 1, level_pte,
+					   level_pfn, start_pfn, last_pfn);
+
+		/* If range covers entire pagetable, free it */
+		if (!(start_pfn > level_pfn ||
+		      last_pfn < level_pfn + level_size(level))) {
+			dma_clear_pte(pte);
+			domain_flush_cache(domain, pte, sizeof(*pte));
+			free_pgtable_page(level_pte);
+		}
+next:
+		pfn += level_size(level);
+	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+}
+
 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
 				   unsigned long start_pfn,
 				   unsigned long last_pfn)
 {
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
-	struct dma_pte *first_pte, *pte;
-	int total = agaw_to_level(domain->agaw);
-	int level;
-	unsigned long tmp;
-	int large_page = 2;
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 	BUG_ON(start_pfn > last_pfn);
 
 	/* We don't need lock here; nobody else touches the iova range */
-	level = 2;
-	while (level <= total) {
-		tmp = align_to_level(start_pfn, level);
-
-		/* If we can't even clear one PTE at this level, we're done */
-		if (tmp + level_size(level) - 1 > last_pfn)
-			return;
-
-		do {
-			large_page = level;
-			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
-			if (large_page > level)
-				level = large_page + 1;
-			if (!pte) {
-				tmp = align_to_level(tmp + 1, level + 1);
-				continue;
-			}
-			do {
-				if (dma_pte_present(pte)) {
-					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
-					dma_clear_pte(pte);
-				}
-				pte++;
-				tmp += level_size(level);
-			} while (!first_pte_in_page(pte) &&
-				 tmp + level_size(level) - 1 <= last_pfn);
+	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
+			   domain->pgd, 0, start_pfn, last_pfn);
 
-			domain_flush_cache(domain, first_pte,
-					   (void *)pte - (void *)first_pte);
-
-		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
-		level++;
-	}
 	/* free pgd */
 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		free_pgtable_page(domain->pgd);