author     Julien Grall <julien.grall@citrix.com>    2015-09-09 10:18:45 -0400
committer  David Vrabel <david.vrabel@citrix.com>    2015-10-23 09:20:43 -0400
commit     9435cce87950d805e6c8315410f2cb8ff6b2c6a2 (patch)
tree       8d7dbbe5e1ce9ccffcdfda949d0b084ae720997d
parent     291be10fd7511101d44cf98166d049bd31bc7600 (diff)
xen/swiotlb: Add support for 64KB page granularity
Swiotlb is used on ARM64 to support DMA on platforms where devices are not protected by an SMMU. Furthermore, it is only enabled for DOM0.

While Xen always uses 4KB page granularity in the stage-2 page table, Linux ARM64 may use either 4KB or 64KB. This means that a Linux page can span multiple Xen pages.

The swiotlb code has to validate that the buffer used for DMA is physically contiguous in memory. As a Linux page can't be shared between local memory and foreign pages by design (the balloon code always removes a Linux page in its entirety), the changes in the code are very minimal because we only need to check the first Xen PFN.

Note that it may be possible to optimize check_pages_physically_contiguous to avoid looping over every Xen PFN for local memory, but that optimization is left for a follow-up.

Signed-off-by: Julien Grall <julien.grall@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
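The patch rests on the mixed page granularities: Xen's stage-2 mappings are fixed at 4KB while the Linux page size may be 64KB. The arithmetic behind "a Linux page spans multiple Xen pages", and behind the frame count that check_pages_physically_contiguous() loops over, can be illustrated with a small standalone C program (not part of the patch; the macro names and values are illustrative only, matching the 4KB/64KB configuration described above):

#include <stdio.h>

/* Xen always uses a fixed 4KB granularity for its stage-2 mappings. */
#define XEN_PAGE_SHIFT	12
#define XEN_PAGE_SIZE	(1UL << XEN_PAGE_SHIFT)

/* A 64KB page size, as an ARM64 kernel may be configured with. */
#define LINUX_PAGE_SIZE	(1UL << 16)

int main(void)
{
	/* One 64KB Linux page covers 16 Xen frames. */
	unsigned long xen_frames_per_page = LINUX_PAGE_SIZE / XEN_PAGE_SIZE;

	/* A DMA buffer of `size' bytes starting at `offset' within its first
	 * Xen frame spans this many Xen frames; the same rounding is used as
	 * the loop bound in check_pages_physically_contiguous() below. */
	unsigned long offset = 0x800, size = 0x3000;
	unsigned long nr_frames = (offset + size + XEN_PAGE_SIZE - 1) >> XEN_PAGE_SHIFT;

	printf("Xen frames per Linux page: %lu\n", xen_frames_per_page);  /* 16 */
	printf("Xen frames spanned by the buffer: %lu\n", nr_frames);     /* 4 */
	return 0;
}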
-rw-r--r--  arch/arm/include/asm/xen/page-coherent.h |  26
-rw-r--r--  arch/arm/xen/mm.c                        |  38
-rw-r--r--  drivers/xen/swiotlb-xen.c                |  39
3 files changed, 63 insertions(+), 40 deletions(-)
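The comment kept in drivers/xen/swiotlb-xen.c below, which explains why xen_phys_to_bus() and xen_bus_to_phys() avoid XEN_PFN_PHYS, can be demonstrated with another small standalone program (not driver code; the typedefs merely model a configuration where phys_addr_t is 32-bit and dma_addr_t is 64-bit):

#include <stdio.h>
#include <stdint.h>

#define XEN_PAGE_SHIFT	12

typedef uint32_t phys_addr_t;	/* e.g. a 32-bit ARM configuration */
typedef uint64_t dma_addr_t;

int main(void)
{
	/* A frame number whose byte address lies above 4GB. */
	unsigned long bfn = 0x500000;

	/* Mimics the XEN_PFN_PHYS hazard: the shift happens in 32-bit
	 * phys_addr_t arithmetic, so the top bits are lost. */
	dma_addr_t wrong = (dma_addr_t)((phys_addr_t)bfn << XEN_PAGE_SHIFT);

	/* What the driver does instead: cast to 64-bit first, then shift. */
	dma_addr_t right = (dma_addr_t)bfn << XEN_PAGE_SHIFT;

	printf("shift then widen: 0x%llx\n", (unsigned long long)wrong);  /* 0x0 */
	printf("widen then shift: 0x%llx\n", (unsigned long long)right);  /* 0x500000000 */
	return 0;
}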
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index efd562412850..0375c8caa061 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -35,11 +35,15 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
 	     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	bool local = PFN_DOWN(dev_addr) == page_to_pfn(page);
-	/* Dom0 is mapped 1:1, so if pfn == mfn the page is local otherwise
-	 * is a foreign page grant-mapped in dom0. If the page is local we
-	 * can safely call the native dma_ops function, otherwise we call
-	 * the xen specific function. */
+	bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+	/*
+	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+	 * multiple Xen page, it's not possible to have a mix of local and
+	 * foreign Xen page. So if the first xen_pfn == mfn the page is local
+	 * otherwise it's a foreign page grant-mapped in dom0. If the page is
+	 * local we can safely call the native dma_ops function, otherwise we
+	 * call the xen specific function.
+	 */
 	if (local)
 		__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
 	else
@@ -51,10 +55,14 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 		struct dma_attrs *attrs)
 {
 	unsigned long pfn = PFN_DOWN(handle);
-	/* Dom0 is mapped 1:1, so calling pfn_valid on a foreign mfn will
-	 * always return false. If the page is local we can safely call the
-	 * native dma_ops function, otherwise we call the xen specific
-	 * function. */
+	/*
+	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+	 * multiple Xen page, it's not possible to have a mix of local and
+	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
+	 * foreign mfn will always return false. If the page is local we can
+	 * safely call the native dma_ops function, otherwise we call the xen
+	 * specific function.
+	 */
 	if (pfn_valid(pfn)) {
 		if (__generic_dma_ops(hwdev)->unmap_page)
 			__generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 7b517e913762..7c34f7126b04 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -48,22 +48,22 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
 	size_t size, enum dma_data_direction dir, enum dma_cache_op op)
 {
 	struct gnttab_cache_flush cflush;
-	unsigned long pfn;
+	unsigned long xen_pfn;
 	size_t left = size;
 
-	pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
-	offset %= PAGE_SIZE;
+	xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
+	offset %= XEN_PAGE_SIZE;
 
 	do {
 		size_t len = left;
 
 		/* buffers in highmem or foreign pages cannot cross page
 		 * boundaries */
-		if (len + offset > PAGE_SIZE)
-			len = PAGE_SIZE - offset;
+		if (len + offset > XEN_PAGE_SIZE)
+			len = XEN_PAGE_SIZE - offset;
 
 		cflush.op = 0;
-		cflush.a.dev_bus_addr = pfn << PAGE_SHIFT;
+		cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
 		cflush.offset = offset;
 		cflush.length = len;
 
@@ -79,7 +79,7 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
 		HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
 
 		offset = 0;
-		pfn++;
+		xen_pfn++;
 		left -= len;
 	} while (left);
 }
@@ -141,10 +141,26 @@ bool xen_arch_need_swiotlb(struct device *dev,
 			       phys_addr_t phys,
 			       dma_addr_t dev_addr)
 {
-	unsigned long pfn = PFN_DOWN(phys);
-	unsigned long bfn = PFN_DOWN(dev_addr);
+	unsigned int xen_pfn = XEN_PFN_DOWN(phys);
+	unsigned int bfn = XEN_PFN_DOWN(dev_addr);
 
-	return (!hypercall_cflush && (pfn != bfn) && !is_device_dma_coherent(dev));
+	/*
+	 * The swiotlb buffer should be used if
+	 *	- Xen doesn't have the cache flush hypercall
+	 *	- The Linux page refers to foreign memory
+	 *	- The device doesn't support coherent DMA request
+	 *
+	 * The Linux page may be spanned acrros multiple Xen page, although
+	 * it's not possible to have a mix of local and foreign Xen page.
+	 * Furthermore, range_straddles_page_boundary is already checking
+	 * if buffer is physically contiguous in the host RAM.
+	 *
+	 * Therefore we only need to check the first Xen page to know if we
+	 * require a bounce buffer because the device doesn't support coherent
+	 * memory and we are not able to flush the cache.
+	 */
+	return (!hypercall_cflush && (xen_pfn != bfn) &&
+		!is_device_dma_coherent(dev));
 }
 
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 0a5a0e949862..7399782c0998 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -76,27 +76,27 @@ static unsigned long xen_io_tlb_nslabs;
 static u64 start_dma_addr;
 
 /*
- * Both of these functions should avoid PFN_PHYS because phys_addr_t
+ * Both of these functions should avoid XEN_PFN_PHYS because phys_addr_t
  * can be 32bit when dma_addr_t is 64bit leading to a loss in
  * information if the shift is done before casting to 64bit.
  */
 static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
 {
-	unsigned long bfn = pfn_to_bfn(PFN_DOWN(paddr));
-	dma_addr_t dma = (dma_addr_t)bfn << PAGE_SHIFT;
+	unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
+	dma_addr_t dma = (dma_addr_t)bfn << XEN_PAGE_SHIFT;
 
-	dma |= paddr & ~PAGE_MASK;
+	dma |= paddr & ~XEN_PAGE_MASK;
 
 	return dma;
 }
 
 static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
 {
-	unsigned long pfn = bfn_to_pfn(PFN_DOWN(baddr));
-	dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT;
+	unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
+	dma_addr_t dma = (dma_addr_t)xen_pfn << XEN_PAGE_SHIFT;
 	phys_addr_t paddr = dma;
 
-	paddr |= baddr & ~PAGE_MASK;
+	paddr |= baddr & ~XEN_PAGE_MASK;
 
 	return paddr;
 }
@@ -106,7 +106,7 @@ static inline dma_addr_t xen_virt_to_bus(void *address)
 	return xen_phys_to_bus(virt_to_phys(address));
 }
 
-static int check_pages_physically_contiguous(unsigned long pfn,
+static int check_pages_physically_contiguous(unsigned long xen_pfn,
 					     unsigned int offset,
 					     size_t length)
 {
@@ -114,11 +114,11 @@ static int check_pages_physically_contiguous(unsigned long pfn,
 	int i;
 	int nr_pages;
 
-	next_bfn = pfn_to_bfn(pfn);
-	nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
+	next_bfn = pfn_to_bfn(xen_pfn);
+	nr_pages = (offset + length + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT;
 
 	for (i = 1; i < nr_pages; i++) {
-		if (pfn_to_bfn(++pfn) != ++next_bfn)
+		if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
 			return 0;
 	}
 	return 1;
@@ -126,28 +126,27 @@ static int check_pages_physically_contiguous(unsigned long pfn,
 
 static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
 {
-	unsigned long pfn = PFN_DOWN(p);
-	unsigned int offset = p & ~PAGE_MASK;
+	unsigned long xen_pfn = XEN_PFN_DOWN(p);
+	unsigned int offset = p & ~XEN_PAGE_MASK;
 
-	if (offset + size <= PAGE_SIZE)
+	if (offset + size <= XEN_PAGE_SIZE)
 		return 0;
-	if (check_pages_physically_contiguous(pfn, offset, size))
+	if (check_pages_physically_contiguous(xen_pfn, offset, size))
 		return 0;
 	return 1;
 }
 
 static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
 {
-	unsigned long bfn = PFN_DOWN(dma_addr);
-	unsigned long pfn = bfn_to_local_pfn(bfn);
-	phys_addr_t paddr;
+	unsigned long bfn = XEN_PFN_DOWN(dma_addr);
+	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
+	phys_addr_t paddr = XEN_PFN_PHYS(xen_pfn);
 
 	/* If the address is outside our domain, it CAN
 	 * have the same virtual address as another address
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
-	if (pfn_valid(pfn)) {
-		paddr = PFN_PHYS(pfn);
+	if (pfn_valid(PFN_DOWN(paddr))) {
 		return paddr >= virt_to_phys(xen_io_tlb_start) &&
 		       paddr < virt_to_phys(xen_io_tlb_end);
 	}