author     Linus Torvalds <torvalds@linux-foundation.org>  2010-08-12 12:09:41 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-08-12 12:09:41 -0400
commit     26f0cf91813bdc8e61595f8ad6660251e2ee9cf6 (patch)
tree       db129d45fba03e3e59392410d0f35ba7d3e438e6
parent     d862b13bc8cbab9692fbe0ef44c40d0488b81af1 (diff)
parent     fe96eb404e33b59bb39f7050205f7c56c1c7d686 (diff)
Merge branch 'stable/xen-swiotlb-0.8.6' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/xen-swiotlb-0.8.6' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  x86: Detect whether we should use Xen SWIOTLB.
  pci-swiotlb-xen: Add glue code to setup dma_ops utilizing xen_swiotlb_* functions.
  swiotlb-xen: SWIOTLB library for Xen PV guest with PCI passthrough.
  xen/mmu: inhibit vmap aliases rather than trying to clear them out
  vmap: add flag to allow lazy unmap to be disabled at runtime
  xen: Add xen_create_contiguous_region
  xen: Rename the balloon lock
  xen: Allow unprivileged Xen domains to create iomap pages
  xen: use _PAGE_IOMAP in ioremap to do machine mappings

Fix up trivial conflicts (adding both xen swiotlb and xen pci platform driver setup close to each other) in drivers/xen/{Kconfig,Makefile} and include/xen/xen-ops.h
-rw-r--r--  arch/x86/include/asm/xen/page.h         |   8
-rw-r--r--  arch/x86/include/asm/xen/swiotlb-xen.h  |  14
-rw-r--r--  arch/x86/kernel/pci-dma.c               |   7
-rw-r--r--  arch/x86/xen/Makefile                   |   1
-rw-r--r--  arch/x86/xen/enlighten.c                |   4
-rw-r--r--  arch/x86/xen/mmu.c                      | 293
-rw-r--r--  arch/x86/xen/pci-swiotlb-xen.c          |  58
-rw-r--r--  drivers/xen/Kconfig                     |   5
-rw-r--r--  drivers/xen/Makefile                    |   1
-rw-r--r--  drivers/xen/balloon.c                   |  15
-rw-r--r--  drivers/xen/swiotlb-xen.c               | 515
-rw-r--r--  include/linux/vmalloc.h                 |   2
-rw-r--r--  include/xen/interface/memory.h          |  50
-rw-r--r--  include/xen/swiotlb-xen.h               |  65
-rw-r--r--  include/xen/xen-ops.h                   |   6
-rw-r--r--  mm/vmalloc.c                            |   4
16 files changed, 1024 insertions, 24 deletions
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 018a0a400799..bf5f7d32bd08 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -112,13 +112,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
  */
 static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 {
-        extern unsigned long max_mapnr;
         unsigned long pfn = mfn_to_pfn(mfn);
-        if ((pfn < max_mapnr)
-            && !xen_feature(XENFEAT_auto_translated_physmap)
-            && (get_phys_to_machine(pfn) != mfn))
-                return max_mapnr; /* force !pfn_valid() */
-        /* XXX fixme; not true with sparsemem */
+        if (get_phys_to_machine(pfn) != mfn)
+                return -1; /* force !pfn_valid() */
         return pfn;
 }
 
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
new file mode 100644
index 000000000000..1be1ab7d6a41
--- /dev/null
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_X86_SWIOTLB_XEN_H
2#define _ASM_X86_SWIOTLB_XEN_H
3
4#ifdef CONFIG_SWIOTLB_XEN
5extern int xen_swiotlb;
6extern int __init pci_xen_swiotlb_detect(void);
7extern void __init pci_xen_swiotlb_init(void);
8#else
9#define xen_swiotlb (0)
10static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
11static inline void __init pci_xen_swiotlb_init(void) { }
12#endif
13
14#endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 4b7e3d8b01dd..9f07cfcbd3a5 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -13,6 +13,7 @@
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
 #include <asm/x86_init.h>
+#include <asm/xen/swiotlb-xen.h>
 
 static int forbid_dac __read_mostly;
 
@@ -132,7 +133,7 @@ void __init pci_iommu_alloc(void)
         /* free the range so iommu could get some range less than 4G */
         dma32_free_bootmem();
 
-        if (pci_swiotlb_detect())
+        if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
                 goto out;
 
         gart_iommu_hole_init();
@@ -144,6 +145,8 @@ void __init pci_iommu_alloc(void)
         /* needs to be called after gart_iommu_hole_init */
         amd_iommu_detect();
 out:
+        pci_xen_swiotlb_init();
+
         pci_swiotlb_init();
 }
 
@@ -296,7 +299,7 @@ static int __init pci_iommu_init(void)
 #endif
         x86_init.iommu.iommu_init();
 
-        if (swiotlb) {
+        if (swiotlb || xen_swiotlb) {
                 printk(KERN_INFO "PCI-DMA: "
                         "Using software bounce buffering for IO (SWIOTLB)\n");
                 swiotlb_print_info();
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 930954685980..779385158915 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)      += debugfs.o
 
+obj-$(CONFIG_SWIOTLB_XEN)       += pci-swiotlb-xen.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d4ff5e83621d..7d46c8441418 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1172,6 +1172,10 @@ asmlinkage void __init xen_start_kernel(void)
 
         pgd = (pgd_t *)xen_start_info->pt_base;
 
+        if (!xen_initial_domain())
+                __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
+        __supported_pte_mask |= _PAGE_IOMAP;
         /* Don't do the full vcpu_info placement stuff until we have a
            possible map and a non-dummy shared_info. */
         per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 413b19b3d0fe..42086ac406af 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
 #include <linux/highmem.h>
 #include <linux/debugfs.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
 
@@ -51,15 +52,19 @@
 #include <asm/mmu_context.h>
 #include <asm/setup.h>
 #include <asm/paravirt.h>
+#include <asm/e820.h>
 #include <asm/linkage.h>
+#include <asm/page.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/version.h>
+#include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 
 #include "multicalls.h"
@@ -68,6 +73,13 @@
 
 #define MMU_UPDATE_HISTO        30
 
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+DEFINE_SPINLOCK(xen_reservation_lock);
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct {
@@ -378,6 +390,28 @@ static bool xen_page_pinned(void *ptr)
         return PagePinned(page);
 }
 
+static bool xen_iomap_pte(pte_t pte)
+{
+        return pte_flags(pte) & _PAGE_IOMAP;
+}
+
+static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+{
+        struct multicall_space mcs;
+        struct mmu_update *u;
+
+        mcs = xen_mc_entry(sizeof(*u));
+        u = mcs.args;
+
+        /* ptep might be kmapped when using 32-bit HIGHPTE */
+        u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+        u->val = pte_val_ma(pteval);
+
+        MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+
+        xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
         struct multicall_space mcs;
@@ -454,6 +488,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval)
 {
+        if (xen_iomap_pte(pteval)) {
+                xen_set_iomap_pte(ptep, pteval);
+                goto out;
+        }
+
         ADD_STATS(set_pte_at, 1);
 //        ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
         ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -524,8 +563,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
         return val;
 }
 
+static pteval_t iomap_pte(pteval_t val)
+{
+        if (val & _PAGE_PRESENT) {
+                unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+                pteval_t flags = val & PTE_FLAGS_MASK;
+
+                /* We assume the pte frame number is a MFN, so
+                   just use it as-is. */
+                val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
+        }
+
+        return val;
+}
+
 pteval_t xen_pte_val(pte_t pte)
 {
+        if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
+                return pte.pte;
+
         return pte_mfn_to_pfn(pte.pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -538,7 +594,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
 
 pte_t xen_make_pte(pteval_t pte)
 {
-        pte = pte_pfn_to_mfn(pte);
+        phys_addr_t addr = (pte & PTE_PFN_MASK);
+
+        /*
+         * Unprivileged domains are allowed to do IOMAPpings for
+         * PCI passthrough, but not map ISA space.  The ISA
+         * mappings are just dummy local mappings to keep other
+         * parts of the kernel happy.
+         */
+        if (unlikely(pte & _PAGE_IOMAP) &&
+            (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+                pte = iomap_pte(pte);
+        } else {
+                pte &= ~_PAGE_IOMAP;
+                pte = pte_pfn_to_mfn(pte);
+        }
+
         return native_make_pte(pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
@@ -594,6 +665,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+        if (xen_iomap_pte(pte)) {
+                xen_set_iomap_pte(ptep, pte);
+                return;
+        }
+
         ADD_STATS(pte_update, 1);
 //        ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
         ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@ -610,6 +686,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+        if (xen_iomap_pte(pte)) {
+                xen_set_iomap_pte(ptep, pte);
+                return;
+        }
+
         set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
@@ -936,8 +1017,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
     read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
-        vm_unmap_aliases();
-
         xen_mc_batch();
 
         if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -1501,7 +1580,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
         if (PagePinned(virt_to_page(mm->pgd))) {
                 SetPagePinned(page);
 
-                vm_unmap_aliases();
                 if (!PageHighMem(page)) {
                         make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
                         if (level == PT_PTE && USE_SPLIT_PTLOCKS)
@@ -1812,9 +1890,16 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
                 pte = pfn_pte(phys, prot);
                 break;
 
-        default:
+        case FIX_PARAVIRT_BOOTMAP:
+                /* This is an MFN, but it isn't an IO mapping from the
+                   IO domain */
                 pte = mfn_pte(phys, prot);
                 break;
+
+        default:
+                /* By default, set_fixmap is used for hardware mappings */
+                pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+                break;
         }
 
         __native_set_fixmap(idx, pte);
@@ -1940,7 +2025,205 @@ void __init xen_init_mmu_ops(void)
         x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
         x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
         pv_mmu_ops = xen_mmu_ops;
2028
2029 vmap_lazy_unmap = false;
2030}
2031
2032/* Protected by xen_reservation_lock. */
2033#define MAX_CONTIG_ORDER 9 /* 2MB */
2034static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
2035
2036#define VOID_PTE (mfn_pte(0, __pgprot(0)))
2037static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2038 unsigned long *in_frames,
2039 unsigned long *out_frames)
2040{
2041 int i;
2042 struct multicall_space mcs;
2043
2044 xen_mc_batch();
2045 for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2046 mcs = __xen_mc_entry(0);
2047
2048 if (in_frames)
2049 in_frames[i] = virt_to_mfn(vaddr);
2050
2051 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2052 set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2053
2054 if (out_frames)
2055 out_frames[i] = virt_to_pfn(vaddr);
2056 }
2057 xen_mc_issue(0);
2058}
2059
2060/*
2061 * Update the pfn-to-mfn mappings for a virtual address range, either to
2062 * point to an array of mfns, or contiguously from a single starting
2063 * mfn.
2064 */
2065static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2066 unsigned long *mfns,
2067 unsigned long first_mfn)
2068{
2069 unsigned i, limit;
2070 unsigned long mfn;
2071
2072 xen_mc_batch();
2073
2074 limit = 1u << order;
2075 for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2076 struct multicall_space mcs;
2077 unsigned flags;
2078
2079 mcs = __xen_mc_entry(0);
2080 if (mfns)
2081 mfn = mfns[i];
2082 else
2083 mfn = first_mfn + i;
2084
2085 if (i < (limit - 1))
2086 flags = 0;
2087 else {
2088 if (order == 0)
2089 flags = UVMF_INVLPG | UVMF_ALL;
2090 else
2091 flags = UVMF_TLB_FLUSH | UVMF_ALL;
2092 }
2093
2094 MULTI_update_va_mapping(mcs.mc, vaddr,
2095 mfn_pte(mfn, PAGE_KERNEL), flags);
2096
2097 set_phys_to_machine(virt_to_pfn(vaddr), mfn);
2098 }
2099
2100 xen_mc_issue(0);
2101}
2102
2103/*
2104 * Perform the hypercall to exchange a region of our pfns to point to
2105 * memory with the required contiguous alignment. Takes the pfns as
2106 * input, and populates mfns as output.
2107 *
2108 * Returns a success code indicating whether the hypervisor was able to
2109 * satisfy the request or not.
2110 */
2111static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
2112 unsigned long *pfns_in,
2113 unsigned long extents_out,
2114 unsigned int order_out,
2115 unsigned long *mfns_out,
2116 unsigned int address_bits)
2117{
2118 long rc;
2119 int success;
2120
2121 struct xen_memory_exchange exchange = {
2122 .in = {
2123 .nr_extents = extents_in,
2124 .extent_order = order_in,
2125 .extent_start = pfns_in,
2126 .domid = DOMID_SELF
2127 },
2128 .out = {
2129 .nr_extents = extents_out,
2130 .extent_order = order_out,
2131 .extent_start = mfns_out,
2132 .address_bits = address_bits,
2133 .domid = DOMID_SELF
2134 }
2135 };
2136
2137 BUG_ON(extents_in << order_in != extents_out << order_out);
2138
2139 rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
2140 success = (exchange.nr_exchanged == extents_in);
2141
2142 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
2143 BUG_ON(success && (rc != 0));
2144
2145 return success;
2146}
2147
2148int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
2149 unsigned int address_bits)
2150{
2151 unsigned long *in_frames = discontig_frames, out_frame;
2152 unsigned long flags;
2153 int success;
2154
2155 /*
2156 * Currently an auto-translated guest will not perform I/O, nor will
2157 * it require PAE page directories below 4GB. Therefore any calls to
2158 * this function are redundant and can be ignored.
2159 */
2160
2161 if (xen_feature(XENFEAT_auto_translated_physmap))
2162 return 0;
2163
2164 if (unlikely(order > MAX_CONTIG_ORDER))
2165 return -ENOMEM;
2166
2167 memset((void *) vstart, 0, PAGE_SIZE << order);
2168
2169 spin_lock_irqsave(&xen_reservation_lock, flags);
2170
2171 /* 1. Zap current PTEs, remembering MFNs. */
2172 xen_zap_pfn_range(vstart, order, in_frames, NULL);
2173
2174 /* 2. Get a new contiguous memory extent. */
2175 out_frame = virt_to_pfn(vstart);
2176 success = xen_exchange_memory(1UL << order, 0, in_frames,
2177 1, order, &out_frame,
2178 address_bits);
2179
2180 /* 3. Map the new extent in place of old pages. */
2181 if (success)
2182 xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
2183 else
2184 xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
2185
2186 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2187
2188 return success ? 0 : -ENOMEM;
2189}
2190EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
2191
2192void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
2193{
2194 unsigned long *out_frames = discontig_frames, in_frame;
2195 unsigned long flags;
2196 int success;
2197
2198 if (xen_feature(XENFEAT_auto_translated_physmap))
2199 return;
2200
2201 if (unlikely(order > MAX_CONTIG_ORDER))
2202 return;
2203
2204 memset((void *) vstart, 0, PAGE_SIZE << order);
2205
2206 spin_lock_irqsave(&xen_reservation_lock, flags);
2207
2208 /* 1. Find start MFN of contiguous extent. */
2209 in_frame = virt_to_mfn(vstart);
2210
2211 /* 2. Zap current PTEs. */
2212 xen_zap_pfn_range(vstart, order, NULL, out_frames);
2213
2214 /* 3. Do the exchange for non-contiguous MFNs. */
2215 success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
2216 0, out_frames, 0);
2217
2218 /* 4. Map new pages in place of old pages. */
2219 if (success)
2220 xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
2221 else
2222 xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
2223
2224 spin_unlock_irqrestore(&xen_reservation_lock, flags);
 }
+EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
new file mode 100644
index 000000000000..a013ec9d0c54
--- /dev/null
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -0,0 +1,58 @@
1/* Glue code to lib/swiotlb-xen.c */
2
3#include <linux/dma-mapping.h>
4#include <xen/swiotlb-xen.h>
5
6#include <asm/xen/hypervisor.h>
7#include <xen/xen.h>
8
9int xen_swiotlb __read_mostly;
10
11static struct dma_map_ops xen_swiotlb_dma_ops = {
12 .mapping_error = xen_swiotlb_dma_mapping_error,
13 .alloc_coherent = xen_swiotlb_alloc_coherent,
14 .free_coherent = xen_swiotlb_free_coherent,
15 .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
16 .sync_single_for_device = xen_swiotlb_sync_single_for_device,
17 .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
18 .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
19 .map_sg = xen_swiotlb_map_sg_attrs,
20 .unmap_sg = xen_swiotlb_unmap_sg_attrs,
21 .map_page = xen_swiotlb_map_page,
22 .unmap_page = xen_swiotlb_unmap_page,
23 .dma_supported = xen_swiotlb_dma_supported,
24};
25
26/*
27 * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
28 *
29 * This returns non-zero if we are forced to use xen_swiotlb (by the boot
30 * option).
31 */
32int __init pci_xen_swiotlb_detect(void)
33{
34
35 /* If running as PV guest, either iommu=soft, or swiotlb=force will
36 * activate this IOMMU. If running as PV privileged, activate it
37 * regardless.
38 */
39 if ((xen_initial_domain() || swiotlb || swiotlb_force) &&
40 (xen_pv_domain()))
41 xen_swiotlb = 1;
42
43 /* If we are running under Xen, we MUST disable the native SWIOTLB.
44 * Don't worry about swiotlb_force flag activating the native, as
45 * the 'swiotlb' flag is the only one turning it on. */
46 if (xen_pv_domain())
47 swiotlb = 0;
48
49 return xen_swiotlb;
50}
51
52void __init pci_xen_swiotlb_init(void)
53{
54 if (xen_swiotlb) {
55 xen_swiotlb_init(1);
56 dma_ops = &xen_swiotlb_dma_ops;
57 }
58}
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 0a8826936639..60d71e9abe9f 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -71,4 +71,9 @@ config XEN_PLATFORM_PCI
           initializing xenbus and grant_table when running in a Xen HVM
           domain. As a consequence this driver is required to run any Xen PV
           frontend on Xen HVM.
+
+config SWIOTLB_XEN
+        def_bool y
+        depends on SWIOTLB
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index e392fb776af3..fcaf838f54be 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
 obj-$(CONFIG_XENFS)              += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)   += platform-pci.o
+obj-$(CONFIG_SWIOTLB_XEN)        += swiotlb-xen.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 1a0d8c2a0354..500290b150bb 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -85,13 +85,6 @@ static struct sys_device balloon_sysdev;
 
 static int register_balloon(struct sys_device *sysdev);
 
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
- */
-static DEFINE_SPINLOCK(balloon_lock);
-
 static struct balloon_stats balloon_stats;
 
 /* We increase/decrease in batches which fit in a page */
@@ -210,7 +203,7 @@ static int increase_reservation(unsigned long nr_pages)
         if (nr_pages > ARRAY_SIZE(frame_list))
                 nr_pages = ARRAY_SIZE(frame_list);
 
-        spin_lock_irqsave(&balloon_lock, flags);
+        spin_lock_irqsave(&xen_reservation_lock, flags);
 
         page = balloon_first_page();
         for (i = 0; i < nr_pages; i++) {
@@ -254,7 +247,7 @@ static int increase_reservation(unsigned long nr_pages)
         balloon_stats.current_pages += rc;
 
  out:
-        spin_unlock_irqrestore(&balloon_lock, flags);
+        spin_unlock_irqrestore(&xen_reservation_lock, flags);
 
         return rc < 0 ? rc : rc != nr_pages;
 }
@@ -299,7 +292,7 @@ static int decrease_reservation(unsigned long nr_pages)
         kmap_flush_unused();
         flush_tlb_all();
 
-        spin_lock_irqsave(&balloon_lock, flags);
+        spin_lock_irqsave(&xen_reservation_lock, flags);
 
         /* No more mappings: invalidate P2M and add to balloon. */
         for (i = 0; i < nr_pages; i++) {
@@ -315,7 +308,7 @@ static int decrease_reservation(unsigned long nr_pages)
 
         balloon_stats.current_pages -= nr_pages;
 
-        spin_unlock_irqrestore(&balloon_lock, flags);
+        spin_unlock_irqrestore(&xen_reservation_lock, flags);
 
         return need_sleep;
 }
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
new file mode 100644
index 000000000000..54469c3eeacd
--- /dev/null
+++ b/drivers/xen/swiotlb-xen.c
@@ -0,0 +1,515 @@
1/*
2 * Copyright 2010
3 * by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
4 *
5 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License v2.0 as published by
9 * the Free Software Foundation
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * PV guests under Xen are running in a non-contiguous memory architecture.
17 *
18 * When PCI pass-through is utilized, this necessitates an IOMMU for
19 * translating bus (DMA) to virtual and vice-versa and also providing a
20 * mechanism to have contiguous pages for device drivers operations (say DMA
21 * operations).
22 *
23 * Specifically, under Xen the Linux idea of pages is an illusion. It
24 * assumes that pages start at zero and go up to the available memory. To
25 * help with that, the Linux Xen MMU provides a lookup mechanism to
26 * translate the page frame numbers (PFN) to machine frame numbers (MFN)
27 * and vice-versa. The MFN are the "real" frame numbers. Furthermore
28 * memory is not contiguous. Xen hypervisor stitches memory for guests
29 * from different pools, which means there is no guarantee that PFN==MFN
30 * and PFN+1==MFN+1. Lastly with Xen 4.0, pages (in debug mode) are
31 * allocated in descending order (high to low), meaning the guest might
32 * never get any MFNs under the 4GB mark.
33 *
34 */
35
36#include <linux/bootmem.h>
37#include <linux/dma-mapping.h>
38#include <xen/swiotlb-xen.h>
39#include <xen/page.h>
40#include <xen/xen-ops.h>
41/*
42 * Used to do a quick range check in swiotlb_tbl_unmap_single and
43 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
44 * API.
45 */
46
47static char *xen_io_tlb_start, *xen_io_tlb_end;
48static unsigned long xen_io_tlb_nslabs;
49/*
50 * Quick lookup value of the bus address of the IOTLB.
51 */
52
53u64 start_dma_addr;
54
55static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
56{
57 return phys_to_machine(XPADDR(paddr)).maddr;
58}
59
60static phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
61{
62 return machine_to_phys(XMADDR(baddr)).paddr;
63}
64
65static dma_addr_t xen_virt_to_bus(void *address)
66{
67 return xen_phys_to_bus(virt_to_phys(address));
68}
69
70static int check_pages_physically_contiguous(unsigned long pfn,
71 unsigned int offset,
72 size_t length)
73{
74 unsigned long next_mfn;
75 int i;
76 int nr_pages;
77
78 next_mfn = pfn_to_mfn(pfn);
79 nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
80
81 for (i = 1; i < nr_pages; i++) {
82 if (pfn_to_mfn(++pfn) != ++next_mfn)
83 return 0;
84 }
85 return 1;
86}
87
88static int range_straddles_page_boundary(phys_addr_t p, size_t size)
89{
90 unsigned long pfn = PFN_DOWN(p);
91 unsigned int offset = p & ~PAGE_MASK;
92
93 if (offset + size <= PAGE_SIZE)
94 return 0;
95 if (check_pages_physically_contiguous(pfn, offset, size))
96 return 0;
97 return 1;
98}
99
100static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
101{
102 unsigned long mfn = PFN_DOWN(dma_addr);
103 unsigned long pfn = mfn_to_local_pfn(mfn);
104 phys_addr_t paddr;
105
106 /* If the address is outside our domain, it CAN
107 * have the same virtual address as another address
108 * in our domain. Therefore _only_ check address within our domain.
109 */
110 if (pfn_valid(pfn)) {
111 paddr = PFN_PHYS(pfn);
112 return paddr >= virt_to_phys(xen_io_tlb_start) &&
113 paddr < virt_to_phys(xen_io_tlb_end);
114 }
115 return 0;
116}
117
118static int max_dma_bits = 32;
119
120static int
121xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
122{
123 int i, rc;
124 int dma_bits;
125
126 dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
127
128 i = 0;
129 do {
130 int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE);
131
132 do {
133 rc = xen_create_contiguous_region(
134 (unsigned long)buf + (i << IO_TLB_SHIFT),
135 get_order(slabs << IO_TLB_SHIFT),
136 dma_bits);
137 } while (rc && dma_bits++ < max_dma_bits);
138 if (rc)
139 return rc;
140
141 i += slabs;
142 } while (i < nslabs);
143 return 0;
144}
145
146void __init xen_swiotlb_init(int verbose)
147{
148 unsigned long bytes;
149 int rc;
150
151 xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
152 xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
153
154 bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
155
156 /*
157 * Get IO TLB memory from any location.
158 */
159 xen_io_tlb_start = alloc_bootmem(bytes);
160 if (!xen_io_tlb_start)
161 panic("Cannot allocate SWIOTLB buffer");
162
163 xen_io_tlb_end = xen_io_tlb_start + bytes;
164 /*
165 * And replace that memory with pages under 4GB.
166 */
167 rc = xen_swiotlb_fixup(xen_io_tlb_start,
168 bytes,
169 xen_io_tlb_nslabs);
170 if (rc)
171 goto error;
172
173 start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
174 swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
175
176 return;
177error:
178 panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
179 "We either don't have the permission or you do not have enough "\
180 "free memory under 4GB!\n", rc);
181}
182
183void *
184xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
185 dma_addr_t *dma_handle, gfp_t flags)
186{
187 void *ret;
188 int order = get_order(size);
189 u64 dma_mask = DMA_BIT_MASK(32);
190 unsigned long vstart;
191
192 /*
193 * Ignore region specifiers - the kernel's ideas of
194 * pseudo-phys memory layout has nothing to do with the
195 * machine physical layout. We can't allocate highmem
196 * because we can't return a pointer to it.
197 */
198 flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
199
200 if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret))
201 return ret;
202
203 vstart = __get_free_pages(flags, order);
204 ret = (void *)vstart;
205
206 if (hwdev && hwdev->coherent_dma_mask)
207 dma_mask = dma_alloc_coherent_mask(hwdev, flags);
208
209 if (ret) {
210 if (xen_create_contiguous_region(vstart, order,
211 fls64(dma_mask)) != 0) {
212 free_pages(vstart, order);
213 return NULL;
214 }
215 memset(ret, 0, size);
216 *dma_handle = virt_to_machine(ret).maddr;
217 }
218 return ret;
219}
220EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
221
222void
223xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
224 dma_addr_t dev_addr)
225{
226 int order = get_order(size);
227
228 if (dma_release_from_coherent(hwdev, order, vaddr))
229 return;
230
231 xen_destroy_contiguous_region((unsigned long)vaddr, order);
232 free_pages((unsigned long)vaddr, order);
233}
234EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
235
236
237/*
238 * Map a single buffer of the indicated size for DMA in streaming mode. The
239 * physical address to use is returned.
240 *
241 * Once the device is given the dma address, the device owns this memory until
242 * either xen_swiotlb_unmap_page or xen_swiotlb_dma_sync_single is performed.
243 */
244dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
245 unsigned long offset, size_t size,
246 enum dma_data_direction dir,
247 struct dma_attrs *attrs)
248{
249 phys_addr_t phys = page_to_phys(page) + offset;
250 dma_addr_t dev_addr = xen_phys_to_bus(phys);
251 void *map;
252
253 BUG_ON(dir == DMA_NONE);
254 /*
255 * If the address happens to be in the device's DMA window,
256 * we can safely return the device addr and not worry about bounce
257 * buffering it.
258 */
259 if (dma_capable(dev, dev_addr, size) &&
260 !range_straddles_page_boundary(phys, size) && !swiotlb_force)
261 return dev_addr;
262
263 /*
264 * Oh well, have to allocate and map a bounce buffer.
265 */
266 map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
267 if (!map)
268 return DMA_ERROR_CODE;
269
270 dev_addr = xen_virt_to_bus(map);
271
272 /*
273 * Ensure that the address returned is DMA'ble
274 */
275 if (!dma_capable(dev, dev_addr, size))
276 panic("map_single: bounce buffer is not DMA'ble");
277
278 return dev_addr;
279}
280EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
281
282/*
283 * Unmap a single streaming mode DMA translation. The dma_addr and size must
284 * match what was provided for in a previous xen_swiotlb_map_page call. All
285 * other usages are undefined.
286 *
287 * After this call, reads by the cpu to the buffer are guaranteed to see
288 * whatever the device wrote there.
289 */
290static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
291 size_t size, enum dma_data_direction dir)
292{
293 phys_addr_t paddr = xen_bus_to_phys(dev_addr);
294
295 BUG_ON(dir == DMA_NONE);
296
297 /* NOTE: We use dev_addr here, not paddr! */
298 if (is_xen_swiotlb_buffer(dev_addr)) {
299 swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
300 return;
301 }
302
303 if (dir != DMA_FROM_DEVICE)
304 return;
305
306 /*
307 * phys_to_virt doesn't work with highmem pages but we could
308 * call dma_mark_clean() with a highmem page here. However, we
309 * are fine since dma_mark_clean() is null on POWERPC. We can
310 * make dma_mark_clean() take a physical address if necessary.
311 */
312 dma_mark_clean(phys_to_virt(paddr), size);
313}
314
315void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
316 size_t size, enum dma_data_direction dir,
317 struct dma_attrs *attrs)
318{
319 xen_unmap_single(hwdev, dev_addr, size, dir);
320}
321EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page);
322
323/*
324 * Make physical memory consistent for a single streaming mode DMA translation
325 * after a transfer.
326 *
327 * If you perform a xen_swiotlb_map_page() but wish to interrogate the buffer
328 * using the cpu, yet do not wish to teardown the dma mapping, you must
329 * call this function before doing so. At the next point you give the dma
330 * address back to the card, you must first perform a
331 * xen_swiotlb_dma_sync_for_device, and then the device again owns the buffer
332 */
333static void
334xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
335 size_t size, enum dma_data_direction dir,
336 enum dma_sync_target target)
337{
338 phys_addr_t paddr = xen_bus_to_phys(dev_addr);
339
340 BUG_ON(dir == DMA_NONE);
341
342 /* NOTE: We use dev_addr here, not paddr! */
343 if (is_xen_swiotlb_buffer(dev_addr)) {
344 swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
345 target);
346 return;
347 }
348
349 if (dir != DMA_FROM_DEVICE)
350 return;
351
352 dma_mark_clean(phys_to_virt(paddr), size);
353}
354
355void
356xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
357 size_t size, enum dma_data_direction dir)
358{
359 xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
360}
361EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_for_cpu);
362
363void
364xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
365 size_t size, enum dma_data_direction dir)
366{
367 xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
368}
369EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_for_device);
370
371/*
372 * Map a set of buffers described by scatterlist in streaming mode for DMA.
373 * This is the scatter-gather version of the above xen_swiotlb_map_page
374 * interface. Here the scatter gather list elements are each tagged with the
375 * appropriate dma address and length. They are obtained via
376 * sg_dma_{address,length}(SG).
377 *
378 * NOTE: An implementation may be able to use a smaller number of
379 * DMA address/length pairs than there are SG table elements.
380 * (for example via virtual mapping capabilities)
381 * The routine returns the number of addr/length pairs actually
382 * used, at most nents.
383 *
384 * Device ownership issues as mentioned above for xen_swiotlb_map_page are the
385 * same here.
386 */
387int
388xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
389 int nelems, enum dma_data_direction dir,
390 struct dma_attrs *attrs)
391{
392 struct scatterlist *sg;
393 int i;
394
395 BUG_ON(dir == DMA_NONE);
396
397 for_each_sg(sgl, sg, nelems, i) {
398 phys_addr_t paddr = sg_phys(sg);
399 dma_addr_t dev_addr = xen_phys_to_bus(paddr);
400
401 if (swiotlb_force ||
402 !dma_capable(hwdev, dev_addr, sg->length) ||
403 range_straddles_page_boundary(paddr, sg->length)) {
404 void *map = swiotlb_tbl_map_single(hwdev,
405 start_dma_addr,
406 sg_phys(sg),
407 sg->length, dir);
408 if (!map) {
409 /* Don't panic here, we expect map_sg users
410 to do proper error handling. */
411 xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
412 attrs);
413 sgl[0].dma_length = 0;
414 return DMA_ERROR_CODE;
415 }
416 sg->dma_address = xen_virt_to_bus(map);
417 } else
418 sg->dma_address = dev_addr;
419 sg->dma_length = sg->length;
420 }
421 return nelems;
422}
423EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs);
424
425int
426xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
427 enum dma_data_direction dir)
428{
429 return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
430}
431EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg);
432
433/*
434 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
435 * concerning calls here are the same as for swiotlb_unmap_page() above.
436 */
437void
438xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
439 int nelems, enum dma_data_direction dir,
440 struct dma_attrs *attrs)
441{
442 struct scatterlist *sg;
443 int i;
444
445 BUG_ON(dir == DMA_NONE);
446
447 for_each_sg(sgl, sg, nelems, i)
448 xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
449
450}
451EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
452
453void
454xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
455 enum dma_data_direction dir)
456{
457 return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
458}
459EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg);
460
461/*
462 * Make physical memory consistent for a set of streaming mode DMA translations
463 * after a transfer.
464 *
465 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
466 * and usage.
467 */
468static void
469xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
470 int nelems, enum dma_data_direction dir,
471 enum dma_sync_target target)
472{
473 struct scatterlist *sg;
474 int i;
475
476 for_each_sg(sgl, sg, nelems, i)
477 xen_swiotlb_sync_single(hwdev, sg->dma_address,
478 sg->dma_length, dir, target);
479}
480
481void
482xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
483 int nelems, enum dma_data_direction dir)
484{
485 xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
486}
487EXPORT_SYMBOL_GPL(xen_swiotlb_sync_sg_for_cpu);
488
489void
490xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
491 int nelems, enum dma_data_direction dir)
492{
493 xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
494}
495EXPORT_SYMBOL_GPL(xen_swiotlb_sync_sg_for_device);
496
497int
498xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
499{
500 return !dma_addr;
501}
502EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mapping_error);
503
504/*
505 * Return whether the given device DMA address mask can be supported
506 * properly. For example, if your device can only drive the low 24-bits
507 * during bus mastering, then you would pass 0x00ffffff as the mask to
508 * this function.
509 */
510int
511xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
512{
513 return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
514}
515EXPORT_SYMBOL_GPL(xen_swiotlb_dma_supported);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index de05e96e0a70..01c2145118dc 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -7,6 +7,8 @@
 
 struct vm_area_struct;                /* vma defining user mapping in mm_types.h */
 
+extern bool vmap_lazy_unmap;
+
 /* bits in flags of vmalloc's vm_struct below */
 #define VM_IOREMAP        0x00000001        /* ioremap() and friends */
 #define VM_ALLOC        0x00000002        /* vmalloc() */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index af36ead16817..d3938d3e71f8 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -9,6 +9,8 @@
 #ifndef __XEN_PUBLIC_MEMORY_H__
 #define __XEN_PUBLIC_MEMORY_H__
 
+#include <linux/spinlock.h>
+
 /*
  * Increase or decrease the specified domain's memory reservation. Returns a
  * -ve errcode on failure, or the # extents successfully allocated or freed.
@@ -53,6 +55,48 @@ struct xen_memory_reservation {
 DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
 
 /*
58 * An atomic exchange of memory pages. If return code is zero then
59 * @out.extent_list provides GMFNs of the newly-allocated memory.
60 * Returns zero on complete success, otherwise a negative error code.
61 * On complete success then always @nr_exchanged == @in.nr_extents.
62 * On partial success @nr_exchanged indicates how much work was done.
63 */
64#define XENMEM_exchange 11
65struct xen_memory_exchange {
66 /*
67 * [IN] Details of memory extents to be exchanged (GMFN bases).
68 * Note that @in.address_bits is ignored and unused.
69 */
70 struct xen_memory_reservation in;
71
72 /*
73 * [IN/OUT] Details of new memory extents.
74 * We require that:
75 * 1. @in.domid == @out.domid
76 * 2. @in.nr_extents << @in.extent_order ==
77 * @out.nr_extents << @out.extent_order
78 * 3. @in.extent_start and @out.extent_start lists must not overlap
79 * 4. @out.extent_start lists GPFN bases to be populated
80 * 5. @out.extent_start is overwritten with allocated GMFN bases
81 */
82 struct xen_memory_reservation out;
83
84 /*
85 * [OUT] Number of input extents that were successfully exchanged:
86 * 1. The first @nr_exchanged input extents were successfully
87 * deallocated.
88 * 2. The corresponding first entries in the output extent list correctly
89 * indicate the GMFNs that were successfully exchanged.
90 * 3. All other input and output extents are untouched.
91 * 4. If not all input extents are exchanged then the return code of this
92 * command will be non-zero.
93 * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
94 */
95 unsigned long nr_exchanged;
96};
97
98DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
99/*
56 * Returns the maximum machine frame number of mapped RAM in this system. 100 * Returns the maximum machine frame number of mapped RAM in this system.
57 * This command always succeeds (it never returns an error code). 101 * This command always succeeds (it never returns an error code).
58 * arg == NULL. 102 * arg == NULL.
@@ -142,4 +186,10 @@ struct xen_translate_gpfn_list {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
 
+
+/*
+ * Prevent the balloon driver from changing the memory reservation
+ * during a driver critical region.
+ */
+extern spinlock_t xen_reservation_lock;
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
new file mode 100644
index 000000000000..2ea2fdc79c16
--- /dev/null
+++ b/include/xen/swiotlb-xen.h
@@ -0,0 +1,65 @@
1#ifndef __LINUX_SWIOTLB_XEN_H
2#define __LINUX_SWIOTLB_XEN_H
3
4#include <linux/swiotlb.h>
5
6extern void xen_swiotlb_init(int verbose);
7
8extern void
9*xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
10 dma_addr_t *dma_handle, gfp_t flags);
11
12extern void
13xen_swiotlb_free_coherent(struct device *hwdev, size_t size,
14 void *vaddr, dma_addr_t dma_handle);
15
16extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
17 unsigned long offset, size_t size,
18 enum dma_data_direction dir,
19 struct dma_attrs *attrs);
20
21extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
22 size_t size, enum dma_data_direction dir,
23 struct dma_attrs *attrs);
24/*
25extern int
26xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
27 enum dma_data_direction dir);
28
29extern void
30xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
31 enum dma_data_direction dir);
32*/
33extern int
34xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
35 int nelems, enum dma_data_direction dir,
36 struct dma_attrs *attrs);
37
38extern void
39xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
40 int nelems, enum dma_data_direction dir,
41 struct dma_attrs *attrs);
42
43extern void
44xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
45 size_t size, enum dma_data_direction dir);
46
47extern void
48xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
49 int nelems, enum dma_data_direction dir);
50
51extern void
52xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
53 size_t size, enum dma_data_direction dir);
54
55extern void
56xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
57 int nelems, enum dma_data_direction dir);
58
59extern int
60xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
61
62extern int
63xen_swiotlb_dma_supported(struct device *hwdev, u64 mask);
64
65#endif /* __LINUX_SWIOTLB_XEN_H */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 46bc81ef74c6..351f4051f6d8 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -17,4 +17,10 @@ void xen_arch_resume(void);
 
 int xen_setup_shutdown_event(void);
 
+extern unsigned long *xen_contiguous_bitmap;
+int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
+                                unsigned int address_bits);
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
+
 #endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 918c51335d64..6b8889da69a6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,7 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+bool vmap_lazy_unmap __read_mostly = true;
 
 /*** Page table manipulation functions ***/
36 37
@@ -502,6 +503,9 @@ static unsigned long lazy_max_pages(void)
 {
         unsigned int log;
 
+        if (!vmap_lazy_unmap)
+                return 0;
+
         log = fls(num_online_cpus());
 
         return log * (32UL * 1024 * 1024 / PAGE_SIZE);