aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-10-16 18:36:00 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-16 18:36:00 -0400
commit08d19f51f05a68ce89a289320ce4ed96e757df72 (patch)
tree31c5d718d0aeaff5083fe533cd6e1f9fbbe846bb /include/linux
parent1c95e1b69073cff5ff179e592fa1a1e182c78a17 (diff)
parent2381ad241d0bea1253a37f314b270848067640bb (diff)
Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (134 commits) KVM: ia64: Add intel iommu support for guests. KVM: ia64: add directed mmio range support for kvm guests KVM: ia64: Make pmt table be able to hold physical mmio entries. KVM: Move irqchip_in_kernel() from ioapic.h to irq.h KVM: Separate irq ack notification out of arch/x86/kvm/irq.c KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs KVM: Move device assignment logic to common code KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/ KVM: VMX: enable invlpg exiting if EPT is disabled KVM: x86: Silence various LAPIC-related host kernel messages KVM: Device Assignment: Map mmio pages into VT-d page table KVM: PIC: enhance IPI avoidance KVM: MMU: add "oos_shadow" parameter to disable oos KVM: MMU: speed up mmu_unsync_walk KVM: MMU: out of sync shadow core KVM: MMU: mmu_convert_notrap helper KVM: MMU: awareness of new kvm_mmu_zap_page behaviour KVM: MMU: mmu_parent_walk KVM: x86: trap invlpg KVM: MMU: sync roots on mmu reload ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/dma_remapping.h157
-rw-r--r--include/linux/intel-iommu.h327
-rw-r--r--include/linux/iova.h52
-rw-r--r--include/linux/kvm.h72
-rw-r--r--include/linux/kvm_host.h82
5 files changed, 684 insertions, 6 deletions
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
new file mode 100644
index 000000000000..bff5c65f81dc
--- /dev/null
+++ b/include/linux/dma_remapping.h
@@ -0,0 +1,157 @@
1#ifndef _DMA_REMAPPING_H
2#define _DMA_REMAPPING_H
3
4/*
5 * We need a fixed PAGE_SIZE of 4K irrespective of
6 * arch PAGE_SIZE for IOMMU page tables.
7 */
#define PAGE_SHIFT_4K		(12)
#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
#define PAGE_MASK_4K		(~(u64)0 << PAGE_SHIFT_4K)
/* Round addr up to a multiple of 4K; an aligned addr is returned as-is. */
#define PAGE_ALIGN_4K(addr)	(((addr) + (PAGE_SIZE_4K - 1)) & PAGE_MASK_4K)

/* 4K page frame number of an address. */
#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
16
17
18/*
19 * 0: Present
20 * 1-11: Reserved
21 * 12-63: Context Ptr (12 - (haw-1))
22 * 64-127: Reserved
23 */
24struct root_entry {
25 u64 val;
26 u64 rsvd1;
27};
28#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
29static inline bool root_present(struct root_entry *root)
30{
31 return (root->val & 1);
32}
33static inline void set_root_present(struct root_entry *root)
34{
35 root->val |= 1;
36}
37static inline void set_root_value(struct root_entry *root, unsigned long value)
38{
39 root->val |= value & PAGE_MASK_4K;
40}
41
42struct context_entry;
43static inline struct context_entry *
44get_context_addr_from_root(struct root_entry *root)
45{
46 return (struct context_entry *)
47 (root_present(root)?phys_to_virt(
48 root->val & PAGE_MASK_4K):
49 NULL);
50}
51
52/*
53 * low 64 bits:
54 * 0: present
55 * 1: fault processing disable
56 * 2-3: translation type
57 * 12-63: address space root
58 * high 64 bits:
59 * 0-2: address width
60 * 3-6: aval
61 * 8-23: domain id
62 */
63struct context_entry {
64 u64 lo;
65 u64 hi;
66};
/*
 * Field readers.  'c' is a struct context_entry lvalue (not a pointer);
 * it is evaluated once per reader.
 */
#define context_present(c)		((c).lo & 1)
#define context_fault_disable(c)	(((c).lo >> 1) & 1)
#define context_translation_type(c)	(((c).lo >> 2) & 3)
#define context_address_root(c)		((c).lo & PAGE_MASK_4K)
#define context_address_width(c)	((c).hi & 7)
#define context_domain_id(c)		(((c).hi >> 8) & ((1 << 16) - 1))

/*
 * Field writers.  Every multi-bit writer clears its own field before
 * storing the new value, so rewriting a field on an entry that is already
 * populated replaces the old value instead of OR-ing the two together.
 * Bits outside the field are never touched.
 */
#define context_set_present(c) do {(c).lo |= 1;} while (0)
/* Clears bit 1 (fault processing disable). */
#define context_set_fault_enable(c) \
	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
#define context_set_translation_type(c, val) \
	do { \
		(c).lo &= (((u64)-1) << 4) | 3; \
		(c).lo |= ((val) & 3) << 2; \
	} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
	do { \
		(c).lo &= ~PAGE_MASK_4K; \
		(c).lo |= (val) & PAGE_MASK_4K; \
	} while (0)
#define context_set_address_width(c, val) \
	do { \
		(c).hi &= ~(u64)7; \
		(c).hi |= (val) & 7; \
	} while (0)
#define context_set_domain_id(c, val) \
	do { \
		(c).hi &= ~((u64)((1 << 16) - 1) << 8); \
		(c).hi |= ((val) & ((1 << 16) - 1)) << 8; \
	} while (0)
/* Zero both words of the entry. */
#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
89
90/*
91 * 0: readable
92 * 1: writable
93 * 2-6: reserved
94 * 7: super page
95 * 8-11: available
96 * 12-63: Host physcial address
97 */
98struct dma_pte {
99 u64 val;
100};
/* Zero the whole entry: address and permission bits alike. */
#define dma_clear_pte(p)	do { (p).val = 0; } while (0)

#define DMA_PTE_READ	(1)
#define DMA_PTE_WRITE	(2)

#define dma_set_pte_readable(p)	do { (p).val |= DMA_PTE_READ; } while (0)
#define dma_set_pte_writable(p)	do { (p).val |= DMA_PTE_WRITE; } while (0)
/* Replace the two permission bits with the low two bits of prot. */
#define dma_set_pte_prot(p, prot) \
	do { (p).val = ((prot) & 3) | ((p).val & ~(u64)3); } while (0)
#define dma_pte_addr(p)		((p).val & PAGE_MASK_4K)
/* ORs the 4K-aligned part of addr into the entry; nothing is cleared. */
#define dma_set_pte_addr(p, addr) \
	do { (p).val |= (addr) & PAGE_MASK_4K; } while (0)
/* An entry counts as present when it is readable or writable. */
#define dma_pte_present(p) \
	(((p).val & (DMA_PTE_READ | DMA_PTE_WRITE)) != 0)
114
115struct intel_iommu;
116
117struct dmar_domain {
118 int id; /* domain id */
119 struct intel_iommu *iommu; /* back pointer to owning iommu */
120
121 struct list_head devices; /* all devices' list */
122 struct iova_domain iovad; /* iova's that belong to this domain */
123
124 struct dma_pte *pgd; /* virtual address */
125 spinlock_t mapping_lock; /* page table lock */
126 int gaw; /* max guest address width */
127
128 /* adjusted guest address width, 0 is level 2 30-bit */
129 int agaw;
130
131#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
132 int flags;
133};
134
135/* PCI domain-device relationship */
136struct device_domain_info {
137 struct list_head link; /* link to domain siblings */
138 struct list_head global; /* link to global list */
139 u8 bus; /* PCI bus numer */
140 u8 devfn; /* PCI devfn number */
141 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
142 struct dmar_domain *domain; /* pointer to domain */
143};
144
145extern int init_dmars(void);
146extern void free_dmar_iommu(struct intel_iommu *iommu);
147
148extern int dmar_disabled;
149
#ifndef CONFIG_DMAR_GFX_WA
/* Empty stand-in used when CONFIG_DMAR_GFX_WA is not set. */
static inline void iommu_prepare_gfx_mapping(void)
{
}
#endif /* !CONFIG_DMAR_GFX_WA */
156
157#endif
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
new file mode 100644
index 000000000000..2e117f30a76c
--- /dev/null
+++ b/include/linux/intel-iommu.h
@@ -0,0 +1,327 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#ifndef _INTEL_IOMMU_H_
23#define _INTEL_IOMMU_H_
24
25#include <linux/types.h>
26#include <linux/msi.h>
27#include <linux/sysdev.h>
28#include <linux/iova.h>
29#include <linux/io.h>
30#include <linux/dma_remapping.h>
31#include <asm/cacheflush.h>
32
33/*
34 * Intel IOMMU register specification per version 1.0 public spec.
35 */
36
37#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
38#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
39#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
40#define DMAR_GCMD_REG 0x18 /* Global command register */
41#define DMAR_GSTS_REG 0x1c /* Global status register */
42#define DMAR_RTADDR_REG 0x20 /* Root entry table */
43#define DMAR_CCMD_REG 0x28 /* Context command reg */
44#define DMAR_FSTS_REG 0x34 /* Fault Status register */
45#define DMAR_FECTL_REG 0x38 /* Fault control register */
46#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
47#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
48#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
49#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
50#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
51#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
52#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
53#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
54#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
55#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
56#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
57#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
58#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
59#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
60
61#define OFFSET_STRIDE (9)
62/*
63#define dmar_readl(dmar, reg) readl(dmar + reg)
64#define dmar_readq(dmar, reg) ({ \
65 u32 lo, hi; \
66 lo = readl(dmar + reg); \
67 hi = readl(dmar + reg + 4); \
68 (((u64) hi) << 32) + lo; })
69*/
70static inline u64 dmar_readq(void __iomem *addr)
71{
72 u32 lo, hi;
73 lo = readl(addr);
74 hi = readl(addr + 4);
75 return (((u64) hi) << 32) + lo;
76}
77
78static inline void dmar_writeq(void __iomem *addr, u64 val)
79{
80 writel((u32)val, addr);
81 writel((u32)(val >> 32), addr + 4);
82}
83
/* Version register: major number in bits 4-7, minor number in bits 0-3. */
#define DMAR_VER_MAJOR(v)	(((v) >> 4) & 0xf)
#define DMAR_VER_MINOR(v)	((v) & 0xf)
86
/*
 * Decoding Capability Register
 *
 * Each macro extracts one field from a capability register value 'c'.
 */
#define cap_read_drain(c)	(((c) >> 55) & 1)
#define cap_write_drain(c)	(((c) >> 54) & 1)
#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
#define cap_pgsel_inv(c)	(((c) >> 39) & 1)

#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
/*
 * find_first_bit() takes the address of an unsigned long, and the field
 * extracted by cap_super_page_val() is not an lvalue, so it is staged in
 * a temporary first.  'c' is evaluated exactly once.
 */
#define cap_super_offset(c) \
	({ \
		unsigned long __sp_val = cap_super_page_val(c); \
		(find_first_bit(&__sp_val, 4) * OFFSET_STRIDE) + 21; \
	})

/* The raw field counts 16-byte units; the result is a byte offset. */
#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
#define cap_max_fault_reg_offset(c) \
	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)

#define cap_zlr(c)		(((c) >> 22) & 1)
#define cap_isoch(c)		(((c) >> 23) & 1)
#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
#define cap_caching_mode(c)	(((c) >> 7) & 1)
#define cap_phmr(c)		(((c) >> 6) & 1)
#define cap_plmr(c)		(((c) >> 5) & 1)
#define cap_rwbf(c)		(((c) >> 4) & 1)
#define cap_afl(c)		(((c) >> 3) & 1)
/* Number of domains: 2^(4 + 2 * field), field being bits 0-2. */
#define cap_ndoms(c)	(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
114/*
115 * Extended Capability Register
116 */
117
/*
 * Each macro extracts one field from an extended capability register
 * value 'e'.  The argument is parenthesized everywhere so that passing an
 * expression such as (a | b) decodes the whole expression.
 */
#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
/* The raw field counts 16-byte units; the result is a byte offset. */
#define ecap_iotlb_offset(e)	((((e) >> 8) & 0x3ff) * 16)
#define ecap_max_iotlb_offset(e) \
	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
#define ecap_coherent(e)	((e) & 0x1)
#define ecap_qis(e)		((e) & 0x2)
#define ecap_eim_support(e)	(((e) >> 4) & 0x1)
#define ecap_ir_support(e)	(((e) >> 3) & 0x1)
#define ecap_max_handle_mask(e)	(((e) >> 20) & 0xf)
127
128
/* IOTLB_REG */
#define DMA_TLB_GLOBAL_FLUSH	(((u64)1) << 60)
#define DMA_TLB_DSI_FLUSH	(((u64)2) << 60)
#define DMA_TLB_PSI_FLUSH	(((u64)3) << 60)
/* Argument parenthesized so an OR-ed command word is decoded as a whole. */
#define DMA_TLB_IIRG(type)	(((type) >> 60) & 7)
#define DMA_TLB_IAIG(val)	(((val) >> 57) & 7)
#define DMA_TLB_READ_DRAIN	(((u64)1) << 49)
#define DMA_TLB_WRITE_DRAIN	(((u64)1) << 48)
/* Only the low 16 bits of id are used. */
#define DMA_TLB_DID(id)		(((u64)((id) & 0xffff)) << 32)
#define DMA_TLB_IVT		(((u64)1) << 63)
#define DMA_TLB_IH_NONLEAF	(((u64)1) << 6)
#define DMA_TLB_MAX_SIZE	(0x3f)
141
/* INVALID_DESC */
/*
 * NOTE(review): the three *_FLUSH granularity values below are shifted by
 * 3; confirm the field position against the VT-d IOTLB invalidate
 * descriptor layout before relying on them.
 */
#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
#define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
#define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
/*
 * Widen to u64 before shifting: shifting an int id >= 0x8000 left by 16
 * lands in the sign bit and would sign-extend when converted to u64.
 */
#define DMA_ID_TLB_DID(id)	(((u64)((id) & 0xffff)) << 16)
#define DMA_ID_TLB_IH_NONLEAF	(((u64)1) << 6)
#define DMA_ID_TLB_ADDR(addr)	(addr)
#define DMA_ID_TLB_ADDR_MASK(mask)	(mask)
152
153/* PMEN_REG */
154#define DMA_PMEN_EPM (((u32)1)<<31)
155#define DMA_PMEN_PRS (((u32)1)<<0)
156
157/* GCMD_REG */
158#define DMA_GCMD_TE (((u32)1) << 31)
159#define DMA_GCMD_SRTP (((u32)1) << 30)
160#define DMA_GCMD_SFL (((u32)1) << 29)
161#define DMA_GCMD_EAFL (((u32)1) << 28)
162#define DMA_GCMD_WBF (((u32)1) << 27)
163#define DMA_GCMD_QIE (((u32)1) << 26)
164#define DMA_GCMD_SIRTP (((u32)1) << 24)
165#define DMA_GCMD_IRE (((u32) 1) << 25)
166
167/* GSTS_REG */
168#define DMA_GSTS_TES (((u32)1) << 31)
169#define DMA_GSTS_RTPS (((u32)1) << 30)
170#define DMA_GSTS_FLS (((u32)1) << 29)
171#define DMA_GSTS_AFLS (((u32)1) << 28)
172#define DMA_GSTS_WBFS (((u32)1) << 27)
173#define DMA_GSTS_QIES (((u32)1) << 26)
174#define DMA_GSTS_IRTPS (((u32)1) << 24)
175#define DMA_GSTS_IRES (((u32)1) << 25)
176
177/* CCMD_REG */
178#define DMA_CCMD_ICC (((u64)1) << 63)
179#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
180#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
181#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
182#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
183#define DMA_CCMD_MASK_NOBIT 0
184#define DMA_CCMD_MASK_1BIT 1
185#define DMA_CCMD_MASK_2BIT 2
186#define DMA_CCMD_MASK_3BIT 3
187#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
188#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
189
190/* FECTL_REG */
191#define DMA_FECTL_IM (((u32)1) << 31)
192
/* FSTS_REG */
#define DMA_FSTS_PPF ((u32)2)
#define DMA_FSTS_PFO ((u32)1)
#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)

/*
 * FRCD_REG, 32 bits access
 *
 * Arguments are parenthesized so that an expression argument is decoded
 * as a whole rather than having the mask or shift bind to its last operand.
 */
#define DMA_FRCD_F (((u32)1) << 31)
#define dma_frcd_type(d) (((d) >> 30) & 1)
#define dma_frcd_fault_reason(c) ((c) & 0xff)
#define dma_frcd_source_id(c) ((c) & 0xffff)
#define dma_frcd_page_addr(d) ((d) & (((u64)-1) << 12)) /* low 64 bit */
204
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */

/*
 * Poll a register until a condition holds.
 *
 * Repeatedly assigns op(iommu->reg + offset) to 'sts' and stops as soon as
 * 'cond' (an expression that normally refers to 'sts') is true.  If more
 * than DMAR_OPERATION_TIMEOUT cycles elapse first, the kernel panics.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement and can be used in an unbraced if/else.  'iommu' and 'offset'
 * are parenthesized so expression arguments bind as intended.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do { \
	cycles_t start_time = get_cycles(); \
	while (1) { \
		sts = op((iommu)->reg + (offset)); \
		if (cond) \
			break; \
		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time)) \
			panic("DMAR hardware is malfunctioning\n"); \
		cpu_relax(); \
	} \
} while (0)
219
220#define QI_LENGTH 256 /* queue length */
221
222enum {
223 QI_FREE,
224 QI_IN_USE,
225 QI_DONE
226};
227
/* Queued-invalidation descriptor type codes. */
#define QI_CC_TYPE		0x1
#define QI_IOTLB_TYPE		0x2
#define QI_DIOTLB_TYPE		0x3
#define QI_IEC_TYPE		0x4
#define QI_IWD_TYPE		0x5

/*
 * Descriptor field builders.  Arguments are parenthesized so that the mask
 * and the u64 cast apply to the whole argument expression.
 */
#define QI_IEC_SELECTIVE	(((u64)1) << 4)
#define QI_IEC_IIDEX(idx)	(((u64)((idx) & 0xffff) << 32))
#define QI_IEC_IM(m)		(((u64)((m) & 0x1f) << 27))

#define QI_IWD_STATUS_DATA(d)	(((u64)(d)) << 32)
#define QI_IWD_STATUS_WRITE	(((u64)1) << 5)
240
241struct qi_desc {
242 u64 low, high;
243};
244
245struct q_inval {
246 spinlock_t q_lock;
247 struct qi_desc *desc; /* invalidation queue */
248 int *desc_status; /* desc status */
249 int free_head; /* first free entry */
250 int free_tail; /* last free entry */
251 int free_cnt;
252};
253
254#ifdef CONFIG_INTR_REMAP
255/* 1MB - maximum possible interrupt remapping table size */
256#define INTR_REMAP_PAGE_ORDER 8
257#define INTR_REMAP_TABLE_REG_SIZE 0xf
258
259#define INTR_REMAP_TABLE_ENTRIES 65536
260
261struct ir_table {
262 struct irte *base;
263};
264#endif
265
266struct intel_iommu {
267 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
268 u64 cap;
269 u64 ecap;
270 int seg;
271 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
272 spinlock_t register_lock; /* protect register handling */
273 int seq_id; /* sequence id of the iommu */
274
275#ifdef CONFIG_DMAR
276 unsigned long *domain_ids; /* bitmap of domains */
277 struct dmar_domain **domains; /* ptr to domains */
278 spinlock_t lock; /* protect context, domain ids */
279 struct root_entry *root_entry; /* virtual address */
280
281 unsigned int irq;
282 unsigned char name[7]; /* Device Name */
283 struct msi_msg saved_msg;
284 struct sys_device sysdev;
285#endif
286 struct q_inval *qi; /* Queued invalidation info */
287#ifdef CONFIG_INTR_REMAP
288 struct ir_table *ir_table; /* Interrupt remapping info */
289#endif
290};
291
292static inline void __iommu_flush_cache(
293 struct intel_iommu *iommu, void *addr, int size)
294{
295 if (!ecap_coherent(iommu->ecap))
296 clflush_cache_range(addr, size);
297}
298
299extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
300
301extern int alloc_iommu(struct dmar_drhd_unit *drhd);
302extern void free_iommu(struct intel_iommu *iommu);
303extern int dmar_enable_qi(struct intel_iommu *iommu);
304extern void qi_global_iec(struct intel_iommu *iommu);
305
306extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
307
308void intel_iommu_domain_exit(struct dmar_domain *domain);
309struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
310int intel_iommu_context_mapping(struct dmar_domain *domain,
311 struct pci_dev *pdev);
312int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
313 u64 hpa, size_t size, int prot);
314void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
315struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
316u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
317
#ifndef CONFIG_DMAR
/* Without CONFIG_DMAR this is an inline stub that always returns 0. */
static inline int intel_iommu_found(void)
{
	return 0;
}
#else /* CONFIG_DMAR */
int intel_iommu_found(void);
#endif /* CONFIG_DMAR */
326
327#endif
diff --git a/include/linux/iova.h b/include/linux/iova.h
new file mode 100644
index 000000000000..228f6c94b69c
--- /dev/null
+++ b/include/linux/iova.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This file is released under the GPLv2.
5 *
6 * Copyright (C) 2006-2008 Intel Corporation
7 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8 *
9 */
10
11#ifndef _IOVA_H_
12#define _IOVA_H_
13
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/rbtree.h>
17#include <linux/dma-mapping.h>
18
19/* IO virtual address start page frame number */
20#define IOVA_START_PFN (1)
21
22/* iova structure */
23struct iova {
24 struct rb_node node;
25 unsigned long pfn_hi; /* IOMMU dish out addr hi */
26 unsigned long pfn_lo; /* IOMMU dish out addr lo */
27};
28
29/* holds all the iova translations for a domain */
30struct iova_domain {
31 spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */
32 spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */
33 struct rb_root rbroot; /* iova domain rbtree root */
34 struct rb_node *cached32_node; /* Save last alloced node */
35 unsigned long dma_32bit_pfn;
36};
37
38struct iova *alloc_iova_mem(void);
39void free_iova_mem(struct iova *iova);
40void free_iova(struct iova_domain *iovad, unsigned long pfn);
41void __free_iova(struct iova_domain *iovad, struct iova *iova);
42struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
43 unsigned long limit_pfn,
44 bool size_aligned);
45struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
46 unsigned long pfn_hi);
47void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
48void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
49struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
50void put_iova_domain(struct iova_domain *iovad);
51
52#endif
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 70a30651cd12..797fcd781242 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -311,22 +311,33 @@ struct kvm_s390_interrupt {
311 311
312/* This structure represents a single trace buffer record. */ 312/* This structure represents a single trace buffer record. */
313struct kvm_trace_rec { 313struct kvm_trace_rec {
314 __u32 event:28; 314 /* variable rec_val
315 __u32 extra_u32:3; 315 * is split into:
316 __u32 cycle_in:1; 316 * bits 0 - 27 -> event id
317 * bits 28 -30 -> number of extra data args of size u32
318 * bits 31 -> binary indicator for if tsc is in record
319 */
320 __u32 rec_val;
317 __u32 pid; 321 __u32 pid;
318 __u32 vcpu_id; 322 __u32 vcpu_id;
319 union { 323 union {
320 struct { 324 struct {
321 __u64 cycle_u64; 325 __u64 timestamp;
322 __u32 extra_u32[KVM_TRC_EXTRA_MAX]; 326 __u32 extra_u32[KVM_TRC_EXTRA_MAX];
323 } __attribute__((packed)) cycle; 327 } __attribute__((packed)) timestamp;
324 struct { 328 struct {
325 __u32 extra_u32[KVM_TRC_EXTRA_MAX]; 329 __u32 extra_u32[KVM_TRC_EXTRA_MAX];
326 } nocycle; 330 } notimestamp;
327 } u; 331 } u;
328}; 332};
329 333
/*
 * Place a field value at its position in kvm_trace_rec.rec_val:
 *   TRACE_REC_EVENT_ID       bits 0-27
 *   TRACE_REC_NUM_DATA_ARGS  bits 28-30
 *   TRACE_REC_TCS            bit 31
 *
 * The operand is converted to __u32 before shifting so that a plain int
 * argument is never shifted into the sign bit.
 */
#define TRACE_REC_EVENT_ID(val) \
		(0x0fffffff & (val))
#define TRACE_REC_NUM_DATA_ARGS(val) \
		(0x70000000 & ((__u32)(val) << 28))
#define TRACE_REC_TCS(val) \
		(0x80000000 & ((__u32)(val) << 31))
340
330#define KVMIO 0xAE 341#define KVMIO 0xAE
331 342
332/* 343/*
@@ -372,6 +383,10 @@ struct kvm_trace_rec {
372#define KVM_CAP_MP_STATE 14 383#define KVM_CAP_MP_STATE 14
373#define KVM_CAP_COALESCED_MMIO 15 384#define KVM_CAP_COALESCED_MMIO 15
374#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ 385#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
386#if defined(CONFIG_X86)||defined(CONFIG_IA64)
387#define KVM_CAP_DEVICE_ASSIGNMENT 17
388#endif
389#define KVM_CAP_IOMMU 18
375 390
376/* 391/*
377 * ioctls for VM fds 392 * ioctls for VM fds
@@ -401,6 +416,10 @@ struct kvm_trace_rec {
401 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) 416 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
402#define KVM_UNREGISTER_COALESCED_MMIO \ 417#define KVM_UNREGISTER_COALESCED_MMIO \
403 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) 418 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
419#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
420 struct kvm_assigned_pci_dev)
421#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
422 struct kvm_assigned_irq)
404 423
405/* 424/*
406 * ioctls for vcpu fds 425 * ioctls for vcpu fds
@@ -440,4 +459,45 @@ struct kvm_trace_rec {
440#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) 459#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
441#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 460#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
442 461
462#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
463#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
464#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
465#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
466#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
467#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
468#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
469#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
470#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
471#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
472#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
473#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
474#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
475#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
476#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
477#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
478#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
479#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
480#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
481#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
482#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
483#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
484#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
485#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
486
487struct kvm_assigned_pci_dev {
488 __u32 assigned_dev_id;
489 __u32 busnr;
490 __u32 devfn;
491 __u32 flags;
492};
493
494struct kvm_assigned_irq {
495 __u32 assigned_dev_id;
496 __u32 host_irq;
497 __u32 guest_irq;
498 __u32 flags;
499};
500
501#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
502
443#endif 503#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8525afc53107..3833c48fae3a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,8 @@
34#define KVM_REQ_MMU_RELOAD 3 34#define KVM_REQ_MMU_RELOAD 3
35#define KVM_REQ_TRIPLE_FAULT 4 35#define KVM_REQ_TRIPLE_FAULT 4
36#define KVM_REQ_PENDING_TIMER 5 36#define KVM_REQ_PENDING_TIMER 5
37#define KVM_REQ_UNHALT 6
38#define KVM_REQ_MMU_SYNC 7
37 39
38struct kvm_vcpu; 40struct kvm_vcpu;
39extern struct kmem_cache *kvm_vcpu_cache; 41extern struct kmem_cache *kvm_vcpu_cache;
@@ -279,12 +281,68 @@ void kvm_free_physmem(struct kvm *kvm);
279 281
280struct kvm *kvm_arch_create_vm(void); 282struct kvm *kvm_arch_create_vm(void);
281void kvm_arch_destroy_vm(struct kvm *kvm); 283void kvm_arch_destroy_vm(struct kvm *kvm);
284void kvm_free_all_assigned_devices(struct kvm *kvm);
282 285
283int kvm_cpu_get_interrupt(struct kvm_vcpu *v); 286int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
284int kvm_cpu_has_interrupt(struct kvm_vcpu *v); 287int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
285int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); 288int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
286void kvm_vcpu_kick(struct kvm_vcpu *vcpu); 289void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
287 290
291int kvm_is_mmio_pfn(pfn_t pfn);
292
293struct kvm_irq_ack_notifier {
294 struct hlist_node link;
295 unsigned gsi;
296 void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
297};
298
299struct kvm_assigned_dev_kernel {
300 struct kvm_irq_ack_notifier ack_notifier;
301 struct work_struct interrupt_work;
302 struct list_head list;
303 int assigned_dev_id;
304 int host_busnr;
305 int host_devfn;
306 int host_irq;
307 int guest_irq;
308 int irq_requested;
309 struct pci_dev *dev;
310 struct kvm *kvm;
311};
312void kvm_set_irq(struct kvm *kvm, int irq, int level);
313void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
314void kvm_register_irq_ack_notifier(struct kvm *kvm,
315 struct kvm_irq_ack_notifier *kian);
316void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
317 struct kvm_irq_ack_notifier *kian);
318
319#ifdef CONFIG_DMAR
320int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
321 unsigned long npages);
322int kvm_iommu_map_guest(struct kvm *kvm,
323 struct kvm_assigned_dev_kernel *assigned_dev);
324int kvm_iommu_unmap_guest(struct kvm *kvm);
325#else /* CONFIG_DMAR */
326static inline int kvm_iommu_map_pages(struct kvm *kvm,
327 gfn_t base_gfn,
328 unsigned long npages)
329{
330 return 0;
331}
332
333static inline int kvm_iommu_map_guest(struct kvm *kvm,
334 struct kvm_assigned_dev_kernel
335 *assigned_dev)
336{
337 return -ENODEV;
338}
339
340static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
341{
342 return 0;
343}
344#endif /* CONFIG_DMAR */
345
288static inline void kvm_guest_enter(void) 346static inline void kvm_guest_enter(void)
289{ 347{
290 account_system_vtime(current); 348 account_system_vtime(current);
@@ -307,6 +365,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
307 return (gpa_t)gfn << PAGE_SHIFT; 365 return (gpa_t)gfn << PAGE_SHIFT;
308} 366}
309 367
368static inline hpa_t pfn_to_hpa(pfn_t pfn)
369{
370 return (hpa_t)pfn << PAGE_SHIFT;
371}
372
310static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) 373static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
311{ 374{
312 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); 375 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
@@ -326,6 +389,25 @@ struct kvm_stats_debugfs_item {
326extern struct kvm_stats_debugfs_item debugfs_entries[]; 389extern struct kvm_stats_debugfs_item debugfs_entries[];
327extern struct dentry *kvm_debugfs_dir; 390extern struct dentry *kvm_debugfs_dir;
328 391
392#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
393 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
394 vcpu, 5, d1, d2, d3, d4, d5)
395#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
396 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
397 vcpu, 4, d1, d2, d3, d4, 0)
398#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
399 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
400 vcpu, 3, d1, d2, d3, 0, 0)
401#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
402 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
403 vcpu, 2, d1, d2, 0, 0, 0)
404#define KVMTRACE_1D(evt, vcpu, d1, name) \
405 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
406 vcpu, 1, d1, 0, 0, 0, 0)
407#define KVMTRACE_0D(evt, vcpu, name) \
408 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
409 vcpu, 0, 0, 0, 0, 0, 0)
410
329#ifdef CONFIG_KVM_TRACE 411#ifdef CONFIG_KVM_TRACE
330int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); 412int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
331void kvm_trace_cleanup(void); 413void kvm_trace_cleanup(void);