aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 22:17:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 22:17:27 -0400
commita40a1d3d0a2fd613fdec6d89d3c053268ced76ed (patch)
treeed8ea274f82e0cb645632c9b1ddcd1a6afc40bad /include/linux
parent3e9a97082fa639394e905e1fc4a0a7f719ca7644 (diff)
parent89e1f7d4c66d85f42c3d52ea3866eb10cadf6153 (diff)
Merge tag 'vfio-for-v3.6' of git://github.com/awilliam/linux-vfio
Pull VFIO core from Alex Williamson: "This series includes the VFIO userspace driver interface for the 3.6 kernel merge window. This driver is intended to provide a secure interface for device access using IOMMU protection for applications like assignment of physical devices to virtual machines. Qemu will be the first user of this interface, enabling assignment of PCI devices to Qemu guests. This interface is intended to eventually replace the x86-specific assignment mechanism currently available in KVM. This interface has the advantage of being more secure, by working with IOMMU groups to ensure device isolation and providing it's own filtered resource access mechanism, and also more flexible, in not being x86 or KVM specific (extensions to enable POWER are already working). This driver is originally the work of Tom Lyon, but has since been handed over to me and gone through a complete overhaul thanks to the input from David Gibson, Ben Herrenschmidt, Chris Wright, Joerg Roedel, and others. This driver has been available in linux-next for the last month." Paul Mackerras says: "I would be glad to see it go in since we want to use it with KVM on PowerPC. If possible we'd like the PowerPC bits for it to go in as well." * tag 'vfio-for-v3.6' of git://github.com/awilliam/linux-vfio: vfio: Add PCI device driver vfio: Type1 IOMMU implementation vfio: Add documentation vfio: VFIO core
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/vfio.h445
1 files changed, 445 insertions, 0 deletions
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
new file mode 100644
index 00000000000..0a4f180a11d
--- /dev/null
+++ b/include/linux/vfio.h
@@ -0,0 +1,445 @@
1/*
2 * VFIO API definition
3 *
4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef VFIO_H
12#define VFIO_H
13
14#include <linux/types.h>
15#include <linux/ioctl.h>
16
17#define VFIO_API_VERSION 0
18
19#ifdef __KERNEL__ /* Internal VFIO-core/bus driver API */
20
21#include <linux/iommu.h>
22#include <linux/mm.h>
23
24/**
25 * struct vfio_device_ops - VFIO bus driver device callbacks
26 *
27 * @open: Called when userspace creates new file descriptor for device
28 * @release: Called when userspace releases file descriptor for device
29 * @read: Perform read(2) on device file descriptor
30 * @write: Perform write(2) on device file descriptor
31 * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
32 * operations documented below
33 * @mmap: Perform mmap(2) on a region of the device file descriptor
34 */
35struct vfio_device_ops {
36 char *name;
37 int (*open)(void *device_data);
38 void (*release)(void *device_data);
39 ssize_t (*read)(void *device_data, char __user *buf,
40 size_t count, loff_t *ppos);
41 ssize_t (*write)(void *device_data, const char __user *buf,
42 size_t count, loff_t *size);
43 long (*ioctl)(void *device_data, unsigned int cmd,
44 unsigned long arg);
45 int (*mmap)(void *device_data, struct vm_area_struct *vma);
46};
47
48extern int vfio_add_group_dev(struct device *dev,
49 const struct vfio_device_ops *ops,
50 void *device_data);
51
52extern void *vfio_del_group_dev(struct device *dev);
53
54/**
55 * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
56 */
57struct vfio_iommu_driver_ops {
58 char *name;
59 struct module *owner;
60 void *(*open)(unsigned long arg);
61 void (*release)(void *iommu_data);
62 ssize_t (*read)(void *iommu_data, char __user *buf,
63 size_t count, loff_t *ppos);
64 ssize_t (*write)(void *iommu_data, const char __user *buf,
65 size_t count, loff_t *size);
66 long (*ioctl)(void *iommu_data, unsigned int cmd,
67 unsigned long arg);
68 int (*mmap)(void *iommu_data, struct vm_area_struct *vma);
69 int (*attach_group)(void *iommu_data,
70 struct iommu_group *group);
71 void (*detach_group)(void *iommu_data,
72 struct iommu_group *group);
73
74};
75
76extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
77
78extern void vfio_unregister_iommu_driver(
79 const struct vfio_iommu_driver_ops *ops);
80
81/**
82 * offsetofend(TYPE, MEMBER)
83 *
84 * @TYPE: The type of the structure
85 * @MEMBER: The member within the structure to get the end offset of
86 *
87 * Simple helper macro for dealing with variable sized structures passed
88 * from user space. This allows us to easily determine if the provided
89 * structure is sized to include various fields.
90 */
91#define offsetofend(TYPE, MEMBER) ({ \
92 TYPE tmp; \
93 offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); }) \
94
95#endif /* __KERNEL__ */
96
97/* Kernel & User level defines for VFIO IOCTLs. */
98
99/* Extensions */
100
101#define VFIO_TYPE1_IOMMU 1
102
103/*
104 * The IOCTL interface is designed for extensibility by embedding the
105 * structure length (argsz) and flags into structures passed between
106 * kernel and userspace. We therefore use the _IO() macro for these
107 * defines to avoid implicitly embedding a size into the ioctl request.
108 * As structure fields are added, argsz will increase to match and flag
109 * bits will be defined to indicate additional fields with valid data.
110 * It's *always* the caller's responsibility to indicate the size of
111 * the structure passed by setting argsz appropriately.
112 */
113
114#define VFIO_TYPE (';')
115#define VFIO_BASE 100
116
117/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
118
119/**
120 * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
121 *
122 * Report the version of the VFIO API. This allows us to bump the entire
123 * API version should we later need to add or change features in incompatible
124 * ways.
125 * Return: VFIO_API_VERSION
126 * Availability: Always
127 */
128#define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0)
129
130/**
131 * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
132 *
133 * Check whether an extension is supported.
134 * Return: 0 if not supported, 1 (or some other positive integer) if supported.
135 * Availability: Always
136 */
137#define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1)
138
139/**
140 * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
141 *
142 * Set the iommu to the given type. The type must be supported by an
143 * iommu driver as verified by calling CHECK_EXTENSION using the same
144 * type. A group must be set to this file descriptor before this
145 * ioctl is available. The IOMMU interfaces enabled by this call are
146 * specific to the value set.
147 * Return: 0 on success, -errno on failure
148 * Availability: When VFIO group attached
149 */
150#define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2)
151
152/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
153
154/**
155 * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
156 * struct vfio_group_status)
157 *
158 * Retrieve information about the group. Fills in provided
159 * struct vfio_group_info. Caller sets argsz.
160 * Return: 0 on succes, -errno on failure.
161 * Availability: Always
162 */
163struct vfio_group_status {
164 __u32 argsz;
165 __u32 flags;
166#define VFIO_GROUP_FLAGS_VIABLE (1 << 0)
167#define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1)
168};
169#define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3)
170
171/**
172 * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
173 *
174 * Set the container for the VFIO group to the open VFIO file
175 * descriptor provided. Groups may only belong to a single
176 * container. Containers may, at their discretion, support multiple
177 * groups. Only when a container is set are all of the interfaces
178 * of the VFIO file descriptor and the VFIO group file descriptor
179 * available to the user.
180 * Return: 0 on success, -errno on failure.
181 * Availability: Always
182 */
183#define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4)
184
185/**
186 * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
187 *
188 * Remove the group from the attached container. This is the
189 * opposite of the SET_CONTAINER call and returns the group to
190 * an initial state. All device file descriptors must be released
191 * prior to calling this interface. When removing the last group
192 * from a container, the IOMMU will be disabled and all state lost,
193 * effectively also returning the VFIO file descriptor to an initial
194 * state.
195 * Return: 0 on success, -errno on failure.
196 * Availability: When attached to container
197 */
198#define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5)
199
200/**
201 * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
202 *
203 * Return a new file descriptor for the device object described by
204 * the provided string. The string should match a device listed in
205 * the devices subdirectory of the IOMMU group sysfs entry. The
206 * group containing the device must already be added to this context.
207 * Return: new file descriptor on success, -errno on failure.
208 * Availability: When attached to container
209 */
210#define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6)
211
212/* --------------- IOCTLs for DEVICE file descriptors --------------- */
213
214/**
215 * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
216 * struct vfio_device_info)
217 *
218 * Retrieve information about the device. Fills in provided
219 * struct vfio_device_info. Caller sets argsz.
220 * Return: 0 on success, -errno on failure.
221 */
222struct vfio_device_info {
223 __u32 argsz;
224 __u32 flags;
225#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */
226#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
227 __u32 num_regions; /* Max region index + 1 */
228 __u32 num_irqs; /* Max IRQ index + 1 */
229};
230#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7)
231
232/**
233 * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
234 * struct vfio_region_info)
235 *
236 * Retrieve information about a device region. Caller provides
237 * struct vfio_region_info with index value set. Caller sets argsz.
238 * Implementation of region mapping is bus driver specific. This is
239 * intended to describe MMIO, I/O port, as well as bus specific
240 * regions (ex. PCI config space). Zero sized regions may be used
241 * to describe unimplemented regions (ex. unimplemented PCI BARs).
242 * Return: 0 on success, -errno on failure.
243 */
244struct vfio_region_info {
245 __u32 argsz;
246 __u32 flags;
247#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
248#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
249#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
250 __u32 index; /* Region index */
251 __u32 resv; /* Reserved for alignment */
252 __u64 size; /* Region size (bytes) */
253 __u64 offset; /* Region offset from start of device fd */
254};
255#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
256
257/**
258 * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
259 * struct vfio_irq_info)
260 *
261 * Retrieve information about a device IRQ. Caller provides
262 * struct vfio_irq_info with index value set. Caller sets argsz.
263 * Implementation of IRQ mapping is bus driver specific. Indexes
264 * using multiple IRQs are primarily intended to support MSI-like
265 * interrupt blocks. Zero count irq blocks may be used to describe
266 * unimplemented interrupt types.
267 *
268 * The EVENTFD flag indicates the interrupt index supports eventfd based
269 * signaling.
270 *
271 * The MASKABLE flags indicates the index supports MASK and UNMASK
272 * actions described below.
273 *
274 * AUTOMASKED indicates that after signaling, the interrupt line is
275 * automatically masked by VFIO and the user needs to unmask the line
276 * to receive new interrupts. This is primarily intended to distinguish
277 * level triggered interrupts.
278 *
279 * The NORESIZE flag indicates that the interrupt lines within the index
280 * are setup as a set and new subindexes cannot be enabled without first
281 * disabling the entire index. This is used for interrupts like PCI MSI
282 * and MSI-X where the driver may only use a subset of the available
283 * indexes, but VFIO needs to enable a specific number of vectors
284 * upfront. In the case of MSI-X, where the user can enable MSI-X and
285 * then add and unmask vectors, it's up to userspace to make the decision
286 * whether to allocate the maximum supported number of vectors or tear
287 * down setup and incrementally increase the vectors as each is enabled.
288 */
289struct vfio_irq_info {
290 __u32 argsz;
291 __u32 flags;
292#define VFIO_IRQ_INFO_EVENTFD (1 << 0)
293#define VFIO_IRQ_INFO_MASKABLE (1 << 1)
294#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2)
295#define VFIO_IRQ_INFO_NORESIZE (1 << 3)
296 __u32 index; /* IRQ index */
297 __u32 count; /* Number of IRQs within this index */
298};
299#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9)
300
301/**
302 * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
303 *
304 * Set signaling, masking, and unmasking of interrupts. Caller provides
305 * struct vfio_irq_set with all fields set. 'start' and 'count' indicate
306 * the range of subindexes being specified.
307 *
308 * The DATA flags specify the type of data provided. If DATA_NONE, the
309 * operation performs the specified action immediately on the specified
310 * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]:
311 * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
312 *
313 * DATA_BOOL allows sparse support for the same on arrays of interrupts.
314 * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
315 * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
316 * data = {1,0,1}
317 *
318 * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
319 * A value of -1 can be used to either de-assign interrupts if already
320 * assigned or skip un-assigned interrupts. For example, to set an eventfd
321 * to be trigger for interrupts [0,0] and [0,2]:
322 * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
323 * data = {fd1, -1, fd2}
324 * If index [0,1] is previously set, two count = 1 ioctls calls would be
325 * required to set [0,0] and [0,2] without changing [0,1].
326 *
327 * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
328 * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
329 * from userspace (ie. simulate hardware triggering).
330 *
331 * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
332 * enables the interrupt index for the device. Individual subindex interrupts
333 * can be disabled using the -1 value for DATA_EVENTFD or the index can be
334 * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
335 *
336 * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
337 * ACTION_TRIGGER specifies kernel->user signaling.
338 */
339struct vfio_irq_set {
340 __u32 argsz;
341 __u32 flags;
342#define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */
343#define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */
344#define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */
345#define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */
346#define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */
347#define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
348 __u32 index;
349 __u32 start;
350 __u32 count;
351 __u8 data[];
352};
353#define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10)
354
355#define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
356 VFIO_IRQ_SET_DATA_BOOL | \
357 VFIO_IRQ_SET_DATA_EVENTFD)
358#define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \
359 VFIO_IRQ_SET_ACTION_UNMASK | \
360 VFIO_IRQ_SET_ACTION_TRIGGER)
361/**
362 * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
363 *
364 * Reset a device.
365 */
366#define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11)
367
368/*
369 * The VFIO-PCI bus driver makes use of the following fixed region and
370 * IRQ index mapping. Unimplemented regions return a size of zero.
371 * Unimplemented IRQ types return a count of zero.
372 */
373
374enum {
375 VFIO_PCI_BAR0_REGION_INDEX,
376 VFIO_PCI_BAR1_REGION_INDEX,
377 VFIO_PCI_BAR2_REGION_INDEX,
378 VFIO_PCI_BAR3_REGION_INDEX,
379 VFIO_PCI_BAR4_REGION_INDEX,
380 VFIO_PCI_BAR5_REGION_INDEX,
381 VFIO_PCI_ROM_REGION_INDEX,
382 VFIO_PCI_CONFIG_REGION_INDEX,
383 VFIO_PCI_NUM_REGIONS
384};
385
386enum {
387 VFIO_PCI_INTX_IRQ_INDEX,
388 VFIO_PCI_MSI_IRQ_INDEX,
389 VFIO_PCI_MSIX_IRQ_INDEX,
390 VFIO_PCI_NUM_IRQS
391};
392
393/* -------- API for Type1 VFIO IOMMU -------- */
394
395/**
396 * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
397 *
398 * Retrieve information about the IOMMU object. Fills in provided
399 * struct vfio_iommu_info. Caller sets argsz.
400 *
401 * XXX Should we do these by CHECK_EXTENSION too?
402 */
403struct vfio_iommu_type1_info {
404 __u32 argsz;
405 __u32 flags;
406#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
407 __u64 iova_pgsizes; /* Bitmap of supported page sizes */
408};
409
410#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
411
412/**
413 * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
414 *
415 * Map process virtual addresses to IO virtual addresses using the
416 * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
417 */
418struct vfio_iommu_type1_dma_map {
419 __u32 argsz;
420 __u32 flags;
421#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
422#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
423 __u64 vaddr; /* Process virtual address */
424 __u64 iova; /* IO virtual address */
425 __u64 size; /* Size of mapping (bytes) */
426};
427
428#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
429
430/**
431 * VFIO_IOMMU_UNMAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 14, struct vfio_dma_unmap)
432 *
433 * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
434 * Caller sets argsz.
435 */
436struct vfio_iommu_type1_dma_unmap {
437 __u32 argsz;
438 __u32 flags;
439 __u64 iova; /* IO virtual address */
440 __u64 size; /* Size of mapping (bytes) */
441};
442
443#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
444
445#endif /* VFIO_H */