aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/vfio.h
diff options
context:
space:
mode:
authorAlex Williamson <alex.williamson@redhat.com>2012-07-31 10:16:22 -0400
committerAlex Williamson <alex.williamson@redhat.com>2012-07-31 10:16:22 -0400
commitcba3345cc494ad286ca8823f44b2c16cae496679 (patch)
treea4cd502d0abb4dbf0c6e59f998152a22cb4b1606 /include/linux/vfio.h
parent2e3ee613480563a6d5c01b57d342e65cc58c06df (diff)
vfio: VFIO core
VFIO is a secure user level driver for use with both virtual machines and user level drivers. VFIO makes use of IOMMU groups to ensure the isolation of devices in use, allowing unprivileged user access. It's intended that VFIO will replace KVM device assignment and UIO drivers (in cases where the target platform includes a sufficiently capable IOMMU). New in this version of VFIO is support for IOMMU groups managed through the IOMMU core as well as a rework of the API, removing the group merge interface. We now go back to a model more similar to original VFIO with UIOMMU support where the file descriptor obtained from /dev/vfio/vfio allows access to the IOMMU, but only after a group is added, avoiding the previous privilege issues with this type of model. IOMMU support is also now fully modular as IOMMUs have vastly different interface requirements on different platforms. VFIO users are able to query and initialize the IOMMU model of their choice. Please see the follow-on Documentation commit for further description and usage example. Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'include/linux/vfio.h')
-rw-r--r--include/linux/vfio.h367
1 files changed, 367 insertions, 0 deletions
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
new file mode 100644
index 000000000000..03e56a5154b6
--- /dev/null
+++ b/include/linux/vfio.h
@@ -0,0 +1,367 @@
1/*
2 * VFIO API definition
3 *
4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef VFIO_H
12#define VFIO_H
13
14#include <linux/types.h>
15#include <linux/ioctl.h>
16
17#define VFIO_API_VERSION 0
18
19#ifdef __KERNEL__ /* Internal VFIO-core/bus driver API */
20
21#include <linux/iommu.h>
22#include <linux/mm.h>
23
24/**
25 * struct vfio_device_ops - VFIO bus driver device callbacks
26 *
27 * @open: Called when userspace creates new file descriptor for device
28 * @release: Called when userspace releases file descriptor for device
29 * @read: Perform read(2) on device file descriptor
30 * @write: Perform write(2) on device file descriptor
31 * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
32 * operations documented below
33 * @mmap: Perform mmap(2) on a region of the device file descriptor
34 */
35struct vfio_device_ops {
36 char *name; /* NOTE(review): not described in the kerneldoc; presumably the bus driver's name -- confirm */
37 int (*open)(void *device_data); /* every callback receives the opaque device_data pointer first */
38 void (*release)(void *device_data);
39 ssize_t (*read)(void *device_data, char __user *buf,
40 size_t count, loff_t *ppos);
41 ssize_t (*write)(void *device_data, const char __user *buf,
42 size_t count, loff_t *size); /* NOTE(review): named 'size' here but 'ppos' in read(); likely the same file-position pointer -- confirm */
43 long (*ioctl)(void *device_data, unsigned int cmd,
44 unsigned long arg);
45 int (*mmap)(void *device_data, struct vm_area_struct *vma);
46};
47
48extern int vfio_add_group_dev(struct device *dev,
49 const struct vfio_device_ops *ops,
50 void *device_data); /* takes a device, its callback table and an opaque pointer; NOTE(review): presumably returns 0 or -errno -- confirm in the VFIO core */
51
52extern void *vfio_del_group_dev(struct device *dev); /* NOTE(review): presumably returns the device_data given to vfio_add_group_dev() -- confirm */
53
54/**
55 * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
56 */
57struct vfio_iommu_driver_ops {
58 char *name; /* NOTE(review): presumably the IOMMU driver's name -- confirm */
59 struct module *owner; /* module providing these callbacks */
60 void *(*open)(unsigned long arg); /* NOTE(review): the returned pointer is presumably the iommu_data passed to the callbacks below -- confirm */
61 void (*release)(void *iommu_data);
62 ssize_t (*read)(void *iommu_data, char __user *buf,
63 size_t count, loff_t *ppos);
64 ssize_t (*write)(void *iommu_data, const char __user *buf,
65 size_t count, loff_t *size); /* NOTE(review): named 'size' here but 'ppos' in read() -- confirm intent */
66 long (*ioctl)(void *iommu_data, unsigned int cmd,
67 unsigned long arg);
68 int (*mmap)(void *iommu_data, struct vm_area_struct *vma);
69 int (*attach_group)(void *iommu_data,
70 struct iommu_group *group); /* returns int, unlike detach_group which returns nothing */
71 void (*detach_group)(void *iommu_data,
72 struct iommu_group *group);
73
74};
75
76extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); /* NOTE(review): presumably returns 0 or -errno -- confirm in the VFIO core */
77
78extern void vfio_unregister_iommu_driver(
79 const struct vfio_iommu_driver_ops *ops); /* takes the same ops table type as the register call */
80
/**
 * offsetofend(TYPE, MEMBER)
 *
 * @TYPE: The type of the structure
 * @MEMBER: The member within the structure to get the end offset of
 *
 * Simple helper macro for dealing with variable sized structures passed
 * from user space.  This allows us to easily determine if the provided
 * structure is sized to include various fields.
 *
 * Evaluates to the offset of the first byte past MEMBER, i.e.
 * offsetof(TYPE, MEMBER) + sizeof(MEMBER).  The member size is taken
 * through a null TYPE pointer inside sizeof, whose operand is never
 * evaluated, so no temporary object is declared and the result is an
 * integer constant expression.
 */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
94
95#endif /* __KERNEL__ */
96
97/* Kernel & User level defines for VFIO IOCTLs. */
98
99/* Extensions */
100
101/* None yet */
102
103/*
104 * The IOCTL interface is designed for extensibility by embedding the
105 * structure length (argsz) and flags into structures passed between
106 * kernel and userspace. We therefore use the _IO() macro for these
107 * defines to avoid implicitly embedding a size into the ioctl request.
108 * As structure fields are added, argsz will increase to match and flag
109 * bits will be defined to indicate additional fields with valid data.
110 * It's *always* the caller's responsibility to indicate the size of
111 * the structure passed by setting argsz appropriately.
112 */
113
114#define VFIO_TYPE (';') /* ioctl type argument passed to _IO() by every VFIO request in this header */
115#define VFIO_BASE 100 /* first ioctl number; requests in this header are numbered VFIO_BASE + n */
116
117/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
118
119/**
120 * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
121 *
122 * Report the version of the VFIO API. This allows us to bump the entire
123 * API version should we later need to add or change features in incompatible
124 * ways.
125 * Return: VFIO_API_VERSION
126 * Availability: Always
127 */
128#define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0)
129
130/**
131 * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
132 *
133 * Check whether an extension is supported.
134 * Return: 0 if not supported, 1 (or some other positive integer) if supported.
135 * Availability: Always
136 */
137#define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1)
138
139/**
140 * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
141 *
142 * Set the iommu to the given type. The type must be supported by an
143 * iommu driver as verified by calling CHECK_EXTENSION using the same
144 * type. A group must be set to this file descriptor before this
145 * ioctl is available. The IOMMU interfaces enabled by this call are
146 * specific to the value set.
147 * Return: 0 on success, -errno on failure
148 * Availability: When VFIO group attached
149 */
150#define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2)
151
152/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
153
154/**
155 * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
156 * struct vfio_group_status)
157 *
158 * Retrieve information about the group. Fills in provided
159 * struct vfio_group_status. Caller sets argsz.
160 * Return: 0 on success, -errno on failure.
161 * Availability: Always
162 */
163struct vfio_group_status {
164 __u32 argsz; /* structure length, set by the caller */
165 __u32 flags; /* VFIO_GROUP_FLAGS_* bits */
166#define VFIO_GROUP_FLAGS_VIABLE (1 << 0) /* NOTE(review): meaning of "viable" is not defined in this header -- confirm in the VFIO core */
167#define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1) /* NOTE(review): presumably set once the group has a container -- confirm */
168};
169#define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3) /* plain _IO(): the size travels in argsz, not in the request number */
170
171/**
172 * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
173 *
174 * Set the container for the VFIO group to the open VFIO file
175 * descriptor provided. Groups may only belong to a single
176 * container. Containers may, at their discretion, support multiple
177 * groups. Only when a container is set are all of the interfaces
178 * of the VFIO file descriptor and the VFIO group file descriptor
179 * available to the user.
180 * Return: 0 on success, -errno on failure.
181 * Availability: Always
182 */
183#define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4)
184
185/**
186 * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
187 *
188 * Remove the group from the attached container. This is the
189 * opposite of the SET_CONTAINER call and returns the group to
190 * an initial state. All device file descriptors must be released
191 * prior to calling this interface. When removing the last group
192 * from a container, the IOMMU will be disabled and all state lost,
193 * effectively also returning the VFIO file descriptor to an initial
194 * state.
195 * Return: 0 on success, -errno on failure.
196 * Availability: When attached to container
197 */
198#define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5)
199
200/**
201 * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
202 *
203 * Return a new file descriptor for the device object described by
204 * the provided string. The string should match a device listed in
205 * the devices subdirectory of the IOMMU group sysfs entry. The
206 * group containing the device must already be added to this context.
207 * Return: new file descriptor on success, -errno on failure.
208 * Availability: When attached to container
209 */
210#define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6)
211
212/* --------------- IOCTLs for DEVICE file descriptors --------------- */
213
214/**
215 * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
216 * struct vfio_device_info)
217 *
218 * Retrieve information about the device. Fills in provided
219 * struct vfio_device_info. Caller sets argsz.
220 * Return: 0 on success, -errno on failure.
221 */
222struct vfio_device_info {
223 __u32 argsz; /* structure length, set by the caller */
224 __u32 flags; /* VFIO_DEVICE_FLAGS_* bits */
225#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */
226 __u32 num_regions; /* Max region index + 1 */
227 __u32 num_irqs; /* Max IRQ index + 1 */
228};
229#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) /* plain _IO(): the size travels in argsz, not in the request number */
230
231/**
232 * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
233 * struct vfio_region_info)
234 *
235 * Retrieve information about a device region. Caller provides
236 * struct vfio_region_info with index value set. Caller sets argsz.
237 * Implementation of region mapping is bus driver specific. This is
238 * intended to describe MMIO, I/O port, as well as bus specific
239 * regions (ex. PCI config space). Zero sized regions may be used
240 * to describe unimplemented regions (ex. unimplemented PCI BARs).
241 * Return: 0 on success, -errno on failure.
242 */
243struct vfio_region_info {
244 __u32 argsz; /* structure length, set by the caller */
245 __u32 flags; /* VFIO_REGION_INFO_FLAG_* bits */
246#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
247#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
248#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
249 __u32 index; /* Region index, set by the caller */
250 __u32 resv; /* Reserved for alignment */
251 __u64 size; /* Region size (bytes); zero may describe an unimplemented region */
252 __u64 offset; /* Region offset from start of device fd */
253};
254#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) /* plain _IO(): the size travels in argsz, not in the request number */
255
256/**
257 * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
258 * struct vfio_irq_info)
259 *
260 * Retrieve information about a device IRQ. Caller provides
261 * struct vfio_irq_info with index value set. Caller sets argsz.
262 * Implementation of IRQ mapping is bus driver specific. Indexes
263 * using multiple IRQs are primarily intended to support MSI-like
264 * interrupt blocks. Zero count irq blocks may be used to describe
265 * unimplemented interrupt types.
266 *
267 * The EVENTFD flag indicates the interrupt index supports eventfd based
268 * signaling.
269 *
270 * The MASKABLE flag indicates the index supports MASK and UNMASK
271 * actions described below.
272 *
273 * AUTOMASKED indicates that after signaling, the interrupt line is
274 * automatically masked by VFIO and the user needs to unmask the line
275 * to receive new interrupts. This is primarily intended to distinguish
276 * level triggered interrupts.
277 *
278 * The NORESIZE flag indicates that the interrupt lines within the index
279 * are setup as a set and new subindexes cannot be enabled without first
280 * disabling the entire index. This is used for interrupts like PCI MSI
281 * and MSI-X where the driver may only use a subset of the available
282 * indexes, but VFIO needs to enable a specific number of vectors
283 * upfront. In the case of MSI-X, where the user can enable MSI-X and
284 * then add and unmask vectors, it's up to userspace to make the decision
285 * whether to allocate the maximum supported number of vectors or tear
286 * down setup and incrementally increase the vectors as each is enabled.
287 */
288struct vfio_irq_info {
289 __u32 argsz; /* structure length, set by the caller */
290 __u32 flags; /* VFIO_IRQ_INFO_* bits */
291#define VFIO_IRQ_INFO_EVENTFD (1 << 0) /* index supports eventfd based signaling */
292#define VFIO_IRQ_INFO_MASKABLE (1 << 1) /* index supports the MASK and UNMASK actions */
293#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) /* line is masked by VFIO after signaling; user must unmask it */
294#define VFIO_IRQ_INFO_NORESIZE (1 << 3) /* subindexes cannot be added without first disabling the whole index */
295 __u32 index; /* IRQ index, set by the caller */
296 __u32 count; /* Number of IRQs within this index; zero may describe an unimplemented type */
297};
298#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) /* plain _IO(): the size travels in argsz, not in the request number */
299
300/**
301 * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
302 *
303 * Set signaling, masking, and unmasking of interrupts. Caller provides
304 * struct vfio_irq_set with all fields set. 'start' and 'count' indicate
305 * the range of subindexes being specified.
306 *
307 * The DATA flags specify the type of data provided. If DATA_NONE, the
308 * operation performs the specified action immediately on the specified
309 * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]:
310 * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
311 *
312 * DATA_BOOL allows sparse support for the same on arrays of interrupts.
313 * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
314 * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
315 * data = {1,0,1}
316 *
317 * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
318 * A value of -1 can be used to either de-assign interrupts if already
319 * assigned or skip un-assigned interrupts. For example, to set an eventfd
320 * to be triggered for interrupts [0,0] and [0,2]:
321 * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
322 * data = {fd1, -1, fd2}
323 * If index [0,1] is previously set, two count = 1 ioctl calls would be
324 * required to set [0,0] and [0,2] without changing [0,1].
325 *
326 * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
327 * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
328 * from userspace (ie. simulate hardware triggering).
329 *
330 * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
331 * enables the interrupt index for the device. Individual subindex interrupts
332 * can be disabled using the -1 value for DATA_EVENTFD or the index can be
333 * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
334 *
335 * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
336 * ACTION_TRIGGER specifies kernel->user signaling.
337 */
338struct vfio_irq_set {
339 __u32 argsz; /* structure length, set by the caller */
340 __u32 flags; /* one DATA_* bit combined with one ACTION_* bit, as in the examples in the kerneldoc */
341#define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */
342#define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */
343#define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */
344#define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */
345#define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */
346#define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
347 __u32 index; /* IRQ index being operated on */
348 __u32 start; /* first subindex of the range */
349 __u32 count; /* number of subindexes in the range */
350 __u8 data[]; /* per-subindex payload; element type depends on the DATA_* flag (u8 for BOOL, s32 for EVENTFD) */
351};
352#define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10) /* plain _IO(): the size travels in argsz, not in the request number */
353
354#define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
355 VFIO_IRQ_SET_DATA_BOOL | \
356 VFIO_IRQ_SET_DATA_EVENTFD) /* union of the three DATA_* flag bits of struct vfio_irq_set */
357#define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \
358 VFIO_IRQ_SET_ACTION_UNMASK | \
359 VFIO_IRQ_SET_ACTION_TRIGGER) /* union of the three ACTION_* flag bits of struct vfio_irq_set */
360/**
361 * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
362 *
363 * Reset a device.
364 */
365#define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11)
366
367#endif /* VFIO_H */