diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-21 21:24:21 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-23 01:49:56 -0400 |
commit | 19f1537b7b8a9a82665db3ad8210a9d954d13acd (patch) | |
tree | 793c1f8763350012caa521a55c5778b1c633b7e5 /drivers | |
parent | 15045275c32bf6d15d32c2eca8157be9c0ba6e45 (diff) |
Lguest support for Virtio
This makes lguest able to use the virtio devices.
We change the device descriptor page from a simple array to a variable
length "type, config_len, status, config data..." format, and
implement virtio_config_ops to read from that config data.
We use the virtio ring implementation for an efficient Guest <-> Host
virtqueue mechanism, and the new LHCALL_NOTIFY hypercall to kick the
host when it changes.
We also use LHCALL_NOTIFY on kernel addresses for very very early
console output. We could have another hypercall, but this hack works
quite well.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/lguest/Makefile | 3 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 373 |
2 files changed, 376 insertions, 0 deletions
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile index a63f75dc41a1..5e8272d296d8 100644 --- a/drivers/lguest/Makefile +++ b/drivers/lguest/Makefile | |||
@@ -1,3 +1,6 @@ | |||
1 | # Guest requires the device configuration and probing code. | ||
2 | obj-$(CONFIG_LGUEST_GUEST) += lguest_device.o | ||
3 | |||
1 | # Host requires the other files, which can be a module. | 4 | # Host requires the other files, which can be a module. |
2 | obj-$(CONFIG_LGUEST) += lg.o | 5 | obj-$(CONFIG_LGUEST) += lg.o |
3 | lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \ | 6 | lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \ |
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c new file mode 100644 index 000000000000..71c64837b437 --- /dev/null +++ b/drivers/lguest/lguest_device.c | |||
@@ -0,0 +1,373 @@ | |||
1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a | ||
2 | * series of device descriptors contained just above the top of normal | ||
3 | * memory. | ||
4 | * | ||
5 | * We use the standard "virtio" device infrastructure, which provides us with a | ||
6 | * console, a network and a block driver. Each one expects some configuration | ||
7 | * information and a "virtqueue" mechanism to send and receive data. :*/ | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/bootmem.h> | ||
10 | #include <linux/lguest_launcher.h> | ||
11 | #include <linux/virtio.h> | ||
12 | #include <linux/virtio_config.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/virtio_ring.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <asm/io.h> | ||
17 | #include <asm/paravirt.h> | ||
18 | #include <asm/lguest_hcall.h> | ||
19 | |||
/* The pointer to our (page) of device descriptions.  It is set once by
 * lguest_devices_init() from lguest_map() and then walked by scan_devices(). */
static void *lguest_devices;

/* Unique numbering for lguest devices.  add_lguest_device() post-increments
 * this for every descriptor it is handed, including ones for which the
 * allocation fails, so an index is never reused. */
static unsigned int dev_index;
25 | |||
26 | /* For Guests, device memory can be used as normal memory, so we cast away the | ||
27 | * __iomem to quieten sparse. */ | ||
28 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) | ||
29 | { | ||
30 | return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages); | ||
31 | } | ||
32 | |||
33 | static inline void lguest_unmap(void *addr) | ||
34 | { | ||
35 | iounmap((__force void __iomem *)addr); | ||
36 | } | ||
37 | |||
/*D:100 Each lguest device is just a virtio device plus a pointer to its entry
 * in the lguest_devices page.  The virtio_device is embedded rather than
 * pointed to, which is what lets to_lgdev() get back to this structure with
 * container_of(). */
struct lguest_device {
	/* The generic virtio device we pass to register_virtio_device(). */
	struct virtio_device vdev;

	/* The entry in the lguest_devices page for this device. */
	struct lguest_device_desc *desc;
};
46 | |||
/* Since the virtio infrastructure hands us a pointer to the virtio_device all
 * the time, it helps to have a curt macro to get a pointer to the struct
 * lguest_device it's enclosed in.
 *
 * The macro parameter is deliberately NOT spelled "vdev": the preprocessor
 * would substitute it into container_of()'s member-name argument as well, and
 * the macro would then only work for callers whose variable happens to be
 * called vdev. */
#define to_lgdev(vd) container_of(vd, struct lguest_device, vdev)
51 | |||
52 | /*D:130 | ||
53 | * Device configurations | ||
54 | * | ||
55 | * The configuration information for a device consists of a series of fields. | ||
56 | * The device will look for these fields during setup. | ||
57 | * | ||
58 | * For us these fields come immediately after that device's descriptor in the | ||
59 | * lguest_devices page. | ||
60 | * | ||
61 | * Each field starts with a "type" byte, a "length" byte, then that number of | ||
62 | * bytes of configuration information. The device descriptor tells us the | ||
63 | * total configuration length so we know when we've reached the last field. */ | ||
64 | |||
/* Size of a field header: the "type" byte plus the "length" byte.  A field's
 * data begins this many bytes after the start of the field. */
#define FHDR_LEN 2
67 | |||
68 | /* This finds the first field of a given type for a device's configuration. */ | ||
69 | static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len) | ||
70 | { | ||
71 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; | ||
72 | int i; | ||
73 | |||
74 | for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) { | ||
75 | if (desc->config[i] == type) { | ||
76 | /* Mark it used, so Host can know we looked at it, and | ||
77 | * also so we won't find the same one twice. */ | ||
78 | desc->config[i] |= 0x80; | ||
79 | /* Remember, the second byte is the length. */ | ||
80 | *len = desc->config[i+1]; | ||
81 | /* We return a pointer to the field header. */ | ||
82 | return desc->config + i; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | /* Not found: return NULL for failure. */ | ||
87 | return NULL; | ||
88 | } | ||
89 | |||
90 | /* Once they've found a field, getting a copy of it is easy. */ | ||
91 | static void lg_get(struct virtio_device *vdev, void *token, | ||
92 | void *buf, unsigned len) | ||
93 | { | ||
94 | /* Check they didn't ask for more than the length of the field! */ | ||
95 | BUG_ON(len > ((u8 *)token)[1]); | ||
96 | memcpy(buf, token + FHDR_LEN, len); | ||
97 | } | ||
98 | |||
99 | /* Setting the contents is also trivial. */ | ||
100 | static void lg_set(struct virtio_device *vdev, void *token, | ||
101 | const void *buf, unsigned len) | ||
102 | { | ||
103 | BUG_ON(len > ((u8 *)token)[1]); | ||
104 | memcpy(token + FHDR_LEN, buf, len); | ||
105 | } | ||
106 | |||
107 | /* The operations to get and set the status word just access the status field | ||
108 | * of the device descriptor. */ | ||
109 | static u8 lg_get_status(struct virtio_device *vdev) | ||
110 | { | ||
111 | return to_lgdev(vdev)->desc->status; | ||
112 | } | ||
113 | |||
114 | static void lg_set_status(struct virtio_device *vdev, u8 status) | ||
115 | { | ||
116 | to_lgdev(vdev)->desc->status = status; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Virtqueues | ||
121 | * | ||
122 | * The other piece of infrastructure virtio needs is a "virtqueue": a way of | ||
123 | * the Guest device registering buffers for the other side to read from or | ||
124 | * write into (ie. send and receive buffers). Each device can have multiple | ||
125 | * virtqueues: for example the console has one queue for sending and one for | ||
126 | * receiving. | ||
127 | * | ||
128 | * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue | ||
129 | * already exists in virtio_ring.c. We just need to connect it up. | ||
130 | * | ||
131 | * We start with the information we need to keep about each virtqueue. | ||
132 | */ | ||
133 | |||
/*D:140 This is the information we remember about each virtqueue.  One of
 * these is allocated by lg_find_vq(), hung off the virtqueue's "priv" pointer
 * and freed again by lg_del_vq(). */
struct lguest_vq_info
{
	/* A copy of the information contained in the device config.  The
	 * code in this file uses its "num", "irq" and "pfn" members. */
	struct lguest_vqconfig config;

	/* The address where we mapped the virtio ring, so we can unmap it. */
	void *pages;
};
143 | |||
144 | /* When the virtio_ring code wants to prod the Host, it calls us here and we | ||
145 | * make a hypercall. We hand the page number of the virtqueue so the Host | ||
146 | * knows which virtqueue we're talking about. */ | ||
147 | static void lg_notify(struct virtqueue *vq) | ||
148 | { | ||
149 | /* We store our virtqueue information in the "priv" pointer of the | ||
150 | * virtqueue structure. */ | ||
151 | struct lguest_vq_info *lvq = vq->priv; | ||
152 | |||
153 | hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0); | ||
154 | } | ||
155 | |||
156 | /* This routine finds the first virtqueue described in the configuration of | ||
157 | * this device and sets it up. | ||
158 | * | ||
159 | * This is kind of an ugly duckling. It'd be nicer to have a standard | ||
160 | * representation of a virtqueue in the configuration space, but it seems that | ||
161 | * everyone wants to do it differently. The KVM guys want the Guest to | ||
162 | * allocate its own pages and tell the Host where they are, but for lguest it's | ||
163 | * simpler for the Host to simply tell us where the pages are. | ||
164 | * | ||
165 | * So we provide devices with a "find virtqueue and set it up" function. */ | ||
166 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | ||
167 | bool (*callback)(struct virtqueue *vq)) | ||
168 | { | ||
169 | struct lguest_vq_info *lvq; | ||
170 | struct virtqueue *vq; | ||
171 | unsigned int len; | ||
172 | void *token; | ||
173 | int err; | ||
174 | |||
175 | /* Look for a field of the correct type to mark a virtqueue. Note that | ||
176 | * if this succeeds, then the type will be changed so it won't be found | ||
177 | * again, and future lg_find_vq() calls will find the next | ||
178 | * virtqueue (if any). */ | ||
179 | token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len); | ||
180 | if (!token) | ||
181 | return ERR_PTR(-ENOENT); | ||
182 | |||
183 | lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); | ||
184 | if (!lvq) | ||
185 | return ERR_PTR(-ENOMEM); | ||
186 | |||
187 | /* Note: we could use a configuration space inside here, just like we | ||
188 | * do for the device. This would allow expansion in future, because | ||
189 | * our configuration system is designed to be expansible. But this is | ||
190 | * way easier. */ | ||
191 | if (len != sizeof(lvq->config)) { | ||
192 | dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len); | ||
193 | err = -EIO; | ||
194 | goto free_lvq; | ||
195 | } | ||
196 | /* Make a copy of the "struct lguest_vqconfig" field. We need a copy | ||
197 | * because the config space might not be aligned correctly. */ | ||
198 | vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config)); | ||
199 | |||
200 | /* Figure out how many pages the ring will take, and map that memory */ | ||
201 | lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, | ||
202 | DIV_ROUND_UP(vring_size(lvq->config.num), | ||
203 | PAGE_SIZE)); | ||
204 | if (!lvq->pages) { | ||
205 | err = -ENOMEM; | ||
206 | goto free_lvq; | ||
207 | } | ||
208 | |||
209 | /* OK, tell virtio_ring.c to set up a virtqueue now we know its size | ||
210 | * and we've got a pointer to its pages. */ | ||
211 | vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages, | ||
212 | lg_notify, callback); | ||
213 | if (!vq) { | ||
214 | err = -ENOMEM; | ||
215 | goto unmap; | ||
216 | } | ||
217 | |||
218 | /* Tell the interrupt for this virtqueue to go to the virtio_ring | ||
219 | * interrupt handler. */ | ||
220 | /* FIXME: We used to have a flag for the Host to tell us we could use | ||
221 | * the interrupt as a source of randomness: it'd be nice to have that | ||
222 | * back.. */ | ||
223 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, | ||
224 | vdev->dev.bus_id, vq); | ||
225 | if (err) | ||
226 | goto destroy_vring; | ||
227 | |||
228 | /* Last of all we hook up our 'struct lguest_vq_info" to the | ||
229 | * virtqueue's priv pointer. */ | ||
230 | vq->priv = lvq; | ||
231 | return vq; | ||
232 | |||
233 | destroy_vring: | ||
234 | vring_del_virtqueue(vq); | ||
235 | unmap: | ||
236 | lguest_unmap(lvq->pages); | ||
237 | free_lvq: | ||
238 | kfree(lvq); | ||
239 | return ERR_PTR(err); | ||
240 | } | ||
241 | /*:*/ | ||
242 | |||
243 | /* Cleaning up a virtqueue is easy */ | ||
244 | static void lg_del_vq(struct virtqueue *vq) | ||
245 | { | ||
246 | struct lguest_vq_info *lvq = vq->priv; | ||
247 | |||
248 | /* Tell virtio_ring.c to free the virtqueue. */ | ||
249 | vring_del_virtqueue(vq); | ||
250 | /* Unmap the pages containing the ring. */ | ||
251 | lguest_unmap(lvq->pages); | ||
252 | /* Free our own queue information. */ | ||
253 | kfree(lvq); | ||
254 | } | ||
255 | |||
/* The ops structure which hooks everything together.  add_lguest_device()
 * points every device's "config" at this, so the virtio core and drivers reach
 * the lg_* routines in this file through it. */
static struct virtio_config_ops lguest_config_ops = {
	/* Configuration fields: locate one, then read or write its data. */
	.find = lg_find,
	.get = lg_get,
	.set = lg_set,
	/* The status byte in the device descriptor. */
	.get_status = lg_get_status,
	.set_status = lg_set_status,
	/* Virtqueue setup and teardown. */
	.find_vq = lg_find_vq,
	.del_vq = lg_del_vq,
};
266 | |||
/* The root device for the lguest virtio devices.  This makes them appear as
 * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2.  It is registered by
 * lguest_devices_init() and used as the parent of every device created in
 * add_lguest_device(). */
static struct device lguest_root = {
	/* No parent of its own. */
	.parent = NULL,
	.bus_id = "lguest",
};
273 | |||
274 | /*D:120 This is the core of the lguest bus: actually adding a new device. | ||
275 | * It's a separate function because it's neater that way, and because an | ||
276 | * earlier version of the code supported hotplug and unplug. They were removed | ||
277 | * early on because they were never used. | ||
278 | * | ||
279 | * As Andrew Tridgell says, "Untested code is buggy code". | ||
280 | * | ||
281 | * It's worth reading this carefully: we start with a pointer to the new device | ||
282 | * descriptor in the "lguest_devices" page. */ | ||
283 | static void add_lguest_device(struct lguest_device_desc *d) | ||
284 | { | ||
285 | struct lguest_device *ldev; | ||
286 | |||
287 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); | ||
288 | if (!ldev) { | ||
289 | printk(KERN_EMERG "Cannot allocate lguest dev %u\n", | ||
290 | dev_index++); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | /* This devices' parent is the lguest/ dir. */ | ||
295 | ldev->vdev.dev.parent = &lguest_root; | ||
296 | /* We have a unique device index thanks to the dev_index counter. */ | ||
297 | ldev->vdev.index = dev_index++; | ||
298 | /* The device type comes straight from the descriptor. There's also a | ||
299 | * device vendor field in the virtio_device struct, which we leave as | ||
300 | * 0. */ | ||
301 | ldev->vdev.id.device = d->type; | ||
302 | /* We have a simple set of routines for querying the device's | ||
303 | * configuration information and setting its status. */ | ||
304 | ldev->vdev.config = &lguest_config_ops; | ||
305 | /* And we remember the device's descriptor for lguest_config_ops. */ | ||
306 | ldev->desc = d; | ||
307 | |||
308 | /* register_virtio_device() sets up the generic fields for the struct | ||
309 | * virtio_device and calls device_register(). This makes the bus | ||
310 | * infrastructure look for a matching driver. */ | ||
311 | if (register_virtio_device(&ldev->vdev) != 0) { | ||
312 | printk(KERN_ERR "Failed to register lguest device %u\n", | ||
313 | ldev->vdev.index); | ||
314 | kfree(ldev); | ||
315 | } | ||
316 | } | ||
317 | |||
318 | /*D:110 scan_devices() simply iterates through the device page. The type 0 is | ||
319 | * reserved to mean "end of devices". */ | ||
320 | static void scan_devices(void) | ||
321 | { | ||
322 | unsigned int i; | ||
323 | struct lguest_device_desc *d; | ||
324 | |||
325 | /* We start at the page beginning, and skip over each entry. */ | ||
326 | for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) { | ||
327 | d = lguest_devices + i; | ||
328 | |||
329 | /* Once we hit a zero, stop. */ | ||
330 | if (d->type == 0) | ||
331 | break; | ||
332 | |||
333 | add_lguest_device(d); | ||
334 | } | ||
335 | } | ||
336 | |||
337 | /*D:105 Fairly early in boot, lguest_devices_init() is called to set up the | ||
338 | * lguest device infrastructure. We check that we are a Guest by checking | ||
339 | * pv_info.name: there are other ways of checking, but this seems most | ||
340 | * obvious to me. | ||
341 | * | ||
342 | * So we can access the "struct lguest_device_desc"s easily, we map that memory | ||
343 | * and store the pointer in the global "lguest_devices". Then we register a | ||
344 | * root device from which all our devices will hang (this seems to be the | ||
345 | * correct sysfs incantation). | ||
346 | * | ||
347 | * Finally we call scan_devices() which adds all the devices found in the | ||
348 | * lguest_devices page. */ | ||
349 | static int __init lguest_devices_init(void) | ||
350 | { | ||
351 | if (strcmp(pv_info.name, "lguest") != 0) | ||
352 | return 0; | ||
353 | |||
354 | if (device_register(&lguest_root) != 0) | ||
355 | panic("Could not register lguest root"); | ||
356 | |||
357 | /* Devices are in a single page above top of "normal" mem */ | ||
358 | lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); | ||
359 | |||
360 | scan_devices(); | ||
361 | return 0; | ||
362 | } | ||
363 | /* We do this after core stuff, but before the drivers. */ | ||
364 | postcore_initcall(lguest_devices_init); | ||
365 | |||
366 | /*D:150 At this point in the journey we used to now wade through the lguest | ||
367 | * devices themselves: net, block and console. Since they're all now virtio | ||
368 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, | ||
369 | * they're kind of boring. But this does mean you'll never experience the | ||
370 | * thrill of reading the forbidden love scene buried deep in the block driver. | ||
371 | * | ||
372 | * "make Launcher" beckons, where we answer questions like "Where do Guests | ||
373 | * come from?", and "What do you do when someone asks for optimization?". */ | ||