author		Jens Axboe <jaxboe@fusionio.com>	2011-05-19 03:46:00 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2011-05-19 03:46:00 -0400
commit		779d530632c98967820d3bbcae15d492fc20301e (patch)
tree		5911c12ead3c028d089e7a8be824f4d9f7590520 /drivers
parent		c9ce9e438b2a9faba74a05a71b3dbe169cde783b (diff)
parent		8ab521506c4dbb144f0c04c55e3d8bec42c1b2b9 (diff)
Merge branches 'for-jens/xen-backend-fixes' and 'for-jens/xen-blkback-v3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen into for-2.6.40/drivers
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/block/Kconfig			21
-rw-r--r--	drivers/block/Makefile			1
-rw-r--r--	drivers/block/xen-blkback/Makefile	3
-rw-r--r--	drivers/block/xen-blkback/blkback.c	824
-rw-r--r--	drivers/block/xen-blkback/common.h	233
-rw-r--r--	drivers/block/xen-blkback/xenbus.c	768
6 files changed, 1850 insertions, 0 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 83c32cb72582..717d6e4e18d3 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -470,6 +470,27 @@ config XEN_BLKDEV_FRONTEND
 	  block device driver. It communicates with a back-end driver
 	  in another domain which drives the actual block device.
 
+config XEN_BLKDEV_BACKEND
+	tristate "Block-device backend driver"
+	depends on XEN_BACKEND
+	help
+	  The block-device backend driver allows the kernel to export its
+	  block devices to other guests via a high-performance shared-memory
+	  interface.
+
+	  The corresponding Linux frontend driver is enabled by the
+	  CONFIG_XEN_BLKDEV_FRONTEND configuration option.
+
+	  The backend driver attaches itself to any block device specified
+	  in the XenBus configuration. There are no limits to what the block
+	  device can be, as long as it has a major and minor number.
+
+	  If you are compiling a kernel to run in a Xen block backend driver
+	  domain (often this is domain 0) you should say Y here. To
+	  compile this driver as a module, choose M here: the module
+	  will be called xen-blkback.
+
+
 config VIRTIO_BLK
 	tristate "Virtio block driver (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && VIRTIO
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 40528ba56d1b..76646e9a1c91 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB)	+= ub.o
 obj-$(CONFIG_BLK_DEV_HD)	+= hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
+obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= xen-blkback/
 obj-$(CONFIG_BLK_DEV_DRBD)	+= drbd/
 obj-$(CONFIG_BLK_DEV_RBD)	+= rbd.o
 
diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile
new file mode 100644
index 000000000000..e491c1b76878
--- /dev/null
+++ b/drivers/block/xen-blkback/Makefile
@@ -0,0 +1,3 @@
obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o

xen-blkback-y	:= blkback.o xenbus.o
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
new file mode 100644
index 000000000000..c73910cc28c9
--- /dev/null
+++ b/drivers/block/xen-blkback/blkback.c
@@ -0,0 +1,824 @@
/******************************************************************************
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/block/xen-blkfront.c
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include <xen/events.h>
#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "common.h"

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int xen_blkif_reqs = 64;
module_param_named(reqs, xen_blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
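
/*
 * Example: "modprobe xen-blkback reqs=128" doubles the request pool,
 * and with it the number of pre-allocated pages and grant handles
 * (xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST of each).
 */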

/* Run-time switchable: /sys/module/xen_blkback/parameters/ */
static unsigned int log_stats;
module_param(log_stats, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each buffer_head that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
	struct xen_blkif *blkif;
	u64 id;
	int nr_pages;
	atomic_t pendcnt;
	unsigned short operation;
	int status;
	struct list_head free_list;
};

#define BLKBACK_INVALID_HANDLE (~0)

struct xen_blkbk {
	struct pending_req *pending_reqs;
	/* List of all 'pending_req' available */
	struct list_head pending_free;
	/* And its spinlock. */
	spinlock_t pending_free_lock;
	wait_queue_head_t pending_free_wq;
	/* The list of all pages that are available. */
	struct page **pending_pages;
	/* And the grant handles that are available. */
	grant_handle_t *pending_grant_handles;
};

static struct xen_blkbk *blkbk;

/*
 * Little helpful macro to figure out the index and virtual address of the
 * pending_pages[..]. For each 'pending_req' we have up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
 * 10 and would index in the pending_pages[..].
 */
static inline int vaddr_pagenr(struct pending_req *req, int seg)
{
	return (req - blkbk->pending_reqs) *
		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]

static inline unsigned long vaddr(struct pending_req *req, int seg)
{
	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
	return (unsigned long)pfn_to_kaddr(pfn);
}

#define pending_handle(_req, _seg) \
	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])

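/*
 * Worked example: for the second pending_req in the array
 * (req - pending_reqs == 1) and its third segment (seg == 2),
 * vaddr_pagenr() returns 1 * 11 + 2 == 13, so that segment uses
 * pending_pages[13] and pending_grant_handles[13].
 */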

static int do_block_io_op(struct xen_blkif *blkif);
static int dispatch_rw_block_io(struct xen_blkif *blkif,
				struct blkif_request *req,
				struct pending_req *pending_req);
static void make_response(struct xen_blkif *blkif, u64 id,
			  unsigned short op, int st);

/*
 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
 */
static struct pending_req *alloc_req(void)
{
	struct pending_req *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	if (!list_empty(&blkbk->pending_free)) {
		req = list_entry(blkbk->pending_free.next, struct pending_req,
				 free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	return req;
}

/*
 * Return the 'pending_req' structure back to the freepool. We also
 * wake up the thread if it was waiting for a free page.
 */
static void free_req(struct pending_req *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	was_empty = list_empty(&blkbk->pending_free);
	list_add(&req->free_list, &blkbk->pending_free);
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	if (was_empty)
		wake_up(&blkbk->pending_free_wq);
}

/*
 * Routines for managing virtual block devices (vbds).
 */
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
			     int operation)
{
	struct xen_vbd *vbd = &blkif->vbd;
	int rc = -EACCES;

	if ((operation != READ) && vbd->readonly)
		goto out;

	if (likely(req->nr_sects)) {
		blkif_sector_t end = req->sector_number + req->nr_sects;

		if (unlikely(end < req->sector_number))
			goto out;
		if (unlikely(end > vbd_sz(vbd)))
			goto out;
	}

	req->dev  = vbd->pdevice;
	req->bdev = vbd->bdev;
	rc = 0;

 out:
	return rc;
}

static void xen_vbd_resize(struct xen_blkif *blkif)
{
	struct xen_vbd *vbd = &blkif->vbd;
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
	unsigned long long new_size = vbd_sz(vbd);

	pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
		blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
	pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
	vbd->size = new_size;
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		pr_warn(DRV_PFX "Error starting transaction");
		return;
	}
	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(vbd));
	if (err) {
		pr_warn(DRV_PFX "Error writing new size");
		goto abort;
	}
	/*
	 * Write the current state; we will use this to synchronize
	 * the front-end. If the current state is "connected" the
	 * front-end will get the new size information online.
	 */
	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
	if (err) {
		pr_warn(DRV_PFX "Error writing the state");
		goto abort;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		pr_warn(DRV_PFX "Error ending transaction");
	return;
abort:
	xenbus_transaction_end(xbt, 1);
}
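
/*
 * Note on the retry above: xenbus_transaction_end() returns -EAGAIN
 * when another writer raced with this transaction, so the whole
 * read-modify-write sequence is restarted from "again:" until it
 * commits cleanly; any other error simply abandons the update.
 */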

/*
 * Notification from the guest OS.
 */
static void blkif_notify_work(struct xen_blkif *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}

/*
 * SCHEDULER FUNCTIONS
 */

static void print_stats(struct xen_blkif *blkif)
{
	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
		current->comm, blkif->st_oo_req,
		blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
}

int xen_blkif_schedule(void *arg)
{
	struct xen_blkif *blkif = arg;
	struct xen_vbd *vbd = &blkif->vbd;

	xen_blkif_get(blkif);

	while (!kthread_should_stop()) {
		if (try_to_freeze())
			continue;
		if (unlikely(vbd->size != vbd_sz(vbd)))
			xen_vbd_resize(blkif);

		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			blkbk->pending_free_wq,
			!list_empty(&blkbk->pending_free) ||
			kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	if (log_stats)
		print_stats(blkif);

	blkif->xenblkd = NULL;
	xen_blkif_put(blkif);

	return 0;
}

struct seg_buf {
	unsigned long buf;
	unsigned int nsec;
};
/*
 * Unmap the grant references, and also remove the M2P over-rides
 * used in the 'pending_req'.
 */
static void xen_blkbk_unmap(struct pending_req *req)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int i, invcount = 0;
	grant_handle_t handle;
	int ret;

	for (i = 0; i < req->nr_pages; i++) {
		handle = pending_handle(req, i);
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
				    GNTMAP_host_map, handle);
		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
		invcount++;
	}

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	BUG_ON(ret);
	/*
	 * Note, we use invcount, not nr_pages, so we can't index
	 * using vaddr(req, i).
	 */
	for (i = 0; i < invcount; i++) {
		ret = m2p_remove_override(
			virt_to_page(unmap[i].host_addr), false);
		if (ret) {
			pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
				 (unsigned long)unmap[i].host_addr);
			continue;
		}
	}
}

static int xen_blkbk_map(struct blkif_request *req,
			 struct pending_req *pending_req,
			 struct seg_buf seg[])
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i;
	int nseg = req->nr_segments;
	int ret = 0;

	/*
	 * Fill out preq.nr_sects with proper amount of sectors, and set up
	 * map[..] with the PFN of the page in our domain with the
	 * corresponding grant reference for each page.
	 */
	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		flags = GNTMAP_host_map;
		if (pending_req->operation != BLKIF_OP_READ)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
				  req->u.rw.seg[i].gref,
				  pending_req->blkif->domid);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
	BUG_ON(ret);

	/*
	 * Now swizzle the MFN in our domain with the MFN from the other domain
	 * so that when we access vaddr(pending_req,i) it has the contents of
	 * the page from the other domain.
	 */
	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
			map[i].handle = BLKBACK_INVALID_HANDLE;
			ret |= 1;
		}

		pending_handle(pending_req, i) = map[i].handle;

		if (ret)
			continue;

		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
				       blkbk->pending_page(pending_req, i), false);
		if (ret) {
			pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
				 (unsigned long)map[i].dev_bus_addr, ret);
			/* We could switch over to GNTTABOP_copy */
			continue;
		}

		seg[i].buf = map[i].dev_bus_addr |
			(req->u.rw.seg[i].first_sect << 9);
	}
	return ret;
}
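
/*
 * On success each seg[i].buf packs both pieces dispatch_rw_block_io()
 * later needs: the bus address of the mapped grant page in the upper
 * bits, and the byte offset of the segment's first sector
 * (first_sect << 9) in the page-offset bits, recovered there with
 * seg[i].buf & ~PAGE_MASK.
 */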

/*
 * Completion callback on the bio's. Called as bio->bi_end_io().
 */

static void __end_block_io_op(struct pending_req *pending_req, int error)
{
	/* An error fails the entire request. */
	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
	    (error == -EOPNOTSUPP)) {
		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if (error) {
		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
			 " error=%d\n", error);
		pending_req->status = BLKIF_RSP_ERROR;
	}

	/*
	 * If all of the bio's have completed it is time to unmap
	 * the grant references associated with 'request' and provide
	 * the proper response on the ring.
	 */
	if (atomic_dec_and_test(&pending_req->pendcnt)) {
		xen_blkbk_unmap(pending_req);
		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
		xen_blkif_put(pending_req->blkif);
		free_req(pending_req);
	}
}

/*
 * bio callback.
 */
static void end_block_io_op(struct bio *bio, int error)
{
	__end_block_io_op(bio->bi_private, error);
	bio_put(bio);
}


/*
 * Function to copy the 'struct blkif_request' from the ring buffer
 * (which has the sectors we want, number of them, grant references, etc),
 * and transmute it to the block API to hand it over to the proper block disk.
 */
static int do_block_io_op(struct xen_blkif *blkif)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	struct blkif_request req;
	struct pending_req *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;

	rc = blk_rings->common.req_cons;
	rp = blk_rings->common.sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	while (rc != rp) {

		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
			break;

		if (kthread_should_stop()) {
			more_to_do = 1;
			break;
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

		switch (blkif->blk_protocol) {
		case BLKIF_PROTOCOL_NATIVE:
			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
			break;
		case BLKIF_PROTOCOL_X86_32:
			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
			break;
		case BLKIF_PROTOCOL_X86_64:
			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
			break;
		default:
			BUG();
		}
		blk_rings->common.req_cons = ++rc; /* before make_response() */

		/* Apply all sanity checks to /private copy/ of request. */
		barrier();

		if (dispatch_rw_block_io(blkif, &req, pending_req))
			break;

		/* Yield point for this unbounded loop. */
		cond_resched();
	}

	return more_to_do;
}
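
/*
 * The loop above follows the standard Xen shared-ring consumer
 * pattern: req_prod is sampled once, rmb() orders that read against
 * the request payloads, each request is copied into a private local
 * before validation, and req_cons is advanced only after the copy,
 * so the frontend can never rewrite a request while we act on it.
 */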

/*
 * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
 * and call 'submit_bio' to pass it to the underlying storage.
 */
static int dispatch_rw_block_io(struct xen_blkif *blkif,
				struct blkif_request *req,
				struct pending_req *pending_req)
{
	struct phys_req preq;
	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int nseg;
	struct bio *bio = NULL;
	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i, nbio = 0;
	int operation;
	struct blk_plug plug;

	switch (req->operation) {
	case BLKIF_OP_READ:
		blkif->st_rd_req++;
		operation = READ;
		break;
	case BLKIF_OP_WRITE:
		blkif->st_wr_req++;
		operation = WRITE_ODIRECT;
		break;
	case BLKIF_OP_FLUSH_DISKCACHE:
		blkif->st_f_req++;
		operation = WRITE_FLUSH;
		break;
	case BLKIF_OP_WRITE_BARRIER:
	default:
		operation = 0; /* make gcc happy */
		goto fail_response;
		break;
	}

	/* Check that the number of segments is sane. */
	nseg = req->nr_segments;
	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
			 nseg);
		/* Haven't submitted any bio's yet. */
		goto fail_response;
	}

	preq.dev           = req->handle;
	preq.sector_number = req->u.rw.sector_number;
	preq.nr_sects      = 0;

	pending_req->blkif     = blkif;
	pending_req->id        = req->id;
	pending_req->operation = req->operation;
	pending_req->status    = BLKIF_RSP_OKAY;
	pending_req->nr_pages  = nseg;

	for (i = 0; i < nseg; i++) {
		seg[i].nsec = req->u.rw.seg[i].last_sect -
			req->u.rw.seg[i].first_sect + 1;
		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
			goto fail_response;
		preq.nr_sects += seg[i].nsec;
	}

	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
		pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
			 operation == READ ? "read" : "write",
			 preq.sector_number,
			 preq.sector_number + preq.nr_sects, preq.dev);
		goto fail_response;
	}

	/*
	 * This check _MUST_ be done after xen_vbd_translate as the preq.bdev
	 * is set there.
	 */
	for (i = 0; i < nseg; i++) {
		if (((int)preq.sector_number|(int)seg[i].nsec) &
		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
			pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
				 blkif->domid);
			goto fail_response;
		}
	}

	/*
	 * If we have failed at this point, we need to undo the M2P override,
	 * set gnttab_set_unmap_op on all of the grant references and perform
	 * the hypercall to unmap the grants - that is all done in
	 * xen_blkbk_unmap.
	 */
	if (xen_blkbk_map(req, pending_req, seg))
		goto fail_flush;

	/* This corresponding xen_blkif_put is done in __end_block_io_op */
	xen_blkif_get(blkif);

	for (i = 0; i < nseg; i++) {
		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     blkbk->pending_page(pending_req, i),
				     seg[i].nsec << 9,
				     seg[i].buf & ~PAGE_MASK) == 0)) {

			bio = bio_alloc(GFP_KERNEL, nseg-i);
			if (unlikely(bio == NULL))
				goto fail_put_bio;

			biolist[nbio++] = bio;
			bio->bi_bdev    = preq.bdev;
			bio->bi_private = pending_req;
			bio->bi_end_io  = end_block_io_op;
			bio->bi_sector  = preq.sector_number;
		}

		preq.sector_number += seg[i].nsec;
	}

	/* This will be hit if the operation was a flush. */
	if (!bio) {
		BUG_ON(operation != WRITE_FLUSH);

		bio = bio_alloc(GFP_KERNEL, 0);
		if (unlikely(bio == NULL))
			goto fail_put_bio;

		biolist[nbio++] = bio;
		bio->bi_bdev    = preq.bdev;
		bio->bi_private = pending_req;
		bio->bi_end_io  = end_block_io_op;
	}

	/*
	 * Set the pending count up front so that the last submit_bio
	 * does not have to call atomic_inc.
	 */
	atomic_set(&pending_req->pendcnt, nbio);

	/* Get a reference count for the disk queue and start sending I/O */
	blk_start_plug(&plug);

	for (i = 0; i < nbio; i++)
		submit_bio(operation, biolist[i]);

	/* Let the I/Os go.. */
	blk_finish_plug(&plug);

	if (operation == READ)
		blkif->st_rd_sect += preq.nr_sects;
	else if (operation == WRITE || operation == WRITE_FLUSH)
		blkif->st_wr_sect += preq.nr_sects;

	return 0;

 fail_flush:
	xen_blkbk_unmap(pending_req);
 fail_response:
	/* Haven't submitted any bio's yet. */
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
	msleep(1); /* back off a bit */
	return -EIO;

 fail_put_bio:
	for (i = 0; i < nbio; i++)
		bio_put(biolist[i]);
	__end_block_io_op(pending_req, -EINVAL);
	msleep(1); /* back off a bit */
	return -EIO;
}
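
/*
 * Note how the bio_add_page() loop splits one blkif request across as
 * many bios as the queue limits demand: whenever bio_add_page()
 * refuses a segment, a fresh bio sized for the remaining nseg - i
 * segments is allocated, chained through biolist[], and pointed at
 * the same pending_req so each completion is counted exactly once.
 */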

/*
 * Put a response on the ring on how the operation fared.
 */
static void make_response(struct xen_blkif *blkif, u64 id,
			  unsigned short op, int st)
{
	struct blkif_response resp;
	unsigned long flags;
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	int more_to_do = 0;
	int notify;

	resp.id        = id;
	resp.operation = op;
	resp.status    = st;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
	/* Place on the response ring for the relevant domain. */
	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_32:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_64:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	default:
		BUG();
	}
	blk_rings->common.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);

	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
		more_to_do = 1;
	}

	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

	if (more_to_do)
		blkif_notify_work(blkif);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}

static int __init xen_blkif_init(void)
{
	int i, mmap_pages;
	int rc = 0;

	if (!xen_pv_domain())
		return -ENODEV;

	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
	if (!blkbk) {
		pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
		return -ENOMEM;
	}

	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
					       xen_blkif_reqs, GFP_KERNEL);
	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
					       mmap_pages, GFP_KERNEL);
	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
					       mmap_pages, GFP_KERNEL);

	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
	    !blkbk->pending_pages) {
		rc = -ENOMEM;
		goto out_of_memory;
	}

	for (i = 0; i < mmap_pages; i++) {
		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
		if (blkbk->pending_pages[i] == NULL) {
			rc = -ENOMEM;
			goto out_of_memory;
		}
	}
	rc = xen_blkif_interface_init();
	if (rc)
		goto failed_init;

	memset(blkbk->pending_reqs, 0,
	       xen_blkif_reqs * sizeof(blkbk->pending_reqs[0]));

	INIT_LIST_HEAD(&blkbk->pending_free);
	spin_lock_init(&blkbk->pending_free_lock);
	init_waitqueue_head(&blkbk->pending_free_wq);

	for (i = 0; i < xen_blkif_reqs; i++)
		list_add_tail(&blkbk->pending_reqs[i].free_list,
			      &blkbk->pending_free);

	rc = xen_blkif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

 out_of_memory:
	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
 failed_init:
	kfree(blkbk->pending_reqs);
	kfree(blkbk->pending_grant_handles);
	for (i = 0; i < mmap_pages; i++) {
		if (blkbk->pending_pages[i])
			__free_page(blkbk->pending_pages[i]);
	}
	kfree(blkbk->pending_pages);
	kfree(blkbk);
	blkbk = NULL;
	return rc;
}

module_init(xen_blkif_init);

MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
new file mode 100644
index 000000000000..9e40b283a468
--- /dev/null
+++ b/drivers/block/xen-blkback/common.h
@@ -0,0 +1,233 @@
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/version.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

#define DRV_PFX "xen-blkback:"
#define DPRINTK(fmt, args...)				\
	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
		 __func__, __LINE__, ##args)


/* Not a real protocol. Used to generate ring structs which contain
 * the elements common to all protocols only. This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places. */
struct blkif_common_request {
	char dummy;
};
struct blkif_common_response {
	char dummy;
};

/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_request {
	uint8_t        operation;    /* BLKIF_OP_???                        */
	uint8_t        nr_segments;  /* number of segments                  */
	blkif_vdev_t   handle;       /* only for read/write requests        */
	uint64_t       id;           /* private guest value, echoed in resp */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_32_response {
	uint64_t       id;           /* copied from request */
	uint8_t        operation;    /* copied from request */
	int16_t        status;       /* BLKIF_RSP_???       */
};
#pragma pack(pop)

/* x86_64 protocol version */
struct blkif_x86_64_request {
	uint8_t        operation;    /* BLKIF_OP_???                        */
	uint8_t        nr_segments;  /* number of segments                  */
	blkif_vdev_t   handle;       /* only for read/write requests        */
	uint64_t       __attribute__((__aligned__(8))) id;
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_64_response {
	uint64_t       __attribute__((__aligned__(8))) id;
	uint8_t        operation;    /* copied from request */
	int16_t        status;       /* BLKIF_RSP_???       */
};

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
		  struct blkif_common_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
		  struct blkif_x86_32_response);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
		  struct blkif_x86_64_response);

union blkif_back_rings {
	struct blkif_back_ring        native;
	struct blkif_common_back_ring common;
	struct blkif_x86_32_back_ring x86_32;
	struct blkif_x86_64_back_ring x86_64;
};

enum blkif_protocol {
	BLKIF_PROTOCOL_NATIVE = 1,
	BLKIF_PROTOCOL_X86_32 = 2,
	BLKIF_PROTOCOL_X86_64 = 3,
};

struct xen_vbd {
	/* What the domain refers to this vbd as. */
	blkif_vdev_t handle;
	/* Non-zero -> read-only */
	unsigned char readonly;
	/* VDISK_xxx */
	unsigned char type;
	/* phys device that this vbd maps to. */
	u32 pdevice;
	struct block_device *bdev;
	/* Cached size parameter. */
	sector_t size;
	bool flush_support;
};

struct backend_info;

struct xen_blkif {
	/* Unique identifier for this interface. */
	domid_t domid;
	unsigned int handle;
	/* Physical parameters of the comms window. */
	unsigned int irq;
	/* Comms information. */
	enum blkif_protocol blk_protocol;
	union blkif_back_rings blk_rings;
	struct vm_struct *blk_ring_area;
	/* The VBD attached to this interface. */
	struct xen_vbd vbd;
	/* Back pointer to the backend_info. */
	struct backend_info *be;
	/* Private fields. */
	spinlock_t blk_ring_lock;
	atomic_t refcnt;

	wait_queue_head_t wq;
	/* One thread per one blkif. */
	struct task_struct *xenblkd;
	unsigned int waiting_reqs;

	/* statistics */
	unsigned long st_print;
	int st_rd_req;
	int st_wr_req;
	int st_oo_req;
	int st_f_req;
	int st_rd_sect;
	int st_wr_sect;

	wait_queue_head_t waiting_to_free;

	grant_handle_t shmem_handle;
	grant_ref_t shmem_ref;
};


#define vbd_sz(_v)	((_v)->bdev->bd_part ?		 \
			 (_v)->bdev->bd_part->nr_sects : \
			 get_capacity((_v)->bdev->bd_disk))

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)				\
	do {						\
		if (atomic_dec_and_test(&(_b)->refcnt))	\
			wake_up(&(_b)->waiting_to_free);\
	} while (0)
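
/*
 * Usage pattern: every in-flight reference to a blkif takes
 * xen_blkif_get() (e.g. dispatch_rw_block_io() before submitting
 * bios) and drops it with xen_blkif_put() on completion (e.g. in
 * __end_block_io_op()); the final put wakes whoever is sleeping on
 * waiting_to_free in xen_blkif_disconnect().
 */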

struct phys_req {
	unsigned short dev;
	unsigned short nr_sects;
	struct block_device *bdev;
	blkif_sector_t sector_number;
};
int xen_blkif_interface_init(void);

int xen_blkif_xenbus_init(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
					struct blkif_x86_32_request *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->u.rw.sector_number = src->sector_number;
	barrier();
	if (n > dst->nr_segments)
		n = dst->nr_segments;
	for (i = 0; i < n; i++)
		dst->u.rw.seg[i] = src->seg[i];
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
					struct blkif_x86_64_request *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->u.rw.sector_number = src->sector_number;
	barrier();
	if (n > dst->nr_segments)
		n = dst->nr_segments;
	for (i = 0; i < n; i++)
		dst->u.rw.seg[i] = src->seg[i];
}
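
/*
 * In both translators the barrier() keeps the compiler from
 * re-reading nr_segments out of the shared ring after the copy: the
 * clamp of n and the segment loop must operate on the snapshot taken
 * above, since the frontend can rewrite the shared entry at any time.
 */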

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
new file mode 100644
index 000000000000..34570823355b
--- /dev/null
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -0,0 +1,768 @@
/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

struct backend_info {
	struct xenbus_device *dev;
	struct xen_blkif *blkif;
	struct xenbus_watch backend_watch;
	unsigned major;
	unsigned minor;
	char *mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	return be->dev;
}
43 | |||
44 | static int blkback_name(struct xen_blkif *blkif, char *buf) | ||
45 | { | ||
46 | char *devpath, *devname; | ||
47 | struct xenbus_device *dev = blkif->be->dev; | ||
48 | |||
49 | devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); | ||
50 | if (IS_ERR(devpath)) | ||
51 | return PTR_ERR(devpath); | ||
52 | |||
53 | devname = strstr(devpath, "/dev/"); | ||
54 | if (devname != NULL) | ||
55 | devname += strlen("/dev/"); | ||
56 | else | ||
57 | devname = devpath; | ||
58 | |||
59 | snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); | ||
60 | kfree(devpath); | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static void xen_update_blkif_status(struct xen_blkif *blkif) | ||
66 | { | ||
67 | int err; | ||
68 | char name[TASK_COMM_LEN]; | ||
69 | |||
70 | /* Not ready to connect? */ | ||
71 | if (!blkif->irq || !blkif->vbd.bdev) | ||
72 | return; | ||
73 | |||
74 | /* Already connected? */ | ||
75 | if (blkif->be->dev->state == XenbusStateConnected) | ||
76 | return; | ||
77 | |||
78 | /* Attempt to connect: exit if we fail to. */ | ||
79 | connect(blkif->be); | ||
80 | if (blkif->be->dev->state != XenbusStateConnected) | ||
81 | return; | ||
82 | |||
83 | err = blkback_name(blkif, name); | ||
84 | if (err) { | ||
85 | xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); | ||
86 | return; | ||
87 | } | ||
88 | |||
89 | err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); | ||
90 | if (err) { | ||
91 | xenbus_dev_error(blkif->be->dev, err, "block flush"); | ||
92 | return; | ||
93 | } | ||
94 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); | ||
95 | |||
96 | blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); | ||
97 | if (IS_ERR(blkif->xenblkd)) { | ||
98 | err = PTR_ERR(blkif->xenblkd); | ||
99 | blkif->xenblkd = NULL; | ||
100 | xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); | ||
101 | } | ||
102 | } | ||

static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;

	blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	memset(blkif, 0, sizeof(*blkif));
	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	blkif->st_print = jiffies;
	init_waitqueue_head(&blkif->waiting_to_free);

	return blkif;
}

static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page)
{
	struct gnttab_map_grant_ref op;

	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
			  GNTMAP_host_map, shared_page, blkif->domid);

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status) {
		DPRINTK("Grant table operation failure !\n");
		return op.status;
	}

	blkif->shmem_ref = shared_page;
	blkif->shmem_handle = op.handle;

	return 0;
}

static void unmap_frontend_page(struct xen_blkif *blkif)
{
	struct gnttab_unmap_grant_ref op;

	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
			    GNTMAP_host_map, blkif->shmem_handle);

	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
		BUG();
}

static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
			 unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
	if (!blkif->blk_ring_area)
		return -ENOMEM;

	err = map_frontend_page(blkif, shared_page);
	if (err) {
		free_vm_area(blkif->blk_ring_area);
		return err;
	}

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		unmap_frontend_page(blkif);
		free_vm_area(blkif->blk_ring_area);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}

static void xen_blkif_disconnect(struct xen_blkif *blkif)
{
	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		blkif->xenblkd = NULL;
	}

	atomic_dec(&blkif->refcnt);
	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
	atomic_inc(&blkif->refcnt);

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		unmap_frontend_page(blkif);
		free_vm_area(blkif->blk_ring_area);
		blkif->blk_rings.common.sring = NULL;
	}
}

void xen_blkif_free(struct xen_blkif *blkif)
{
	if (!atomic_dec_and_test(&blkif->refcnt))
		BUG();
	kmem_cache_free(xen_blkif_cachep, blkif);
}

int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}

/*
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,   "%d\n", be->blkif->st_f_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
	if (error)
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
	if (error)
		goto fail3;

	return 0;

fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}


static void xen_vbd_free(struct xen_vbd *vbd)
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk == NULL) {
		DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
		handle, blkif->domid);
	return 0;
}

382 | static int xen_blkbk_remove(struct xenbus_device *dev) | ||
383 | { | ||
384 | struct backend_info *be = dev_get_drvdata(&dev->dev); | ||
385 | |||
386 | DPRINTK(""); | ||
387 | |||
388 | if (be->major || be->minor) | ||
389 | xenvbd_sysfs_delif(dev); | ||
390 | |||
391 | if (be->backend_watch.node) { | ||
392 | unregister_xenbus_watch(&be->backend_watch); | ||
393 | kfree(be->backend_watch.node); | ||
394 | be->backend_watch.node = NULL; | ||
395 | } | ||
396 | |||
397 | if (be->blkif) { | ||
398 | xen_blkif_disconnect(be->blkif); | ||
399 | xen_vbd_free(&be->blkif->vbd); | ||
400 | xen_blkif_free(be->blkif); | ||
401 | be->blkif = NULL; | ||
402 | } | ||
403 | |||
404 | kfree(be); | ||
405 | dev_set_drvdata(&dev->dev, NULL); | ||
406 | return 0; | ||
407 | } | ||
408 | |||
409 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | ||
410 | struct backend_info *be, int state) | ||
411 | { | ||
412 | struct xenbus_device *dev = be->dev; | ||
413 | int err; | ||
414 | |||
415 | err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", | ||
416 | "%d", state); | ||
417 | if (err) | ||
418 | xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); | ||
419 | |||
420 | return err; | ||
421 | } | ||
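
The matching consumer sits in the frontend, which reads the node back before deciding whether to send BLKIF_OP_FLUSH_DISKCACHE requests. A minimal sketch of such a read (fragment from a frontend's connect path; variable names are illustrative):

        /* Fragment (illustrative): how a frontend might probe the node. */
        unsigned int flush_cache;
        int err;

        /* A missing or unreadable node means no flush support. */
        err = xenbus_scanf(XBT_NIL, dev->otherend,
                           "feature-flush-cache", "%u", &flush_cache);
        if (err <= 0)
                flush_cache = 0;
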
422 | |||
423 | /* | ||
424 | * Entry point to this code when a new device is created. Allocate the basic | ||
425 | * structures, and watch the store waiting for the hotplug scripts to tell us | ||
426 | * the device's physical major and minor numbers. Switch to InitWait. | ||
427 | */ | ||
428 | static int xen_blkbk_probe(struct xenbus_device *dev, | ||
429 | const struct xenbus_device_id *id) | ||
430 | { | ||
431 | int err; | ||
432 | struct backend_info *be = kzalloc(sizeof(struct backend_info), | ||
433 | GFP_KERNEL); | ||
434 | if (!be) { | ||
435 | xenbus_dev_fatal(dev, -ENOMEM, | ||
436 | "allocating backend structure"); | ||
437 | return -ENOMEM; | ||
438 | } | ||
439 | be->dev = dev; | ||
440 | dev_set_drvdata(&dev->dev, be); | ||
441 | |||
442 | be->blkif = xen_blkif_alloc(dev->otherend_id); | ||
443 | if (IS_ERR(be->blkif)) { | ||
444 | err = PTR_ERR(be->blkif); | ||
445 | be->blkif = NULL; | ||
446 | xenbus_dev_fatal(dev, err, "creating block interface"); | ||
447 | goto fail; | ||
448 | } | ||
449 | |||
450 | /* setup back pointer */ | ||
451 | be->blkif->be = be; | ||
452 | |||
453 | err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, | ||
454 | "%s/%s", dev->nodename, "physical-device"); | ||
455 | if (err) | ||
456 | goto fail; | ||
457 | |||
458 | err = xenbus_switch_state(dev, XenbusStateInitWait); | ||
459 | if (err) | ||
460 | goto fail; | ||
461 | |||
462 | return 0; | ||
463 | |||
464 | fail: | ||
465 | DPRINTK("failed"); | ||
466 | xen_blkbk_remove(dev); | ||
467 | return err; | ||
468 | } | ||
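
For reference, the backend_info structure allocated here is declared near the top of xenbus.c; roughly (sketch of the fields used in this file):

        /* Sketch of backend_info as declared earlier in xenbus.c. */
        struct backend_info {
                struct xenbus_device    *dev;
                struct xen_blkif        *blkif;
                struct xenbus_watch     backend_watch;
                unsigned                major;  /* physical device, 0 until  */
                unsigned                minor;  /* the hotplug script runs   */
                char                    *mode;  /* "r" or "w" from xenstore  */
        };

Note that xenbus_watch_pathfmt() fires the watch once immediately upon registration, which is why backend_changed() below tolerates an absent physical-device node.
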
469 | |||
470 | |||
471 | /* | ||
472 | * Callback received when the hotplug scripts have placed the physical-device | ||
473 | * node. Read it and the mode node, and create a vbd. If the frontend is | ||
474 | * ready, connect. | ||
475 | */ | ||
476 | static void backend_changed(struct xenbus_watch *watch, | ||
477 | const char **vec, unsigned int len) | ||
478 | { | ||
479 | int err; | ||
480 | unsigned major; | ||
481 | unsigned minor; | ||
482 | struct backend_info *be | ||
483 | = container_of(watch, struct backend_info, backend_watch); | ||
484 | struct xenbus_device *dev = be->dev; | ||
485 | int cdrom = 0; | ||
486 | char *device_type; | ||
487 | |||
488 | DPRINTK(""); | ||
489 | |||
490 | err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", | ||
491 | &major, &minor); | ||
492 | if (XENBUS_EXIST_ERR(err)) { | ||
493 | /* | ||
494 | * Since this watch will fire once immediately after it is | ||
495 | * registered, we expect this. Ignore it, and wait for the | ||
496 | * hotplug scripts. | ||
497 | */ | ||
498 | return; | ||
499 | } | ||
500 | if (err != 2) { | ||
501 | xenbus_dev_fatal(dev, err, "reading physical-device"); | ||
502 | return; | ||
503 | } | ||
504 | |||
505 | if ((be->major || be->minor) && | ||
506 | ((be->major != major) || (be->minor != minor))) { | ||
507 | pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", | ||
508 | be->major, be->minor, major, minor); | ||
509 | return; | ||
510 | } | ||
511 | |||
512 | be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); | ||
513 | if (IS_ERR(be->mode)) { | ||
514 | err = PTR_ERR(be->mode); | ||
515 | be->mode = NULL; | ||
516 | xenbus_dev_fatal(dev, err, "reading mode"); | ||
517 | return; | ||
518 | } | ||
519 | |||
520 | device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); | ||
521 | if (!IS_ERR(device_type)) { | ||
522 | cdrom = strcmp(device_type, "cdrom") == 0; | ||
523 | kfree(device_type); | ||
524 | } | ||
525 | |||
526 | if (be->major == 0 && be->minor == 0) { | ||
527 | /* Front end dir is a number, which is used as the handle. */ | ||
528 | |||
529 | char *p = strrchr(dev->otherend, '/') + 1; | ||
530 | long handle; | ||
531 | err = strict_strtoul(p, 0, &handle); | ||
532 | if (err) | ||
533 | return; | ||
534 | |||
535 | be->major = major; | ||
536 | be->minor = minor; | ||
537 | |||
538 | err = xen_vbd_create(be->blkif, handle, major, minor, | ||
539 | (NULL == strchr(be->mode, 'w')), cdrom); | ||
540 | if (err) { | ||
541 | be->major = 0; | ||
542 | be->minor = 0; | ||
543 | xenbus_dev_fatal(dev, err, "creating vbd structure"); | ||
544 | return; | ||
545 | } | ||
546 | |||
547 | err = xenvbd_sysfs_addif(dev); | ||
548 | if (err) { | ||
549 | xen_vbd_free(&be->blkif->vbd); | ||
550 | be->major = 0; | ||
551 | be->minor = 0; | ||
552 | xenbus_dev_fatal(dev, err, "creating sysfs entries"); | ||
553 | return; | ||
554 | } | ||
555 | |||
556 | /* We're potentially connected now */ | ||
557 | xen_update_blkif_status(be->blkif); | ||
558 | } | ||
559 | } | ||
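
Taken together, probe() and backend_changed() leave the backend's xenstore directory looking roughly like this once connect() has also run (illustrative values; physical-device and mode are written by the dom0 hotplug scripts/toolstack, the feature and geometry nodes by this driver):

        /local/domain/0/backend/vbd/<frontend-domid>/<handle>/
                physical-device     = "8:1"         (major:minor, %x:%x)
                mode                = "w"           ('w' makes the vbd writable)
                feature-flush-cache = "1"
                sectors             = "20971520"
                sector-size         = "512"
                info                = "0"           (VDISK_* flags)
                state               = "4"           (XenbusStateConnected)
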
560 | |||
561 | |||
562 | /* | ||
563 | * Callback received when the frontend's state changes. | ||
564 | */ | ||
565 | static void frontend_changed(struct xenbus_device *dev, | ||
566 | enum xenbus_state frontend_state) | ||
567 | { | ||
568 | struct backend_info *be = dev_get_drvdata(&dev->dev); | ||
569 | int err; | ||
570 | |||
571 | DPRINTK("%s", xenbus_strstate(frontend_state)); | ||
572 | |||
573 | switch (frontend_state) { | ||
574 | case XenbusStateInitialising: | ||
575 | if (dev->state == XenbusStateClosed) { | ||
576 | pr_info(DRV_PFX "%s: prepare for reconnect\n", | ||
577 | dev->nodename); | ||
578 | xenbus_switch_state(dev, XenbusStateInitWait); | ||
579 | } | ||
580 | break; | ||
581 | |||
582 | case XenbusStateInitialised: | ||
583 | case XenbusStateConnected: | ||
584 | /* | ||
585 | * Ensure we connect even when two watches fire in | ||
586 | * close succession and we miss the intermediate value | ||
587 | * of frontend_state. | ||
588 | */ | ||
589 | if (dev->state == XenbusStateConnected) | ||
590 | break; | ||
591 | |||
592 | /* | ||
593 | * Enforce precondition before potential leak point. | ||
594 | * xen_blkif_disconnect() is idempotent. | ||
595 | */ | ||
596 | xen_blkif_disconnect(be->blkif); | ||
597 | |||
598 | err = connect_ring(be); | ||
599 | if (err) | ||
600 | break; | ||
601 | xen_update_blkif_status(be->blkif); | ||
602 | break; | ||
603 | |||
604 | case XenbusStateClosing: | ||
605 | xen_blkif_disconnect(be->blkif); | ||
606 | xenbus_switch_state(dev, XenbusStateClosing); | ||
607 | break; | ||
608 | |||
609 | case XenbusStateClosed: | ||
610 | xenbus_switch_state(dev, XenbusStateClosed); | ||
611 | if (xenbus_dev_is_online(dev)) | ||
612 | break; | ||
613 | /* fall through if not online */ | ||
614 | case XenbusStateUnknown: | ||
615 | /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ | ||
616 | device_unregister(&dev->dev); | ||
617 | break; | ||
618 | |||
619 | default: | ||
620 | xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | ||
621 | frontend_state); | ||
622 | break; | ||
623 | } | ||
624 | } | ||
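
The switch above is the backend half of the usual xenbus handshake; the expected pairing is roughly:

        frontend state            backend response (this driver)
        --------------            -------------------------------
        Initialising              InitWait (set in probe, restored on reconnect)
        Initialised/Connected     connect_ring(), then Connected via
                                  xen_update_blkif_status()
        Closing                   xen_blkif_disconnect(), Closing
        Closed                    Closed; the device is unregistered unless
                                  the toolstack left it marked online
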
625 | |||
626 | |||
627 | /* ** Connection ** */ | ||
628 | |||
629 | |||
630 | /* | ||
631 | * Write the physical details regarding the block device to the store, and | ||
632 | * switch to Connected state. | ||
633 | */ | ||
634 | static void connect(struct backend_info *be) | ||
635 | { | ||
636 | struct xenbus_transaction xbt; | ||
637 | int err; | ||
638 | struct xenbus_device *dev = be->dev; | ||
639 | |||
640 | DPRINTK("%s", dev->otherend); | ||
641 | |||
642 | /* Supply the information about the device the frontend needs */ | ||
643 | again: | ||
644 | err = xenbus_transaction_start(&xbt); | ||
645 | if (err) { | ||
646 | xenbus_dev_fatal(dev, err, "starting transaction"); | ||
647 | return; | ||
648 | } | ||
649 | |||
650 | err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); | ||
651 | if (err) | ||
652 | goto abort; | ||
653 | |||
654 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | ||
655 | (unsigned long long)vbd_sz(&be->blkif->vbd)); | ||
656 | if (err) { | ||
657 | xenbus_dev_fatal(dev, err, "writing %s/sectors", | ||
658 | dev->nodename); | ||
659 | goto abort; | ||
660 | } | ||
661 | |||
662 | /* FIXME: use a typename instead */ | ||
663 | err = xenbus_printf(xbt, dev->nodename, "info", "%u", | ||
664 | be->blkif->vbd.type | | ||
665 | (be->blkif->vbd.readonly ? VDISK_READONLY : 0)); | ||
666 | if (err) { | ||
667 | xenbus_dev_fatal(dev, err, "writing %s/info", | ||
668 | dev->nodename); | ||
669 | goto abort; | ||
670 | } | ||
671 | err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", | ||
672 | (unsigned long) | ||
673 | bdev_logical_block_size(be->blkif->vbd.bdev)); | ||
674 | if (err) { | ||
675 | xenbus_dev_fatal(dev, err, "writing %s/sector-size", | ||
676 | dev->nodename); | ||
677 | goto abort; | ||
678 | } | ||
679 | |||
680 | err = xenbus_transaction_end(xbt, 0); | ||
681 | if (err == -EAGAIN) | ||
682 | goto again; | ||
683 | if (err) | ||
684 | xenbus_dev_fatal(dev, err, "ending transaction"); | ||
685 | |||
686 | err = xenbus_switch_state(dev, XenbusStateConnected); | ||
687 | if (err) | ||
688 | xenbus_dev_fatal(dev, err, "%s: switching to Connected state", | ||
689 | dev->nodename); | ||
690 | |||
691 | return; | ||
692 | abort: | ||
693 | xenbus_transaction_end(xbt, 1); | ||
694 | } | ||
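
The -EAGAIN/goto again loop is the standard xenstore idiom: transactions are optimistic and must be retried wholesale when another writer races with them. The nodes written here are consumed by the frontend once it sees the backend reach Connected; its read side looks roughly like this (fragment modeled on xen-blkfront):

        /* Fragment (illustrative): frontend reading back the geometry nodes. */
        unsigned long long sectors;
        unsigned long sector_size;
        unsigned int binfo;
        int err;

        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                            "sectors", "%llu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
                            NULL);
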
695 | |||
696 | |||
697 | static int connect_ring(struct backend_info *be) | ||
698 | { | ||
699 | struct xenbus_device *dev = be->dev; | ||
700 | unsigned long ring_ref; | ||
701 | unsigned int evtchn; | ||
702 | char protocol[64] = ""; | ||
703 | int err; | ||
704 | |||
705 | DPRINTK("%s", dev->otherend); | ||
706 | |||
707 | err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", | ||
708 | &ring_ref, "event-channel", "%u", &evtchn, NULL); | ||
709 | if (err) { | ||
710 | xenbus_dev_fatal(dev, err, | ||
711 | "reading %s/ring-ref and event-channel", | ||
712 | dev->otherend); | ||
713 | return err; | ||
714 | } | ||
715 | |||
716 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
717 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | ||
718 | "%63s", protocol, NULL); | ||
719 | if (err) | ||
720 | strcpy(protocol, "unspecified, assuming native"); | ||
721 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | ||
722 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
723 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | ||
724 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | ||
725 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | ||
726 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | ||
727 | else { | ||
728 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | ||
729 | return -1; | ||
730 | } | ||
731 | pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", | ||
732 | ring_ref, evtchn, be->blkif->blk_protocol, protocol); | ||
733 | |||
734 | /* Map the shared frame, irq etc. */ | ||
735 | err = xen_blkif_map(be->blkif, ring_ref, evtchn); | ||
736 | if (err) { | ||
737 | xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", | ||
738 | ring_ref, evtchn); | ||
739 | return err; | ||
740 | } | ||
741 | |||
742 | return 0; | ||
743 | } | ||
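
The protocol strings compared above come from xen/interface/io/protocols.h: a 32-bit frontend under a 64-bit backend (or the reverse) advertises its ABI so the backend can use the matching ring layout when handling requests. On x86 the header boils down to (sketch):

        /* Sketch of xen/interface/io/protocols.h (x86 portion). */
        #define XEN_IO_PROTO_ABI_X86_32         "x86_32-abi"
        #define XEN_IO_PROTO_ABI_X86_64         "x86_64-abi"

        #if defined(__i386__)
        # define XEN_IO_PROTO_ABI_NATIVE        XEN_IO_PROTO_ABI_X86_32
        #elif defined(__x86_64__)
        # define XEN_IO_PROTO_ABI_NATIVE        XEN_IO_PROTO_ABI_X86_64
        #endif
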
744 | |||
745 | |||
746 | /* ** Driver Registration ** */ | ||
747 | |||
748 | |||
749 | static const struct xenbus_device_id xen_blkbk_ids[] = { | ||
750 | { "vbd" }, | ||
751 | { "" } | ||
752 | }; | ||
753 | |||
754 | |||
755 | static struct xenbus_driver xen_blkbk = { | ||
756 | .name = "vbd", | ||
757 | .owner = THIS_MODULE, | ||
758 | .ids = xen_blkbk_ids, | ||
759 | .probe = xen_blkbk_probe, | ||
760 | .remove = xen_blkbk_remove, | ||
761 | .otherend_changed = frontend_changed | ||
762 | }; | ||
763 | |||
764 | |||
765 | int xen_blkif_xenbus_init(void) | ||
766 | { | ||
767 | return xenbus_register_backend(&xen_blkbk); | ||
768 | } | ||
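
xen_blkif_xenbus_init() is the xenbus half of module bring-up; the call site lives in blkback.c in this same series, along the lines of (simplified sketch, error handling trimmed):

        /* Simplified sketch of the blkback.c call site. */
        static int __init xen_blkif_init(void)
        {
                int rc;

                if (!xen_pv_domain())
                        return -ENODEV;

                rc = xen_blkif_interface_init();  /* per-interface state, blkback.c */
                if (rc)
                        return rc;

                return xen_blkif_xenbus_init();
        }
        module_init(xen_blkif_init);
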