diff options
author | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2011-04-18 14:24:23 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2011-04-18 14:30:26 -0400 |
commit | dfc07b13dcacefda6ebdea14584ed8724dc980ef (patch) | |
tree | b4074d80a781146d6577048c237a4502f8fafb9d /drivers/block/xen-blkback | |
parent | d2436eda2e81f1993bfe6349f17f52503bffeff5 (diff) |
xen/blkback: Move it from drivers/xen to drivers/block
.. and modify the Makefile and Kconfig files appropriately.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'drivers/block/xen-blkback')
-rw-r--r-- | drivers/block/xen-blkback/Makefile | 3 | ||||
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 759 | ||||
-rw-r--r-- | drivers/block/xen-blkback/common.h | 142 | ||||
-rw-r--r-- | drivers/block/xen-blkback/interface.c | 185 | ||||
-rw-r--r-- | drivers/block/xen-blkback/vbd.c | 162 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 562 |
6 files changed, 1813 insertions, 0 deletions
diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile new file mode 100644 index 000000000000..f1ae1ff07a4d --- /dev/null +++ b/drivers/block/xen-blkback/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o | ||
2 | |||
3 | xen-blkback-y := blkback.o xenbus.o interface.o vbd.o | ||
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c new file mode 100644 index 000000000000..59a2bae0f35e --- /dev/null +++ b/drivers/block/xen-blkback/blkback.c | |||
@@ -0,0 +1,759 @@ | |||
1 | /****************************************************************************** | ||
2 | * | ||
3 | * Back-end of the driver for virtual block devices. This portion of the | ||
4 | * driver exports a 'unified' block-device interface that can be accessed | ||
5 | * by any operating system that implements a compatible front end. A | ||
6 | * reference front-end implementation can be found in: | ||
7 | * drivers/block/xen-blkfront.c | ||
8 | * | ||
9 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | ||
10 | * Copyright (c) 2005, Christopher Clark | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License version 2 | ||
14 | * as published by the Free Software Foundation; or, when distributed | ||
15 | * separately from the Linux kernel or incorporated into other | ||
16 | * software packages, subject to the following license: | ||
17 | * | ||
18 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
19 | * of this source file (the "Software"), to deal in the Software without | ||
20 | * restriction, including without limitation the rights to use, copy, modify, | ||
21 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
22 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
23 | * the following conditions: | ||
24 | * | ||
25 | * The above copyright notice and this permission notice shall be included in | ||
26 | * all copies or substantial portions of the Software. | ||
27 | * | ||
28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
30 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
31 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
32 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
33 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
34 | * IN THE SOFTWARE. | ||
35 | */ | ||
36 | |||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/kthread.h> | ||
39 | #include <linux/list.h> | ||
40 | #include <linux/delay.h> | ||
41 | #include <linux/freezer.h> | ||
42 | |||
43 | #include <xen/events.h> | ||
44 | #include <xen/page.h> | ||
45 | #include <asm/xen/hypervisor.h> | ||
46 | #include <asm/xen/hypercall.h> | ||
47 | #include "common.h" | ||
48 | |||
49 | #define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) | ||
50 | |||
51 | /* | ||
52 | * These are rather arbitrary. They are fairly large because adjacent requests | ||
53 | * pulled from a communication ring are quite likely to end up being part of | ||
54 | * the same scatter/gather request at the disc. | ||
55 | * | ||
56 | * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** | ||
57 | * | ||
58 | * This will increase the chances of being able to write whole tracks. | ||
59 | * 64 should be enough to keep us competitive with Linux. | ||
60 | */ | ||
61 | static int blkif_reqs = 64; | ||
62 | module_param_named(reqs, blkif_reqs, int, 0); | ||
63 | MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); | ||
64 | |||
65 | /* Run-time switchable: /sys/module/blkback/parameters/ */ | ||
66 | static unsigned int log_stats; | ||
67 | static unsigned int debug_lvl; | ||
68 | module_param(log_stats, int, 0644); | ||
69 | module_param(debug_lvl, int, 0644); | ||
70 | |||
71 | /* | ||
72 | * Each outstanding request that we've passed to the lower device layers has a | ||
73 | * 'pending_req' allocated to it. Each buffer_head that completes decrements | ||
74 | * the pendcnt towards zero. When it hits zero, the specified domain has a | ||
75 | * response queued for it, with the saved 'id' passed back. | ||
76 | */ | ||
77 | struct pending_req { | ||
78 | struct blkif_st *blkif; | ||
79 | u64 id; | ||
80 | int nr_pages; | ||
81 | atomic_t pendcnt; | ||
82 | unsigned short operation; | ||
83 | int status; | ||
84 | struct list_head free_list; | ||
85 | }; | ||
86 | |||
87 | #define BLKBACK_INVALID_HANDLE (~0) | ||
88 | |||
89 | struct xen_blkbk { | ||
90 | struct pending_req *pending_reqs; | ||
91 | /* List of all 'pending_req' available */ | ||
92 | struct list_head pending_free; | ||
93 | /* And its spinlock. */ | ||
94 | spinlock_t pending_free_lock; | ||
95 | wait_queue_head_t pending_free_wq; | ||
96 | /* The list of all pages that are available. */ | ||
97 | struct page **pending_pages; | ||
98 | /* And the grant handles that are available. */ | ||
99 | grant_handle_t *pending_grant_handles; | ||
100 | }; | ||
101 | |||
102 | static struct xen_blkbk *blkbk; | ||
103 | |||
104 | /* | ||
105 | * Little helpful macro to figure out the index and virtual address of the | ||
106 | * pending_pages[..]. For each 'pending_req' we have have up to | ||
107 | * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through | ||
108 | * 10 and would index in the pending_pages[..]. */ | ||
109 | static inline int vaddr_pagenr(struct pending_req *req, int seg) | ||
110 | { | ||
111 | return (req - blkbk->pending_reqs) * | ||
112 | BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; | ||
113 | } | ||
114 | |||
115 | #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] | ||
116 | |||
117 | static inline unsigned long vaddr(struct pending_req *req, int seg) | ||
118 | { | ||
119 | unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); | ||
120 | return (unsigned long)pfn_to_kaddr(pfn); | ||
121 | } | ||
122 | |||
123 | #define pending_handle(_req, _seg) \ | ||
124 | (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) | ||
125 | |||
126 | |||
127 | static int do_block_io_op(struct blkif_st *blkif); | ||
128 | static void dispatch_rw_block_io(struct blkif_st *blkif, | ||
129 | struct blkif_request *req, | ||
130 | struct pending_req *pending_req); | ||
131 | static void make_response(struct blkif_st *blkif, u64 id, | ||
132 | unsigned short op, int st); | ||
133 | |||
134 | /* | ||
135 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. | ||
136 | */ | ||
137 | static struct pending_req *alloc_req(void) | ||
138 | { | ||
139 | struct pending_req *req = NULL; | ||
140 | unsigned long flags; | ||
141 | |||
142 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); | ||
143 | if (!list_empty(&blkbk->pending_free)) { | ||
144 | req = list_entry(blkbk->pending_free.next, struct pending_req, | ||
145 | free_list); | ||
146 | list_del(&req->free_list); | ||
147 | } | ||
148 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); | ||
149 | return req; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Return the 'pending_req' structure back to the freepool. We also | ||
154 | * wake up the thread if it was waiting for a free page. | ||
155 | */ | ||
156 | static void free_req(struct pending_req *req) | ||
157 | { | ||
158 | unsigned long flags; | ||
159 | int was_empty; | ||
160 | |||
161 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); | ||
162 | was_empty = list_empty(&blkbk->pending_free); | ||
163 | list_add(&req->free_list, &blkbk->pending_free); | ||
164 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); | ||
165 | if (was_empty) | ||
166 | wake_up(&blkbk->pending_free_wq); | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Notification from the guest OS. | ||
171 | */ | ||
172 | static void blkif_notify_work(struct blkif_st *blkif) | ||
173 | { | ||
174 | blkif->waiting_reqs = 1; | ||
175 | wake_up(&blkif->wq); | ||
176 | } | ||
177 | |||
178 | irqreturn_t blkif_be_int(int irq, void *dev_id) | ||
179 | { | ||
180 | blkif_notify_work(dev_id); | ||
181 | return IRQ_HANDLED; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * SCHEDULER FUNCTIONS | ||
186 | */ | ||
187 | |||
188 | static void print_stats(struct blkif_st *blkif) | ||
189 | { | ||
190 | printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", | ||
191 | current->comm, blkif->st_oo_req, | ||
192 | blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); | ||
193 | blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); | ||
194 | blkif->st_rd_req = 0; | ||
195 | blkif->st_wr_req = 0; | ||
196 | blkif->st_oo_req = 0; | ||
197 | } | ||
198 | |||
199 | int blkif_schedule(void *arg) | ||
200 | { | ||
201 | struct blkif_st *blkif = arg; | ||
202 | struct vbd *vbd = &blkif->vbd; | ||
203 | |||
204 | blkif_get(blkif); | ||
205 | |||
206 | if (debug_lvl) | ||
207 | printk(KERN_DEBUG "%s: started\n", current->comm); | ||
208 | |||
209 | while (!kthread_should_stop()) { | ||
210 | if (try_to_freeze()) | ||
211 | continue; | ||
212 | if (unlikely(vbd->size != vbd_size(vbd))) | ||
213 | vbd_resize(blkif); | ||
214 | |||
215 | wait_event_interruptible( | ||
216 | blkif->wq, | ||
217 | blkif->waiting_reqs || kthread_should_stop()); | ||
218 | wait_event_interruptible( | ||
219 | blkbk->pending_free_wq, | ||
220 | !list_empty(&blkbk->pending_free) || | ||
221 | kthread_should_stop()); | ||
222 | |||
223 | blkif->waiting_reqs = 0; | ||
224 | smp_mb(); /* clear flag *before* checking for work */ | ||
225 | |||
226 | if (do_block_io_op(blkif)) | ||
227 | blkif->waiting_reqs = 1; | ||
228 | |||
229 | if (log_stats && time_after(jiffies, blkif->st_print)) | ||
230 | print_stats(blkif); | ||
231 | } | ||
232 | |||
233 | if (log_stats) | ||
234 | print_stats(blkif); | ||
235 | if (debug_lvl) | ||
236 | printk(KERN_DEBUG "%s: exiting\n", current->comm); | ||
237 | |||
238 | blkif->xenblkd = NULL; | ||
239 | blkif_put(blkif); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
243 | |||
/*
 * Per-segment I/O description built while translating a ring request.
 */
struct seg_buf {
	/* Bus address of the mapped page OR'ed with the byte offset of the
	 * first sector (first_sect << 9); filled in by xen_blkbk_map(). */
	unsigned long buf;
	/* Number of 512-byte sectors covered by the segment. */
	unsigned int nsec;
};
248 | /* | ||
249 | * Unmap the grant references, and also remove the M2P over-rides | ||
250 | * used in the 'pending_req'. | ||
251 | */ | ||
252 | static void xen_blkbk_unmap(struct pending_req *req) | ||
253 | { | ||
254 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
255 | unsigned int i, invcount = 0; | ||
256 | grant_handle_t handle; | ||
257 | int ret; | ||
258 | |||
259 | for (i = 0; i < req->nr_pages; i++) { | ||
260 | handle = pending_handle(req, i); | ||
261 | if (handle == BLKBACK_INVALID_HANDLE) | ||
262 | continue; | ||
263 | gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), | ||
264 | GNTMAP_host_map, handle); | ||
265 | pending_handle(req, i) = BLKBACK_INVALID_HANDLE; | ||
266 | invcount++; | ||
267 | } | ||
268 | |||
269 | ret = HYPERVISOR_grant_table_op( | ||
270 | GNTTABOP_unmap_grant_ref, unmap, invcount); | ||
271 | BUG_ON(ret); | ||
272 | /* Note, we use invcount, so nr->pages, so we can't index | ||
273 | * using vaddr(req, i). | ||
274 | */ | ||
275 | for (i = 0; i < invcount; i++) { | ||
276 | ret = m2p_remove_override( | ||
277 | virt_to_page(unmap[i].host_addr), false); | ||
278 | if (ret) { | ||
279 | printk(KERN_ALERT "Failed to remove M2P override for " \ | ||
280 | "%lx\n", (unsigned long)unmap[i].host_addr); | ||
281 | continue; | ||
282 | } | ||
283 | } | ||
284 | } | ||
285 | static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, | ||
286 | struct seg_buf seg[]) | ||
287 | { | ||
288 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
289 | int i; | ||
290 | int nseg = req->nr_segments; | ||
291 | int ret = 0; | ||
292 | /* Fill out preq.nr_sects with proper amount of sectors, and setup | ||
293 | * assign map[..] with the PFN of the page in our domain with the | ||
294 | * corresponding grant reference for each page. | ||
295 | */ | ||
296 | for (i = 0; i < nseg; i++) { | ||
297 | uint32_t flags; | ||
298 | |||
299 | flags = GNTMAP_host_map; | ||
300 | if (pending_req->operation != BLKIF_OP_READ) | ||
301 | flags |= GNTMAP_readonly; | ||
302 | gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, | ||
303 | req->u.rw.seg[i].gref, pending_req->blkif->domid); | ||
304 | } | ||
305 | |||
306 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); | ||
307 | BUG_ON(ret); | ||
308 | |||
309 | /* Now swizzel the MFN in our domain with the MFN from the other domain | ||
310 | * so that when we access vaddr(pending_req,i) it has the contents of | ||
311 | * the page from the other domain. | ||
312 | */ | ||
313 | for (i = 0; i < nseg; i++) { | ||
314 | if (unlikely(map[i].status != 0)) { | ||
315 | DPRINTK("invalid buffer -- could not remap it\n"); | ||
316 | map[i].handle = BLKBACK_INVALID_HANDLE; | ||
317 | ret |= 1; | ||
318 | } | ||
319 | |||
320 | pending_handle(pending_req, i) = map[i].handle; | ||
321 | |||
322 | if (ret) | ||
323 | continue; | ||
324 | |||
325 | ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), | ||
326 | blkbk->pending_page(pending_req, i), false); | ||
327 | if (ret) { | ||
328 | printk(KERN_ALERT "Failed to install M2P override for"\ | ||
329 | " %lx (ret: %d)\n", (unsigned long) | ||
330 | map[i].dev_bus_addr, ret); | ||
331 | /* We could switch over to GNTTABOP_copy */ | ||
332 | continue; | ||
333 | } | ||
334 | |||
335 | seg[i].buf = map[i].dev_bus_addr | | ||
336 | (req->u.rw.seg[i].first_sect << 9); | ||
337 | } | ||
338 | return ret; | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * Completion callback on the bio's. Called as bh->b_end_io() | ||
343 | */ | ||
344 | |||
345 | static void __end_block_io_op(struct pending_req *pending_req, int error) | ||
346 | { | ||
347 | /* An error fails the entire request. */ | ||
348 | if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && | ||
349 | (error == -EOPNOTSUPP)) { | ||
350 | DPRINTK("blkback: write barrier op failed, not supported\n"); | ||
351 | blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); | ||
352 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; | ||
353 | } else if (error) { | ||
354 | DPRINTK("Buffer not up-to-date at end of operation, " | ||
355 | "error=%d\n", error); | ||
356 | pending_req->status = BLKIF_RSP_ERROR; | ||
357 | } | ||
358 | |||
359 | /* If all of the bio's have completed it is time to unmap | ||
360 | * the grant references associated with 'request' and provide | ||
361 | * the proper response on the ring. | ||
362 | */ | ||
363 | if (atomic_dec_and_test(&pending_req->pendcnt)) { | ||
364 | xen_blkbk_unmap(pending_req); | ||
365 | make_response(pending_req->blkif, pending_req->id, | ||
366 | pending_req->operation, pending_req->status); | ||
367 | blkif_put(pending_req->blkif); | ||
368 | free_req(pending_req); | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * bio callback. | ||
374 | */ | ||
375 | static void end_block_io_op(struct bio *bio, int error) | ||
376 | { | ||
377 | __end_block_io_op(bio->bi_private, error); | ||
378 | bio_put(bio); | ||
379 | } | ||
380 | |||
381 | |||
382 | |||
383 | /* | ||
384 | * Function to copy the from the ring buffer the 'struct blkif_request' | ||
385 | * (which has the sectors we want, number of them, grant references, etc), | ||
386 | * and transmute it to the block API to hand it over to the proper block disk. | ||
387 | */ | ||
388 | static int do_block_io_op(struct blkif_st *blkif) | ||
389 | { | ||
390 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | ||
391 | struct blkif_request req; | ||
392 | struct pending_req *pending_req; | ||
393 | RING_IDX rc, rp; | ||
394 | int more_to_do = 0; | ||
395 | |||
396 | rc = blk_rings->common.req_cons; | ||
397 | rp = blk_rings->common.sring->req_prod; | ||
398 | rmb(); /* Ensure we see queued requests up to 'rp'. */ | ||
399 | |||
400 | while (rc != rp) { | ||
401 | |||
402 | if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) | ||
403 | break; | ||
404 | |||
405 | if (kthread_should_stop()) { | ||
406 | more_to_do = 1; | ||
407 | break; | ||
408 | } | ||
409 | |||
410 | pending_req = alloc_req(); | ||
411 | if (NULL == pending_req) { | ||
412 | blkif->st_oo_req++; | ||
413 | more_to_do = 1; | ||
414 | break; | ||
415 | } | ||
416 | |||
417 | switch (blkif->blk_protocol) { | ||
418 | case BLKIF_PROTOCOL_NATIVE: | ||
419 | memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); | ||
420 | break; | ||
421 | case BLKIF_PROTOCOL_X86_32: | ||
422 | blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); | ||
423 | break; | ||
424 | case BLKIF_PROTOCOL_X86_64: | ||
425 | blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); | ||
426 | break; | ||
427 | default: | ||
428 | BUG(); | ||
429 | } | ||
430 | blk_rings->common.req_cons = ++rc; /* before make_response() */ | ||
431 | |||
432 | /* Apply all sanity checks to /private copy/ of request. */ | ||
433 | barrier(); | ||
434 | |||
435 | switch (req.operation) { | ||
436 | case BLKIF_OP_READ: | ||
437 | blkif->st_rd_req++; | ||
438 | dispatch_rw_block_io(blkif, &req, pending_req); | ||
439 | break; | ||
440 | case BLKIF_OP_WRITE_BARRIER: | ||
441 | blkif->st_br_req++; | ||
442 | /* fall through */ | ||
443 | case BLKIF_OP_WRITE: | ||
444 | blkif->st_wr_req++; | ||
445 | dispatch_rw_block_io(blkif, &req, pending_req); | ||
446 | break; | ||
447 | default: | ||
448 | /* A good sign something is wrong: sleep for a while to | ||
449 | * avoid excessive CPU consumption by a bad guest. */ | ||
450 | msleep(1); | ||
451 | DPRINTK("error: unknown block io operation [%d]\n", | ||
452 | req.operation); | ||
453 | make_response(blkif, req.id, req.operation, | ||
454 | BLKIF_RSP_ERROR); | ||
455 | free_req(pending_req); | ||
456 | break; | ||
457 | } | ||
458 | |||
459 | /* Yield point for this unbounded loop. */ | ||
460 | cond_resched(); | ||
461 | } | ||
462 | |||
463 | return more_to_do; | ||
464 | } | ||
465 | |||
466 | /* | ||
467 | * Transumation of the 'struct blkif_request' to a proper 'struct bio' | ||
468 | * and call the 'submit_bio' to pass it to the underlaying storage. | ||
469 | */ | ||
470 | static void dispatch_rw_block_io(struct blkif_st *blkif, | ||
471 | struct blkif_request *req, | ||
472 | struct pending_req *pending_req) | ||
473 | { | ||
474 | struct phys_req preq; | ||
475 | struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
476 | unsigned int nseg; | ||
477 | struct bio *bio = NULL; | ||
478 | struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
479 | int i, nbio = 0; | ||
480 | int operation; | ||
481 | struct blk_plug plug; | ||
482 | |||
483 | switch (req->operation) { | ||
484 | case BLKIF_OP_READ: | ||
485 | operation = READ; | ||
486 | break; | ||
487 | case BLKIF_OP_WRITE: | ||
488 | operation = WRITE; | ||
489 | break; | ||
490 | case BLKIF_OP_WRITE_BARRIER: | ||
491 | operation = WRITE_BARRIER; | ||
492 | break; | ||
493 | default: | ||
494 | operation = 0; /* make gcc happy */ | ||
495 | BUG(); | ||
496 | } | ||
497 | |||
498 | /* Check that the number of segments is sane. */ | ||
499 | nseg = req->nr_segments; | ||
500 | if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || | ||
501 | unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { | ||
502 | DPRINTK("Bad number of segments in request (%d)\n", nseg); | ||
503 | /* Haven't submitted any bio's yet. */ | ||
504 | goto fail_response; | ||
505 | } | ||
506 | |||
507 | preq.dev = req->handle; | ||
508 | preq.sector_number = req->u.rw.sector_number; | ||
509 | preq.nr_sects = 0; | ||
510 | |||
511 | pending_req->blkif = blkif; | ||
512 | pending_req->id = req->id; | ||
513 | pending_req->operation = req->operation; | ||
514 | pending_req->status = BLKIF_RSP_OKAY; | ||
515 | pending_req->nr_pages = nseg; | ||
516 | |||
517 | for (i = 0; i < nseg; i++) { | ||
518 | seg[i].nsec = req->u.rw.seg[i].last_sect - | ||
519 | req->u.rw.seg[i].first_sect + 1; | ||
520 | if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || | ||
521 | (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) | ||
522 | goto fail_response; | ||
523 | preq.nr_sects += seg[i].nsec; | ||
524 | |||
525 | } | ||
526 | |||
527 | if (vbd_translate(&preq, blkif, operation) != 0) { | ||
528 | DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", | ||
529 | operation == READ ? "read" : "write", | ||
530 | preq.sector_number, | ||
531 | preq.sector_number + preq.nr_sects, preq.dev); | ||
532 | goto fail_response; | ||
533 | } | ||
534 | /* This check _MUST_ be done after vbd_translate as the preq.bdev | ||
535 | * is set there. */ | ||
536 | for (i = 0; i < nseg; i++) { | ||
537 | if (((int)preq.sector_number|(int)seg[i].nsec) & | ||
538 | ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { | ||
539 | DPRINTK("Misaligned I/O request from domain %d", | ||
540 | blkif->domid); | ||
541 | goto fail_response; | ||
542 | } | ||
543 | } | ||
544 | /* If we have failed at this point, we need to undo the M2P override, | ||
545 | * set gnttab_set_unmap_op on all of the grant references and perform | ||
546 | * the hypercall to unmap the grants - that is all done in | ||
547 | * xen_blkbk_unmap. | ||
548 | */ | ||
549 | if (xen_blkbk_map(req, pending_req, seg)) | ||
550 | goto fail_flush; | ||
551 | |||
552 | /* This corresponding blkif_put is done in __end_block_io_op */ | ||
553 | blkif_get(blkif); | ||
554 | |||
555 | for (i = 0; i < nseg; i++) { | ||
556 | while ((bio == NULL) || | ||
557 | (bio_add_page(bio, | ||
558 | blkbk->pending_page(pending_req, i), | ||
559 | seg[i].nsec << 9, | ||
560 | seg[i].buf & ~PAGE_MASK) == 0)) { | ||
561 | |||
562 | bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); | ||
563 | if (unlikely(bio == NULL)) | ||
564 | goto fail_put_bio; | ||
565 | |||
566 | bio->bi_bdev = preq.bdev; | ||
567 | bio->bi_private = pending_req; | ||
568 | bio->bi_end_io = end_block_io_op; | ||
569 | bio->bi_sector = preq.sector_number; | ||
570 | } | ||
571 | |||
572 | preq.sector_number += seg[i].nsec; | ||
573 | } | ||
574 | |||
575 | /* This will be hit if the operation was a barrier. */ | ||
576 | if (!bio) { | ||
577 | BUG_ON(operation != WRITE_BARRIER); | ||
578 | bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); | ||
579 | if (unlikely(bio == NULL)) | ||
580 | goto fail_put_bio; | ||
581 | |||
582 | bio->bi_bdev = preq.bdev; | ||
583 | bio->bi_private = pending_req; | ||
584 | bio->bi_end_io = end_block_io_op; | ||
585 | bio->bi_sector = -1; | ||
586 | } | ||
587 | |||
588 | |||
589 | /* We set it one so that the last submit_bio does not have to call | ||
590 | * atomic_inc. | ||
591 | */ | ||
592 | atomic_set(&pending_req->pendcnt, nbio); | ||
593 | |||
594 | /* Get a reference count for the disk queue and start sending I/O */ | ||
595 | blk_start_plug(&plug); | ||
596 | |||
597 | for (i = 0; i < nbio; i++) | ||
598 | submit_bio(operation, biolist[i]); | ||
599 | |||
600 | blk_finish_plug(&plug); | ||
601 | /* Let the I/Os go.. */ | ||
602 | |||
603 | if (operation == READ) | ||
604 | blkif->st_rd_sect += preq.nr_sects; | ||
605 | else if (operation == WRITE || operation == WRITE_BARRIER) | ||
606 | blkif->st_wr_sect += preq.nr_sects; | ||
607 | |||
608 | return; | ||
609 | |||
610 | fail_flush: | ||
611 | xen_blkbk_unmap(pending_req); | ||
612 | fail_response: | ||
613 | /* Haven't submitted any bio's yet. */ | ||
614 | make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); | ||
615 | free_req(pending_req); | ||
616 | msleep(1); /* back off a bit */ | ||
617 | return; | ||
618 | |||
619 | fail_put_bio: | ||
620 | for (i = 0; i < (nbio-1); i++) | ||
621 | bio_put(biolist[i]); | ||
622 | __end_block_io_op(pending_req, -EINVAL); | ||
623 | msleep(1); /* back off a bit */ | ||
624 | return; | ||
625 | } | ||
626 | |||
627 | |||
628 | |||
629 | /* | ||
630 | * Put a response on the ring on how the operation fared. | ||
631 | */ | ||
632 | static void make_response(struct blkif_st *blkif, u64 id, | ||
633 | unsigned short op, int st) | ||
634 | { | ||
635 | struct blkif_response resp; | ||
636 | unsigned long flags; | ||
637 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | ||
638 | int more_to_do = 0; | ||
639 | int notify; | ||
640 | |||
641 | resp.id = id; | ||
642 | resp.operation = op; | ||
643 | resp.status = st; | ||
644 | |||
645 | spin_lock_irqsave(&blkif->blk_ring_lock, flags); | ||
646 | /* Place on the response ring for the relevant domain. */ | ||
647 | switch (blkif->blk_protocol) { | ||
648 | case BLKIF_PROTOCOL_NATIVE: | ||
649 | memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), | ||
650 | &resp, sizeof(resp)); | ||
651 | break; | ||
652 | case BLKIF_PROTOCOL_X86_32: | ||
653 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), | ||
654 | &resp, sizeof(resp)); | ||
655 | break; | ||
656 | case BLKIF_PROTOCOL_X86_64: | ||
657 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), | ||
658 | &resp, sizeof(resp)); | ||
659 | break; | ||
660 | default: | ||
661 | BUG(); | ||
662 | } | ||
663 | blk_rings->common.rsp_prod_pvt++; | ||
664 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); | ||
665 | if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { | ||
666 | /* | ||
667 | * Tail check for pending requests. Allows frontend to avoid | ||
668 | * notifications if requests are already in flight (lower | ||
669 | * overheads and promotes batching). | ||
670 | */ | ||
671 | RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); | ||
672 | |||
673 | } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { | ||
674 | more_to_do = 1; | ||
675 | } | ||
676 | |||
677 | spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); | ||
678 | |||
679 | if (more_to_do) | ||
680 | blkif_notify_work(blkif); | ||
681 | if (notify) | ||
682 | notify_remote_via_irq(blkif->irq); | ||
683 | } | ||
684 | |||
685 | static int __init blkif_init(void) | ||
686 | { | ||
687 | int i, mmap_pages; | ||
688 | int rc = 0; | ||
689 | |||
690 | if (!xen_pv_domain()) | ||
691 | return -ENODEV; | ||
692 | |||
693 | blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); | ||
694 | if (!blkbk) { | ||
695 | printk(KERN_ALERT "%s: out of memory!\n", __func__); | ||
696 | return -ENOMEM; | ||
697 | } | ||
698 | |||
699 | mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
700 | |||
701 | blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * | ||
702 | blkif_reqs, GFP_KERNEL); | ||
703 | blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * | ||
704 | mmap_pages, GFP_KERNEL); | ||
705 | blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * | ||
706 | mmap_pages, GFP_KERNEL); | ||
707 | |||
708 | if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || | ||
709 | !blkbk->pending_pages) { | ||
710 | rc = -ENOMEM; | ||
711 | goto out_of_memory; | ||
712 | } | ||
713 | |||
714 | for (i = 0; i < mmap_pages; i++) { | ||
715 | blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; | ||
716 | blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); | ||
717 | if (blkbk->pending_pages[i] == NULL) { | ||
718 | rc = -ENOMEM; | ||
719 | goto out_of_memory; | ||
720 | } | ||
721 | } | ||
722 | rc = blkif_interface_init(); | ||
723 | if (rc) | ||
724 | goto failed_init; | ||
725 | |||
726 | memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); | ||
727 | |||
728 | INIT_LIST_HEAD(&blkbk->pending_free); | ||
729 | spin_lock_init(&blkbk->pending_free_lock); | ||
730 | init_waitqueue_head(&blkbk->pending_free_wq); | ||
731 | |||
732 | for (i = 0; i < blkif_reqs; i++) | ||
733 | list_add_tail(&blkbk->pending_reqs[i].free_list, | ||
734 | &blkbk->pending_free); | ||
735 | |||
736 | rc = blkif_xenbus_init(); | ||
737 | if (rc) | ||
738 | goto failed_init; | ||
739 | |||
740 | return 0; | ||
741 | |||
742 | out_of_memory: | ||
743 | printk(KERN_ERR "%s: out of memory\n", __func__); | ||
744 | failed_init: | ||
745 | kfree(blkbk->pending_reqs); | ||
746 | kfree(blkbk->pending_grant_handles); | ||
747 | for (i = 0; i < mmap_pages; i++) { | ||
748 | if (blkbk->pending_pages[i]) | ||
749 | __free_page(blkbk->pending_pages[i]); | ||
750 | } | ||
751 | kfree(blkbk->pending_pages); | ||
752 | kfree(blkbk); | ||
753 | blkbk = NULL; | ||
754 | return rc; | ||
755 | } | ||
756 | |||
757 | module_init(blkif_init); | ||
758 | |||
759 | MODULE_LICENSE("Dual BSD/GPL"); | ||
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h new file mode 100644 index 000000000000..6257c1106591 --- /dev/null +++ b/drivers/block/xen-blkback/common.h | |||
@@ -0,0 +1,142 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License version 2 | ||
4 | * as published by the Free Software Foundation; or, when distributed | ||
5 | * separately from the Linux kernel or incorporated into other | ||
6 | * software packages, subject to the following license: | ||
7 | * | ||
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
9 | * of this source file (the "Software"), to deal in the Software without | ||
10 | * restriction, including without limitation the rights to use, copy, modify, | ||
11 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
12 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
13 | * the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice shall be included in | ||
16 | * all copies or substantial portions of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
24 | * IN THE SOFTWARE. | ||
25 | */ | ||
26 | |||
27 | #ifndef __BLKIF__BACKEND__COMMON_H__ | ||
28 | #define __BLKIF__BACKEND__COMMON_H__ | ||
29 | |||
30 | #include <linux/version.h> | ||
31 | #include <linux/module.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/blkdev.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | #include <linux/wait.h> | ||
37 | #include <linux/io.h> | ||
38 | #include <asm/setup.h> | ||
39 | #include <asm/pgalloc.h> | ||
40 | #include <asm/hypervisor.h> | ||
41 | #include <xen/blkif.h> | ||
42 | #include <xen/grant_table.h> | ||
43 | #include <xen/xenbus.h> | ||
44 | |||
/*
 * Debug print helper: forwards to pr_debug(), prefixing the message with
 * the source file name and line number of the call site.
 */
#define DPRINTK(_f, _a...)			\
	pr_debug("(file=%s, line=%d) " _f,	\
		 __FILE__ , __LINE__ , ## _a)
48 | |||
/*
 * Per-interface virtual block device: ties the handle the frontend uses to
 * the physical block device opened by vbd_create().
 */
struct vbd {
	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
	unsigned char  readonly;    /* Non-zero -> read-only */
	unsigned char  type;        /* VDISK_xxx */
	u32            pdevice;     /* phys device that this vbd maps to */
	struct block_device *bdev;  /* opened device; NULL once vbd_free()d */
	sector_t       size;        /* Cached size parameter */
};
57 | |||
58 | struct backend_info; | ||
59 | |||
/*
 * State of one backend block interface (one frontend connection).
 * Allocated zeroed by blkif_alloc() and released by blkif_free().
 */
struct blkif_st {
	/* Unique identifier for this interface. */
	domid_t           domid;
	unsigned int      handle;
	/* Physical parameters of the comms window. */
	unsigned int      irq;		/* 0 until blkif_map() binds the event channel */
	/* Comms information. */
	enum blkif_protocol blk_protocol;
	union blkif_back_rings blk_rings;
	struct vm_struct *blk_ring_area;	/* VM area the shared ring is mapped into */
	/* The VBD attached to this interface. */
	struct vbd        vbd;
	/* Back pointer to the backend_info. */
	struct backend_info *be;
	/* Private fields. */
	spinlock_t       blk_ring_lock;
	atomic_t         refcnt;	/* starts at 1; see blkif_get()/blkif_put() */

	wait_queue_head_t   wq;
	/* One thread per one blkif. */
	struct task_struct  *xenblkd;
	unsigned int        waiting_reqs;

	/* statistics (exported through the sysfs "statistics" group) */
	unsigned long       st_print;
	int                 st_rd_req;
	int                 st_wr_req;
	int                 st_oo_req;
	int                 st_br_req;
	int                 st_rd_sect;
	int                 st_wr_sect;

	/* Woken by blkif_put() when refcnt drops to zero. */
	wait_queue_head_t waiting_to_free;

	/* Grant handle and reference of the mapped shared ring page. */
	grant_handle_t shmem_handle;
	grant_ref_t    shmem_ref;
};
97 | |||
/* Allocate a zeroed blkif for @domid; returns ERR_PTR(-ENOMEM) on failure. */
struct blkif_st *blkif_alloc(domid_t domid);
/* Stop the worker thread, wait for outstanding references, unbind and unmap. */
void blkif_disconnect(struct blkif_st *blkif);
/* Drop the final reference (BUGs if it is not the last) and free the blkif. */
void blkif_free(struct blkif_st *blkif);
/* Map the frontend's shared ring page and bind its event channel. */
int blkif_map(struct blkif_st *blkif, unsigned long shared_page,
	      unsigned int evtchn);
/* Re-read the backing device size and publish it to the frontend via xenstore. */
void vbd_resize(struct blkif_st *blkif);
104 | |||
/* Take a reference on a blkif. */
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
/*
 * Drop a reference on a blkif.  When the count reaches zero, wake anyone
 * sleeping on waiting_to_free (blkif_disconnect() waits there).
 */
#define blkif_put(_b)					\
	do {						\
		if (atomic_dec_and_test(&(_b)->refcnt))	\
			wake_up(&(_b)->waiting_to_free);\
	} while (0)
111 | |||
/* Create a vbd: open device major:minor and attach it to blkif->vbd. */
int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major,
	       unsigned minor, int readonly, int cdrom);
/* Release the block device held by the vbd, if any. */
void vbd_free(struct vbd *vbd);

/* Size of the backing device, in sectors. */
unsigned long long vbd_size(struct vbd *vbd);
/* VDISK_* type flags, including VDISK_READONLY for read-only vbds. */
unsigned int vbd_info(struct vbd *vbd);
/* Logical block size of the backing device. */
unsigned long vbd_secsize(struct vbd *vbd);
120 | |||
/*
 * A request expressed against the physical device.  The caller fills in
 * sector_number and nr_sects; vbd_translate() fills in dev and bdev.
 */
struct phys_req {
	unsigned short       dev;		/* device number, from vbd->pdevice */
	unsigned short       nr_sects;		/* length of the request, in sectors */
	struct block_device *bdev;		/* backing block device */
	blkif_sector_t       sector_number;	/* first sector of the request */
};
127 | |||
/* Validate a request against the vbd and fill in the physical device fields. */
int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation);

/* Create the slab cache used by blkif_alloc(). */
int blkif_interface_init(void);

/* NOTE(review): presumably registers the xenbus backend driver - defined outside this view. */
int blkif_xenbus_init(void);

/* Event-channel interrupt handler installed by blkif_map(). */
irqreturn_t blkif_be_int(int irq, void *dev_id);
/* Thread function of the per-interface worker thread (blkif->xenblkd). */
int blkif_schedule(void *arg);

/* Write "feature-barrier" = @state under the backend's xenstore node. */
int blkback_barrier(struct xenbus_transaction xbt,
		    struct backend_info *be, int state);

/* Return the xenbus device backing @be. */
struct xenbus_device *blkback_xenbus(struct backend_info *be);
141 | |||
142 | #endif /* __BLKIF__BACKEND__COMMON_H__ */ | ||
diff --git a/drivers/block/xen-blkback/interface.c b/drivers/block/xen-blkback/interface.c new file mode 100644 index 000000000000..163aed41e825 --- /dev/null +++ b/drivers/block/xen-blkback/interface.c | |||
@@ -0,0 +1,185 @@ | |||
1 | /****************************************************************************** | ||
2 | * Block-device interface management. | ||
3 | * | ||
4 | * Copyright (c) 2004, Keir Fraser | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License version 2 | ||
8 | * as published by the Free Software Foundation; or, when distributed | ||
9 | * separately from the Linux kernel or incorporated into other | ||
10 | * software packages, subject to the following license: | ||
11 | * | ||
12 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
13 | * of this source file (the "Software"), to deal in the Software without | ||
14 | * restriction, including without limitation the rights to use, copy, modify, | ||
15 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
16 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
17 | * the following conditions: | ||
18 | * | ||
19 | * The above copyright notice and this permission notice shall be included in | ||
20 | * all copies or substantial portions of the Software. | ||
21 | * | ||
22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
24 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
25 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
26 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
27 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
28 | * IN THE SOFTWARE. | ||
29 | */ | ||
30 | |||
31 | #include "common.h" | ||
32 | #include <xen/events.h> | ||
33 | #include <xen/grant_table.h> | ||
34 | #include <linux/kthread.h> | ||
35 | |||
36 | static struct kmem_cache *blkif_cachep; | ||
37 | |||
38 | struct blkif_st *blkif_alloc(domid_t domid) | ||
39 | { | ||
40 | struct blkif_st *blkif; | ||
41 | |||
42 | blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); | ||
43 | if (!blkif) | ||
44 | return ERR_PTR(-ENOMEM); | ||
45 | |||
46 | memset(blkif, 0, sizeof(*blkif)); | ||
47 | blkif->domid = domid; | ||
48 | spin_lock_init(&blkif->blk_ring_lock); | ||
49 | atomic_set(&blkif->refcnt, 1); | ||
50 | init_waitqueue_head(&blkif->wq); | ||
51 | blkif->st_print = jiffies; | ||
52 | init_waitqueue_head(&blkif->waiting_to_free); | ||
53 | |||
54 | return blkif; | ||
55 | } | ||
56 | |||
57 | static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) | ||
58 | { | ||
59 | struct gnttab_map_grant_ref op; | ||
60 | |||
61 | gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, | ||
62 | GNTMAP_host_map, shared_page, blkif->domid); | ||
63 | |||
64 | if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | ||
65 | BUG(); | ||
66 | |||
67 | if (op.status) { | ||
68 | DPRINTK(" Grant table operation failure !\n"); | ||
69 | return op.status; | ||
70 | } | ||
71 | |||
72 | blkif->shmem_ref = shared_page; | ||
73 | blkif->shmem_handle = op.handle; | ||
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | static void unmap_frontend_page(struct blkif_st *blkif) | ||
79 | { | ||
80 | struct gnttab_unmap_grant_ref op; | ||
81 | |||
82 | gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, | ||
83 | GNTMAP_host_map, blkif->shmem_handle); | ||
84 | |||
85 | if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | ||
86 | BUG(); | ||
87 | } | ||
88 | |||
89 | int blkif_map(struct blkif_st *blkif, unsigned long shared_page, | ||
90 | unsigned int evtchn) | ||
91 | { | ||
92 | int err; | ||
93 | |||
94 | /* Already connected through? */ | ||
95 | if (blkif->irq) | ||
96 | return 0; | ||
97 | |||
98 | blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); | ||
99 | if (!blkif->blk_ring_area) | ||
100 | return -ENOMEM; | ||
101 | |||
102 | err = map_frontend_page(blkif, shared_page); | ||
103 | if (err) { | ||
104 | free_vm_area(blkif->blk_ring_area); | ||
105 | return err; | ||
106 | } | ||
107 | |||
108 | switch (blkif->blk_protocol) { | ||
109 | case BLKIF_PROTOCOL_NATIVE: | ||
110 | { | ||
111 | struct blkif_sring *sring; | ||
112 | sring = (struct blkif_sring *)blkif->blk_ring_area->addr; | ||
113 | BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); | ||
114 | break; | ||
115 | } | ||
116 | case BLKIF_PROTOCOL_X86_32: | ||
117 | { | ||
118 | struct blkif_x86_32_sring *sring_x86_32; | ||
119 | sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; | ||
120 | BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); | ||
121 | break; | ||
122 | } | ||
123 | case BLKIF_PROTOCOL_X86_64: | ||
124 | { | ||
125 | struct blkif_x86_64_sring *sring_x86_64; | ||
126 | sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; | ||
127 | BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); | ||
128 | break; | ||
129 | } | ||
130 | default: | ||
131 | BUG(); | ||
132 | } | ||
133 | |||
134 | err = bind_interdomain_evtchn_to_irqhandler( | ||
135 | blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); | ||
136 | if (err < 0) { | ||
137 | unmap_frontend_page(blkif); | ||
138 | free_vm_area(blkif->blk_ring_area); | ||
139 | blkif->blk_rings.common.sring = NULL; | ||
140 | return err; | ||
141 | } | ||
142 | blkif->irq = err; | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | void blkif_disconnect(struct blkif_st *blkif) | ||
148 | { | ||
149 | if (blkif->xenblkd) { | ||
150 | kthread_stop(blkif->xenblkd); | ||
151 | blkif->xenblkd = NULL; | ||
152 | } | ||
153 | |||
154 | atomic_dec(&blkif->refcnt); | ||
155 | wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); | ||
156 | atomic_inc(&blkif->refcnt); | ||
157 | |||
158 | if (blkif->irq) { | ||
159 | unbind_from_irqhandler(blkif->irq, blkif); | ||
160 | blkif->irq = 0; | ||
161 | } | ||
162 | |||
163 | if (blkif->blk_rings.common.sring) { | ||
164 | unmap_frontend_page(blkif); | ||
165 | free_vm_area(blkif->blk_ring_area); | ||
166 | blkif->blk_rings.common.sring = NULL; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | void blkif_free(struct blkif_st *blkif) | ||
171 | { | ||
172 | if (!atomic_dec_and_test(&blkif->refcnt)) | ||
173 | BUG(); | ||
174 | kmem_cache_free(blkif_cachep, blkif); | ||
175 | } | ||
176 | |||
177 | int __init blkif_interface_init(void) | ||
178 | { | ||
179 | blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), | ||
180 | 0, 0, NULL); | ||
181 | if (!blkif_cachep) | ||
182 | return -ENOMEM; | ||
183 | |||
184 | return 0; | ||
185 | } | ||
diff --git a/drivers/block/xen-blkback/vbd.c b/drivers/block/xen-blkback/vbd.c new file mode 100644 index 000000000000..d0ff4cf91a34 --- /dev/null +++ b/drivers/block/xen-blkback/vbd.c | |||
@@ -0,0 +1,162 @@ | |||
1 | /****************************************************************************** | ||
2 | * Routines for managing virtual block devices (VBDs). | ||
3 | * | ||
4 | * Copyright (c) 2003-2005, Keir Fraser & Steve Hand | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License version 2 | ||
8 | * as published by the Free Software Foundation; or, when distributed | ||
9 | * separately from the Linux kernel or incorporated into other | ||
10 | * software packages, subject to the following license: | ||
11 | * | ||
12 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
13 | * of this source file (the "Software"), to deal in the Software without | ||
14 | * restriction, including without limitation the rights to use, copy, modify, | ||
15 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
16 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
17 | * the following conditions: | ||
18 | * | ||
19 | * The above copyright notice and this permission notice shall be included in | ||
20 | * all copies or substantial portions of the Software. | ||
21 | * | ||
22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
24 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
25 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
26 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
27 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
28 | * IN THE SOFTWARE. | ||
29 | */ | ||
30 | |||
31 | #include "common.h" | ||
32 | |||
/*
 * Size of the backing device in sectors: the partition's nr_sects when the
 * block device has a bd_part, otherwise the capacity of the whole disk.
 * The argument is evaluated more than once.
 */
#define vbd_sz(_v)	((_v)->bdev->bd_part ?		\
	(_v)->bdev->bd_part->nr_sects :			\
	  get_capacity((_v)->bdev->bd_disk))
36 | |||
/* Return the size of the vbd's backing device, in sectors. */
unsigned long long vbd_size(struct vbd *vbd)
{
	unsigned long long nr_sectors = vbd_sz(vbd);

	return nr_sectors;
}
41 | |||
42 | unsigned int vbd_info(struct vbd *vbd) | ||
43 | { | ||
44 | return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); | ||
45 | } | ||
46 | |||
47 | unsigned long vbd_secsize(struct vbd *vbd) | ||
48 | { | ||
49 | return bdev_logical_block_size(vbd->bdev); | ||
50 | } | ||
51 | |||
52 | int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, | ||
53 | unsigned minor, int readonly, int cdrom) | ||
54 | { | ||
55 | struct vbd *vbd; | ||
56 | struct block_device *bdev; | ||
57 | |||
58 | vbd = &blkif->vbd; | ||
59 | vbd->handle = handle; | ||
60 | vbd->readonly = readonly; | ||
61 | vbd->type = 0; | ||
62 | |||
63 | vbd->pdevice = MKDEV(major, minor); | ||
64 | |||
65 | bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? | ||
66 | FMODE_READ : FMODE_WRITE, NULL); | ||
67 | |||
68 | if (IS_ERR(bdev)) { | ||
69 | DPRINTK("vbd_creat: device %08x could not be opened.\n", | ||
70 | vbd->pdevice); | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | vbd->bdev = bdev; | ||
75 | vbd->size = vbd_size(vbd); | ||
76 | |||
77 | if (vbd->bdev->bd_disk == NULL) { | ||
78 | DPRINTK("vbd_creat: device %08x doesn't exist.\n", | ||
79 | vbd->pdevice); | ||
80 | vbd_free(vbd); | ||
81 | return -ENOENT; | ||
82 | } | ||
83 | |||
84 | if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) | ||
85 | vbd->type |= VDISK_CDROM; | ||
86 | if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) | ||
87 | vbd->type |= VDISK_REMOVABLE; | ||
88 | |||
89 | DPRINTK("Successful creation of handle=%04x (dom=%u)\n", | ||
90 | handle, blkif->domid); | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | void vbd_free(struct vbd *vbd) | ||
95 | { | ||
96 | if (vbd->bdev) | ||
97 | blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); | ||
98 | vbd->bdev = NULL; | ||
99 | } | ||
100 | |||
101 | int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) | ||
102 | { | ||
103 | struct vbd *vbd = &blkif->vbd; | ||
104 | int rc = -EACCES; | ||
105 | |||
106 | if ((operation != READ) && vbd->readonly) | ||
107 | goto out; | ||
108 | |||
109 | if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) | ||
110 | goto out; | ||
111 | |||
112 | req->dev = vbd->pdevice; | ||
113 | req->bdev = vbd->bdev; | ||
114 | rc = 0; | ||
115 | |||
116 | out: | ||
117 | return rc; | ||
118 | } | ||
119 | |||
120 | void vbd_resize(struct blkif_st *blkif) | ||
121 | { | ||
122 | struct vbd *vbd = &blkif->vbd; | ||
123 | struct xenbus_transaction xbt; | ||
124 | int err; | ||
125 | struct xenbus_device *dev = blkback_xenbus(blkif->be); | ||
126 | unsigned long long new_size = vbd_size(vbd); | ||
127 | |||
128 | printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", | ||
129 | blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); | ||
130 | printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); | ||
131 | vbd->size = new_size; | ||
132 | again: | ||
133 | err = xenbus_transaction_start(&xbt); | ||
134 | if (err) { | ||
135 | printk(KERN_WARNING "Error starting transaction"); | ||
136 | return; | ||
137 | } | ||
138 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | ||
139 | vbd_size(vbd)); | ||
140 | if (err) { | ||
141 | printk(KERN_WARNING "Error writing new size"); | ||
142 | goto abort; | ||
143 | } | ||
144 | /* | ||
145 | * Write the current state; we will use this to synchronize | ||
146 | * the front-end. If the current state is "connected" the | ||
147 | * front-end will get the new size information online. | ||
148 | */ | ||
149 | err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); | ||
150 | if (err) { | ||
151 | printk(KERN_WARNING "Error writing the state"); | ||
152 | goto abort; | ||
153 | } | ||
154 | |||
155 | err = xenbus_transaction_end(xbt, 0); | ||
156 | if (err == -EAGAIN) | ||
157 | goto again; | ||
158 | if (err) | ||
159 | printk(KERN_WARNING "Error ending transaction"); | ||
160 | abort: | ||
161 | xenbus_transaction_end(xbt, 1); | ||
162 | } | ||
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c new file mode 100644 index 000000000000..b41ed65db2d3 --- /dev/null +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -0,0 +1,562 @@ | |||
1 | /* Xenbus code for blkif backend | ||
2 | Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | ||
3 | Copyright (C) 2005 XenSource Ltd | ||
4 | |||
5 | This program is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2 of the License, or | ||
8 | (at your option) any later version. | ||
9 | |||
10 | This program is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with this program; if not, write to the Free Software | ||
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <stdarg.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/kthread.h> | ||
23 | #include "common.h" | ||
24 | |||
/*
 * Replace the DPRINTK from common.h with a xenbus-specific variant that
 * prefixes the calling function and line number and appends ".\n".
 */
#undef DPRINTK
#define DPRINTK(fmt, args...)				\
	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
		 __func__, __LINE__, ##args)
29 | |||
/* Per-device backend state, stored as the xenbus device's drvdata. */
struct backend_info {
	struct xenbus_device *dev;	/* the xenbus device this backend serves */
	struct blkif_st *blkif;		/* interface allocated in blkback_probe() */
	struct xenbus_watch backend_watch;	/* watch on the "physical-device" node */
	unsigned major;			/* physical device numbers; both 0 until */
	unsigned minor;			/* backend_changed() has created the vbd */
	char *mode;			/* "mode" node contents read with xenbus_read() */
};
38 | |||
39 | static void connect(struct backend_info *); | ||
40 | static int connect_ring(struct backend_info *); | ||
41 | static void backend_changed(struct xenbus_watch *, const char **, | ||
42 | unsigned int); | ||
43 | |||
44 | struct xenbus_device *blkback_xenbus(struct backend_info *be) | ||
45 | { | ||
46 | return be->dev; | ||
47 | } | ||
48 | |||
49 | static int blkback_name(struct blkif_st *blkif, char *buf) | ||
50 | { | ||
51 | char *devpath, *devname; | ||
52 | struct xenbus_device *dev = blkif->be->dev; | ||
53 | |||
54 | devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); | ||
55 | if (IS_ERR(devpath)) | ||
56 | return PTR_ERR(devpath); | ||
57 | |||
58 | devname = strstr(devpath, "/dev/"); | ||
59 | if (devname != NULL) | ||
60 | devname += strlen("/dev/"); | ||
61 | else | ||
62 | devname = devpath; | ||
63 | |||
64 | snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); | ||
65 | kfree(devpath); | ||
66 | |||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static void update_blkif_status(struct blkif_st *blkif) | ||
71 | { | ||
72 | int err; | ||
73 | char name[TASK_COMM_LEN]; | ||
74 | |||
75 | /* Not ready to connect? */ | ||
76 | if (!blkif->irq || !blkif->vbd.bdev) | ||
77 | return; | ||
78 | |||
79 | /* Already connected? */ | ||
80 | if (blkif->be->dev->state == XenbusStateConnected) | ||
81 | return; | ||
82 | |||
83 | /* Attempt to connect: exit if we fail to. */ | ||
84 | connect(blkif->be); | ||
85 | if (blkif->be->dev->state != XenbusStateConnected) | ||
86 | return; | ||
87 | |||
88 | err = blkback_name(blkif, name); | ||
89 | if (err) { | ||
90 | xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); | ||
95 | if (err) { | ||
96 | xenbus_dev_error(blkif->be->dev, err, "block flush"); | ||
97 | return; | ||
98 | } | ||
99 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); | ||
100 | |||
101 | blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); | ||
102 | if (IS_ERR(blkif->xenblkd)) { | ||
103 | err = PTR_ERR(blkif->xenblkd); | ||
104 | blkif->xenblkd = NULL; | ||
105 | xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | |||
110 | /* | ||
111 | * sysfs interface for VBD I/O requests | ||
112 | */ | ||
113 | |||
/*
 * Define a read-only (S_IRUGO) device attribute called @name.  The generated
 * show_<name>() handler looks up the backend_info from the device's drvdata
 * (available to @args as "be") and formats @args with @format into the
 * sysfs buffer.
 */
#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
125 | |||
/* Per-interface counters, exposed in the "statistics" sysfs group. */
VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
VBD_SHOW(br_req,  "%d\n", be->blkif->st_br_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);

/* NULL-terminated attribute list for the statistics group. */
static struct attribute *vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_br_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group vbdstat_group = {
	.name = "statistics",
	.attrs = vbdstat_attrs,
};

/* Attributes created directly on the device by xenvbd_sysfs_addif(). */
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
150 | |||
151 | int xenvbd_sysfs_addif(struct xenbus_device *dev) | ||
152 | { | ||
153 | int error; | ||
154 | |||
155 | error = device_create_file(&dev->dev, &dev_attr_physical_device); | ||
156 | if (error) | ||
157 | goto fail1; | ||
158 | |||
159 | error = device_create_file(&dev->dev, &dev_attr_mode); | ||
160 | if (error) | ||
161 | goto fail2; | ||
162 | |||
163 | error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); | ||
164 | if (error) | ||
165 | goto fail3; | ||
166 | |||
167 | return 0; | ||
168 | |||
169 | fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); | ||
170 | fail2: device_remove_file(&dev->dev, &dev_attr_mode); | ||
171 | fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); | ||
172 | return error; | ||
173 | } | ||
174 | |||
175 | void xenvbd_sysfs_delif(struct xenbus_device *dev) | ||
176 | { | ||
177 | sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); | ||
178 | device_remove_file(&dev->dev, &dev_attr_mode); | ||
179 | device_remove_file(&dev->dev, &dev_attr_physical_device); | ||
180 | } | ||
181 | |||
182 | static int blkback_remove(struct xenbus_device *dev) | ||
183 | { | ||
184 | struct backend_info *be = dev_get_drvdata(&dev->dev); | ||
185 | |||
186 | DPRINTK(""); | ||
187 | |||
188 | if (be->major || be->minor) | ||
189 | xenvbd_sysfs_delif(dev); | ||
190 | |||
191 | if (be->backend_watch.node) { | ||
192 | unregister_xenbus_watch(&be->backend_watch); | ||
193 | kfree(be->backend_watch.node); | ||
194 | be->backend_watch.node = NULL; | ||
195 | } | ||
196 | |||
197 | if (be->blkif) { | ||
198 | blkif_disconnect(be->blkif); | ||
199 | vbd_free(&be->blkif->vbd); | ||
200 | blkif_free(be->blkif); | ||
201 | be->blkif = NULL; | ||
202 | } | ||
203 | |||
204 | kfree(be); | ||
205 | dev_set_drvdata(&dev->dev, NULL); | ||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | int blkback_barrier(struct xenbus_transaction xbt, | ||
210 | struct backend_info *be, int state) | ||
211 | { | ||
212 | struct xenbus_device *dev = be->dev; | ||
213 | int err; | ||
214 | |||
215 | err = xenbus_printf(xbt, dev->nodename, "feature-barrier", | ||
216 | "%d", state); | ||
217 | if (err) | ||
218 | xenbus_dev_fatal(dev, err, "writing feature-barrier"); | ||
219 | |||
220 | return err; | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * Entry point to this code when a new device is created. Allocate the basic | ||
225 | * structures, and watch the store waiting for the hotplug scripts to tell us | ||
226 | * the device's physical major and minor numbers. Switch to InitWait. | ||
227 | */ | ||
228 | static int blkback_probe(struct xenbus_device *dev, | ||
229 | const struct xenbus_device_id *id) | ||
230 | { | ||
231 | int err; | ||
232 | struct backend_info *be = kzalloc(sizeof(struct backend_info), | ||
233 | GFP_KERNEL); | ||
234 | if (!be) { | ||
235 | xenbus_dev_fatal(dev, -ENOMEM, | ||
236 | "allocating backend structure"); | ||
237 | return -ENOMEM; | ||
238 | } | ||
239 | be->dev = dev; | ||
240 | dev_set_drvdata(&dev->dev, be); | ||
241 | |||
242 | be->blkif = blkif_alloc(dev->otherend_id); | ||
243 | if (IS_ERR(be->blkif)) { | ||
244 | err = PTR_ERR(be->blkif); | ||
245 | be->blkif = NULL; | ||
246 | xenbus_dev_fatal(dev, err, "creating block interface"); | ||
247 | goto fail; | ||
248 | } | ||
249 | |||
250 | /* setup back pointer */ | ||
251 | be->blkif->be = be; | ||
252 | |||
253 | err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, | ||
254 | "%s/%s", dev->nodename, "physical-device"); | ||
255 | if (err) | ||
256 | goto fail; | ||
257 | |||
258 | err = xenbus_switch_state(dev, XenbusStateInitWait); | ||
259 | if (err) | ||
260 | goto fail; | ||
261 | |||
262 | return 0; | ||
263 | |||
264 | fail: | ||
265 | DPRINTK("failed"); | ||
266 | blkback_remove(dev); | ||
267 | return err; | ||
268 | } | ||
269 | |||
270 | |||
271 | /** | ||
272 | * Callback received when the hotplug scripts have placed the physical-device | ||
273 | * node. Read it and the mode node, and create a vbd. If the frontend is | ||
274 | * ready, connect. | ||
275 | */ | ||
276 | static void backend_changed(struct xenbus_watch *watch, | ||
277 | const char **vec, unsigned int len) | ||
278 | { | ||
279 | int err; | ||
280 | unsigned major; | ||
281 | unsigned minor; | ||
282 | struct backend_info *be | ||
283 | = container_of(watch, struct backend_info, backend_watch); | ||
284 | struct xenbus_device *dev = be->dev; | ||
285 | int cdrom = 0; | ||
286 | char *device_type; | ||
287 | |||
288 | DPRINTK(""); | ||
289 | |||
290 | err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", | ||
291 | &major, &minor); | ||
292 | if (XENBUS_EXIST_ERR(err)) { | ||
293 | /* Since this watch will fire once immediately after it is | ||
294 | registered, we expect this. Ignore it, and wait for the | ||
295 | hotplug scripts. */ | ||
296 | return; | ||
297 | } | ||
298 | if (err != 2) { | ||
299 | xenbus_dev_fatal(dev, err, "reading physical-device"); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | if ((be->major || be->minor) && | ||
304 | ((be->major != major) || (be->minor != minor))) { | ||
305 | printk(KERN_WARNING | ||
306 | "blkback: changing physical device (from %x:%x to " | ||
307 | "%x:%x) not supported.\n", be->major, be->minor, | ||
308 | major, minor); | ||
309 | return; | ||
310 | } | ||
311 | |||
312 | be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); | ||
313 | if (IS_ERR(be->mode)) { | ||
314 | err = PTR_ERR(be->mode); | ||
315 | be->mode = NULL; | ||
316 | xenbus_dev_fatal(dev, err, "reading mode"); | ||
317 | return; | ||
318 | } | ||
319 | |||
320 | device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); | ||
321 | if (!IS_ERR(device_type)) { | ||
322 | cdrom = strcmp(device_type, "cdrom") == 0; | ||
323 | kfree(device_type); | ||
324 | } | ||
325 | |||
326 | if (be->major == 0 && be->minor == 0) { | ||
327 | /* Front end dir is a number, which is used as the handle. */ | ||
328 | |||
329 | char *p = strrchr(dev->otherend, '/') + 1; | ||
330 | long handle; | ||
331 | err = strict_strtoul(p, 0, &handle); | ||
332 | if (err) | ||
333 | return; | ||
334 | |||
335 | be->major = major; | ||
336 | be->minor = minor; | ||
337 | |||
338 | err = vbd_create(be->blkif, handle, major, minor, | ||
339 | (NULL == strchr(be->mode, 'w')), cdrom); | ||
340 | if (err) { | ||
341 | be->major = be->minor = 0; | ||
342 | xenbus_dev_fatal(dev, err, "creating vbd structure"); | ||
343 | return; | ||
344 | } | ||
345 | |||
346 | err = xenvbd_sysfs_addif(dev); | ||
347 | if (err) { | ||
348 | vbd_free(&be->blkif->vbd); | ||
349 | be->major = be->minor = 0; | ||
350 | xenbus_dev_fatal(dev, err, "creating sysfs entries"); | ||
351 | return; | ||
352 | } | ||
353 | |||
354 | /* We're potentially connected now */ | ||
355 | update_blkif_status(be->blkif); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | |||
360 | /** | ||
361 | * Callback received when the frontend's state changes. | ||
362 | */ | ||
363 | static void frontend_changed(struct xenbus_device *dev, | ||
364 | enum xenbus_state frontend_state) | ||
365 | { | ||
366 | struct backend_info *be = dev_get_drvdata(&dev->dev); | ||
367 | int err; | ||
368 | |||
369 | DPRINTK("%s", xenbus_strstate(frontend_state)); | ||
370 | |||
371 | switch (frontend_state) { | ||
372 | case XenbusStateInitialising: | ||
373 | if (dev->state == XenbusStateClosed) { | ||
374 | printk(KERN_INFO "%s: %s: prepare for reconnect\n", | ||
375 | __func__, dev->nodename); | ||
376 | xenbus_switch_state(dev, XenbusStateInitWait); | ||
377 | } | ||
378 | break; | ||
379 | |||
380 | case XenbusStateInitialised: | ||
381 | case XenbusStateConnected: | ||
382 | /* Ensure we connect even when two watches fire in | ||
383 | close successsion and we miss the intermediate value | ||
384 | of frontend_state. */ | ||
385 | if (dev->state == XenbusStateConnected) | ||
386 | break; | ||
387 | |||
388 | /* Enforce precondition before potential leak point. | ||
389 | * blkif_disconnect() is idempotent. | ||
390 | */ | ||
391 | blkif_disconnect(be->blkif); | ||
392 | |||
393 | err = connect_ring(be); | ||
394 | if (err) | ||
395 | break; | ||
396 | update_blkif_status(be->blkif); | ||
397 | break; | ||
398 | |||
399 | case XenbusStateClosing: | ||
400 | blkif_disconnect(be->blkif); | ||
401 | xenbus_switch_state(dev, XenbusStateClosing); | ||
402 | break; | ||
403 | |||
404 | case XenbusStateClosed: | ||
405 | xenbus_switch_state(dev, XenbusStateClosed); | ||
406 | if (xenbus_dev_is_online(dev)) | ||
407 | break; | ||
408 | /* fall through if not online */ | ||
409 | case XenbusStateUnknown: | ||
410 | /* implies blkif_disconnect() via blkback_remove() */ | ||
411 | device_unregister(&dev->dev); | ||
412 | break; | ||
413 | |||
414 | default: | ||
415 | xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | ||
416 | frontend_state); | ||
417 | break; | ||
418 | } | ||
419 | } | ||
420 | |||
421 | |||
422 | /* ** Connection ** */ | ||
423 | |||
424 | |||
425 | /** | ||
426 | * Write the physical details regarding the block device to the store, and | ||
427 | * switch to Connected state. | ||
428 | */ | ||
429 | static void connect(struct backend_info *be) | ||
430 | { | ||
431 | struct xenbus_transaction xbt; | ||
432 | int err; | ||
433 | struct xenbus_device *dev = be->dev; | ||
434 | |||
435 | DPRINTK("%s", dev->otherend); | ||
436 | |||
437 | /* Supply the information about the device the frontend needs */ | ||
438 | again: | ||
439 | err = xenbus_transaction_start(&xbt); | ||
440 | if (err) { | ||
441 | xenbus_dev_fatal(dev, err, "starting transaction"); | ||
442 | return; | ||
443 | } | ||
444 | |||
445 | err = blkback_barrier(xbt, be, 1); | ||
446 | if (err) | ||
447 | goto abort; | ||
448 | |||
449 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | ||
450 | vbd_size(&be->blkif->vbd)); | ||
451 | if (err) { | ||
452 | xenbus_dev_fatal(dev, err, "writing %s/sectors", | ||
453 | dev->nodename); | ||
454 | goto abort; | ||
455 | } | ||
456 | |||
457 | /* FIXME: use a typename instead */ | ||
458 | err = xenbus_printf(xbt, dev->nodename, "info", "%u", | ||
459 | vbd_info(&be->blkif->vbd)); | ||
460 | if (err) { | ||
461 | xenbus_dev_fatal(dev, err, "writing %s/info", | ||
462 | dev->nodename); | ||
463 | goto abort; | ||
464 | } | ||
465 | err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", | ||
466 | vbd_secsize(&be->blkif->vbd)); | ||
467 | if (err) { | ||
468 | xenbus_dev_fatal(dev, err, "writing %s/sector-size", | ||
469 | dev->nodename); | ||
470 | goto abort; | ||
471 | } | ||
472 | |||
473 | err = xenbus_transaction_end(xbt, 0); | ||
474 | if (err == -EAGAIN) | ||
475 | goto again; | ||
476 | if (err) | ||
477 | xenbus_dev_fatal(dev, err, "ending transaction"); | ||
478 | |||
479 | err = xenbus_switch_state(dev, XenbusStateConnected); | ||
480 | if (err) | ||
481 | xenbus_dev_fatal(dev, err, "switching to Connected state", | ||
482 | dev->nodename); | ||
483 | |||
484 | return; | ||
485 | abort: | ||
486 | xenbus_transaction_end(xbt, 1); | ||
487 | } | ||
488 | |||
489 | |||
490 | static int connect_ring(struct backend_info *be) | ||
491 | { | ||
492 | struct xenbus_device *dev = be->dev; | ||
493 | unsigned long ring_ref; | ||
494 | unsigned int evtchn; | ||
495 | char protocol[64] = ""; | ||
496 | int err; | ||
497 | |||
498 | DPRINTK("%s", dev->otherend); | ||
499 | |||
500 | err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", | ||
501 | &ring_ref, "event-channel", "%u", &evtchn, NULL); | ||
502 | if (err) { | ||
503 | xenbus_dev_fatal(dev, err, | ||
504 | "reading %s/ring-ref and event-channel", | ||
505 | dev->otherend); | ||
506 | return err; | ||
507 | } | ||
508 | |||
509 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
510 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | ||
511 | "%63s", protocol, NULL); | ||
512 | if (err) | ||
513 | strcpy(protocol, "unspecified, assuming native"); | ||
514 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | ||
515 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
516 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | ||
517 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | ||
518 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | ||
519 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | ||
520 | else { | ||
521 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | ||
522 | return -1; | ||
523 | } | ||
524 | printk(KERN_INFO | ||
525 | "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", | ||
526 | ring_ref, evtchn, be->blkif->blk_protocol, protocol); | ||
527 | |||
528 | /* Map the shared frame, irq etc. */ | ||
529 | err = blkif_map(be->blkif, ring_ref, evtchn); | ||
530 | if (err) { | ||
531 | xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", | ||
532 | ring_ref, evtchn); | ||
533 | return err; | ||
534 | } | ||
535 | |||
536 | return 0; | ||
537 | } | ||
538 | |||
539 | |||
540 | /* ** Driver Registration ** */ | ||
541 | |||
542 | |||
543 | static const struct xenbus_device_id blkback_ids[] = { | ||
544 | { "vbd" }, | ||
545 | { "" } | ||
546 | }; | ||
547 | |||
548 | |||
549 | static struct xenbus_driver blkback = { | ||
550 | .name = "vbd", | ||
551 | .owner = THIS_MODULE, | ||
552 | .ids = blkback_ids, | ||
553 | .probe = blkback_probe, | ||
554 | .remove = blkback_remove, | ||
555 | .otherend_changed = frontend_changed | ||
556 | }; | ||
557 | |||
558 | |||
559 | int blkif_xenbus_init(void) | ||
560 | { | ||
561 | return xenbus_register_backend(&blkback); | ||
562 | } | ||