Diffstat (limited to 'drivers/block/xen-blkback/blkback.c')
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 759
1 file changed, 759 insertions(+), 0 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
new file mode 100644
index 000000000000..59a2bae0f35e
--- /dev/null
+++ b/drivers/block/xen-blkback/blkback.c
@@ -0,0 +1,759 @@
/******************************************************************************
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/block/xen-blkfront.c
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include <xen/events.h>
#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "common.h"

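/*
 * Request flags used when dispatching a BLKIF_OP_WRITE_BARRIER: an ordinary
 * write that also flushes the disk cache and forces the data to stable
 * storage.
 */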
#define WRITE_BARRIER	(REQ_WRITE | REQ_FLUSH | REQ_FUA)

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats;
static unsigned int debug_lvl;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each buffer_head that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
	struct blkif_st		*blkif;
	u64			id;
	int			nr_pages;
	atomic_t		pendcnt;
	unsigned short		operation;
	int			status;
	struct list_head	free_list;
};

#define BLKBACK_INVALID_HANDLE (~0)

struct xen_blkbk {
	struct pending_req	*pending_reqs;
	/* List of all 'pending_req' available */
	struct list_head	pending_free;
	/* And its spinlock. */
	spinlock_t		pending_free_lock;
	wait_queue_head_t	pending_free_wq;
	/* The list of all pages that are available. */
	struct page		**pending_pages;
	/* And the grant handles that are available. */
	grant_handle_t		*pending_grant_handles;
};

static struct xen_blkbk *blkbk;

/*
 * Little helpful macro to figure out the index and virtual address of the
 * pending_pages[..]. For each 'pending_req' we have up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
 * 10 and would index in the pending_pages[..].
 */
static inline int vaddr_pagenr(struct pending_req *req, int seg)
{
	return (req - blkbk->pending_reqs) *
		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]

static inline unsigned long vaddr(struct pending_req *req, int seg)
{
	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
	return (unsigned long)pfn_to_kaddr(pfn);
}

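/*
 * Grant handle for segment '_seg' of pending request '_req', kept at the
 * same index as the corresponding page in pending_pages[].
 */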
#define pending_handle(_req, _seg) \
	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])


static int do_block_io_op(struct blkif_st *blkif);
static void dispatch_rw_block_io(struct blkif_st *blkif,
				 struct blkif_request *req,
				 struct pending_req *pending_req);
static void make_response(struct blkif_st *blkif, u64 id,
			  unsigned short op, int st);

/*
 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
 */
static struct pending_req *alloc_req(void)
{
	struct pending_req *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	if (!list_empty(&blkbk->pending_free)) {
		req = list_entry(blkbk->pending_free.next, struct pending_req,
				 free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	return req;
}

/*
 * Return the 'pending_req' structure back to the free pool. We also
 * wake up the thread if it was waiting for a free pending_req.
 */
static void free_req(struct pending_req *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	was_empty = list_empty(&blkbk->pending_free);
	list_add(&req->free_list, &blkbk->pending_free);
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	if (was_empty)
		wake_up(&blkbk->pending_free_wq);
}

/*
 * Notification from the guest OS.
 */
static void blkif_notify_work(struct blkif_st *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

irqreturn_t blkif_be_int(int irq, void *dev_id)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}

/*
 * SCHEDULER FUNCTIONS
 */

static void print_stats(struct blkif_st *blkif)
{
	printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
	       current->comm, blkif->st_oo_req,
	       blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
}

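/*
 * Main loop of the per-blkif kernel thread: wait until the frontend has
 * queued requests and a free pending_req is available, then drain the ring.
 */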
int blkif_schedule(void *arg)
{
	struct blkif_st *blkif = arg;
	struct vbd *vbd = &blkif->vbd;

	blkif_get(blkif);

	if (debug_lvl)
		printk(KERN_DEBUG "%s: started\n", current->comm);

	while (!kthread_should_stop()) {
		if (try_to_freeze())
			continue;
		if (unlikely(vbd->size != vbd_size(vbd)))
			vbd_resize(blkif);

		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			blkbk->pending_free_wq,
			!list_empty(&blkbk->pending_free) ||
			kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	if (log_stats)
		print_stats(blkif);
	if (debug_lvl)
		printk(KERN_DEBUG "%s: exiting\n", current->comm);

	blkif->xenblkd = NULL;
	blkif_put(blkif);

	return 0;
}

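/*
 * One segment of a request: the bus address of the granted page (with the
 * first-sector offset folded in) and its length in 512-byte sectors.
 */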
struct seg_buf {
	unsigned long buf;
	unsigned int nsec;
};
/*
 * Unmap the grant references, and also remove the M2P over-rides
 * used in the 'pending_req'.
 */
static void xen_blkbk_unmap(struct pending_req *req)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int i, invcount = 0;
	grant_handle_t handle;
	int ret;

	for (i = 0; i < req->nr_pages; i++) {
		handle = pending_handle(req, i);
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
				    GNTMAP_host_map, handle);
		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
		invcount++;
	}

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	BUG_ON(ret);
	/* Note, we use invcount, not req->nr_pages, so we can't index
	 * using vaddr(req, i).
	 */
	for (i = 0; i < invcount; i++) {
		ret = m2p_remove_override(
			virt_to_page(unmap[i].host_addr), false);
		if (ret) {
			printk(KERN_ALERT "Failed to remove M2P override for " \
				"%lx\n", (unsigned long)unmap[i].host_addr);
			continue;
		}
	}
}
static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req,
			 struct seg_buf seg[])
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i;
	int nseg = req->nr_segments;
	int ret = 0;
	/* Fill out preq.nr_sects with the proper number of sectors, and set up
	 * map[..] with the PFN of the page in our domain and the corresponding
	 * grant reference for each page.
	 */
	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		flags = GNTMAP_host_map;
		if (pending_req->operation != BLKIF_OP_READ)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
				  req->u.rw.seg[i].gref, pending_req->blkif->domid);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
	BUG_ON(ret);

	/* Now swizzle the MFN in our domain with the MFN from the other domain
	 * so that when we access vaddr(pending_req,i) it has the contents of
	 * the page from the other domain.
	 */
	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
			DPRINTK("invalid buffer -- could not remap it\n");
			map[i].handle = BLKBACK_INVALID_HANDLE;
			ret |= 1;
		}

		pending_handle(pending_req, i) = map[i].handle;

		if (ret)
			continue;

		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
			blkbk->pending_page(pending_req, i), false);
		if (ret) {
			printk(KERN_ALERT "Failed to install M2P override for"\
				" %lx (ret: %d)\n", (unsigned long)
				map[i].dev_bus_addr, ret);
			/* We could switch over to GNTTABOP_copy */
			continue;
		}

		seg[i].buf = map[i].dev_bus_addr |
			(req->u.rw.seg[i].first_sect << 9);
	}
	return ret;
}

/*
 * Completion callback on the bios. Called as bio->bi_end_io().
 */

static void __end_block_io_op(struct pending_req *pending_req, int error)
{
	/* An error fails the entire request. */
	if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
	    (error == -EOPNOTSUPP)) {
		DPRINTK("blkback: write barrier op failed, not supported\n");
		blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if (error) {
		DPRINTK("Buffer not up-to-date at end of operation, "
			"error=%d\n", error);
		pending_req->status = BLKIF_RSP_ERROR;
	}

	/* If all of the bio's have completed it is time to unmap
	 * the grant references associated with 'request' and provide
	 * the proper response on the ring.
	 */
	if (atomic_dec_and_test(&pending_req->pendcnt)) {
		xen_blkbk_unmap(pending_req);
		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
		blkif_put(pending_req->blkif);
		free_req(pending_req);
	}
}

/*
 * bio callback.
 */
static void end_block_io_op(struct bio *bio, int error)
{
	__end_block_io_op(bio->bi_private, error);
	bio_put(bio);
}


/*
 * Function to copy the 'struct blkif_request' from the ring buffer
 * (which has the sectors we want, number of them, grant references, etc),
 * and transmute it to the block API to hand it over to the proper block disk.
 */
static int do_block_io_op(struct blkif_st *blkif)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	struct blkif_request req;
	struct pending_req *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;

	rc = blk_rings->common.req_cons;
	rp = blk_rings->common.sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	while (rc != rp) {

		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
			break;

		if (kthread_should_stop()) {
			more_to_do = 1;
			break;
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

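		/*
		 * Copy the request into a local structure, using the ring
		 * layout (native, 32-bit or 64-bit) negotiated with the
		 * frontend.
		 */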
		switch (blkif->blk_protocol) {
		case BLKIF_PROTOCOL_NATIVE:
			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
			break;
		case BLKIF_PROTOCOL_X86_32:
			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
			break;
		case BLKIF_PROTOCOL_X86_64:
			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
			break;
		default:
			BUG();
		}
		blk_rings->common.req_cons = ++rc; /* before make_response() */

		/* Apply all sanity checks to /private copy/ of request. */
		barrier();

		switch (req.operation) {
		case BLKIF_OP_READ:
			blkif->st_rd_req++;
			dispatch_rw_block_io(blkif, &req, pending_req);
			break;
		case BLKIF_OP_WRITE_BARRIER:
			blkif->st_br_req++;
			/* fall through */
		case BLKIF_OP_WRITE:
			blkif->st_wr_req++;
			dispatch_rw_block_io(blkif, &req, pending_req);
			break;
		default:
			/* A good sign something is wrong: sleep for a while to
			 * avoid excessive CPU consumption by a bad guest. */
			msleep(1);
			DPRINTK("error: unknown block io operation [%d]\n",
				req.operation);
			make_response(blkif, req.id, req.operation,
				      BLKIF_RSP_ERROR);
			free_req(pending_req);
			break;
		}

		/* Yield point for this unbounded loop. */
		cond_resched();
	}

	return more_to_do;
}

/*
 * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
 * and a call to 'submit_bio' to pass it to the underlying storage.
 */
static void dispatch_rw_block_io(struct blkif_st *blkif,
				 struct blkif_request *req,
				 struct pending_req *pending_req)
{
	struct phys_req preq;
	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int nseg;
	struct bio *bio = NULL;
	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i, nbio = 0;
	int operation;
	struct blk_plug plug;

	switch (req->operation) {
	case BLKIF_OP_READ:
		operation = READ;
		break;
	case BLKIF_OP_WRITE:
		operation = WRITE;
		break;
	case BLKIF_OP_WRITE_BARRIER:
		operation = WRITE_BARRIER;
		break;
	default:
		operation = 0; /* make gcc happy */
		BUG();
	}

	/* Check that the number of segments is sane. */
	nseg = req->nr_segments;
	if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		DPRINTK("Bad number of segments in request (%d)\n", nseg);
		/* Haven't submitted any bio's yet. */
		goto fail_response;
	}

	preq.dev = req->handle;
	preq.sector_number = req->u.rw.sector_number;
	preq.nr_sects = 0;

	pending_req->blkif = blkif;
	pending_req->id = req->id;
	pending_req->operation = req->operation;
	pending_req->status = BLKIF_RSP_OKAY;
	pending_req->nr_pages = nseg;

	for (i = 0; i < nseg; i++) {
		seg[i].nsec = req->u.rw.seg[i].last_sect -
			req->u.rw.seg[i].first_sect + 1;
		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
			goto fail_response;
		preq.nr_sects += seg[i].nsec;
	}

	if (vbd_translate(&preq, blkif, operation) != 0) {
		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
			operation == READ ? "read" : "write",
			preq.sector_number,
			preq.sector_number + preq.nr_sects, preq.dev);
		goto fail_response;
	}
	/* This check _MUST_ be done after vbd_translate as the preq.bdev
	 * is set there. */
	for (i = 0; i < nseg; i++) {
		if (((int)preq.sector_number|(int)seg[i].nsec) &
		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
			DPRINTK("Misaligned I/O request from domain %d",
				blkif->domid);
			goto fail_response;
		}
	}
	/* If we have failed at this point, we need to undo the M2P override,
	 * set gnttab_set_unmap_op on all of the grant references and perform
	 * the hypercall to unmap the grants - that is all done in
	 * xen_blkbk_unmap.
	 */
	if (xen_blkbk_map(req, pending_req, seg))
		goto fail_flush;

	/* This corresponding blkif_put is done in __end_block_io_op */
	blkif_get(blkif);

	for (i = 0; i < nseg; i++) {
		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     blkbk->pending_page(pending_req, i),
				     seg[i].nsec << 9,
				     seg[i].buf & ~PAGE_MASK) == 0)) {

			bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
			if (unlikely(bio == NULL))
				goto fail_put_bio;

			bio->bi_bdev = preq.bdev;
			bio->bi_private = pending_req;
			bio->bi_end_io = end_block_io_op;
			bio->bi_sector = preq.sector_number;
		}

		preq.sector_number += seg[i].nsec;
	}

	/* This will be hit if the operation was a barrier. */
	if (!bio) {
		BUG_ON(operation != WRITE_BARRIER);
		bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
		if (unlikely(bio == NULL))
			goto fail_put_bio;

		bio->bi_bdev = preq.bdev;
		bio->bi_private = pending_req;
		bio->bi_end_io = end_block_io_op;
		bio->bi_sector = -1;
	}

	/* Set the pending count up front to the number of bios we are going
	 * to submit, so that we do not have to do an atomic_inc for each one.
	 */
	atomic_set(&pending_req->pendcnt, nbio);

	/* Get a reference count for the disk queue and start sending I/O */
	blk_start_plug(&plug);

	for (i = 0; i < nbio; i++)
		submit_bio(operation, biolist[i]);

	blk_finish_plug(&plug);
	/* Let the I/Os go.. */

	if (operation == READ)
		blkif->st_rd_sect += preq.nr_sects;
	else if (operation == WRITE || operation == WRITE_BARRIER)
		blkif->st_wr_sect += preq.nr_sects;

	return;

 fail_flush:
	xen_blkbk_unmap(pending_req);
 fail_response:
	/* Haven't submitted any bio's yet. */
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
	msleep(1); /* back off a bit */
	return;

 fail_put_bio:
	for (i = 0; i < (nbio-1); i++)
		bio_put(biolist[i]);
	/* __end_block_io_op() expects pendcnt to have been set. */
	atomic_set(&pending_req->pendcnt, 1);
	__end_block_io_op(pending_req, -EINVAL);
	msleep(1); /* back off a bit */
	return;
}


/*
 * Put a response on the ring on how the operation fared.
 */
static void make_response(struct blkif_st *blkif, u64 id,
			  unsigned short op, int st)
{
	struct blkif_response resp;
	unsigned long flags;
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	int more_to_do = 0;
	int notify;

	resp.id = id;
	resp.operation = op;
	resp.status = st;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
	/* Place on the response ring for the relevant domain. */
	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_32:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_64:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	default:
		BUG();
	}
	blk_rings->common.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);

	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
		more_to_do = 1;
	}

	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

	if (more_to_do)
		blkif_notify_work(blkif);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}

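/*
 * Module initialization: allocate the pool of pending_reqs, the pages used
 * to map granted frames and their grant handles, then hook into the blkif
 * interface and xenbus layers via blkif_interface_init() and
 * blkif_xenbus_init().
 */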
static int __init blkif_init(void)
{
	int i, mmap_pages;
	int rc = 0;

	if (!xen_pv_domain())
		return -ENODEV;

	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
	if (!blkbk) {
		printk(KERN_ALERT "%s: out of memory!\n", __func__);
		return -ENOMEM;
	}

	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

	blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
				      blkif_reqs, GFP_KERNEL);
	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
					       mmap_pages, GFP_KERNEL);
	blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
				       mmap_pages, GFP_KERNEL);

	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
	    !blkbk->pending_pages) {
		rc = -ENOMEM;
		goto out_of_memory;
	}

	for (i = 0; i < mmap_pages; i++) {
		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
		if (blkbk->pending_pages[i] == NULL) {
			rc = -ENOMEM;
			goto out_of_memory;
		}
	}
	rc = blkif_interface_init();
	if (rc)
		goto failed_init;

	memset(blkbk->pending_reqs, 0,
	       blkif_reqs * sizeof(blkbk->pending_reqs[0]));

	INIT_LIST_HEAD(&blkbk->pending_free);
	spin_lock_init(&blkbk->pending_free_lock);
	init_waitqueue_head(&blkbk->pending_free_wq);

	for (i = 0; i < blkif_reqs; i++)
		list_add_tail(&blkbk->pending_reqs[i].free_list,
			      &blkbk->pending_free);

	rc = blkif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

 out_of_memory:
	printk(KERN_ERR "%s: out of memory\n", __func__);
 failed_init:
	kfree(blkbk->pending_reqs);
	kfree(blkbk->pending_grant_handles);
	for (i = 0; i < mmap_pages; i++) {
		if (blkbk->pending_pages[i])
			__free_page(blkbk->pending_pages[i]);
	}
	kfree(blkbk->pending_pages);
	kfree(blkbk);
	blkbk = NULL;
	return rc;
}

module_init(blkif_init);

MODULE_LICENSE("Dual BSD/GPL");