diff options
Diffstat (limited to 'drivers/block/xen-blkback/blkback.c')
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 292 |
1 files changed, 267 insertions, 25 deletions
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 280a13846e6c..d7dd5cbdac5f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/list.h> | 39 | #include <linux/list.h> |
40 | #include <linux/delay.h> | 40 | #include <linux/delay.h> |
41 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
42 | #include <linux/bitmap.h> | ||
42 | 43 | ||
43 | #include <xen/events.h> | 44 | #include <xen/events.h> |
44 | #include <xen/page.h> | 45 | #include <xen/page.h> |
@@ -79,6 +80,7 @@ struct pending_req { | |||
79 | unsigned short operation; | 80 | unsigned short operation; |
80 | int status; | 81 | int status; |
81 | struct list_head free_list; | 82 | struct list_head free_list; |
83 | DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
82 | }; | 84 | }; |
83 | 85 | ||
84 | #define BLKBACK_INVALID_HANDLE (~0) | 86 | #define BLKBACK_INVALID_HANDLE (~0) |
@@ -99,6 +101,36 @@ struct xen_blkbk { | |||
99 | static struct xen_blkbk *blkbk; | 101 | static struct xen_blkbk *blkbk; |
100 | 102 | ||
101 | /* | 103 | /* |
104 | * Maximum number of grant pages that can be mapped in blkback. | ||
105 | * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of | ||
106 | * pages that blkback will persistently map. | ||
107 | * Currently, this is: | ||
108 | * RING_SIZE = 32 (for all known ring types) | ||
109 | * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 | ||
110 | * sizeof(struct persistent_gnt) = 48 | ||
111 | * So the maximum memory used to store the grants is: | ||
112 | * 32 * 11 * 48 = 16896 bytes | ||
113 | */ | ||
114 | static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol) | ||
115 | { | ||
116 | switch (protocol) { | ||
117 | case BLKIF_PROTOCOL_NATIVE: | ||
118 | return __CONST_RING_SIZE(blkif, PAGE_SIZE) * | ||
119 | BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
120 | case BLKIF_PROTOCOL_X86_32: | ||
121 | return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) * | ||
122 | BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
123 | case BLKIF_PROTOCOL_X86_64: | ||
124 | return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) * | ||
125 | BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
126 | default: | ||
127 | BUG(); | ||
128 | } | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | |||
133 | /* | ||
102 | * Little helpful macro to figure out the index and virtual address of the | 134 | * Little helpful macro to figure out the index and virtual address of the |
103 | * pending_pages[..]. For each 'pending_req' we have up to | 135 | * pending_pages[..]. For each 'pending_req' we have up to |
104 | * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through | 136 | * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through |
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
129 | static void make_response(struct xen_blkif *blkif, u64 id, | 161 | static void make_response(struct xen_blkif *blkif, u64 id, |
130 | unsigned short op, int st); | 162 | unsigned short op, int st); |
131 | 163 | ||
164 | #define foreach_grant(pos, rbtree, node) \ | ||
165 | for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ | ||
166 | &(pos)->node != NULL; \ | ||
167 | (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) | ||
168 | |||
169 | |||
170 | static void add_persistent_gnt(struct rb_root *root, | ||
171 | struct persistent_gnt *persistent_gnt) | ||
172 | { | ||
173 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
174 | struct persistent_gnt *this; | ||
175 | |||
176 | /* Figure out where to put new node */ | ||
177 | while (*new) { | ||
178 | this = container_of(*new, struct persistent_gnt, node); | ||
179 | |||
180 | parent = *new; | ||
181 | if (persistent_gnt->gnt < this->gnt) | ||
182 | new = &((*new)->rb_left); | ||
183 | else if (persistent_gnt->gnt > this->gnt) | ||
184 | new = &((*new)->rb_right); | ||
185 | else { | ||
186 | pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n"); | ||
187 | BUG(); | ||
188 | } | ||
189 | } | ||
190 | |||
191 | /* Add new node and rebalance tree. */ | ||
192 | rb_link_node(&(persistent_gnt->node), parent, new); | ||
193 | rb_insert_color(&(persistent_gnt->node), root); | ||
194 | } | ||
195 | |||
196 | static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, | ||
197 | grant_ref_t gref) | ||
198 | { | ||
199 | struct persistent_gnt *data; | ||
200 | struct rb_node *node = root->rb_node; | ||
201 | |||
202 | while (node) { | ||
203 | data = container_of(node, struct persistent_gnt, node); | ||
204 | |||
205 | if (gref < data->gnt) | ||
206 | node = node->rb_left; | ||
207 | else if (gref > data->gnt) | ||
208 | node = node->rb_right; | ||
209 | else | ||
210 | return data; | ||
211 | } | ||
212 | return NULL; | ||
213 | } | ||
214 | |||
132 | /* | 215 | /* |
133 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. | 216 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. |
134 | */ | 217 | */ |
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg) | |||
275 | { | 358 | { |
276 | struct xen_blkif *blkif = arg; | 359 | struct xen_blkif *blkif = arg; |
277 | struct xen_vbd *vbd = &blkif->vbd; | 360 | struct xen_vbd *vbd = &blkif->vbd; |
361 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
362 | struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
363 | struct persistent_gnt *persistent_gnt; | ||
364 | int ret = 0; | ||
365 | int segs_to_unmap = 0; | ||
278 | 366 | ||
279 | xen_blkif_get(blkif); | 367 | xen_blkif_get(blkif); |
280 | 368 | ||
@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg) | |||
302 | print_stats(blkif); | 390 | print_stats(blkif); |
303 | } | 391 | } |
304 | 392 | ||
393 | /* Free all persistent grant pages */ | ||
394 | if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) { | ||
395 | foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) { | ||
396 | BUG_ON(persistent_gnt->handle == | ||
397 | BLKBACK_INVALID_HANDLE); | ||
398 | gnttab_set_unmap_op(&unmap[segs_to_unmap], | ||
399 | (unsigned long) pfn_to_kaddr(page_to_pfn( | ||
400 | persistent_gnt->page)), | ||
401 | GNTMAP_host_map, | ||
402 | persistent_gnt->handle); | ||
403 | |||
404 | pages[segs_to_unmap] = persistent_gnt->page; | ||
405 | rb_erase(&persistent_gnt->node, | ||
406 | &blkif->persistent_gnts); | ||
407 | kfree(persistent_gnt); | ||
408 | blkif->persistent_gnt_c--; | ||
409 | |||
410 | if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || | ||
411 | !rb_next(&persistent_gnt->node)) { | ||
412 | ret = gnttab_unmap_refs(unmap, NULL, pages, | ||
413 | segs_to_unmap); | ||
414 | BUG_ON(ret); | ||
415 | segs_to_unmap = 0; | ||
416 | } | ||
417 | } | ||
418 | } | ||
419 | |||
420 | BUG_ON(blkif->persistent_gnt_c != 0); | ||
421 | BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); | ||
422 | |||
305 | if (log_stats) | 423 | if (log_stats) |
306 | print_stats(blkif); | 424 | print_stats(blkif); |
307 | 425 | ||
@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req) | |||
328 | int ret; | 446 | int ret; |
329 | 447 | ||
330 | for (i = 0; i < req->nr_pages; i++) { | 448 | for (i = 0; i < req->nr_pages; i++) { |
449 | if (!test_bit(i, req->unmap_seg)) | ||
450 | continue; | ||
331 | handle = pending_handle(req, i); | 451 | handle = pending_handle(req, i); |
332 | if (handle == BLKBACK_INVALID_HANDLE) | 452 | if (handle == BLKBACK_INVALID_HANDLE) |
333 | continue; | 453 | continue; |
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req) | |||
344 | 464 | ||
345 | static int xen_blkbk_map(struct blkif_request *req, | 465 | static int xen_blkbk_map(struct blkif_request *req, |
346 | struct pending_req *pending_req, | 466 | struct pending_req *pending_req, |
347 | struct seg_buf seg[]) | 467 | struct seg_buf seg[], |
468 | struct page *pages[]) | ||
348 | { | 469 | { |
349 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 470 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
350 | int i; | 471 | struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
472 | struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
473 | struct persistent_gnt *persistent_gnt = NULL; | ||
474 | struct xen_blkif *blkif = pending_req->blkif; | ||
475 | phys_addr_t addr = 0; | ||
476 | int i, j; | ||
477 | bool new_map; | ||
351 | int nseg = req->u.rw.nr_segments; | 478 | int nseg = req->u.rw.nr_segments; |
479 | int segs_to_map = 0; | ||
352 | int ret = 0; | 480 | int ret = 0; |
481 | int use_persistent_gnts; | ||
482 | |||
483 | use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); | ||
484 | |||
485 | BUG_ON(blkif->persistent_gnt_c > | ||
486 | max_mapped_grant_pages(pending_req->blkif->blk_protocol)); | ||
353 | 487 | ||
354 | /* | 488 | /* |
355 | * Fill out preq.nr_sects with proper amount of sectors, and setup | 489 | * Fill out preq.nr_sects with proper amount of sectors, and setup |
@@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req, | |||
359 | for (i = 0; i < nseg; i++) { | 493 | for (i = 0; i < nseg; i++) { |
360 | uint32_t flags; | 494 | uint32_t flags; |
361 | 495 | ||
362 | flags = GNTMAP_host_map; | 496 | if (use_persistent_gnts) |
363 | if (pending_req->operation != BLKIF_OP_READ) | 497 | persistent_gnt = get_persistent_gnt( |
364 | flags |= GNTMAP_readonly; | 498 | &blkif->persistent_gnts, |
365 | gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, | 499 | req->u.rw.seg[i].gref); |
366 | req->u.rw.seg[i].gref, | 500 | |
367 | pending_req->blkif->domid); | 501 | if (persistent_gnt) { |
502 | /* | ||
503 | * We are using persistent grants and | ||
504 | * the grant is already mapped | ||
505 | */ | ||
506 | new_map = false; | ||
507 | } else if (use_persistent_gnts && | ||
508 | blkif->persistent_gnt_c < | ||
509 | max_mapped_grant_pages(blkif->blk_protocol)) { | ||
510 | /* | ||
511 | * We are using persistent grants, the grant is | ||
512 | * not mapped but we have room for it | ||
513 | */ | ||
514 | new_map = true; | ||
515 | persistent_gnt = kzalloc( | ||
516 | sizeof(struct persistent_gnt), | ||
517 | GFP_KERNEL); | ||
518 | if (!persistent_gnt) | ||
519 | return -ENOMEM; | ||
520 | persistent_gnt->page = alloc_page(GFP_KERNEL); | ||
521 | if (!persistent_gnt->page) { | ||
522 | kfree(persistent_gnt); | ||
523 | return -ENOMEM; | ||
524 | } | ||
525 | persistent_gnt->gnt = req->u.rw.seg[i].gref; | ||
526 | |||
527 | pages_to_gnt[segs_to_map] = | ||
528 | persistent_gnt->page; | ||
529 | addr = (unsigned long) pfn_to_kaddr( | ||
530 | page_to_pfn(persistent_gnt->page)); | ||
531 | |||
532 | add_persistent_gnt(&blkif->persistent_gnts, | ||
533 | persistent_gnt); | ||
534 | blkif->persistent_gnt_c++; | ||
535 | pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", | ||
536 | persistent_gnt->gnt, blkif->persistent_gnt_c, | ||
537 | max_mapped_grant_pages(blkif->blk_protocol)); | ||
538 | } else { | ||
539 | /* | ||
540 | * We are either using persistent grants and | ||
541 | * hit the maximum limit of grants mapped, | ||
542 | * or we are not using persistent grants. | ||
543 | */ | ||
544 | if (use_persistent_gnts && | ||
545 | !blkif->vbd.overflow_max_grants) { | ||
546 | blkif->vbd.overflow_max_grants = 1; | ||
547 | pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", | ||
548 | blkif->domid, blkif->vbd.handle); | ||
549 | } | ||
550 | new_map = true; | ||
551 | pages[i] = blkbk->pending_page(pending_req, i); | ||
552 | addr = vaddr(pending_req, i); | ||
553 | pages_to_gnt[segs_to_map] = | ||
554 | blkbk->pending_page(pending_req, i); | ||
555 | } | ||
556 | |||
557 | if (persistent_gnt) { | ||
558 | pages[i] = persistent_gnt->page; | ||
559 | persistent_gnts[i] = persistent_gnt; | ||
560 | } else { | ||
561 | persistent_gnts[i] = NULL; | ||
562 | } | ||
563 | |||
564 | if (new_map) { | ||
565 | flags = GNTMAP_host_map; | ||
566 | if (!persistent_gnt && | ||
567 | (pending_req->operation != BLKIF_OP_READ)) | ||
568 | flags |= GNTMAP_readonly; | ||
569 | gnttab_set_map_op(&map[segs_to_map++], addr, | ||
570 | flags, req->u.rw.seg[i].gref, | ||
571 | blkif->domid); | ||
572 | } | ||
368 | } | 573 | } |
369 | 574 | ||
370 | ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg); | 575 | if (segs_to_map) { |
371 | BUG_ON(ret); | 576 | ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); |
577 | BUG_ON(ret); | ||
578 | } | ||
372 | 579 | ||
373 | /* | 580 | /* |
374 | * Now swizzle the MFN in our domain with the MFN from the other domain | 581 | * Now swizzle the MFN in our domain with the MFN from the other domain |
375 | * so that when we access vaddr(pending_req,i) it has the contents of | 582 | * so that when we access vaddr(pending_req,i) it has the contents of |
376 | * the page from the other domain. | 583 | * the page from the other domain. |
377 | */ | 584 | */ |
378 | for (i = 0; i < nseg; i++) { | 585 | bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
379 | if (unlikely(map[i].status != 0)) { | 586 | for (i = 0, j = 0; i < nseg; i++) { |
380 | pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); | 587 | if (!persistent_gnts[i] || !persistent_gnts[i]->handle) { |
381 | map[i].handle = BLKBACK_INVALID_HANDLE; | 588 | /* This is a newly mapped grant */ |
382 | ret |= 1; | 589 | BUG_ON(j >= segs_to_map); |
590 | if (unlikely(map[j].status != 0)) { | ||
591 | pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); | ||
592 | map[j].handle = BLKBACK_INVALID_HANDLE; | ||
593 | ret |= 1; | ||
594 | if (persistent_gnts[i]) { | ||
595 | rb_erase(&persistent_gnts[i]->node, | ||
596 | &blkif->persistent_gnts); | ||
597 | blkif->persistent_gnt_c--; | ||
598 | kfree(persistent_gnts[i]); | ||
599 | persistent_gnts[i] = NULL; | ||
600 | } | ||
601 | } | ||
602 | } | ||
603 | if (persistent_gnts[i]) { | ||
604 | if (!persistent_gnts[i]->handle) { | ||
605 | /* | ||
606 | * If this is a new persistent grant | ||
608 | * save the handle | ||
608 | */ | ||
609 | persistent_gnts[i]->handle = map[j].handle; | ||
610 | persistent_gnts[i]->dev_bus_addr = | ||
611 | map[j++].dev_bus_addr; | ||
612 | } | ||
613 | pending_handle(pending_req, i) = | ||
614 | persistent_gnts[i]->handle; | ||
615 | |||
616 | if (ret) | ||
617 | continue; | ||
618 | |||
619 | seg[i].buf = persistent_gnts[i]->dev_bus_addr | | ||
620 | (req->u.rw.seg[i].first_sect << 9); | ||
621 | } else { | ||
622 | pending_handle(pending_req, i) = map[j].handle; | ||
623 | bitmap_set(pending_req->unmap_seg, i, 1); | ||
624 | |||
625 | if (ret) { | ||
626 | j++; | ||
627 | continue; | ||
628 | } | ||
629 | |||
630 | seg[i].buf = map[j++].dev_bus_addr | | ||
631 | (req->u.rw.seg[i].first_sect << 9); | ||
383 | } | 632 | } |
384 | |||
385 | pending_handle(pending_req, i) = map[i].handle; | ||
386 | |||
387 | if (ret) | ||
388 | continue; | ||
389 | |||
390 | seg[i].buf = map[i].dev_bus_addr | | ||
391 | (req->u.rw.seg[i].first_sect << 9); | ||
392 | } | 633 | } |
393 | return ret; | 634 | return ret; |
394 | } | 635 | } |
@@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
591 | int operation; | 832 | int operation; |
592 | struct blk_plug plug; | 833 | struct blk_plug plug; |
593 | bool drain = false; | 834 | bool drain = false; |
835 | struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
594 | 836 | ||
595 | switch (req->operation) { | 837 | switch (req->operation) { |
596 | case BLKIF_OP_READ: | 838 | case BLKIF_OP_READ: |
@@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
677 | * the hypercall to unmap the grants - that is all done in | 919 | * the hypercall to unmap the grants - that is all done in |
678 | * xen_blkbk_unmap. | 920 | * xen_blkbk_unmap. |
679 | */ | 921 | */ |
680 | if (xen_blkbk_map(req, pending_req, seg)) | 922 | if (xen_blkbk_map(req, pending_req, seg, pages)) |
681 | goto fail_flush; | 923 | goto fail_flush; |
682 | 924 | ||
683 | /* | 925 | /* |
@@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
689 | for (i = 0; i < nseg; i++) { | 931 | for (i = 0; i < nseg; i++) { |
690 | while ((bio == NULL) || | 932 | while ((bio == NULL) || |
691 | (bio_add_page(bio, | 933 | (bio_add_page(bio, |
692 | blkbk->pending_page(pending_req, i), | 934 | pages[i], |
693 | seg[i].nsec << 9, | 935 | seg[i].nsec << 9, |
694 | seg[i].buf & ~PAGE_MASK) == 0)) { | 936 | seg[i].buf & ~PAGE_MASK) == 0)) { |
695 | 937 | ||