Diffstat (limited to 'drivers/block/xen-blkback/blkback.c')
-rw-r--r--	drivers/block/xen-blkback/blkback.c	292
1 file changed, 267 insertions(+), 25 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 280a13846e6c..d7dd5cbdac5f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
 	unsigned short	operation;
 	int		status;
 	struct list_head free_list;
+	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -99,6 +101,36 @@ struct xen_blkbk {
 static struct xen_blkbk *blkbk;
 
 /*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+	switch (protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_32:
+		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_64:
+		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+
+/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
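
As a sanity check of the arithmetic in the comment above, the figures can be reproduced with a few lines of ordinary C. This is only an illustration; the ring size of 32, the 11-segment limit, the 48-byte struct size and the 4 KiB page are taken from the comment, not recomputed from the Xen headers:

#include <stdio.h>

int main(void)
{
	const unsigned int ring_size = 32;    /* entries per ring, per the comment above */
	const unsigned int max_segs  = 11;    /* BLKIF_MAX_SEGMENTS_PER_REQUEST          */
	const unsigned int gnt_size  = 48;    /* sizeof(struct persistent_gnt)           */
	const unsigned int page_size = 4096;  /* assumed 4 KiB PAGE_SIZE                 */

	unsigned int max_grants = ring_size * max_segs;           /* 352 grants  */
	unsigned int tree_bytes = max_grants * gnt_size;          /* 16896 bytes */
	unsigned int mapped_kib = max_grants * page_size / 1024;  /* 1408 KiB    */

	printf("max grants %u, tracking structs %u bytes, mapped pages %u KiB\n",
	       max_grants, tree_bytes, mapped_kib);
	return 0;
}

So besides the roughly 16.5 KiB of tracking structures, up to 352 granted pages (about 1408 KiB) can stay mapped per device while persistent grants are in use.
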
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
 			  unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+	     &(pos)->node != NULL; \
+	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+			       struct persistent_gnt *persistent_gnt)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct persistent_gnt *this;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		this = container_of(*new, struct persistent_gnt, node);
+
+		parent = *new;
+		if (persistent_gnt->gnt < this->gnt)
+			new = &((*new)->rb_left);
+		else if (persistent_gnt->gnt > this->gnt)
+			new = &((*new)->rb_right);
+		else {
+			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+			BUG();
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&(persistent_gnt->node), parent, new);
+	rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+						 grant_ref_t gref)
+{
+	struct persistent_gnt *data;
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		data = container_of(node, struct persistent_gnt, node);
+
+		if (gref < data->gnt)
+			node = node->rb_left;
+		else if (gref > data->gnt)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
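
add_persistent_gnt() and get_persistent_gnt() above keep the persistently mapped grants in an rb-tree keyed by grant reference. The stand-alone sketch below mirrors the same keyed insert and lookup flow in user space, using a plain unbalanced binary search tree instead of the kernel rb-tree, purely as an illustration; gnt_node, add_gnt and get_gnt are made-up names, not blkback symbols:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t grant_ref_t;

struct gnt_node {
	grant_ref_t gnt;               /* key: the grant reference */
	struct gnt_node *left, *right;
};

/* Keyed insert; duplicates are rejected, as add_persistent_gnt() BUG()s on them. */
static int add_gnt(struct gnt_node **root, struct gnt_node *n)
{
	while (*root) {
		if (n->gnt < (*root)->gnt)
			root = &(*root)->left;
		else if (n->gnt > (*root)->gnt)
			root = &(*root)->right;
		else
			return -1;     /* gref already tracked */
	}
	*root = n;
	return 0;
}

/* Same walk as get_persistent_gnt(): descend left/right by key until a match. */
static struct gnt_node *get_gnt(struct gnt_node *root, grant_ref_t gref)
{
	while (root) {
		if (gref < root->gnt)
			root = root->left;
		else if (gref > root->gnt)
			root = root->right;
		else
			return root;
	}
	return NULL;
}

int main(void)
{
	struct gnt_node *root = NULL;
	struct gnt_node a = { .gnt = 42 }, b = { .gnt = 7 };

	add_gnt(&root, &a);
	add_gnt(&root, &b);
	printf("gref 7:  %s\n", get_gnt(root, 7)  ? "found" : "missing");
	printf("gref 13: %s\n", get_gnt(root, 13) ? "found" : "missing");
	return 0;
}

The kernel version gets rebalancing for free from rb_link_node()/rb_insert_color(), which matters once hundreds of grants are cached per device.
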
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret = 0;
+	int segs_to_unmap = 0;
 
 	xen_blkif_get(blkif);
 
@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg)
 			print_stats(blkif);
 	}
 
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
+		foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) {
+			BUG_ON(persistent_gnt->handle ==
+				BLKBACK_INVALID_HANDLE);
+			gnttab_set_unmap_op(&unmap[segs_to_unmap],
+				(unsigned long) pfn_to_kaddr(page_to_pfn(
+					persistent_gnt->page)),
+				GNTMAP_host_map,
+				persistent_gnt->handle);
+
+			pages[segs_to_unmap] = persistent_gnt->page;
+			rb_erase(&persistent_gnt->node,
+				&blkif->persistent_gnts);
+			kfree(persistent_gnt);
+			blkif->persistent_gnt_c--;
+
+			if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+				!rb_next(&persistent_gnt->node)) {
+				ret = gnttab_unmap_refs(unmap, NULL, pages,
+					segs_to_unmap);
+				BUG_ON(ret);
+				segs_to_unmap = 0;
+			}
+		}
+	}
+
+	BUG_ON(blkif->persistent_gnt_c != 0);
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
 	if (log_stats)
 		print_stats(blkif);
 
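
The teardown loop above drains the tree in batches: unmap operations are accumulated into an array of up to BLKIF_MAX_SEGMENTS_PER_REQUEST entries and flushed in a single gnttab_unmap_refs() call whenever the batch fills up or the tree runs out. A minimal sketch of that batching shape, where flush_batch() and the item count are invented stand-ins for the hypercall and the tree walk:

#include <stdio.h>

#define BATCH_MAX 11   /* mirrors BLKIF_MAX_SEGMENTS_PER_REQUEST */

/* Invented stand-in for the gnttab_unmap_refs() hypercall. */
static void flush_batch(const int *batch, int n)
{
	printf("unmapping %d grants in one call\n", n);
}

int main(void)
{
	int batch[BATCH_MAX];
	int n = 0;
	int total = 30;   /* pretend the tree holds 30 persistent grants */

	for (int item = 0; item < total; item++) {
		batch[n++] = item;
		/* Flush when the batch is full or this was the last grant. */
		if (n == BATCH_MAX || item == total - 1) {
			flush_batch(batch, n);
			n = 0;
		}
	}
	return 0;
}
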
@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
+		if (!test_bit(i, req->unmap_seg))
+			continue;
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
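
With the new unmap_seg bitmap, xen_blkbk_unmap() only touches segments that were mapped as one-shot grants; persistently mapped segments keep their mapping across requests. A toy sketch of that skip-by-bitmap idea, in which a plain uint16_t mask stands in for the DECLARE_BITMAP() field and the printf for the actual unmap:

#include <stdint.h>
#include <stdio.h>

#define MAX_SEGS 11   /* mirrors BLKIF_MAX_SEGMENTS_PER_REQUEST */

int main(void)
{
	uint16_t unmap_seg = 0;   /* stand-in for the unmap_seg bitmap */

	/* Mark segments 0 and 3 as one-shot mappings that must be torn down. */
	unmap_seg |= 1u << 0;
	unmap_seg |= 1u << 3;

	/* Unmap pass: persistent segments (bit clear) are skipped. */
	for (int i = 0; i < MAX_SEGS; i++) {
		if (!(unmap_seg & (1u << i)))
			continue;
		printf("unmapping segment %d\n", i);
	}
	return 0;
}
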
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
 
 static int xen_blkbk_map(struct blkif_request *req,
 			 struct pending_req *pending_req,
-			 struct seg_buf seg[])
+			 struct seg_buf seg[],
+			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int i;
+	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt = NULL;
+	struct xen_blkif *blkif = pending_req->blkif;
+	phys_addr_t addr = 0;
+	int i, j;
+	bool new_map;
 	int nseg = req->u.rw.nr_segments;
+	int segs_to_map = 0;
 	int ret = 0;
+	int use_persistent_gnts;
+
+	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+	BUG_ON(blkif->persistent_gnt_c >
+		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req,
 	for (i = 0; i < nseg; i++) {
 		uint32_t flags;
 
-		flags = GNTMAP_host_map;
-		if (pending_req->operation != BLKIF_OP_READ)
-			flags |= GNTMAP_readonly;
-		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-				  req->u.rw.seg[i].gref,
-				  pending_req->blkif->domid);
+		if (use_persistent_gnts)
+			persistent_gnt = get_persistent_gnt(
+				&blkif->persistent_gnts,
+				req->u.rw.seg[i].gref);
+
+		if (persistent_gnt) {
+			/*
+			 * We are using persistent grants and
+			 * the grant is already mapped
+			 */
+			new_map = false;
+		} else if (use_persistent_gnts &&
+			   blkif->persistent_gnt_c <
+			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			new_map = true;
+			persistent_gnt = kzalloc(
+				sizeof(struct persistent_gnt),
+				GFP_KERNEL);
+			if (!persistent_gnt)
+				return -ENOMEM;
+			persistent_gnt->page = alloc_page(GFP_KERNEL);
+			if (!persistent_gnt->page) {
+				kfree(persistent_gnt);
+				return -ENOMEM;
+			}
+			persistent_gnt->gnt = req->u.rw.seg[i].gref;
+
+			pages_to_gnt[segs_to_map] =
+				persistent_gnt->page;
+			addr = (unsigned long) pfn_to_kaddr(
+				page_to_pfn(persistent_gnt->page));
+
+			add_persistent_gnt(&blkif->persistent_gnts,
+				persistent_gnt);
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+		} else {
+			/*
+			 * We are either using persistent grants and
+			 * hit the maximum limit of grants mapped,
+			 * or we are not using persistent grants.
+			 */
+			if (use_persistent_gnts &&
+				!blkif->vbd.overflow_max_grants) {
+				blkif->vbd.overflow_max_grants = 1;
+				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+					 blkif->domid, blkif->vbd.handle);
+			}
+			new_map = true;
+			pages[i] = blkbk->pending_page(pending_req, i);
+			addr = vaddr(pending_req, i);
+			pages_to_gnt[segs_to_map] =
+				blkbk->pending_page(pending_req, i);
+		}
+
+		if (persistent_gnt) {
+			pages[i] = persistent_gnt->page;
+			persistent_gnts[i] = persistent_gnt;
+		} else {
+			persistent_gnts[i] = NULL;
+		}
+
+		if (new_map) {
+			flags = GNTMAP_host_map;
+			if (!persistent_gnt &&
+			    (pending_req->operation != BLKIF_OP_READ))
+				flags |= GNTMAP_readonly;
+			gnttab_set_map_op(&map[segs_to_map++], addr,
+					  flags, req->u.rw.seg[i].gref,
+					  blkif->domid);
+		}
 	}
 
-	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-	BUG_ON(ret);
+	if (segs_to_map) {
+		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+		BUG_ON(ret);
+	}
 
 	/*
 	 * Now swizzle the MFN in our domain with the MFN from the other domain
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (i = 0; i < nseg; i++) {
-		if (unlikely(map[i].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-			map[i].handle = BLKBACK_INVALID_HANDLE;
-			ret |= 1;
+	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	for (i = 0, j = 0; i < nseg; i++) {
+		if (!persistent_gnts[i] || !persistent_gnts[i]->handle) {
+			/* This is a newly mapped grant */
+			BUG_ON(j >= segs_to_map);
+			if (unlikely(map[j].status != 0)) {
+				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+				map[j].handle = BLKBACK_INVALID_HANDLE;
+				ret |= 1;
+				if (persistent_gnts[i]) {
+					rb_erase(&persistent_gnts[i]->node,
+						 &blkif->persistent_gnts);
+					blkif->persistent_gnt_c--;
+					kfree(persistent_gnts[i]);
+					persistent_gnts[i] = NULL;
+				}
+			}
+		}
+		if (persistent_gnts[i]) {
+			if (!persistent_gnts[i]->handle) {
+				/*
+				 * If this is a new persistent grant
+				 * save the handler
+				 */
+				persistent_gnts[i]->handle = map[j].handle;
+				persistent_gnts[i]->dev_bus_addr =
+					map[j++].dev_bus_addr;
+			}
+			pending_handle(pending_req, i) =
+				persistent_gnts[i]->handle;
+
+			if (ret)
+				continue;
+
+			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		} else {
+			pending_handle(pending_req, i) = map[j].handle;
+			bitmap_set(pending_req->unmap_seg, i, 1);
+
+			if (ret) {
+				j++;
+				continue;
+			}
+
+			seg[i].buf = map[j++].dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
 		}
-
-		pending_handle(pending_req, i) = map[i].handle;
-
-		if (ret)
-			continue;
-
-		seg[i].buf = map[i].dev_bus_addr |
-			(req->u.rw.seg[i].first_sect << 9);
 	}
 	return ret;
 }
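
Stripped of the grant-table plumbing, each segment in the rewritten xen_blkbk_map() ends up in one of three states: reuse an already-persistent mapping, map and persist a new grant while there is room, or fall back to a one-shot mapping that is unmapped after the I/O completes. The schematic sketch below captures only that decision; cache_lookup(), cache_insert() and CACHE_LIMIT are placeholders for illustration, not blkback APIs:

#include <stdbool.h>
#include <stdio.h>

#define CACHE_LIMIT 4   /* small stand-in for the 32 * 11 grant limit */

static unsigned int cache[CACHE_LIMIT];
static int cache_count;

static bool cache_lookup(unsigned int gref)
{
	for (int i = 0; i < cache_count; i++)
		if (cache[i] == gref)
			return true;
	return false;
}

static void cache_insert(unsigned int gref)
{
	cache[cache_count++] = gref;
}

/* One segment's fate, mirroring the new_map / persistent_gnt decision above. */
static const char *map_segment(unsigned int gref, bool use_persistent)
{
	if (use_persistent && cache_lookup(gref))
		return "reuse persistent mapping";             /* new_map = false       */
	if (use_persistent && cache_count < CACHE_LIMIT) {
		cache_insert(gref);
		return "map and persist";                      /* new_map = true, kept  */
	}
	return "one-shot map, unmapped after the I/O";         /* new_map = true        */
}

int main(void)
{
	unsigned int grefs[] = { 7, 9, 7, 11, 13, 17 };

	for (int i = 0; i < 6; i++)
		printf("gref %u: %s\n", grefs[i], map_segment(grefs[i], true));
	return 0;
}
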
@@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (xen_blkbk_map(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     blkbk->pending_page(pending_req, i),
+				     pages[i],
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 