Diffstat (limited to 'drivers/block/xen-blkback')
 drivers/block/xen-blkback/blkback.c | 292
 drivers/block/xen-blkback/common.h  |  17
 drivers/block/xen-blkback/xenbus.c  |  23
 3 files changed, 305 insertions(+), 27 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 280a13846e6c..d7dd5cbdac5f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
+	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -99,6 +101,36 @@ struct xen_blkbk {
 static struct xen_blkbk *blkbk;
 
 /*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+	switch (protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_32:
+		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_64:
+		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+
+/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
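The arithmetic in the new comment holds: 32 ring entries x 11 segments x 48 bytes of tracking data is 16896 bytes per device, and the 352 persistently mapped pages themselves add another 1408 KiB with 4 KiB pages. A minimal userspace sketch of the same bound, with the constants hard-coded as assumptions rather than pulled from the ring macros:

	#include <stdio.h>

	/* Assumed values, mirroring the comment in the hunk above. */
	#define RING_SIZE                      32 /* __CONST_RING_SIZE(blkif, PAGE_SIZE) */
	#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
	#define PERSISTENT_GNT_SIZE            48 /* sizeof(struct persistent_gnt), x86-64 */

	int main(void)
	{
		int max_grants = RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST;

		printf("max persistent grants: %d\n", max_grants);      /* 352 */
		printf("tracking memory: %d bytes\n",
		       max_grants * PERSISTENT_GNT_SIZE);               /* 16896 */
		printf("mapped guest pages: %d KiB\n", max_grants * 4); /* 1408 */
		return 0;
	}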
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
 			  unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+	     &(pos)->node != NULL; \
+	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+			       struct persistent_gnt *persistent_gnt)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct persistent_gnt *this;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		this = container_of(*new, struct persistent_gnt, node);
+
+		parent = *new;
+		if (persistent_gnt->gnt < this->gnt)
+			new = &((*new)->rb_left);
+		else if (persistent_gnt->gnt > this->gnt)
+			new = &((*new)->rb_right);
+		else {
+			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+			BUG();
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&(persistent_gnt->node), parent, new);
+	rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+						 grant_ref_t gref)
+{
+	struct persistent_gnt *data;
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		data = container_of(node, struct persistent_gnt, node);
+
+		if (gref < data->gnt)
+			node = node->rb_left;
+		else if (gref > data->gnt)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
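These are standard intrusive-rbtree helpers keyed on the grant reference; hitting a duplicate gref is a BUG() because each grant may be inserted only once. Note that foreach_grant's termination test, &(pos)->node != NULL, relies on container_of() applied to a NULL rb_node yielding a pointer whose node member address is NULL again. The lookup-or-insert pattern the helpers enable looks roughly like this (a condensed sketch of what the mapping path below does, with allocation of the backing page elided):

	struct persistent_gnt *gnt;

	gnt = get_persistent_gnt(&blkif->persistent_gnts, gref);
	if (!gnt) {
		/* Grant not cached yet: allocate a tracking node and insert it. */
		gnt = kzalloc(sizeof(*gnt), GFP_KERNEL);
		if (!gnt)
			return -ENOMEM;
		gnt->gnt = gref;
		add_persistent_gnt(&blkif->persistent_gnts, gnt);
		blkif->persistent_gnt_c++;
	}
	/* gnt->page now holds (or will receive) the long-lived mapping. */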
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret = 0;
+	int segs_to_unmap = 0;
 
 	xen_blkif_get(blkif);
 
@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg)
 			print_stats(blkif);
 	}
 
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
+		foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) {
+			BUG_ON(persistent_gnt->handle ==
+				BLKBACK_INVALID_HANDLE);
+			gnttab_set_unmap_op(&unmap[segs_to_unmap],
+				(unsigned long) pfn_to_kaddr(page_to_pfn(
+					persistent_gnt->page)),
+				GNTMAP_host_map,
+				persistent_gnt->handle);
+
+			pages[segs_to_unmap] = persistent_gnt->page;
+			rb_erase(&persistent_gnt->node,
+				 &blkif->persistent_gnts);
+			kfree(persistent_gnt);
+			blkif->persistent_gnt_c--;
+
+			if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+			    !rb_next(&persistent_gnt->node)) {
+				ret = gnttab_unmap_refs(unmap, NULL, pages,
+					segs_to_unmap);
+				BUG_ON(ret);
+				segs_to_unmap = 0;
+			}
+		}
+	}
+
+	BUG_ON(blkif->persistent_gnt_c != 0);
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
 	if (log_stats)
 		print_stats(blkif);
 
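Teardown batches the hypercalls: up to BLKIF_MAX_SEGMENTS_PER_REQUEST unmap ops are queued before each gnttab_unmap_refs() call. One subtlety worth flagging: the flush condition calls rb_next(&persistent_gnt->node) after that node has been rb_erase()d and kfree()d, and foreach_grant likewise advances through the erased node. An equivalent loop that never touches a freed node can be built on rb_first(), as in this sketch (not the committed code):

	struct rb_node *n;

	while ((n = rb_first(&blkif->persistent_gnts))) {
		persistent_gnt = rb_entry(n, struct persistent_gnt, node);
		gnttab_set_unmap_op(&unmap[segs_to_unmap],
			(unsigned long) pfn_to_kaddr(page_to_pfn(
				persistent_gnt->page)),
			GNTMAP_host_map, persistent_gnt->handle);
		pages[segs_to_unmap] = persistent_gnt->page;
		rb_erase(n, &blkif->persistent_gnts);
		kfree(persistent_gnt);

		/* Flush a full batch, or the final partial one. */
		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
		    RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
			BUG_ON(gnttab_unmap_refs(unmap, NULL, pages,
						 segs_to_unmap));
			segs_to_unmap = 0;
		}
	}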
@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
+		if (!test_bit(i, req->unmap_seg))
+			continue;
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
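unmap_seg records which segments of this request were mapped the classic, transient way; persistently mapped segments stay in place for reuse and must not be unmapped here. The bit is the contract between the two paths, set when a transient mapping is created and tested before tearing one down:

	/* map path (further down): transient mapping for segment i */
	bitmap_set(pending_req->unmap_seg, i, 1);

	/* unmap path (above): persistently mapped segments are skipped */
	if (!test_bit(i, req->unmap_seg))
		continue;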
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
 
 static int xen_blkbk_map(struct blkif_request *req,
 			 struct pending_req *pending_req,
-			 struct seg_buf seg[])
+			 struct seg_buf seg[],
+			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int i;
+	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt = NULL;
+	struct xen_blkif *blkif = pending_req->blkif;
+	phys_addr_t addr = 0;
+	int i, j;
+	bool new_map;
 	int nseg = req->u.rw.nr_segments;
+	int segs_to_map = 0;
 	int ret = 0;
+	int use_persistent_gnts;
+
+	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+	BUG_ON(blkif->persistent_gnt_c >
+	       max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req,
 	for (i = 0; i < nseg; i++) {
 		uint32_t flags;
 
-		flags = GNTMAP_host_map;
-		if (pending_req->operation != BLKIF_OP_READ)
-			flags |= GNTMAP_readonly;
-		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-				  req->u.rw.seg[i].gref,
-				  pending_req->blkif->domid);
+		if (use_persistent_gnts)
+			persistent_gnt = get_persistent_gnt(
+				&blkif->persistent_gnts,
+				req->u.rw.seg[i].gref);
+
+		if (persistent_gnt) {
+			/*
+			 * We are using persistent grants and
+			 * the grant is already mapped
+			 */
+			new_map = false;
+		} else if (use_persistent_gnts &&
+			   blkif->persistent_gnt_c <
+			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			new_map = true;
+			persistent_gnt = kzalloc(
+				sizeof(struct persistent_gnt),
+				GFP_KERNEL);
+			if (!persistent_gnt)
+				return -ENOMEM;
+			persistent_gnt->page = alloc_page(GFP_KERNEL);
+			if (!persistent_gnt->page) {
+				kfree(persistent_gnt);
+				return -ENOMEM;
+			}
+			persistent_gnt->gnt = req->u.rw.seg[i].gref;
+
+			pages_to_gnt[segs_to_map] =
+				persistent_gnt->page;
+			addr = (unsigned long) pfn_to_kaddr(
+				page_to_pfn(persistent_gnt->page));
+
+			add_persistent_gnt(&blkif->persistent_gnts,
+				persistent_gnt);
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+		} else {
+			/*
+			 * We are either using persistent grants and
+			 * hit the maximum limit of grants mapped,
+			 * or we are not using persistent grants.
+			 */
+			if (use_persistent_gnts &&
+			    !blkif->vbd.overflow_max_grants) {
+				blkif->vbd.overflow_max_grants = 1;
+				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+					 blkif->domid, blkif->vbd.handle);
+			}
+			new_map = true;
+			pages[i] = blkbk->pending_page(pending_req, i);
+			addr = vaddr(pending_req, i);
+			pages_to_gnt[segs_to_map] =
+				blkbk->pending_page(pending_req, i);
+		}
+
+		if (persistent_gnt) {
+			pages[i] = persistent_gnt->page;
+			persistent_gnts[i] = persistent_gnt;
+		} else {
+			persistent_gnts[i] = NULL;
+		}
+
+		if (new_map) {
+			flags = GNTMAP_host_map;
+			if (!persistent_gnt &&
+			    (pending_req->operation != BLKIF_OP_READ))
+				flags |= GNTMAP_readonly;
+			gnttab_set_map_op(&map[segs_to_map++], addr,
+					  flags, req->u.rw.seg[i].gref,
+					  blkif->domid);
+		}
 	}
 
-	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-	BUG_ON(ret);
+	if (segs_to_map) {
+		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+		BUG_ON(ret);
+	}
 
 	/*
 	 * Now swizzle the MFN in our domain with the MFN from the other domain
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (i = 0; i < nseg; i++) {
-		if (unlikely(map[i].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-			map[i].handle = BLKBACK_INVALID_HANDLE;
-			ret |= 1;
+	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	for (i = 0, j = 0; i < nseg; i++) {
+		if (!persistent_gnts[i] || !persistent_gnts[i]->handle) {
+			/* This is a newly mapped grant */
+			BUG_ON(j >= segs_to_map);
+			if (unlikely(map[j].status != 0)) {
+				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+				map[j].handle = BLKBACK_INVALID_HANDLE;
+				ret |= 1;
+				if (persistent_gnts[i]) {
+					rb_erase(&persistent_gnts[i]->node,
+						 &blkif->persistent_gnts);
+					blkif->persistent_gnt_c--;
+					kfree(persistent_gnts[i]);
+					persistent_gnts[i] = NULL;
+				}
+			}
+		}
+		if (persistent_gnts[i]) {
+			if (!persistent_gnts[i]->handle) {
+				/*
+				 * If this is a new persistent grant
+				 * save the handler
+				 */
+				persistent_gnts[i]->handle = map[j].handle;
+				persistent_gnts[i]->dev_bus_addr =
+					map[j++].dev_bus_addr;
+			}
+			pending_handle(pending_req, i) =
+				persistent_gnts[i]->handle;
+
+			if (ret)
+				continue;
+
+			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		} else {
+			pending_handle(pending_req, i) = map[j].handle;
+			bitmap_set(pending_req->unmap_seg, i, 1);
+
+			if (ret) {
+				j++;
+				continue;
+			}
+
+			seg[i].buf = map[j++].dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
 		}
-
-		pending_handle(pending_req, i) = map[i].handle;
-
-		if (ret)
-			continue;
-
-		seg[i].buf = map[i].dev_bus_addr |
-			(req->u.rw.seg[i].first_sect << 9);
 	}
 	return ret;
 }
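The per-segment logic above is a three-way decision; only cases 2 and 3 queue work for gnttab_map_refs(), which is why the second loop walks i over all nseg segments while j indexes just the freshly mapped subset. Persistent grants are always mapped without GNTMAP_readonly, since the same mapping will later serve both reads and writes; only transient mappings of write requests get the read-only flag. In outline:

	if (persistent_gnt) {
		/* 1) gref already in the tree: reuse the mapping,
		 *    no map op queued at all */
	} else if (use_persistent_gnts &&
		   blkif->persistent_gnt_c <
		   max_mapped_grant_pages(blkif->blk_protocol)) {
		/* 2) room left: allocate a page plus tracking node,
		 *    insert into the tree, queue a map op (always RW,
		 *    as the mapping outlives this request) */
	} else {
		/* 3) feature off or tree full: classic transient map
		 *    into the pending page, marked in unmap_seg so
		 *    xen_blkbk_unmap() tears it down afterwards */
	}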
@@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (xen_blkbk_map(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     blkbk->pending_page(pending_req, i),
+				     pages[i],
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9ad3b5ec1dc1..ae7951f0e268 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/rbtree.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
@@ -160,10 +161,22 @@ struct xen_vbd {
 	sector_t		size;
 	bool			flush_support;
 	bool			discard_secure;
+
+	unsigned int		feature_gnt_persistent:1;
+	unsigned int		overflow_max_grants:1;
 };
 
 struct backend_info;
 
+
+struct persistent_gnt {
+	struct page *page;
+	grant_ref_t gnt;
+	grant_handle_t handle;
+	uint64_t dev_bus_addr;
+	struct rb_node node;
+};
+
 struct xen_blkif {
 	/* Unique identifier for this interface. */
 	domid_t			domid;
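The 48-byte figure quoted in the blkback.c comment is consistent with the x86-64 layout of this struct (assuming default alignment, no packing):

	struct persistent_gnt {
		struct page *page;     /*  8 bytes */
		grant_ref_t gnt;       /*  4 bytes (uint32_t) */
		grant_handle_t handle; /*  4 bytes (uint32_t), packs after gnt */
		uint64_t dev_bus_addr; /*  8 bytes */
		struct rb_node node;   /* 24 bytes: parent/color word + 2 children */
	};                             /* total: 48 bytes */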
@@ -190,6 +203,10 @@ struct xen_blkif {
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
 
+	/* tree to store persistent grants */
+	struct rb_root		persistent_gnts;
+	unsigned int		persistent_gnt_c;
+
 	/* statistics */
 	unsigned long		st_print;
 	int			st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4f66171c6683..b2250265308a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
+	blkif->persistent_gnts.rb_node = NULL;
 
 	return blkif;
 }
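Clearing rb_node by hand works, but an empty tree is just a NULL root, so the idiomatic initializer would do the same thing in one step:

	blkif->persistent_gnts = RB_ROOT;	/* equivalent to the added line */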
@@ -673,6 +674,13 @@ again:
 
 	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+				 dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
@@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be)
 	struct xenbus_device *dev = be->dev;
 	unsigned long ring_ref;
 	unsigned int evtchn;
+	unsigned int pers_grants;
 	char protocol[64] = "";
 	int err;
 
@@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be)
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "feature-persistent-grants", "%u",
+			    &pers_grants, NULL);
+	if (err)
+		pers_grants = 0;
+
+	be->blkif->vbd.feature_gnt_persistent = pers_grants;
+	be->blkif->vbd.overflow_max_grants = 0;
+
+	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+		pers_grants ? "persistent grants" : "");
 
 	/* Map the shared frame, irq etc. */
 	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
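The negotiation is deliberately optional: a missing frontend key degrades to pers_grants = 0 instead of failing the connection, so old frontends keep working. Note the asymmetric key names in this patch: the backend advertises by writing feature-persistent but reads feature-persistent-grants from the frontend, and both ends must use exactly these spellings for the feature to engage. A frontend-side probe would presumably mirror the backend's own xenbus_gather() usage (a sketch under that assumption):

	unsigned int pers;
	int err;

	/* Does the other end advertise persistent grants? */
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u", &pers, NULL);
	if (err)
		pers = 0;	/* absent key: feature unsupported */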