Diffstat (limited to 'drivers')
 drivers/block/xen-blkback/blkback.c | 292
 drivers/block/xen-blkback/common.h  |  17
 drivers/block/xen-blkback/xenbus.c  |  23
 drivers/block/xen-blkfront.c        | 197
 4 files changed, 474 insertions(+), 55 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 280a13846e6c..d7dd5cbdac5f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
+	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -99,6 +101,36 @@ struct xen_blkbk {
 static struct xen_blkbk *blkbk;
 
 /*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+	switch (protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_32:
+		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_64:
+		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+
+/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
 			  unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+	     &(pos)->node != NULL; \
+	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+			       struct persistent_gnt *persistent_gnt)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct persistent_gnt *this;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		this = container_of(*new, struct persistent_gnt, node);
+
+		parent = *new;
+		if (persistent_gnt->gnt < this->gnt)
+			new = &((*new)->rb_left);
+		else if (persistent_gnt->gnt > this->gnt)
+			new = &((*new)->rb_right);
+		else {
+			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+			BUG();
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&(persistent_gnt->node), parent, new);
+	rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+						 grant_ref_t gref)
+{
+	struct persistent_gnt *data;
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		data = container_of(node, struct persistent_gnt, node);
+
+		if (gref < data->gnt)
+			node = node->rb_left;
+		else if (gref > data->gnt)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret = 0;
+	int segs_to_unmap = 0;
 
 	xen_blkif_get(blkif);
 
@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg)
 			print_stats(blkif);
 	}
 
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
+		foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) {
+			BUG_ON(persistent_gnt->handle ==
+				BLKBACK_INVALID_HANDLE);
+			gnttab_set_unmap_op(&unmap[segs_to_unmap],
+				(unsigned long) pfn_to_kaddr(page_to_pfn(
+					persistent_gnt->page)),
+				GNTMAP_host_map,
+				persistent_gnt->handle);
+
+			pages[segs_to_unmap] = persistent_gnt->page;
+			rb_erase(&persistent_gnt->node,
+				 &blkif->persistent_gnts);
+			kfree(persistent_gnt);
+			blkif->persistent_gnt_c--;
+
+			if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+			    !rb_next(&persistent_gnt->node)) {
+				ret = gnttab_unmap_refs(unmap, NULL, pages,
+					segs_to_unmap);
+				BUG_ON(ret);
+				segs_to_unmap = 0;
+			}
+		}
+	}
+
+	BUG_ON(blkif->persistent_gnt_c != 0);
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
 	if (log_stats)
 		print_stats(blkif);
 
@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
+		if (!test_bit(i, req->unmap_seg))
+			continue;
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
 
 static int xen_blkbk_map(struct blkif_request *req,
 			 struct pending_req *pending_req,
-			 struct seg_buf seg[])
+			 struct seg_buf seg[],
+			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int i;
+	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt = NULL;
+	struct xen_blkif *blkif = pending_req->blkif;
+	phys_addr_t addr = 0;
+	int i, j;
+	bool new_map;
 	int nseg = req->u.rw.nr_segments;
+	int segs_to_map = 0;
 	int ret = 0;
+	int use_persistent_gnts;
+
+	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+	BUG_ON(blkif->persistent_gnt_c >
+		max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req,
 	for (i = 0; i < nseg; i++) {
 		uint32_t flags;
 
-		flags = GNTMAP_host_map;
-		if (pending_req->operation != BLKIF_OP_READ)
-			flags |= GNTMAP_readonly;
-		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-				  req->u.rw.seg[i].gref,
-				  pending_req->blkif->domid);
+		if (use_persistent_gnts)
+			persistent_gnt = get_persistent_gnt(
+				&blkif->persistent_gnts,
+				req->u.rw.seg[i].gref);
+
+		if (persistent_gnt) {
+			/*
+			 * We are using persistent grants and
+			 * the grant is already mapped
+			 */
+			new_map = false;
+		} else if (use_persistent_gnts &&
+			   blkif->persistent_gnt_c <
+			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			new_map = true;
+			persistent_gnt = kzalloc(
+				sizeof(struct persistent_gnt),
+				GFP_KERNEL);
+			if (!persistent_gnt)
+				return -ENOMEM;
+			persistent_gnt->page = alloc_page(GFP_KERNEL);
+			if (!persistent_gnt->page) {
+				kfree(persistent_gnt);
+				return -ENOMEM;
+			}
+			persistent_gnt->gnt = req->u.rw.seg[i].gref;
+
+			pages_to_gnt[segs_to_map] =
+				persistent_gnt->page;
+			addr = (unsigned long) pfn_to_kaddr(
+				page_to_pfn(persistent_gnt->page));
+
+			add_persistent_gnt(&blkif->persistent_gnts,
+				persistent_gnt);
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+		} else {
+			/*
+			 * We are either using persistent grants and
+			 * hit the maximum limit of grants mapped,
+			 * or we are not using persistent grants.
+			 */
+			if (use_persistent_gnts &&
+				!blkif->vbd.overflow_max_grants) {
+				blkif->vbd.overflow_max_grants = 1;
+				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+					 blkif->domid, blkif->vbd.handle);
+			}
+			new_map = true;
+			pages[i] = blkbk->pending_page(pending_req, i);
+			addr = vaddr(pending_req, i);
+			pages_to_gnt[segs_to_map] =
+				blkbk->pending_page(pending_req, i);
+		}
+
+		if (persistent_gnt) {
+			pages[i] = persistent_gnt->page;
+			persistent_gnts[i] = persistent_gnt;
+		} else {
+			persistent_gnts[i] = NULL;
+		}
+
+		if (new_map) {
+			flags = GNTMAP_host_map;
+			if (!persistent_gnt &&
+			    (pending_req->operation != BLKIF_OP_READ))
+				flags |= GNTMAP_readonly;
+			gnttab_set_map_op(&map[segs_to_map++], addr,
+					  flags, req->u.rw.seg[i].gref,
+					  blkif->domid);
+		}
 	}
 
-	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-	BUG_ON(ret);
+	if (segs_to_map) {
+		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+		BUG_ON(ret);
+	}
 
 	/*
 	 * Now swizzle the MFN in our domain with the MFN from the other domain
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (i = 0; i < nseg; i++) {
-		if (unlikely(map[i].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-			map[i].handle = BLKBACK_INVALID_HANDLE;
-			ret |= 1;
-		}
-
-		pending_handle(pending_req, i) = map[i].handle;
-
-		if (ret)
-			continue;
-
-		seg[i].buf = map[i].dev_bus_addr |
-			(req->u.rw.seg[i].first_sect << 9);
+	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	for (i = 0, j = 0; i < nseg; i++) {
+		if (!persistent_gnts[i] || !persistent_gnts[i]->handle) {
+			/* This is a newly mapped grant */
+			BUG_ON(j >= segs_to_map);
+			if (unlikely(map[j].status != 0)) {
+				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+				map[j].handle = BLKBACK_INVALID_HANDLE;
+				ret |= 1;
+				if (persistent_gnts[i]) {
+					rb_erase(&persistent_gnts[i]->node,
+						 &blkif->persistent_gnts);
+					blkif->persistent_gnt_c--;
+					kfree(persistent_gnts[i]);
+					persistent_gnts[i] = NULL;
+				}
+			}
+		}
+		if (persistent_gnts[i]) {
+			if (!persistent_gnts[i]->handle) {
+				/*
+				 * If this is a new persistent grant
+				 * save the handler
+				 */
+				persistent_gnts[i]->handle = map[j].handle;
+				persistent_gnts[i]->dev_bus_addr =
+					map[j++].dev_bus_addr;
+			}
+			pending_handle(pending_req, i) =
+				persistent_gnts[i]->handle;
+
+			if (ret)
+				continue;
+
+			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		} else {
+			pending_handle(pending_req, i) = map[j].handle;
+			bitmap_set(pending_req->unmap_seg, i, 1);
+
+			if (ret) {
+				j++;
+				continue;
+			}
+
+			seg[i].buf = map[j++].dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		}
 	}
 	return ret;
 }
@@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (xen_blkbk_map(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     blkbk->pending_page(pending_req, i),
+				     pages[i],
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 
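The backend side of this patch boils down to a per-device cache of already-mapped grants, keyed by grant reference and capped at RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST entries. The following is a hedged userspace sketch of that idea only, not the kernel code above: it uses a plain unbalanced BST and libc allocation where blkback uses an rbtree and kzalloc, and the names mirror the patch purely for illustration.

/* Userspace model of the blkback persistent-grant cache (illustrative only). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RING_SIZE  32
#define MAX_SEGS   11
#define MAX_GRANTS (RING_SIZE * MAX_SEGS)   /* cap quoted in the patch comment */

struct persistent_gnt {
	uint32_t gnt;                       /* grant reference, the lookup key */
	struct persistent_gnt *l, *r;
};

static struct persistent_gnt *root;
static unsigned int persistent_gnt_c;

/* Lookup by grant reference, as get_persistent_gnt() does over the rbtree. */
static struct persistent_gnt *get_persistent_gnt(uint32_t gref)
{
	struct persistent_gnt *n = root;

	while (n) {
		if (gref < n->gnt)
			n = n->l;
		else if (gref > n->gnt)
			n = n->r;
		else
			return n;
	}
	return NULL;
}

/* Insert a new entry unless the cache is full; a full cache means the
 * caller falls back to a one-shot map/unmap, as xen_blkbk_map() does. */
static struct persistent_gnt *add_persistent_gnt(uint32_t gref)
{
	struct persistent_gnt **link = &root, *n;

	if (persistent_gnt_c >= MAX_GRANTS)
		return NULL;
	while (*link)
		link = gref < (*link)->gnt ? &(*link)->l : &(*link)->r;
	n = calloc(1, sizeof(*n));
	if (!n)
		return NULL;
	n->gnt = gref;
	*link = n;
	persistent_gnt_c++;
	return n;
}

int main(void)
{
	/* First request maps grant 7 persistently; later requests reuse it. */
	if (!get_persistent_gnt(7))
		add_persistent_gnt(7);
	printf("cached=%u, hit=%s\n", persistent_gnt_c,
	       get_persistent_gnt(7) ? "yes" : "no");
	return 0;
}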
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9ad3b5ec1dc1..ae7951f0e268 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/rbtree.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
@@ -160,10 +161,22 @@ struct xen_vbd {
 	sector_t		size;
 	bool			flush_support;
 	bool			discard_secure;
+
+	unsigned int feature_gnt_persistent:1;
+	unsigned int overflow_max_grants:1;
 };
 
 struct backend_info;
 
+
+struct persistent_gnt {
+	struct page *page;
+	grant_ref_t gnt;
+	grant_handle_t handle;
+	uint64_t dev_bus_addr;
+	struct rb_node node;
+};
+
 struct xen_blkif {
 	/* Unique identifier for this interface. */
 	domid_t			domid;
@@ -190,6 +203,10 @@ struct xen_blkif {
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
 
+	/* tree to store persistent grants */
+	struct rb_root		persistent_gnts;
+	unsigned int		persistent_gnt_c;
+
 	/* statistics */
 	unsigned long		st_print;
 	int			st_rd_req;
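The blkback comment earlier quotes sizeof(struct persistent_gnt) as 48 bytes, which is what the struct added above works out to on a 64-bit build: a page pointer (8) + two 32-bit grant fields (8) + a 64-bit bus address (8) + a three-word rb_node (24). A hedged userspace check of that arithmetic, with a stand-in for struct rb_node and LP64 assumptions, not the kernel definition itself:

#include <stdint.h>
#include <stdio.h>

struct rb_node_model { void *w[3]; };        /* stand-in for struct rb_node (24 bytes on LP64) */

struct persistent_gnt_model {
	void *page;                           /* struct page *  */
	uint32_t gnt;                         /* grant_ref_t    */
	uint32_t handle;                      /* grant_handle_t */
	uint64_t dev_bus_addr;
	struct rb_node_model node;
};

int main(void)
{
	printf("sizeof = %zu (patch comment says 48)\n",
	       sizeof(struct persistent_gnt_model));
	return 0;
}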
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4f66171c6683..b2250265308a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
+	blkif->persistent_gnts.rb_node = NULL;
 
 	return blkif;
 }
@@ -673,6 +674,13 @@ again:
 
 	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+				 dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
@@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be)
 	struct xenbus_device *dev = be->dev;
 	unsigned long ring_ref;
 	unsigned int evtchn;
+	unsigned int pers_grants;
 	char protocol[64] = "";
 	int err;
 
@@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be)
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "feature-persistent-grants", "%u",
+			    &pers_grants, NULL);
+	if (err)
+		pers_grants = 0;
+
+	be->blkif->vbd.feature_gnt_persistent = pers_grants;
+	be->blkif->vbd.overflow_max_grants = 0;
+
+	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+		pers_grants ? "persistent grants" : "");
 
 	/* Map the shared frame, irq etc. */
 	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
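The xenbus changes above and the blkfront changes below implement a simple handshake: the backend advertises "feature-persistent" in its node, the frontend advertises "feature-persistent-grants" in its node, and each side reads the other's key and treats a missing or unreadable key as 0. A hedged plain-C sketch of that negotiation only, not the xenbus API, with the xenstore directories modelled as structs for illustration:

#include <stdbool.h>
#include <stdio.h>

struct xenstore_dir {                        /* stand-in for one end's xenstore node */
	bool feature_persistent;             /* written by the backend  */
	bool feature_persistent_grants;      /* written by the frontend */
};

/* connect_ring(): backend gathers "feature-persistent-grants" from the frontend. */
static bool backend_uses_persistent(const struct xenstore_dir *frontend)
{
	return frontend->feature_persistent_grants;
}

/* blkfront_connect(): frontend gathers "feature-persistent" from the backend. */
static bool frontend_uses_persistent(const struct xenstore_dir *backend)
{
	return backend->feature_persistent;
}

int main(void)
{
	struct xenstore_dir be = { .feature_persistent = true };
	struct xenstore_dir fe = { .feature_persistent_grants = true };

	printf("backend enables: %d, frontend enables: %d\n",
	       backend_uses_persistent(&fe), frontend_uses_persistent(&be));
	return 0;
}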
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 007db8986e84..911d733d21b6 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
+#include <linux/llist.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
 	BLKIF_STATE_SUSPENDED,
 };
 
+struct grant {
+	grant_ref_t gref;
+	unsigned long pfn;
+	struct llist_node node;
+};
+
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
 	struct work_struct work;
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
+	struct llist_head persistent_gnts;
+	unsigned int persistent_gnts_c;
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
 	unsigned int feature_secdiscard:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
+	unsigned int feature_persistent:1;
 	int is_ready;
 };
 
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref;
+
+	/*
+	 * Used to store if we are able to queue the request by just using
+	 * existing persistent grants, or if we have to get new grants,
+	 * as there are not sufficiently many free.
+	 */
+	bool new_persistent_gnts;
 	grant_ref_t gref_head;
+	struct page *granted_page;
+	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	if (gnttab_alloc_grant_references(
-		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
-		gnttab_request_free_callback(
-			&info->callback,
-			blkif_restart_queue_callback,
-			info,
-			BLKIF_MAX_SEGMENTS_PER_REQUEST);
-		return 1;
-	}
+	/* Check if we have enought grants to allocate a requests */
+	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		new_persistent_gnts = 1;
+		if (gnttab_alloc_grant_references(
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    &gref_head) < 0) {
+			gnttab_request_free_callback(
+				&info->callback,
+				blkif_restart_queue_callback,
+				info,
+				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+			return 1;
+		}
+	} else
+		new_persistent_gnts = 0;
 
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
 				       BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
 	for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
-			/* install a grant reference. */
-			ref = gnttab_claim_grant_reference(&gref_head);
-			BUG_ON(ref == -ENOSPC);
 
-			gnttab_grant_foreign_access_ref(
-					ref,
+			if (info->persistent_gnts_c) {
+				BUG_ON(llist_empty(&info->persistent_gnts));
+				gnt_list_entry = llist_entry(
+					llist_del_first(&info->persistent_gnts),
+					struct grant, node);
+
+				ref = gnt_list_entry->gref;
+				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+				info->persistent_gnts_c--;
+			} else {
+				ref = gnttab_claim_grant_reference(&gref_head);
+				BUG_ON(ref == -ENOSPC);
+
+				gnt_list_entry =
+					kmalloc(sizeof(struct grant),
+						GFP_ATOMIC);
+				if (!gnt_list_entry)
+					return -ENOMEM;
+
+				granted_page = alloc_page(GFP_ATOMIC);
+				if (!granted_page) {
+					kfree(gnt_list_entry);
+					return -ENOMEM;
+				}
+
+				gnt_list_entry->pfn =
+					page_to_pfn(granted_page);
+				gnt_list_entry->gref = ref;
+
+				buffer_mfn = pfn_to_mfn(page_to_pfn(
+								granted_page));
+				gnttab_grant_foreign_access_ref(ref,
 					info->xbdev->otherend_id,
-					buffer_mfn,
-					rq_data_dir(req));
+					buffer_mfn, 0);
+			}
+
+			info->shadow[id].grants_used[i] = gnt_list_entry;
+
+			if (rq_data_dir(req)) {
+				char *bvec_data;
+				void *shared_data;
+
+				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+				shared_data = kmap_atomic(
+					pfn_to_page(gnt_list_entry->pfn));
+				bvec_data = kmap_atomic(sg_page(sg));
+
+				/*
+				 * this does not wipe data stored outside the
+				 * range sg->offset..sg->offset+sg->length.
+				 * Therefore, blkback *could* see data from
+				 * previous requests. This is OK as long as
+				 * persistent grants are shared with just one
+				 * domain. It may need refactoring if this
+				 * changes
+				 */
+				memcpy(shared_data + sg->offset,
+				       bvec_data + sg->offset,
+				       sg->length);
+
+				kunmap_atomic(bvec_data);
+				kunmap_atomic(shared_data);
+			}
 
 			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
 			ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
 	/* Keep a private copy so we can reissue requests when recovering. */
 	info->shadow[id].req = *ring_req;
 
-	gnttab_free_grant_references(gref_head);
+	if (new_persistent_gnts)
+		gnttab_free_grant_references(gref_head);
 
 	return 0;
 }
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled");
+	       info->feature_flush ? "enabled" : "disabled",
+	       info->feature_persistent ? "using persistent grants" : "");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
 
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+	struct llist_node *all_gnts;
+	struct grant *persistent_gnt;
+
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
 	info->connected = suspend ?
@@ -714,6 +800,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* No more blkif_request(). */
 	if (info->rq)
 		blk_stop_queue(info->rq);
+
+	/* Remove all persistent grants */
+	if (info->persistent_gnts_c) {
+		all_gnts = llist_del_all(&info->persistent_gnts);
+		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			kfree(persistent_gnt);
+		}
+		info->persistent_gnts_c = 0;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -734,13 +831,42 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 
 }
 
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+			     struct blkif_response *bret)
 {
 	int i;
-	/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
-	 * flag. */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++)
-		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+	struct bio_vec *bvec;
+	struct req_iterator iter;
+	unsigned long flags;
+	char *bvec_data;
+	void *shared_data;
+	unsigned int offset = 0;
+
+	if (bret->operation == BLKIF_OP_READ) {
+		/*
+		 * Copy the data received from the backend into the bvec.
+		 * Since bv_offset can be different than 0, and bv_len different
+		 * than PAGE_SIZE, we have to keep track of the current offset,
+		 * to be sure we are copying the data from the right shared page.
+		 */
+		rq_for_each_segment(bvec, s->request, iter) {
+			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
+			i = offset >> PAGE_SHIFT;
+			shared_data = kmap_atomic(
+				pfn_to_page(s->grants_used[i]->pfn));
+			bvec_data = bvec_kmap_irq(bvec, &flags);
+			memcpy(bvec_data, shared_data + bvec->bv_offset,
+				bvec->bv_len);
+			bvec_kunmap_irq(bvec_data, &flags);
+			kunmap_atomic(shared_data);
+			offset += bvec->bv_len;
+		}
+	}
+	/* Add the persistent grant into the list of free grants */
+	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+		llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+		info->persistent_gnts_c++;
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +909,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		req  = info->shadow[id].request;
 
 		if (bret->operation != BLKIF_OP_DISCARD)
-			blkif_completion(&info->shadow[id]);
+			blkif_completion(&info->shadow[id], info, bret);
 
 		if (add_id_to_freelist(info, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1068,11 @@ again:
 		message = "writing protocol";
 		goto abort_transaction;
 	}
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-persistent-grants", "%u", 1);
+	if (err)
+		dev_warn(&dev->dev,
+			 "writing persistent grants feature to xenbus");
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
@@ -1029,6 +1160,8 @@ static int blkfront_probe(struct xenbus_device *dev,
 	spin_lock_init(&info->io_lock);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
+	init_llist_head(&info->persistent_gnts);
+	info->persistent_gnts_c = 0;
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 
@@ -1093,7 +1226,7 @@ static int blkif_recover(struct blkfront_info *info)
 				req->u.rw.seg[j].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
-				rq_data_dir(info->shadow[req->u.rw.id].request));
+				0);
 		}
 		info->shadow[req->u.rw.id].req = *req;
 
@@ -1225,7 +1358,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush, discard;
+	int barrier, flush, discard, persistent;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1436,14 @@ static void blkfront_connect(struct blkfront_info *info)
 	if (!err && discard)
 		blkfront_setup_discard(info);
 
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-persistent", "%u", &persistent,
+			    NULL);
+	if (err)
+		info->feature_persistent = 0;
+	else
+		info->feature_persistent = persistent;
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
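With persistent grants the frontend no longer hands its own bio pages to the backend; instead it keeps a pool of long-lived granted pages, copies write data into the shared page when queuing a request (blkif_queue_request) and copies read data back out of it on completion (blkif_completion). A hedged userspace sketch of that copy path only, with plain buffers standing in for the granted page and the bvec, not the kernel code:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

static char shared_page[PAGE_SIZE];         /* stands in for the persistently granted page */

/* Write path: copy the request data into the shared page before ringing the backend. */
static void queue_write(const char *bvec, size_t offset, size_t len)
{
	memcpy(shared_page + offset, bvec + offset, len);
}

/* Read completion: copy the backend's data out of the shared page into the bvec. */
static void complete_read(char *bvec, size_t offset, size_t len)
{
	memcpy(bvec + offset, shared_page + offset, len);
}

int main(void)
{
	char out[PAGE_SIZE] = "payload";
	char in[PAGE_SIZE] = { 0 };

	queue_write(out, 0, 8);              /* domU data -> shared page */
	complete_read(in, 0, 8);             /* shared page -> domU data */
	printf("round trip: %s\n", in);
	return 0;
}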