author    Roger Pau Monne <roger.pau@citrix.com>    2012-10-24 12:58:45 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>    2012-10-30 09:50:04 -0400
commit    0a8704a51f386cab7394e38ff1d66eef924d8ab8
tree      8fb3897bad957fa592ff54cacc97de924246c125 /drivers/block/xen-blkback
parent    8f0d8163b50e01f398b14bcd4dc039ac5ab18d64
xen/blkback: Persistent grant maps for xen blk drivers
This patch implements persistent grants for the xen-blk{front,back} mechanism. The effect of this change is to reduce the number of unmap operations performed, since they cause a (costly) TLB shootdown. This allows the I/O performance to scale better when a large number of VMs are performing I/O.

Previously, the blkfront driver was supplied a bvec[] from the request queue. This was granted to dom0; dom0 performed the I/O and wrote directly into the grant-mapped memory and unmapped it; blkfront then removed foreign access for that grant. The cost of unmapping scales badly with the number of CPUs in Dom0. An experiment showed that when Dom0 has 24 VCPUs, and guests are performing parallel I/O to a ramdisk, the IPIs from performing unmaps become a bottleneck at 5 guests (at which point 650,000 IOPS are being performed in total). If more than 5 guests are used, the performance declines. By 10 guests, only 400,000 IOPS are being performed.

This patch improves performance by only unmapping when the connection between blkfront and blkback is broken.

On startup blkfront notifies blkback that it is using persistent grants, and blkback will do the same. If blkback is not capable of persistent mapping, blkfront will still use the same grants, since this is compatible with the previous protocol and reduces code complexity in blkfront.

To perform a read in persistent mode, blkfront uses a separate pool of pages that it maps to dom0. When a request comes in, blkfront transmutes the request so that blkback will write into one of these free pages. Blkback keeps note of which grefs it has already mapped. When a new ring request comes to blkback, it looks to see if it has already mapped that page. If so, it will not map it again. If the page hasn't been previously mapped, it is mapped now, and a record is kept of this mapping. Blkback proceeds as usual. When blkfront is notified that blkback has completed a request, it memcpy's from the shared memory into the supplied bvec. A record is kept that the {gref, page} tuple is mapped and not in flight.

Writes are similar, except that the memcpy is performed from the supplied bvecs into the shared pages, before the request is put onto the ring.

Blkback stores a mapping of grefs => {page mapped to by gref} in a red-black tree. As the grefs are not known a priori, and provide no guarantees on their ordering, we have to perform a search through this tree to find the page for every gref we receive. This operation takes O(log n) time in the worst case. In blkfront, grants are stored in a singly-linked list.

The maximum number of grants that blkback will persistently map is currently set to RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST, to prevent a malicious guest from attempting a DoS by supplying fresh grefs and causing the Dom0 kernel to map excessively. If a guest is using persistent grants and exceeds the maximum number of grants to map persistently, the newly passed grefs will be mapped and unmapped. Using this approach, we can have requests that mix persistent and non-persistent grants, and we need to handle them correctly. This allows us to set the maximum number of persistent grants to a lower value than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST, although doing so will lead to unpredictable performance.

In writing this patch, the question arises as to whether the additional cost of performing memcpys in the guest (to/from the pool of granted pages) outweighs the gains of not performing TLB shootdowns. The answer to that question is 'no'. There appears to be very little, if any, additional cost to the guest of using persistent grants. There is perhaps a small saving from the reduced number of hypercalls performed in granting and ending foreign access.

Signed-off-by: Oliver Chick <oliver.chick@citrix.com>
Signed-off-by: Roger Pau Monne <roger.pau@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[v1: Fixed up the misuse of bool as int]
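The per-gref lookup described above is the core of the backend change. Below is a minimal userspace sketch of that policy (reuse a gref if it is already cached, add it persistently while there is room, otherwise fall back to mapping and unmapping for this request only). The plain binary search tree and the names find/add/handle_gref/MAX_PERSISTENT are illustrative stand-ins, not the driver's rb-tree code shown in the diff below.

/* Toy model of blkback's per-segment grant lookup (not kernel code). */
#include <stdio.h>
#include <stdlib.h>

#define MAX_PERSISTENT (32 * 11) /* RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST */

struct gnt {                     /* stand-in for struct persistent_gnt */
	unsigned int gref;
	struct gnt *left, *right;
};

static struct gnt *root;
static unsigned int persistent_cnt;

/* O(log n) search over grefs, mirroring get_persistent_gnt() in the patch. */
static struct gnt *find(unsigned int gref)
{
	struct gnt *n = root;

	while (n) {
		if (gref < n->gref)
			n = n->left;
		else if (gref > n->gref)
			n = n->right;
		else
			return n;
	}
	return NULL;
}

/* Insert keyed by gref, mirroring add_persistent_gnt() (no rebalancing here). */
static void add(struct gnt *g)
{
	struct gnt **link = &root;

	while (*link)
		link = (g->gref < (*link)->gref) ? &(*link)->left
						 : &(*link)->right;
	*link = g;
}

/* Per-segment policy: reuse, add persistently, or map/unmap just this once. */
static void handle_gref(unsigned int gref)
{
	if (find(gref)) {
		printf("gref %u: already mapped persistently, reuse\n", gref);
	} else if (persistent_cnt < MAX_PERSISTENT) {
		struct gnt *g = calloc(1, sizeof(*g));

		if (!g)
			return;
		g->gref = gref;
		add(g);
		persistent_cnt++;
		printf("gref %u: mapped persistently (%u/%u)\n",
		       gref, persistent_cnt, MAX_PERSISTENT);
	} else {
		printf("gref %u: over the limit, map and unmap for this request only\n",
		       gref);
	}
}

int main(void)
{
	unsigned int sample[] = { 17, 4, 17, 99 };
	unsigned int i;

	for (i = 0; i < sizeof(sample) / sizeof(sample[0]); i++)
		handle_gref(sample[i]);
	return 0;
}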
Diffstat (limited to 'drivers/block/xen-blkback')
-rw-r--r--  drivers/block/xen-blkback/blkback.c  292
-rw-r--r--  drivers/block/xen-blkback/common.h    17
-rw-r--r--  drivers/block/xen-blkback/xenbus.c    23
3 files changed, 305 insertions, 27 deletions
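The limit discussed above is also spelled out numerically in a comment the patch adds to blkback.c (first hunk below). As a rough standalone check of that arithmetic, using only the figures quoted in the patch (32 ring entries, 11 segments per request, 48 bytes per struct persistent_gnt):

/* Standalone check of the bookkeeping bound quoted in the patch. Not driver code. */
#include <stdio.h>

int main(void)
{
	const unsigned int ring_size = 32;      /* all known ring types */
	const unsigned int segs_per_req = 11;   /* BLKIF_MAX_SEGMENTS_PER_REQUEST */
	const unsigned int gnt_size = 48;       /* sizeof(struct persistent_gnt) */

	unsigned int max_grants = ring_size * segs_per_req;
	unsigned int bookkeeping = max_grants * gnt_size;

	printf("max persistently mapped grants per backend: %u\n", max_grants); /* 352 */
	printf("memory used to track them: %u bytes\n", bookkeeping);           /* 16896 */
	return 0;
}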
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 280a13846e6c..d7dd5cbdac5f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
 	unsigned short operation;
 	int status;
 	struct list_head free_list;
+	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -99,6 +101,36 @@ struct xen_blkbk {
 static struct xen_blkbk *blkbk;
 
 /*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+	switch (protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_32:
+		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_64:
+		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+
+/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
 			  unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+	     &(pos)->node != NULL; \
+	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+			       struct persistent_gnt *persistent_gnt)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct persistent_gnt *this;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		this = container_of(*new, struct persistent_gnt, node);
+
+		parent = *new;
+		if (persistent_gnt->gnt < this->gnt)
+			new = &((*new)->rb_left);
+		else if (persistent_gnt->gnt > this->gnt)
+			new = &((*new)->rb_right);
+		else {
+			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+			BUG();
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&(persistent_gnt->node), parent, new);
+	rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+						 grant_ref_t gref)
+{
+	struct persistent_gnt *data;
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		data = container_of(node, struct persistent_gnt, node);
+
+		if (gref < data->gnt)
+			node = node->rb_left;
+		else if (gref > data->gnt)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret = 0;
+	int segs_to_unmap = 0;
 
 	xen_blkif_get(blkif);
 
@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg)
 			print_stats(blkif);
 	}
 
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
+		foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) {
+			BUG_ON(persistent_gnt->handle ==
+				BLKBACK_INVALID_HANDLE);
+			gnttab_set_unmap_op(&unmap[segs_to_unmap],
+				(unsigned long) pfn_to_kaddr(page_to_pfn(
+					persistent_gnt->page)),
+				GNTMAP_host_map,
+				persistent_gnt->handle);
+
+			pages[segs_to_unmap] = persistent_gnt->page;
+			rb_erase(&persistent_gnt->node,
+				&blkif->persistent_gnts);
+			kfree(persistent_gnt);
+			blkif->persistent_gnt_c--;
+
+			if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+			    !rb_next(&persistent_gnt->node)) {
+				ret = gnttab_unmap_refs(unmap, NULL, pages,
+					segs_to_unmap);
+				BUG_ON(ret);
+				segs_to_unmap = 0;
+			}
+		}
+	}
+
+	BUG_ON(blkif->persistent_gnt_c != 0);
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
 	if (log_stats)
 		print_stats(blkif);
 
@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
+		if (!test_bit(i, req->unmap_seg))
+			continue;
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
 
 static int xen_blkbk_map(struct blkif_request *req,
 			 struct pending_req *pending_req,
-			 struct seg_buf seg[])
+			 struct seg_buf seg[],
+			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int i;
+	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt = NULL;
+	struct xen_blkif *blkif = pending_req->blkif;
+	phys_addr_t addr = 0;
+	int i, j;
+	bool new_map;
 	int nseg = req->u.rw.nr_segments;
+	int segs_to_map = 0;
 	int ret = 0;
+	int use_persistent_gnts;
+
+	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+	BUG_ON(blkif->persistent_gnt_c >
+	       max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req,
 	for (i = 0; i < nseg; i++) {
 		uint32_t flags;
 
-		flags = GNTMAP_host_map;
-		if (pending_req->operation != BLKIF_OP_READ)
-			flags |= GNTMAP_readonly;
-		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-				  req->u.rw.seg[i].gref,
-				  pending_req->blkif->domid);
+		if (use_persistent_gnts)
+			persistent_gnt = get_persistent_gnt(
+				&blkif->persistent_gnts,
+				req->u.rw.seg[i].gref);
+
+		if (persistent_gnt) {
+			/*
+			 * We are using persistent grants and
+			 * the grant is already mapped
+			 */
+			new_map = false;
+		} else if (use_persistent_gnts &&
+			   blkif->persistent_gnt_c <
+			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			new_map = true;
+			persistent_gnt = kzalloc(
+				sizeof(struct persistent_gnt),
+				GFP_KERNEL);
+			if (!persistent_gnt)
+				return -ENOMEM;
+			persistent_gnt->page = alloc_page(GFP_KERNEL);
+			if (!persistent_gnt->page) {
+				kfree(persistent_gnt);
+				return -ENOMEM;
+			}
+			persistent_gnt->gnt = req->u.rw.seg[i].gref;
+
+			pages_to_gnt[segs_to_map] =
+				persistent_gnt->page;
+			addr = (unsigned long) pfn_to_kaddr(
+				page_to_pfn(persistent_gnt->page));
+
+			add_persistent_gnt(&blkif->persistent_gnts,
+				persistent_gnt);
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+		} else {
+			/*
+			 * We are either using persistent grants and
+			 * hit the maximum limit of grants mapped,
+			 * or we are not using persistent grants.
+			 */
+			if (use_persistent_gnts &&
+			    !blkif->vbd.overflow_max_grants) {
+				blkif->vbd.overflow_max_grants = 1;
+				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+					 blkif->domid, blkif->vbd.handle);
+			}
+			new_map = true;
+			pages[i] = blkbk->pending_page(pending_req, i);
+			addr = vaddr(pending_req, i);
+			pages_to_gnt[segs_to_map] =
+				blkbk->pending_page(pending_req, i);
+		}
+
+		if (persistent_gnt) {
+			pages[i] = persistent_gnt->page;
+			persistent_gnts[i] = persistent_gnt;
+		} else {
+			persistent_gnts[i] = NULL;
+		}
+
+		if (new_map) {
+			flags = GNTMAP_host_map;
+			if (!persistent_gnt &&
+			    (pending_req->operation != BLKIF_OP_READ))
+				flags |= GNTMAP_readonly;
+			gnttab_set_map_op(&map[segs_to_map++], addr,
+					  flags, req->u.rw.seg[i].gref,
+					  blkif->domid);
+		}
 	}
 
-	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-	BUG_ON(ret);
+	if (segs_to_map) {
+		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+		BUG_ON(ret);
+	}
 
 	/*
 	 * Now swizzle the MFN in our domain with the MFN from the other domain
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (i = 0; i < nseg; i++) {
-		if (unlikely(map[i].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-			map[i].handle = BLKBACK_INVALID_HANDLE;
-			ret |= 1;
+	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	for (i = 0, j = 0; i < nseg; i++) {
+		if (!persistent_gnts[i] || !persistent_gnts[i]->handle) {
+			/* This is a newly mapped grant */
+			BUG_ON(j >= segs_to_map);
+			if (unlikely(map[j].status != 0)) {
+				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+				map[j].handle = BLKBACK_INVALID_HANDLE;
+				ret |= 1;
+				if (persistent_gnts[i]) {
+					rb_erase(&persistent_gnts[i]->node,
+						 &blkif->persistent_gnts);
+					blkif->persistent_gnt_c--;
+					kfree(persistent_gnts[i]);
+					persistent_gnts[i] = NULL;
+				}
+			}
+		}
+		if (persistent_gnts[i]) {
+			if (!persistent_gnts[i]->handle) {
+				/*
+				 * If this is a new persistent grant
+				 * save the handler
+				 */
+				persistent_gnts[i]->handle = map[j].handle;
+				persistent_gnts[i]->dev_bus_addr =
+					map[j++].dev_bus_addr;
+			}
+			pending_handle(pending_req, i) =
+				persistent_gnts[i]->handle;
+
+			if (ret)
+				continue;
+
+			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		} else {
+			pending_handle(pending_req, i) = map[j].handle;
+			bitmap_set(pending_req->unmap_seg, i, 1);
+
+			if (ret) {
+				j++;
+				continue;
+			}
+
+			seg[i].buf = map[j++].dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
 		}
-
-		pending_handle(pending_req, i) = map[i].handle;
-
-		if (ret)
-			continue;
-
-		seg[i].buf = map[i].dev_bus_addr |
-			(req->u.rw.seg[i].first_sect << 9);
 	}
 	return ret;
 }
@@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (xen_blkbk_map(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     blkbk->pending_page(pending_req, i),
+				     pages[i],
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9ad3b5ec1dc1..ae7951f0e268 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/rbtree.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
@@ -160,10 +161,22 @@ struct xen_vbd {
 	sector_t		size;
 	bool			flush_support;
 	bool			discard_secure;
+
+	unsigned int		feature_gnt_persistent:1;
+	unsigned int		overflow_max_grants:1;
 };
 
 struct backend_info;
 
+
+struct persistent_gnt {
+	struct page *page;
+	grant_ref_t gnt;
+	grant_handle_t handle;
+	uint64_t dev_bus_addr;
+	struct rb_node node;
+};
+
 struct xen_blkif {
 	/* Unique identifier for this interface. */
 	domid_t			domid;
@@ -190,6 +203,10 @@ struct xen_blkif {
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
 
+	/* tree to store persistent grants */
+	struct rb_root		persistent_gnts;
+	unsigned int		persistent_gnt_c;
+
 	/* statistics */
 	unsigned long		st_print;
 	int			st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4f66171c6683..b2250265308a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
+	blkif->persistent_gnts.rb_node = NULL;
 
 	return blkif;
 }
@@ -673,6 +674,13 @@ again:
 
 	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+				 dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
@@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be)
 	struct xenbus_device *dev = be->dev;
 	unsigned long ring_ref;
 	unsigned int evtchn;
+	unsigned int pers_grants;
 	char protocol[64] = "";
 	int err;
 
@@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be)
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "feature-persistent-grants", "%u",
+			    &pers_grants, NULL);
+	if (err)
+		pers_grants = 0;
+
+	be->blkif->vbd.feature_gnt_persistent = pers_grants;
+	be->blkif->vbd.overflow_max_grants = 0;
+
+	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+		pers_grants ? "persistent grants" : "");
 
 	/* Map the shared frame, irq etc. */
 	err = xen_blkif_map(be->blkif, ring_ref, evtchn);