author    Roger Pau Monne <roger.pau@citrix.com>    2013-04-17 14:18:57 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>    2013-04-18 09:29:23 -0400
commit    3f3aad5e6686ed49242bbf86de378b39f119ec9d (patch)
tree      be9ab4bdda11d09a33722b0a276b141069e946c8 /drivers/block/xen-blkback
parent    c6cc142dac52e62e1e8a2aff5de1300202b96c66 (diff)
xen-blkback: implement LRU mechanism for persistent grants
This mechanism allows blkback to change the number of grants persistently
mapped at run time. The algorithm uses a simple LRU mechanism that removes
(if needed) the persistent grants that have not been used since the last
LRU run, or if all grants have been used it removes the first grants in
the list (that are not in use). The algorithm allows the user to change
the maximum number of persistent grants, by changing max_persistent_grants
in sysfs.

Since we are storing the persistent grants used inside the request struct
(to be able to mark them as "unused" when unmapping), we no longer need
the bitmap (unmap_seg).

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: xen-devel@lists.xen.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
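A note on the numbers involved, derived from the defaults added below: with
max_persistent_grants at its default of 352 and LRU_PERCENT_CLEAN at 5, each
purge run on a full tree tries to free (352 / 100) * 5 = 15 grants (integer
division), plus however far persistent_gnt_c sits above the limit, which can
happen when the limit is lowered at run time. The limit can be tuned on a
live backend; assuming the usual sysfs naming for module parameters (dashes
in the module name become underscores), something like

    echo 512 > /sys/module/xen_blkback/parameters/max_persistent_grants

raises it to 512, after which the LRU purge stops triggering until the tree
fills up to the new limit.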
Diffstat (limited to 'drivers/block/xen-blkback')
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 287
-rw-r--r--  drivers/block/xen-blkback/common.h  |  18
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  |   2
3 files changed, 250 insertions(+), 57 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 8245c6bb9539..17052f74ebe5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -78,6 +78,36 @@ module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
 MODULE_PARM_DESC(max_buffer_pages,
                  "Maximum number of free pages to keep in each block backend buffer");
 
+/*
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total number of grants that can be used
+ * to fill the ring, but since this might become too high, especially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it up using an LRU
+ * algorithm.
+ */
+
+static int xen_blkif_max_pgrants = 352;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
+
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in ms.
+ */
+#define LRU_INTERVAL 100
+
+/*
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. The percentage of grants to be removed at each LRU
+ * execution.
+ */
+#define LRU_PERCENT_CLEAN 5
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
@@ -96,8 +126,8 @@ struct pending_req {
         unsigned short          operation;
         int                     status;
         struct list_head        free_list;
-        DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
         struct page             *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        struct persistent_gnt   *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -119,36 +149,6 @@ struct xen_blkbk {
 static struct xen_blkbk *blkbk;
 
 /*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
- */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
-{
-        switch (protocol) {
-        case BLKIF_PROTOCOL_NATIVE:
-                return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-                           BLKIF_MAX_SEGMENTS_PER_REQUEST;
-        case BLKIF_PROTOCOL_X86_32:
-                return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-                           BLKIF_MAX_SEGMENTS_PER_REQUEST;
-        case BLKIF_PROTOCOL_X86_64:
-                return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-                           BLKIF_MAX_SEGMENTS_PER_REQUEST;
-        default:
-                BUG();
-        }
-        return 0;
-}
-
-
-/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
@@ -239,13 +239,29 @@ static void make_response(struct xen_blkif *blkif, u64 id,
         (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
-static int add_persistent_gnt(struct rb_root *root,
+/*
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single thread for each backend, so we
+ * can be sure that these functions will never be called recursively.
+ *
+ * The only exception to that is put_persistent_gnt, which can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
+ */
+static int add_persistent_gnt(struct xen_blkif *blkif,
                               struct persistent_gnt *persistent_gnt)
 {
-        struct rb_node **new = &(root->rb_node), *parent = NULL;
+        struct rb_node **new = NULL, *parent = NULL;
         struct persistent_gnt *this;
 
+        if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+                if (!blkif->vbd.overflow_max_grants)
+                        blkif->vbd.overflow_max_grants = 1;
+                return -EBUSY;
+        }
         /* Figure out where to put new node */
+        new = &blkif->persistent_gnts.rb_node;
         while (*new) {
                 this = container_of(*new, struct persistent_gnt, node);
 
@@ -260,18 +276,23 @@ static int add_persistent_gnt(struct rb_root *root,
                 }
         }
 
+        bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
+        set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
         /* Add new node and rebalance tree. */
         rb_link_node(&(persistent_gnt->node), parent, new);
-        rb_insert_color(&(persistent_gnt->node), root);
+        rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
+        blkif->persistent_gnt_c++;
+        atomic_inc(&blkif->persistent_gnt_in_use);
         return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
                                                  grant_ref_t gref)
 {
         struct persistent_gnt *data;
-        struct rb_node *node = root->rb_node;
+        struct rb_node *node = NULL;
 
+        node = blkif->persistent_gnts.rb_node;
         while (node) {
                 data = container_of(node, struct persistent_gnt, node);
 
@@ -279,12 +300,29 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
                         node = node->rb_left;
                 else if (gref > data->gnt)
                         node = node->rb_right;
-                else
+                else {
+                        if (test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+                                pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+                                return NULL;
+                        }
+                        set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+                        atomic_inc(&blkif->persistent_gnt_in_use);
                         return data;
+                }
         }
         return NULL;
 }
 
+static void put_persistent_gnt(struct xen_blkif *blkif,
+                               struct persistent_gnt *persistent_gnt)
+{
+        if (!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+                pr_alert_ratelimited(DRV_PFX " freeing a grant already unused\n");
+        set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+        clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+        atomic_dec(&blkif->persistent_gnt_in_use);
+}
+
 static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                                  unsigned int num)
 {
@@ -322,6 +360,129 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
         BUG_ON(num != 0);
 }
 
+static void unmap_purged_grants(struct work_struct *work)
+{
+        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        struct persistent_gnt *persistent_gnt;
+        int ret, segs_to_unmap = 0;
+        struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+
+        while (!list_empty(&blkif->persistent_purge_list)) {
+                persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+                                                  struct persistent_gnt,
+                                                  remove_node);
+                list_del(&persistent_gnt->remove_node);
+
+                gnttab_set_unmap_op(&unmap[segs_to_unmap],
+                        vaddr(persistent_gnt->page),
+                        GNTMAP_host_map,
+                        persistent_gnt->handle);
+
+                pages[segs_to_unmap] = persistent_gnt->page;
+
+                if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                        ret = gnttab_unmap_refs(unmap, NULL, pages,
+                                segs_to_unmap);
+                        BUG_ON(ret);
+                        put_free_pages(blkif, pages, segs_to_unmap);
+                        segs_to_unmap = 0;
+                }
+                kfree(persistent_gnt);
+        }
+        if (segs_to_unmap > 0) {
+                ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+                BUG_ON(ret);
+                put_free_pages(blkif, pages, segs_to_unmap);
+        }
+}
+
+static void purge_persistent_gnt(struct xen_blkif *blkif)
+{
+        struct persistent_gnt *persistent_gnt;
+        struct rb_node *n;
+        unsigned int num_clean, total;
+        bool scan_used = false;
+        struct rb_root *root;
+
+        if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
+            (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+             !blkif->vbd.overflow_max_grants)) {
+                return;
+        }
+
+        if (work_pending(&blkif->persistent_purge_work)) {
+                pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+                return;
+        }
+
+        num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+        num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+        num_clean = min(blkif->persistent_gnt_c, num_clean);
+        if (num_clean >
+            (blkif->persistent_gnt_c -
+             atomic_read(&blkif->persistent_gnt_in_use)))
+                return;
+
+        /*
+         * At this point, we can be sure that there will be no calls
+         * to get_persistent_gnt (because we are executing this code from
+         * xen_blkif_schedule), there can only be calls to put_persistent_gnt,
+         * which means that the number of currently used grants will go down,
+         * but never up, so we will always be able to remove the requested
+         * number of grants.
+         */
+
+        total = num_clean;
+
+        pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+
+        INIT_LIST_HEAD(&blkif->persistent_purge_list);
+        root = &blkif->persistent_gnts;
+purge_list:
+        foreach_grant_safe(persistent_gnt, n, root, node) {
+                BUG_ON(persistent_gnt->handle ==
+                        BLKBACK_INVALID_HANDLE);
+
+                if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+                        continue;
+                if (!scan_used &&
+                    (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+                        continue;
+
+                rb_erase(&persistent_gnt->node, root);
+                list_add(&persistent_gnt->remove_node,
+                         &blkif->persistent_purge_list);
+                if (--num_clean == 0)
+                        goto finished;
+        }
+        /*
+         * If we get here it means we also need to start cleaning
+         * grants that were used since the last purge in order to cope
+         * with the requested number.
+         */
+        if (!scan_used) {
+                pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+                scan_used = true;
+                goto purge_list;
+        }
+finished:
+        /* Remove the "used" flag from all the persistent grants */
+        foreach_grant_safe(persistent_gnt, n, root, node) {
+                BUG_ON(persistent_gnt->handle ==
+                        BLKBACK_INVALID_HANDLE);
+                clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+        }
+        blkif->persistent_gnt_c -= (total - num_clean);
+        blkif->vbd.overflow_max_grants = 0;
+
+        /* We can defer this work */
+        INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
+        schedule_work(&blkif->persistent_purge_work);
+        pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+        return;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -453,12 +614,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 static void print_stats(struct xen_blkif *blkif)
 {
         pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
-                 " | ds %4llu | pg: %4u/%4u\n",
+                 " | ds %4llu | pg: %4u/%4d\n",
                  current->comm, blkif->st_oo_req,
                  blkif->st_rd_req, blkif->st_wr_req,
                  blkif->st_f_req, blkif->st_ds_req,
                  blkif->persistent_gnt_c,
-                 max_mapped_grant_pages(blkif->blk_protocol));
+                 xen_blkif_max_pgrants);
         blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
         blkif->st_rd_req = 0;
         blkif->st_wr_req = 0;
@@ -470,6 +631,7 @@ int xen_blkif_schedule(void *arg)
 {
         struct xen_blkif *blkif = arg;
         struct xen_vbd *vbd = &blkif->vbd;
+        unsigned long timeout;
 
         xen_blkif_get(blkif);
 
@@ -479,13 +641,21 @@ int xen_blkif_schedule(void *arg)
         if (unlikely(vbd->size != vbd_sz(vbd)))
                 xen_vbd_resize(blkif);
 
-        wait_event_interruptible(
+        timeout = msecs_to_jiffies(LRU_INTERVAL);
+
+        timeout = wait_event_interruptible_timeout(
                 blkif->wq,
-                blkif->waiting_reqs || kthread_should_stop());
-        wait_event_interruptible(
+                blkif->waiting_reqs || kthread_should_stop(),
+                timeout);
+        if (timeout == 0)
+                goto purge_gnt_list;
+        timeout = wait_event_interruptible_timeout(
                 blkbk->pending_free_wq,
                 !list_empty(&blkbk->pending_free) ||
-                kthread_should_stop());
+                kthread_should_stop(),
+                timeout);
+        if (timeout == 0)
+                goto purge_gnt_list;
 
         blkif->waiting_reqs = 0;
         smp_mb(); /* clear flag *before* checking for work */
@@ -493,6 +663,13 @@ int xen_blkif_schedule(void *arg)
         if (do_block_io_op(blkif))
                 blkif->waiting_reqs = 1;
 
+purge_gnt_list:
+        if (blkif->vbd.feature_gnt_persistent &&
+            time_after(jiffies, blkif->next_lru)) {
+                purge_persistent_gnt(blkif);
+                blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+        }
+
         /* Shrink if we have more than xen_blkif_max_buffer_pages */
         shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
 
@@ -538,8 +715,10 @@ static void xen_blkbk_unmap(struct pending_req *req)
         int ret;
 
         for (i = 0; i < req->nr_pages; i++) {
-                if (!test_bit(i, req->unmap_seg))
+                if (req->persistent_gnts[i] != NULL) {
+                        put_persistent_gnt(blkif, req->persistent_gnts[i]);
                         continue;
+                }
                 handle = pending_handle(req, i);
                 pages[invcount] = req->pages[i];
                 if (handle == BLKBACK_INVALID_HANDLE)
@@ -561,8 +740,8 @@ static int xen_blkbk_map(struct blkif_request *req,
                          struct page *pages[])
 {
         struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-        struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
         struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        struct persistent_gnt **persistent_gnts = pending_req->persistent_gnts;
         struct persistent_gnt *persistent_gnt = NULL;
         struct xen_blkif *blkif = pending_req->blkif;
         phys_addr_t addr = 0;
@@ -574,9 +753,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 
         use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
-        BUG_ON(blkif->persistent_gnt_c >
-               max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
         /*
          * Fill out preq.nr_sects with proper amount of sectors, and setup
          * assign map[..] with the PFN of the page in our domain with the
@@ -587,7 +763,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 
                 if (use_persistent_gnts)
                         persistent_gnt = get_persistent_gnt(
-                                &blkif->persistent_gnts,
+                                blkif,
                                 req->u.rw.seg[i].gref);
 
                 if (persistent_gnt) {
@@ -623,7 +799,6 @@ static int xen_blkbk_map(struct blkif_request *req,
          * so that when we access vaddr(pending_req,i) it has the contents of
          * the page from the other domain.
          */
-        bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
         for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) {
                 if (!persistent_gnts[seg_idx]) {
                         /* This is a newly mapped grant */
@@ -646,11 +821,10 @@ static int xen_blkbk_map(struct blkif_request *req,
                         goto next;
                 }
                 if (use_persistent_gnts &&
-                    blkif->persistent_gnt_c <
-                    max_mapped_grant_pages(blkif->blk_protocol)) {
+                    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
                         /*
                          * We are using persistent grants, the grant is
-                         * not mapped but we have room for it
+                         * not mapped but we might have room for it.
                          */
                         persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
                                                  GFP_KERNEL);
@@ -665,16 +839,16 @@ static int xen_blkbk_map(struct blkif_request *req,
                         persistent_gnt->gnt = map[new_map_idx].ref;
                         persistent_gnt->handle = map[new_map_idx].handle;
                         persistent_gnt->page = pages[seg_idx];
-                        if (add_persistent_gnt(&blkif->persistent_gnts,
+                        if (add_persistent_gnt(blkif,
                                                persistent_gnt)) {
                                 kfree(persistent_gnt);
                                 persistent_gnt = NULL;
                                 goto next_unmap;
                         }
-                        blkif->persistent_gnt_c++;
+                        persistent_gnts[seg_idx] = persistent_gnt;
                         pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
                                  persistent_gnt->gnt, blkif->persistent_gnt_c,
-                                 max_mapped_grant_pages(blkif->blk_protocol));
+                                 xen_blkif_max_pgrants);
                         new_map_idx++;
                         goto next;
                 }
@@ -688,7 +862,6 @@ next_unmap:
                  * We could not map this grant persistently, so use it as
                  * a non-persistent grant.
                  */
-                bitmap_set(pending_req->unmap_seg, seg_idx, 1);
                 new_map_idx++;
 next:
                 seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 6c73c3855e65..af9bed48f773 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -182,12 +182,23 @@ struct xen_vbd {
 
 struct backend_info;
 
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE        2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE            0
+/*
+ * This persistent grant has been used; this flag is set when we clear
+ * PERSISTENT_GNT_ACTIVE, so we know that this grant was used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE        1
 
 struct persistent_gnt {
         struct page *page;
         grant_ref_t gnt;
         grant_handle_t handle;
+        DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
         struct rb_node node;
+        struct list_head remove_node;
 };
 
 struct xen_blkif {
@@ -219,6 +230,12 @@ struct xen_blkif {
         /* tree to store persistent grants */
         struct rb_root persistent_gnts;
         unsigned int persistent_gnt_c;
+        atomic_t persistent_gnt_in_use;
+        unsigned long next_lru;
+
+        /* used by the kworker that offloads work from the persistent purge */
+        struct list_head persistent_purge_list;
+        struct work_struct persistent_purge_work;
 
         /* buffer of free pages to map grant refs */
         spinlock_t free_pages_lock;
@@ -262,6 +279,7 @@ int xen_blkif_xenbus_init(void);
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                               struct backend_info *be, int state);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24f7f6d87717..e0fd92a2a4cd 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -98,6 +98,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
                 err = PTR_ERR(blkif->xenblkd);
                 blkif->xenblkd = NULL;
                 xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+                return;
         }
 }
 
@@ -121,6 +122,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
         spin_lock_init(&blkif->free_pages_lock);
         INIT_LIST_HEAD(&blkif->free_pages);
         blkif->free_pages_num = 0;
+        atomic_set(&blkif->persistent_gnt_in_use, 0);
 
         return blkif;
 }