aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/xen-blkfront.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--drivers/block/xen-blkfront.c199
1 files changed, 171 insertions, 28 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 007db8986e84..96e9b00db081 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,6 +44,7 @@
44#include <linux/mutex.h> 44#include <linux/mutex.h>
45#include <linux/scatterlist.h> 45#include <linux/scatterlist.h>
46#include <linux/bitmap.h> 46#include <linux/bitmap.h>
47#include <linux/llist.h>
47 48
48#include <xen/xen.h> 49#include <xen/xen.h>
49#include <xen/xenbus.h> 50#include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
64 BLKIF_STATE_SUSPENDED, 65 BLKIF_STATE_SUSPENDED,
65}; 66};
66 67
68struct grant {
69 grant_ref_t gref;
70 unsigned long pfn;
71 struct llist_node node;
72};
73
67struct blk_shadow { 74struct blk_shadow {
68 struct blkif_request req; 75 struct blkif_request req;
69 struct request *request; 76 struct request *request;
70 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 77 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
78 struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
71}; 79};
72 80
73static DEFINE_MUTEX(blkfront_mutex); 81static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
97 struct work_struct work; 105 struct work_struct work;
98 struct gnttab_free_callback callback; 106 struct gnttab_free_callback callback;
99 struct blk_shadow shadow[BLK_RING_SIZE]; 107 struct blk_shadow shadow[BLK_RING_SIZE];
108 struct llist_head persistent_gnts;
109 unsigned int persistent_gnts_c;
100 unsigned long shadow_free; 110 unsigned long shadow_free;
101 unsigned int feature_flush; 111 unsigned int feature_flush;
102 unsigned int flush_op; 112 unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
104 unsigned int feature_secdiscard:1; 114 unsigned int feature_secdiscard:1;
105 unsigned int discard_granularity; 115 unsigned int discard_granularity;
106 unsigned int discard_alignment; 116 unsigned int discard_alignment;
117 unsigned int feature_persistent:1;
107 int is_ready; 118 int is_ready;
108}; 119};
109 120
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
287 unsigned long id; 298 unsigned long id;
288 unsigned int fsect, lsect; 299 unsigned int fsect, lsect;
289 int i, ref; 300 int i, ref;
301
302 /*
303 * Used to store if we are able to queue the request by just using
304 * existing persistent grants, or if we have to get new grants,
305 * as there are not sufficiently many free.
306 */
307 bool new_persistent_gnts;
290 grant_ref_t gref_head; 308 grant_ref_t gref_head;
309 struct page *granted_page;
310 struct grant *gnt_list_entry = NULL;
291 struct scatterlist *sg; 311 struct scatterlist *sg;
292 312
293 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 313 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
294 return 1; 314 return 1;
295 315
296 if (gnttab_alloc_grant_references( 316 /* Check if we have enough grants to allocate a request */
297 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 317 if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
298 gnttab_request_free_callback( 318 new_persistent_gnts = 1;
299 &info->callback, 319 if (gnttab_alloc_grant_references(
300 blkif_restart_queue_callback, 320 BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
301 info, 321 &gref_head) < 0) {
302 BLKIF_MAX_SEGMENTS_PER_REQUEST); 322 gnttab_request_free_callback(
303 return 1; 323 &info->callback,
304 } 324 blkif_restart_queue_callback,
325 info,
326 BLKIF_MAX_SEGMENTS_PER_REQUEST);
327 return 1;
328 }
329 } else
330 new_persistent_gnts = 0;
305 331
306 /* Fill out a communications ring structure. */ 332 /* Fill out a communications ring structure. */
307 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 333 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
341 BLKIF_MAX_SEGMENTS_PER_REQUEST); 367 BLKIF_MAX_SEGMENTS_PER_REQUEST);
342 368
343 for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { 369 for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
344 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
345 fsect = sg->offset >> 9; 370 fsect = sg->offset >> 9;
346 lsect = fsect + (sg->length >> 9) - 1; 371 lsect = fsect + (sg->length >> 9) - 1;
347 /* install a grant reference. */
348 ref = gnttab_claim_grant_reference(&gref_head);
349 BUG_ON(ref == -ENOSPC);
350 372
351 gnttab_grant_foreign_access_ref( 373 if (info->persistent_gnts_c) {
352 ref, 374 BUG_ON(llist_empty(&info->persistent_gnts));
375 gnt_list_entry = llist_entry(
376 llist_del_first(&info->persistent_gnts),
377 struct grant, node);
378
379 ref = gnt_list_entry->gref;
380 buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
381 info->persistent_gnts_c--;
382 } else {
383 ref = gnttab_claim_grant_reference(&gref_head);
384 BUG_ON(ref == -ENOSPC);
385
386 gnt_list_entry =
387 kmalloc(sizeof(struct grant),
388 GFP_ATOMIC);
389 if (!gnt_list_entry)
390 return -ENOMEM;
391
392 granted_page = alloc_page(GFP_ATOMIC);
393 if (!granted_page) {
394 kfree(gnt_list_entry);
395 return -ENOMEM;
396 }
397
398 gnt_list_entry->pfn =
399 page_to_pfn(granted_page);
400 gnt_list_entry->gref = ref;
401
402 buffer_mfn = pfn_to_mfn(page_to_pfn(
403 granted_page));
404 gnttab_grant_foreign_access_ref(ref,
353 info->xbdev->otherend_id, 405 info->xbdev->otherend_id,
354 buffer_mfn, 406 buffer_mfn, 0);
355 rq_data_dir(req)); 407 }
408
409 info->shadow[id].grants_used[i] = gnt_list_entry;
410
411 if (rq_data_dir(req)) {
412 char *bvec_data;
413 void *shared_data;
414
415 BUG_ON(sg->offset + sg->length > PAGE_SIZE);
416
417 shared_data = kmap_atomic(
418 pfn_to_page(gnt_list_entry->pfn));
419 bvec_data = kmap_atomic(sg_page(sg));
420
421 /*
422 * this does not wipe data stored outside the
423 * range sg->offset..sg->offset+sg->length.
424 * Therefore, blkback *could* see data from
425 * previous requests. This is OK as long as
426 * persistent grants are shared with just one
427 * domain. It may need refactoring if this
428 * changes
429 */
430 memcpy(shared_data + sg->offset,
431 bvec_data + sg->offset,
432 sg->length);
433
434 kunmap_atomic(bvec_data);
435 kunmap_atomic(shared_data);
436 }
356 437
357 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 438 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
358 ring_req->u.rw.seg[i] = 439 ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
368 /* Keep a private copy so we can reissue requests when recovering. */ 449 /* Keep a private copy so we can reissue requests when recovering. */
369 info->shadow[id].req = *ring_req; 450 info->shadow[id].req = *ring_req;
370 451
371 gnttab_free_grant_references(gref_head); 452 if (new_persistent_gnts)
453 gnttab_free_grant_references(gref_head);
372 454
373 return 0; 455 return 0;
374} 456}
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
480static void xlvbd_flush(struct blkfront_info *info) 562static void xlvbd_flush(struct blkfront_info *info)
481{ 563{
482 blk_queue_flush(info->rq, info->feature_flush); 564 blk_queue_flush(info->rq, info->feature_flush);
483 printk(KERN_INFO "blkfront: %s: %s: %s\n", 565 printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
484 info->gd->disk_name, 566 info->gd->disk_name,
485 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 567 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
486 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? 568 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
487 "flush diskcache" : "barrier or flush"), 569 "flush diskcache" : "barrier or flush"),
488 info->feature_flush ? "enabled" : "disabled"); 570 info->feature_flush ? "enabled" : "disabled",
571 info->feature_persistent ? "using persistent grants" : "");
489} 572}
490 573
491static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) 574static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
707 790
708static void blkif_free(struct blkfront_info *info, int suspend) 791static void blkif_free(struct blkfront_info *info, int suspend)
709{ 792{
793 struct llist_node *all_gnts;
794 struct grant *persistent_gnt;
795
710 /* Prevent new requests being issued until we fix things up. */ 796 /* Prevent new requests being issued until we fix things up. */
711 spin_lock_irq(&info->io_lock); 797 spin_lock_irq(&info->io_lock);
712 info->connected = suspend ? 798 info->connected = suspend ?
@@ -714,6 +800,18 @@ static void blkif_free(struct blkfront_info *info, int suspend)
714 /* No more blkif_request(). */ 800 /* No more blkif_request(). */
715 if (info->rq) 801 if (info->rq)
716 blk_stop_queue(info->rq); 802 blk_stop_queue(info->rq);
803
804 /* Remove all persistent grants */
805 if (info->persistent_gnts_c) {
806 all_gnts = llist_del_all(&info->persistent_gnts);
807 llist_for_each_entry(persistent_gnt, all_gnts, node) {
808 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
809 __free_page(pfn_to_page(persistent_gnt->pfn));
810 kfree(persistent_gnt);
811 }
812 info->persistent_gnts_c = 0;
813 }
814
717 /* No more gnttab callback work. */ 815 /* No more gnttab callback work. */
718 gnttab_cancel_free_callback(&info->callback); 816 gnttab_cancel_free_callback(&info->callback);
719 spin_unlock_irq(&info->io_lock); 817 spin_unlock_irq(&info->io_lock);
@@ -734,13 +832,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
734 832
735} 833}
736 834
737static void blkif_completion(struct blk_shadow *s) 835static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
836 struct blkif_response *bret)
738{ 837{
739 int i; 838 int i;
740 /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place 839 struct bio_vec *bvec;
741 * flag. */ 840 struct req_iterator iter;
742 for (i = 0; i < s->req.u.rw.nr_segments; i++) 841 unsigned long flags;
743 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); 842 char *bvec_data;
843 void *shared_data;
844 unsigned int offset = 0;
845
846 if (bret->operation == BLKIF_OP_READ) {
847 /*
848 * Copy the data received from the backend into the bvec.
849 * Since bv_offset can be different than 0, and bv_len different
850 * than PAGE_SIZE, we have to keep track of the current offset,
851 * to be sure we are copying the data from the right shared page.
852 */
853 rq_for_each_segment(bvec, s->request, iter) {
854 BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
855 i = offset >> PAGE_SHIFT;
856 BUG_ON(i >= s->req.u.rw.nr_segments);
857 shared_data = kmap_atomic(
858 pfn_to_page(s->grants_used[i]->pfn));
859 bvec_data = bvec_kmap_irq(bvec, &flags);
860 memcpy(bvec_data, shared_data + bvec->bv_offset,
861 bvec->bv_len);
862 bvec_kunmap_irq(bvec_data, &flags);
863 kunmap_atomic(shared_data);
864 offset += bvec->bv_len;
865 }
866 }
867 /* Add the persistent grant into the list of free grants */
868 for (i = 0; i < s->req.u.rw.nr_segments; i++) {
869 llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
870 info->persistent_gnts_c++;
871 }
744} 872}
745 873
746static irqreturn_t blkif_interrupt(int irq, void *dev_id) 874static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +911,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
783 req = info->shadow[id].request; 911 req = info->shadow[id].request;
784 912
785 if (bret->operation != BLKIF_OP_DISCARD) 913 if (bret->operation != BLKIF_OP_DISCARD)
786 blkif_completion(&info->shadow[id]); 914 blkif_completion(&info->shadow[id], info, bret);
787 915
788 if (add_id_to_freelist(info, id)) { 916 if (add_id_to_freelist(info, id)) {
789 WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", 917 WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1070,11 @@ again:
942 message = "writing protocol"; 1070 message = "writing protocol";
943 goto abort_transaction; 1071 goto abort_transaction;
944 } 1072 }
1073 err = xenbus_printf(xbt, dev->nodename,
1074 "feature-persistent", "%u", 1);
1075 if (err)
1076 dev_warn(&dev->dev,
1077 "writing persistent grants feature to xenbus");
945 1078
946 err = xenbus_transaction_end(xbt, 0); 1079 err = xenbus_transaction_end(xbt, 0);
947 if (err) { 1080 if (err) {
@@ -1029,6 +1162,8 @@ static int blkfront_probe(struct xenbus_device *dev,
1029 spin_lock_init(&info->io_lock); 1162 spin_lock_init(&info->io_lock);
1030 info->xbdev = dev; 1163 info->xbdev = dev;
1031 info->vdevice = vdevice; 1164 info->vdevice = vdevice;
1165 init_llist_head(&info->persistent_gnts);
1166 info->persistent_gnts_c = 0;
1032 info->connected = BLKIF_STATE_DISCONNECTED; 1167 info->connected = BLKIF_STATE_DISCONNECTED;
1033 INIT_WORK(&info->work, blkif_restart_queue); 1168 INIT_WORK(&info->work, blkif_restart_queue);
1034 1169
@@ -1093,7 +1228,7 @@ static int blkif_recover(struct blkfront_info *info)
1093 req->u.rw.seg[j].gref, 1228 req->u.rw.seg[j].gref,
1094 info->xbdev->otherend_id, 1229 info->xbdev->otherend_id,
1095 pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), 1230 pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
1096 rq_data_dir(info->shadow[req->u.rw.id].request)); 1231 0);
1097 } 1232 }
1098 info->shadow[req->u.rw.id].req = *req; 1233 info->shadow[req->u.rw.id].req = *req;
1099 1234
@@ -1225,7 +1360,7 @@ static void blkfront_connect(struct blkfront_info *info)
1225 unsigned long sector_size; 1360 unsigned long sector_size;
1226 unsigned int binfo; 1361 unsigned int binfo;
1227 int err; 1362 int err;
1228 int barrier, flush, discard; 1363 int barrier, flush, discard, persistent;
1229 1364
1230 switch (info->connected) { 1365 switch (info->connected) {
1231 case BLKIF_STATE_CONNECTED: 1366 case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1438,14 @@ static void blkfront_connect(struct blkfront_info *info)
1303 if (!err && discard) 1438 if (!err && discard)
1304 blkfront_setup_discard(info); 1439 blkfront_setup_discard(info);
1305 1440
1441 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1442 "feature-persistent", "%u", &persistent,
1443 NULL);
1444 if (err)
1445 info->feature_persistent = 0;
1446 else
1447 info->feature_persistent = persistent;
1448
1306 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1449 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1307 if (err) { 1450 if (err) {
1308 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 1451 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",