Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--	drivers/block/xen-blkfront.c	199
1 file changed, 171 insertions(+), 28 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 007db8986e84..96e9b00db081 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
+#include <linux/llist.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
 	BLKIF_STATE_SUSPENDED,
 };
 
+struct grant {
+	grant_ref_t gref;
+	unsigned long pfn;
+	struct llist_node node;
+};
+
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
 	struct work_struct work;
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
+	struct llist_head persistent_gnts;
+	unsigned int persistent_gnts_c;
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
 	unsigned int feature_secdiscard:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
+	unsigned int feature_persistent:1;
 	int is_ready;
 };
 
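Note on the new bookkeeping above: each page that stays granted to the backend is described by a small struct grant, and idle entries are parked on the lock-free persistent_gnts llist, with persistent_gnts_c counting how many are cached. A minimal sketch of the caching step, using a hypothetical helper name (the patch itself open-codes this in blkif_completion() further down):

	/* Sketch only: return an idle persistent grant to the per-device cache. */
	static void cache_persistent_gnt(struct blkfront_info *info,
					 struct grant *gnt)
	{
		llist_add(&gnt->node, &info->persistent_gnts);	/* lock-free push */
		info->persistent_gnts_c++;	/* updated under io_lock in the driver */
	}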
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref;
+
+	/*
+	 * Used to store if we are able to queue the request by just using
+	 * existing persistent grants, or if we have to get new grants,
+	 * as there are not sufficiently many free.
+	 */
+	bool new_persistent_gnts;
 	grant_ref_t gref_head;
+	struct page *granted_page;
+	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	if (gnttab_alloc_grant_references(
-		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
-		gnttab_request_free_callback(
-			&info->callback,
-			blkif_restart_queue_callback,
-			info,
-			BLKIF_MAX_SEGMENTS_PER_REQUEST);
-		return 1;
-	}
+	/* Check if we have enough grants to allocate a request */
+	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		new_persistent_gnts = 1;
+		if (gnttab_alloc_grant_references(
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    &gref_head) < 0) {
+			gnttab_request_free_callback(
+				&info->callback,
+				blkif_restart_queue_callback,
+				info,
+				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+			return 1;
+		}
+	} else
+		new_persistent_gnts = 0;
 
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
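To make the check above concrete: a request can use at most BLKIF_MAX_SEGMENTS_PER_REQUEST segments (11 at this point in the driver's history), so with, say, 4 grants already cached only 11 - 4 = 7 fresh grant references are reserved from the grant table; once the cache holds a full 11, the gnttab allocation is skipped entirely and new_persistent_gnts stays 0, so no grant references are freed at the end of the function either.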
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
 	       BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
 	for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
-			/* install a grant reference. */
-			ref = gnttab_claim_grant_reference(&gref_head);
-			BUG_ON(ref == -ENOSPC);
 
-			gnttab_grant_foreign_access_ref(
-					ref,
+			if (info->persistent_gnts_c) {
+				BUG_ON(llist_empty(&info->persistent_gnts));
+				gnt_list_entry = llist_entry(
+					llist_del_first(&info->persistent_gnts),
+					struct grant, node);
+
+				ref = gnt_list_entry->gref;
+				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+				info->persistent_gnts_c--;
+			} else {
+				ref = gnttab_claim_grant_reference(&gref_head);
+				BUG_ON(ref == -ENOSPC);
+
+				gnt_list_entry =
+					kmalloc(sizeof(struct grant),
+						GFP_ATOMIC);
+				if (!gnt_list_entry)
+					return -ENOMEM;
+
+				granted_page = alloc_page(GFP_ATOMIC);
+				if (!granted_page) {
+					kfree(gnt_list_entry);
+					return -ENOMEM;
+				}
+
+				gnt_list_entry->pfn =
+					page_to_pfn(granted_page);
+				gnt_list_entry->gref = ref;
+
+				buffer_mfn = pfn_to_mfn(page_to_pfn(
+								granted_page));
+				gnttab_grant_foreign_access_ref(ref,
 					info->xbdev->otherend_id,
-					buffer_mfn,
-					rq_data_dir(req));
+					buffer_mfn, 0);
+			}
+
+			info->shadow[id].grants_used[i] = gnt_list_entry;
+
+			if (rq_data_dir(req)) {
+				char *bvec_data;
+				void *shared_data;
+
+				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+				shared_data = kmap_atomic(
+					pfn_to_page(gnt_list_entry->pfn));
+				bvec_data = kmap_atomic(sg_page(sg));
+
+				/*
+				 * this does not wipe data stored outside the
+				 * range sg->offset..sg->offset+sg->length.
+				 * Therefore, blkback *could* see data from
+				 * previous requests. This is OK as long as
+				 * persistent grants are shared with just one
+				 * domain. It may need refactoring if this
+				 * changes
+				 */
+				memcpy(shared_data + sg->offset,
+				       bvec_data + sg->offset,
+				       sg->length);
+
+				kunmap_atomic(bvec_data);
+				kunmap_atomic(shared_data);
+			}
 
 			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
 			ring_req->u.rw.seg[i] =
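The per-segment logic above reduces to: reuse a cached grant when one is available, otherwise allocate a new page and grant it to the backend read/write (flag 0 instead of rq_data_dir(req), since the same mapping will now serve both reads and writes), and for writes memcpy the bio data into the shared page. A condensed, purely illustrative sketch of the reuse-or-allocate half, with error handling omitted and a hypothetical helper name:

	/* Sketch only: pick a grant for one segment (error paths omitted). */
	static struct grant *get_seg_grant(struct blkfront_info *info,
					   grant_ref_t *gref_head)
	{
		struct grant *gnt;

		if (info->persistent_gnts_c) {
			/* The backend already has this page mapped; just reuse it. */
			gnt = llist_entry(llist_del_first(&info->persistent_gnts),
					  struct grant, node);
			info->persistent_gnts_c--;
			return gnt;
		}
		/* Cache empty: hand out a brand-new page and grant reference. */
		gnt = kmalloc(sizeof(*gnt), GFP_ATOMIC);
		gnt->pfn = page_to_pfn(alloc_page(GFP_ATOMIC));
		gnt->gref = gnttab_claim_grant_reference(gref_head);
		gnttab_grant_foreign_access_ref(gnt->gref, info->xbdev->otherend_id,
						pfn_to_mfn(gnt->pfn),
						0 /* read/write */);
		return gnt;
	}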
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
 	/* Keep a private copy so we can reissue requests when recovering. */
 	info->shadow[id].req = *ring_req;
 
-	gnttab_free_grant_references(gref_head);
+	if (new_persistent_gnts)
+		gnttab_free_grant_references(gref_head);
 
 	return 0;
 }
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled");
+	       info->feature_flush ? "enabled" : "disabled",
+	       info->feature_persistent ? "using persistent grants" : "");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
 
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+	struct llist_node *all_gnts;
+	struct grant *persistent_gnt;
+
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
 	info->connected = suspend ?
@@ -714,6 +800,18 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* No more blkif_request(). */
 	if (info->rq)
 		blk_stop_queue(info->rq);
+
+	/* Remove all persistent grants */
+	if (info->persistent_gnts_c) {
+		all_gnts = llist_del_all(&info->persistent_gnts);
+		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+		info->persistent_gnts_c = 0;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -734,13 +832,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 
 }
 
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+			     struct blkif_response *bret)
 {
 	int i;
-	/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
-	 * flag. */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++)
-		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+	struct bio_vec *bvec;
+	struct req_iterator iter;
+	unsigned long flags;
+	char *bvec_data;
+	void *shared_data;
+	unsigned int offset = 0;
+
+	if (bret->operation == BLKIF_OP_READ) {
+		/*
+		 * Copy the data received from the backend into the bvec.
+		 * Since bv_offset can be different than 0, and bv_len different
+		 * than PAGE_SIZE, we have to keep track of the current offset,
+		 * to be sure we are copying the data from the right shared page.
+		 */
+		rq_for_each_segment(bvec, s->request, iter) {
+			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
+			i = offset >> PAGE_SHIFT;
+			BUG_ON(i >= s->req.u.rw.nr_segments);
+			shared_data = kmap_atomic(
+				pfn_to_page(s->grants_used[i]->pfn));
+			bvec_data = bvec_kmap_irq(bvec, &flags);
+			memcpy(bvec_data, shared_data + bvec->bv_offset,
+				bvec->bv_len);
+			bvec_kunmap_irq(bvec_data, &flags);
+			kunmap_atomic(shared_data);
+			offset += bvec->bv_len;
+		}
+	}
+	/* Add the persistent grant into the list of free grants */
+	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+		llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+		info->persistent_gnts_c++;
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
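Two things happen in blkif_completion() now. For reads, the backend wrote into the persistently granted pages rather than into the bio's own pages, so the data must be copied back into each bio_vec before the request completes; the running byte offset selects the right shared segment, e.g. a bvec starting 8192 bytes into the request maps to segment 8192 >> PAGE_SHIFT = 2 with 4 KiB pages. Afterwards, every grant used by the request is pushed back onto persistent_gnts for reuse instead of being revoked with gnttab_end_foreign_access(), which is the point of the feature: the per-request grant map/unmap work on the backend side is traded for a data copy in the frontend.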
@@ -783,7 +911,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		req  = info->shadow[id].request;
 
 		if (bret->operation != BLKIF_OP_DISCARD)
-			blkif_completion(&info->shadow[id]);
+			blkif_completion(&info->shadow[id], info, bret);
 
 		if (add_id_to_freelist(info, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1070,11 @@ again:
 		message = "writing protocol";
 		goto abort_transaction;
 	}
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-persistent", "%u", 1);
+	if (err)
+		dev_warn(&dev->dev,
+			 "writing persistent grants feature to xenbus");
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
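The frontend advertises the feature in its own xenstore directory here, next to the existing ring and protocol keys, so the backend can decide whether to keep the grants mapped. As a purely illustrative example (the actual path depends on the domain and device ids), the backend would see something like:

	/local/domain/<domid>/device/vbd/<devid>/feature-persistent = "1"

The dev_warn() is informational only; if the key cannot be written, the transaction and the connection still proceed, just without the advertisement.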
@@ -1029,6 +1162,8 @@ static int blkfront_probe(struct xenbus_device *dev,
 	spin_lock_init(&info->io_lock);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
+	init_llist_head(&info->persistent_gnts);
+	info->persistent_gnts_c = 0;
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 
@@ -1093,7 +1228,7 @@ static int blkif_recover(struct blkfront_info *info)
 				req->u.rw.seg[j].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
-				rq_data_dir(info->shadow[req->u.rw.id].request));
+				0);
 		}
 		info->shadow[req->u.rw.id].req = *req;
 
@@ -1225,7 +1360,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush, discard;
+	int barrier, flush, discard, persistent;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1438,14 @@ static void blkfront_connect(struct blkfront_info *info)
 	if (!err && discard)
 		blkfront_setup_discard(info);
 
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-persistent", "%u", &persistent,
+			    NULL);
+	if (err)
+		info->feature_persistent = 0;
+	else
+		info->feature_persistent = persistent;
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
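Finally, blkfront_connect() reads the backend's own feature-persistent key. If the backend is too old to publish it, xenbus_gather() fails and feature_persistent simply stays 0, so the handshake degrades gracefully against backends that predate persistent grants. In the version shown here the flag is only used for the connect-time message in xlvbd_flush(); the queueing and completion paths cache grants unconditionally, and it is the backend that decides, from the frontend's advertisement, whether to actually keep the grants mapped.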