author     Linus Torvalds <torvalds@linux-foundation.org>   2015-11-04 20:32:42 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-11-04 20:32:42 -0500
commit     41ecf1404b34d9975eb97f5005d9e4274eaeb76a (patch)
tree       8582dec3a644cfbe178132acded5ebb458e0e32e /drivers/block/xen-blkfront.c
parent     2dc10ad81fc017837037e60439662e1b16bdffb9 (diff)
parent     abed7d0710e8f892c267932a9492ccf447674fb8 (diff)
Merge tag 'for-linus-4.4-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from David Vrabel:

 - Improve balloon driver memory hotplug placement.
 - Use unpopulated hotplugged memory for foreign pages (if supported/enabled).
 - Support 64 KiB guest pages on arm64.
 - CPU hotplug support on arm/arm64.

* tag 'for-linus-4.4-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (44 commits)
  xen: fix the check of e_pfn in xen_find_pfn_range
  x86/xen: add reschedule point when mapping foreign GFNs
  xen/arm: don't try to re-register vcpu_info on cpu_hotplug.
  xen, cpu_hotplug: call device_offline instead of cpu_down
  xen/arm: Enable cpu_hotplug.c
  xenbus: Support multiple grants ring with 64KB
  xen/grant-table: Add an helper to iterate over a specific number of grants
  xen/xenbus: Rename *RING_PAGE* to *RING_GRANT*
  xen/arm: correct comment in enlighten.c
  xen/gntdev: use types from linux/types.h in userspace headers
  xen/gntalloc: use types from linux/types.h in userspace headers
  xen/balloon: Use the correct sizeof when declaring frame_list
  xen/swiotlb: Add support for 64KB page granularity
  xen/swiotlb: Pass addresses rather than frame numbers to xen_arch_need_swiotlb
  arm/xen: Add support for 64KB page granularity
  xen/privcmd: Add support for Linux 64KB page granularity
  net/xen-netback: Make it running on 64KB page granularity
  net/xen-netfront: Make it running on 64KB page granularity
  block/xen-blkback: Make it running on 64KB page granularity
  block/xen-blkfront: Make it running on 64KB page granularity
  ...
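The xen-blkfront rework pulled in here hinges on one piece of arithmetic: a Xen grant always covers 4KB (XEN_PAGE_SIZE), while a Linux physical segment covers one guest page (PAGE_SIZE), which is 64KB on an arm64 guest built with 64KB pages. The standalone sketch below reproduces that sizing so the new GRANTS_PER_PSEG / INDIRECT_GREFS macros in the diff are easier to follow; the constants (GUEST_PAGE_SIZE and the 8-byte segment-descriptor size) are illustrative assumptions, not taken from the kernel headers.

#include <stdio.h>

/* Stand-in constants, for illustration only. */
#define XEN_PAGE_SIZE      4096u   /* grant granularity is always 4KB */
#define GUEST_PAGE_SIZE    65536u  /* stands in for PAGE_SIZE on a 64KB-page arm64 guest */
#define SEGMENT_DESC_SIZE  8u      /* assumed sizeof(struct blkif_request_segment) */

/* Same shape as the new macros introduced in xen-blkfront.c */
#define GRANTS_PER_PSEG           (GUEST_PAGE_SIZE / XEN_PAGE_SIZE)
#define GRANTS_PER_INDIRECT_FRAME (XEN_PAGE_SIZE / SEGMENT_DESC_SIZE)
#define DIV_ROUND_UP(n, d)        (((n) + (d) - 1) / (d))
#define INDIRECT_GREFS(grants)    DIV_ROUND_UP(grants, GRANTS_PER_INDIRECT_FRAME)
#define GREFS(psegs)              ((psegs) * GRANTS_PER_PSEG)

int main(void)
{
	unsigned int psegs = 32;            /* physical segments in one request */
	unsigned int grants = GREFS(psegs); /* 4KB grants those segments need */

	printf("grants per physical segment: %u\n", GRANTS_PER_PSEG);
	printf("grants for %u segments:      %u\n", psegs, grants);
	printf("indirect grefs required:     %u\n", INDIRECT_GREFS(grants));
	return 0;
}

On a 4KB-page guest GRANTS_PER_PSEG evaluates to 1 and the sizing collapses to the old behaviour; on a 64KB-page guest every physical segment consumes 16 grant references, which is why the request path in the diff below counts grants rather than segments.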
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--  drivers/block/xen-blkfront.c  560
1 file changed, 360 insertions(+), 200 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a69c02dadec0..2fee2eef988d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -68,7 +68,7 @@ enum blkif_state {
68 68
69struct grant { 69struct grant {
70 grant_ref_t gref; 70 grant_ref_t gref;
71 unsigned long pfn; 71 struct page *page;
72 struct list_head node; 72 struct list_head node;
73}; 73};
74 74
@@ -78,6 +78,7 @@ struct blk_shadow {
78 struct grant **grants_used; 78 struct grant **grants_used;
79 struct grant **indirect_grants; 79 struct grant **indirect_grants;
80 struct scatterlist *sg; 80 struct scatterlist *sg;
81 unsigned int num_sg;
81}; 82};
82 83
83struct split_bio { 84struct split_bio {
@@ -106,8 +107,12 @@ static unsigned int xen_blkif_max_ring_order;
106module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); 107module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
107MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); 108MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
108 109
109#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) 110#define BLK_RING_SIZE(info) \
110#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) 111 __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
112
113#define BLK_MAX_RING_SIZE \
114 __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
115
111/* 116/*
112 * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 117 * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
113 * characters are enough. Define to 20 to keep consist with backend. 118 * characters are enough. Define to 20 to keep consist with backend.
@@ -128,7 +133,7 @@ struct blkfront_info
128 int vdevice; 133 int vdevice;
129 blkif_vdev_t handle; 134 blkif_vdev_t handle;
130 enum blkif_state connected; 135 enum blkif_state connected;
131 int ring_ref[XENBUS_MAX_RING_PAGES]; 136 int ring_ref[XENBUS_MAX_RING_GRANTS];
132 unsigned int nr_ring_pages; 137 unsigned int nr_ring_pages;
133 struct blkif_front_ring ring; 138 struct blkif_front_ring ring;
134 unsigned int evtchn, irq; 139 unsigned int evtchn, irq;
@@ -146,6 +151,7 @@ struct blkfront_info
146 unsigned int discard_granularity; 151 unsigned int discard_granularity;
147 unsigned int discard_alignment; 152 unsigned int discard_alignment;
148 unsigned int feature_persistent:1; 153 unsigned int feature_persistent:1;
154 /* Number of 4KB segments handled */
149 unsigned int max_indirect_segments; 155 unsigned int max_indirect_segments;
150 int is_ready; 156 int is_ready;
151 struct blk_mq_tag_set tag_set; 157 struct blk_mq_tag_set tag_set;
@@ -174,10 +180,23 @@ static DEFINE_SPINLOCK(minor_lock);
174 180
175#define DEV_NAME "xvd" /* name in /dev */ 181#define DEV_NAME "xvd" /* name in /dev */
176 182
177#define SEGS_PER_INDIRECT_FRAME \ 183/*
178 (PAGE_SIZE/sizeof(struct blkif_request_segment)) 184 * Grants are always the same size as a Xen page (i.e 4KB).
179#define INDIRECT_GREFS(_segs) \ 185 * A physical segment is always the same size as a Linux page.
180 ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) 186 * Number of grants per physical segment
187 */
188#define GRANTS_PER_PSEG (PAGE_SIZE / XEN_PAGE_SIZE)
189
190#define GRANTS_PER_INDIRECT_FRAME \
191 (XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))
192
193#define PSEGS_PER_INDIRECT_FRAME \
194 (GRANTS_INDIRECT_FRAME / GRANTS_PSEGS)
195
196#define INDIRECT_GREFS(_grants) \
197 DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME)
198
199#define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG)
181 200
182static int blkfront_setup_indirect(struct blkfront_info *info); 201static int blkfront_setup_indirect(struct blkfront_info *info);
183static int blkfront_gather_backend_features(struct blkfront_info *info); 202static int blkfront_gather_backend_features(struct blkfront_info *info);
@@ -221,7 +240,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
221 kfree(gnt_list_entry); 240 kfree(gnt_list_entry);
222 goto out_of_memory; 241 goto out_of_memory;
223 } 242 }
224 gnt_list_entry->pfn = page_to_pfn(granted_page); 243 gnt_list_entry->page = granted_page;
225 } 244 }
226 245
227 gnt_list_entry->gref = GRANT_INVALID_REF; 246 gnt_list_entry->gref = GRANT_INVALID_REF;
@@ -236,7 +255,7 @@ out_of_memory:
236 &info->grants, node) { 255 &info->grants, node) {
237 list_del(&gnt_list_entry->node); 256 list_del(&gnt_list_entry->node);
238 if (info->feature_persistent) 257 if (info->feature_persistent)
239 __free_page(pfn_to_page(gnt_list_entry->pfn)); 258 __free_page(gnt_list_entry->page);
240 kfree(gnt_list_entry); 259 kfree(gnt_list_entry);
241 i--; 260 i--;
242 } 261 }
@@ -244,34 +263,77 @@ out_of_memory:
244 return -ENOMEM; 263 return -ENOMEM;
245} 264}
246 265
247static struct grant *get_grant(grant_ref_t *gref_head, 266static struct grant *get_free_grant(struct blkfront_info *info)
248 unsigned long pfn,
249 struct blkfront_info *info)
250{ 267{
251 struct grant *gnt_list_entry; 268 struct grant *gnt_list_entry;
252 unsigned long buffer_gfn;
253 269
254 BUG_ON(list_empty(&info->grants)); 270 BUG_ON(list_empty(&info->grants));
255 gnt_list_entry = list_first_entry(&info->grants, struct grant, 271 gnt_list_entry = list_first_entry(&info->grants, struct grant,
256 node); 272 node);
257 list_del(&gnt_list_entry->node); 273 list_del(&gnt_list_entry->node);
258 274
259 if (gnt_list_entry->gref != GRANT_INVALID_REF) { 275 if (gnt_list_entry->gref != GRANT_INVALID_REF)
260 info->persistent_gnts_c--; 276 info->persistent_gnts_c--;
277
278 return gnt_list_entry;
279}
280
281static inline void grant_foreign_access(const struct grant *gnt_list_entry,
282 const struct blkfront_info *info)
283{
284 gnttab_page_grant_foreign_access_ref_one(gnt_list_entry->gref,
285 info->xbdev->otherend_id,
286 gnt_list_entry->page,
287 0);
288}
289
290static struct grant *get_grant(grant_ref_t *gref_head,
291 unsigned long gfn,
292 struct blkfront_info *info)
293{
294 struct grant *gnt_list_entry = get_free_grant(info);
295
296 if (gnt_list_entry->gref != GRANT_INVALID_REF)
261 return gnt_list_entry; 297 return gnt_list_entry;
298
299 /* Assign a gref to this page */
300 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
301 BUG_ON(gnt_list_entry->gref == -ENOSPC);
302 if (info->feature_persistent)
303 grant_foreign_access(gnt_list_entry, info);
304 else {
305 /* Grant access to the GFN passed by the caller */
306 gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
307 info->xbdev->otherend_id,
308 gfn, 0);
262 } 309 }
263 310
311 return gnt_list_entry;
312}
313
314static struct grant *get_indirect_grant(grant_ref_t *gref_head,
315 struct blkfront_info *info)
316{
317 struct grant *gnt_list_entry = get_free_grant(info);
318
319 if (gnt_list_entry->gref != GRANT_INVALID_REF)
320 return gnt_list_entry;
321
264 /* Assign a gref to this page */ 322 /* Assign a gref to this page */
265 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); 323 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
266 BUG_ON(gnt_list_entry->gref == -ENOSPC); 324 BUG_ON(gnt_list_entry->gref == -ENOSPC);
267 if (!info->feature_persistent) { 325 if (!info->feature_persistent) {
268 BUG_ON(!pfn); 326 struct page *indirect_page;
269 gnt_list_entry->pfn = pfn; 327
328 /* Fetch a pre-allocated page to use for indirect grefs */
329 BUG_ON(list_empty(&info->indirect_pages));
330 indirect_page = list_first_entry(&info->indirect_pages,
331 struct page, lru);
332 list_del(&indirect_page->lru);
333 gnt_list_entry->page = indirect_page;
270 } 334 }
271 buffer_gfn = pfn_to_gfn(gnt_list_entry->pfn); 335 grant_foreign_access(gnt_list_entry, info);
272 gnttab_grant_foreign_access_ref(gnt_list_entry->gref, 336
273 info->xbdev->otherend_id,
274 buffer_gfn, 0);
275 return gnt_list_entry; 337 return gnt_list_entry;
276} 338}
277 339
@@ -394,20 +456,128 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
394 return 0; 456 return 0;
395} 457}
396 458
397/* 459static int blkif_queue_discard_req(struct request *req)
398 * Generate a Xen blkfront IO request from a blk layer request. Reads
399 * and writes are handled as expected.
400 *
401 * @req: a request struct
402 */
403static int blkif_queue_request(struct request *req)
404{ 460{
405 struct blkfront_info *info = req->rq_disk->private_data; 461 struct blkfront_info *info = req->rq_disk->private_data;
406 struct blkif_request *ring_req; 462 struct blkif_request *ring_req;
407 unsigned long id; 463 unsigned long id;
464
465 /* Fill out a communications ring structure. */
466 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
467 id = get_id_from_freelist(info);
468 info->shadow[id].request = req;
469
470 ring_req->operation = BLKIF_OP_DISCARD;
471 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
472 ring_req->u.discard.id = id;
473 ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
474 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
475 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
476 else
477 ring_req->u.discard.flag = 0;
478
479 info->ring.req_prod_pvt++;
480
481 /* Keep a private copy so we can reissue requests when recovering. */
482 info->shadow[id].req = *ring_req;
483
484 return 0;
485}
486
487struct setup_rw_req {
488 unsigned int grant_idx;
489 struct blkif_request_segment *segments;
490 struct blkfront_info *info;
491 struct blkif_request *ring_req;
492 grant_ref_t gref_head;
493 unsigned int id;
494 /* Only used when persistent grant is used and it's a read request */
495 bool need_copy;
496 unsigned int bvec_off;
497 char *bvec_data;
498};
499
500static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
501 unsigned int len, void *data)
502{
503 struct setup_rw_req *setup = data;
504 int n, ref;
505 struct grant *gnt_list_entry;
408 unsigned int fsect, lsect; 506 unsigned int fsect, lsect;
409 int i, ref, n; 507 /* Convenient aliases */
410 struct blkif_request_segment *segments = NULL; 508 unsigned int grant_idx = setup->grant_idx;
509 struct blkif_request *ring_req = setup->ring_req;
510 struct blkfront_info *info = setup->info;
511 struct blk_shadow *shadow = &info->shadow[setup->id];
512
513 if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
514 (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
515 if (setup->segments)
516 kunmap_atomic(setup->segments);
517
518 n = grant_idx / GRANTS_PER_INDIRECT_FRAME;
519 gnt_list_entry = get_indirect_grant(&setup->gref_head, info);
520 shadow->indirect_grants[n] = gnt_list_entry;
521 setup->segments = kmap_atomic(gnt_list_entry->page);
522 ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
523 }
524
525 gnt_list_entry = get_grant(&setup->gref_head, gfn, info);
526 ref = gnt_list_entry->gref;
527 shadow->grants_used[grant_idx] = gnt_list_entry;
528
529 if (setup->need_copy) {
530 void *shared_data;
531
532 shared_data = kmap_atomic(gnt_list_entry->page);
533 /*
534 * this does not wipe data stored outside the
535 * range sg->offset..sg->offset+sg->length.
536 * Therefore, blkback *could* see data from
537 * previous requests. This is OK as long as
538 * persistent grants are shared with just one
539 * domain. It may need refactoring if this
540 * changes
541 */
542 memcpy(shared_data + offset,
543 setup->bvec_data + setup->bvec_off,
544 len);
545
546 kunmap_atomic(shared_data);
547 setup->bvec_off += len;
548 }
549
550 fsect = offset >> 9;
551 lsect = fsect + (len >> 9) - 1;
552 if (ring_req->operation != BLKIF_OP_INDIRECT) {
553 ring_req->u.rw.seg[grant_idx] =
554 (struct blkif_request_segment) {
555 .gref = ref,
556 .first_sect = fsect,
557 .last_sect = lsect };
558 } else {
559 setup->segments[grant_idx % GRANTS_PER_INDIRECT_FRAME] =
560 (struct blkif_request_segment) {
561 .gref = ref,
562 .first_sect = fsect,
563 .last_sect = lsect };
564 }
565
566 (setup->grant_idx)++;
567}
568
569static int blkif_queue_rw_req(struct request *req)
570{
571 struct blkfront_info *info = req->rq_disk->private_data;
572 struct blkif_request *ring_req;
573 unsigned long id;
574 int i;
575 struct setup_rw_req setup = {
576 .grant_idx = 0,
577 .segments = NULL,
578 .info = info,
579 .need_copy = rq_data_dir(req) && info->feature_persistent,
580 };
411 581
412 /* 582 /*
413 * Used to store if we are able to queue the request by just using 583 * Used to store if we are able to queue the request by just using
@@ -415,28 +585,23 @@ static int blkif_queue_request(struct request *req)
415 * as there are not sufficiently many free. 585 * as there are not sufficiently many free.
416 */ 586 */
417 bool new_persistent_gnts; 587 bool new_persistent_gnts;
418 grant_ref_t gref_head;
419 struct grant *gnt_list_entry = NULL;
420 struct scatterlist *sg; 588 struct scatterlist *sg;
421 int nseg, max_grefs; 589 int num_sg, max_grefs, num_grant;
422 590
423 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 591 max_grefs = req->nr_phys_segments * GRANTS_PER_PSEG;
424 return 1;
425
426 max_grefs = req->nr_phys_segments;
427 if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST) 592 if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
428 /* 593 /*
429 * If we are using indirect segments we need to account 594 * If we are using indirect segments we need to account
430 * for the indirect grefs used in the request. 595 * for the indirect grefs used in the request.
431 */ 596 */
432 max_grefs += INDIRECT_GREFS(req->nr_phys_segments); 597 max_grefs += INDIRECT_GREFS(max_grefs);
433 598
434 /* Check if we have enough grants to allocate a requests */ 599 /* Check if we have enough grants to allocate a requests */
435 if (info->persistent_gnts_c < max_grefs) { 600 if (info->persistent_gnts_c < max_grefs) {
436 new_persistent_gnts = 1; 601 new_persistent_gnts = 1;
437 if (gnttab_alloc_grant_references( 602 if (gnttab_alloc_grant_references(
438 max_grefs - info->persistent_gnts_c, 603 max_grefs - info->persistent_gnts_c,
439 &gref_head) < 0) { 604 &setup.gref_head) < 0) {
440 gnttab_request_free_callback( 605 gnttab_request_free_callback(
441 &info->callback, 606 &info->callback,
442 blkif_restart_queue_callback, 607 blkif_restart_queue_callback,
@@ -452,139 +617,82 @@ static int blkif_queue_request(struct request *req)
452 id = get_id_from_freelist(info); 617 id = get_id_from_freelist(info);
453 info->shadow[id].request = req; 618 info->shadow[id].request = req;
454 619
455 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { 620 BUG_ON(info->max_indirect_segments == 0 &&
456 ring_req->operation = BLKIF_OP_DISCARD; 621 GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST);
457 ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 622 BUG_ON(info->max_indirect_segments &&
458 ring_req->u.discard.id = id; 623 GREFS(req->nr_phys_segments) > info->max_indirect_segments);
459 ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); 624
460 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) 625 num_sg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
461 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; 626 num_grant = 0;
462 else 627 /* Calculate the number of grant used */
463 ring_req->u.discard.flag = 0; 628 for_each_sg(info->shadow[id].sg, sg, num_sg, i)
629 num_grant += gnttab_count_grant(sg->offset, sg->length);
630
631 ring_req->u.rw.id = id;
632 info->shadow[id].num_sg = num_sg;
633 if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
634 /*
635 * The indirect operation can only be a BLKIF_OP_READ or
636 * BLKIF_OP_WRITE
637 */
638 BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
639 ring_req->operation = BLKIF_OP_INDIRECT;
640 ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
641 BLKIF_OP_WRITE : BLKIF_OP_READ;
642 ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
643 ring_req->u.indirect.handle = info->handle;
644 ring_req->u.indirect.nr_segments = num_grant;
464 } else { 645 } else {
465 BUG_ON(info->max_indirect_segments == 0 && 646 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
466 req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 647 ring_req->u.rw.handle = info->handle;
467 BUG_ON(info->max_indirect_segments && 648 ring_req->operation = rq_data_dir(req) ?
468 req->nr_phys_segments > info->max_indirect_segments); 649 BLKIF_OP_WRITE : BLKIF_OP_READ;
469 nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); 650 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
470 ring_req->u.rw.id = id;
471 if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
472 /* 651 /*
473 * The indirect operation can only be a BLKIF_OP_READ or 652 * Ideally we can do an unordered flush-to-disk.
474 * BLKIF_OP_WRITE 653 * In case the backend onlysupports barriers, use that.
654 * A barrier request a superset of FUA, so we can
655 * implement it the same way. (It's also a FLUSH+FUA,
656 * since it is guaranteed ordered WRT previous writes.)
475 */ 657 */
476 BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); 658 switch (info->feature_flush &
477 ring_req->operation = BLKIF_OP_INDIRECT; 659 ((REQ_FLUSH|REQ_FUA))) {
478 ring_req->u.indirect.indirect_op = rq_data_dir(req) ? 660 case REQ_FLUSH|REQ_FUA:
479 BLKIF_OP_WRITE : BLKIF_OP_READ; 661 ring_req->operation =
480 ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); 662 BLKIF_OP_WRITE_BARRIER;
481 ring_req->u.indirect.handle = info->handle; 663 break;
482 ring_req->u.indirect.nr_segments = nseg; 664 case REQ_FLUSH:
483 } else { 665 ring_req->operation =
484 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 666 BLKIF_OP_FLUSH_DISKCACHE;
485 ring_req->u.rw.handle = info->handle; 667 break;
486 ring_req->operation = rq_data_dir(req) ? 668 default:
487 BLKIF_OP_WRITE : BLKIF_OP_READ; 669 ring_req->operation = 0;
488 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
489 /*
490 * Ideally we can do an unordered flush-to-disk. In case the
491 * backend onlysupports barriers, use that. A barrier request
492 * a superset of FUA, so we can implement it the same
493 * way. (It's also a FLUSH+FUA, since it is
494 * guaranteed ordered WRT previous writes.)
495 */
496 switch (info->feature_flush &
497 ((REQ_FLUSH|REQ_FUA))) {
498 case REQ_FLUSH|REQ_FUA:
499 ring_req->operation =
500 BLKIF_OP_WRITE_BARRIER;
501 break;
502 case REQ_FLUSH:
503 ring_req->operation =
504 BLKIF_OP_FLUSH_DISKCACHE;
505 break;
506 default:
507 ring_req->operation = 0;
508 }
509 } 670 }
510 ring_req->u.rw.nr_segments = nseg;
511 } 671 }
512 for_each_sg(info->shadow[id].sg, sg, nseg, i) { 672 ring_req->u.rw.nr_segments = num_grant;
513 fsect = sg->offset >> 9; 673 }
514 lsect = fsect + (sg->length >> 9) - 1;
515
516 if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
517 (i % SEGS_PER_INDIRECT_FRAME == 0)) {
518 unsigned long uninitialized_var(pfn);
519
520 if (segments)
521 kunmap_atomic(segments);
522
523 n = i / SEGS_PER_INDIRECT_FRAME;
524 if (!info->feature_persistent) {
525 struct page *indirect_page;
526
527 /* Fetch a pre-allocated page to use for indirect grefs */
528 BUG_ON(list_empty(&info->indirect_pages));
529 indirect_page = list_first_entry(&info->indirect_pages,
530 struct page, lru);
531 list_del(&indirect_page->lru);
532 pfn = page_to_pfn(indirect_page);
533 }
534 gnt_list_entry = get_grant(&gref_head, pfn, info);
535 info->shadow[id].indirect_grants[n] = gnt_list_entry;
536 segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
537 ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
538 }
539
540 gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info);
541 ref = gnt_list_entry->gref;
542
543 info->shadow[id].grants_used[i] = gnt_list_entry;
544
545 if (rq_data_dir(req) && info->feature_persistent) {
546 char *bvec_data;
547 void *shared_data;
548 674
549 BUG_ON(sg->offset + sg->length > PAGE_SIZE); 675 setup.ring_req = ring_req;
676 setup.id = id;
677 for_each_sg(info->shadow[id].sg, sg, num_sg, i) {
678 BUG_ON(sg->offset + sg->length > PAGE_SIZE);
550 679
551 shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); 680 if (setup.need_copy) {
552 bvec_data = kmap_atomic(sg_page(sg)); 681 setup.bvec_off = sg->offset;
682 setup.bvec_data = kmap_atomic(sg_page(sg));
683 }
553 684
554 /* 685 gnttab_foreach_grant_in_range(sg_page(sg),
555 * this does not wipe data stored outside the 686 sg->offset,
556 * range sg->offset..sg->offset+sg->length. 687 sg->length,
557 * Therefore, blkback *could* see data from 688 blkif_setup_rw_req_grant,
558 * previous requests. This is OK as long as 689 &setup);
559 * persistent grants are shared with just one
560 * domain. It may need refactoring if this
561 * changes
562 */
563 memcpy(shared_data + sg->offset,
564 bvec_data + sg->offset,
565 sg->length);
566 690
567 kunmap_atomic(bvec_data); 691 if (setup.need_copy)
568 kunmap_atomic(shared_data); 692 kunmap_atomic(setup.bvec_data);
569 }
570 if (ring_req->operation != BLKIF_OP_INDIRECT) {
571 ring_req->u.rw.seg[i] =
572 (struct blkif_request_segment) {
573 .gref = ref,
574 .first_sect = fsect,
575 .last_sect = lsect };
576 } else {
577 n = i % SEGS_PER_INDIRECT_FRAME;
578 segments[n] =
579 (struct blkif_request_segment) {
580 .gref = ref,
581 .first_sect = fsect,
582 .last_sect = lsect };
583 }
584 }
585 if (segments)
586 kunmap_atomic(segments);
587 } 693 }
694 if (setup.segments)
695 kunmap_atomic(setup.segments);
588 696
589 info->ring.req_prod_pvt++; 697 info->ring.req_prod_pvt++;
590 698
@@ -592,11 +700,29 @@ static int blkif_queue_request(struct request *req)
592 info->shadow[id].req = *ring_req; 700 info->shadow[id].req = *ring_req;
593 701
594 if (new_persistent_gnts) 702 if (new_persistent_gnts)
595 gnttab_free_grant_references(gref_head); 703 gnttab_free_grant_references(setup.gref_head);
596 704
597 return 0; 705 return 0;
598} 706}
599 707
708/*
709 * Generate a Xen blkfront IO request from a blk layer request. Reads
710 * and writes are handled as expected.
711 *
712 * @req: a request struct
713 */
714static int blkif_queue_request(struct request *req)
715{
716 struct blkfront_info *info = req->rq_disk->private_data;
717
718 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
719 return 1;
720
721 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
722 return blkif_queue_discard_req(req);
723 else
724 return blkif_queue_rw_req(req);
725}
600 726
601static inline void flush_requests(struct blkfront_info *info) 727static inline void flush_requests(struct blkfront_info *info)
602{ 728{
@@ -691,14 +817,14 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
691 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 817 /* Hard sector size and max sectors impersonate the equiv. hardware. */
692 blk_queue_logical_block_size(rq, sector_size); 818 blk_queue_logical_block_size(rq, sector_size);
693 blk_queue_physical_block_size(rq, physical_sector_size); 819 blk_queue_physical_block_size(rq, physical_sector_size);
694 blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512); 820 blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
695 821
696 /* Each segment in a request is up to an aligned page in size. */ 822 /* Each segment in a request is up to an aligned page in size. */
697 blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 823 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
698 blk_queue_max_segment_size(rq, PAGE_SIZE); 824 blk_queue_max_segment_size(rq, PAGE_SIZE);
699 825
700 /* Ensure a merged request will fit in a single I/O ring slot. */ 826 /* Ensure a merged request will fit in a single I/O ring slot. */
701 blk_queue_max_segments(rq, segments); 827 blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
702 828
703 /* Make sure buffer addresses are sector-aligned. */ 829 /* Make sure buffer addresses are sector-aligned. */
704 blk_queue_dma_alignment(rq, 511); 830 blk_queue_dma_alignment(rq, 511);
@@ -972,7 +1098,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
972 info->persistent_gnts_c--; 1098 info->persistent_gnts_c--;
973 } 1099 }
974 if (info->feature_persistent) 1100 if (info->feature_persistent)
975 __free_page(pfn_to_page(persistent_gnt->pfn)); 1101 __free_page(persistent_gnt->page);
976 kfree(persistent_gnt); 1102 kfree(persistent_gnt);
977 } 1103 }
978 } 1104 }
@@ -1007,7 +1133,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
1007 persistent_gnt = info->shadow[i].grants_used[j]; 1133 persistent_gnt = info->shadow[i].grants_used[j];
1008 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 1134 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
1009 if (info->feature_persistent) 1135 if (info->feature_persistent)
1010 __free_page(pfn_to_page(persistent_gnt->pfn)); 1136 __free_page(persistent_gnt->page);
1011 kfree(persistent_gnt); 1137 kfree(persistent_gnt);
1012 } 1138 }
1013 1139
@@ -1021,7 +1147,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
1021 for (j = 0; j < INDIRECT_GREFS(segs); j++) { 1147 for (j = 0; j < INDIRECT_GREFS(segs); j++) {
1022 persistent_gnt = info->shadow[i].indirect_grants[j]; 1148 persistent_gnt = info->shadow[i].indirect_grants[j];
1023 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 1149 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
1024 __free_page(pfn_to_page(persistent_gnt->pfn)); 1150 __free_page(persistent_gnt->page);
1025 kfree(persistent_gnt); 1151 kfree(persistent_gnt);
1026 } 1152 }
1027 1153
@@ -1057,33 +1183,65 @@ free_shadow:
1057 1183
1058} 1184}
1059 1185
1186struct copy_from_grant {
1187 const struct blk_shadow *s;
1188 unsigned int grant_idx;
1189 unsigned int bvec_offset;
1190 char *bvec_data;
1191};
1192
1193static void blkif_copy_from_grant(unsigned long gfn, unsigned int offset,
1194 unsigned int len, void *data)
1195{
1196 struct copy_from_grant *info = data;
1197 char *shared_data;
1198 /* Convenient aliases */
1199 const struct blk_shadow *s = info->s;
1200
1201 shared_data = kmap_atomic(s->grants_used[info->grant_idx]->page);
1202
1203 memcpy(info->bvec_data + info->bvec_offset,
1204 shared_data + offset, len);
1205
1206 info->bvec_offset += len;
1207 info->grant_idx++;
1208
1209 kunmap_atomic(shared_data);
1210}
1211
1060static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, 1212static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
1061 struct blkif_response *bret) 1213 struct blkif_response *bret)
1062{ 1214{
1063 int i = 0; 1215 int i = 0;
1064 struct scatterlist *sg; 1216 struct scatterlist *sg;
1065 char *bvec_data; 1217 int num_sg, num_grant;
1066 void *shared_data; 1218 struct copy_from_grant data = {
1067 int nseg; 1219 .s = s,
1220 .grant_idx = 0,
1221 };
1068 1222
1069 nseg = s->req.operation == BLKIF_OP_INDIRECT ? 1223 num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
1070 s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; 1224 s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
1225 num_sg = s->num_sg;
1071 1226
1072 if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { 1227 if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
1073 for_each_sg(s->sg, sg, nseg, i) { 1228 for_each_sg(s->sg, sg, num_sg, i) {
1074 BUG_ON(sg->offset + sg->length > PAGE_SIZE); 1229 BUG_ON(sg->offset + sg->length > PAGE_SIZE);
1075 shared_data = kmap_atomic( 1230
1076 pfn_to_page(s->grants_used[i]->pfn)); 1231 data.bvec_offset = sg->offset;
1077 bvec_data = kmap_atomic(sg_page(sg)); 1232 data.bvec_data = kmap_atomic(sg_page(sg));
1078 memcpy(bvec_data + sg->offset, 1233
1079 shared_data + sg->offset, 1234 gnttab_foreach_grant_in_range(sg_page(sg),
1080 sg->length); 1235 sg->offset,
1081 kunmap_atomic(bvec_data); 1236 sg->length,
1082 kunmap_atomic(shared_data); 1237 blkif_copy_from_grant,
1238 &data);
1239
1240 kunmap_atomic(data.bvec_data);
1083 } 1241 }
1084 } 1242 }
1085 /* Add the persistent grant into the list of free grants */ 1243 /* Add the persistent grant into the list of free grants */
1086 for (i = 0; i < nseg; i++) { 1244 for (i = 0; i < num_grant; i++) {
1087 if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { 1245 if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
1088 /* 1246 /*
1089 * If the grant is still mapped by the backend (the 1247 * If the grant is still mapped by the backend (the
@@ -1109,7 +1267,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
1109 } 1267 }
1110 } 1268 }
1111 if (s->req.operation == BLKIF_OP_INDIRECT) { 1269 if (s->req.operation == BLKIF_OP_INDIRECT) {
1112 for (i = 0; i < INDIRECT_GREFS(nseg); i++) { 1270 for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
1113 if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { 1271 if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
1114 if (!info->feature_persistent) 1272 if (!info->feature_persistent)
1115 pr_alert_ratelimited("backed has not unmapped grant: %u\n", 1273 pr_alert_ratelimited("backed has not unmapped grant: %u\n",
@@ -1125,7 +1283,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
1125 * available pages for indirect grefs. 1283 * available pages for indirect grefs.
1126 */ 1284 */
1127 if (!info->feature_persistent) { 1285 if (!info->feature_persistent) {
1128 indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); 1286 indirect_page = s->indirect_grants[i]->page;
1129 list_add(&indirect_page->lru, &info->indirect_pages); 1287 list_add(&indirect_page->lru, &info->indirect_pages);
1130 } 1288 }
1131 s->indirect_grants[i]->gref = GRANT_INVALID_REF; 1289 s->indirect_grants[i]->gref = GRANT_INVALID_REF;
@@ -1254,8 +1412,8 @@ static int setup_blkring(struct xenbus_device *dev,
1254{ 1412{
1255 struct blkif_sring *sring; 1413 struct blkif_sring *sring;
1256 int err, i; 1414 int err, i;
1257 unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; 1415 unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
1258 grant_ref_t gref[XENBUS_MAX_RING_PAGES]; 1416 grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
1259 1417
1260 for (i = 0; i < info->nr_ring_pages; i++) 1418 for (i = 0; i < info->nr_ring_pages; i++)
1261 info->ring_ref[i] = GRANT_INVALID_REF; 1419 info->ring_ref[i] = GRANT_INVALID_REF;
@@ -1583,8 +1741,8 @@ static int blkif_recover(struct blkfront_info *info)
1583 atomic_set(&split_bio->pending, pending); 1741 atomic_set(&split_bio->pending, pending);
1584 split_bio->bio = bio; 1742 split_bio->bio = bio;
1585 for (i = 0; i < pending; i++) { 1743 for (i = 0; i < pending; i++) {
1586 offset = (i * segs * PAGE_SIZE) >> 9; 1744 offset = (i * segs * XEN_PAGE_SIZE) >> 9;
1587 size = min((unsigned int)(segs * PAGE_SIZE) >> 9, 1745 size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
1588 (unsigned int)bio_sectors(bio) - offset); 1746 (unsigned int)bio_sectors(bio) - offset);
1589 cloned_bio = bio_clone(bio, GFP_NOIO); 1747 cloned_bio = bio_clone(bio, GFP_NOIO);
1590 BUG_ON(cloned_bio == NULL); 1748 BUG_ON(cloned_bio == NULL);
@@ -1695,15 +1853,17 @@ static void blkfront_setup_discard(struct blkfront_info *info)
1695 1853
1696static int blkfront_setup_indirect(struct blkfront_info *info) 1854static int blkfront_setup_indirect(struct blkfront_info *info)
1697{ 1855{
1698 unsigned int segs; 1856 unsigned int psegs, grants;
1699 int err, i; 1857 int err, i;
1700 1858
1701 if (info->max_indirect_segments == 0) 1859 if (info->max_indirect_segments == 0)
1702 segs = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1860 grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
1703 else 1861 else
1704 segs = info->max_indirect_segments; 1862 grants = info->max_indirect_segments;
1863 psegs = grants / GRANTS_PER_PSEG;
1705 1864
1706 err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); 1865 err = fill_grant_buffer(info,
1866 (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
1707 if (err) 1867 if (err)
1708 goto out_of_memory; 1868 goto out_of_memory;
1709 1869
@@ -1713,7 +1873,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
1713 * grants, we need to allocate a set of pages that can be 1873 * grants, we need to allocate a set of pages that can be
1714 * used for mapping indirect grefs 1874 * used for mapping indirect grefs
1715 */ 1875 */
1716 int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); 1876 int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
1717 1877
1718 BUG_ON(!list_empty(&info->indirect_pages)); 1878 BUG_ON(!list_empty(&info->indirect_pages));
1719 for (i = 0; i < num; i++) { 1879 for (i = 0; i < num; i++) {
@@ -1726,20 +1886,20 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
1726 1886
1727 for (i = 0; i < BLK_RING_SIZE(info); i++) { 1887 for (i = 0; i < BLK_RING_SIZE(info); i++) {
1728 info->shadow[i].grants_used = kzalloc( 1888 info->shadow[i].grants_used = kzalloc(
1729 sizeof(info->shadow[i].grants_used[0]) * segs, 1889 sizeof(info->shadow[i].grants_used[0]) * grants,
1730 GFP_NOIO); 1890 GFP_NOIO);
1731 info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO); 1891 info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * psegs, GFP_NOIO);
1732 if (info->max_indirect_segments) 1892 if (info->max_indirect_segments)
1733 info->shadow[i].indirect_grants = kzalloc( 1893 info->shadow[i].indirect_grants = kzalloc(
1734 sizeof(info->shadow[i].indirect_grants[0]) * 1894 sizeof(info->shadow[i].indirect_grants[0]) *
1735 INDIRECT_GREFS(segs), 1895 INDIRECT_GREFS(grants),
1736 GFP_NOIO); 1896 GFP_NOIO);
1737 if ((info->shadow[i].grants_used == NULL) || 1897 if ((info->shadow[i].grants_used == NULL) ||
1738 (info->shadow[i].sg == NULL) || 1898 (info->shadow[i].sg == NULL) ||
1739 (info->max_indirect_segments && 1899 (info->max_indirect_segments &&
1740 (info->shadow[i].indirect_grants == NULL))) 1900 (info->shadow[i].indirect_grants == NULL)))
1741 goto out_of_memory; 1901 goto out_of_memory;
1742 sg_init_table(info->shadow[i].sg, segs); 1902 sg_init_table(info->shadow[i].sg, psegs);
1743 } 1903 }
1744 1904
1745 1905
@@ -2125,9 +2285,9 @@ static int __init xlblk_init(void)
2125 if (!xen_domain()) 2285 if (!xen_domain())
2126 return -ENODEV; 2286 return -ENODEV;
2127 2287
2128 if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { 2288 if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
2129 pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", 2289 pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
2130 xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); 2290 xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
2131 xen_blkif_max_ring_order = 0; 2291 xen_blkif_max_ring_order = 0;
2132 } 2292 }
2133 2293
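A closing note on the per-segment accounting used above: blkif_queue_rw_req no longer assumes one grant per scatterlist entry; it first asks how many 4KB grants a byte range spans (gnttab_count_grant(sg->offset, sg->length)) and then walks that range with gnttab_foreach_grant_in_range(). The toy program below models only the counting step; count_grants() is an illustrative reimplementation under the assumption that grant boundaries fall every XEN_PAGE_SIZE bytes, not the kernel helper itself.

#include <stdio.h>

#define XEN_PAGE_SIZE  4096u
#define XEN_PAGE_MASK  (XEN_PAGE_SIZE - 1)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Number of 4KB grants needed to cover 'len' bytes starting at byte
 * 'offset' of a (possibly larger) guest page. Illustrative only. */
static unsigned int count_grants(unsigned int offset, unsigned int len)
{
	return DIV_ROUND_UP((offset & XEN_PAGE_MASK) + len, XEN_PAGE_SIZE);
}

int main(void)
{
	/* An 8KB scatterlist entry that starts 2KB into a grant straddles
	 * three grants, although 8KB / 4KB would naively suggest two. */
	printf("%u\n", count_grants(2048, 8192)); /* prints 3 */
	printf("%u\n", count_grants(0, 4096));    /* prints 1 */
	return 0;
}

This is also why the diff divides the segment limit by GRANTS_PER_PSEG when configuring the block queue: the backend's limits are expressed in 4KB grants, so a guest with larger pages must advertise proportionally fewer physical segments per request.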