diff options
author | Christoph Hellwig <hch@lst.de> | 2010-06-18 10:59:42 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-08-07 12:23:08 -0400 |
commit | 66ac0280197981f88774e74b60c8e5f9f07c1dba (patch) | |
tree | d093ce493146779926df88b5831805c6f9ee14e1 | |
parent | 082439004b31adc146e96e5f1c574dd2b57dcd93 (diff) |
block: don't allocate a payload for discard request
Allocating a fixed payload for discard requests always was a horrible hack,
and it's now coming to bite us when adding support for discard in DM/MD.
So change the code to leave the allocation of a payload to the low-level
driver. Unfortunately that means we'll need another hack, which allows
us to update the various block layer length fields indicating that we
have a payload. Instead of hiding this in sd.c, which we already partially
do for UNMAP support, add a documented helper in the core block layer for it.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- | block/blk-core.c | 32 | ||||
-rw-r--r-- | block/blk-lib.c | 33 | ||||
-rw-r--r-- | drivers/scsi/sd.c | 52 | ||||
-rw-r--r-- | include/linux/blkdev.h | 2 |
4 files changed, 74 insertions, 45 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 66c3cfe94d0a..3531d8e1da04 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -1135,6 +1135,38 @@ void blk_put_request(struct request *req) | |||
1135 | } | 1135 | } |
1136 | EXPORT_SYMBOL(blk_put_request); | 1136 | EXPORT_SYMBOL(blk_put_request); |
1137 | 1137 | ||
1138 | /** | ||
1139 | * blk_add_request_payload - add a payload to a request | ||
1140 | * @rq: request to update | ||
1141 | * @page: page backing the payload | ||
1142 | * @len: length of the payload. | ||
1143 | * | ||
1144 | * This allows to later add a payload to an already submitted request by | ||
1145 | * a block driver. The driver needs to take care of freeing the payload | ||
1146 | * itself. | ||
1147 | * | ||
1148 | * Note that this is a quite horrible hack and nothing but handling of | ||
1149 | * discard requests should ever use it. | ||
1150 | */ | ||
1151 | void blk_add_request_payload(struct request *rq, struct page *page, | ||
1152 | unsigned int len) | ||
1153 | { | ||
1154 | struct bio *bio = rq->bio; | ||
1155 | |||
1156 | bio->bi_io_vec->bv_page = page; | ||
1157 | bio->bi_io_vec->bv_offset = 0; | ||
1158 | bio->bi_io_vec->bv_len = len; | ||
1159 | |||
1160 | bio->bi_size = len; | ||
1161 | bio->bi_vcnt = 1; | ||
1162 | bio->bi_phys_segments = 1; | ||
1163 | |||
1164 | rq->__data_len = rq->resid_len = len; | ||
1165 | rq->nr_phys_segments = 1; | ||
1166 | rq->buffer = bio_data(bio); | ||
1167 | } | ||
1168 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | ||
1169 | |||
1138 | void init_request_from_bio(struct request *req, struct bio *bio) | 1170 | void init_request_from_bio(struct request *req, struct bio *bio) |
1139 | { | 1171 | { |
1140 | req->cpu = bio->bi_comp_cpu; | 1172 | req->cpu = bio->bi_comp_cpu; |
diff --git a/block/blk-lib.c b/block/blk-lib.c index d0216b9f22d4..e16185b0d8e1 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c | |||
@@ -19,7 +19,6 @@ static void blkdev_discard_end_io(struct bio *bio, int err) | |||
19 | 19 | ||
20 | if (bio->bi_private) | 20 | if (bio->bi_private) |
21 | complete(bio->bi_private); | 21 | complete(bio->bi_private); |
22 | __free_page(bio_page(bio)); | ||
23 | 22 | ||
24 | bio_put(bio); | 23 | bio_put(bio); |
25 | } | 24 | } |
@@ -43,7 +42,6 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
43 | int type = flags & BLKDEV_IFL_BARRIER ? | 42 | int type = flags & BLKDEV_IFL_BARRIER ? |
44 | DISCARD_BARRIER : DISCARD_NOBARRIER; | 43 | DISCARD_BARRIER : DISCARD_NOBARRIER; |
45 | struct bio *bio; | 44 | struct bio *bio; |
46 | struct page *page; | ||
47 | int ret = 0; | 45 | int ret = 0; |
48 | 46 | ||
49 | if (!q) | 47 | if (!q) |
@@ -53,35 +51,21 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
53 | return -EOPNOTSUPP; | 51 | return -EOPNOTSUPP; |
54 | 52 | ||
55 | while (nr_sects && !ret) { | 53 | while (nr_sects && !ret) { |
56 | unsigned int sector_size = q->limits.logical_block_size; | ||
57 | unsigned int max_discard_sectors = | 54 | unsigned int max_discard_sectors = |
58 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); | 55 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); |
59 | 56 | ||
60 | bio = bio_alloc(gfp_mask, 1); | 57 | bio = bio_alloc(gfp_mask, 1); |
61 | if (!bio) | 58 | if (!bio) { |
62 | goto out; | 59 | ret = -ENOMEM; |
60 | break; | ||
61 | } | ||
62 | |||
63 | bio->bi_sector = sector; | 63 | bio->bi_sector = sector; |
64 | bio->bi_end_io = blkdev_discard_end_io; | 64 | bio->bi_end_io = blkdev_discard_end_io; |
65 | bio->bi_bdev = bdev; | 65 | bio->bi_bdev = bdev; |
66 | if (flags & BLKDEV_IFL_WAIT) | 66 | if (flags & BLKDEV_IFL_WAIT) |
67 | bio->bi_private = &wait; | 67 | bio->bi_private = &wait; |
68 | 68 | ||
69 | /* | ||
70 | * Add a zeroed one-sector payload as that's what | ||
71 | * our current implementations need. If we'll ever need | ||
72 | * more the interface will need revisiting. | ||
73 | */ | ||
74 | page = alloc_page(gfp_mask | __GFP_ZERO); | ||
75 | if (!page) | ||
76 | goto out_free_bio; | ||
77 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
78 | goto out_free_page; | ||
79 | |||
80 | /* | ||
81 | * And override the bio size - the way discard works we | ||
82 | * touch many more blocks on disk than the actual payload | ||
83 | * length. | ||
84 | */ | ||
85 | if (nr_sects > max_discard_sectors) { | 69 | if (nr_sects > max_discard_sectors) { |
86 | bio->bi_size = max_discard_sectors << 9; | 70 | bio->bi_size = max_discard_sectors << 9; |
87 | nr_sects -= max_discard_sectors; | 71 | nr_sects -= max_discard_sectors; |
@@ -103,13 +87,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
103 | ret = -EIO; | 87 | ret = -EIO; |
104 | bio_put(bio); | 88 | bio_put(bio); |
105 | } | 89 | } |
90 | |||
106 | return ret; | 91 | return ret; |
107 | out_free_page: | ||
108 | __free_page(page); | ||
109 | out_free_bio: | ||
110 | bio_put(bio); | ||
111 | out: | ||
112 | return -ENOMEM; | ||
113 | } | 92 | } |
114 | EXPORT_SYMBOL(blkdev_issue_discard); | 93 | EXPORT_SYMBOL(blkdev_issue_discard); |
115 | 94 | ||
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a3fdf4dc59da..86da819c70eb 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
@@ -411,22 +411,25 @@ static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif) | |||
411 | } | 411 | } |
412 | 412 | ||
413 | /** | 413 | /** |
414 | * sd_prepare_discard - unmap blocks on thinly provisioned device | 414 | * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device |
415 | * @sdp: scsi device to operate on | ||
415 | * @rq: Request to prepare | 416 | * @rq: Request to prepare |
416 | * | 417 | * |
417 | * Will issue either UNMAP or WRITE SAME(16) depending on preference | 418 | * Will issue either UNMAP or WRITE SAME(16) depending on preference |
418 | * indicated by target device. | 419 | * indicated by target device. |
419 | **/ | 420 | **/ |
420 | static int sd_prepare_discard(struct request *rq) | 421 | static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) |
421 | { | 422 | { |
422 | struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); | 423 | struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); |
423 | struct bio *bio = rq->bio; | 424 | struct bio *bio = rq->bio; |
424 | sector_t sector = bio->bi_sector; | 425 | sector_t sector = bio->bi_sector; |
425 | unsigned int num = bio_sectors(bio); | 426 | unsigned int nr_sectors = bio_sectors(bio); |
427 | unsigned int len; | ||
428 | struct page *page; | ||
426 | 429 | ||
427 | if (sdkp->device->sector_size == 4096) { | 430 | if (sdkp->device->sector_size == 4096) { |
428 | sector >>= 3; | 431 | sector >>= 3; |
429 | num >>= 3; | 432 | nr_sectors >>= 3; |
430 | } | 433 | } |
431 | 434 | ||
432 | rq->cmd_type = REQ_TYPE_BLOCK_PC; | 435 | rq->cmd_type = REQ_TYPE_BLOCK_PC; |
@@ -434,31 +437,35 @@ static int sd_prepare_discard(struct request *rq) | |||
434 | 437 | ||
435 | memset(rq->cmd, 0, rq->cmd_len); | 438 | memset(rq->cmd, 0, rq->cmd_len); |
436 | 439 | ||
440 | page = alloc_page(GFP_ATOMIC | __GFP_ZERO); | ||
441 | if (!page) | ||
442 | return BLKPREP_DEFER; | ||
443 | |||
437 | if (sdkp->unmap) { | 444 | if (sdkp->unmap) { |
438 | char *buf = kmap_atomic(bio_page(bio), KM_USER0); | 445 | char *buf = page_address(page); |
439 | 446 | ||
447 | rq->cmd_len = 10; | ||
440 | rq->cmd[0] = UNMAP; | 448 | rq->cmd[0] = UNMAP; |
441 | rq->cmd[8] = 24; | 449 | rq->cmd[8] = 24; |
442 | rq->cmd_len = 10; | ||
443 | |||
444 | /* Ensure that data length matches payload */ | ||
445 | rq->__data_len = bio->bi_size = bio->bi_io_vec->bv_len = 24; | ||
446 | 450 | ||
447 | put_unaligned_be16(6 + 16, &buf[0]); | 451 | put_unaligned_be16(6 + 16, &buf[0]); |
448 | put_unaligned_be16(16, &buf[2]); | 452 | put_unaligned_be16(16, &buf[2]); |
449 | put_unaligned_be64(sector, &buf[8]); | 453 | put_unaligned_be64(sector, &buf[8]); |
450 | put_unaligned_be32(num, &buf[16]); | 454 | put_unaligned_be32(nr_sectors, &buf[16]); |
451 | 455 | ||
452 | kunmap_atomic(buf, KM_USER0); | 456 | len = 24; |
453 | } else { | 457 | } else { |
458 | rq->cmd_len = 16; | ||
454 | rq->cmd[0] = WRITE_SAME_16; | 459 | rq->cmd[0] = WRITE_SAME_16; |
455 | rq->cmd[1] = 0x8; /* UNMAP */ | 460 | rq->cmd[1] = 0x8; /* UNMAP */ |
456 | put_unaligned_be64(sector, &rq->cmd[2]); | 461 | put_unaligned_be64(sector, &rq->cmd[2]); |
457 | put_unaligned_be32(num, &rq->cmd[10]); | 462 | put_unaligned_be32(nr_sectors, &rq->cmd[10]); |
458 | rq->cmd_len = 16; | 463 | |
464 | len = sdkp->device->sector_size; | ||
459 | } | 465 | } |
460 | 466 | ||
461 | return BLKPREP_OK; | 467 | blk_add_request_payload(rq, page, len); |
468 | return scsi_setup_blk_pc_cmnd(sdp, rq); | ||
462 | } | 469 | } |
463 | 470 | ||
464 | /** | 471 | /** |
@@ -485,10 +492,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) | |||
485 | * Discard request come in as REQ_TYPE_FS but we turn them into | 492 | * Discard request come in as REQ_TYPE_FS but we turn them into |
486 | * block PC requests to make life easier. | 493 | * block PC requests to make life easier. |
487 | */ | 494 | */ |
488 | if (rq->cmd_flags & REQ_DISCARD) | 495 | if (rq->cmd_flags & REQ_DISCARD) { |
489 | ret = sd_prepare_discard(rq); | 496 | ret = scsi_setup_discard_cmnd(sdp, rq); |
490 | 497 | goto out; | |
491 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 498 | } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
492 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); | 499 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); |
493 | goto out; | 500 | goto out; |
494 | } else if (rq->cmd_type != REQ_TYPE_FS) { | 501 | } else if (rq->cmd_type != REQ_TYPE_FS) { |
@@ -1163,6 +1170,15 @@ static int sd_done(struct scsi_cmnd *SCpnt) | |||
1163 | int sense_valid = 0; | 1170 | int sense_valid = 0; |
1164 | int sense_deferred = 0; | 1171 | int sense_deferred = 0; |
1165 | 1172 | ||
1173 | /* | ||
1174 | * If this is a discard request that originated from the kernel | ||
1175 | * we need to free our payload here. Note that we need to check | ||
1176 | * the request flag as the normal payload rules apply for | ||
1177 | * pass-through UNMAP / WRITE SAME requests. | ||
1178 | */ | ||
1179 | if (SCpnt->request->cmd_flags & REQ_DISCARD) | ||
1180 | __free_page(bio_page(SCpnt->request->bio)); | ||
1181 | |||
1166 | if (result) { | 1182 | if (result) { |
1167 | sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); | 1183 | sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); |
1168 | if (sense_valid) | 1184 | if (sense_valid) |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fc0f5908619..204fbe22354d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -705,6 +705,8 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, | |||
705 | gfp_t); | 705 | gfp_t); |
706 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | 706 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); |
707 | extern void blk_requeue_request(struct request_queue *, struct request *); | 707 | extern void blk_requeue_request(struct request_queue *, struct request *); |
708 | extern void blk_add_request_payload(struct request *rq, struct page *page, | ||
709 | unsigned int len); | ||
708 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); | 710 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); |
709 | extern int blk_lld_busy(struct request_queue *q); | 711 | extern int blk_lld_busy(struct request_queue *q); |
710 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 712 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |