diff options
author | James Bottomley <James.Bottomley@SteelEye.com> | 2005-06-20 08:06:52 -0400 |
---|---|---|
committer | Jens Axboe <axboe@suse.de> | 2005-06-20 08:06:52 -0400 |
commit | f1970baf6d74e03bd32072ab453f2fc01bc1b8d3 (patch) | |
tree | 559898cdf83bd0f93b8a72248c6423a6548fb604 | |
parent | dd1cab95f356f1395278633565f198463cf6bd24 (diff) |
[PATCH] Add scatter-gather support for the block layer SG_IO
Signed-off-by: Jens Axboe <axboe@suse.de>
-rw-r--r-- | drivers/block/ll_rw_blk.c | 64 | ||||
-rw-r--r-- | drivers/block/scsi_ioctl.c | 34 | ||||
-rw-r--r-- | fs/bio.c | 150 | ||||
-rw-r--r-- | include/linux/bio.h | 4 | ||||
-rw-r--r-- | include/linux/blkdev.h | 1 |
5 files changed, 191 insertions, 62 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 42c4f3651cf8..874e46fc3748 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c | |||
@@ -2149,6 +2149,50 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, | |||
2149 | EXPORT_SYMBOL(blk_rq_map_user); | 2149 | EXPORT_SYMBOL(blk_rq_map_user); |
2150 | 2150 | ||
2151 | /** | 2151 | /** |
2152 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | ||
2153 | * @q: request queue where request should be inserted | ||
2154 | * @rq: request to map data to | ||
2155 | * @iov: pointer to the iovec | ||
2156 | * @iov_count: number of elements in the iovec | ||
2157 | * | ||
2158 | * Description: | ||
2159 | * Data will be mapped directly for zero copy io, if possible. Otherwise | ||
2160 | * a kernel bounce buffer is used. | ||
2161 | * | ||
2162 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | ||
2163 | * still in process context. | ||
2164 | * | ||
2165 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | ||
2166 | * before being submitted to the device, as pages mapped may be out of | ||
2167 | * reach. It's the callers responsibility to make sure this happens. The | ||
2168 | * original bio must be passed back in to blk_rq_unmap_user() for proper | ||
2169 | * unmapping. | ||
2170 | */ | ||
2171 | int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, | ||
2172 | struct sg_iovec *iov, int iov_count) | ||
2173 | { | ||
2174 | struct bio *bio; | ||
2175 | |||
2176 | if (!iov || iov_count <= 0) | ||
2177 | return -EINVAL; | ||
2178 | |||
2179 | /* we don't allow misaligned data like bio_map_user() does. If the | ||
2180 | * user is using sg, they're expected to know the alignment constraints | ||
2181 | * and respect them accordingly */ | ||
2182 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); | ||
2183 | if (IS_ERR(bio)) | ||
2184 | return PTR_ERR(bio); | ||
2185 | |||
2186 | rq->bio = rq->biotail = bio; | ||
2187 | blk_rq_bio_prep(q, rq, bio); | ||
2188 | rq->buffer = rq->data = NULL; | ||
2189 | rq->data_len = bio->bi_size; | ||
2190 | return 0; | ||
2191 | } | ||
2192 | |||
2193 | EXPORT_SYMBOL(blk_rq_map_user_iov); | ||
2194 | |||
2195 | /** | ||
2152 | * blk_rq_unmap_user - unmap a request with user data | 2196 | * blk_rq_unmap_user - unmap a request with user data |
2153 | * @rq: request to be unmapped | 2197 | * @rq: request to be unmapped |
2154 | * @bio: bio for the request | 2198 | * @bio: bio for the request |
@@ -2207,6 +2251,19 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, | |||
2207 | 2251 | ||
2208 | EXPORT_SYMBOL(blk_rq_map_kern); | 2252 | EXPORT_SYMBOL(blk_rq_map_kern); |
2209 | 2253 | ||
2254 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | ||
2255 | struct request *rq, int at_head, | ||
2256 | void (*done)(struct request *)) | ||
2257 | { | ||
2258 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | ||
2259 | |||
2260 | rq->rq_disk = bd_disk; | ||
2261 | rq->flags |= REQ_NOMERGE; | ||
2262 | rq->end_io = done; | ||
2263 | elv_add_request(q, rq, where, 1); | ||
2264 | generic_unplug_device(q); | ||
2265 | } | ||
2266 | |||
2210 | /** | 2267 | /** |
2211 | * blk_execute_rq - insert a request into queue for execution | 2268 | * blk_execute_rq - insert a request into queue for execution |
2212 | * @q: queue to insert the request in | 2269 | * @q: queue to insert the request in |
@@ -2224,8 +2281,6 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, | |||
2224 | char sense[SCSI_SENSE_BUFFERSIZE]; | 2281 | char sense[SCSI_SENSE_BUFFERSIZE]; |
2225 | int err = 0; | 2282 | int err = 0; |
2226 | 2283 | ||
2227 | rq->rq_disk = bd_disk; | ||
2228 | |||
2229 | /* | 2284 | /* |
2230 | * we need an extra reference to the request, so we can look at | 2285 | * we need an extra reference to the request, so we can look at |
2231 | * it after io completion | 2286 | * it after io completion |
@@ -2238,11 +2293,8 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, | |||
2238 | rq->sense_len = 0; | 2293 | rq->sense_len = 0; |
2239 | } | 2294 | } |
2240 | 2295 | ||
2241 | rq->flags |= REQ_NOMERGE; | ||
2242 | rq->waiting = &wait; | 2296 | rq->waiting = &wait; |
2243 | rq->end_io = blk_end_sync_rq; | 2297 | blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq); |
2244 | elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); | ||
2245 | generic_unplug_device(q); | ||
2246 | wait_for_completion(&wait); | 2298 | wait_for_completion(&wait); |
2247 | rq->waiting = NULL; | 2299 | rq->waiting = NULL; |
2248 | 2300 | ||
diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 93c4ca874be3..09a7e73a0812 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c | |||
@@ -231,17 +231,11 @@ static int sg_io(struct file *file, request_queue_t *q, | |||
231 | if (verify_command(file, cmd)) | 231 | if (verify_command(file, cmd)) |
232 | return -EPERM; | 232 | return -EPERM; |
233 | 233 | ||
234 | /* | ||
235 | * we'll do that later | ||
236 | */ | ||
237 | if (hdr->iovec_count) | ||
238 | return -EOPNOTSUPP; | ||
239 | |||
240 | if (hdr->dxfer_len > (q->max_sectors << 9)) | 234 | if (hdr->dxfer_len > (q->max_sectors << 9)) |
241 | return -EIO; | 235 | return -EIO; |
242 | 236 | ||
243 | reading = writing = 0; | 237 | reading = writing = 0; |
244 | if (hdr->dxfer_len) { | 238 | if (hdr->dxfer_len) |
245 | switch (hdr->dxfer_direction) { | 239 | switch (hdr->dxfer_direction) { |
246 | default: | 240 | default: |
247 | return -EINVAL; | 241 | return -EINVAL; |
@@ -261,11 +255,29 @@ static int sg_io(struct file *file, request_queue_t *q, | |||
261 | if (!rq) | 255 | if (!rq) |
262 | return -ENOMEM; | 256 | return -ENOMEM; |
263 | 257 | ||
264 | if (reading || writing) { | 258 | if (hdr->iovec_count) { |
265 | ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); | 259 | const int size = sizeof(struct sg_iovec) * hdr->iovec_count; |
266 | if (ret) | 260 | struct sg_iovec *iov; |
261 | |||
262 | iov = kmalloc(size, GFP_KERNEL); | ||
263 | if (!iov) { | ||
264 | ret = -ENOMEM; | ||
267 | goto out; | 265 | goto out; |
268 | } | 266 | } |
267 | |||
268 | if (copy_from_user(iov, hdr->dxferp, size)) { | ||
269 | kfree(iov); | ||
270 | ret = -EFAULT; | ||
271 | goto out; | ||
272 | } | ||
273 | |||
274 | ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); | ||
275 | kfree(iov); | ||
276 | } else if (hdr->dxfer_len) | ||
277 | ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); | ||
278 | |||
279 | if (ret) | ||
280 | goto out; | ||
269 | 281 | ||
270 | /* | 282 | /* |
271 | * fill in request structure | 283 | * fill in request structure |
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/mempool.h> | 26 | #include <linux/mempool.h> |
27 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
28 | #include <scsi/sg.h> /* for struct sg_iovec */ | ||
28 | 29 | ||
29 | #define BIO_POOL_SIZE 256 | 30 | #define BIO_POOL_SIZE 256 |
30 | 31 | ||
@@ -549,22 +550,34 @@ out_bmd: | |||
549 | return ERR_PTR(ret); | 550 | return ERR_PTR(ret); |
550 | } | 551 | } |
551 | 552 | ||
552 | static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | 553 | static struct bio *__bio_map_user_iov(request_queue_t *q, |
553 | unsigned long uaddr, unsigned int len, | 554 | struct block_device *bdev, |
554 | int write_to_vm) | 555 | struct sg_iovec *iov, int iov_count, |
556 | int write_to_vm) | ||
555 | { | 557 | { |
556 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 558 | int i, j; |
557 | unsigned long start = uaddr >> PAGE_SHIFT; | 559 | int nr_pages = 0; |
558 | const int nr_pages = end - start; | ||
559 | int ret, offset, i; | ||
560 | struct page **pages; | 560 | struct page **pages; |
561 | struct bio *bio; | 561 | struct bio *bio; |
562 | int cur_page = 0; | ||
563 | int ret, offset; | ||
562 | 564 | ||
563 | /* | 565 | for (i = 0; i < iov_count; i++) { |
564 | * transfer and buffer must be aligned to at least hardsector | 566 | unsigned long uaddr = (unsigned long)iov[i].iov_base; |
565 | * size for now, in the future we can relax this restriction | 567 | unsigned long len = iov[i].iov_len; |
566 | */ | 568 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
567 | if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) | 569 | unsigned long start = uaddr >> PAGE_SHIFT; |
570 | |||
571 | nr_pages += end - start; | ||
572 | /* | ||
573 | * transfer and buffer must be aligned to at least hardsector | ||
574 | * size for now, in the future we can relax this restriction | ||
575 | */ | ||
576 | if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) | ||
577 | return ERR_PTR(-EINVAL); | ||
578 | } | ||
579 | |||
580 | if (!nr_pages) | ||
568 | return ERR_PTR(-EINVAL); | 581 | return ERR_PTR(-EINVAL); |
569 | 582 | ||
570 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 583 | bio = bio_alloc(GFP_KERNEL, nr_pages); |
@@ -576,42 +589,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
576 | if (!pages) | 589 | if (!pages) |
577 | goto out; | 590 | goto out; |
578 | 591 | ||
579 | down_read(&current->mm->mmap_sem); | 592 | memset(pages, 0, nr_pages * sizeof(struct page *)); |
580 | ret = get_user_pages(current, current->mm, uaddr, nr_pages, | 593 | |
581 | write_to_vm, 0, pages, NULL); | 594 | for (i = 0; i < iov_count; i++) { |
582 | up_read(&current->mm->mmap_sem); | 595 | unsigned long uaddr = (unsigned long)iov[i].iov_base; |
583 | 596 | unsigned long len = iov[i].iov_len; | |
584 | if (ret < nr_pages) | 597 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
585 | goto out; | 598 | unsigned long start = uaddr >> PAGE_SHIFT; |
586 | 599 | const int local_nr_pages = end - start; | |
587 | bio->bi_bdev = bdev; | 600 | const int page_limit = cur_page + local_nr_pages; |
588 | 601 | ||
589 | offset = uaddr & ~PAGE_MASK; | 602 | down_read(&current->mm->mmap_sem); |
590 | for (i = 0; i < nr_pages; i++) { | 603 | ret = get_user_pages(current, current->mm, uaddr, |
591 | unsigned int bytes = PAGE_SIZE - offset; | 604 | local_nr_pages, |
592 | 605 | write_to_vm, 0, &pages[cur_page], NULL); | |
593 | if (len <= 0) | 606 | up_read(&current->mm->mmap_sem); |
594 | break; | 607 | |
595 | 608 | if (ret < local_nr_pages) | |
596 | if (bytes > len) | 609 | goto out_unmap; |
597 | bytes = len; | 610 | |
611 | |||
612 | offset = uaddr & ~PAGE_MASK; | ||
613 | for (j = cur_page; j < page_limit; j++) { | ||
614 | unsigned int bytes = PAGE_SIZE - offset; | ||
615 | |||
616 | if (len <= 0) | ||
617 | break; | ||
618 | |||
619 | if (bytes > len) | ||
620 | bytes = len; | ||
621 | |||
622 | /* | ||
623 | * sorry... | ||
624 | */ | ||
625 | if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) | ||
626 | break; | ||
627 | |||
628 | len -= bytes; | ||
629 | offset = 0; | ||
630 | } | ||
598 | 631 | ||
632 | cur_page = j; | ||
599 | /* | 633 | /* |
600 | * sorry... | 634 | * release the pages we didn't map into the bio, if any |
601 | */ | 635 | */ |
602 | if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) | 636 | while (j < page_limit) |
603 | break; | 637 | page_cache_release(pages[j++]); |
604 | |||
605 | len -= bytes; | ||
606 | offset = 0; | ||
607 | } | 638 | } |
608 | 639 | ||
609 | /* | ||
610 | * release the pages we didn't map into the bio, if any | ||
611 | */ | ||
612 | while (i < nr_pages) | ||
613 | page_cache_release(pages[i++]); | ||
614 | |||
615 | kfree(pages); | 640 | kfree(pages); |
616 | 641 | ||
617 | /* | 642 | /* |
@@ -620,9 +645,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
620 | if (!write_to_vm) | 645 | if (!write_to_vm) |
621 | bio->bi_rw |= (1 << BIO_RW); | 646 | bio->bi_rw |= (1 << BIO_RW); |
622 | 647 | ||
648 | bio->bi_bdev = bdev; | ||
623 | bio->bi_flags |= (1 << BIO_USER_MAPPED); | 649 | bio->bi_flags |= (1 << BIO_USER_MAPPED); |
624 | return bio; | 650 | return bio; |
625 | out: | 651 | |
652 | out_unmap: | ||
653 | for (i = 0; i < nr_pages; i++) { | ||
654 | if(!pages[i]) | ||
655 | break; | ||
656 | page_cache_release(pages[i]); | ||
657 | } | ||
658 | out: | ||
626 | kfree(pages); | 659 | kfree(pages); |
627 | bio_put(bio); | 660 | bio_put(bio); |
628 | return ERR_PTR(ret); | 661 | return ERR_PTR(ret); |
@@ -642,9 +675,33 @@ out: | |||
642 | struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, | 675 | struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, |
643 | unsigned long uaddr, unsigned int len, int write_to_vm) | 676 | unsigned long uaddr, unsigned int len, int write_to_vm) |
644 | { | 677 | { |
678 | struct sg_iovec iov; | ||
679 | |||
680 | iov.iov_base = (__user void *)uaddr; | ||
681 | iov.iov_len = len; | ||
682 | |||
683 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | ||
684 | } | ||
685 | |||
686 | /** | ||
687 | * bio_map_user_iov - map user sg_iovec table into bio | ||
688 | * @q: the request_queue_t for the bio | ||
689 | * @bdev: destination block device | ||
690 | * @iov: the iovec. | ||
691 | * @iov_count: number of elements in the iovec | ||
692 | * @write_to_vm: bool indicating writing to pages or not | ||
693 | * | ||
694 | * Map the user space address into a bio suitable for io to a block | ||
695 | * device. Returns an error pointer in case of error. | ||
696 | */ | ||
697 | struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, | ||
698 | struct sg_iovec *iov, int iov_count, | ||
699 | int write_to_vm) | ||
700 | { | ||
645 | struct bio *bio; | 701 | struct bio *bio; |
702 | int len = 0, i; | ||
646 | 703 | ||
647 | bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); | 704 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); |
648 | 705 | ||
649 | if (IS_ERR(bio)) | 706 | if (IS_ERR(bio)) |
650 | return bio; | 707 | return bio; |
@@ -657,6 +714,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
657 | */ | 714 | */ |
658 | bio_get(bio); | 715 | bio_get(bio); |
659 | 716 | ||
717 | for (i = 0; i < iov_count; i++) | ||
718 | len += iov[i].iov_len; | ||
719 | |||
660 | if (bio->bi_size == len) | 720 | if (bio->bi_size == len) |
661 | return bio; | 721 | return bio; |
662 | 722 | ||
diff --git a/include/linux/bio.h b/include/linux/bio.h index 1dd2bc2e84ae..ebcd03ba2e20 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); | |||
281 | extern int bio_get_nr_vecs(struct block_device *); | 281 | extern int bio_get_nr_vecs(struct block_device *); |
282 | extern struct bio *bio_map_user(struct request_queue *, struct block_device *, | 282 | extern struct bio *bio_map_user(struct request_queue *, struct block_device *, |
283 | unsigned long, unsigned int, int); | 283 | unsigned long, unsigned int, int); |
284 | struct sg_iovec; | ||
285 | extern struct bio *bio_map_user_iov(struct request_queue *, | ||
286 | struct block_device *, | ||
287 | struct sg_iovec *, int, int); | ||
284 | extern void bio_unmap_user(struct bio *); | 288 | extern void bio_unmap_user(struct bio *); |
285 | extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, | 289 | extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, |
286 | unsigned int); | 290 | unsigned int); |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc0dce078616..0430ea3e5f2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -561,6 +561,7 @@ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); | |||
561 | extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); | 561 | extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); |
562 | extern int blk_rq_unmap_user(struct bio *, unsigned int); | 562 | extern int blk_rq_unmap_user(struct bio *, unsigned int); |
563 | extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); | 563 | extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); |
564 | extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); | ||
564 | extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); | 565 | extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); |
565 | 566 | ||
566 | static inline request_queue_t *bdev_get_queue(struct block_device *bdev) | 567 | static inline request_queue_t *bdev_get_queue(struct block_device *bdev) |