| author    | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 17:13:23 -0500 |
|-----------|------------------------------------------------|---------------------------|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 17:13:23 -0500 |
| commit    | 3e12cefbe143b4947171ff92dd50024c4841e291 (patch) | |
| tree      | f58ec23a4092576ed08843cca5f5443a32106bd1 /block | |
| parent    | 6bec0035286119eefc32a5b1102127e6a4032cb2 (diff) | |
| parent    | d427e3c82ef4fc5fbb22c0cef0b040e6767b1028 (diff) | |
Merge branch 'for-3.20/core' of git://git.kernel.dk/linux-block
Pull core block IO changes from Jens Axboe:
"This contains:
 - A series from Christoph that cleans up and refactors various parts
   of the REQ_BLOCK_PC handling. Contributions in that series from
   Dongsu Park and Kent Overstreet as well.

 - CFQ:
      - A bug fix for cfq for realtime IO scheduling from Jeff Moyer.
      - A stable patch fixing a potential crash in CFQ in OOM
        situations. From Konstantin Khlebnikov.

 - blk-mq:
      - Add support for tag allocation policies, from Shaohua. This is
        a prep patch enabling libata (and other SCSI parts) to use the
        blk-mq tagging, instead of rolling their own.
      - Various little tweaks from Keith and Mike, in preparation for
        DM blk-mq support.
      - Minor little fixes or tweaks from me.

 - A double free error fix from Tony Battersby.

 - The partition 4k issue fixes from Matthew and Boaz.

 - Add support for zero+unprovision for blkdev_issue_zeroout() from
   Martin"
* 'for-3.20/core' of git://git.kernel.dk/linux-block: (27 commits)
block: remove unused function blk_bio_map_sg
block: handle the null_mapped flag correctly in blk_rq_map_user_iov
blk-mq: fix double-free in error path
block: prevent request-to-request merging with gaps if not allowed
blk-mq: make blk_mq_run_queues() static
dm: fix multipath regression due to initializing wrong request
cfq-iosched: handle failure of cfq group allocation
block: Quiesce zeroout wrapper
block: rewrite and split __bio_copy_iov()
block: merge __bio_map_user_iov into bio_map_user_iov
block: merge __bio_map_kern into bio_map_kern
block: pass iov_iter to the BLOCK_PC mapping functions
block: add a helper to free bio bounce buffer pages
block: use blk_rq_map_user_iov to implement blk_rq_map_user
block: simplify bio_map_kern
block: mark blk-mq devices as stackable
block: keep established cmd_flags when cloning into a blk-mq request
block: add blk-mq support to blk_insert_cloned_request()
block: require blk_rq_prep_clone() be given an initialized clone request
blk-mq: add tag allocation policy
...
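Several of the commits above ("pass iov_iter to the BLOCK_PC mapping functions", "use blk_rq_map_user_iov to implement blk_rq_map_user") replace sg_iovec arrays with iov_iter. A hedged sketch of the resulting calling convention, mirroring the new blk_rq_map_user() wrapper shown in block/blk-map.c below; the wrapper name here is invented, and q, rq, ubuf and len are assumed to come from the caller.

```c
#include <linux/blkdev.h>
#include <linux/uio.h>

/* Map a single user buffer into a REQ_TYPE_BLOCK_PC request via iov_iter. */
static int map_user_buf(struct request_queue *q, struct request *rq,
			void __user *ubuf, unsigned long len)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	struct iov_iter i;

	iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
	return blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
}
```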
Diffstat (limited to 'block')

 -rw-r--r--  block/bio.c              | 426
 -rw-r--r--  block/blk-core.c         |  11
 -rw-r--r--  block/blk-lib.c          |  30
 -rw-r--r--  block/blk-map.c          | 172
 -rw-r--r--  block/blk-merge.c        |  41
 -rw-r--r--  block/blk-mq-tag.c       |  81
 -rw-r--r--  block/blk-mq-tag.h       |   4
 -rw-r--r--  block/blk-mq.c           |  13
 -rw-r--r--  block/blk-tag.c          |  33
 -rw-r--r--  block/cfq-iosched.c      |  16
 -rw-r--r--  block/ioctl.c            |   2
 -rw-r--r--  block/partitions/check.c |  12
 -rw-r--r--  block/scsi_ioctl.c       |  17

 13 files changed, 372 insertions(+), 486 deletions(-)
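The tag allocation policy work shows up in block/blk-mq-tag.[ch] below: blk_mq_init_tags() gains an alloc_policy argument and the bit allocator distinguishes BLK_TAG_ALLOC_FIFO from BLK_TAG_ALLOC_RR. A minimal sketch of the new internal constructor follows; it is illustrative only, and drivers are expected to select the policy through their tag set flags rather than call this helper directly.

```c
#include "blk-mq-tag.h"	/* internal blk-mq header, in-tree build only */

/* Create a 256-deep tag map that reuses tags round-robin. */
static struct blk_mq_tags *make_rr_tags(int node)
{
	return blk_mq_init_tags(256, 0, node, BLK_TAG_ALLOC_RR);
}
```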
diff --git a/block/bio.c b/block/bio.c
index 471d7382c7d1..f66a4eae16ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,7 +28,6 @@
28 | #include <linux/mempool.h> | 28 | #include <linux/mempool.h> |
29 | #include <linux/workqueue.h> | 29 | #include <linux/workqueue.h> |
30 | #include <linux/cgroup.h> | 30 | #include <linux/cgroup.h> |
31 | #include <scsi/sg.h> /* for struct sg_iovec */ | ||
32 | 31 | ||
33 | #include <trace/events/block.h> | 32 | #include <trace/events/block.h> |
34 | 33 | ||
@@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
1022 | EXPORT_SYMBOL(bio_copy_data); | 1021 | EXPORT_SYMBOL(bio_copy_data); |
1023 | 1022 | ||
1024 | struct bio_map_data { | 1023 | struct bio_map_data { |
1025 | int nr_sgvecs; | ||
1026 | int is_our_pages; | 1024 | int is_our_pages; |
1027 | struct sg_iovec sgvecs[]; | 1025 | struct iov_iter iter; |
1026 | struct iovec iov[]; | ||
1028 | }; | 1027 | }; |
1029 | 1028 | ||
1030 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | ||
1031 | const struct sg_iovec *iov, int iov_count, | ||
1032 | int is_our_pages) | ||
1033 | { | ||
1034 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | ||
1035 | bmd->nr_sgvecs = iov_count; | ||
1036 | bmd->is_our_pages = is_our_pages; | ||
1037 | bio->bi_private = bmd; | ||
1038 | } | ||
1039 | |||
1040 | static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, | 1029 | static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, |
1041 | gfp_t gfp_mask) | 1030 | gfp_t gfp_mask) |
1042 | { | 1031 | { |
@@ -1044,85 +1033,101 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
1044 | return NULL; | 1033 | return NULL; |
1045 | 1034 | ||
1046 | return kmalloc(sizeof(struct bio_map_data) + | 1035 | return kmalloc(sizeof(struct bio_map_data) + |
1047 | sizeof(struct sg_iovec) * iov_count, gfp_mask); | 1036 | sizeof(struct iovec) * iov_count, gfp_mask); |
1048 | } | 1037 | } |
1049 | 1038 | ||
1050 | static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, | 1039 | /** |
1051 | int to_user, int from_user, int do_free_page) | 1040 | * bio_copy_from_iter - copy all pages from iov_iter to bio |
1041 | * @bio: The &struct bio which describes the I/O as destination | ||
1042 | * @iter: iov_iter as source | ||
1043 | * | ||
1044 | * Copy all pages from iov_iter to bio. | ||
1045 | * Returns 0 on success, or error on failure. | ||
1046 | */ | ||
1047 | static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) | ||
1052 | { | 1048 | { |
1053 | int ret = 0, i; | 1049 | int i; |
1054 | struct bio_vec *bvec; | 1050 | struct bio_vec *bvec; |
1055 | int iov_idx = 0; | ||
1056 | unsigned int iov_off = 0; | ||
1057 | 1051 | ||
1058 | bio_for_each_segment_all(bvec, bio, i) { | 1052 | bio_for_each_segment_all(bvec, bio, i) { |
1059 | char *bv_addr = page_address(bvec->bv_page); | 1053 | ssize_t ret; |
1060 | unsigned int bv_len = bvec->bv_len; | ||
1061 | 1054 | ||
1062 | while (bv_len && iov_idx < iov_count) { | 1055 | ret = copy_page_from_iter(bvec->bv_page, |
1063 | unsigned int bytes; | 1056 | bvec->bv_offset, |
1064 | char __user *iov_addr; | 1057 | bvec->bv_len, |
1058 | &iter); | ||
1065 | 1059 | ||
1066 | bytes = min_t(unsigned int, | 1060 | if (!iov_iter_count(&iter)) |
1067 | iov[iov_idx].iov_len - iov_off, bv_len); | 1061 | break; |
1068 | iov_addr = iov[iov_idx].iov_base + iov_off; | ||
1069 | 1062 | ||
1070 | if (!ret) { | 1063 | if (ret < bvec->bv_len) |
1071 | if (to_user) | 1064 | return -EFAULT; |
1072 | ret = copy_to_user(iov_addr, bv_addr, | 1065 | } |
1073 | bytes); | ||
1074 | 1066 | ||
1075 | if (from_user) | 1067 | return 0; |
1076 | ret = copy_from_user(bv_addr, iov_addr, | 1068 | } |
1077 | bytes); | ||
1078 | 1069 | ||
1079 | if (ret) | 1070 | /** |
1080 | ret = -EFAULT; | 1071 | * bio_copy_to_iter - copy all pages from bio to iov_iter |
1081 | } | 1072 | * @bio: The &struct bio which describes the I/O as source |
1073 | * @iter: iov_iter as destination | ||
1074 | * | ||
1075 | * Copy all pages from bio to iov_iter. | ||
1076 | * Returns 0 on success, or error on failure. | ||
1077 | */ | ||
1078 | static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) | ||
1079 | { | ||
1080 | int i; | ||
1081 | struct bio_vec *bvec; | ||
1082 | 1082 | ||
1083 | bv_len -= bytes; | 1083 | bio_for_each_segment_all(bvec, bio, i) { |
1084 | bv_addr += bytes; | 1084 | ssize_t ret; |
1085 | iov_addr += bytes; | ||
1086 | iov_off += bytes; | ||
1087 | 1085 | ||
1088 | if (iov[iov_idx].iov_len == iov_off) { | 1086 | ret = copy_page_to_iter(bvec->bv_page, |
1089 | iov_idx++; | 1087 | bvec->bv_offset, |
1090 | iov_off = 0; | 1088 | bvec->bv_len, |
1091 | } | 1089 | &iter); |
1092 | } | 1090 | |
1091 | if (!iov_iter_count(&iter)) | ||
1092 | break; | ||
1093 | 1093 | ||
1094 | if (do_free_page) | 1094 | if (ret < bvec->bv_len) |
1095 | __free_page(bvec->bv_page); | 1095 | return -EFAULT; |
1096 | } | 1096 | } |
1097 | 1097 | ||
1098 | return ret; | 1098 | return 0; |
1099 | } | ||
1100 | |||
1101 | static void bio_free_pages(struct bio *bio) | ||
1102 | { | ||
1103 | struct bio_vec *bvec; | ||
1104 | int i; | ||
1105 | |||
1106 | bio_for_each_segment_all(bvec, bio, i) | ||
1107 | __free_page(bvec->bv_page); | ||
1099 | } | 1108 | } |
1100 | 1109 | ||
1101 | /** | 1110 | /** |
1102 | * bio_uncopy_user - finish previously mapped bio | 1111 | * bio_uncopy_user - finish previously mapped bio |
1103 | * @bio: bio being terminated | 1112 | * @bio: bio being terminated |
1104 | * | 1113 | * |
1105 | * Free pages allocated from bio_copy_user() and write back data | 1114 | * Free pages allocated from bio_copy_user_iov() and write back data |
1106 | * to user space in case of a read. | 1115 | * to user space in case of a read. |
1107 | */ | 1116 | */ |
1108 | int bio_uncopy_user(struct bio *bio) | 1117 | int bio_uncopy_user(struct bio *bio) |
1109 | { | 1118 | { |
1110 | struct bio_map_data *bmd = bio->bi_private; | 1119 | struct bio_map_data *bmd = bio->bi_private; |
1111 | struct bio_vec *bvec; | 1120 | int ret = 0; |
1112 | int ret = 0, i; | ||
1113 | 1121 | ||
1114 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) { | 1122 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) { |
1115 | /* | 1123 | /* |
1116 | * if we're in a workqueue, the request is orphaned, so | 1124 | * if we're in a workqueue, the request is orphaned, so |
1117 | * don't copy into a random user address space, just free. | 1125 | * don't copy into a random user address space, just free. |
1118 | */ | 1126 | */ |
1119 | if (current->mm) | 1127 | if (current->mm && bio_data_dir(bio) == READ) |
1120 | ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, | 1128 | ret = bio_copy_to_iter(bio, bmd->iter); |
1121 | bio_data_dir(bio) == READ, | 1129 | if (bmd->is_our_pages) |
1122 | 0, bmd->is_our_pages); | 1130 | bio_free_pages(bio); |
1123 | else if (bmd->is_our_pages) | ||
1124 | bio_for_each_segment_all(bvec, bio, i) | ||
1125 | __free_page(bvec->bv_page); | ||
1126 | } | 1131 | } |
1127 | kfree(bmd); | 1132 | kfree(bmd); |
1128 | bio_put(bio); | 1133 | bio_put(bio); |
@@ -1132,12 +1137,10 @@ EXPORT_SYMBOL(bio_uncopy_user);
1132 | 1137 | ||
1133 | /** | 1138 | /** |
1134 | * bio_copy_user_iov - copy user data to bio | 1139 | * bio_copy_user_iov - copy user data to bio |
1135 | * @q: destination block queue | 1140 | * @q: destination block queue |
1136 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | 1141 | * @map_data: pointer to the rq_map_data holding pages (if necessary) |
1137 | * @iov: the iovec. | 1142 | * @iter: iovec iterator |
1138 | * @iov_count: number of elements in the iovec | 1143 | * @gfp_mask: memory allocation flags |
1139 | * @write_to_vm: bool indicating writing to pages or not | ||
1140 | * @gfp_mask: memory allocation flags | ||
1141 | * | 1144 | * |
1142 | * Prepares and returns a bio for indirect user io, bouncing data | 1145 | * Prepares and returns a bio for indirect user io, bouncing data |
1143 | * to/from kernel pages as necessary. Must be paired with | 1146 | * to/from kernel pages as necessary. Must be paired with |
@@ -1145,25 +1148,25 @@ EXPORT_SYMBOL(bio_uncopy_user);
1145 | */ | 1148 | */ |
1146 | struct bio *bio_copy_user_iov(struct request_queue *q, | 1149 | struct bio *bio_copy_user_iov(struct request_queue *q, |
1147 | struct rq_map_data *map_data, | 1150 | struct rq_map_data *map_data, |
1148 | const struct sg_iovec *iov, int iov_count, | 1151 | const struct iov_iter *iter, |
1149 | int write_to_vm, gfp_t gfp_mask) | 1152 | gfp_t gfp_mask) |
1150 | { | 1153 | { |
1151 | struct bio_map_data *bmd; | 1154 | struct bio_map_data *bmd; |
1152 | struct bio_vec *bvec; | ||
1153 | struct page *page; | 1155 | struct page *page; |
1154 | struct bio *bio; | 1156 | struct bio *bio; |
1155 | int i, ret; | 1157 | int i, ret; |
1156 | int nr_pages = 0; | 1158 | int nr_pages = 0; |
1157 | unsigned int len = 0; | 1159 | unsigned int len = iter->count; |
1158 | unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0; | 1160 | unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0; |
1159 | 1161 | ||
1160 | for (i = 0; i < iov_count; i++) { | 1162 | for (i = 0; i < iter->nr_segs; i++) { |
1161 | unsigned long uaddr; | 1163 | unsigned long uaddr; |
1162 | unsigned long end; | 1164 | unsigned long end; |
1163 | unsigned long start; | 1165 | unsigned long start; |
1164 | 1166 | ||
1165 | uaddr = (unsigned long)iov[i].iov_base; | 1167 | uaddr = (unsigned long) iter->iov[i].iov_base; |
1166 | end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1168 | end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1) |
1169 | >> PAGE_SHIFT; | ||
1167 | start = uaddr >> PAGE_SHIFT; | 1170 | start = uaddr >> PAGE_SHIFT; |
1168 | 1171 | ||
1169 | /* | 1172 | /* |
@@ -1173,22 +1176,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1173 | return ERR_PTR(-EINVAL); | 1176 | return ERR_PTR(-EINVAL); |
1174 | 1177 | ||
1175 | nr_pages += end - start; | 1178 | nr_pages += end - start; |
1176 | len += iov[i].iov_len; | ||
1177 | } | 1179 | } |
1178 | 1180 | ||
1179 | if (offset) | 1181 | if (offset) |
1180 | nr_pages++; | 1182 | nr_pages++; |
1181 | 1183 | ||
1182 | bmd = bio_alloc_map_data(iov_count, gfp_mask); | 1184 | bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask); |
1183 | if (!bmd) | 1185 | if (!bmd) |
1184 | return ERR_PTR(-ENOMEM); | 1186 | return ERR_PTR(-ENOMEM); |
1185 | 1187 | ||
1188 | /* | ||
1189 | * We need to do a deep copy of the iov_iter including the iovecs. | ||
1190 | * The caller provided iov might point to an on-stack or otherwise | ||
1191 | * shortlived one. | ||
1192 | */ | ||
1193 | bmd->is_our_pages = map_data ? 0 : 1; | ||
1194 | memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); | ||
1195 | iov_iter_init(&bmd->iter, iter->type, bmd->iov, | ||
1196 | iter->nr_segs, iter->count); | ||
1197 | |||
1186 | ret = -ENOMEM; | 1198 | ret = -ENOMEM; |
1187 | bio = bio_kmalloc(gfp_mask, nr_pages); | 1199 | bio = bio_kmalloc(gfp_mask, nr_pages); |
1188 | if (!bio) | 1200 | if (!bio) |
1189 | goto out_bmd; | 1201 | goto out_bmd; |
1190 | 1202 | ||
1191 | if (!write_to_vm) | 1203 | if (iter->type & WRITE) |
1192 | bio->bi_rw |= REQ_WRITE; | 1204 | bio->bi_rw |= REQ_WRITE; |
1193 | 1205 | ||
1194 | ret = 0; | 1206 | ret = 0; |
@@ -1236,20 +1248,18 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1236 | /* | 1248 | /* |
1237 | * success | 1249 | * success |
1238 | */ | 1250 | */ |
1239 | if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || | 1251 | if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || |
1240 | (map_data && map_data->from_user)) { | 1252 | (map_data && map_data->from_user)) { |
1241 | ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0); | 1253 | ret = bio_copy_from_iter(bio, *iter); |
1242 | if (ret) | 1254 | if (ret) |
1243 | goto cleanup; | 1255 | goto cleanup; |
1244 | } | 1256 | } |
1245 | 1257 | ||
1246 | bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); | 1258 | bio->bi_private = bmd; |
1247 | return bio; | 1259 | return bio; |
1248 | cleanup: | 1260 | cleanup: |
1249 | if (!map_data) | 1261 | if (!map_data) |
1250 | bio_for_each_segment_all(bvec, bio, i) | 1262 | bio_free_pages(bio); |
1251 | __free_page(bvec->bv_page); | ||
1252 | |||
1253 | bio_put(bio); | 1263 | bio_put(bio); |
1254 | out_bmd: | 1264 | out_bmd: |
1255 | kfree(bmd); | 1265 | kfree(bmd); |
@@ -1257,46 +1267,30 @@ out_bmd:
1257 | } | 1267 | } |
1258 | 1268 | ||
1259 | /** | 1269 | /** |
1260 | * bio_copy_user - copy user data to bio | 1270 | * bio_map_user_iov - map user iovec into bio |
1261 | * @q: destination block queue | 1271 | * @q: the struct request_queue for the bio |
1262 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | 1272 | * @iter: iovec iterator |
1263 | * @uaddr: start of user address | 1273 | * @gfp_mask: memory allocation flags |
1264 | * @len: length in bytes | ||
1265 | * @write_to_vm: bool indicating writing to pages or not | ||
1266 | * @gfp_mask: memory allocation flags | ||
1267 | * | 1274 | * |
1268 | * Prepares and returns a bio for indirect user io, bouncing data | 1275 | * Map the user space address into a bio suitable for io to a block |
1269 | * to/from kernel pages as necessary. Must be paired with | 1276 | * device. Returns an error pointer in case of error. |
1270 | * call bio_uncopy_user() on io completion. | ||
1271 | */ | 1277 | */ |
1272 | struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, | 1278 | struct bio *bio_map_user_iov(struct request_queue *q, |
1273 | unsigned long uaddr, unsigned int len, | 1279 | const struct iov_iter *iter, |
1274 | int write_to_vm, gfp_t gfp_mask) | 1280 | gfp_t gfp_mask) |
1275 | { | 1281 | { |
1276 | struct sg_iovec iov; | 1282 | int j; |
1277 | |||
1278 | iov.iov_base = (void __user *)uaddr; | ||
1279 | iov.iov_len = len; | ||
1280 | |||
1281 | return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); | ||
1282 | } | ||
1283 | EXPORT_SYMBOL(bio_copy_user); | ||
1284 | |||
1285 | static struct bio *__bio_map_user_iov(struct request_queue *q, | ||
1286 | struct block_device *bdev, | ||
1287 | const struct sg_iovec *iov, int iov_count, | ||
1288 | int write_to_vm, gfp_t gfp_mask) | ||
1289 | { | ||
1290 | int i, j; | ||
1291 | int nr_pages = 0; | 1283 | int nr_pages = 0; |
1292 | struct page **pages; | 1284 | struct page **pages; |
1293 | struct bio *bio; | 1285 | struct bio *bio; |
1294 | int cur_page = 0; | 1286 | int cur_page = 0; |
1295 | int ret, offset; | 1287 | int ret, offset; |
1288 | struct iov_iter i; | ||
1289 | struct iovec iov; | ||
1296 | 1290 | ||
1297 | for (i = 0; i < iov_count; i++) { | 1291 | iov_for_each(iov, i, *iter) { |
1298 | unsigned long uaddr = (unsigned long)iov[i].iov_base; | 1292 | unsigned long uaddr = (unsigned long) iov.iov_base; |
1299 | unsigned long len = iov[i].iov_len; | 1293 | unsigned long len = iov.iov_len; |
1300 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1294 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1301 | unsigned long start = uaddr >> PAGE_SHIFT; | 1295 | unsigned long start = uaddr >> PAGE_SHIFT; |
1302 | 1296 | ||
@@ -1326,16 +1320,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
1326 | if (!pages) | 1320 | if (!pages) |
1327 | goto out; | 1321 | goto out; |
1328 | 1322 | ||
1329 | for (i = 0; i < iov_count; i++) { | 1323 | iov_for_each(iov, i, *iter) { |
1330 | unsigned long uaddr = (unsigned long)iov[i].iov_base; | 1324 | unsigned long uaddr = (unsigned long) iov.iov_base; |
1331 | unsigned long len = iov[i].iov_len; | 1325 | unsigned long len = iov.iov_len; |
1332 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1326 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1333 | unsigned long start = uaddr >> PAGE_SHIFT; | 1327 | unsigned long start = uaddr >> PAGE_SHIFT; |
1334 | const int local_nr_pages = end - start; | 1328 | const int local_nr_pages = end - start; |
1335 | const int page_limit = cur_page + local_nr_pages; | 1329 | const int page_limit = cur_page + local_nr_pages; |
1336 | 1330 | ||
1337 | ret = get_user_pages_fast(uaddr, local_nr_pages, | 1331 | ret = get_user_pages_fast(uaddr, local_nr_pages, |
1338 | write_to_vm, &pages[cur_page]); | 1332 | (iter->type & WRITE) != WRITE, |
1333 | &pages[cur_page]); | ||
1339 | if (ret < local_nr_pages) { | 1334 | if (ret < local_nr_pages) { |
1340 | ret = -EFAULT; | 1335 | ret = -EFAULT; |
1341 | goto out_unmap; | 1336 | goto out_unmap; |
@@ -1375,72 +1370,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
1375 | /* | 1370 | /* |
1376 | * set data direction, and check if mapped pages need bouncing | 1371 | * set data direction, and check if mapped pages need bouncing |
1377 | */ | 1372 | */ |
1378 | if (!write_to_vm) | 1373 | if (iter->type & WRITE) |
1379 | bio->bi_rw |= REQ_WRITE; | 1374 | bio->bi_rw |= REQ_WRITE; |
1380 | 1375 | ||
1381 | bio->bi_bdev = bdev; | ||
1382 | bio->bi_flags |= (1 << BIO_USER_MAPPED); | 1376 | bio->bi_flags |= (1 << BIO_USER_MAPPED); |
1383 | return bio; | ||
1384 | |||
1385 | out_unmap: | ||
1386 | for (i = 0; i < nr_pages; i++) { | ||
1387 | if(!pages[i]) | ||
1388 | break; | ||
1389 | page_cache_release(pages[i]); | ||
1390 | } | ||
1391 | out: | ||
1392 | kfree(pages); | ||
1393 | bio_put(bio); | ||
1394 | return ERR_PTR(ret); | ||
1395 | } | ||
1396 | |||
1397 | /** | ||
1398 | * bio_map_user - map user address into bio | ||
1399 | * @q: the struct request_queue for the bio | ||
1400 | * @bdev: destination block device | ||
1401 | * @uaddr: start of user address | ||
1402 | * @len: length in bytes | ||
1403 | * @write_to_vm: bool indicating writing to pages or not | ||
1404 | * @gfp_mask: memory allocation flags | ||
1405 | * | ||
1406 | * Map the user space address into a bio suitable for io to a block | ||
1407 | * device. Returns an error pointer in case of error. | ||
1408 | */ | ||
1409 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | ||
1410 | unsigned long uaddr, unsigned int len, int write_to_vm, | ||
1411 | gfp_t gfp_mask) | ||
1412 | { | ||
1413 | struct sg_iovec iov; | ||
1414 | |||
1415 | iov.iov_base = (void __user *)uaddr; | ||
1416 | iov.iov_len = len; | ||
1417 | |||
1418 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); | ||
1419 | } | ||
1420 | EXPORT_SYMBOL(bio_map_user); | ||
1421 | |||
1422 | /** | ||
1423 | * bio_map_user_iov - map user sg_iovec table into bio | ||
1424 | * @q: the struct request_queue for the bio | ||
1425 | * @bdev: destination block device | ||
1426 | * @iov: the iovec. | ||
1427 | * @iov_count: number of elements in the iovec | ||
1428 | * @write_to_vm: bool indicating writing to pages or not | ||
1429 | * @gfp_mask: memory allocation flags | ||
1430 | * | ||
1431 | * Map the user space address into a bio suitable for io to a block | ||
1432 | * device. Returns an error pointer in case of error. | ||
1433 | */ | ||
1434 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, | ||
1435 | const struct sg_iovec *iov, int iov_count, | ||
1436 | int write_to_vm, gfp_t gfp_mask) | ||
1437 | { | ||
1438 | struct bio *bio; | ||
1439 | |||
1440 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, | ||
1441 | gfp_mask); | ||
1442 | if (IS_ERR(bio)) | ||
1443 | return bio; | ||
1444 | 1377 | ||
1445 | /* | 1378 | /* |
1446 | * subtle -- if __bio_map_user() ended up bouncing a bio, | 1379 | * subtle -- if __bio_map_user() ended up bouncing a bio, |
@@ -1449,8 +1382,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
1449 | * reference to it | 1382 | * reference to it |
1450 | */ | 1383 | */ |
1451 | bio_get(bio); | 1384 | bio_get(bio); |
1452 | |||
1453 | return bio; | 1385 | return bio; |
1386 | |||
1387 | out_unmap: | ||
1388 | for (j = 0; j < nr_pages; j++) { | ||
1389 | if (!pages[j]) | ||
1390 | break; | ||
1391 | page_cache_release(pages[j]); | ||
1392 | } | ||
1393 | out: | ||
1394 | kfree(pages); | ||
1395 | bio_put(bio); | ||
1396 | return ERR_PTR(ret); | ||
1454 | } | 1397 | } |
1455 | 1398 | ||
1456 | static void __bio_unmap_user(struct bio *bio) | 1399 | static void __bio_unmap_user(struct bio *bio) |
@@ -1492,8 +1435,18 @@ static void bio_map_kern_endio(struct bio *bio, int err)
1492 | bio_put(bio); | 1435 | bio_put(bio); |
1493 | } | 1436 | } |
1494 | 1437 | ||
1495 | static struct bio *__bio_map_kern(struct request_queue *q, void *data, | 1438 | /** |
1496 | unsigned int len, gfp_t gfp_mask) | 1439 | * bio_map_kern - map kernel address into bio |
1440 | * @q: the struct request_queue for the bio | ||
1441 | * @data: pointer to buffer to map | ||
1442 | * @len: length in bytes | ||
1443 | * @gfp_mask: allocation flags for bio allocation | ||
1444 | * | ||
1445 | * Map the kernel address into a bio suitable for io to a block | ||
1446 | * device. Returns an error pointer in case of error. | ||
1447 | */ | ||
1448 | struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, | ||
1449 | gfp_t gfp_mask) | ||
1497 | { | 1450 | { |
1498 | unsigned long kaddr = (unsigned long)data; | 1451 | unsigned long kaddr = (unsigned long)data; |
1499 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1452 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
@@ -1517,8 +1470,11 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1517 | bytes = len; | 1470 | bytes = len; |
1518 | 1471 | ||
1519 | if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, | 1472 | if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, |
1520 | offset) < bytes) | 1473 | offset) < bytes) { |
1521 | break; | 1474 | /* we don't support partial mappings */ |
1475 | bio_put(bio); | ||
1476 | return ERR_PTR(-EINVAL); | ||
1477 | } | ||
1522 | 1478 | ||
1523 | data += bytes; | 1479 | data += bytes; |
1524 | len -= bytes; | 1480 | len -= bytes; |
@@ -1528,57 +1484,26 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1528 | bio->bi_end_io = bio_map_kern_endio; | 1484 | bio->bi_end_io = bio_map_kern_endio; |
1529 | return bio; | 1485 | return bio; |
1530 | } | 1486 | } |
1487 | EXPORT_SYMBOL(bio_map_kern); | ||
1531 | 1488 | ||
1532 | /** | 1489 | static void bio_copy_kern_endio(struct bio *bio, int err) |
1533 | * bio_map_kern - map kernel address into bio | ||
1534 | * @q: the struct request_queue for the bio | ||
1535 | * @data: pointer to buffer to map | ||
1536 | * @len: length in bytes | ||
1537 | * @gfp_mask: allocation flags for bio allocation | ||
1538 | * | ||
1539 | * Map the kernel address into a bio suitable for io to a block | ||
1540 | * device. Returns an error pointer in case of error. | ||
1541 | */ | ||
1542 | struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, | ||
1543 | gfp_t gfp_mask) | ||
1544 | { | 1490 | { |
1545 | struct bio *bio; | 1491 | bio_free_pages(bio); |
1546 | |||
1547 | bio = __bio_map_kern(q, data, len, gfp_mask); | ||
1548 | if (IS_ERR(bio)) | ||
1549 | return bio; | ||
1550 | |||
1551 | if (bio->bi_iter.bi_size == len) | ||
1552 | return bio; | ||
1553 | |||
1554 | /* | ||
1555 | * Don't support partial mappings. | ||
1556 | */ | ||
1557 | bio_put(bio); | 1492 | bio_put(bio); |
1558 | return ERR_PTR(-EINVAL); | ||
1559 | } | 1493 | } |
1560 | EXPORT_SYMBOL(bio_map_kern); | ||
1561 | 1494 | ||
1562 | static void bio_copy_kern_endio(struct bio *bio, int err) | 1495 | static void bio_copy_kern_endio_read(struct bio *bio, int err) |
1563 | { | 1496 | { |
1497 | char *p = bio->bi_private; | ||
1564 | struct bio_vec *bvec; | 1498 | struct bio_vec *bvec; |
1565 | const int read = bio_data_dir(bio) == READ; | ||
1566 | struct bio_map_data *bmd = bio->bi_private; | ||
1567 | int i; | 1499 | int i; |
1568 | char *p = bmd->sgvecs[0].iov_base; | ||
1569 | 1500 | ||
1570 | bio_for_each_segment_all(bvec, bio, i) { | 1501 | bio_for_each_segment_all(bvec, bio, i) { |
1571 | char *addr = page_address(bvec->bv_page); | 1502 | memcpy(p, page_address(bvec->bv_page), bvec->bv_len); |
1572 | |||
1573 | if (read) | ||
1574 | memcpy(p, addr, bvec->bv_len); | ||
1575 | |||
1576 | __free_page(bvec->bv_page); | ||
1577 | p += bvec->bv_len; | 1503 | p += bvec->bv_len; |
1578 | } | 1504 | } |
1579 | 1505 | ||
1580 | kfree(bmd); | 1506 | bio_copy_kern_endio(bio, err); |
1581 | bio_put(bio); | ||
1582 | } | 1507 | } |
1583 | 1508 | ||
1584 | /** | 1509 | /** |
@@ -1595,28 +1520,59 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
1595 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | 1520 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
1596 | gfp_t gfp_mask, int reading) | 1521 | gfp_t gfp_mask, int reading) |
1597 | { | 1522 | { |
1523 | unsigned long kaddr = (unsigned long)data; | ||
1524 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
1525 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
1598 | struct bio *bio; | 1526 | struct bio *bio; |
1599 | struct bio_vec *bvec; | 1527 | void *p = data; |
1600 | int i; | 1528 | int nr_pages = 0; |
1529 | |||
1530 | /* | ||
1531 | * Overflow, abort | ||
1532 | */ | ||
1533 | if (end < start) | ||
1534 | return ERR_PTR(-EINVAL); | ||
1601 | 1535 | ||
1602 | bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); | 1536 | nr_pages = end - start; |
1603 | if (IS_ERR(bio)) | 1537 | bio = bio_kmalloc(gfp_mask, nr_pages); |
1604 | return bio; | 1538 | if (!bio) |
1539 | return ERR_PTR(-ENOMEM); | ||
1605 | 1540 | ||
1606 | if (!reading) { | 1541 | while (len) { |
1607 | void *p = data; | 1542 | struct page *page; |
1543 | unsigned int bytes = PAGE_SIZE; | ||
1608 | 1544 | ||
1609 | bio_for_each_segment_all(bvec, bio, i) { | 1545 | if (bytes > len) |
1610 | char *addr = page_address(bvec->bv_page); | 1546 | bytes = len; |
1611 | 1547 | ||
1612 | memcpy(addr, p, bvec->bv_len); | 1548 | page = alloc_page(q->bounce_gfp | gfp_mask); |
1613 | p += bvec->bv_len; | 1549 | if (!page) |
1614 | } | 1550 | goto cleanup; |
1551 | |||
1552 | if (!reading) | ||
1553 | memcpy(page_address(page), p, bytes); | ||
1554 | |||
1555 | if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) | ||
1556 | break; | ||
1557 | |||
1558 | len -= bytes; | ||
1559 | p += bytes; | ||
1615 | } | 1560 | } |
1616 | 1561 | ||
1617 | bio->bi_end_io = bio_copy_kern_endio; | 1562 | if (reading) { |
1563 | bio->bi_end_io = bio_copy_kern_endio_read; | ||
1564 | bio->bi_private = data; | ||
1565 | } else { | ||
1566 | bio->bi_end_io = bio_copy_kern_endio; | ||
1567 | bio->bi_rw |= REQ_WRITE; | ||
1568 | } | ||
1618 | 1569 | ||
1619 | return bio; | 1570 | return bio; |
1571 | |||
1572 | cleanup: | ||
1573 | bio_free_pages(bio); | ||
1574 | bio_put(bio); | ||
1575 | return ERR_PTR(-ENOMEM); | ||
1620 | } | 1576 | } |
1621 | EXPORT_SYMBOL(bio_copy_kern); | 1577 | EXPORT_SYMBOL(bio_copy_kern); |
1622 | 1578 | ||
diff --git a/block/blk-core.c b/block/blk-core.c
index 928aac29bccd..794c3e7f01cf 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2048,6 +2048,13 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2048 | should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) | 2048 | should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) |
2049 | return -EIO; | 2049 | return -EIO; |
2050 | 2050 | ||
2051 | if (q->mq_ops) { | ||
2052 | if (blk_queue_io_stat(q)) | ||
2053 | blk_account_io_start(rq, true); | ||
2054 | blk_mq_insert_request(rq, false, true, true); | ||
2055 | return 0; | ||
2056 | } | ||
2057 | |||
2051 | spin_lock_irqsave(q->queue_lock, flags); | 2058 | spin_lock_irqsave(q->queue_lock, flags); |
2052 | if (unlikely(blk_queue_dying(q))) { | 2059 | if (unlikely(blk_queue_dying(q))) { |
2053 | spin_unlock_irqrestore(q->queue_lock, flags); | 2060 | spin_unlock_irqrestore(q->queue_lock, flags); |
@@ -2907,7 +2914,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2907 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | 2914 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
2908 | { | 2915 | { |
2909 | dst->cpu = src->cpu; | 2916 | dst->cpu = src->cpu; |
2910 | dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; | 2917 | dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; |
2911 | dst->cmd_type = src->cmd_type; | 2918 | dst->cmd_type = src->cmd_type; |
2912 | dst->__sector = blk_rq_pos(src); | 2919 | dst->__sector = blk_rq_pos(src); |
2913 | dst->__data_len = blk_rq_bytes(src); | 2920 | dst->__data_len = blk_rq_bytes(src); |
@@ -2945,8 +2952,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2945 | if (!bs) | 2952 | if (!bs) |
2946 | bs = fs_bio_set; | 2953 | bs = fs_bio_set; |
2947 | 2954 | ||
2948 | blk_rq_init(NULL, rq); | ||
2949 | |||
2950 | __rq_for_each_bio(bio_src, rq_src) { | 2955 | __rq_for_each_bio(bio_src, rq_src) { |
2951 | bio = bio_clone_fast(bio_src, gfp_mask, bs); | 2956 | bio = bio_clone_fast(bio_src, gfp_mask, bs); |
2952 | if (!bio) | 2957 | if (!bio) |
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 8411be3c19d3..7688ee3f5d72 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -283,24 +283,34 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
283 | * @sector: start sector | 283 | * @sector: start sector |
284 | * @nr_sects: number of sectors to write | 284 | * @nr_sects: number of sectors to write |
285 | * @gfp_mask: memory allocation flags (for bio_alloc) | 285 | * @gfp_mask: memory allocation flags (for bio_alloc) |
286 | * @discard: whether to discard the block range | ||
286 | * | 287 | * |
287 | * Description: | 288 | * Description: |
288 | * Generate and issue number of bios with zerofiled pages. | 289 | * Zero-fill a block range. If the discard flag is set and the block |
290 | * device guarantees that subsequent READ operations to the block range | ||
291 | * in question will return zeroes, the blocks will be discarded. Should | ||
292 | * the discard request fail, if the discard flag is not set, or if | ||
293 | * discard_zeroes_data is not supported, this function will resort to | ||
294 | * zeroing the blocks manually, thus provisioning (allocating, | ||
295 | * anchoring) them. If the block device supports the WRITE SAME command | ||
296 | * blkdev_issue_zeroout() will use it to optimize the process of | ||
297 | * clearing the block range. Otherwise the zeroing will be performed | ||
298 | * using regular WRITE calls. | ||
289 | */ | 299 | */ |
290 | 300 | ||
291 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 301 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
292 | sector_t nr_sects, gfp_t gfp_mask) | 302 | sector_t nr_sects, gfp_t gfp_mask, bool discard) |
293 | { | 303 | { |
294 | if (bdev_write_same(bdev)) { | 304 | struct request_queue *q = bdev_get_queue(bdev); |
295 | unsigned char bdn[BDEVNAME_SIZE]; | ||
296 | 305 | ||
297 | if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, | 306 | if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data && |
298 | ZERO_PAGE(0))) | 307 | blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0) |
299 | return 0; | 308 | return 0; |
300 | 309 | ||
301 | bdevname(bdev, bdn); | 310 | if (bdev_write_same(bdev) && |
302 | pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); | 311 | blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, |
303 | } | 312 | ZERO_PAGE(0)) == 0) |
313 | return 0; | ||
304 | 314 | ||
305 | return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); | 315 | return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); |
306 | } | 316 | } |
diff --git a/block/blk-map.c b/block/blk-map.c
index f890d4345b0c..b8d2725324a6 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -5,7 +5,7 @@
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/bio.h> | 6 | #include <linux/bio.h> |
7 | #include <linux/blkdev.h> | 7 | #include <linux/blkdev.h> |
8 | #include <scsi/sg.h> /* for struct sg_iovec */ | 8 | #include <linux/uio.h> |
9 | 9 | ||
10 | #include "blk.h" | 10 | #include "blk.h" |
11 | 11 | ||
@@ -39,138 +39,12 @@ static int __blk_rq_unmap_user(struct bio *bio)
39 | return ret; | 39 | return ret; |
40 | } | 40 | } |
41 | 41 | ||
42 | static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | ||
43 | struct rq_map_data *map_data, void __user *ubuf, | ||
44 | unsigned int len, gfp_t gfp_mask) | ||
45 | { | ||
46 | unsigned long uaddr; | ||
47 | struct bio *bio, *orig_bio; | ||
48 | int reading, ret; | ||
49 | |||
50 | reading = rq_data_dir(rq) == READ; | ||
51 | |||
52 | /* | ||
53 | * if alignment requirement is satisfied, map in user pages for | ||
54 | * direct dma. else, set up kernel bounce buffers | ||
55 | */ | ||
56 | uaddr = (unsigned long) ubuf; | ||
57 | if (blk_rq_aligned(q, uaddr, len) && !map_data) | ||
58 | bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); | ||
59 | else | ||
60 | bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); | ||
61 | |||
62 | if (IS_ERR(bio)) | ||
63 | return PTR_ERR(bio); | ||
64 | |||
65 | if (map_data && map_data->null_mapped) | ||
66 | bio->bi_flags |= (1 << BIO_NULL_MAPPED); | ||
67 | |||
68 | orig_bio = bio; | ||
69 | blk_queue_bounce(q, &bio); | ||
70 | |||
71 | /* | ||
72 | * We link the bounce buffer in and could have to traverse it | ||
73 | * later so we have to get a ref to prevent it from being freed | ||
74 | */ | ||
75 | bio_get(bio); | ||
76 | |||
77 | ret = blk_rq_append_bio(q, rq, bio); | ||
78 | if (!ret) | ||
79 | return bio->bi_iter.bi_size; | ||
80 | |||
81 | /* if it was boucned we must call the end io function */ | ||
82 | bio_endio(bio, 0); | ||
83 | __blk_rq_unmap_user(orig_bio); | ||
84 | bio_put(bio); | ||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage | ||
90 | * @q: request queue where request should be inserted | ||
91 | * @rq: request structure to fill | ||
92 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
93 | * @ubuf: the user buffer | ||
94 | * @len: length of user data | ||
95 | * @gfp_mask: memory allocation flags | ||
96 | * | ||
97 | * Description: | ||
98 | * Data will be mapped directly for zero copy I/O, if possible. Otherwise | ||
99 | * a kernel bounce buffer is used. | ||
100 | * | ||
101 | * A matching blk_rq_unmap_user() must be issued at the end of I/O, while | ||
102 | * still in process context. | ||
103 | * | ||
104 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | ||
105 | * before being submitted to the device, as pages mapped may be out of | ||
106 | * reach. It's the callers responsibility to make sure this happens. The | ||
107 | * original bio must be passed back in to blk_rq_unmap_user() for proper | ||
108 | * unmapping. | ||
109 | */ | ||
110 | int blk_rq_map_user(struct request_queue *q, struct request *rq, | ||
111 | struct rq_map_data *map_data, void __user *ubuf, | ||
112 | unsigned long len, gfp_t gfp_mask) | ||
113 | { | ||
114 | unsigned long bytes_read = 0; | ||
115 | struct bio *bio = NULL; | ||
116 | int ret; | ||
117 | |||
118 | if (len > (queue_max_hw_sectors(q) << 9)) | ||
119 | return -EINVAL; | ||
120 | if (!len) | ||
121 | return -EINVAL; | ||
122 | |||
123 | if (!ubuf && (!map_data || !map_data->null_mapped)) | ||
124 | return -EINVAL; | ||
125 | |||
126 | while (bytes_read != len) { | ||
127 | unsigned long map_len, end, start; | ||
128 | |||
129 | map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); | ||
130 | end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) | ||
131 | >> PAGE_SHIFT; | ||
132 | start = (unsigned long)ubuf >> PAGE_SHIFT; | ||
133 | |||
134 | /* | ||
135 | * A bad offset could cause us to require BIO_MAX_PAGES + 1 | ||
136 | * pages. If this happens we just lower the requested | ||
137 | * mapping len by a page so that we can fit | ||
138 | */ | ||
139 | if (end - start > BIO_MAX_PAGES) | ||
140 | map_len -= PAGE_SIZE; | ||
141 | |||
142 | ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, | ||
143 | gfp_mask); | ||
144 | if (ret < 0) | ||
145 | goto unmap_rq; | ||
146 | if (!bio) | ||
147 | bio = rq->bio; | ||
148 | bytes_read += ret; | ||
149 | ubuf += ret; | ||
150 | |||
151 | if (map_data) | ||
152 | map_data->offset += ret; | ||
153 | } | ||
154 | |||
155 | if (!bio_flagged(bio, BIO_USER_MAPPED)) | ||
156 | rq->cmd_flags |= REQ_COPY_USER; | ||
157 | |||
158 | return 0; | ||
159 | unmap_rq: | ||
160 | blk_rq_unmap_user(bio); | ||
161 | rq->bio = NULL; | ||
162 | return ret; | ||
163 | } | ||
164 | EXPORT_SYMBOL(blk_rq_map_user); | ||
165 | |||
166 | /** | 42 | /** |
167 | * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage | 43 | * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage |
168 | * @q: request queue where request should be inserted | 44 | * @q: request queue where request should be inserted |
169 | * @rq: request to map data to | 45 | * @rq: request to map data to |
170 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | 46 | * @map_data: pointer to the rq_map_data holding pages (if necessary) |
171 | * @iov: pointer to the iovec | 47 | * @iter: iovec iterator |
172 | * @iov_count: number of elements in the iovec | ||
173 | * @len: I/O byte count | ||
174 | * @gfp_mask: memory allocation flags | 48 | * @gfp_mask: memory allocation flags |
175 | * | 49 | * |
176 | * Description: | 50 | * Description: |
@@ -187,20 +61,21 @@ EXPORT_SYMBOL(blk_rq_map_user);
187 | * unmapping. | 61 | * unmapping. |
188 | */ | 62 | */ |
189 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | 63 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, |
190 | struct rq_map_data *map_data, const struct sg_iovec *iov, | 64 | struct rq_map_data *map_data, |
191 | int iov_count, unsigned int len, gfp_t gfp_mask) | 65 | const struct iov_iter *iter, gfp_t gfp_mask) |
192 | { | 66 | { |
193 | struct bio *bio; | 67 | struct bio *bio; |
194 | int i, read = rq_data_dir(rq) == READ; | ||
195 | int unaligned = 0; | 68 | int unaligned = 0; |
69 | struct iov_iter i; | ||
70 | struct iovec iov; | ||
196 | 71 | ||
197 | if (!iov || iov_count <= 0) | 72 | if (!iter || !iter->count) |
198 | return -EINVAL; | 73 | return -EINVAL; |
199 | 74 | ||
200 | for (i = 0; i < iov_count; i++) { | 75 | iov_for_each(iov, i, *iter) { |
201 | unsigned long uaddr = (unsigned long)iov[i].iov_base; | 76 | unsigned long uaddr = (unsigned long) iov.iov_base; |
202 | 77 | ||
203 | if (!iov[i].iov_len) | 78 | if (!iov.iov_len) |
204 | return -EINVAL; | 79 | return -EINVAL; |
205 | 80 | ||
206 | /* | 81 | /* |
@@ -210,16 +85,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
210 | unaligned = 1; | 85 | unaligned = 1; |
211 | } | 86 | } |
212 | 87 | ||
213 | if (unaligned || (q->dma_pad_mask & len) || map_data) | 88 | if (unaligned || (q->dma_pad_mask & iter->count) || map_data) |
214 | bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, | 89 | bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); |
215 | gfp_mask); | ||
216 | else | 90 | else |
217 | bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); | 91 | bio = bio_map_user_iov(q, iter, gfp_mask); |
218 | 92 | ||
219 | if (IS_ERR(bio)) | 93 | if (IS_ERR(bio)) |
220 | return PTR_ERR(bio); | 94 | return PTR_ERR(bio); |
221 | 95 | ||
222 | if (bio->bi_iter.bi_size != len) { | 96 | if (map_data && map_data->null_mapped) |
97 | bio->bi_flags |= (1 << BIO_NULL_MAPPED); | ||
98 | |||
99 | if (bio->bi_iter.bi_size != iter->count) { | ||
223 | /* | 100 | /* |
224 | * Grab an extra reference to this bio, as bio_unmap_user() | 101 | * Grab an extra reference to this bio, as bio_unmap_user() |
225 | * expects to be able to drop it twice as it happens on the | 102 | * expects to be able to drop it twice as it happens on the |
@@ -241,6 +118,21 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
241 | } | 118 | } |
242 | EXPORT_SYMBOL(blk_rq_map_user_iov); | 119 | EXPORT_SYMBOL(blk_rq_map_user_iov); |
243 | 120 | ||
121 | int blk_rq_map_user(struct request_queue *q, struct request *rq, | ||
122 | struct rq_map_data *map_data, void __user *ubuf, | ||
123 | unsigned long len, gfp_t gfp_mask) | ||
124 | { | ||
125 | struct iovec iov; | ||
126 | struct iov_iter i; | ||
127 | |||
128 | iov.iov_base = ubuf; | ||
129 | iov.iov_len = len; | ||
130 | iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len); | ||
131 | |||
132 | return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask); | ||
133 | } | ||
134 | EXPORT_SYMBOL(blk_rq_map_user); | ||
135 | |||
244 | /** | 136 | /** |
245 | * blk_rq_unmap_user - unmap a request with user data | 137 | * blk_rq_unmap_user - unmap a request with user data |
246 | * @bio: start of bio list | 138 | * @bio: start of bio list |
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 89b97b5e0881..fc1ff3b1ea1f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -283,35 +283,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
283 | } | 283 | } |
284 | EXPORT_SYMBOL(blk_rq_map_sg); | 284 | EXPORT_SYMBOL(blk_rq_map_sg); |
285 | 285 | ||
286 | /** | ||
287 | * blk_bio_map_sg - map a bio to a scatterlist | ||
288 | * @q: request_queue in question | ||
289 | * @bio: bio being mapped | ||
290 | * @sglist: scatterlist being mapped | ||
291 | * | ||
292 | * Note: | ||
293 | * Caller must make sure sg can hold bio->bi_phys_segments entries | ||
294 | * | ||
295 | * Will return the number of sg entries setup | ||
296 | */ | ||
297 | int blk_bio_map_sg(struct request_queue *q, struct bio *bio, | ||
298 | struct scatterlist *sglist) | ||
299 | { | ||
300 | struct scatterlist *sg = NULL; | ||
301 | int nsegs; | ||
302 | struct bio *next = bio->bi_next; | ||
303 | bio->bi_next = NULL; | ||
304 | |||
305 | nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); | ||
306 | bio->bi_next = next; | ||
307 | if (sg) | ||
308 | sg_mark_end(sg); | ||
309 | |||
310 | BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments); | ||
311 | return nsegs; | ||
312 | } | ||
313 | EXPORT_SYMBOL(blk_bio_map_sg); | ||
314 | |||
315 | static inline int ll_new_hw_segment(struct request_queue *q, | 286 | static inline int ll_new_hw_segment(struct request_queue *q, |
316 | struct request *req, | 287 | struct request *req, |
317 | struct bio *bio) | 288 | struct bio *bio) |
@@ -385,6 +356,14 @@ static bool req_no_special_merge(struct request *req)
385 | return !q->mq_ops && req->special; | 356 | return !q->mq_ops && req->special; |
386 | } | 357 | } |
387 | 358 | ||
359 | static int req_gap_to_prev(struct request *req, struct request *next) | ||
360 | { | ||
361 | struct bio *prev = req->biotail; | ||
362 | |||
363 | return bvec_gap_to_prev(&prev->bi_io_vec[prev->bi_vcnt - 1], | ||
364 | next->bio->bi_io_vec[0].bv_offset); | ||
365 | } | ||
366 | |||
388 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | 367 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, |
389 | struct request *next) | 368 | struct request *next) |
390 | { | 369 | { |
@@ -399,6 +378,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
399 | if (req_no_special_merge(req) || req_no_special_merge(next)) | 378 | if (req_no_special_merge(req) || req_no_special_merge(next)) |
400 | return 0; | 379 | return 0; |
401 | 380 | ||
381 | if (test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags) && | ||
382 | req_gap_to_prev(req, next)) | ||
383 | return 0; | ||
384 | |||
402 | /* | 385 | /* |
403 | * Will it become too large? | 386 | * Will it become too large? |
404 | */ | 387 | */ |
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60c9d4a93fe4..d53a764b05ea 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -140,35 +140,39 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
140 | return atomic_read(&hctx->nr_active) < depth; | 140 | return atomic_read(&hctx->nr_active) < depth; |
141 | } | 141 | } |
142 | 142 | ||
143 | static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) | 143 | static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag, |
144 | bool nowrap) | ||
144 | { | 145 | { |
145 | int tag, org_last_tag, end; | 146 | int tag, org_last_tag = last_tag; |
146 | bool wrap = last_tag != 0; | ||
147 | 147 | ||
148 | org_last_tag = last_tag; | 148 | while (1) { |
149 | end = bm->depth; | 149 | tag = find_next_zero_bit(&bm->word, bm->depth, last_tag); |
150 | do { | 150 | if (unlikely(tag >= bm->depth)) { |
151 | restart: | ||
152 | tag = find_next_zero_bit(&bm->word, end, last_tag); | ||
153 | if (unlikely(tag >= end)) { | ||
154 | /* | 151 | /* |
155 | * We started with an offset, start from 0 to | 152 | * We started with an offset, and we didn't reset the |
153 | * offset to 0 in a failure case, so start from 0 to | ||
156 | * exhaust the map. | 154 | * exhaust the map. |
157 | */ | 155 | */ |
158 | if (wrap) { | 156 | if (org_last_tag && last_tag && !nowrap) { |
159 | wrap = false; | 157 | last_tag = org_last_tag = 0; |
160 | end = org_last_tag; | 158 | continue; |
161 | last_tag = 0; | ||
162 | goto restart; | ||
163 | } | 159 | } |
164 | return -1; | 160 | return -1; |
165 | } | 161 | } |
162 | |||
163 | if (!test_and_set_bit(tag, &bm->word)) | ||
164 | break; | ||
165 | |||
166 | last_tag = tag + 1; | 166 | last_tag = tag + 1; |
167 | } while (test_and_set_bit(tag, &bm->word)); | 167 | if (last_tag >= bm->depth - 1) |
168 | last_tag = 0; | ||
169 | } | ||
168 | 170 | ||
169 | return tag; | 171 | return tag; |
170 | } | 172 | } |
171 | 173 | ||
174 | #define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR) | ||
175 | |||
172 | /* | 176 | /* |
173 | * Straight forward bitmap tag implementation, where each bit is a tag | 177 | * Straight forward bitmap tag implementation, where each bit is a tag |
174 | * (cleared == free, and set == busy). The small twist is using per-cpu | 178 | * (cleared == free, and set == busy). The small twist is using per-cpu |
@@ -181,7 +185,7 @@ restart:
181 | * until the map is exhausted. | 185 | * until the map is exhausted. |
182 | */ | 186 | */ |
183 | static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, | 187 | static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, |
184 | unsigned int *tag_cache) | 188 | unsigned int *tag_cache, struct blk_mq_tags *tags) |
185 | { | 189 | { |
186 | unsigned int last_tag, org_last_tag; | 190 | unsigned int last_tag, org_last_tag; |
187 | int index, i, tag; | 191 | int index, i, tag; |
@@ -193,15 +197,24 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
193 | index = TAG_TO_INDEX(bt, last_tag); | 197 | index = TAG_TO_INDEX(bt, last_tag); |
194 | 198 | ||
195 | for (i = 0; i < bt->map_nr; i++) { | 199 | for (i = 0; i < bt->map_nr; i++) { |
196 | tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag)); | 200 | tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag), |
201 | BT_ALLOC_RR(tags)); | ||
197 | if (tag != -1) { | 202 | if (tag != -1) { |
198 | tag += (index << bt->bits_per_word); | 203 | tag += (index << bt->bits_per_word); |
199 | goto done; | 204 | goto done; |
200 | } | 205 | } |
201 | 206 | ||
202 | last_tag = 0; | 207 | /* |
203 | if (++index >= bt->map_nr) | 208 | * Jump to next index, and reset the last tag to be the |
209 | * first tag of that index | ||
210 | */ | ||
211 | index++; | ||
212 | last_tag = (index << bt->bits_per_word); | ||
213 | |||
214 | if (index >= bt->map_nr) { | ||
204 | index = 0; | 215 | index = 0; |
216 | last_tag = 0; | ||
217 | } | ||
205 | } | 218 | } |
206 | 219 | ||
207 | *tag_cache = 0; | 220 | *tag_cache = 0; |
@@ -212,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
212 | * up using the specific cached tag. | 225 | * up using the specific cached tag. |
213 | */ | 226 | */ |
214 | done: | 227 | done: |
215 | if (tag == org_last_tag) { | 228 | if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) { |
216 | last_tag = tag + 1; | 229 | last_tag = tag + 1; |
217 | if (last_tag >= bt->depth - 1) | 230 | if (last_tag >= bt->depth - 1) |
218 | last_tag = 0; | 231 | last_tag = 0; |
@@ -241,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
241 | static int bt_get(struct blk_mq_alloc_data *data, | 254 | static int bt_get(struct blk_mq_alloc_data *data, |
242 | struct blk_mq_bitmap_tags *bt, | 255 | struct blk_mq_bitmap_tags *bt, |
243 | struct blk_mq_hw_ctx *hctx, | 256 | struct blk_mq_hw_ctx *hctx, |
244 | unsigned int *last_tag) | 257 | unsigned int *last_tag, struct blk_mq_tags *tags) |
245 | { | 258 | { |
246 | struct bt_wait_state *bs; | 259 | struct bt_wait_state *bs; |
247 | DEFINE_WAIT(wait); | 260 | DEFINE_WAIT(wait); |
248 | int tag; | 261 | int tag; |
249 | 262 | ||
250 | tag = __bt_get(hctx, bt, last_tag); | 263 | tag = __bt_get(hctx, bt, last_tag, tags); |
251 | if (tag != -1) | 264 | if (tag != -1) |
252 | return tag; | 265 | return tag; |
253 | 266 | ||
@@ -258,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
258 | do { | 271 | do { |
259 | prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); | 272 | prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); |
260 | 273 | ||
261 | tag = __bt_get(hctx, bt, last_tag); | 274 | tag = __bt_get(hctx, bt, last_tag, tags); |
262 | if (tag != -1) | 275 | if (tag != -1) |
263 | break; | 276 | break; |
264 | 277 | ||
@@ -273,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
273 | * Retry tag allocation after running the hardware queue, | 286 | * Retry tag allocation after running the hardware queue, |
274 | * as running the queue may also have found completions. | 287 | * as running the queue may also have found completions. |
275 | */ | 288 | */ |
276 | tag = __bt_get(hctx, bt, last_tag); | 289 | tag = __bt_get(hctx, bt, last_tag, tags); |
277 | if (tag != -1) | 290 | if (tag != -1) |
278 | break; | 291 | break; |
279 | 292 | ||
@@ -304,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
304 | int tag; | 317 | int tag; |
305 | 318 | ||
306 | tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, | 319 | tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, |
307 | &data->ctx->last_tag); | 320 | &data->ctx->last_tag, data->hctx->tags); |
308 | if (tag >= 0) | 321 | if (tag >= 0) |
309 | return tag + data->hctx->tags->nr_reserved_tags; | 322 | return tag + data->hctx->tags->nr_reserved_tags; |
310 | 323 | ||
@@ -320,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
320 | return BLK_MQ_TAG_FAIL; | 333 | return BLK_MQ_TAG_FAIL; |
321 | } | 334 | } |
322 | 335 | ||
323 | tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero); | 336 | tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero, |
337 | data->hctx->tags); | ||
324 | if (tag < 0) | 338 | if (tag < 0) |
325 | return BLK_MQ_TAG_FAIL; | 339 | return BLK_MQ_TAG_FAIL; |
326 | 340 | ||
@@ -392,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
392 | 406 | ||
393 | BUG_ON(real_tag >= tags->nr_tags); | 407 | BUG_ON(real_tag >= tags->nr_tags); |
394 | bt_clear_tag(&tags->bitmap_tags, real_tag); | 408 | bt_clear_tag(&tags->bitmap_tags, real_tag); |
395 | *last_tag = real_tag; | 409 | if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO)) |
410 | *last_tag = real_tag; | ||
396 | } else { | 411 | } else { |
397 | BUG_ON(tag >= tags->nr_reserved_tags); | 412 | BUG_ON(tag >= tags->nr_reserved_tags); |
398 | bt_clear_tag(&tags->breserved_tags, tag); | 413 | bt_clear_tag(&tags->breserved_tags, tag); |
@@ -509,6 +524,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
509 | bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); | 524 | bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); |
510 | if (!bt->bs) { | 525 | if (!bt->bs) { |
511 | kfree(bt->map); | 526 | kfree(bt->map); |
527 | bt->map = NULL; | ||
512 | return -ENOMEM; | 528 | return -ENOMEM; |
513 | } | 529 | } |
514 | 530 | ||
@@ -529,10 +545,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
529 | } | 545 | } |
530 | 546 | ||
531 | static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, | 547 | static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, |
532 | int node) | 548 | int node, int alloc_policy) |
533 | { | 549 | { |
534 | unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; | 550 | unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; |
535 | 551 | ||
552 | tags->alloc_policy = alloc_policy; | ||
553 | |||
536 | if (bt_alloc(&tags->bitmap_tags, depth, node, false)) | 554 | if (bt_alloc(&tags->bitmap_tags, depth, node, false)) |
537 | goto enomem; | 555 | goto enomem; |
538 | if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true)) | 556 | if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true)) |
@@ -546,7 +564,8 @@ enomem:
546 | } | 564 | } |
547 | 565 | ||
548 | struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | 566 | struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, |
549 | unsigned int reserved_tags, int node) | 567 | unsigned int reserved_tags, |
568 | int node, int alloc_policy) | ||
550 | { | 569 | { |
551 | struct blk_mq_tags *tags; | 570 | struct blk_mq_tags *tags; |
552 | 571 | ||
@@ -562,7 +581,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | |||
562 | tags->nr_tags = total_tags; | 581 | tags->nr_tags = total_tags; |
563 | tags->nr_reserved_tags = reserved_tags; | 582 | tags->nr_reserved_tags = reserved_tags; |
564 | 583 | ||
565 | return blk_mq_init_bitmap_tags(tags, node); | 584 | return blk_mq_init_bitmap_tags(tags, node, alloc_policy); |
566 | } | 585 | } |
567 | 586 | ||
568 | void blk_mq_free_tags(struct blk_mq_tags *tags) | 587 | void blk_mq_free_tags(struct blk_mq_tags *tags) |
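The blk-mq-tag.c hunks above thread an allocation-policy value from blk_mq_init_tags() down into the shared tag map and make the per-context last_tag hint FIFO-only: under a round-robin policy, pointing the hint back at a just-freed tag would pull allocations toward low tag numbers instead of cycling through the space. A minimal userspace sketch of that release-side rule, assuming a two-value policy enum rather than the kernel's constants:

enum tag_alloc_policy { TAG_ALLOC_FIFO, TAG_ALLOC_RR };

/*
 * On release, only a FIFO-style allocator benefits from steering the next
 * search back toward the freed tag; a round-robin allocator keeps its own
 * advancing cursor, so the cached hint is left untouched.
 */
static void release_tag_hint(enum tag_alloc_policy policy,
                             unsigned int freed_tag, unsigned int *last_tag)
{
        if (policy == TAG_ALLOC_FIFO)
                *last_tag = freed_tag;
}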
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index a6fa0fc9d41a..90767b370308 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h | |||
@@ -42,10 +42,12 @@ struct blk_mq_tags { | |||
42 | 42 | ||
43 | struct request **rqs; | 43 | struct request **rqs; |
44 | struct list_head page_list; | 44 | struct list_head page_list; |
45 | |||
46 | int alloc_policy; | ||
45 | }; | 47 | }; |
46 | 48 | ||
47 | 49 | ||
48 | extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); | 50 | extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy); |
49 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); | 51 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); |
50 | 52 | ||
51 | extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); | 53 | extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); |
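With the header change, struct blk_mq_tags records the policy chosen at init time, and blk-mq.c derives that value from the tag set's flags via BLK_MQ_FLAG_TO_ALLOC_POLICY() when building the per-queue tag map. A sketch of that kind of flags encoding, where the shift and width are assumptions for illustration and not the kernel's actual BLK_MQ_F_* bit layout:

/*
 * Illustrative encoding only: a small allocation-policy field is packed
 * into the driver's flags word and unpacked where the shared tag map is
 * created, so drivers select a policy without a new setup callback.
 */
#define EX_ALLOC_POLICY_SHIFT   8
#define EX_ALLOC_POLICY_MASK    0x3u

#define EX_FLAGS_TO_POLICY(flags) \
        (((flags) >> EX_ALLOC_POLICY_SHIFT) & EX_ALLOC_POLICY_MASK)
#define EX_POLICY_TO_FLAGS(policy) \
        (((policy) & EX_ALLOC_POLICY_MASK) << EX_ALLOC_POLICY_SHIFT)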
diff --git a/block/blk-mq.c b/block/blk-mq.c index 2390c5541e71..4f4bea21052e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -33,6 +33,7 @@ static DEFINE_MUTEX(all_q_mutex); | |||
33 | static LIST_HEAD(all_q_list); | 33 | static LIST_HEAD(all_q_list); |
34 | 34 | ||
35 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); | 35 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); |
36 | static void blk_mq_run_queues(struct request_queue *q); | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * Check if any of the ctx's have pending work in this hardware queue | 39 | * Check if any of the ctx's have pending work in this hardware queue |
@@ -117,7 +118,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q) | |||
117 | 118 | ||
118 | if (freeze) { | 119 | if (freeze) { |
119 | percpu_ref_kill(&q->mq_usage_counter); | 120 | percpu_ref_kill(&q->mq_usage_counter); |
120 | blk_mq_run_queues(q, false); | 121 | blk_mq_run_queues(q); |
121 | } | 122 | } |
122 | } | 123 | } |
123 | EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); | 124 | EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); |
@@ -136,6 +137,7 @@ void blk_mq_freeze_queue(struct request_queue *q) | |||
136 | blk_mq_freeze_queue_start(q); | 137 | blk_mq_freeze_queue_start(q); |
137 | blk_mq_freeze_queue_wait(q); | 138 | blk_mq_freeze_queue_wait(q); |
138 | } | 139 | } |
140 | EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); | ||
139 | 141 | ||
140 | void blk_mq_unfreeze_queue(struct request_queue *q) | 142 | void blk_mq_unfreeze_queue(struct request_queue *q) |
141 | { | 143 | { |
@@ -902,7 +904,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | |||
902 | &hctx->run_work, 0); | 904 | &hctx->run_work, 0); |
903 | } | 905 | } |
904 | 906 | ||
905 | void blk_mq_run_queues(struct request_queue *q, bool async) | 907 | static void blk_mq_run_queues(struct request_queue *q) |
906 | { | 908 | { |
907 | struct blk_mq_hw_ctx *hctx; | 909 | struct blk_mq_hw_ctx *hctx; |
908 | int i; | 910 | int i; |
@@ -913,10 +915,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async) | |||
913 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) | 915 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) |
914 | continue; | 916 | continue; |
915 | 917 | ||
916 | blk_mq_run_hw_queue(hctx, async); | 918 | blk_mq_run_hw_queue(hctx, false); |
917 | } | 919 | } |
918 | } | 920 | } |
919 | EXPORT_SYMBOL(blk_mq_run_queues); | ||
920 | 921 | ||
921 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) | 922 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) |
922 | { | 923 | { |
@@ -954,7 +955,6 @@ void blk_mq_start_hw_queues(struct request_queue *q) | |||
954 | } | 955 | } |
955 | EXPORT_SYMBOL(blk_mq_start_hw_queues); | 956 | EXPORT_SYMBOL(blk_mq_start_hw_queues); |
956 | 957 | ||
957 | |||
958 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) | 958 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) |
959 | { | 959 | { |
960 | struct blk_mq_hw_ctx *hctx; | 960 | struct blk_mq_hw_ctx *hctx; |
@@ -1423,7 +1423,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
1423 | size_t rq_size, left; | 1423 | size_t rq_size, left; |
1424 | 1424 | ||
1425 | tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, | 1425 | tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, |
1426 | set->numa_node); | 1426 | set->numa_node, |
1427 | BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); | ||
1427 | if (!tags) | 1428 | if (!tags) |
1428 | return NULL; | 1429 | return NULL; |
1429 | 1430 | ||
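In blk-mq.c, blk_mq_run_queues() loses its async argument and its export and becomes static; because blk_mq_freeze_queue_start() calls it before its definition, a forward declaration is added near the top of the file, while blk_mq_freeze_queue() gains an EXPORT_SYMBOL_GPL for the upcoming DM users. A compact sketch of the same file-local pattern, with placeholder names:

/*
 * A static helper called above its definition needs a forward declaration,
 * and dropping the EXPORT_SYMBOL keeps it out of the module symbol table.
 */
static void run_all_queues(void);               /* forward declaration */

static void freeze_start(void)
{
        run_all_queues();                       /* caller precedes the body */
}

static void run_all_queues(void)
{
        /* walk the hardware queues and kick each one synchronously */
}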
diff --git a/block/blk-tag.c b/block/blk-tag.c index a185b86741e5..f0344e6939d5 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c | |||
@@ -119,7 +119,7 @@ fail: | |||
119 | } | 119 | } |
120 | 120 | ||
121 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | 121 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, |
122 | int depth) | 122 | int depth, int alloc_policy) |
123 | { | 123 | { |
124 | struct blk_queue_tag *tags; | 124 | struct blk_queue_tag *tags; |
125 | 125 | ||
@@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | |||
131 | goto fail; | 131 | goto fail; |
132 | 132 | ||
133 | atomic_set(&tags->refcnt, 1); | 133 | atomic_set(&tags->refcnt, 1); |
134 | tags->alloc_policy = alloc_policy; | ||
135 | tags->next_tag = 0; | ||
134 | return tags; | 136 | return tags; |
135 | fail: | 137 | fail: |
136 | kfree(tags); | 138 | kfree(tags); |
@@ -140,10 +142,11 @@ fail: | |||
140 | /** | 142 | /** |
141 | * blk_init_tags - initialize the tag info for an external tag map | 143 | * blk_init_tags - initialize the tag info for an external tag map |
142 | * @depth: the maximum queue depth supported | 144 | * @depth: the maximum queue depth supported |
145 | * @alloc_policy: tag allocation policy | ||
143 | **/ | 146 | **/ |
144 | struct blk_queue_tag *blk_init_tags(int depth) | 147 | struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy) |
145 | { | 148 | { |
146 | return __blk_queue_init_tags(NULL, depth); | 149 | return __blk_queue_init_tags(NULL, depth, alloc_policy); |
147 | } | 150 | } |
148 | EXPORT_SYMBOL(blk_init_tags); | 151 | EXPORT_SYMBOL(blk_init_tags); |
149 | 152 | ||
@@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags); | |||
152 | * @q: the request queue for the device | 155 | * @q: the request queue for the device |
153 | * @depth: the maximum queue depth supported | 156 | * @depth: the maximum queue depth supported |
154 | * @tags: the tag to use | 157 | * @tags: the tag to use |
158 | * @alloc_policy: tag allocation policy | ||
155 | * | 159 | * |
156 | * Queue lock must be held here if the function is called to resize an | 160 | * Queue lock must be held here if the function is called to resize an |
157 | * existing map. | 161 | * existing map. |
158 | **/ | 162 | **/ |
159 | int blk_queue_init_tags(struct request_queue *q, int depth, | 163 | int blk_queue_init_tags(struct request_queue *q, int depth, |
160 | struct blk_queue_tag *tags) | 164 | struct blk_queue_tag *tags, int alloc_policy) |
161 | { | 165 | { |
162 | int rc; | 166 | int rc; |
163 | 167 | ||
164 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | 168 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); |
165 | 169 | ||
166 | if (!tags && !q->queue_tags) { | 170 | if (!tags && !q->queue_tags) { |
167 | tags = __blk_queue_init_tags(q, depth); | 171 | tags = __blk_queue_init_tags(q, depth, alloc_policy); |
168 | 172 | ||
169 | if (!tags) | 173 | if (!tags) |
170 | return -ENOMEM; | 174 | return -ENOMEM; |
@@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) | |||
344 | } | 348 | } |
345 | 349 | ||
346 | do { | 350 | do { |
347 | tag = find_first_zero_bit(bqt->tag_map, max_depth); | 351 | if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) { |
348 | if (tag >= max_depth) | 352 | tag = find_first_zero_bit(bqt->tag_map, max_depth); |
349 | return 1; | 353 | if (tag >= max_depth) |
354 | return 1; | ||
355 | } else { | ||
356 | int start = bqt->next_tag; | ||
357 | int size = min_t(int, bqt->max_depth, max_depth + start); | ||
358 | tag = find_next_zero_bit(bqt->tag_map, size, start); | ||
359 | if (tag >= size && start + size > bqt->max_depth) { | ||
360 | size = start + size - bqt->max_depth; | ||
361 | tag = find_first_zero_bit(bqt->tag_map, size); | ||
362 | } | ||
363 | if (tag >= size) | ||
364 | return 1; | ||
365 | } | ||
350 | 366 | ||
351 | } while (test_and_set_bit_lock(tag, bqt->tag_map)); | 367 | } while (test_and_set_bit_lock(tag, bqt->tag_map)); |
352 | /* | 368 | /* |
@@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) | |||
354 | * See blk_queue_end_tag for details. | 370 | * See blk_queue_end_tag for details. |
355 | */ | 371 | */ |
356 | 372 | ||
373 | bqt->next_tag = (tag + 1) % bqt->max_depth; | ||
357 | rq->cmd_flags |= REQ_QUEUED; | 374 | rq->cmd_flags |= REQ_QUEUED; |
358 | rq->tag = tag; | 375 | rq->tag = tag; |
359 | bqt->tag_index[tag] = rq; | 376 | bqt->tag_index[tag] = rq; |
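The legacy blk-tag path gains the same policy choice: FIFO keeps the old find_first_zero_bit() scan, while the round-robin case starts at next_tag, wraps around once to the low part of the map if the upper part is exhausted, and then advances next_tag past the tag it handed out. A userspace model of that wrap-around search, assuming a plain boolean map instead of the kernel bitmap helpers and omitting the lockless test_and_set_bit_lock() retry loop:

#include <stdbool.h>

#define NO_TAG  (-1)

/*
 * Round-robin scan: start at the cursor, wrap past the end of the map at
 * most once, and advance the cursor beyond whatever tag was handed out so
 * subsequent allocations keep walking forward through the tag space.
 */
static int rr_find_free_tag(const bool *in_use, int depth, int *next_tag)
{
        for (int i = 0; i < depth; i++) {
                int tag = (*next_tag + i) % depth;

                if (!in_use[tag]) {
                        *next_tag = (tag + 1) % depth;
                        return tag;
                }
        }
        return NO_TAG;
}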
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6f2751d305de..5da8e6e9ab4b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -3590,6 +3590,11 @@ retry: | |||
3590 | 3590 | ||
3591 | blkcg = bio_blkcg(bio); | 3591 | blkcg = bio_blkcg(bio); |
3592 | cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); | 3592 | cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); |
3593 | if (!cfqg) { | ||
3594 | cfqq = &cfqd->oom_cfqq; | ||
3595 | goto out; | ||
3596 | } | ||
3597 | |||
3593 | cfqq = cic_to_cfqq(cic, is_sync); | 3598 | cfqq = cic_to_cfqq(cic, is_sync); |
3594 | 3599 | ||
3595 | /* | 3600 | /* |
@@ -3626,7 +3631,7 @@ retry: | |||
3626 | } else | 3631 | } else |
3627 | cfqq = &cfqd->oom_cfqq; | 3632 | cfqq = &cfqd->oom_cfqq; |
3628 | } | 3633 | } |
3629 | 3634 | out: | |
3630 | if (new_cfqq) | 3635 | if (new_cfqq) |
3631 | kmem_cache_free(cfq_pool, new_cfqq); | 3636 | kmem_cache_free(cfq_pool, new_cfqq); |
3632 | 3637 | ||
@@ -3656,12 +3661,17 @@ static struct cfq_queue * | |||
3656 | cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, | 3661 | cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, |
3657 | struct bio *bio, gfp_t gfp_mask) | 3662 | struct bio *bio, gfp_t gfp_mask) |
3658 | { | 3663 | { |
3659 | const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); | 3664 | int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); |
3660 | const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); | 3665 | int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); |
3661 | struct cfq_queue **async_cfqq = NULL; | 3666 | struct cfq_queue **async_cfqq = NULL; |
3662 | struct cfq_queue *cfqq = NULL; | 3667 | struct cfq_queue *cfqq = NULL; |
3663 | 3668 | ||
3664 | if (!is_sync) { | 3669 | if (!is_sync) { |
3670 | if (!ioprio_valid(cic->ioprio)) { | ||
3671 | struct task_struct *tsk = current; | ||
3672 | ioprio = task_nice_ioprio(tsk); | ||
3673 | ioprio_class = task_nice_ioclass(tsk); | ||
3674 | } | ||
3665 | async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); | 3675 | async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); |
3666 | cfqq = *async_cfqq; | 3676 | cfqq = *async_cfqq; |
3667 | } | 3677 | } |
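The CFQ hunks cover two fixes: cfq_lookup_create_cfqg() can now fail under memory pressure, so the lookup path falls back to the preallocated oom_cfqq instead of dereferencing a NULL group; and requests whose io context carries no valid ioprio now derive class and priority from the task's nice level rather than reusing stale values. A generic sketch of the OOM-fallback pattern, with illustrative names rather than CFQ's:

struct cgroup_like { int weight; };                         /* illustrative type */

static struct cgroup_like oom_fallback = { .weight = 1 };   /* preallocated at init */

/*
 * The I/O path never dereferences NULL: if the per-group object could not
 * be allocated, the preallocated fallback is used and the request is still
 * serviced, just without per-group accounting.
 */
static struct cgroup_like *pick_group(struct cgroup_like *allocated)
{
        return allocated ? allocated : &oom_fallback;
}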
diff --git a/block/ioctl.c b/block/ioctl.c index 6c7bf903742f..7d8befde2aca 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, | |||
198 | if (start + len > (i_size_read(bdev->bd_inode) >> 9)) | 198 | if (start + len > (i_size_read(bdev->bd_inode) >> 9)) |
199 | return -EINVAL; | 199 | return -EINVAL; |
200 | 200 | ||
201 | return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); | 201 | return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false); |
202 | } | 202 | } |
203 | 203 | ||
204 | static int put_ushort(unsigned long arg, unsigned short val) | 204 | static int put_ushort(unsigned long arg, unsigned short val) |
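blkdev_issue_zeroout() now takes a trailing flag selecting whether discard may be used to satisfy the request; the BLKZEROOUT ioctl passes false, so userspace always gets explicitly written zeroes. A minimal userspace sketch of driving this path, where /dev/sdX is a placeholder device node and the range is a byte-based {start, length} pair that, per the surrounding checks, must be 512-byte aligned and within the device:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
        uint64_t range[2] = { 0, 1 << 20 };     /* byte offset and length */
        int fd = open("/dev/sdX", O_WRONLY);    /* placeholder device node */

        if (fd < 0 || ioctl(fd, BLKZEROOUT, range) < 0)
                perror("BLKZEROOUT");
        return 0;
}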
diff --git a/block/partitions/check.c b/block/partitions/check.c index 9ac1df74f699..16118d11dbfc 100644 --- a/block/partitions/check.c +++ b/block/partitions/check.c | |||
@@ -184,12 +184,12 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
184 | if (err) | 184 | if (err) |
185 | /* The partition is unrecognized. So report I/O errors if there were any */ | 185 | /* The partition is unrecognized. So report I/O errors if there were any */ |
186 | res = err; | 186 | res = err; |
187 | if (!res) | 187 | if (res) { |
188 | strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); | 188 | if (warn_no_part) |
189 | else if (warn_no_part) | 189 | strlcat(state->pp_buf, |
190 | strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); | 190 | " unable to read partition table\n", PAGE_SIZE); |
191 | 191 | printk(KERN_INFO "%s", state->pp_buf); | |
192 | printk(KERN_INFO "%s", state->pp_buf); | 192 | } |
193 | 193 | ||
194 | free_page((unsigned long)state->pp_buf); | 194 | free_page((unsigned long)state->pp_buf); |
195 | free_partitions(state); | 195 | free_partitions(state); |
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 28163fad3c5d..e1f71c396193 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -332,7 +332,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, | |||
332 | 332 | ||
333 | ret = 0; | 333 | ret = 0; |
334 | if (hdr->iovec_count) { | 334 | if (hdr->iovec_count) { |
335 | size_t iov_data_len; | 335 | struct iov_iter i; |
336 | struct iovec *iov = NULL; | 336 | struct iovec *iov = NULL; |
337 | 337 | ||
338 | ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, | 338 | ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, |
@@ -342,20 +342,11 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, | |||
342 | goto out_free_cdb; | 342 | goto out_free_cdb; |
343 | } | 343 | } |
344 | 344 | ||
345 | iov_data_len = ret; | ||
346 | ret = 0; | ||
347 | |||
348 | /* SG_IO howto says that the shorter of the two wins */ | 345 | /* SG_IO howto says that the shorter of the two wins */ |
349 | if (hdr->dxfer_len < iov_data_len) { | 346 | iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count, |
350 | hdr->iovec_count = iov_shorten(iov, | 347 | min_t(unsigned, ret, hdr->dxfer_len)); |
351 | hdr->iovec_count, | ||
352 | hdr->dxfer_len); | ||
353 | iov_data_len = hdr->dxfer_len; | ||
354 | } | ||
355 | 348 | ||
356 | ret = blk_rq_map_user_iov(q, rq, NULL, (struct sg_iovec *) iov, | 349 | ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL); |
357 | hdr->iovec_count, | ||
358 | iov_data_len, GFP_KERNEL); | ||
359 | kfree(iov); | 350 | kfree(iov); |
360 | } else if (hdr->dxfer_len) | 351 | } else if (hdr->dxfer_len) |
361 | ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, | 352 | ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, |
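The SG_IO path now hands blk_rq_map_user_iov() an iov_iter built directly from the copied user iovecs, with the iterator count clamped to min(total iovec bytes, hdr->dxfer_len); that replaces the manual iov_shorten() trimming while preserving the SG_IO howto rule that the shorter of the two lengths wins. A userspace sketch of that length rule, summing the iovecs directly instead of using an iov_iter:

#include <stddef.h>
#include <sys/uio.h>

/*
 * "Shorter of the two wins": the effective transfer length is the smaller
 * of dxfer_len and the total iovec payload, which is exactly what clamping
 * the iterator count achieves without rewriting the iovec array.
 */
static size_t sgio_transfer_len(const struct iovec *iov, int iovcnt,
                                size_t dxfer_len)
{
        size_t total = 0;

        for (int i = 0; i < iovcnt; i++)
                total += iov[i].iov_len;

        return total < dxfer_len ? total : dxfer_len;
}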