-rw-r--r--  Documentation/filesystems/xip.txt   |  15
-rw-r--r--  arch/powerpc/sysdev/axonram.c       |  17
-rw-r--r--  block/bio.c                         | 426
-rw-r--r--  block/blk-core.c                    |  11
-rw-r--r--  block/blk-lib.c                     |  30
-rw-r--r--  block/blk-map.c                     | 172
-rw-r--r--  block/blk-merge.c                   |  41
-rw-r--r--  block/blk-mq-tag.c                  |  81
-rw-r--r--  block/blk-mq-tag.h                  |   4
-rw-r--r--  block/blk-mq.c                      |  13
-rw-r--r--  block/blk-tag.c                     |  33
-rw-r--r--  block/cfq-iosched.c                 |  16
-rw-r--r--  block/ioctl.c                       |   2
-rw-r--r--  block/partitions/check.c            |  12
-rw-r--r--  block/scsi_ioctl.c                  |  17
-rw-r--r--  drivers/block/brd.c                 |  14
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  |   2
-rw-r--r--  drivers/block/osdblk.c              |   2
-rw-r--r--  drivers/md/dm.c                     |   1
-rw-r--r--  drivers/s390/block/dcssblk.c        |  21
-rw-r--r--  drivers/scsi/scsi_lib.c             |   2
-rw-r--r--  drivers/scsi/scsi_scan.c            |   3
-rw-r--r--  drivers/scsi/sg.c                   |  15
-rw-r--r--  fs/block_dev.c                      |  40
-rw-r--r--  fs/ext2/xip.c                       |  31
-rw-r--r--  include/linux/bio.h                 |  12
-rw-r--r--  include/linux/blk-mq.h              |  10
-rw-r--r--  include/linux/blkdev.h              |  25
-rw-r--r--  include/scsi/scsi_host.h            |   3
-rw-r--r--  include/scsi/scsi_tcq.h             |   3
30 files changed, 499 insertions, 575 deletions
diff --git a/Documentation/filesystems/xip.txt b/Documentation/filesystems/xip.txt
index 0466ee569278..b77472949ede 100644
--- a/Documentation/filesystems/xip.txt
+++ b/Documentation/filesystems/xip.txt
@@ -28,12 +28,15 @@ Implementation
 Execute-in-place is implemented in three steps: block device operation,
 address space operation, and file operations.
 
-A block device operation named direct_access is used to retrieve a
-reference (pointer) to a block on-disk. The reference is supposed to be
-cpu-addressable, physical address and remain valid until the release operation
-is performed. A struct block_device reference is used to address the device,
-and a sector_t argument is used to identify the individual block. As an
-alternative, memory technology devices can be used for this.
+A block device operation named direct_access is used to translate the
+block device sector number to a page frame number (pfn) that identifies
+the physical page for the memory. It also returns a kernel virtual
+address that can be used to access the memory.
+
+The direct_access method takes a 'size' parameter that indicates the
+number of bytes being requested. The function should return the number
+of bytes that can be contiguously accessed at that offset. It may also
+return a negative errno if an error occurs.
 
 The block device operation is optional, these block devices support it as of
 today:
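
For illustration only, the sketch below shows how a hypothetical caller might consume the direct_access contract described above. The function name my_map_sector and its error policy are editorial assumptions, not part of this series, but the method signature matches the one being introduced.

/*
 * Hypothetical caller sketch: ask a block device for a direct mapping of
 * 'size' bytes at 'sector'.  direct_access() fills in a kernel virtual
 * address and a pfn, and returns how many bytes are contiguously
 * accessible there, or a negative errno.
 */
static long my_map_sector(struct block_device *bdev, sector_t sector,
			  void **kaddr, unsigned long *pfn, long size)
{
	const struct block_device_operations *ops = bdev->bd_disk->fops;
	long avail;

	if (!ops->direct_access)
		return -EOPNOTSUPP;

	avail = ops->direct_access(bdev, sector, kaddr, pfn, size);
	if (avail < 0)
		return avail;	/* propagate the errno from the driver */
	if (avail < size)
		return -ENXIO;	/* fewer contiguous bytes than requested */
	return avail;
}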
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index f532c92bf99d..20f8afe855d1 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -139,26 +139,17 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  * axon_ram_direct_access - direct_access() method for block device
  * @device, @sector, @data: see block_device_operations method
  */
-static int
+static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void **kaddr, unsigned long *pfn)
+		       void **kaddr, unsigned long *pfn, long size)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
-	loff_t offset;
-
-	offset = sector;
-	if (device->bd_part != NULL)
-		offset += device->bd_part->start_sect;
-	offset <<= AXON_RAM_SECTOR_SHIFT;
-	if (offset >= bank->size) {
-		dev_err(&bank->device->dev, "Access outside of address space\n");
-		return -ERANGE;
-	}
+	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
 
 	*kaddr = (void *)(bank->ph_addr + offset);
 	*pfn = virt_to_phys(kaddr) >> PAGE_SHIFT;
 
-	return 0;
+	return bank->size - offset;
 }
 
 static const struct block_device_operations axon_ram_devops = {
diff --git a/block/bio.c b/block/bio.c
index 471d7382c7d1..f66a4eae16ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,7 +28,6 @@
28#include <linux/mempool.h> 28#include <linux/mempool.h>
29#include <linux/workqueue.h> 29#include <linux/workqueue.h>
30#include <linux/cgroup.h> 30#include <linux/cgroup.h>
31#include <scsi/sg.h> /* for struct sg_iovec */
32 31
33#include <trace/events/block.h> 32#include <trace/events/block.h>
34 33
@@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
1022EXPORT_SYMBOL(bio_copy_data); 1021EXPORT_SYMBOL(bio_copy_data);
1023 1022
1024struct bio_map_data { 1023struct bio_map_data {
1025 int nr_sgvecs;
1026 int is_our_pages; 1024 int is_our_pages;
1027 struct sg_iovec sgvecs[]; 1025 struct iov_iter iter;
1026 struct iovec iov[];
1028}; 1027};
1029 1028
1030static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
1031 const struct sg_iovec *iov, int iov_count,
1032 int is_our_pages)
1033{
1034 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
1035 bmd->nr_sgvecs = iov_count;
1036 bmd->is_our_pages = is_our_pages;
1037 bio->bi_private = bmd;
1038}
1039
1040static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, 1029static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
1041 gfp_t gfp_mask) 1030 gfp_t gfp_mask)
1042{ 1031{
@@ -1044,85 +1033,101 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
1044 return NULL; 1033 return NULL;
1045 1034
1046 return kmalloc(sizeof(struct bio_map_data) + 1035 return kmalloc(sizeof(struct bio_map_data) +
1047 sizeof(struct sg_iovec) * iov_count, gfp_mask); 1036 sizeof(struct iovec) * iov_count, gfp_mask);
1048} 1037}
1049 1038
1050static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, 1039/**
1051 int to_user, int from_user, int do_free_page) 1040 * bio_copy_from_iter - copy all pages from iov_iter to bio
1041 * @bio: The &struct bio which describes the I/O as destination
1042 * @iter: iov_iter as source
1043 *
1044 * Copy all pages from iov_iter to bio.
1045 * Returns 0 on success, or error on failure.
1046 */
1047static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
1052{ 1048{
1053 int ret = 0, i; 1049 int i;
1054 struct bio_vec *bvec; 1050 struct bio_vec *bvec;
1055 int iov_idx = 0;
1056 unsigned int iov_off = 0;
1057 1051
1058 bio_for_each_segment_all(bvec, bio, i) { 1052 bio_for_each_segment_all(bvec, bio, i) {
1059 char *bv_addr = page_address(bvec->bv_page); 1053 ssize_t ret;
1060 unsigned int bv_len = bvec->bv_len;
1061 1054
1062 while (bv_len && iov_idx < iov_count) { 1055 ret = copy_page_from_iter(bvec->bv_page,
1063 unsigned int bytes; 1056 bvec->bv_offset,
1064 char __user *iov_addr; 1057 bvec->bv_len,
1058 &iter);
1065 1059
1066 bytes = min_t(unsigned int, 1060 if (!iov_iter_count(&iter))
1067 iov[iov_idx].iov_len - iov_off, bv_len); 1061 break;
1068 iov_addr = iov[iov_idx].iov_base + iov_off;
1069 1062
1070 if (!ret) { 1063 if (ret < bvec->bv_len)
1071 if (to_user) 1064 return -EFAULT;
1072 ret = copy_to_user(iov_addr, bv_addr, 1065 }
1073 bytes);
1074 1066
1075 if (from_user) 1067 return 0;
1076 ret = copy_from_user(bv_addr, iov_addr, 1068}
1077 bytes);
1078 1069
1079 if (ret) 1070/**
1080 ret = -EFAULT; 1071 * bio_copy_to_iter - copy all pages from bio to iov_iter
1081 } 1072 * @bio: The &struct bio which describes the I/O as source
1073 * @iter: iov_iter as destination
1074 *
1075 * Copy all pages from bio to iov_iter.
1076 * Returns 0 on success, or error on failure.
1077 */
1078static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
1079{
1080 int i;
1081 struct bio_vec *bvec;
1082 1082
1083 bv_len -= bytes; 1083 bio_for_each_segment_all(bvec, bio, i) {
1084 bv_addr += bytes; 1084 ssize_t ret;
1085 iov_addr += bytes;
1086 iov_off += bytes;
1087 1085
1088 if (iov[iov_idx].iov_len == iov_off) { 1086 ret = copy_page_to_iter(bvec->bv_page,
1089 iov_idx++; 1087 bvec->bv_offset,
1090 iov_off = 0; 1088 bvec->bv_len,
1091 } 1089 &iter);
1092 } 1090
1091 if (!iov_iter_count(&iter))
1092 break;
1093 1093
1094 if (do_free_page) 1094 if (ret < bvec->bv_len)
1095 __free_page(bvec->bv_page); 1095 return -EFAULT;
1096 } 1096 }
1097 1097
1098 return ret; 1098 return 0;
1099}
1100
1101static void bio_free_pages(struct bio *bio)
1102{
1103 struct bio_vec *bvec;
1104 int i;
1105
1106 bio_for_each_segment_all(bvec, bio, i)
1107 __free_page(bvec->bv_page);
1099} 1108}
1100 1109
1101/** 1110/**
1102 * bio_uncopy_user - finish previously mapped bio 1111 * bio_uncopy_user - finish previously mapped bio
1103 * @bio: bio being terminated 1112 * @bio: bio being terminated
1104 * 1113 *
1105 * Free pages allocated from bio_copy_user() and write back data 1114 * Free pages allocated from bio_copy_user_iov() and write back data
1106 * to user space in case of a read. 1115 * to user space in case of a read.
1107 */ 1116 */
1108int bio_uncopy_user(struct bio *bio) 1117int bio_uncopy_user(struct bio *bio)
1109{ 1118{
1110 struct bio_map_data *bmd = bio->bi_private; 1119 struct bio_map_data *bmd = bio->bi_private;
1111 struct bio_vec *bvec; 1120 int ret = 0;
1112 int ret = 0, i;
1113 1121
1114 if (!bio_flagged(bio, BIO_NULL_MAPPED)) { 1122 if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
1115 /* 1123 /*
1116 * if we're in a workqueue, the request is orphaned, so 1124 * if we're in a workqueue, the request is orphaned, so
1117 * don't copy into a random user address space, just free. 1125 * don't copy into a random user address space, just free.
1118 */ 1126 */
1119 if (current->mm) 1127 if (current->mm && bio_data_dir(bio) == READ)
1120 ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1128 ret = bio_copy_to_iter(bio, bmd->iter);
1121 bio_data_dir(bio) == READ, 1129 if (bmd->is_our_pages)
1122 0, bmd->is_our_pages); 1130 bio_free_pages(bio);
1123 else if (bmd->is_our_pages)
1124 bio_for_each_segment_all(bvec, bio, i)
1125 __free_page(bvec->bv_page);
1126 } 1131 }
1127 kfree(bmd); 1132 kfree(bmd);
1128 bio_put(bio); 1133 bio_put(bio);
@@ -1132,12 +1137,10 @@ EXPORT_SYMBOL(bio_uncopy_user);
1132 1137
1133/** 1138/**
1134 * bio_copy_user_iov - copy user data to bio 1139 * bio_copy_user_iov - copy user data to bio
1135 * @q: destination block queue 1140 * @q: destination block queue
1136 * @map_data: pointer to the rq_map_data holding pages (if necessary) 1141 * @map_data: pointer to the rq_map_data holding pages (if necessary)
1137 * @iov: the iovec. 1142 * @iter: iovec iterator
1138 * @iov_count: number of elements in the iovec 1143 * @gfp_mask: memory allocation flags
1139 * @write_to_vm: bool indicating writing to pages or not
1140 * @gfp_mask: memory allocation flags
1141 * 1144 *
1142 * Prepares and returns a bio for indirect user io, bouncing data 1145 * Prepares and returns a bio for indirect user io, bouncing data
1143 * to/from kernel pages as necessary. Must be paired with 1146 * to/from kernel pages as necessary. Must be paired with
@@ -1145,25 +1148,25 @@ EXPORT_SYMBOL(bio_uncopy_user);
1145 */ 1148 */
1146struct bio *bio_copy_user_iov(struct request_queue *q, 1149struct bio *bio_copy_user_iov(struct request_queue *q,
1147 struct rq_map_data *map_data, 1150 struct rq_map_data *map_data,
1148 const struct sg_iovec *iov, int iov_count, 1151 const struct iov_iter *iter,
1149 int write_to_vm, gfp_t gfp_mask) 1152 gfp_t gfp_mask)
1150{ 1153{
1151 struct bio_map_data *bmd; 1154 struct bio_map_data *bmd;
1152 struct bio_vec *bvec;
1153 struct page *page; 1155 struct page *page;
1154 struct bio *bio; 1156 struct bio *bio;
1155 int i, ret; 1157 int i, ret;
1156 int nr_pages = 0; 1158 int nr_pages = 0;
1157 unsigned int len = 0; 1159 unsigned int len = iter->count;
1158 unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0; 1160 unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
1159 1161
1160 for (i = 0; i < iov_count; i++) { 1162 for (i = 0; i < iter->nr_segs; i++) {
1161 unsigned long uaddr; 1163 unsigned long uaddr;
1162 unsigned long end; 1164 unsigned long end;
1163 unsigned long start; 1165 unsigned long start;
1164 1166
1165 uaddr = (unsigned long)iov[i].iov_base; 1167 uaddr = (unsigned long) iter->iov[i].iov_base;
1166 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1168 end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
1169 >> PAGE_SHIFT;
1167 start = uaddr >> PAGE_SHIFT; 1170 start = uaddr >> PAGE_SHIFT;
1168 1171
1169 /* 1172 /*
@@ -1173,22 +1176,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1173 return ERR_PTR(-EINVAL); 1176 return ERR_PTR(-EINVAL);
1174 1177
1175 nr_pages += end - start; 1178 nr_pages += end - start;
1176 len += iov[i].iov_len;
1177 } 1179 }
1178 1180
1179 if (offset) 1181 if (offset)
1180 nr_pages++; 1182 nr_pages++;
1181 1183
1182 bmd = bio_alloc_map_data(iov_count, gfp_mask); 1184 bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
1183 if (!bmd) 1185 if (!bmd)
1184 return ERR_PTR(-ENOMEM); 1186 return ERR_PTR(-ENOMEM);
1185 1187
1188 /*
1189 * We need to do a deep copy of the iov_iter including the iovecs.
1190 * The caller provided iov might point to an on-stack or otherwise
1191 * shortlived one.
1192 */
1193 bmd->is_our_pages = map_data ? 0 : 1;
1194 memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
1195 iov_iter_init(&bmd->iter, iter->type, bmd->iov,
1196 iter->nr_segs, iter->count);
1197
1186 ret = -ENOMEM; 1198 ret = -ENOMEM;
1187 bio = bio_kmalloc(gfp_mask, nr_pages); 1199 bio = bio_kmalloc(gfp_mask, nr_pages);
1188 if (!bio) 1200 if (!bio)
1189 goto out_bmd; 1201 goto out_bmd;
1190 1202
1191 if (!write_to_vm) 1203 if (iter->type & WRITE)
1192 bio->bi_rw |= REQ_WRITE; 1204 bio->bi_rw |= REQ_WRITE;
1193 1205
1194 ret = 0; 1206 ret = 0;
@@ -1236,20 +1248,18 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1236 /* 1248 /*
1237 * success 1249 * success
1238 */ 1250 */
1239 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || 1251 if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
1240 (map_data && map_data->from_user)) { 1252 (map_data && map_data->from_user)) {
1241 ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0); 1253 ret = bio_copy_from_iter(bio, *iter);
1242 if (ret) 1254 if (ret)
1243 goto cleanup; 1255 goto cleanup;
1244 } 1256 }
1245 1257
1246 bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); 1258 bio->bi_private = bmd;
1247 return bio; 1259 return bio;
1248cleanup: 1260cleanup:
1249 if (!map_data) 1261 if (!map_data)
1250 bio_for_each_segment_all(bvec, bio, i) 1262 bio_free_pages(bio);
1251 __free_page(bvec->bv_page);
1252
1253 bio_put(bio); 1263 bio_put(bio);
1254out_bmd: 1264out_bmd:
1255 kfree(bmd); 1265 kfree(bmd);
@@ -1257,46 +1267,30 @@ out_bmd:
1257} 1267}
1258 1268
1259/** 1269/**
1260 * bio_copy_user - copy user data to bio 1270 * bio_map_user_iov - map user iovec into bio
1261 * @q: destination block queue 1271 * @q: the struct request_queue for the bio
1262 * @map_data: pointer to the rq_map_data holding pages (if necessary) 1272 * @iter: iovec iterator
1263 * @uaddr: start of user address 1273 * @gfp_mask: memory allocation flags
1264 * @len: length in bytes
1265 * @write_to_vm: bool indicating writing to pages or not
1266 * @gfp_mask: memory allocation flags
1267 * 1274 *
1268 * Prepares and returns a bio for indirect user io, bouncing data 1275 * Map the user space address into a bio suitable for io to a block
1269 * to/from kernel pages as necessary. Must be paired with 1276 * device. Returns an error pointer in case of error.
1270 * call bio_uncopy_user() on io completion.
1271 */ 1277 */
1272struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, 1278struct bio *bio_map_user_iov(struct request_queue *q,
1273 unsigned long uaddr, unsigned int len, 1279 const struct iov_iter *iter,
1274 int write_to_vm, gfp_t gfp_mask) 1280 gfp_t gfp_mask)
1275{ 1281{
1276 struct sg_iovec iov; 1282 int j;
1277
1278 iov.iov_base = (void __user *)uaddr;
1279 iov.iov_len = len;
1280
1281 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
1282}
1283EXPORT_SYMBOL(bio_copy_user);
1284
1285static struct bio *__bio_map_user_iov(struct request_queue *q,
1286 struct block_device *bdev,
1287 const struct sg_iovec *iov, int iov_count,
1288 int write_to_vm, gfp_t gfp_mask)
1289{
1290 int i, j;
1291 int nr_pages = 0; 1283 int nr_pages = 0;
1292 struct page **pages; 1284 struct page **pages;
1293 struct bio *bio; 1285 struct bio *bio;
1294 int cur_page = 0; 1286 int cur_page = 0;
1295 int ret, offset; 1287 int ret, offset;
1288 struct iov_iter i;
1289 struct iovec iov;
1296 1290
1297 for (i = 0; i < iov_count; i++) { 1291 iov_for_each(iov, i, *iter) {
1298 unsigned long uaddr = (unsigned long)iov[i].iov_base; 1292 unsigned long uaddr = (unsigned long) iov.iov_base;
1299 unsigned long len = iov[i].iov_len; 1293 unsigned long len = iov.iov_len;
1300 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1294 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1301 unsigned long start = uaddr >> PAGE_SHIFT; 1295 unsigned long start = uaddr >> PAGE_SHIFT;
1302 1296
@@ -1326,16 +1320,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
1326 if (!pages) 1320 if (!pages)
1327 goto out; 1321 goto out;
1328 1322
1329 for (i = 0; i < iov_count; i++) { 1323 iov_for_each(iov, i, *iter) {
1330 unsigned long uaddr = (unsigned long)iov[i].iov_base; 1324 unsigned long uaddr = (unsigned long) iov.iov_base;
1331 unsigned long len = iov[i].iov_len; 1325 unsigned long len = iov.iov_len;
1332 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1326 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1333 unsigned long start = uaddr >> PAGE_SHIFT; 1327 unsigned long start = uaddr >> PAGE_SHIFT;
1334 const int local_nr_pages = end - start; 1328 const int local_nr_pages = end - start;
1335 const int page_limit = cur_page + local_nr_pages; 1329 const int page_limit = cur_page + local_nr_pages;
1336 1330
1337 ret = get_user_pages_fast(uaddr, local_nr_pages, 1331 ret = get_user_pages_fast(uaddr, local_nr_pages,
1338 write_to_vm, &pages[cur_page]); 1332 (iter->type & WRITE) != WRITE,
1333 &pages[cur_page]);
1339 if (ret < local_nr_pages) { 1334 if (ret < local_nr_pages) {
1340 ret = -EFAULT; 1335 ret = -EFAULT;
1341 goto out_unmap; 1336 goto out_unmap;
@@ -1375,72 +1370,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
1375 /* 1370 /*
1376 * set data direction, and check if mapped pages need bouncing 1371 * set data direction, and check if mapped pages need bouncing
1377 */ 1372 */
1378 if (!write_to_vm) 1373 if (iter->type & WRITE)
1379 bio->bi_rw |= REQ_WRITE; 1374 bio->bi_rw |= REQ_WRITE;
1380 1375
1381 bio->bi_bdev = bdev;
1382 bio->bi_flags |= (1 << BIO_USER_MAPPED); 1376 bio->bi_flags |= (1 << BIO_USER_MAPPED);
1383 return bio;
1384
1385 out_unmap:
1386 for (i = 0; i < nr_pages; i++) {
1387 if(!pages[i])
1388 break;
1389 page_cache_release(pages[i]);
1390 }
1391 out:
1392 kfree(pages);
1393 bio_put(bio);
1394 return ERR_PTR(ret);
1395}
1396
1397/**
1398 * bio_map_user - map user address into bio
1399 * @q: the struct request_queue for the bio
1400 * @bdev: destination block device
1401 * @uaddr: start of user address
1402 * @len: length in bytes
1403 * @write_to_vm: bool indicating writing to pages or not
1404 * @gfp_mask: memory allocation flags
1405 *
1406 * Map the user space address into a bio suitable for io to a block
1407 * device. Returns an error pointer in case of error.
1408 */
1409struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
1410 unsigned long uaddr, unsigned int len, int write_to_vm,
1411 gfp_t gfp_mask)
1412{
1413 struct sg_iovec iov;
1414
1415 iov.iov_base = (void __user *)uaddr;
1416 iov.iov_len = len;
1417
1418 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
1419}
1420EXPORT_SYMBOL(bio_map_user);
1421
1422/**
1423 * bio_map_user_iov - map user sg_iovec table into bio
1424 * @q: the struct request_queue for the bio
1425 * @bdev: destination block device
1426 * @iov: the iovec.
1427 * @iov_count: number of elements in the iovec
1428 * @write_to_vm: bool indicating writing to pages or not
1429 * @gfp_mask: memory allocation flags
1430 *
1431 * Map the user space address into a bio suitable for io to a block
1432 * device. Returns an error pointer in case of error.
1433 */
1434struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
1435 const struct sg_iovec *iov, int iov_count,
1436 int write_to_vm, gfp_t gfp_mask)
1437{
1438 struct bio *bio;
1439
1440 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
1441 gfp_mask);
1442 if (IS_ERR(bio))
1443 return bio;
1444 1377
1445 /* 1378 /*
1446 * subtle -- if __bio_map_user() ended up bouncing a bio, 1379 * subtle -- if __bio_map_user() ended up bouncing a bio,
@@ -1449,8 +1382,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
1449 * reference to it 1382 * reference to it
1450 */ 1383 */
1451 bio_get(bio); 1384 bio_get(bio);
1452
1453 return bio; 1385 return bio;
1386
1387 out_unmap:
1388 for (j = 0; j < nr_pages; j++) {
1389 if (!pages[j])
1390 break;
1391 page_cache_release(pages[j]);
1392 }
1393 out:
1394 kfree(pages);
1395 bio_put(bio);
1396 return ERR_PTR(ret);
1454} 1397}
1455 1398
1456static void __bio_unmap_user(struct bio *bio) 1399static void __bio_unmap_user(struct bio *bio)
@@ -1492,8 +1435,18 @@ static void bio_map_kern_endio(struct bio *bio, int err)
1492 bio_put(bio); 1435 bio_put(bio);
1493} 1436}
1494 1437
1495static struct bio *__bio_map_kern(struct request_queue *q, void *data, 1438/**
1496 unsigned int len, gfp_t gfp_mask) 1439 * bio_map_kern - map kernel address into bio
1440 * @q: the struct request_queue for the bio
1441 * @data: pointer to buffer to map
1442 * @len: length in bytes
1443 * @gfp_mask: allocation flags for bio allocation
1444 *
1445 * Map the kernel address into a bio suitable for io to a block
1446 * device. Returns an error pointer in case of error.
1447 */
1448struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1449 gfp_t gfp_mask)
1497{ 1450{
1498 unsigned long kaddr = (unsigned long)data; 1451 unsigned long kaddr = (unsigned long)data;
1499 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1452 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -1517,8 +1470,11 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1517 bytes = len; 1470 bytes = len;
1518 1471
1519 if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, 1472 if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
1520 offset) < bytes) 1473 offset) < bytes) {
1521 break; 1474 /* we don't support partial mappings */
1475 bio_put(bio);
1476 return ERR_PTR(-EINVAL);
1477 }
1522 1478
1523 data += bytes; 1479 data += bytes;
1524 len -= bytes; 1480 len -= bytes;
@@ -1528,57 +1484,26 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1528 bio->bi_end_io = bio_map_kern_endio; 1484 bio->bi_end_io = bio_map_kern_endio;
1529 return bio; 1485 return bio;
1530} 1486}
1487EXPORT_SYMBOL(bio_map_kern);
1531 1488
1532/** 1489static void bio_copy_kern_endio(struct bio *bio, int err)
1533 * bio_map_kern - map kernel address into bio
1534 * @q: the struct request_queue for the bio
1535 * @data: pointer to buffer to map
1536 * @len: length in bytes
1537 * @gfp_mask: allocation flags for bio allocation
1538 *
1539 * Map the kernel address into a bio suitable for io to a block
1540 * device. Returns an error pointer in case of error.
1541 */
1542struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1543 gfp_t gfp_mask)
1544{ 1490{
1545 struct bio *bio; 1491 bio_free_pages(bio);
1546
1547 bio = __bio_map_kern(q, data, len, gfp_mask);
1548 if (IS_ERR(bio))
1549 return bio;
1550
1551 if (bio->bi_iter.bi_size == len)
1552 return bio;
1553
1554 /*
1555 * Don't support partial mappings.
1556 */
1557 bio_put(bio); 1492 bio_put(bio);
1558 return ERR_PTR(-EINVAL);
1559} 1493}
1560EXPORT_SYMBOL(bio_map_kern);
1561 1494
1562static void bio_copy_kern_endio(struct bio *bio, int err) 1495static void bio_copy_kern_endio_read(struct bio *bio, int err)
1563{ 1496{
1497 char *p = bio->bi_private;
1564 struct bio_vec *bvec; 1498 struct bio_vec *bvec;
1565 const int read = bio_data_dir(bio) == READ;
1566 struct bio_map_data *bmd = bio->bi_private;
1567 int i; 1499 int i;
1568 char *p = bmd->sgvecs[0].iov_base;
1569 1500
1570 bio_for_each_segment_all(bvec, bio, i) { 1501 bio_for_each_segment_all(bvec, bio, i) {
1571 char *addr = page_address(bvec->bv_page); 1502 memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
1572
1573 if (read)
1574 memcpy(p, addr, bvec->bv_len);
1575
1576 __free_page(bvec->bv_page);
1577 p += bvec->bv_len; 1503 p += bvec->bv_len;
1578 } 1504 }
1579 1505
1580 kfree(bmd); 1506 bio_copy_kern_endio(bio, err);
1581 bio_put(bio);
1582} 1507}
1583 1508
1584/** 1509/**
@@ -1595,28 +1520,59 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
1595struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, 1520struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1596 gfp_t gfp_mask, int reading) 1521 gfp_t gfp_mask, int reading)
1597{ 1522{
1523 unsigned long kaddr = (unsigned long)data;
1524 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1525 unsigned long start = kaddr >> PAGE_SHIFT;
1598 struct bio *bio; 1526 struct bio *bio;
1599 struct bio_vec *bvec; 1527 void *p = data;
1600 int i; 1528 int nr_pages = 0;
1529
1530 /*
1531 * Overflow, abort
1532 */
1533 if (end < start)
1534 return ERR_PTR(-EINVAL);
1601 1535
1602 bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); 1536 nr_pages = end - start;
1603 if (IS_ERR(bio)) 1537 bio = bio_kmalloc(gfp_mask, nr_pages);
1604 return bio; 1538 if (!bio)
1539 return ERR_PTR(-ENOMEM);
1605 1540
1606 if (!reading) { 1541 while (len) {
1607 void *p = data; 1542 struct page *page;
1543 unsigned int bytes = PAGE_SIZE;
1608 1544
1609 bio_for_each_segment_all(bvec, bio, i) { 1545 if (bytes > len)
1610 char *addr = page_address(bvec->bv_page); 1546 bytes = len;
1611 1547
1612 memcpy(addr, p, bvec->bv_len); 1548 page = alloc_page(q->bounce_gfp | gfp_mask);
1613 p += bvec->bv_len; 1549 if (!page)
1614 } 1550 goto cleanup;
1551
1552 if (!reading)
1553 memcpy(page_address(page), p, bytes);
1554
1555 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
1556 break;
1557
1558 len -= bytes;
1559 p += bytes;
1615 } 1560 }
1616 1561
1617 bio->bi_end_io = bio_copy_kern_endio; 1562 if (reading) {
1563 bio->bi_end_io = bio_copy_kern_endio_read;
1564 bio->bi_private = data;
1565 } else {
1566 bio->bi_end_io = bio_copy_kern_endio;
1567 bio->bi_rw |= REQ_WRITE;
1568 }
1618 1569
1619 return bio; 1570 return bio;
1571
1572cleanup:
1573 bio_free_pages(bio);
1574 bio_put(bio);
1575 return ERR_PTR(-ENOMEM);
1620} 1576}
1621EXPORT_SYMBOL(bio_copy_kern); 1577EXPORT_SYMBOL(bio_copy_kern);
1622 1578
diff --git a/block/blk-core.c b/block/blk-core.c
index 928aac29bccd..794c3e7f01cf 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2048,6 +2048,13 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
 		return -EIO;
 
+	if (q->mq_ops) {
+		if (blk_queue_io_stat(q))
+			blk_account_io_start(rq, true);
+		blk_mq_insert_request(rq, false, true, true);
+		return 0;
+	}
+
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2907,7 +2914,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
 	dst->cpu = src->cpu;
-	dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
+	dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
 	dst->cmd_type = src->cmd_type;
 	dst->__sector = blk_rq_pos(src);
 	dst->__data_len = blk_rq_bytes(src);
@@ -2945,8 +2952,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 	if (!bs)
 		bs = fs_bio_set;
 
-	blk_rq_init(NULL, rq);
-
 	__rq_for_each_bio(bio_src, rq_src) {
 		bio = bio_clone_fast(bio_src, gfp_mask, bs);
 		if (!bio)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 8411be3c19d3..7688ee3f5d72 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -283,24 +283,34 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
  * @sector: start sector
  * @nr_sects: number of sectors to write
  * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @discard: whether to discard the block range
  *
  * Description:
- *  Generate and issue number of bios with zerofiled pages.
+ *  Zero-fill a block range. If the discard flag is set and the block
+ *  device guarantees that subsequent READ operations to the block range
+ *  in question will return zeroes, the blocks will be discarded. Should
+ *  the discard request fail, if the discard flag is not set, or if
+ *  discard_zeroes_data is not supported, this function will resort to
+ *  zeroing the blocks manually, thus provisioning (allocating,
+ *  anchoring) them. If the block device supports the WRITE SAME command
+ *  blkdev_issue_zeroout() will use it to optimize the process of
+ *  clearing the block range. Otherwise the zeroing will be performed
+ *  using regular WRITE calls.
  */
 
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-			 sector_t nr_sects, gfp_t gfp_mask)
+			 sector_t nr_sects, gfp_t gfp_mask, bool discard)
 {
-	if (bdev_write_same(bdev)) {
-		unsigned char bdn[BDEVNAME_SIZE];
+	struct request_queue *q = bdev_get_queue(bdev);
 
-		if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
-					     ZERO_PAGE(0)))
-			return 0;
+	if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data &&
+	    blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0)
+		return 0;
 
-		bdevname(bdev, bdn);
-		pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
-	}
+	if (bdev_write_same(bdev) &&
+	    blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+				    ZERO_PAGE(0)) == 0)
+		return 0;
 
 	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
 }
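
As a hedged usage sketch (the wrapper name my_zero_range is assumed, but the five-argument prototype is the one added by this hunk), a caller that wants discard-based zeroing when the device guarantees zeroed reads would simply pass discard=true:

static int my_zero_range(struct block_device *bdev, sector_t sector,
			 sector_t nr_sects)
{
	/*
	 * true: let the block layer try a discard first when
	 * discard_zeroes_data is set; it falls back to WRITE SAME or
	 * plain zero writes otherwise.
	 */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL, true);
}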
diff --git a/block/blk-map.c b/block/blk-map.c
index f890d4345b0c..b8d2725324a6 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -5,7 +5,7 @@
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/bio.h> 6#include <linux/bio.h>
7#include <linux/blkdev.h> 7#include <linux/blkdev.h>
8#include <scsi/sg.h> /* for struct sg_iovec */ 8#include <linux/uio.h>
9 9
10#include "blk.h" 10#include "blk.h"
11 11
@@ -39,138 +39,12 @@ static int __blk_rq_unmap_user(struct bio *bio)
39 return ret; 39 return ret;
40} 40}
41 41
42static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
43 struct rq_map_data *map_data, void __user *ubuf,
44 unsigned int len, gfp_t gfp_mask)
45{
46 unsigned long uaddr;
47 struct bio *bio, *orig_bio;
48 int reading, ret;
49
50 reading = rq_data_dir(rq) == READ;
51
52 /*
53 * if alignment requirement is satisfied, map in user pages for
54 * direct dma. else, set up kernel bounce buffers
55 */
56 uaddr = (unsigned long) ubuf;
57 if (blk_rq_aligned(q, uaddr, len) && !map_data)
58 bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
59 else
60 bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
61
62 if (IS_ERR(bio))
63 return PTR_ERR(bio);
64
65 if (map_data && map_data->null_mapped)
66 bio->bi_flags |= (1 << BIO_NULL_MAPPED);
67
68 orig_bio = bio;
69 blk_queue_bounce(q, &bio);
70
71 /*
72 * We link the bounce buffer in and could have to traverse it
73 * later so we have to get a ref to prevent it from being freed
74 */
75 bio_get(bio);
76
77 ret = blk_rq_append_bio(q, rq, bio);
78 if (!ret)
79 return bio->bi_iter.bi_size;
80
81 /* if it was boucned we must call the end io function */
82 bio_endio(bio, 0);
83 __blk_rq_unmap_user(orig_bio);
84 bio_put(bio);
85 return ret;
86}
87
88/**
89 * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
90 * @q: request queue where request should be inserted
91 * @rq: request structure to fill
92 * @map_data: pointer to the rq_map_data holding pages (if necessary)
93 * @ubuf: the user buffer
94 * @len: length of user data
95 * @gfp_mask: memory allocation flags
96 *
97 * Description:
98 * Data will be mapped directly for zero copy I/O, if possible. Otherwise
99 * a kernel bounce buffer is used.
100 *
101 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
102 * still in process context.
103 *
104 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
105 * before being submitted to the device, as pages mapped may be out of
106 * reach. It's the callers responsibility to make sure this happens. The
107 * original bio must be passed back in to blk_rq_unmap_user() for proper
108 * unmapping.
109 */
110int blk_rq_map_user(struct request_queue *q, struct request *rq,
111 struct rq_map_data *map_data, void __user *ubuf,
112 unsigned long len, gfp_t gfp_mask)
113{
114 unsigned long bytes_read = 0;
115 struct bio *bio = NULL;
116 int ret;
117
118 if (len > (queue_max_hw_sectors(q) << 9))
119 return -EINVAL;
120 if (!len)
121 return -EINVAL;
122
123 if (!ubuf && (!map_data || !map_data->null_mapped))
124 return -EINVAL;
125
126 while (bytes_read != len) {
127 unsigned long map_len, end, start;
128
129 map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
130 end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
131 >> PAGE_SHIFT;
132 start = (unsigned long)ubuf >> PAGE_SHIFT;
133
134 /*
135 * A bad offset could cause us to require BIO_MAX_PAGES + 1
136 * pages. If this happens we just lower the requested
137 * mapping len by a page so that we can fit
138 */
139 if (end - start > BIO_MAX_PAGES)
140 map_len -= PAGE_SIZE;
141
142 ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
143 gfp_mask);
144 if (ret < 0)
145 goto unmap_rq;
146 if (!bio)
147 bio = rq->bio;
148 bytes_read += ret;
149 ubuf += ret;
150
151 if (map_data)
152 map_data->offset += ret;
153 }
154
155 if (!bio_flagged(bio, BIO_USER_MAPPED))
156 rq->cmd_flags |= REQ_COPY_USER;
157
158 return 0;
159unmap_rq:
160 blk_rq_unmap_user(bio);
161 rq->bio = NULL;
162 return ret;
163}
164EXPORT_SYMBOL(blk_rq_map_user);
165
166/** 42/**
167 * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage 43 * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
168 * @q: request queue where request should be inserted 44 * @q: request queue where request should be inserted
169 * @rq: request to map data to 45 * @rq: request to map data to
170 * @map_data: pointer to the rq_map_data holding pages (if necessary) 46 * @map_data: pointer to the rq_map_data holding pages (if necessary)
171 * @iov: pointer to the iovec 47 * @iter: iovec iterator
172 * @iov_count: number of elements in the iovec
173 * @len: I/O byte count
174 * @gfp_mask: memory allocation flags 48 * @gfp_mask: memory allocation flags
175 * 49 *
176 * Description: 50 * Description:
@@ -187,20 +61,21 @@ EXPORT_SYMBOL(blk_rq_map_user);
187 * unmapping. 61 * unmapping.
188 */ 62 */
189int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 63int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
190 struct rq_map_data *map_data, const struct sg_iovec *iov, 64 struct rq_map_data *map_data,
191 int iov_count, unsigned int len, gfp_t gfp_mask) 65 const struct iov_iter *iter, gfp_t gfp_mask)
192{ 66{
193 struct bio *bio; 67 struct bio *bio;
194 int i, read = rq_data_dir(rq) == READ;
195 int unaligned = 0; 68 int unaligned = 0;
69 struct iov_iter i;
70 struct iovec iov;
196 71
197 if (!iov || iov_count <= 0) 72 if (!iter || !iter->count)
198 return -EINVAL; 73 return -EINVAL;
199 74
200 for (i = 0; i < iov_count; i++) { 75 iov_for_each(iov, i, *iter) {
201 unsigned long uaddr = (unsigned long)iov[i].iov_base; 76 unsigned long uaddr = (unsigned long) iov.iov_base;
202 77
203 if (!iov[i].iov_len) 78 if (!iov.iov_len)
204 return -EINVAL; 79 return -EINVAL;
205 80
206 /* 81 /*
@@ -210,16 +85,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
210 unaligned = 1; 85 unaligned = 1;
211 } 86 }
212 87
213 if (unaligned || (q->dma_pad_mask & len) || map_data) 88 if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
214 bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, 89 bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
215 gfp_mask);
216 else 90 else
217 bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); 91 bio = bio_map_user_iov(q, iter, gfp_mask);
218 92
219 if (IS_ERR(bio)) 93 if (IS_ERR(bio))
220 return PTR_ERR(bio); 94 return PTR_ERR(bio);
221 95
222 if (bio->bi_iter.bi_size != len) { 96 if (map_data && map_data->null_mapped)
97 bio->bi_flags |= (1 << BIO_NULL_MAPPED);
98
99 if (bio->bi_iter.bi_size != iter->count) {
223 /* 100 /*
224 * Grab an extra reference to this bio, as bio_unmap_user() 101 * Grab an extra reference to this bio, as bio_unmap_user()
225 * expects to be able to drop it twice as it happens on the 102 * expects to be able to drop it twice as it happens on the
@@ -241,6 +118,21 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
 
+int blk_rq_map_user(struct request_queue *q, struct request *rq,
+		    struct rq_map_data *map_data, void __user *ubuf,
+		    unsigned long len, gfp_t gfp_mask)
+{
+	struct iovec iov;
+	struct iov_iter i;
+
+	iov.iov_base = ubuf;
+	iov.iov_len = len;
+	iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
+
+	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
+}
+EXPORT_SYMBOL(blk_rq_map_user);
+
 /**
  * blk_rq_unmap_user - unmap a request with user data
  * @bio: start of bio list
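
A hedged caller sketch for the iov_iter-based interface above: my_map_user_iovec and its parameters are assumptions for illustration, but iov_iter_init() and blk_rq_map_user_iov() are used exactly as in the new blk_rq_map_user() wrapper shown in this hunk.

/* Wrap a user iovec array in an iov_iter and map it into 'rq'. */
static int my_map_user_iovec(struct request_queue *q, struct request *rq,
			     const struct iovec *uiov, unsigned long nr_segs,
			     size_t total_len, gfp_t gfp_mask)
{
	struct iov_iter i;

	iov_iter_init(&i, rq_data_dir(rq), uiov, nr_segs, total_len);
	return blk_rq_map_user_iov(q, rq, NULL, &i, gfp_mask);
}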
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 89b97b5e0881..fc1ff3b1ea1f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -283,35 +283,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(blk_rq_map_sg);
 
-/**
- * blk_bio_map_sg - map a bio to a scatterlist
- * @q: request_queue in question
- * @bio: bio being mapped
- * @sglist: scatterlist being mapped
- *
- * Note:
- *    Caller must make sure sg can hold bio->bi_phys_segments entries
- *
- * Will return the number of sg entries setup
- */
-int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
-		   struct scatterlist *sglist)
-{
-	struct scatterlist *sg = NULL;
-	int nsegs;
-	struct bio *next = bio->bi_next;
-	bio->bi_next = NULL;
-
-	nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
-	bio->bi_next = next;
-	if (sg)
-		sg_mark_end(sg);
-
-	BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
-	return nsegs;
-}
-EXPORT_SYMBOL(blk_bio_map_sg);
-
 static inline int ll_new_hw_segment(struct request_queue *q,
 				    struct request *req,
 				    struct bio *bio)
@@ -385,6 +356,14 @@ static bool req_no_special_merge(struct request *req)
 	return !q->mq_ops && req->special;
 }
 
+static int req_gap_to_prev(struct request *req, struct request *next)
+{
+	struct bio *prev = req->biotail;
+
+	return bvec_gap_to_prev(&prev->bi_io_vec[prev->bi_vcnt - 1],
+				next->bio->bi_io_vec[0].bv_offset);
+}
+
 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 				struct request *next)
 {
@@ -399,6 +378,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	if (req_no_special_merge(req) || req_no_special_merge(next))
 		return 0;
 
+	if (test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags) &&
+	    req_gap_to_prev(req, next))
+		return 0;
+
 	/*
 	 * Will it become too large?
 	 */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60c9d4a93fe4..d53a764b05ea 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -140,35 +140,39 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
140 return atomic_read(&hctx->nr_active) < depth; 140 return atomic_read(&hctx->nr_active) < depth;
141} 141}
142 142
143static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) 143static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
144 bool nowrap)
144{ 145{
145 int tag, org_last_tag, end; 146 int tag, org_last_tag = last_tag;
146 bool wrap = last_tag != 0;
147 147
148 org_last_tag = last_tag; 148 while (1) {
149 end = bm->depth; 149 tag = find_next_zero_bit(&bm->word, bm->depth, last_tag);
150 do { 150 if (unlikely(tag >= bm->depth)) {
151restart:
152 tag = find_next_zero_bit(&bm->word, end, last_tag);
153 if (unlikely(tag >= end)) {
154 /* 151 /*
155 * We started with an offset, start from 0 to 152 * We started with an offset, and we didn't reset the
153 * offset to 0 in a failure case, so start from 0 to
156 * exhaust the map. 154 * exhaust the map.
157 */ 155 */
158 if (wrap) { 156 if (org_last_tag && last_tag && !nowrap) {
159 wrap = false; 157 last_tag = org_last_tag = 0;
160 end = org_last_tag; 158 continue;
161 last_tag = 0;
162 goto restart;
163 } 159 }
164 return -1; 160 return -1;
165 } 161 }
162
163 if (!test_and_set_bit(tag, &bm->word))
164 break;
165
166 last_tag = tag + 1; 166 last_tag = tag + 1;
167 } while (test_and_set_bit(tag, &bm->word)); 167 if (last_tag >= bm->depth - 1)
168 last_tag = 0;
169 }
168 170
169 return tag; 171 return tag;
170} 172}
171 173
174#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
175
172/* 176/*
173 * Straight forward bitmap tag implementation, where each bit is a tag 177 * Straight forward bitmap tag implementation, where each bit is a tag
174 * (cleared == free, and set == busy). The small twist is using per-cpu 178 * (cleared == free, and set == busy). The small twist is using per-cpu
@@ -181,7 +185,7 @@ restart:
181 * until the map is exhausted. 185 * until the map is exhausted.
182 */ 186 */
183static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, 187static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
184 unsigned int *tag_cache) 188 unsigned int *tag_cache, struct blk_mq_tags *tags)
185{ 189{
186 unsigned int last_tag, org_last_tag; 190 unsigned int last_tag, org_last_tag;
187 int index, i, tag; 191 int index, i, tag;
@@ -193,15 +197,24 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
193 index = TAG_TO_INDEX(bt, last_tag); 197 index = TAG_TO_INDEX(bt, last_tag);
194 198
195 for (i = 0; i < bt->map_nr; i++) { 199 for (i = 0; i < bt->map_nr; i++) {
196 tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag)); 200 tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
201 BT_ALLOC_RR(tags));
197 if (tag != -1) { 202 if (tag != -1) {
198 tag += (index << bt->bits_per_word); 203 tag += (index << bt->bits_per_word);
199 goto done; 204 goto done;
200 } 205 }
201 206
202 last_tag = 0; 207 /*
203 if (++index >= bt->map_nr) 208 * Jump to next index, and reset the last tag to be the
209 * first tag of that index
210 */
211 index++;
212 last_tag = (index << bt->bits_per_word);
213
214 if (index >= bt->map_nr) {
204 index = 0; 215 index = 0;
216 last_tag = 0;
217 }
205 } 218 }
206 219
207 *tag_cache = 0; 220 *tag_cache = 0;
@@ -212,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
212 * up using the specific cached tag. 225 * up using the specific cached tag.
213 */ 226 */
214done: 227done:
215 if (tag == org_last_tag) { 228 if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
216 last_tag = tag + 1; 229 last_tag = tag + 1;
217 if (last_tag >= bt->depth - 1) 230 if (last_tag >= bt->depth - 1)
218 last_tag = 0; 231 last_tag = 0;
@@ -241,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
241static int bt_get(struct blk_mq_alloc_data *data, 254static int bt_get(struct blk_mq_alloc_data *data,
242 struct blk_mq_bitmap_tags *bt, 255 struct blk_mq_bitmap_tags *bt,
243 struct blk_mq_hw_ctx *hctx, 256 struct blk_mq_hw_ctx *hctx,
244 unsigned int *last_tag) 257 unsigned int *last_tag, struct blk_mq_tags *tags)
245{ 258{
246 struct bt_wait_state *bs; 259 struct bt_wait_state *bs;
247 DEFINE_WAIT(wait); 260 DEFINE_WAIT(wait);
248 int tag; 261 int tag;
249 262
250 tag = __bt_get(hctx, bt, last_tag); 263 tag = __bt_get(hctx, bt, last_tag, tags);
251 if (tag != -1) 264 if (tag != -1)
252 return tag; 265 return tag;
253 266
@@ -258,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
258 do { 271 do {
259 prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); 272 prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
260 273
261 tag = __bt_get(hctx, bt, last_tag); 274 tag = __bt_get(hctx, bt, last_tag, tags);
262 if (tag != -1) 275 if (tag != -1)
263 break; 276 break;
264 277
@@ -273,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
273 * Retry tag allocation after running the hardware queue, 286 * Retry tag allocation after running the hardware queue,
274 * as running the queue may also have found completions. 287 * as running the queue may also have found completions.
275 */ 288 */
276 tag = __bt_get(hctx, bt, last_tag); 289 tag = __bt_get(hctx, bt, last_tag, tags);
277 if (tag != -1) 290 if (tag != -1)
278 break; 291 break;
279 292
@@ -304,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
304 int tag; 317 int tag;
305 318
306 tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, 319 tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
307 &data->ctx->last_tag); 320 &data->ctx->last_tag, data->hctx->tags);
308 if (tag >= 0) 321 if (tag >= 0)
309 return tag + data->hctx->tags->nr_reserved_tags; 322 return tag + data->hctx->tags->nr_reserved_tags;
310 323
@@ -320,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
320 return BLK_MQ_TAG_FAIL; 333 return BLK_MQ_TAG_FAIL;
321 } 334 }
322 335
323 tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero); 336 tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
337 data->hctx->tags);
324 if (tag < 0) 338 if (tag < 0)
325 return BLK_MQ_TAG_FAIL; 339 return BLK_MQ_TAG_FAIL;
326 340
@@ -392,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
392 406
393 BUG_ON(real_tag >= tags->nr_tags); 407 BUG_ON(real_tag >= tags->nr_tags);
394 bt_clear_tag(&tags->bitmap_tags, real_tag); 408 bt_clear_tag(&tags->bitmap_tags, real_tag);
395 *last_tag = real_tag; 409 if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
410 *last_tag = real_tag;
396 } else { 411 } else {
397 BUG_ON(tag >= tags->nr_reserved_tags); 412 BUG_ON(tag >= tags->nr_reserved_tags);
398 bt_clear_tag(&tags->breserved_tags, tag); 413 bt_clear_tag(&tags->breserved_tags, tag);
@@ -509,6 +524,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
509 bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); 524 bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
510 if (!bt->bs) { 525 if (!bt->bs) {
511 kfree(bt->map); 526 kfree(bt->map);
527 bt->map = NULL;
512 return -ENOMEM; 528 return -ENOMEM;
513 } 529 }
514 530
@@ -529,10 +545,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
529} 545}
530 546
531static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, 547static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
532 int node) 548 int node, int alloc_policy)
533{ 549{
534 unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; 550 unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
535 551
552 tags->alloc_policy = alloc_policy;
553
536 if (bt_alloc(&tags->bitmap_tags, depth, node, false)) 554 if (bt_alloc(&tags->bitmap_tags, depth, node, false))
537 goto enomem; 555 goto enomem;
538 if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true)) 556 if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
@@ -546,7 +564,8 @@ enomem:
546} 564}
547 565
548struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, 566struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
549 unsigned int reserved_tags, int node) 567 unsigned int reserved_tags,
568 int node, int alloc_policy)
550{ 569{
551 struct blk_mq_tags *tags; 570 struct blk_mq_tags *tags;
552 571
@@ -562,7 +581,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
562 tags->nr_tags = total_tags; 581 tags->nr_tags = total_tags;
563 tags->nr_reserved_tags = reserved_tags; 582 tags->nr_reserved_tags = reserved_tags;
564 583
565 return blk_mq_init_bitmap_tags(tags, node); 584 return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
566} 585}
567 586
568void blk_mq_free_tags(struct blk_mq_tags *tags) 587void blk_mq_free_tags(struct blk_mq_tags *tags)
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index a6fa0fc9d41a..90767b370308 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,10 +42,12 @@ struct blk_mq_tags {
 
 	struct request **rqs;
 	struct list_head page_list;
+
+	int alloc_policy;
 };
 
 
-extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2390c5541e71..4f4bea21052e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,6 +33,7 @@ static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
 
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
+static void blk_mq_run_queues(struct request_queue *q);
 
 /*
  * Check if any of the ctx's have pending work in this hardware queue
@@ -117,7 +118,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 
 	if (freeze) {
 		percpu_ref_kill(&q->mq_usage_counter);
-		blk_mq_run_queues(q, false);
+		blk_mq_run_queues(q);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
@@ -136,6 +137,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	blk_mq_freeze_queue_start(q);
 	blk_mq_freeze_queue_wait(q);
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
 
 void blk_mq_unfreeze_queue(struct request_queue *q)
 {
@@ -902,7 +904,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 			&hctx->run_work, 0);
 }
 
-void blk_mq_run_queues(struct request_queue *q, bool async)
+static void blk_mq_run_queues(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
@@ -913,10 +915,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
-		blk_mq_run_hw_queue(hctx, async);
+		blk_mq_run_hw_queue(hctx, false);
 	}
 }
-EXPORT_SYMBOL(blk_mq_run_queues);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -954,7 +955,6 @@ void blk_mq_start_hw_queues(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queues);
 
-
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -1423,7 +1423,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	size_t rq_size, left;
 
 	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
-				set->numa_node);
+				set->numa_node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
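
For completeness, a hedged sketch of how a blk-mq driver might opt in to the new tag allocation policy; BLK_ALLOC_POLICY_TO_MQ_FLAG is assumed to be the encoding counterpart of the BLK_MQ_FLAG_TO_ALLOC_POLICY macro used above, and my_driver_init_tag_set is an invented name.

static void my_driver_init_tag_set(struct blk_mq_tag_set *set)
{
	/* request round-robin tag allocation instead of the FIFO default */
	set->flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_RR);
}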
diff --git a/block/blk-tag.c b/block/blk-tag.c
index a185b86741e5..f0344e6939d5 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -119,7 +119,7 @@ fail:
 }
 
 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
-						   int depth)
+						   int depth, int alloc_policy)
 {
 	struct blk_queue_tag *tags;
 
@@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 		goto fail;
 
 	atomic_set(&tags->refcnt, 1);
+	tags->alloc_policy = alloc_policy;
+	tags->next_tag = 0;
 	return tags;
 fail:
 	kfree(tags);
@@ -140,10 +142,11 @@ fail:
 /**
  * blk_init_tags - initialize the tag info for an external tag map
  * @depth: the maximum queue depth supported
+ * @alloc_policy: tag allocation policy
  **/
-struct blk_queue_tag *blk_init_tags(int depth)
+struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
 {
-	return __blk_queue_init_tags(NULL, depth);
+	return __blk_queue_init_tags(NULL, depth, alloc_policy);
 }
 EXPORT_SYMBOL(blk_init_tags);
 
@@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags);
  * @q: the request queue for the device
  * @depth: the maximum queue depth supported
  * @tags: the tag to use
+ * @alloc_policy: tag allocation policy
  *
  * Queue lock must be held here if the function is called to resize an
  * existing map.
  **/
 int blk_queue_init_tags(struct request_queue *q, int depth,
-			struct blk_queue_tag *tags)
+			struct blk_queue_tag *tags, int alloc_policy)
 {
 	int rc;
 
 	BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
 
 	if (!tags && !q->queue_tags) {
-		tags = __blk_queue_init_tags(q, depth);
+		tags = __blk_queue_init_tags(q, depth, alloc_policy);
 
 		if (!tags)
 			return -ENOMEM;
@@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
344 } 348 }
345 349
346 do { 350 do {
347 tag = find_first_zero_bit(bqt->tag_map, max_depth); 351 if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
348 if (tag >= max_depth) 352 tag = find_first_zero_bit(bqt->tag_map, max_depth);
349 return 1; 353 if (tag >= max_depth)
354 return 1;
355 } else {
356 int start = bqt->next_tag;
357 int size = min_t(int, bqt->max_depth, max_depth + start);
358 tag = find_next_zero_bit(bqt->tag_map, size, start);
359 if (tag >= size && start + size > bqt->max_depth) {
360 size = start + size - bqt->max_depth;
361 tag = find_first_zero_bit(bqt->tag_map, size);
362 }
363 if (tag >= size)
364 return 1;
365 }
350 366
351 } while (test_and_set_bit_lock(tag, bqt->tag_map)); 367 } while (test_and_set_bit_lock(tag, bqt->tag_map));
352 /* 368 /*
@@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
354 * See blk_queue_end_tag for details. 370 * See blk_queue_end_tag for details.
355 */ 371 */
356 372
373 bqt->next_tag = (tag + 1) % bqt->max_depth;
357 rq->cmd_flags |= REQ_QUEUED; 374 rq->cmd_flags |= REQ_QUEUED;
358 rq->tag = tag; 375 rq->tag = tag;
359 bqt->tag_index[tag] = rq; 376 bqt->tag_index[tag] = rq;
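The BLK_TAG_ALLOC_RR branch above resumes the bitmap search at next_tag and wraps back to the start of the map when it runs off the end, so successive requests walk the whole tag space instead of always reusing the lowest free tag. Below is a simplified, self-contained sketch of that wraparound search, assuming a plain int array in place of the kernel bitmap and a find_zero_from() helper that stands in for find_next_zero_bit(); it is an illustration of the idea, not a line-for-line copy of the kernel loop.

#include <stdio.h>

#define MAX_DEPTH 8

static int tag_map[MAX_DEPTH];	/* 0 = free, 1 = in use */
static int next_tag;		/* where the next search starts */

/* Stand-in for find_next_zero_bit(): first free slot in [start, end). */
static int find_zero_from(int start, int end)
{
	int i;

	for (i = start; i < end; i++)
		if (!tag_map[i])
			return i;
	return end;		/* "not found" */
}

/* Round-robin tag allocation: search from next_tag, wrap once. */
static int rr_get_tag(void)
{
	int tag = find_zero_from(next_tag, MAX_DEPTH);

	if (tag == MAX_DEPTH) {			/* wrap around to the start */
		tag = find_zero_from(0, next_tag);
		if (tag == next_tag)
			return -1;		/* every tag is in use */
	}
	tag_map[tag] = 1;
	next_tag = (tag + 1) % MAX_DEPTH;
	return tag;
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		printf("tag %d\n", rr_get_tag());	/* 0 1 2 3 4 */
	tag_map[1] = 0;					/* free tag 1 */
	printf("tag %d\n", rr_get_tag());		/* 5, not 1 */
	return 0;
}

After tag 1 is freed, the round-robin policy still hands out 5 first, whereas the FIFO policy would have reused 1 immediately; that difference is what drivers whose hardware cycles internal resources by tag number care about.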
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6f2751d305de..5da8e6e9ab4b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3590,6 +3590,11 @@ retry:
3590 3590
3591 blkcg = bio_blkcg(bio); 3591 blkcg = bio_blkcg(bio);
3592 cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); 3592 cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
3593 if (!cfqg) {
3594 cfqq = &cfqd->oom_cfqq;
3595 goto out;
3596 }
3597
3593 cfqq = cic_to_cfqq(cic, is_sync); 3598 cfqq = cic_to_cfqq(cic, is_sync);
3594 3599
3595 /* 3600 /*
@@ -3626,7 +3631,7 @@ retry:
3626 } else 3631 } else
3627 cfqq = &cfqd->oom_cfqq; 3632 cfqq = &cfqd->oom_cfqq;
3628 } 3633 }
3629 3634out:
3630 if (new_cfqq) 3635 if (new_cfqq)
3631 kmem_cache_free(cfq_pool, new_cfqq); 3636 kmem_cache_free(cfq_pool, new_cfqq);
3632 3637
@@ -3656,12 +3661,17 @@ static struct cfq_queue *
3656cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, 3661cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
3657 struct bio *bio, gfp_t gfp_mask) 3662 struct bio *bio, gfp_t gfp_mask)
3658{ 3663{
3659 const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); 3664 int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
3660 const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); 3665 int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
3661 struct cfq_queue **async_cfqq = NULL; 3666 struct cfq_queue **async_cfqq = NULL;
3662 struct cfq_queue *cfqq = NULL; 3667 struct cfq_queue *cfqq = NULL;
3663 3668
3664 if (!is_sync) { 3669 if (!is_sync) {
3670 if (!ioprio_valid(cic->ioprio)) {
3671 struct task_struct *tsk = current;
3672 ioprio = task_nice_ioprio(tsk);
3673 ioprio_class = task_nice_ioclass(tsk);
3674 }
3665 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); 3675 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
3666 cfqq = *async_cfqq; 3676 cfqq = *async_cfqq;
3667 } 3677 }
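The new !ioprio_valid() branch above falls back to the submitting task's CPU nice level when the io context has never been given an explicit I/O priority; task_nice_ioclass() picks the scheduling class and task_nice_ioprio() collapses the nice range onto the eight best-effort levels. A tiny standalone sketch of that second mapping (the helper name below is illustrative; the arithmetic mirrors what task_nice_ioprio() computes):

#include <stdio.h>

/* nice -20..19 collapses onto I/O priority levels 0..7 (0 = highest). */
static int nice_to_ioprio_level(int nice)
{
	return (nice + 20) / 5;
}

int main(void)
{
	printf("nice -20 -> level %d\n", nice_to_ioprio_level(-20)); /* 0 */
	printf("nice   0 -> level %d\n", nice_to_ioprio_level(0));   /* 4 */
	printf("nice  19 -> level %d\n", nice_to_ioprio_level(19));  /* 7 */
	return 0;
}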
diff --git a/block/ioctl.c b/block/ioctl.c
index 6c7bf903742f..7d8befde2aca 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
198 if (start + len > (i_size_read(bdev->bd_inode) >> 9)) 198 if (start + len > (i_size_read(bdev->bd_inode) >> 9))
199 return -EINVAL; 199 return -EINVAL;
200 200
201 return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); 201 return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
202} 202}
203 203
204static int put_ushort(unsigned long arg, unsigned short val) 204static int put_ushort(unsigned long arg, unsigned short val)
diff --git a/block/partitions/check.c b/block/partitions/check.c
index 9ac1df74f699..16118d11dbfc 100644
--- a/block/partitions/check.c
+++ b/block/partitions/check.c
@@ -184,12 +184,12 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
184 if (err) 184 if (err)
185 /* The partition is unrecognized. So report I/O errors if there were any */ 185 /* The partition is unrecognized. So report I/O errors if there were any */
186 res = err; 186 res = err;
187 if (!res) 187 if (res) {
188 strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); 188 if (warn_no_part)
189 else if (warn_no_part) 189 strlcat(state->pp_buf,
190 strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); 190 " unable to read partition table\n", PAGE_SIZE);
191 191 printk(KERN_INFO "%s", state->pp_buf);
192 printk(KERN_INFO "%s", state->pp_buf); 192 }
193 193
194 free_page((unsigned long)state->pp_buf); 194 free_page((unsigned long)state->pp_buf);
195 free_partitions(state); 195 free_partitions(state);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 28163fad3c5d..e1f71c396193 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -332,7 +332,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
332 332
333 ret = 0; 333 ret = 0;
334 if (hdr->iovec_count) { 334 if (hdr->iovec_count) {
335 size_t iov_data_len; 335 struct iov_iter i;
336 struct iovec *iov = NULL; 336 struct iovec *iov = NULL;
337 337
338 ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, 338 ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
@@ -342,20 +342,11 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
342 goto out_free_cdb; 342 goto out_free_cdb;
343 } 343 }
344 344
345 iov_data_len = ret;
346 ret = 0;
347
348 /* SG_IO howto says that the shorter of the two wins */ 345 /* SG_IO howto says that the shorter of the two wins */
349 if (hdr->dxfer_len < iov_data_len) { 346 iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
350 hdr->iovec_count = iov_shorten(iov, 347 min_t(unsigned, ret, hdr->dxfer_len));
351 hdr->iovec_count,
352 hdr->dxfer_len);
353 iov_data_len = hdr->dxfer_len;
354 }
355 348
356 ret = blk_rq_map_user_iov(q, rq, NULL, (struct sg_iovec *) iov, 349 ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
357 hdr->iovec_count,
358 iov_data_len, GFP_KERNEL);
359 kfree(iov); 350 kfree(iov);
360 } else if (hdr->dxfer_len) 351 } else if (hdr->dxfer_len)
361 ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, 352 ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 3598110d2cef..89e90ec52f28 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -370,25 +370,25 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
370} 370}
371 371
372#ifdef CONFIG_BLK_DEV_XIP 372#ifdef CONFIG_BLK_DEV_XIP
373static int brd_direct_access(struct block_device *bdev, sector_t sector, 373static long brd_direct_access(struct block_device *bdev, sector_t sector,
374 void **kaddr, unsigned long *pfn) 374 void **kaddr, unsigned long *pfn, long size)
375{ 375{
376 struct brd_device *brd = bdev->bd_disk->private_data; 376 struct brd_device *brd = bdev->bd_disk->private_data;
377 struct page *page; 377 struct page *page;
378 378
379 if (!brd) 379 if (!brd)
380 return -ENODEV; 380 return -ENODEV;
381 if (sector & (PAGE_SECTORS-1))
382 return -EINVAL;
383 if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk))
384 return -ERANGE;
385 page = brd_insert_page(brd, sector); 381 page = brd_insert_page(brd, sector);
386 if (!page) 382 if (!page)
387 return -ENOSPC; 383 return -ENOSPC;
388 *kaddr = page_address(page); 384 *kaddr = page_address(page);
389 *pfn = page_to_pfn(page); 385 *pfn = page_to_pfn(page);
390 386
391 return 0; 387 /*
388 * TODO: If size > PAGE_SIZE, we could look to see if the next page in
389 * the file happens to be mapped to the next page of physical RAM.
390 */
391 return PAGE_SIZE;
392} 392}
393#endif 393#endif
394 394
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d169b4a79267..cee20354ac37 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
1388 list_add_tail(&peer_req->w.list, &device->active_ee); 1388 list_add_tail(&peer_req->w.list, &device->active_ee);
1389 spin_unlock_irq(&device->resource->req_lock); 1389 spin_unlock_irq(&device->resource->req_lock);
1390 if (blkdev_issue_zeroout(device->ldev->backing_bdev, 1390 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1391 sector, data_size >> 9, GFP_NOIO)) 1391 sector, data_size >> 9, GFP_NOIO, false))
1392 peer_req->flags |= EE_WAS_ERROR; 1392 peer_req->flags |= EE_WAS_ERROR;
1393 drbd_endio_write_sec_final(peer_req); 1393 drbd_endio_write_sec_final(peer_req);
1394 return 0; 1394 return 0;
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 79aa179305b5..e22942596207 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
423 } 423 }
424 424
425 /* switch queue to TCQ mode; allocate tag map */ 425 /* switch queue to TCQ mode; allocate tag map */
426 rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); 426 rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
427 if (rc) { 427 if (rc) {
428 blk_cleanup_queue(q); 428 blk_cleanup_queue(q);
429 put_disk(disk); 429 put_disk(disk);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2caf5b374649..68c1b535c52e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1722,6 +1722,7 @@ static int setup_clone(struct request *clone, struct request *rq,
1722{ 1722{
1723 int r; 1723 int r;
1724 1724
1725 blk_rq_init(NULL, clone);
1725 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, 1726 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1726 dm_rq_bio_constructor, tio); 1727 dm_rq_bio_constructor, tio);
1727 if (r) 1728 if (r)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 7f900229404d..96128cb009f3 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -28,8 +28,8 @@
28static int dcssblk_open(struct block_device *bdev, fmode_t mode); 28static int dcssblk_open(struct block_device *bdev, fmode_t mode);
29static void dcssblk_release(struct gendisk *disk, fmode_t mode); 29static void dcssblk_release(struct gendisk *disk, fmode_t mode);
30static void dcssblk_make_request(struct request_queue *q, struct bio *bio); 30static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
31static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum, 31static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
32 void **kaddr, unsigned long *pfn); 32 void **kaddr, unsigned long *pfn, long size);
33 33
34static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; 34static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
35 35
@@ -877,25 +877,22 @@ fail:
877 bio_io_error(bio); 877 bio_io_error(bio);
878} 878}
879 879
880static int 880static long
881dcssblk_direct_access (struct block_device *bdev, sector_t secnum, 881dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
882 void **kaddr, unsigned long *pfn) 882 void **kaddr, unsigned long *pfn, long size)
883{ 883{
884 struct dcssblk_dev_info *dev_info; 884 struct dcssblk_dev_info *dev_info;
885 unsigned long pgoff; 885 unsigned long offset, dev_sz;
886 886
887 dev_info = bdev->bd_disk->private_data; 887 dev_info = bdev->bd_disk->private_data;
888 if (!dev_info) 888 if (!dev_info)
889 return -ENODEV; 889 return -ENODEV;
890 if (secnum % (PAGE_SIZE/512)) 890 dev_sz = dev_info->end - dev_info->start;
891 return -EINVAL; 891 offset = secnum * 512;
892 pgoff = secnum / (PAGE_SIZE / 512); 892 *kaddr = (void *) (dev_info->start + offset);
893 if ((pgoff+1)*PAGE_SIZE-1 > dev_info->end - dev_info->start)
894 return -ERANGE;
895 *kaddr = (void *) (dev_info->start+pgoff*PAGE_SIZE);
896 *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; 893 *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
897 894
898 return 0; 895 return dev_sz - offset;
899} 896}
900 897
901static void 898static void
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 17bb541f7cc2..54d7a6cbb98a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2197,6 +2197,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
2197 shost->tag_set.cmd_size = cmd_size; 2197 shost->tag_set.cmd_size = cmd_size;
2198 shost->tag_set.numa_node = NUMA_NO_NODE; 2198 shost->tag_set.numa_node = NUMA_NO_NODE;
2199 shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; 2199 shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
2200 shost->tag_set.flags |=
2201 BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
2200 shost->tag_set.driver_data = shost; 2202 shost->tag_set.driver_data = shost;
2201 2203
2202 return blk_mq_alloc_tag_set(&shost->tag_set); 2204 return blk_mq_alloc_tag_set(&shost->tag_set);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 0deb385ad4d6..9c0a520d933c 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -277,7 +277,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
277 if (!shost_use_blk_mq(sdev->host) && 277 if (!shost_use_blk_mq(sdev->host) &&
278 (shost->bqt || shost->hostt->use_blk_tags)) { 278 (shost->bqt || shost->hostt->use_blk_tags)) {
279 blk_queue_init_tags(sdev->request_queue, 279 blk_queue_init_tags(sdev->request_queue,
280 sdev->host->cmd_per_lun, shost->bqt); 280 sdev->host->cmd_per_lun, shost->bqt,
281 shost->hostt->tag_alloc_policy);
281 } 282 }
282 scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun); 283 scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
283 284
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index a668c88ea150..0cbc1fb45f10 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1719,22 +1719,19 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1719 } 1719 }
1720 1720
1721 if (iov_count) { 1721 if (iov_count) {
1722 int len, size = sizeof(struct sg_iovec) * iov_count; 1722 int size = sizeof(struct iovec) * iov_count;
1723 struct iovec *iov; 1723 struct iovec *iov;
1724 struct iov_iter i;
1724 1725
1725 iov = memdup_user(hp->dxferp, size); 1726 iov = memdup_user(hp->dxferp, size);
1726 if (IS_ERR(iov)) 1727 if (IS_ERR(iov))
1727 return PTR_ERR(iov); 1728 return PTR_ERR(iov);
1728 1729
1729 len = iov_length(iov, iov_count); 1730 iov_iter_init(&i, rw, iov, iov_count,
1730 if (hp->dxfer_len < len) { 1731 min_t(size_t, hp->dxfer_len,
1731 iov_count = iov_shorten(iov, iov_count, hp->dxfer_len); 1732 iov_length(iov, iov_count)));
1732 len = hp->dxfer_len;
1733 }
1734 1733
1735 res = blk_rq_map_user_iov(q, rq, md, (struct sg_iovec *)iov, 1734 res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
1736 iov_count,
1737 len, GFP_ATOMIC);
1738 kfree(iov); 1735 kfree(iov);
1739 } else 1736 } else
1740 res = blk_rq_map_user(q, rq, md, hp->dxferp, 1737 res = blk_rq_map_user(q, rq, md, hp->dxferp,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a9f92794d7a0..975266be67d3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -421,6 +421,46 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
421} 421}
422EXPORT_SYMBOL_GPL(bdev_write_page); 422EXPORT_SYMBOL_GPL(bdev_write_page);
423 423
424/**
425 * bdev_direct_access() - Get the address for directly-accessible memory
426 * @bdev: The device containing the memory
427 * @sector: The offset within the device
428 * @addr: Where to put the address of the memory
429 * @pfn: The Page Frame Number for the memory
430 * @size: The number of bytes requested
431 *
432 * If a block device is made up of directly addressable memory, this function
433 * will tell the caller the PFN and the address of the memory. The address
434 * may be directly dereferenced within the kernel without the need to call
435 * ioremap(), kmap() or similar. The PFN is suitable for inserting into
436 * page tables.
437 *
438 * Return: negative errno if an error occurs, otherwise the number of bytes
439 * accessible at this address.
440 */
441long bdev_direct_access(struct block_device *bdev, sector_t sector,
442 void **addr, unsigned long *pfn, long size)
443{
444 long avail;
445 const struct block_device_operations *ops = bdev->bd_disk->fops;
446
447 if (size < 0)
448 return size;
449 if (!ops->direct_access)
450 return -EOPNOTSUPP;
451 if ((sector + DIV_ROUND_UP(size, 512)) >
452 part_nr_sects_read(bdev->bd_part))
453 return -ERANGE;
454 sector += get_start_sect(bdev);
455 if (sector % (PAGE_SIZE / 512))
456 return -EINVAL;
457 avail = ops->direct_access(bdev, sector, addr, pfn, size);
458 if (!avail)
459 return -ERANGE;
460 return min(avail, size);
461}
462EXPORT_SYMBOL_GPL(bdev_direct_access);
463
424/* 464/*
425 * pseudo-fs 465 * pseudo-fs
426 */ 466 */
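The ext2 hunk below is the in-tree consumer of this helper. As a further illustration, a hypothetical caller that zeroes one page-sized block through the direct mapping might look like the sketch below; the wrapper function is invented for illustration, and bdev_direct_access() and clear_page() are the only interfaces relied on.

#include <linux/blkdev.h>
#include <linux/mm.h>

/* Zero one page-sized block of a directly addressable device. */
static long zero_one_block(struct block_device *bdev, sector_t sector)
{
	void *addr;
	unsigned long pfn;
	long avail;

	avail = bdev_direct_access(bdev, sector, &addr, &pfn, PAGE_SIZE);
	if (avail < 0)
		return avail;		/* -EOPNOTSUPP, -ERANGE, -EINVAL, ... */
	if (avail < PAGE_SIZE)
		return -ENXIO;		/* device could not map a full page here */

	clear_page(addr);		/* write through the direct mapping */
	return 0;
}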
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index e98171a11cfe..bbc5fec6ff7f 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -13,18 +13,12 @@
13#include "ext2.h" 13#include "ext2.h"
14#include "xip.h" 14#include "xip.h"
15 15
16static inline int 16static inline long __inode_direct_access(struct inode *inode, sector_t block,
17__inode_direct_access(struct inode *inode, sector_t block, 17 void **kaddr, unsigned long *pfn, long size)
18 void **kaddr, unsigned long *pfn)
19{ 18{
20 struct block_device *bdev = inode->i_sb->s_bdev; 19 struct block_device *bdev = inode->i_sb->s_bdev;
21 const struct block_device_operations *ops = bdev->bd_disk->fops; 20 sector_t sector = block * (PAGE_SIZE / 512);
22 sector_t sector; 21 return bdev_direct_access(bdev, sector, kaddr, pfn, size);
23
24 sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */
25
26 BUG_ON(!ops->direct_access);
27 return ops->direct_access(bdev, sector, kaddr, pfn);
28} 22}
29 23
30static inline int 24static inline int
@@ -53,12 +47,13 @@ ext2_clear_xip_target(struct inode *inode, sector_t block)
53{ 47{
54 void *kaddr; 48 void *kaddr;
55 unsigned long pfn; 49 unsigned long pfn;
56 int rc; 50 long size;
57 51
58 rc = __inode_direct_access(inode, block, &kaddr, &pfn); 52 size = __inode_direct_access(inode, block, &kaddr, &pfn, PAGE_SIZE);
59 if (!rc) 53 if (size < 0)
60 clear_page(kaddr); 54 return size;
61 return rc; 55 clear_page(kaddr);
56 return 0;
62} 57}
63 58
64void ext2_xip_verify_sb(struct super_block *sb) 59void ext2_xip_verify_sb(struct super_block *sb)
@@ -77,7 +72,7 @@ void ext2_xip_verify_sb(struct super_block *sb)
77int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create, 72int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
78 void **kmem, unsigned long *pfn) 73 void **kmem, unsigned long *pfn)
79{ 74{
80 int rc; 75 long rc;
81 sector_t block; 76 sector_t block;
82 77
83 /* first, retrieve the sector number */ 78 /* first, retrieve the sector number */
@@ -86,6 +81,6 @@ int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
86 return rc; 81 return rc;
87 82
88 /* retrieve address of the target data */ 83 /* retrieve address of the target data */
89 rc = __inode_direct_access(mapping->host, block, kmem, pfn); 84 rc = __inode_direct_access(mapping->host, block, kmem, pfn, PAGE_SIZE);
90 return rc; 85 return (rc < 0) ? rc : 0;
91} 86}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index efead0b532c4..da3a127c9958 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -428,13 +428,9 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
428extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, 428extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
429 unsigned int, unsigned int); 429 unsigned int, unsigned int);
430extern int bio_get_nr_vecs(struct block_device *); 430extern int bio_get_nr_vecs(struct block_device *);
431extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
432 unsigned long, unsigned int, int, gfp_t);
433struct sg_iovec;
434struct rq_map_data; 431struct rq_map_data;
435extern struct bio *bio_map_user_iov(struct request_queue *, 432extern struct bio *bio_map_user_iov(struct request_queue *,
436 struct block_device *, 433 const struct iov_iter *, gfp_t);
437 const struct sg_iovec *, int, int, gfp_t);
438extern void bio_unmap_user(struct bio *); 434extern void bio_unmap_user(struct bio *);
439extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, 435extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
440 gfp_t); 436 gfp_t);
@@ -462,12 +458,10 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
462extern void bio_copy_data(struct bio *dst, struct bio *src); 458extern void bio_copy_data(struct bio *dst, struct bio *src);
463extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); 459extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
464 460
465extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
466 unsigned long, unsigned int, int, gfp_t);
467extern struct bio *bio_copy_user_iov(struct request_queue *, 461extern struct bio *bio_copy_user_iov(struct request_queue *,
468 struct rq_map_data *, 462 struct rq_map_data *,
469 const struct sg_iovec *, 463 const struct iov_iter *,
470 int, int, gfp_t); 464 gfp_t);
471extern int bio_uncopy_user(struct bio *); 465extern int bio_uncopy_user(struct bio *);
472void zero_fill_bio(struct bio *bio); 466void zero_fill_bio(struct bio *bio);
473extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); 467extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5735e7130d63..7aec86127335 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -146,6 +146,8 @@ enum {
146 BLK_MQ_F_SG_MERGE = 1 << 2, 146 BLK_MQ_F_SG_MERGE = 1 << 2,
147 BLK_MQ_F_SYSFS_UP = 1 << 3, 147 BLK_MQ_F_SYSFS_UP = 1 << 3,
148 BLK_MQ_F_DEFER_ISSUE = 1 << 4, 148 BLK_MQ_F_DEFER_ISSUE = 1 << 4,
149 BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
150 BLK_MQ_F_ALLOC_POLICY_BITS = 1,
149 151
150 BLK_MQ_S_STOPPED = 0, 152 BLK_MQ_S_STOPPED = 0,
151 BLK_MQ_S_TAG_ACTIVE = 1, 153 BLK_MQ_S_TAG_ACTIVE = 1,
@@ -154,6 +156,12 @@ enum {
154 156
155 BLK_MQ_CPU_WORK_BATCH = 8, 157 BLK_MQ_CPU_WORK_BATCH = 8,
156}; 158};
159#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
160 ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
161 ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
162#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
163 ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
164 << BLK_MQ_F_ALLOC_POLICY_START_BIT)
157 165
158struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); 166struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
159void blk_mq_finish_init(struct request_queue *q); 167void blk_mq_finish_init(struct request_queue *q);
@@ -166,7 +174,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
166void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 174void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
167 175
168void blk_mq_insert_request(struct request *, bool, bool, bool); 176void blk_mq_insert_request(struct request *, bool, bool, bool);
169void blk_mq_run_queues(struct request_queue *q, bool async);
170void blk_mq_free_request(struct request *rq); 177void blk_mq_free_request(struct request *rq);
171void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq); 178void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
172bool blk_mq_can_queue(struct blk_mq_hw_ctx *); 179bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
@@ -214,6 +221,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
214void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 221void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
215void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, 222void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
216 void *priv); 223 void *priv);
224void blk_mq_freeze_queue(struct request_queue *q);
217void blk_mq_unfreeze_queue(struct request_queue *q); 225void blk_mq_unfreeze_queue(struct request_queue *q);
218void blk_mq_freeze_queue_start(struct request_queue *q); 226void blk_mq_freeze_queue_start(struct request_queue *q);
219 227
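The two macros above stash the tag allocation policy in bit 8 of the blk-mq flags word and extract it again when the tag map is built. The round trip can be checked in isolation with the constants copied straight from this hunk and from the blkdev.h hunk below:

#include <assert.h>
#include <stdio.h>

/* Copied from the blk-mq.h hunk above. */
#define BLK_MQ_F_ALLOC_POLICY_START_BIT 8
#define BLK_MQ_F_ALLOC_POLICY_BITS	1

#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
	 ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
	 << BLK_MQ_F_ALLOC_POLICY_START_BIT)

/* Copied from the blkdev.h hunk below. */
#define BLK_TAG_ALLOC_FIFO 0
#define BLK_TAG_ALLOC_RR   1

int main(void)
{
	unsigned int flags = 0;

	/* A driver ORs the encoded policy into its tag_set flags... */
	flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_RR);
	/* ...and the tag code decodes it when allocating the tag map. */
	assert(BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) == BLK_TAG_ALLOC_RR);
	assert(BLK_MQ_FLAG_TO_ALLOC_POLICY(0) == BLK_TAG_ALLOC_FIFO);

	printf("flags = 0x%x\n", flags);	/* 0x100: policy lives in bit 8 */
	return 0;
}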
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 92f4b4b288dd..7f9a516f24de 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -272,7 +272,11 @@ struct blk_queue_tag {
272 int max_depth; /* what we will send to device */ 272 int max_depth; /* what we will send to device */
273 int real_max_depth; /* what the array can hold */ 273 int real_max_depth; /* what the array can hold */
274 atomic_t refcnt; /* map can be shared */ 274 atomic_t refcnt; /* map can be shared */
275 int alloc_policy; /* tag allocation policy */
276 int next_tag; /* next tag */
275}; 277};
278#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
279#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
276 280
277#define BLK_SCSI_MAX_CMDS (256) 281#define BLK_SCSI_MAX_CMDS (256)
278#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 282#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
@@ -516,6 +520,7 @@ struct request_queue {
516 (1 << QUEUE_FLAG_ADD_RANDOM)) 520 (1 << QUEUE_FLAG_ADD_RANDOM))
517 521
518#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 522#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
523 (1 << QUEUE_FLAG_STACKABLE) | \
519 (1 << QUEUE_FLAG_SAME_COMP)) 524 (1 << QUEUE_FLAG_SAME_COMP))
520 525
521static inline void queue_lockdep_assert_held(struct request_queue *q) 526static inline void queue_lockdep_assert_held(struct request_queue *q)
@@ -850,8 +855,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *,
850extern int blk_rq_unmap_user(struct bio *); 855extern int blk_rq_unmap_user(struct bio *);
851extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 856extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
852extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 857extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
853 struct rq_map_data *, const struct sg_iovec *, 858 struct rq_map_data *, const struct iov_iter *,
854 int, unsigned int, gfp_t); 859 gfp_t);
855extern int blk_execute_rq(struct request_queue *, struct gendisk *, 860extern int blk_execute_rq(struct request_queue *, struct gendisk *,
856 struct request *, int); 861 struct request *, int);
857extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 862extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
@@ -1044,8 +1049,6 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
1044extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 1049extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
1045 1050
1046extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 1051extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
1047extern int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
1048 struct scatterlist *sglist);
1049extern void blk_dump_rq_flags(struct request *, char *); 1052extern void blk_dump_rq_flags(struct request *, char *);
1050extern long nr_blockdev_pages(void); 1053extern long nr_blockdev_pages(void);
1051 1054
@@ -1139,11 +1142,11 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
1139extern int blk_queue_start_tag(struct request_queue *, struct request *); 1142extern int blk_queue_start_tag(struct request_queue *, struct request *);
1140extern struct request *blk_queue_find_tag(struct request_queue *, int); 1143extern struct request *blk_queue_find_tag(struct request_queue *, int);
1141extern void blk_queue_end_tag(struct request_queue *, struct request *); 1144extern void blk_queue_end_tag(struct request_queue *, struct request *);
1142extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); 1145extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
1143extern void blk_queue_free_tags(struct request_queue *); 1146extern void blk_queue_free_tags(struct request_queue *);
1144extern int blk_queue_resize_tags(struct request_queue *, int); 1147extern int blk_queue_resize_tags(struct request_queue *, int);
1145extern void blk_queue_invalidate_tags(struct request_queue *); 1148extern void blk_queue_invalidate_tags(struct request_queue *);
1146extern struct blk_queue_tag *blk_init_tags(int); 1149extern struct blk_queue_tag *blk_init_tags(int, int);
1147extern void blk_free_tags(struct blk_queue_tag *); 1150extern void blk_free_tags(struct blk_queue_tag *);
1148 1151
1149static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, 1152static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
@@ -1162,7 +1165,7 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1162extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, 1165extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
1163 sector_t nr_sects, gfp_t gfp_mask, struct page *page); 1166 sector_t nr_sects, gfp_t gfp_mask, struct page *page);
1164extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1167extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1165 sector_t nr_sects, gfp_t gfp_mask); 1168 sector_t nr_sects, gfp_t gfp_mask, bool discard);
1166static inline int sb_issue_discard(struct super_block *sb, sector_t block, 1169static inline int sb_issue_discard(struct super_block *sb, sector_t block,
1167 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) 1170 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
1168{ 1171{
@@ -1176,7 +1179,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
1176 return blkdev_issue_zeroout(sb->s_bdev, 1179 return blkdev_issue_zeroout(sb->s_bdev,
1177 block << (sb->s_blocksize_bits - 9), 1180 block << (sb->s_blocksize_bits - 9),
1178 nr_blocks << (sb->s_blocksize_bits - 9), 1181 nr_blocks << (sb->s_blocksize_bits - 9),
1179 gfp_mask); 1182 gfp_mask, true);
1180} 1183}
1181 1184
1182extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 1185extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
@@ -1601,8 +1604,8 @@ struct block_device_operations {
1601 int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); 1604 int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
1602 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1605 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1603 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1606 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1604 int (*direct_access) (struct block_device *, sector_t, 1607 long (*direct_access)(struct block_device *, sector_t,
1605 void **, unsigned long *); 1608 void **, unsigned long *pfn, long size);
1606 unsigned int (*check_events) (struct gendisk *disk, 1609 unsigned int (*check_events) (struct gendisk *disk,
1607 unsigned int clearing); 1610 unsigned int clearing);
1608 /* ->media_changed() is DEPRECATED, use ->check_events() instead */ 1611 /* ->media_changed() is DEPRECATED, use ->check_events() instead */
@@ -1620,6 +1623,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
1620extern int bdev_read_page(struct block_device *, sector_t, struct page *); 1623extern int bdev_read_page(struct block_device *, sector_t, struct page *);
1621extern int bdev_write_page(struct block_device *, sector_t, struct page *, 1624extern int bdev_write_page(struct block_device *, sector_t, struct page *,
1622 struct writeback_control *); 1625 struct writeback_control *);
1626extern long bdev_direct_access(struct block_device *, sector_t, void **addr,
1627 unsigned long *pfn, long size);
1623#else /* CONFIG_BLOCK */ 1628#else /* CONFIG_BLOCK */
1624 1629
1625struct block_device; 1630struct block_device;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 019e66858ce6..e113c757d555 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -402,6 +402,9 @@ struct scsi_host_template {
402 */ 402 */
403 unsigned char present; 403 unsigned char present;
404 404
405 /* If the block layer manages tags, this is the tag allocation policy */
406 int tag_alloc_policy;
407
405 /* 408 /*
406 * Let the block layer assigns tags to all commands. 409 * Let the block layer assigns tags to all commands.
407 */ 410 */
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index 9708b28bd2aa..b27977e8aaed 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -66,7 +66,8 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
66 * devices on the shared host (for libata) 66 * devices on the shared host (for libata)
67 */ 67 */
68 if (!shost->bqt) { 68 if (!shost->bqt) {
69 shost->bqt = blk_init_tags(depth); 69 shost->bqt = blk_init_tags(depth,
70 shost->hostt->tag_alloc_policy);
70 if (!shost->bqt) 71 if (!shost->bqt)
71 return -ENOMEM; 72 return -ENOMEM;
72 } 73 }
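With tag_alloc_policy in place, a SCSI low-level driver that lets the block layer manage its tags only needs to set the new field to opt into round-robin allocation. A hypothetical, heavily abbreviated host template (only the tagging-related fields shown) might carry:

#include <linux/blkdev.h>
#include <scsi/scsi_host.h>

/*
 * Illustrative template: tag_alloc_policy is the field added by this
 * patch, BLK_TAG_ALLOC_RR comes from the blkdev.h hunk above.
 */
static struct scsi_host_template demo_sht = {
	.name			= "demo",
	.can_queue		= 32,
	.cmd_per_lun		= 32,
	.use_blk_tags		= 1,
	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
};

scsi_alloc_sdev() and scsi_mq_setup_tags() in the hunks above then forward that value to blk_queue_init_tags() on the legacy path or fold it into the blk-mq tag_set flags, respectively.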