 Documentation/ABI/testing/sysfs-block | 8
 Documentation/block/data-integrity.txt | 54
 block/Kconfig | 1
 block/Makefile | 4
 block/bio-integrity.c | 271
 block/bio.c | 61
 block/blk-cgroup.c | 2
 block/blk-cgroup.h | 3
 block/blk-core.c | 67
 block/blk-flush.c | 147
 block/blk-integrity.c | 103
 block/blk-merge.c | 14
 block/blk-mq-tag.c | 55
 block/blk-mq.c | 364
 block/blk-mq.h | 3
 block/blk-settings.c | 4
 block/blk-sysfs.c | 4
 block/blk-timeout.c | 15
 block/blk.h | 37
 block/bsg.c | 9
 block/cfq-iosched.c | 10
 block/compat_ioctl.c | 4
 block/ioctl.c | 4
 block/partitions/mac.c | 8
 block/scsi_ioctl.c | 11
 block/t10-pi.c | 197
 drivers/block/mtip32xx/mtip32xx.c | 9
 drivers/block/null_blk.c | 7
 drivers/block/paride/pd.c | 2
 drivers/block/pktcdvd.c | 2
 drivers/block/sx8.c | 2
 drivers/block/virtio_blk.c | 8
 drivers/cdrom/cdrom.c | 4
 drivers/ide/ide-park.c | 2
 drivers/scsi/Kconfig | 1
 drivers/scsi/device_handler/scsi_dh_alua.c | 2
 drivers/scsi/device_handler/scsi_dh_emc.c | 2
 drivers/scsi/device_handler/scsi_dh_hp_sw.c | 4
 drivers/scsi/device_handler/scsi_dh_rdac.c | 2
 drivers/scsi/osd/osd_initiator.c | 4
 drivers/scsi/osst.c | 2
 drivers/scsi/scsi_error.c | 2
 drivers/scsi/scsi_lib.c | 24
 drivers/scsi/sd.c | 73
 drivers/scsi/sd.h | 66
 drivers/scsi/sd_dif.c | 353
 drivers/scsi/sg.c | 4
 drivers/scsi/st.c | 2
 drivers/target/target_core_pscsi.c | 2
 fs/block_dev.c | 34
 fs/btrfs/disk-io.c | 2
 fs/nfs/direct.c | 12
 fs/nfs/file.c | 4
 fs/xfs/xfs_buf.c | 2
 include/linux/backing-dev.h | 4
 include/linux/bio.h | 65
 include/linux/blk-mq.h | 22
 include/linux/blk_types.h | 18
 include/linux/blkdev.h | 71
 include/linux/crc-t10dif.h | 5
 include/linux/fs.h | 2
 include/linux/nfs_fs.h | 4
 include/linux/t10-pi.h | 22
 include/scsi/scsi_cmnd.h | 36
 mm/backing-dev.c | 40
 65 files changed, 1210 insertions(+), 1172 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 279da08f7541..8df003963d99 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -53,6 +53,14 @@ Description:
 		512 bytes of data.
 
 
+What:		/sys/block/<disk>/integrity/device_is_integrity_capable
+Date:		July 2014
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Indicates whether a storage device is capable of storing
+		integrity metadata. Set if the device is T10 PI-capable.
+
+
 What:		/sys/block/<disk>/integrity/write_generate
 Date:		June 2008
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index 2d735b0ae383..f56ec97f0d14 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -129,11 +129,11 @@ interface for this is being worked on.
 4.1 BIO
 
 The data integrity patches add a new field to struct bio when
-CONFIG_BLK_DEV_INTEGRITY is enabled. bio->bi_integrity is a pointer
-to a struct bip which contains the bio integrity payload. Essentially
-a bip is a trimmed down struct bio which holds a bio_vec containing
-the integrity metadata and the required housekeeping information (bvec
-pool, vector count, etc.)
+CONFIG_BLK_DEV_INTEGRITY is enabled. bio_integrity(bio) returns a
+pointer to a struct bip which contains the bio integrity payload.
+Essentially a bip is a trimmed down struct bio which holds a bio_vec
+containing the integrity metadata and the required housekeeping
+information (bvec pool, vector count, etc.)
 
 A kernel subsystem can enable data integrity protection on a bio by
 calling bio_integrity_alloc(bio). This will allocate and attach the
@@ -192,16 +192,6 @@ will require extra work due to the application tag.
       supported by the block device.
 
 
-    int bdev_integrity_enabled(block_device, int rw);
-
-      bdev_integrity_enabled() will return 1 if the block device
-      supports integrity metadata transfer for the data direction
-      specified in 'rw'.
-
-      bdev_integrity_enabled() honors the write_generate and
-      read_verify flags in sysfs and will respond accordingly.
-
-
     int bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
@@ -216,36 +206,6 @@ will require extra work due to the application tag.
       bio_integrity_enabled() returned 1.
 
 
-    int bio_integrity_tag_size(bio);
-
-      If the filesystem wants to use the application tag space it will
-      first have to find out how much storage space is available.
-      Because tag space is generally limited (usually 2 bytes per
-      sector regardless of sector size), the integrity framework
-      supports interleaving the information between the sectors in an
-      I/O.
-
-      Filesystems can call bio_integrity_tag_size(bio) to find out how
-      many bytes of storage are available for that particular bio.
-
-      Another option is bdev_get_tag_size(block_device) which will
-      return the number of available bytes per hardware sector.
-
-
-    int bio_integrity_set_tag(bio, void *tag_buf, len);
-
-      After a successful return from bio_integrity_prep(),
-      bio_integrity_set_tag() can be used to attach an opaque tag
-      buffer to a bio. Obviously this only makes sense if the I/O is
-      a WRITE.
-
-
-    int bio_integrity_get_tag(bio, void *tag_buf, len);
-
-      Similarly, at READ I/O completion time the filesystem can
-      retrieve the tag buffer using bio_integrity_get_tag().
-
-
 5.3 PASSING EXISTING INTEGRITY METADATA
 
     Filesystems that either generate their own integrity metadata or
@@ -298,8 +258,6 @@ will require extra work due to the application tag.
         .name        = "STANDARDSBODY-TYPE-VARIANT-CSUM",
         .generate_fn = my_generate_fn,
         .verify_fn   = my_verify_fn,
-        .get_tag_fn  = my_get_tag_fn,
-        .set_tag_fn  = my_set_tag_fn,
         .tuple_size  = sizeof(struct my_tuple_size),
         .tag_size    = <tag bytes per hw sector>,
     };
@@ -321,7 +279,5 @@ will require extra work due to the application tag.
     are available per hardware sector. For DIF this is either 2 or
     0 depending on the value of the Control Mode Page ATO bit.
 
-    See 6.2 for a description of get_tag_fn and set_tag_fn.
-
 ----------------------------------------------------------------------
 2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
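
The registration template retained above no longer carries the tag callbacks. As a hedged illustration only (not part of this patch), a profile written against the reworked callback scheme might look roughly like the sketch below, assuming the generate/verify hooks take the struct blk_integrity_iter introduced in the block/bio-integrity.c hunks further down and return an int; my_tuple, my_generate_fn, my_verify_fn and my_profile are placeholder names:

    struct my_tuple {                       /* placeholder 8-byte PI tuple */
            __be16 guard_tag;
            __be16 app_tag;
            __be32 ref_tag;
    };

    static int my_generate_fn(struct blk_integrity_iter *iter)
    {
            /* Fill iter->prot_buf with one tuple per iter->interval bytes
             * of iter->data_buf, seeded by iter->seed. */
            return 0;
    }

    static int my_verify_fn(struct blk_integrity_iter *iter)
    {
            /* Recompute the tuples, compare against iter->prot_buf and
             * return a negative errno on mismatch. */
            return 0;
    }

    static struct blk_integrity my_profile = {
            .name        = "STANDARDSBODY-TYPE-VARIANT-CSUM",
            .generate_fn = my_generate_fn,
            .verify_fn   = my_verify_fn,
            .tuple_size  = sizeof(struct my_tuple),
            .tag_size    = 0,
    };

A driver would then attach the profile to its gendisk, e.g. with blk_integrity_register(disk, &my_profile).
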
diff --git a/block/Kconfig b/block/Kconfig
index 2429515c05c2..161491d0a879 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -77,6 +77,7 @@ config BLK_DEV_BSGLIB
 
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
+	select CRC_T10DIF if BLK_DEV_INTEGRITY
 	---help---
 	Some storage devices allow extra information to be
 	stored/retrieved to help protect the data. The block layer
diff --git a/block/Makefile b/block/Makefile
index a2ce6ac935ec..00ecc97629db 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -20,6 +20,6 @@ obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= blk-integrity.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= bio-integrity.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= bio-integrity.o blk-integrity.o t10-pi.o
+
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index f14b4abbebd8..0984232e429f 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -79,6 +79,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
79 bip->bip_slab = idx; 79 bip->bip_slab = idx;
80 bip->bip_bio = bio; 80 bip->bip_bio = bio;
81 bio->bi_integrity = bip; 81 bio->bi_integrity = bip;
82 bio->bi_rw |= REQ_INTEGRITY;
82 83
83 return bip; 84 return bip;
84err: 85err:
@@ -96,11 +97,12 @@ EXPORT_SYMBOL(bio_integrity_alloc);
96 */ 97 */
97void bio_integrity_free(struct bio *bio) 98void bio_integrity_free(struct bio *bio)
98{ 99{
99 struct bio_integrity_payload *bip = bio->bi_integrity; 100 struct bio_integrity_payload *bip = bio_integrity(bio);
100 struct bio_set *bs = bio->bi_pool; 101 struct bio_set *bs = bio->bi_pool;
101 102
102 if (bip->bip_owns_buf) 103 if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
103 kfree(bip->bip_buf); 104 kfree(page_address(bip->bip_vec->bv_page) +
105 bip->bip_vec->bv_offset);
104 106
105 if (bs) { 107 if (bs) {
106 if (bip->bip_slab != BIO_POOL_NONE) 108 if (bip->bip_slab != BIO_POOL_NONE)
@@ -128,7 +130,7 @@ EXPORT_SYMBOL(bio_integrity_free);
128int bio_integrity_add_page(struct bio *bio, struct page *page, 130int bio_integrity_add_page(struct bio *bio, struct page *page,
129 unsigned int len, unsigned int offset) 131 unsigned int len, unsigned int offset)
130{ 132{
131 struct bio_integrity_payload *bip = bio->bi_integrity; 133 struct bio_integrity_payload *bip = bio_integrity(bio);
132 struct bio_vec *iv; 134 struct bio_vec *iv;
133 135
134 if (bip->bip_vcnt >= bip->bip_max_vcnt) { 136 if (bip->bip_vcnt >= bip->bip_max_vcnt) {
@@ -147,24 +149,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
147} 149}
148EXPORT_SYMBOL(bio_integrity_add_page); 150EXPORT_SYMBOL(bio_integrity_add_page);
149 151
150static int bdev_integrity_enabled(struct block_device *bdev, int rw)
151{
152 struct blk_integrity *bi = bdev_get_integrity(bdev);
153
154 if (bi == NULL)
155 return 0;
156
157 if (rw == READ && bi->verify_fn != NULL &&
158 (bi->flags & INTEGRITY_FLAG_READ))
159 return 1;
160
161 if (rw == WRITE && bi->generate_fn != NULL &&
162 (bi->flags & INTEGRITY_FLAG_WRITE))
163 return 1;
164
165 return 0;
166}
167
168/** 152/**
169 * bio_integrity_enabled - Check whether integrity can be passed 153 * bio_integrity_enabled - Check whether integrity can be passed
170 * @bio: bio to check 154 * @bio: bio to check
@@ -174,171 +158,86 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw)
174 * set prior to calling. The functions honors the write_generate and 158 * set prior to calling. The functions honors the write_generate and
175 * read_verify flags in sysfs. 159 * read_verify flags in sysfs.
176 */ 160 */
177int bio_integrity_enabled(struct bio *bio) 161bool bio_integrity_enabled(struct bio *bio)
178{ 162{
163 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
164
179 if (!bio_is_rw(bio)) 165 if (!bio_is_rw(bio))
180 return 0; 166 return false;
181 167
182 /* Already protected? */ 168 /* Already protected? */
183 if (bio_integrity(bio)) 169 if (bio_integrity(bio))
184 return 0; 170 return false;
171
172 if (bi == NULL)
173 return false;
174
175 if (bio_data_dir(bio) == READ && bi->verify_fn != NULL &&
176 (bi->flags & BLK_INTEGRITY_VERIFY))
177 return true;
178
179 if (bio_data_dir(bio) == WRITE && bi->generate_fn != NULL &&
180 (bi->flags & BLK_INTEGRITY_GENERATE))
181 return true;
185 182
186 return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio)); 183 return false;
187} 184}
188EXPORT_SYMBOL(bio_integrity_enabled); 185EXPORT_SYMBOL(bio_integrity_enabled);
189 186
190/** 187/**
191 * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto 188 * bio_integrity_intervals - Return number of integrity intervals for a bio
192 * @bi: blk_integrity profile for device 189 * @bi: blk_integrity profile for device
193 * @sectors: Number of 512 sectors to convert 190 * @sectors: Size of the bio in 512-byte sectors
194 * 191 *
195 * Description: The block layer calculates everything in 512 byte 192 * Description: The block layer calculates everything in 512 byte
196 * sectors but integrity metadata is done in terms of the hardware 193 * sectors but integrity metadata is done in terms of the data integrity
197 * sector size of the storage device. Convert the block layer sectors 194 * interval size of the storage device. Convert the block layer sectors
198 * to physical sectors. 195 * to the appropriate number of integrity intervals.
199 */ 196 */
200static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, 197static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
201 unsigned int sectors) 198 unsigned int sectors)
202{ 199{
203 /* At this point there are only 512b or 4096b DIF/EPP devices */ 200 return sectors >> (ilog2(bi->interval) - 9);
204 if (bi->sector_size == 4096)
205 return sectors >>= 3;
206
207 return sectors;
208} 201}
209 202
210static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, 203static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
211 unsigned int sectors) 204 unsigned int sectors)
212{ 205{
213 return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size; 206 return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
214} 207}
215 208
216/** 209/**
217 * bio_integrity_tag_size - Retrieve integrity tag space 210 * bio_integrity_process - Process integrity metadata for a bio
218 * @bio: bio to inspect
219 *
220 * Description: Returns the maximum number of tag bytes that can be
221 * attached to this bio. Filesystems can use this to determine how
222 * much metadata to attach to an I/O.
223 */
224unsigned int bio_integrity_tag_size(struct bio *bio)
225{
226 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
227
228 BUG_ON(bio->bi_iter.bi_size == 0);
229
230 return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
231}
232EXPORT_SYMBOL(bio_integrity_tag_size);
233
234static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
235 int set)
236{
237 struct bio_integrity_payload *bip = bio->bi_integrity;
238 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
239 unsigned int nr_sectors;
240
241 BUG_ON(bip->bip_buf == NULL);
242
243 if (bi->tag_size == 0)
244 return -1;
245
246 nr_sectors = bio_integrity_hw_sectors(bi,
247 DIV_ROUND_UP(len, bi->tag_size));
248
249 if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
250 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
251 nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
252 return -1;
253 }
254
255 if (set)
256 bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
257 else
258 bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
259
260 return 0;
261}
262
263/**
264 * bio_integrity_set_tag - Attach a tag buffer to a bio
265 * @bio: bio to attach buffer to
266 * @tag_buf: Pointer to a buffer containing tag data
267 * @len: Length of the included buffer
268 *
269 * Description: Use this function to tag a bio by leveraging the extra
270 * space provided by devices formatted with integrity protection. The
271 * size of the integrity buffer must be <= to the size reported by
272 * bio_integrity_tag_size().
273 */
274int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
275{
276 BUG_ON(bio_data_dir(bio) != WRITE);
277
278 return bio_integrity_tag(bio, tag_buf, len, 1);
279}
280EXPORT_SYMBOL(bio_integrity_set_tag);
281
282/**
283 * bio_integrity_get_tag - Retrieve a tag buffer from a bio
284 * @bio: bio to retrieve buffer from
285 * @tag_buf: Pointer to a buffer for the tag data
286 * @len: Length of the target buffer
287 *
288 * Description: Use this function to retrieve the tag buffer from a
289 * completed I/O. The size of the integrity buffer must be <= to the
290 * size reported by bio_integrity_tag_size().
291 */
292int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
293{
294 BUG_ON(bio_data_dir(bio) != READ);
295
296 return bio_integrity_tag(bio, tag_buf, len, 0);
297}
298EXPORT_SYMBOL(bio_integrity_get_tag);
299
300/**
301 * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio
302 * @bio: bio to generate/verify integrity metadata for 211 * @bio: bio to generate/verify integrity metadata for
303 * @operate: operate number, 1 for generate, 0 for verify 212 * @proc_fn: Pointer to the relevant processing function
304 */ 213 */
305static int bio_integrity_generate_verify(struct bio *bio, int operate) 214static int bio_integrity_process(struct bio *bio,
215 integrity_processing_fn *proc_fn)
306{ 216{
307 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 217 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
308 struct blk_integrity_exchg bix; 218 struct blk_integrity_iter iter;
309 struct bio_vec *bv; 219 struct bio_vec *bv;
310 sector_t sector; 220 struct bio_integrity_payload *bip = bio_integrity(bio);
311 unsigned int sectors, ret = 0, i; 221 unsigned int i, ret = 0;
312 void *prot_buf = bio->bi_integrity->bip_buf; 222 void *prot_buf = page_address(bip->bip_vec->bv_page) +
313 223 bip->bip_vec->bv_offset;
314 if (operate)
315 sector = bio->bi_iter.bi_sector;
316 else
317 sector = bio->bi_integrity->bip_iter.bi_sector;
318 224
319 bix.disk_name = bio->bi_bdev->bd_disk->disk_name; 225 iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
320 bix.sector_size = bi->sector_size; 226 iter.interval = bi->interval;
227 iter.seed = bip_get_seed(bip);
228 iter.prot_buf = prot_buf;
321 229
322 bio_for_each_segment_all(bv, bio, i) { 230 bio_for_each_segment_all(bv, bio, i) {
323 void *kaddr = kmap_atomic(bv->bv_page); 231 void *kaddr = kmap_atomic(bv->bv_page);
324 bix.data_buf = kaddr + bv->bv_offset;
325 bix.data_size = bv->bv_len;
326 bix.prot_buf = prot_buf;
327 bix.sector = sector;
328
329 if (operate)
330 bi->generate_fn(&bix);
331 else {
332 ret = bi->verify_fn(&bix);
333 if (ret) {
334 kunmap_atomic(kaddr);
335 return ret;
336 }
337 }
338 232
339 sectors = bv->bv_len / bi->sector_size; 233 iter.data_buf = kaddr + bv->bv_offset;
340 sector += sectors; 234 iter.data_size = bv->bv_len;
341 prot_buf += sectors * bi->tuple_size; 235
236 ret = proc_fn(&iter);
237 if (ret) {
238 kunmap_atomic(kaddr);
239 return ret;
240 }
342 241
343 kunmap_atomic(kaddr); 242 kunmap_atomic(kaddr);
344 } 243 }
@@ -346,28 +245,6 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
346} 245}
347 246
348/** 247/**
349 * bio_integrity_generate - Generate integrity metadata for a bio
350 * @bio: bio to generate integrity metadata for
351 *
352 * Description: Generates integrity metadata for a bio by calling the
353 * block device's generation callback function. The bio must have a
354 * bip attached with enough room to accommodate the generated
355 * integrity metadata.
356 */
357static void bio_integrity_generate(struct bio *bio)
358{
359 bio_integrity_generate_verify(bio, 1);
360}
361
362static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
363{
364 if (bi)
365 return bi->tuple_size;
366
367 return 0;
368}
369
370/**
371 * bio_integrity_prep - Prepare bio for integrity I/O 248 * bio_integrity_prep - Prepare bio for integrity I/O
372 * @bio: bio to prepare 249 * @bio: bio to prepare
373 * 250 *
@@ -387,17 +264,17 @@ int bio_integrity_prep(struct bio *bio)
387 unsigned long start, end; 264 unsigned long start, end;
388 unsigned int len, nr_pages; 265 unsigned int len, nr_pages;
389 unsigned int bytes, offset, i; 266 unsigned int bytes, offset, i;
390 unsigned int sectors; 267 unsigned int intervals;
391 268
392 bi = bdev_get_integrity(bio->bi_bdev); 269 bi = bdev_get_integrity(bio->bi_bdev);
393 q = bdev_get_queue(bio->bi_bdev); 270 q = bdev_get_queue(bio->bi_bdev);
394 BUG_ON(bi == NULL); 271 BUG_ON(bi == NULL);
395 BUG_ON(bio_integrity(bio)); 272 BUG_ON(bio_integrity(bio));
396 273
397 sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio)); 274 intervals = bio_integrity_intervals(bi, bio_sectors(bio));
398 275
399 /* Allocate kernel buffer for protection data */ 276 /* Allocate kernel buffer for protection data */
400 len = sectors * blk_integrity_tuple_size(bi); 277 len = intervals * bi->tuple_size;
401 buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); 278 buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
402 if (unlikely(buf == NULL)) { 279 if (unlikely(buf == NULL)) {
403 printk(KERN_ERR "could not allocate integrity buffer\n"); 280 printk(KERN_ERR "could not allocate integrity buffer\n");
@@ -416,10 +293,12 @@ int bio_integrity_prep(struct bio *bio)
416 return -EIO; 293 return -EIO;
417 } 294 }
418 295
419 bip->bip_owns_buf = 1; 296 bip->bip_flags |= BIP_BLOCK_INTEGRITY;
420 bip->bip_buf = buf;
421 bip->bip_iter.bi_size = len; 297 bip->bip_iter.bi_size = len;
422 bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; 298 bip_set_seed(bip, bio->bi_iter.bi_sector);
299
300 if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
301 bip->bip_flags |= BIP_IP_CHECKSUM;
423 302
424 /* Map it */ 303 /* Map it */
425 offset = offset_in_page(buf); 304 offset = offset_in_page(buf);
@@ -455,26 +334,13 @@ int bio_integrity_prep(struct bio *bio)
455 334
456 /* Auto-generate integrity metadata if this is a write */ 335 /* Auto-generate integrity metadata if this is a write */
457 if (bio_data_dir(bio) == WRITE) 336 if (bio_data_dir(bio) == WRITE)
458 bio_integrity_generate(bio); 337 bio_integrity_process(bio, bi->generate_fn);
459 338
460 return 0; 339 return 0;
461} 340}
462EXPORT_SYMBOL(bio_integrity_prep); 341EXPORT_SYMBOL(bio_integrity_prep);
463 342
464/** 343/**
465 * bio_integrity_verify - Verify integrity metadata for a bio
466 * @bio: bio to verify
467 *
468 * Description: This function is called to verify the integrity of a
469 * bio. The data in the bio io_vec is compared to the integrity
470 * metadata returned by the HBA.
471 */
472static int bio_integrity_verify(struct bio *bio)
473{
474 return bio_integrity_generate_verify(bio, 0);
475}
476
477/**
478 * bio_integrity_verify_fn - Integrity I/O completion worker 344 * bio_integrity_verify_fn - Integrity I/O completion worker
479 * @work: Work struct stored in bio to be verified 345 * @work: Work struct stored in bio to be verified
480 * 346 *
@@ -487,9 +353,10 @@ static void bio_integrity_verify_fn(struct work_struct *work)
487 struct bio_integrity_payload *bip = 353 struct bio_integrity_payload *bip =
488 container_of(work, struct bio_integrity_payload, bip_work); 354 container_of(work, struct bio_integrity_payload, bip_work);
489 struct bio *bio = bip->bip_bio; 355 struct bio *bio = bip->bip_bio;
356 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
490 int error; 357 int error;
491 358
492 error = bio_integrity_verify(bio); 359 error = bio_integrity_process(bio, bi->verify_fn);
493 360
494 /* Restore original bio completion handler */ 361 /* Restore original bio completion handler */
495 bio->bi_end_io = bip->bip_end_io; 362 bio->bi_end_io = bip->bip_end_io;
@@ -510,7 +377,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
510 */ 377 */
511void bio_integrity_endio(struct bio *bio, int error) 378void bio_integrity_endio(struct bio *bio, int error)
512{ 379{
513 struct bio_integrity_payload *bip = bio->bi_integrity; 380 struct bio_integrity_payload *bip = bio_integrity(bio);
514 381
515 BUG_ON(bip->bip_bio != bio); 382 BUG_ON(bip->bip_bio != bio);
516 383
@@ -541,7 +408,7 @@ EXPORT_SYMBOL(bio_integrity_endio);
541 */ 408 */
542void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) 409void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
543{ 410{
544 struct bio_integrity_payload *bip = bio->bi_integrity; 411 struct bio_integrity_payload *bip = bio_integrity(bio);
545 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 412 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
546 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); 413 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
547 414
@@ -563,7 +430,7 @@ EXPORT_SYMBOL(bio_integrity_advance);
563void bio_integrity_trim(struct bio *bio, unsigned int offset, 430void bio_integrity_trim(struct bio *bio, unsigned int offset,
564 unsigned int sectors) 431 unsigned int sectors)
565{ 432{
566 struct bio_integrity_payload *bip = bio->bi_integrity; 433 struct bio_integrity_payload *bip = bio_integrity(bio);
567 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 434 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
568 435
569 bio_integrity_advance(bio, offset << 9); 436 bio_integrity_advance(bio, offset << 9);
@@ -582,7 +449,7 @@ EXPORT_SYMBOL(bio_integrity_trim);
582int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 449int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
583 gfp_t gfp_mask) 450 gfp_t gfp_mask)
584{ 451{
585 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 452 struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
586 struct bio_integrity_payload *bip; 453 struct bio_integrity_payload *bip;
587 454
588 BUG_ON(bip_src == NULL); 455 BUG_ON(bip_src == NULL);
@@ -646,6 +513,4 @@ void __init bio_integrity_init(void)
646 sizeof(struct bio_integrity_payload) + 513 sizeof(struct bio_integrity_payload) +
647 sizeof(struct bio_vec) * BIP_INLINE_VECS, 514 sizeof(struct bio_vec) * BIP_INLINE_VECS,
648 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 515 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
649 if (!bip_slab)
650 panic("Failed to create slab\n");
651} 516}
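
As a quick worked example of the interval conversion introduced above: ilog2(interval) - 9 is the shift from 512-byte sectors to protection intervals, so a 4096-byte interval gives a shift of 3. The small user-space sketch below (plain C for illustration, not kernel code) mirrors bio_integrity_intervals() and bio_integrity_bytes() for a 32-sector bio with 8-byte tuples:

    #include <stdio.h>

    /* User-space rework of the kernel's bio_integrity_intervals() logic. */
    static unsigned int intervals(unsigned int interval, unsigned int sectors)
    {
            unsigned int shift = 0;

            while ((1u << (shift + 9)) < interval)  /* ilog2(interval) - 9 */
                    shift++;
            return sectors >> shift;
    }

    int main(void)
    {
            unsigned int n = intervals(4096, 32);   /* 16 KB bio, 4 KB interval */

            printf("%u intervals, %u bytes of PI\n", n, n * 8);   /* prints 4, 32 */
            return 0;
    }
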
diff --git a/block/bio.c b/block/bio.c
index 3e6331d25d90..3e6e1986a5b2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -428,6 +428,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
428 front_pad = 0; 428 front_pad = 0;
429 inline_vecs = nr_iovecs; 429 inline_vecs = nr_iovecs;
430 } else { 430 } else {
431 /* should not use nobvec bioset for nr_iovecs > 0 */
432 if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
433 return NULL;
431 /* 434 /*
432 * generic_make_request() converts recursion to iteration; this 435 * generic_make_request() converts recursion to iteration; this
433 * means if we're running beneath it, any bios we allocate and 436 * means if we're running beneath it, any bios we allocate and
@@ -1900,20 +1903,9 @@ void bioset_free(struct bio_set *bs)
1900} 1903}
1901EXPORT_SYMBOL(bioset_free); 1904EXPORT_SYMBOL(bioset_free);
1902 1905
1903/** 1906static struct bio_set *__bioset_create(unsigned int pool_size,
1904 * bioset_create - Create a bio_set 1907 unsigned int front_pad,
1905 * @pool_size: Number of bio and bio_vecs to cache in the mempool 1908 bool create_bvec_pool)
1906 * @front_pad: Number of bytes to allocate in front of the returned bio
1907 *
1908 * Description:
1909 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1910 * to ask for a number of bytes to be allocated in front of the bio.
1911 * Front pad allocation is useful for embedding the bio inside
1912 * another structure, to avoid allocating extra data to go with the bio.
1913 * Note that the bio must be embedded at the END of that structure always,
1914 * or things will break badly.
1915 */
1916struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1917{ 1909{
1918 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); 1910 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1919 struct bio_set *bs; 1911 struct bio_set *bs;
@@ -1938,9 +1930,11 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1938 if (!bs->bio_pool) 1930 if (!bs->bio_pool)
1939 goto bad; 1931 goto bad;
1940 1932
1941 bs->bvec_pool = biovec_create_pool(pool_size); 1933 if (create_bvec_pool) {
1942 if (!bs->bvec_pool) 1934 bs->bvec_pool = biovec_create_pool(pool_size);
1943 goto bad; 1935 if (!bs->bvec_pool)
1936 goto bad;
1937 }
1944 1938
1945 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); 1939 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1946 if (!bs->rescue_workqueue) 1940 if (!bs->rescue_workqueue)
@@ -1951,8 +1945,41 @@ bad:
1951 bioset_free(bs); 1945 bioset_free(bs);
1952 return NULL; 1946 return NULL;
1953} 1947}
1948
1949/**
1950 * bioset_create - Create a bio_set
1951 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1952 * @front_pad: Number of bytes to allocate in front of the returned bio
1953 *
1954 * Description:
1955 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1956 * to ask for a number of bytes to be allocated in front of the bio.
1957 * Front pad allocation is useful for embedding the bio inside
1958 * another structure, to avoid allocating extra data to go with the bio.
1959 * Note that the bio must be embedded at the END of that structure always,
1960 * or things will break badly.
1961 */
1962struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1963{
1964 return __bioset_create(pool_size, front_pad, true);
1965}
1954EXPORT_SYMBOL(bioset_create); 1966EXPORT_SYMBOL(bioset_create);
1955 1967
1968/**
1969 * bioset_create_nobvec - Create a bio_set without bio_vec mempool
1970 * @pool_size: Number of bio to cache in the mempool
1971 * @front_pad: Number of bytes to allocate in front of the returned bio
1972 *
1973 * Description:
1974 * Same functionality as bioset_create() except that mempool is not
1975 * created for bio_vecs. Saving some memory for bio_clone_fast() users.
1976 */
1977struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
1978{
1979 return __bioset_create(pool_size, front_pad, false);
1980}
1981EXPORT_SYMBOL(bioset_create_nobvec);
1982
1956#ifdef CONFIG_BLK_CGROUP 1983#ifdef CONFIG_BLK_CGROUP
1957/** 1984/**
1958 * bio_associate_current - associate a bio with %current 1985 * bio_associate_current - associate a bio with %current
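
A hedged usage sketch for the new helper above: a subsystem that only duplicates existing bios with bio_clone_fast() never allocates bio_vecs of its own, so it can create its bio_set without the bvec mempool. The names and pool size below are illustrative, not taken from this patch:

    static struct bio_set *my_bio_set;

    static int __init my_driver_init(void)
    {
            /* Four cached bios, no front pad, and no bvec mempool. */
            my_bio_set = bioset_create_nobvec(4, 0);
            if (!my_bio_set)
                    return -ENOMEM;
            return 0;
    }
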
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e17da947f6bd..0ac817b750db 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -822,7 +822,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
822static struct cgroup_subsys_state * 822static struct cgroup_subsys_state *
823blkcg_css_alloc(struct cgroup_subsys_state *parent_css) 823blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
824{ 824{
825 static atomic64_t id_seq = ATOMIC64_INIT(0);
826 struct blkcg *blkcg; 825 struct blkcg *blkcg;
827 826
828 if (!parent_css) { 827 if (!parent_css) {
@@ -836,7 +835,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
836 835
837 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; 836 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
838 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; 837 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
839 blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
840done: 838done:
841 spin_lock_init(&blkcg->lock); 839 spin_lock_init(&blkcg->lock);
842 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); 840 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index d3fd7aa3d2a3..c567865b5f1d 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -50,9 +50,6 @@ struct blkcg {
50 struct blkcg_gq *blkg_hint; 50 struct blkcg_gq *blkg_hint;
51 struct hlist_head blkg_list; 51 struct hlist_head blkg_list;
52 52
53 /* for policies to test whether associated blkcg has changed */
54 uint64_t id;
55
56 /* TODO: per-policy storage in blkcg */ 53 /* TODO: per-policy storage in blkcg */
57 unsigned int cfq_weight; /* belongs to cfq */ 54 unsigned int cfq_weight; /* belongs to cfq */
58 unsigned int cfq_leaf_weight; 55 unsigned int cfq_leaf_weight;
diff --git a/block/blk-core.c b/block/blk-core.c
index 9c888bd22b00..0421b53e6431 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q)
83 * @bdev: device 83 * @bdev: device
84 * 84 *
85 * Locates the passed device's request queue and returns the address of its 85 * Locates the passed device's request queue and returns the address of its
86 * backing_dev_info 86 * backing_dev_info. This function can only be called if @bdev is opened
87 * 87 * and the return value is never NULL.
88 * Will return NULL if the request queue cannot be located.
89 */ 88 */
90struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 89struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
91{ 90{
92 struct backing_dev_info *ret = NULL;
93 struct request_queue *q = bdev_get_queue(bdev); 91 struct request_queue *q = bdev_get_queue(bdev);
94 92
95 if (q) 93 return &q->backing_dev_info;
96 ret = &q->backing_dev_info;
97 return ret;
98} 94}
99EXPORT_SYMBOL(blk_get_backing_dev_info); 95EXPORT_SYMBOL(blk_get_backing_dev_info);
100 96
@@ -394,11 +390,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
394 * be drained. Check all the queues and counters. 390 * be drained. Check all the queues and counters.
395 */ 391 */
396 if (drain_all) { 392 if (drain_all) {
393 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
397 drain |= !list_empty(&q->queue_head); 394 drain |= !list_empty(&q->queue_head);
398 for (i = 0; i < 2; i++) { 395 for (i = 0; i < 2; i++) {
399 drain |= q->nr_rqs[i]; 396 drain |= q->nr_rqs[i];
400 drain |= q->in_flight[i]; 397 drain |= q->in_flight[i];
401 drain |= !list_empty(&q->flush_queue[i]); 398 if (fq)
399 drain |= !list_empty(&fq->flush_queue[i]);
402 } 400 }
403 } 401 }
404 402
@@ -604,9 +602,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
604#ifdef CONFIG_BLK_CGROUP 602#ifdef CONFIG_BLK_CGROUP
605 INIT_LIST_HEAD(&q->blkg_list); 603 INIT_LIST_HEAD(&q->blkg_list);
606#endif 604#endif
607 INIT_LIST_HEAD(&q->flush_queue[0]);
608 INIT_LIST_HEAD(&q->flush_queue[1]);
609 INIT_LIST_HEAD(&q->flush_data_in_flight);
610 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); 605 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
611 606
612 kobject_init(&q->kobj, &blk_queue_ktype); 607 kobject_init(&q->kobj, &blk_queue_ktype);
@@ -709,8 +704,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
709 if (!q) 704 if (!q)
710 return NULL; 705 return NULL;
711 706
712 q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); 707 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
713 if (!q->flush_rq) 708 if (!q->fq)
714 return NULL; 709 return NULL;
715 710
716 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) 711 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
@@ -746,7 +741,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
746 return q; 741 return q;
747 742
748fail: 743fail:
749 kfree(q->flush_rq); 744 blk_free_flush_queue(q->fq);
750 return NULL; 745 return NULL;
751} 746}
752EXPORT_SYMBOL(blk_init_allocated_queue); 747EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -934,8 +929,8 @@ static struct io_context *rq_ioc(struct bio *bio)
934 * pressure or if @q is dead. 929 * pressure or if @q is dead.
935 * 930 *
936 * Must be called with @q->queue_lock held and, 931 * Must be called with @q->queue_lock held and,
937 * Returns %NULL on failure, with @q->queue_lock held. 932 * Returns ERR_PTR on failure, with @q->queue_lock held.
938 * Returns !%NULL on success, with @q->queue_lock *not held*. 933 * Returns request pointer on success, with @q->queue_lock *not held*.
939 */ 934 */
940static struct request *__get_request(struct request_list *rl, int rw_flags, 935static struct request *__get_request(struct request_list *rl, int rw_flags,
941 struct bio *bio, gfp_t gfp_mask) 936 struct bio *bio, gfp_t gfp_mask)
@@ -949,7 +944,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
949 int may_queue; 944 int may_queue;
950 945
951 if (unlikely(blk_queue_dying(q))) 946 if (unlikely(blk_queue_dying(q)))
952 return NULL; 947 return ERR_PTR(-ENODEV);
953 948
954 may_queue = elv_may_queue(q, rw_flags); 949 may_queue = elv_may_queue(q, rw_flags);
955 if (may_queue == ELV_MQUEUE_NO) 950 if (may_queue == ELV_MQUEUE_NO)
@@ -974,7 +969,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
974 * process is not a "batcher", and not 969 * process is not a "batcher", and not
975 * exempted by the IO scheduler 970 * exempted by the IO scheduler
976 */ 971 */
977 return NULL; 972 return ERR_PTR(-ENOMEM);
978 } 973 }
979 } 974 }
980 } 975 }
@@ -992,7 +987,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
992 * allocated with any setting of ->nr_requests 987 * allocated with any setting of ->nr_requests
993 */ 988 */
994 if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) 989 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
995 return NULL; 990 return ERR_PTR(-ENOMEM);
996 991
997 q->nr_rqs[is_sync]++; 992 q->nr_rqs[is_sync]++;
998 rl->count[is_sync]++; 993 rl->count[is_sync]++;
@@ -1065,8 +1060,8 @@ fail_elvpriv:
1065 * shouldn't stall IO. Treat this request as !elvpriv. This will 1060 * shouldn't stall IO. Treat this request as !elvpriv. This will
1066 * disturb iosched and blkcg but weird is bettern than dead. 1061 * disturb iosched and blkcg but weird is bettern than dead.
1067 */ 1062 */
1068 printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n", 1063 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
1069 dev_name(q->backing_dev_info.dev)); 1064 __func__, dev_name(q->backing_dev_info.dev));
1070 1065
1071 rq->cmd_flags &= ~REQ_ELVPRIV; 1066 rq->cmd_flags &= ~REQ_ELVPRIV;
1072 rq->elv.icq = NULL; 1067 rq->elv.icq = NULL;
@@ -1097,7 +1092,7 @@ fail_alloc:
1097rq_starved: 1092rq_starved:
1098 if (unlikely(rl->count[is_sync] == 0)) 1093 if (unlikely(rl->count[is_sync] == 0))
1099 rl->starved[is_sync] = 1; 1094 rl->starved[is_sync] = 1;
1100 return NULL; 1095 return ERR_PTR(-ENOMEM);
1101} 1096}
1102 1097
1103/** 1098/**
@@ -1111,8 +1106,8 @@ rq_starved:
1111 * function keeps retrying under memory pressure and fails iff @q is dead. 1106 * function keeps retrying under memory pressure and fails iff @q is dead.
1112 * 1107 *
1113 * Must be called with @q->queue_lock held and, 1108 * Must be called with @q->queue_lock held and,
1114 * Returns %NULL on failure, with @q->queue_lock held. 1109 * Returns ERR_PTR on failure, with @q->queue_lock held.
1115 * Returns !%NULL on success, with @q->queue_lock *not held*. 1110 * Returns request pointer on success, with @q->queue_lock *not held*.
1116 */ 1111 */
1117static struct request *get_request(struct request_queue *q, int rw_flags, 1112static struct request *get_request(struct request_queue *q, int rw_flags,
1118 struct bio *bio, gfp_t gfp_mask) 1113 struct bio *bio, gfp_t gfp_mask)
@@ -1125,12 +1120,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
1125 rl = blk_get_rl(q, bio); /* transferred to @rq on success */ 1120 rl = blk_get_rl(q, bio); /* transferred to @rq on success */
1126retry: 1121retry:
1127 rq = __get_request(rl, rw_flags, bio, gfp_mask); 1122 rq = __get_request(rl, rw_flags, bio, gfp_mask);
1128 if (rq) 1123 if (!IS_ERR(rq))
1129 return rq; 1124 return rq;
1130 1125
1131 if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { 1126 if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
1132 blk_put_rl(rl); 1127 blk_put_rl(rl);
1133 return NULL; 1128 return rq;
1134 } 1129 }
1135 1130
1136 /* wait on @rl and retry */ 1131 /* wait on @rl and retry */
@@ -1167,7 +1162,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1167 1162
1168 spin_lock_irq(q->queue_lock); 1163 spin_lock_irq(q->queue_lock);
1169 rq = get_request(q, rw, NULL, gfp_mask); 1164 rq = get_request(q, rw, NULL, gfp_mask);
1170 if (!rq) 1165 if (IS_ERR(rq))
1171 spin_unlock_irq(q->queue_lock); 1166 spin_unlock_irq(q->queue_lock);
1172 /* q->queue_lock is unlocked at this point */ 1167 /* q->queue_lock is unlocked at this point */
1173 1168
@@ -1219,8 +1214,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
1219{ 1214{
1220 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); 1215 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
1221 1216
1222 if (unlikely(!rq)) 1217 if (IS_ERR(rq))
1223 return ERR_PTR(-ENOMEM); 1218 return rq;
1224 1219
1225 blk_rq_set_block_pc(rq); 1220 blk_rq_set_block_pc(rq);
1226 1221
@@ -1614,8 +1609,8 @@ get_rq:
1614 * Returns with the queue unlocked. 1609 * Returns with the queue unlocked.
1615 */ 1610 */
1616 req = get_request(q, rw_flags, bio, GFP_NOIO); 1611 req = get_request(q, rw_flags, bio, GFP_NOIO);
1617 if (unlikely(!req)) { 1612 if (IS_ERR(req)) {
1618 bio_endio(bio, -ENODEV); /* @q is dead */ 1613 bio_endio(bio, PTR_ERR(req)); /* @q is dead */
1619 goto out_unlock; 1614 goto out_unlock;
1620 } 1615 }
1621 1616
@@ -2405,11 +2400,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2405{ 2400{
2406 int total_bytes; 2401 int total_bytes;
2407 2402
2403 trace_block_rq_complete(req->q, req, nr_bytes);
2404
2408 if (!req->bio) 2405 if (!req->bio)
2409 return false; 2406 return false;
2410 2407
2411 trace_block_rq_complete(req->q, req, nr_bytes);
2412
2413 /* 2408 /*
2414 * For fs requests, rq is just carrier of independent bio's 2409 * For fs requests, rq is just carrier of independent bio's
2415 * and each partial completion should be handled separately. 2410 * and each partial completion should be handled separately.
@@ -2449,8 +2444,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2449 error_type = "I/O"; 2444 error_type = "I/O";
2450 break; 2445 break;
2451 } 2446 }
2452 printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", 2447 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
2453 error_type, req->rq_disk ? 2448 __func__, error_type, req->rq_disk ?
2454 req->rq_disk->disk_name : "?", 2449 req->rq_disk->disk_name : "?",
2455 (unsigned long long)blk_rq_pos(req)); 2450 (unsigned long long)blk_rq_pos(req));
2456 2451
@@ -2931,7 +2926,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2931 blk_rq_init(NULL, rq); 2926 blk_rq_init(NULL, rq);
2932 2927
2933 __rq_for_each_bio(bio_src, rq_src) { 2928 __rq_for_each_bio(bio_src, rq_src) {
2934 bio = bio_clone_bioset(bio_src, gfp_mask, bs); 2929 bio = bio_clone_fast(bio_src, gfp_mask, bs);
2935 if (!bio) 2930 if (!bio)
2936 goto free_and_out; 2931 goto free_and_out;
2937 2932
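
With the NULL-to-ERR_PTR conversion above, callers of blk_get_request() and friends can now distinguish a dying queue from memory pressure instead of guessing. A minimal caller sketch (the wrapper name is illustrative):

    static int my_issue(struct request_queue *q)
    {
            struct request *rq = blk_get_request(q, READ, GFP_KERNEL);

            if (IS_ERR(rq))
                    return PTR_ERR(rq);  /* -ENODEV if @q is dying, -ENOMEM otherwise */

            /* ... fill in and dispatch rq ... */
            blk_put_request(rq);
            return 0;
    }
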
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3cb5e9e7108a..20badd7b9d1b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -28,7 +28,7 @@
28 * 28 *
29 * The actual execution of flush is double buffered. Whenever a request 29 * The actual execution of flush is double buffered. Whenever a request
30 * needs to execute PRE or POSTFLUSH, it queues at 30 * needs to execute PRE or POSTFLUSH, it queues at
31 * q->flush_queue[q->flush_pending_idx]. Once certain criteria are met, a 31 * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
32 * flush is issued and the pending_idx is toggled. When the flush 32 * flush is issued and the pending_idx is toggled. When the flush
33 * completes, all the requests which were pending are proceeded to the next 33 * completes, all the requests which were pending are proceeded to the next
34 * step. This allows arbitrary merging of different types of FLUSH/FUA 34 * step. This allows arbitrary merging of different types of FLUSH/FUA
@@ -91,7 +91,8 @@ enum {
91 FLUSH_PENDING_TIMEOUT = 5 * HZ, 91 FLUSH_PENDING_TIMEOUT = 5 * HZ,
92}; 92};
93 93
94static bool blk_kick_flush(struct request_queue *q); 94static bool blk_kick_flush(struct request_queue *q,
95 struct blk_flush_queue *fq);
95 96
96static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) 97static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
97{ 98{
@@ -126,8 +127,6 @@ static void blk_flush_restore_request(struct request *rq)
126 /* make @rq a normal request */ 127 /* make @rq a normal request */
127 rq->cmd_flags &= ~REQ_FLUSH_SEQ; 128 rq->cmd_flags &= ~REQ_FLUSH_SEQ;
128 rq->end_io = rq->flush.saved_end_io; 129 rq->end_io = rq->flush.saved_end_io;
129
130 blk_clear_rq_complete(rq);
131} 130}
132 131
133static bool blk_flush_queue_rq(struct request *rq, bool add_front) 132static bool blk_flush_queue_rq(struct request *rq, bool add_front)
@@ -150,6 +149,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
150/** 149/**
151 * blk_flush_complete_seq - complete flush sequence 150 * blk_flush_complete_seq - complete flush sequence
152 * @rq: FLUSH/FUA request being sequenced 151 * @rq: FLUSH/FUA request being sequenced
152 * @fq: flush queue
153 * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero) 153 * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
154 * @error: whether an error occurred 154 * @error: whether an error occurred
155 * 155 *
@@ -157,16 +157,17 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
157 * completion and trigger the next step. 157 * completion and trigger the next step.
158 * 158 *
159 * CONTEXT: 159 * CONTEXT:
160 * spin_lock_irq(q->queue_lock or q->mq_flush_lock) 160 * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
161 * 161 *
162 * RETURNS: 162 * RETURNS:
163 * %true if requests were added to the dispatch queue, %false otherwise. 163 * %true if requests were added to the dispatch queue, %false otherwise.
164 */ 164 */
165static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, 165static bool blk_flush_complete_seq(struct request *rq,
166 int error) 166 struct blk_flush_queue *fq,
167 unsigned int seq, int error)
167{ 168{
168 struct request_queue *q = rq->q; 169 struct request_queue *q = rq->q;
169 struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; 170 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
170 bool queued = false, kicked; 171 bool queued = false, kicked;
171 172
172 BUG_ON(rq->flush.seq & seq); 173 BUG_ON(rq->flush.seq & seq);
@@ -182,12 +183,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
182 case REQ_FSEQ_POSTFLUSH: 183 case REQ_FSEQ_POSTFLUSH:
183 /* queue for flush */ 184 /* queue for flush */
184 if (list_empty(pending)) 185 if (list_empty(pending))
185 q->flush_pending_since = jiffies; 186 fq->flush_pending_since = jiffies;
186 list_move_tail(&rq->flush.list, pending); 187 list_move_tail(&rq->flush.list, pending);
187 break; 188 break;
188 189
189 case REQ_FSEQ_DATA: 190 case REQ_FSEQ_DATA:
190 list_move_tail(&rq->flush.list, &q->flush_data_in_flight); 191 list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
191 queued = blk_flush_queue_rq(rq, true); 192 queued = blk_flush_queue_rq(rq, true);
192 break; 193 break;
193 194
@@ -202,7 +203,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
202 list_del_init(&rq->flush.list); 203 list_del_init(&rq->flush.list);
203 blk_flush_restore_request(rq); 204 blk_flush_restore_request(rq);
204 if (q->mq_ops) 205 if (q->mq_ops)
205 blk_mq_end_io(rq, error); 206 blk_mq_end_request(rq, error);
206 else 207 else
207 __blk_end_request_all(rq, error); 208 __blk_end_request_all(rq, error);
208 break; 209 break;
@@ -211,7 +212,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
211 BUG(); 212 BUG();
212 } 213 }
213 214
214 kicked = blk_kick_flush(q); 215 kicked = blk_kick_flush(q, fq);
215 return kicked | queued; 216 return kicked | queued;
216} 217}
217 218
@@ -222,17 +223,18 @@ static void flush_end_io(struct request *flush_rq, int error)
222 bool queued = false; 223 bool queued = false;
223 struct request *rq, *n; 224 struct request *rq, *n;
224 unsigned long flags = 0; 225 unsigned long flags = 0;
226 struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
225 227
226 if (q->mq_ops) { 228 if (q->mq_ops) {
227 spin_lock_irqsave(&q->mq_flush_lock, flags); 229 spin_lock_irqsave(&fq->mq_flush_lock, flags);
228 q->flush_rq->tag = -1; 230 flush_rq->tag = -1;
229 } 231 }
230 232
231 running = &q->flush_queue[q->flush_running_idx]; 233 running = &fq->flush_queue[fq->flush_running_idx];
232 BUG_ON(q->flush_pending_idx == q->flush_running_idx); 234 BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
233 235
234 /* account completion of the flush request */ 236 /* account completion of the flush request */
235 q->flush_running_idx ^= 1; 237 fq->flush_running_idx ^= 1;
236 238
237 if (!q->mq_ops) 239 if (!q->mq_ops)
238 elv_completed_request(q, flush_rq); 240 elv_completed_request(q, flush_rq);
@@ -242,7 +244,7 @@ static void flush_end_io(struct request *flush_rq, int error)
242 unsigned int seq = blk_flush_cur_seq(rq); 244 unsigned int seq = blk_flush_cur_seq(rq);
243 245
244 BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); 246 BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
245 queued |= blk_flush_complete_seq(rq, seq, error); 247 queued |= blk_flush_complete_seq(rq, fq, seq, error);
246 } 248 }
247 249
248 /* 250 /*
@@ -256,71 +258,81 @@ static void flush_end_io(struct request *flush_rq, int error)
256 * directly into request_fn may confuse the driver. Always use 258 * directly into request_fn may confuse the driver. Always use
257 * kblockd. 259 * kblockd.
258 */ 260 */
259 if (queued || q->flush_queue_delayed) { 261 if (queued || fq->flush_queue_delayed) {
260 WARN_ON(q->mq_ops); 262 WARN_ON(q->mq_ops);
261 blk_run_queue_async(q); 263 blk_run_queue_async(q);
262 } 264 }
263 q->flush_queue_delayed = 0; 265 fq->flush_queue_delayed = 0;
264 if (q->mq_ops) 266 if (q->mq_ops)
265 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 267 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
266} 268}
267 269
268/** 270/**
269 * blk_kick_flush - consider issuing flush request 271 * blk_kick_flush - consider issuing flush request
270 * @q: request_queue being kicked 272 * @q: request_queue being kicked
273 * @fq: flush queue
271 * 274 *
272 * Flush related states of @q have changed, consider issuing flush request. 275 * Flush related states of @q have changed, consider issuing flush request.
273 * Please read the comment at the top of this file for more info. 276 * Please read the comment at the top of this file for more info.
274 * 277 *
275 * CONTEXT: 278 * CONTEXT:
276 * spin_lock_irq(q->queue_lock or q->mq_flush_lock) 279 * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
277 * 280 *
278 * RETURNS: 281 * RETURNS:
279 * %true if flush was issued, %false otherwise. 282 * %true if flush was issued, %false otherwise.
280 */ 283 */
281static bool blk_kick_flush(struct request_queue *q) 284static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
282{ 285{
283 struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; 286 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
284 struct request *first_rq = 287 struct request *first_rq =
285 list_first_entry(pending, struct request, flush.list); 288 list_first_entry(pending, struct request, flush.list);
289 struct request *flush_rq = fq->flush_rq;
286 290
287 /* C1 described at the top of this file */ 291 /* C1 described at the top of this file */
288 if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending)) 292 if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
289 return false; 293 return false;
290 294
291 /* C2 and C3 */ 295 /* C2 and C3 */
292 if (!list_empty(&q->flush_data_in_flight) && 296 if (!list_empty(&fq->flush_data_in_flight) &&
293 time_before(jiffies, 297 time_before(jiffies,
294 q->flush_pending_since + FLUSH_PENDING_TIMEOUT)) 298 fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
295 return false; 299 return false;
296 300
297 /* 301 /*
298 * Issue flush and toggle pending_idx. This makes pending_idx 302 * Issue flush and toggle pending_idx. This makes pending_idx
299 * different from running_idx, which means flush is in flight. 303 * different from running_idx, which means flush is in flight.
300 */ 304 */
301 q->flush_pending_idx ^= 1; 305 fq->flush_pending_idx ^= 1;
302 306
303 blk_rq_init(q, q->flush_rq); 307 blk_rq_init(q, flush_rq);
304 if (q->mq_ops) 308
305 blk_mq_clone_flush_request(q->flush_rq, first_rq); 309 /*
310 * Borrow tag from the first request since they can't
311 * be in flight at the same time.
312 */
313 if (q->mq_ops) {
314 flush_rq->mq_ctx = first_rq->mq_ctx;
315 flush_rq->tag = first_rq->tag;
316 }
306 317
307 q->flush_rq->cmd_type = REQ_TYPE_FS; 318 flush_rq->cmd_type = REQ_TYPE_FS;
308 q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 319 flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
309 q->flush_rq->rq_disk = first_rq->rq_disk; 320 flush_rq->rq_disk = first_rq->rq_disk;
310 q->flush_rq->end_io = flush_end_io; 321 flush_rq->end_io = flush_end_io;
311 322
312 return blk_flush_queue_rq(q->flush_rq, false); 323 return blk_flush_queue_rq(flush_rq, false);
313} 324}
314 325
315static void flush_data_end_io(struct request *rq, int error) 326static void flush_data_end_io(struct request *rq, int error)
316{ 327{
317 struct request_queue *q = rq->q; 328 struct request_queue *q = rq->q;
329 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
318 330
319 /* 331 /*
320 * After populating an empty queue, kick it to avoid stall. Read 332 * After populating an empty queue, kick it to avoid stall. Read
321 * the comment in flush_end_io(). 333 * the comment in flush_end_io().
322 */ 334 */
323 if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) 335 if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
324 blk_run_queue_async(q); 336 blk_run_queue_async(q);
325} 337}
326 338
@@ -328,20 +340,20 @@ static void mq_flush_data_end_io(struct request *rq, int error)
328{ 340{
329 struct request_queue *q = rq->q; 341 struct request_queue *q = rq->q;
330 struct blk_mq_hw_ctx *hctx; 342 struct blk_mq_hw_ctx *hctx;
331 struct blk_mq_ctx *ctx; 343 struct blk_mq_ctx *ctx = rq->mq_ctx;
332 unsigned long flags; 344 unsigned long flags;
345 struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
333 346
334 ctx = rq->mq_ctx;
335 hctx = q->mq_ops->map_queue(q, ctx->cpu); 347 hctx = q->mq_ops->map_queue(q, ctx->cpu);
336 348
337 /* 349 /*
338 * After populating an empty queue, kick it to avoid stall. Read 350 * After populating an empty queue, kick it to avoid stall. Read
339 * the comment in flush_end_io(). 351 * the comment in flush_end_io().
340 */ 352 */
341 spin_lock_irqsave(&q->mq_flush_lock, flags); 353 spin_lock_irqsave(&fq->mq_flush_lock, flags);
342 if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) 354 if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
343 blk_mq_run_hw_queue(hctx, true); 355 blk_mq_run_hw_queue(hctx, true);
344 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 356 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
345} 357}
346 358
347/** 359/**
@@ -361,6 +373,7 @@ void blk_insert_flush(struct request *rq)
361 struct request_queue *q = rq->q; 373 struct request_queue *q = rq->q;
362 unsigned int fflags = q->flush_flags; /* may change, cache */ 374 unsigned int fflags = q->flush_flags; /* may change, cache */
363 unsigned int policy = blk_flush_policy(fflags, rq); 375 unsigned int policy = blk_flush_policy(fflags, rq);
376 struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
364 377
365 /* 378 /*
366 * @policy now records what operations need to be done. Adjust 379 * @policy now records what operations need to be done. Adjust
@@ -378,7 +391,7 @@ void blk_insert_flush(struct request *rq)
378 */ 391 */
379 if (!policy) { 392 if (!policy) {
380 if (q->mq_ops) 393 if (q->mq_ops)
381 blk_mq_end_io(rq, 0); 394 blk_mq_end_request(rq, 0);
382 else 395 else
383 __blk_end_bidi_request(rq, 0, 0, 0); 396 __blk_end_bidi_request(rq, 0, 0, 0);
384 return; 397 return;
@@ -411,14 +424,14 @@ void blk_insert_flush(struct request *rq)
411 if (q->mq_ops) { 424 if (q->mq_ops) {
412 rq->end_io = mq_flush_data_end_io; 425 rq->end_io = mq_flush_data_end_io;
413 426
414 spin_lock_irq(&q->mq_flush_lock); 427 spin_lock_irq(&fq->mq_flush_lock);
415 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 428 blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
416 spin_unlock_irq(&q->mq_flush_lock); 429 spin_unlock_irq(&fq->mq_flush_lock);
417 return; 430 return;
418 } 431 }
419 rq->end_io = flush_data_end_io; 432 rq->end_io = flush_data_end_io;
420 433
421 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 434 blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
422} 435}
423 436
424/** 437/**
@@ -474,7 +487,43 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
474} 487}
475EXPORT_SYMBOL(blkdev_issue_flush); 488EXPORT_SYMBOL(blkdev_issue_flush);
476 489
477void blk_mq_init_flush(struct request_queue *q) 490struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
491 int node, int cmd_size)
492{
493 struct blk_flush_queue *fq;
494 int rq_sz = sizeof(struct request);
495
496 fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
497 if (!fq)
498 goto fail;
499
500 if (q->mq_ops) {
501 spin_lock_init(&fq->mq_flush_lock);
502 rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
503 }
504
505 fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
506 if (!fq->flush_rq)
507 goto fail_rq;
508
509 INIT_LIST_HEAD(&fq->flush_queue[0]);
510 INIT_LIST_HEAD(&fq->flush_queue[1]);
511 INIT_LIST_HEAD(&fq->flush_data_in_flight);
512
513 return fq;
514
515 fail_rq:
516 kfree(fq);
517 fail:
518 return NULL;
519}
520
521void blk_free_flush_queue(struct blk_flush_queue *fq)
478{ 522{
 479 spin_lock_init(&q->mq_flush_lock); 523 /* bio-based request queues have no flush queue */
524 if (!fq)
525 return;
526
527 kfree(fq->flush_rq);
528 kfree(fq);
480} 529}
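
The flush machinery now allocates its state up front through blk_alloc_flush_queue() and tears it down with blk_free_flush_queue(), replacing the open-coded q->flush_rq allocation that blk_mq_init_queue() used to perform. A rough userspace sketch of the same allocation pattern follows; the names, calloc()/free() in place of kzalloc_node()/kfree(), and the hard-coded 64-byte stand-in for cache_line_size() are illustrative assumptions, not kernel API.

#include <stdio.h>
#include <stdlib.h>

struct flush_queue_sketch {
        void *flush_rq;         /* preallocated flush request (+ driver pdu for blk-mq) */
        size_t rq_size;
};

/* round 'v' up to the next multiple of 'align'; align must be a power of two */
static size_t round_up_pow2(size_t v, size_t align)
{
        return (v + align - 1) & ~(align - 1);
}

static struct flush_queue_sketch *alloc_flush_queue(size_t rq_sz, size_t cmd_size,
                                                    int is_mq)
{
        struct flush_queue_sketch *fq = calloc(1, sizeof(*fq));

        if (!fq)
                return NULL;

        /* blk-mq tacks the driver's per-command payload onto the request */
        if (is_mq)
                rq_sz = round_up_pow2(rq_sz + cmd_size, 64);

        fq->rq_size = rq_sz;
        fq->flush_rq = calloc(1, rq_sz);
        if (!fq->flush_rq) {
                free(fq);
                return NULL;
        }
        return fq;
}

static void free_flush_queue(struct flush_queue_sketch *fq)
{
        if (!fq)        /* bio-based queues never allocate one */
                return;
        free(fq->flush_rq);
        free(fq);
}

int main(void)
{
        struct flush_queue_sketch *fq = alloc_flush_queue(320, 24, 1);

        if (fq)
                printf("flush request size rounded to %zu bytes\n", fq->rq_size);
        free_flush_queue(fq);
        return 0;
}
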
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 7fbab84399e6..79ffb4855af0 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -154,10 +154,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
154 if (!b1 || !b2) 154 if (!b1 || !b2)
155 return -1; 155 return -1;
156 156
157 if (b1->sector_size != b2->sector_size) { 157 if (b1->interval != b2->interval) {
158 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, 158 pr_err("%s: %s/%s protection interval %u != %u\n",
159 gd1->disk_name, gd2->disk_name, 159 __func__, gd1->disk_name, gd2->disk_name,
160 b1->sector_size, b2->sector_size); 160 b1->interval, b2->interval);
161 return -1; 161 return -1;
162 } 162 }
163 163
@@ -186,37 +186,53 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
186} 186}
187EXPORT_SYMBOL(blk_integrity_compare); 187EXPORT_SYMBOL(blk_integrity_compare);
188 188
189int blk_integrity_merge_rq(struct request_queue *q, struct request *req, 189bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
190 struct request *next) 190 struct request *next)
191{ 191{
192 if (blk_integrity_rq(req) != blk_integrity_rq(next)) 192 if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0)
193 return -1; 193 return true;
194
195 if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0)
196 return false;
197
198 if (bio_integrity(req->bio)->bip_flags !=
199 bio_integrity(next->bio)->bip_flags)
200 return false;
194 201
195 if (req->nr_integrity_segments + next->nr_integrity_segments > 202 if (req->nr_integrity_segments + next->nr_integrity_segments >
196 q->limits.max_integrity_segments) 203 q->limits.max_integrity_segments)
197 return -1; 204 return false;
198 205
199 return 0; 206 return true;
200} 207}
201EXPORT_SYMBOL(blk_integrity_merge_rq); 208EXPORT_SYMBOL(blk_integrity_merge_rq);
202 209
203int blk_integrity_merge_bio(struct request_queue *q, struct request *req, 210bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
204 struct bio *bio) 211 struct bio *bio)
205{ 212{
206 int nr_integrity_segs; 213 int nr_integrity_segs;
207 struct bio *next = bio->bi_next; 214 struct bio *next = bio->bi_next;
208 215
216 if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
217 return true;
218
219 if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL)
220 return false;
221
222 if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
223 return false;
224
209 bio->bi_next = NULL; 225 bio->bi_next = NULL;
210 nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); 226 nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
211 bio->bi_next = next; 227 bio->bi_next = next;
212 228
213 if (req->nr_integrity_segments + nr_integrity_segs > 229 if (req->nr_integrity_segments + nr_integrity_segs >
214 q->limits.max_integrity_segments) 230 q->limits.max_integrity_segments)
215 return -1; 231 return false;
216 232
217 req->nr_integrity_segments += nr_integrity_segs; 233 req->nr_integrity_segments += nr_integrity_segs;
218 234
219 return 0; 235 return true;
220} 236}
221EXPORT_SYMBOL(blk_integrity_merge_bio); 237EXPORT_SYMBOL(blk_integrity_merge_bio);
222 238
@@ -269,42 +285,48 @@ static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
269 return sprintf(page, "0\n"); 285 return sprintf(page, "0\n");
270} 286}
271 287
272static ssize_t integrity_read_store(struct blk_integrity *bi, 288static ssize_t integrity_verify_store(struct blk_integrity *bi,
273 const char *page, size_t count) 289 const char *page, size_t count)
274{ 290{
275 char *p = (char *) page; 291 char *p = (char *) page;
276 unsigned long val = simple_strtoul(p, &p, 10); 292 unsigned long val = simple_strtoul(p, &p, 10);
277 293
278 if (val) 294 if (val)
279 bi->flags |= INTEGRITY_FLAG_READ; 295 bi->flags |= BLK_INTEGRITY_VERIFY;
280 else 296 else
281 bi->flags &= ~INTEGRITY_FLAG_READ; 297 bi->flags &= ~BLK_INTEGRITY_VERIFY;
282 298
283 return count; 299 return count;
284} 300}
285 301
286static ssize_t integrity_read_show(struct blk_integrity *bi, char *page) 302static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page)
287{ 303{
288 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0); 304 return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0);
289} 305}
290 306
291static ssize_t integrity_write_store(struct blk_integrity *bi, 307static ssize_t integrity_generate_store(struct blk_integrity *bi,
292 const char *page, size_t count) 308 const char *page, size_t count)
293{ 309{
294 char *p = (char *) page; 310 char *p = (char *) page;
295 unsigned long val = simple_strtoul(p, &p, 10); 311 unsigned long val = simple_strtoul(p, &p, 10);
296 312
297 if (val) 313 if (val)
298 bi->flags |= INTEGRITY_FLAG_WRITE; 314 bi->flags |= BLK_INTEGRITY_GENERATE;
299 else 315 else
300 bi->flags &= ~INTEGRITY_FLAG_WRITE; 316 bi->flags &= ~BLK_INTEGRITY_GENERATE;
301 317
302 return count; 318 return count;
303} 319}
304 320
305static ssize_t integrity_write_show(struct blk_integrity *bi, char *page) 321static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page)
322{
323 return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
324}
325
326static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
306{ 327{
307 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0); 328 return sprintf(page, "%u\n",
329 (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0);
308} 330}
309 331
310static struct integrity_sysfs_entry integrity_format_entry = { 332static struct integrity_sysfs_entry integrity_format_entry = {
@@ -317,23 +339,29 @@ static struct integrity_sysfs_entry integrity_tag_size_entry = {
317 .show = integrity_tag_size_show, 339 .show = integrity_tag_size_show,
318}; 340};
319 341
320static struct integrity_sysfs_entry integrity_read_entry = { 342static struct integrity_sysfs_entry integrity_verify_entry = {
321 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR }, 343 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
322 .show = integrity_read_show, 344 .show = integrity_verify_show,
323 .store = integrity_read_store, 345 .store = integrity_verify_store,
324}; 346};
325 347
326static struct integrity_sysfs_entry integrity_write_entry = { 348static struct integrity_sysfs_entry integrity_generate_entry = {
327 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR }, 349 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
328 .show = integrity_write_show, 350 .show = integrity_generate_show,
329 .store = integrity_write_store, 351 .store = integrity_generate_store,
352};
353
354static struct integrity_sysfs_entry integrity_device_entry = {
355 .attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
356 .show = integrity_device_show,
330}; 357};
331 358
332static struct attribute *integrity_attrs[] = { 359static struct attribute *integrity_attrs[] = {
333 &integrity_format_entry.attr, 360 &integrity_format_entry.attr,
334 &integrity_tag_size_entry.attr, 361 &integrity_tag_size_entry.attr,
335 &integrity_read_entry.attr, 362 &integrity_verify_entry.attr,
336 &integrity_write_entry.attr, 363 &integrity_generate_entry.attr,
364 &integrity_device_entry.attr,
337 NULL, 365 NULL,
338}; 366};
339 367
@@ -406,8 +434,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
406 434
407 kobject_uevent(&bi->kobj, KOBJ_ADD); 435 kobject_uevent(&bi->kobj, KOBJ_ADD);
408 436
409 bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE; 437 bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE;
410 bi->sector_size = queue_logical_block_size(disk->queue); 438 bi->interval = queue_logical_block_size(disk->queue);
411 disk->integrity = bi; 439 disk->integrity = bi;
412 } else 440 } else
413 bi = disk->integrity; 441 bi = disk->integrity;
@@ -418,9 +446,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
418 bi->generate_fn = template->generate_fn; 446 bi->generate_fn = template->generate_fn;
419 bi->verify_fn = template->verify_fn; 447 bi->verify_fn = template->verify_fn;
420 bi->tuple_size = template->tuple_size; 448 bi->tuple_size = template->tuple_size;
421 bi->set_tag_fn = template->set_tag_fn;
422 bi->get_tag_fn = template->get_tag_fn;
423 bi->tag_size = template->tag_size; 449 bi->tag_size = template->tag_size;
450 bi->flags |= template->flags;
424 } else 451 } else
425 bi->name = bi_unsupported_name; 452 bi->name = bi_unsupported_name;
426 453
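
blk_integrity_merge_rq() and blk_integrity_merge_bio() now return bool and refuse to merge when one side carries protection information and the other does not, or when the bip_flags of the two sides differ. The decision table reduces to a few checks; the sketch below is a stand-alone illustration with invented field names (has_integrity, bip_flags, nr_integrity_segs) standing in for the real request/bio bookkeeping.

#include <stdbool.h>
#include <stdio.h>

struct io_sketch {
        bool has_integrity;             /* does this I/O carry protection metadata? */
        unsigned int bip_flags;         /* integrity flags; must match to merge */
        unsigned int nr_integrity_segs;
};

static bool integrity_merge_ok(const struct io_sketch *req,
                               const struct io_sketch *next,
                               unsigned int max_integrity_segments)
{
        /* both unprotected: nothing to check, merge is fine */
        if (!req->has_integrity && !next->has_integrity)
                return true;

        /* mixing protected and unprotected I/O is never allowed */
        if (!req->has_integrity || !next->has_integrity)
                return false;

        /* metadata must be handled the same way on both sides */
        if (req->bip_flags != next->bip_flags)
                return false;

        /* and the merged request must still fit the controller's segment limit */
        return req->nr_integrity_segs + next->nr_integrity_segs <=
               max_integrity_segments;
}

int main(void)
{
        struct io_sketch a = { true, 0x1, 4 }, b = { true, 0x1, 3 };
        struct io_sketch c = { false, 0, 0 };

        printf("protected + protected:   %d\n", integrity_merge_ok(&a, &b, 16));
        printf("protected + unprotected: %d\n", integrity_merge_ok(&a, &c, 16));
        return 0;
}
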
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77881798f793..ba99351c0f58 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -97,14 +97,18 @@ void blk_recalc_rq_segments(struct request *rq)
97 97
98void blk_recount_segments(struct request_queue *q, struct bio *bio) 98void blk_recount_segments(struct request_queue *q, struct bio *bio)
99{ 99{
100 if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && 100 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
101 &q->queue_flags);
102
103 if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) &&
101 bio->bi_vcnt < queue_max_segments(q)) 104 bio->bi_vcnt < queue_max_segments(q))
102 bio->bi_phys_segments = bio->bi_vcnt; 105 bio->bi_phys_segments = bio->bi_vcnt;
103 else { 106 else {
104 struct bio *nxt = bio->bi_next; 107 struct bio *nxt = bio->bi_next;
105 108
106 bio->bi_next = NULL; 109 bio->bi_next = NULL;
107 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); 110 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio,
111 no_sg_merge);
108 bio->bi_next = nxt; 112 bio->bi_next = nxt;
109 } 113 }
110 114
@@ -313,7 +317,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
313 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) 317 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
314 goto no_merge; 318 goto no_merge;
315 319
316 if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) 320 if (blk_integrity_merge_bio(q, req, bio) == false)
317 goto no_merge; 321 goto no_merge;
318 322
319 /* 323 /*
@@ -410,7 +414,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
410 if (total_phys_segments > queue_max_segments(q)) 414 if (total_phys_segments > queue_max_segments(q))
411 return 0; 415 return 0;
412 416
413 if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) 417 if (blk_integrity_merge_rq(q, req, next) == false)
414 return 0; 418 return 0;
415 419
416 /* Merge is OK... */ 420 /* Merge is OK... */
@@ -590,7 +594,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
590 return false; 594 return false;
591 595
592 /* only merge integrity protected bio into ditto rq */ 596 /* only merge integrity protected bio into ditto rq */
593 if (bio_integrity(bio) != blk_integrity_rq(rq)) 597 if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
594 return false; 598 return false;
595 599
596 /* must be using the same buffer */ 600 /* must be using the same buffer */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c1b92426c95e..8317175a3009 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -351,15 +351,12 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
351 return; 351 return;
352 352
353 wait_cnt = atomic_dec_return(&bs->wait_cnt); 353 wait_cnt = atomic_dec_return(&bs->wait_cnt);
354 if (unlikely(wait_cnt < 0))
355 wait_cnt = atomic_inc_return(&bs->wait_cnt);
354 if (wait_cnt == 0) { 356 if (wait_cnt == 0) {
355wake:
356 atomic_add(bt->wake_cnt, &bs->wait_cnt); 357 atomic_add(bt->wake_cnt, &bs->wait_cnt);
357 bt_index_atomic_inc(&bt->wake_index); 358 bt_index_atomic_inc(&bt->wake_index);
358 wake_up(&bs->wait); 359 wake_up(&bs->wait);
359 } else if (wait_cnt < 0) {
360 wait_cnt = atomic_inc_return(&bs->wait_cnt);
361 if (!wait_cnt)
362 goto wake;
363 } 360 }
364} 361}
365 362
@@ -392,45 +389,37 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
392 __blk_mq_put_reserved_tag(tags, tag); 389 __blk_mq_put_reserved_tag(tags, tag);
393} 390}
394 391
395static void bt_for_each_free(struct blk_mq_bitmap_tags *bt, 392static void bt_for_each(struct blk_mq_hw_ctx *hctx,
396 unsigned long *free_map, unsigned int off) 393 struct blk_mq_bitmap_tags *bt, unsigned int off,
394 busy_iter_fn *fn, void *data, bool reserved)
397{ 395{
398 int i; 396 struct request *rq;
397 int bit, i;
399 398
400 for (i = 0; i < bt->map_nr; i++) { 399 for (i = 0; i < bt->map_nr; i++) {
401 struct blk_align_bitmap *bm = &bt->map[i]; 400 struct blk_align_bitmap *bm = &bt->map[i];
402 int bit = 0;
403 401
404 do { 402 for (bit = find_first_bit(&bm->word, bm->depth);
405 bit = find_next_zero_bit(&bm->word, bm->depth, bit); 403 bit < bm->depth;
406 if (bit >= bm->depth) 404 bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
407 break; 405 rq = blk_mq_tag_to_rq(hctx->tags, off + bit);
408 406 if (rq->q == hctx->queue)
409 __set_bit(bit + off, free_map); 407 fn(hctx, rq, data, reserved);
410 bit++; 408 }
411 } while (1);
412 409
413 off += (1 << bt->bits_per_word); 410 off += (1 << bt->bits_per_word);
414 } 411 }
415} 412}
416 413
417void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, 414void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
418 void (*fn)(void *, unsigned long *), void *data) 415 void *priv)
419{ 416{
420 unsigned long *tag_map; 417 struct blk_mq_tags *tags = hctx->tags;
421 size_t map_size;
422
423 map_size = ALIGN(tags->nr_tags, BITS_PER_LONG) / BITS_PER_LONG;
424 tag_map = kzalloc(map_size * sizeof(unsigned long), GFP_ATOMIC);
425 if (!tag_map)
426 return;
427 418
428 bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags);
429 if (tags->nr_reserved_tags) 419 if (tags->nr_reserved_tags)
430 bt_for_each_free(&tags->breserved_tags, tag_map, 0); 420 bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true);
431 421 bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
432 fn(data, tag_map); 422 false);
433 kfree(tag_map);
434} 423}
435EXPORT_SYMBOL(blk_mq_tag_busy_iter); 424EXPORT_SYMBOL(blk_mq_tag_busy_iter);
436 425
@@ -463,8 +452,8 @@ static void bt_update_count(struct blk_mq_bitmap_tags *bt,
463 } 452 }
464 453
465 bt->wake_cnt = BT_WAIT_BATCH; 454 bt->wake_cnt = BT_WAIT_BATCH;
466 if (bt->wake_cnt > depth / 4) 455 if (bt->wake_cnt > depth / BT_WAIT_QUEUES)
467 bt->wake_cnt = max(1U, depth / 4); 456 bt->wake_cnt = max(1U, depth / BT_WAIT_QUEUES);
468 457
469 bt->depth = depth; 458 bt->depth = depth;
470} 459}
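
bt_for_each() is inverted here: instead of collecting free bits into a scratch bitmap, it walks the set (busy) bits of each word and hands the corresponding request to a busy_iter_fn callback. The sketch below shows the same walk over a single 64-bit word in userspace; the callback type and helper names are made up, and the kernel version iterates multiple cache-line-aligned words with find_first_bit()/find_next_bit() rather than the count-trailing-zeros trick used here.

#include <stdint.h>
#include <stdio.h>

typedef void (*busy_iter_sketch_fn)(unsigned int tag, void *data);

/* invoke 'fn' once for every bit set in 'word', offset by 'off' */
static void for_each_busy_tag(uint64_t word, unsigned int off,
                              busy_iter_sketch_fn fn, void *data)
{
        while (word) {
                unsigned int bit = (unsigned int)__builtin_ctzll(word);

                fn(off + bit, data);
                word &= word - 1;       /* clear the lowest set bit */
        }
}

static void print_tag(unsigned int tag, void *data)
{
        (void)data;
        printf("busy tag %u\n", tag);
}

int main(void)
{
        /* pretend tags 0, 3 and 9 are in flight */
        for_each_busy_tag((1ULL << 0) | (1ULL << 3) | (1ULL << 9), 0,
                          print_tag, NULL);
        return 0;
}
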
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 38f4a165640d..68929bad9a6a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -20,6 +20,7 @@
20#include <linux/cache.h> 20#include <linux/cache.h>
21#include <linux/sched/sysctl.h> 21#include <linux/sched/sysctl.h>
22#include <linux/delay.h> 22#include <linux/delay.h>
23#include <linux/crash_dump.h>
23 24
24#include <trace/events/block.h> 25#include <trace/events/block.h>
25 26
@@ -223,9 +224,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
223 struct blk_mq_hw_ctx *hctx; 224 struct blk_mq_hw_ctx *hctx;
224 struct request *rq; 225 struct request *rq;
225 struct blk_mq_alloc_data alloc_data; 226 struct blk_mq_alloc_data alloc_data;
227 int ret;
226 228
227 if (blk_mq_queue_enter(q)) 229 ret = blk_mq_queue_enter(q);
228 return NULL; 230 if (ret)
231 return ERR_PTR(ret);
229 232
230 ctx = blk_mq_get_ctx(q); 233 ctx = blk_mq_get_ctx(q);
231 hctx = q->mq_ops->map_queue(q, ctx->cpu); 234 hctx = q->mq_ops->map_queue(q, ctx->cpu);
@@ -245,6 +248,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
245 ctx = alloc_data.ctx; 248 ctx = alloc_data.ctx;
246 } 249 }
247 blk_mq_put_ctx(ctx); 250 blk_mq_put_ctx(ctx);
251 if (!rq)
252 return ERR_PTR(-EWOULDBLOCK);
248 return rq; 253 return rq;
249} 254}
250EXPORT_SYMBOL(blk_mq_alloc_request); 255EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -276,27 +281,7 @@ void blk_mq_free_request(struct request *rq)
276 __blk_mq_free_request(hctx, ctx, rq); 281 __blk_mq_free_request(hctx, ctx, rq);
277} 282}
278 283
279/* 284inline void __blk_mq_end_request(struct request *rq, int error)
280 * Clone all relevant state from a request that has been put on hold in
281 * the flush state machine into the preallocated flush request that hangs
282 * off the request queue.
283 *
284 * For a driver the flush request should be invisible, that's why we are
285 * impersonating the original request here.
286 */
287void blk_mq_clone_flush_request(struct request *flush_rq,
288 struct request *orig_rq)
289{
290 struct blk_mq_hw_ctx *hctx =
291 orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
292
293 flush_rq->mq_ctx = orig_rq->mq_ctx;
294 flush_rq->tag = orig_rq->tag;
295 memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
296 hctx->cmd_size);
297}
298
299inline void __blk_mq_end_io(struct request *rq, int error)
300{ 285{
301 blk_account_io_done(rq); 286 blk_account_io_done(rq);
302 287
@@ -308,15 +293,15 @@ inline void __blk_mq_end_io(struct request *rq, int error)
308 blk_mq_free_request(rq); 293 blk_mq_free_request(rq);
309 } 294 }
310} 295}
311EXPORT_SYMBOL(__blk_mq_end_io); 296EXPORT_SYMBOL(__blk_mq_end_request);
312 297
313void blk_mq_end_io(struct request *rq, int error) 298void blk_mq_end_request(struct request *rq, int error)
314{ 299{
315 if (blk_update_request(rq, error, blk_rq_bytes(rq))) 300 if (blk_update_request(rq, error, blk_rq_bytes(rq)))
316 BUG(); 301 BUG();
317 __blk_mq_end_io(rq, error); 302 __blk_mq_end_request(rq, error);
318} 303}
319EXPORT_SYMBOL(blk_mq_end_io); 304EXPORT_SYMBOL(blk_mq_end_request);
320 305
321static void __blk_mq_complete_request_remote(void *data) 306static void __blk_mq_complete_request_remote(void *data)
322{ 307{
@@ -356,7 +341,7 @@ void __blk_mq_complete_request(struct request *rq)
356 struct request_queue *q = rq->q; 341 struct request_queue *q = rq->q;
357 342
358 if (!q->softirq_done_fn) 343 if (!q->softirq_done_fn)
359 blk_mq_end_io(rq, rq->errors); 344 blk_mq_end_request(rq, rq->errors);
360 else 345 else
361 blk_mq_ipi_complete_request(rq); 346 blk_mq_ipi_complete_request(rq);
362} 347}
@@ -380,7 +365,7 @@ void blk_mq_complete_request(struct request *rq)
380} 365}
381EXPORT_SYMBOL(blk_mq_complete_request); 366EXPORT_SYMBOL(blk_mq_complete_request);
382 367
383static void blk_mq_start_request(struct request *rq, bool last) 368void blk_mq_start_request(struct request *rq)
384{ 369{
385 struct request_queue *q = rq->q; 370 struct request_queue *q = rq->q;
386 371
@@ -417,35 +402,24 @@ static void blk_mq_start_request(struct request *rq, bool last)
417 */ 402 */
418 rq->nr_phys_segments++; 403 rq->nr_phys_segments++;
419 } 404 }
420
421 /*
422 * Flag the last request in the series so that drivers know when IO
423 * should be kicked off, if they don't do it on a per-request basis.
424 *
425 * Note: the flag isn't the only condition drivers should do kick off.
426 * If drive is busy, the last request might not have the bit set.
427 */
428 if (last)
429 rq->cmd_flags |= REQ_END;
430} 405}
406EXPORT_SYMBOL(blk_mq_start_request);
431 407
432static void __blk_mq_requeue_request(struct request *rq) 408static void __blk_mq_requeue_request(struct request *rq)
433{ 409{
434 struct request_queue *q = rq->q; 410 struct request_queue *q = rq->q;
435 411
436 trace_block_rq_requeue(q, rq); 412 trace_block_rq_requeue(q, rq);
437 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
438
439 rq->cmd_flags &= ~REQ_END;
440 413
441 if (q->dma_drain_size && blk_rq_bytes(rq)) 414 if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
442 rq->nr_phys_segments--; 415 if (q->dma_drain_size && blk_rq_bytes(rq))
416 rq->nr_phys_segments--;
417 }
443} 418}
444 419
445void blk_mq_requeue_request(struct request *rq) 420void blk_mq_requeue_request(struct request *rq)
446{ 421{
447 __blk_mq_requeue_request(rq); 422 __blk_mq_requeue_request(rq);
448 blk_clear_rq_complete(rq);
449 423
450 BUG_ON(blk_queued_rq(rq)); 424 BUG_ON(blk_queued_rq(rq));
451 blk_mq_add_to_requeue_list(rq, true); 425 blk_mq_add_to_requeue_list(rq, true);
@@ -514,78 +488,35 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
514} 488}
515EXPORT_SYMBOL(blk_mq_kick_requeue_list); 489EXPORT_SYMBOL(blk_mq_kick_requeue_list);
516 490
517static inline bool is_flush_request(struct request *rq, unsigned int tag) 491static inline bool is_flush_request(struct request *rq,
492 struct blk_flush_queue *fq, unsigned int tag)
518{ 493{
519 return ((rq->cmd_flags & REQ_FLUSH_SEQ) && 494 return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
520 rq->q->flush_rq->tag == tag); 495 fq->flush_rq->tag == tag);
521} 496}
522 497
523struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) 498struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
524{ 499{
525 struct request *rq = tags->rqs[tag]; 500 struct request *rq = tags->rqs[tag];
501 /* mq_ctx of flush rq is always cloned from the corresponding req */
502 struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx);
526 503
527 if (!is_flush_request(rq, tag)) 504 if (!is_flush_request(rq, fq, tag))
528 return rq; 505 return rq;
529 506
530 return rq->q->flush_rq; 507 return fq->flush_rq;
531} 508}
532EXPORT_SYMBOL(blk_mq_tag_to_rq); 509EXPORT_SYMBOL(blk_mq_tag_to_rq);
533 510
534struct blk_mq_timeout_data { 511struct blk_mq_timeout_data {
535 struct blk_mq_hw_ctx *hctx; 512 unsigned long next;
536 unsigned long *next; 513 unsigned int next_set;
537 unsigned int *next_set;
538}; 514};
539 515
540static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) 516void blk_mq_rq_timed_out(struct request *req, bool reserved)
541{ 517{
542 struct blk_mq_timeout_data *data = __data; 518 struct blk_mq_ops *ops = req->q->mq_ops;
543 struct blk_mq_hw_ctx *hctx = data->hctx; 519 enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
544 unsigned int tag;
545
546 /* It may not be in flight yet (this is where
547 * the REQ_ATOMIC_STARTED flag comes in). The requests are
548 * statically allocated, so we know it's always safe to access the
549 * memory associated with a bit offset into ->rqs[].
550 */
551 tag = 0;
552 do {
553 struct request *rq;
554
555 tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag);
556 if (tag >= hctx->tags->nr_tags)
557 break;
558
559 rq = blk_mq_tag_to_rq(hctx->tags, tag++);
560 if (rq->q != hctx->queue)
561 continue;
562 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
563 continue;
564
565 blk_rq_check_expired(rq, data->next, data->next_set);
566 } while (1);
567}
568
569static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
570 unsigned long *next,
571 unsigned int *next_set)
572{
573 struct blk_mq_timeout_data data = {
574 .hctx = hctx,
575 .next = next,
576 .next_set = next_set,
577 };
578
579 /*
580 * Ask the tagging code to iterate busy requests, so we can
581 * check them for timeout.
582 */
583 blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
584}
585
586static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
587{
588 struct request_queue *q = rq->q;
589 520
590 /* 521 /*
591 * We know that complete is set at this point. If STARTED isn't set 522 * We know that complete is set at this point. If STARTED isn't set
@@ -596,21 +527,54 @@ static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
596 * we both flags will get cleared. So check here again, and ignore 527 * we both flags will get cleared. So check here again, and ignore
597 * a timeout event with a request that isn't active. 528 * a timeout event with a request that isn't active.
598 */ 529 */
599 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) 530 if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags))
600 return BLK_EH_NOT_HANDLED; 531 return;
532
533 if (ops->timeout)
534 ret = ops->timeout(req, reserved);
535
536 switch (ret) {
537 case BLK_EH_HANDLED:
538 __blk_mq_complete_request(req);
539 break;
540 case BLK_EH_RESET_TIMER:
541 blk_add_timer(req);
542 blk_clear_rq_complete(req);
543 break;
544 case BLK_EH_NOT_HANDLED:
545 break;
546 default:
547 printk(KERN_ERR "block: bad eh return: %d\n", ret);
548 break;
549 }
550}
551
552static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
553 struct request *rq, void *priv, bool reserved)
554{
555 struct blk_mq_timeout_data *data = priv;
601 556
602 if (!q->mq_ops->timeout) 557 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
603 return BLK_EH_RESET_TIMER; 558 return;
604 559
605 return q->mq_ops->timeout(rq); 560 if (time_after_eq(jiffies, rq->deadline)) {
561 if (!blk_mark_rq_complete(rq))
562 blk_mq_rq_timed_out(rq, reserved);
563 } else if (!data->next_set || time_after(data->next, rq->deadline)) {
564 data->next = rq->deadline;
565 data->next_set = 1;
566 }
606} 567}
607 568
608static void blk_mq_rq_timer(unsigned long data) 569static void blk_mq_rq_timer(unsigned long priv)
609{ 570{
610 struct request_queue *q = (struct request_queue *) data; 571 struct request_queue *q = (struct request_queue *)priv;
572 struct blk_mq_timeout_data data = {
573 .next = 0,
574 .next_set = 0,
575 };
611 struct blk_mq_hw_ctx *hctx; 576 struct blk_mq_hw_ctx *hctx;
612 unsigned long next = 0; 577 int i;
613 int i, next_set = 0;
614 578
615 queue_for_each_hw_ctx(q, hctx, i) { 579 queue_for_each_hw_ctx(q, hctx, i) {
616 /* 580 /*
@@ -620,12 +584,12 @@ static void blk_mq_rq_timer(unsigned long data)
620 if (!hctx->nr_ctx || !hctx->tags) 584 if (!hctx->nr_ctx || !hctx->tags)
621 continue; 585 continue;
622 586
623 blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); 587 blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
624 } 588 }
625 589
626 if (next_set) { 590 if (data.next_set) {
627 next = blk_rq_timeout(round_jiffies_up(next)); 591 data.next = blk_rq_timeout(round_jiffies_up(data.next));
628 mod_timer(&q->timeout, next); 592 mod_timer(&q->timeout, data.next);
629 } else { 593 } else {
630 queue_for_each_hw_ctx(q, hctx, i) 594 queue_for_each_hw_ctx(q, hctx, i)
631 blk_mq_tag_idle(hctx); 595 blk_mq_tag_idle(hctx);
@@ -751,9 +715,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
751 rq = list_first_entry(&rq_list, struct request, queuelist); 715 rq = list_first_entry(&rq_list, struct request, queuelist);
752 list_del_init(&rq->queuelist); 716 list_del_init(&rq->queuelist);
753 717
754 blk_mq_start_request(rq, list_empty(&rq_list)); 718 ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
755
756 ret = q->mq_ops->queue_rq(hctx, rq);
757 switch (ret) { 719 switch (ret) {
758 case BLK_MQ_RQ_QUEUE_OK: 720 case BLK_MQ_RQ_QUEUE_OK:
759 queued++; 721 queued++;
@@ -766,7 +728,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
766 pr_err("blk-mq: bad return on queue: %d\n", ret); 728 pr_err("blk-mq: bad return on queue: %d\n", ret);
767 case BLK_MQ_RQ_QUEUE_ERROR: 729 case BLK_MQ_RQ_QUEUE_ERROR:
768 rq->errors = -EIO; 730 rq->errors = -EIO;
769 blk_mq_end_io(rq, rq->errors); 731 blk_mq_end_request(rq, rq->errors);
770 break; 732 break;
771 } 733 }
772 734
@@ -1194,14 +1156,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1194 int ret; 1156 int ret;
1195 1157
1196 blk_mq_bio_to_request(rq, bio); 1158 blk_mq_bio_to_request(rq, bio);
1197 blk_mq_start_request(rq, true);
1198 1159
1199 /* 1160 /*
1200 * For OK queue, we are done. For error, kill it. Any other 1161 * For OK queue, we are done. For error, kill it. Any other
1201 * error (busy), just add it to our list as we previously 1162 * error (busy), just add it to our list as we previously
1202 * would have done 1163 * would have done
1203 */ 1164 */
1204 ret = q->mq_ops->queue_rq(data.hctx, rq); 1165 ret = q->mq_ops->queue_rq(data.hctx, rq, true);
1205 if (ret == BLK_MQ_RQ_QUEUE_OK) 1166 if (ret == BLK_MQ_RQ_QUEUE_OK)
1206 goto done; 1167 goto done;
1207 else { 1168 else {
@@ -1209,7 +1170,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1209 1170
1210 if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1171 if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
1211 rq->errors = -EIO; 1172 rq->errors = -EIO;
1212 blk_mq_end_io(rq, rq->errors); 1173 blk_mq_end_request(rq, rq->errors);
1213 goto done; 1174 goto done;
1214 } 1175 }
1215 } 1176 }
@@ -1531,6 +1492,28 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
1531 return NOTIFY_OK; 1492 return NOTIFY_OK;
1532} 1493}
1533 1494
1495static void blk_mq_exit_hctx(struct request_queue *q,
1496 struct blk_mq_tag_set *set,
1497 struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
1498{
1499 unsigned flush_start_tag = set->queue_depth;
1500
1501 blk_mq_tag_idle(hctx);
1502
1503 if (set->ops->exit_request)
1504 set->ops->exit_request(set->driver_data,
1505 hctx->fq->flush_rq, hctx_idx,
1506 flush_start_tag + hctx_idx);
1507
1508 if (set->ops->exit_hctx)
1509 set->ops->exit_hctx(hctx, hctx_idx);
1510
1511 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1512 blk_free_flush_queue(hctx->fq);
1513 kfree(hctx->ctxs);
1514 blk_mq_free_bitmap(&hctx->ctx_map);
1515}
1516
1534static void blk_mq_exit_hw_queues(struct request_queue *q, 1517static void blk_mq_exit_hw_queues(struct request_queue *q,
1535 struct blk_mq_tag_set *set, int nr_queue) 1518 struct blk_mq_tag_set *set, int nr_queue)
1536{ 1519{
@@ -1540,17 +1523,8 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
1540 queue_for_each_hw_ctx(q, hctx, i) { 1523 queue_for_each_hw_ctx(q, hctx, i) {
1541 if (i == nr_queue) 1524 if (i == nr_queue)
1542 break; 1525 break;
1543 1526 blk_mq_exit_hctx(q, set, hctx, i);
1544 blk_mq_tag_idle(hctx);
1545
1546 if (set->ops->exit_hctx)
1547 set->ops->exit_hctx(hctx, i);
1548
1549 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1550 kfree(hctx->ctxs);
1551 blk_mq_free_bitmap(&hctx->ctx_map);
1552 } 1527 }
1553
1554} 1528}
1555 1529
1556static void blk_mq_free_hw_queues(struct request_queue *q, 1530static void blk_mq_free_hw_queues(struct request_queue *q,
@@ -1565,53 +1539,88 @@ static void blk_mq_free_hw_queues(struct request_queue *q,
1565 } 1539 }
1566} 1540}
1567 1541
1568static int blk_mq_init_hw_queues(struct request_queue *q, 1542static int blk_mq_init_hctx(struct request_queue *q,
1569 struct blk_mq_tag_set *set) 1543 struct blk_mq_tag_set *set,
1544 struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
1570{ 1545{
1571 struct blk_mq_hw_ctx *hctx; 1546 int node;
1572 unsigned int i; 1547 unsigned flush_start_tag = set->queue_depth;
1548
1549 node = hctx->numa_node;
1550 if (node == NUMA_NO_NODE)
1551 node = hctx->numa_node = set->numa_node;
1552
1553 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
1554 INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
1555 spin_lock_init(&hctx->lock);
1556 INIT_LIST_HEAD(&hctx->dispatch);
1557 hctx->queue = q;
1558 hctx->queue_num = hctx_idx;
1559 hctx->flags = set->flags;
1560 hctx->cmd_size = set->cmd_size;
1561
1562 blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
1563 blk_mq_hctx_notify, hctx);
1564 blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
1565
1566 hctx->tags = set->tags[hctx_idx];
1573 1567
1574 /* 1568 /*
1575 * Initialize hardware queues 1569 * Allocate space for all possible cpus to avoid allocation at
1570 * runtime
1576 */ 1571 */
1577 queue_for_each_hw_ctx(q, hctx, i) { 1572 hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
1578 int node; 1573 GFP_KERNEL, node);
1574 if (!hctx->ctxs)
1575 goto unregister_cpu_notifier;
1579 1576
1580 node = hctx->numa_node; 1577 if (blk_mq_alloc_bitmap(&hctx->ctx_map, node))
1581 if (node == NUMA_NO_NODE) 1578 goto free_ctxs;
1582 node = hctx->numa_node = set->numa_node;
1583 1579
1584 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); 1580 hctx->nr_ctx = 0;
1585 INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
1586 spin_lock_init(&hctx->lock);
1587 INIT_LIST_HEAD(&hctx->dispatch);
1588 hctx->queue = q;
1589 hctx->queue_num = i;
1590 hctx->flags = set->flags;
1591 hctx->cmd_size = set->cmd_size;
1592 1581
1593 blk_mq_init_cpu_notifier(&hctx->cpu_notifier, 1582 if (set->ops->init_hctx &&
1594 blk_mq_hctx_notify, hctx); 1583 set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
1595 blk_mq_register_cpu_notifier(&hctx->cpu_notifier); 1584 goto free_bitmap;
1596 1585
1597 hctx->tags = set->tags[i]; 1586 hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
1587 if (!hctx->fq)
1588 goto exit_hctx;
1598 1589
1599 /* 1590 if (set->ops->init_request &&
1600 * Allocate space for all possible cpus to avoid allocation at 1591 set->ops->init_request(set->driver_data,
1601 * runtime 1592 hctx->fq->flush_rq, hctx_idx,
1602 */ 1593 flush_start_tag + hctx_idx, node))
1603 hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *), 1594 goto free_fq;
1604 GFP_KERNEL, node);
1605 if (!hctx->ctxs)
1606 break;
1607 1595
1608 if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) 1596 return 0;
1609 break;
1610 1597
1611 hctx->nr_ctx = 0; 1598 free_fq:
1599 kfree(hctx->fq);
1600 exit_hctx:
1601 if (set->ops->exit_hctx)
1602 set->ops->exit_hctx(hctx, hctx_idx);
1603 free_bitmap:
1604 blk_mq_free_bitmap(&hctx->ctx_map);
1605 free_ctxs:
1606 kfree(hctx->ctxs);
1607 unregister_cpu_notifier:
1608 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1609
1610 return -1;
1611}
1612
1613static int blk_mq_init_hw_queues(struct request_queue *q,
1614 struct blk_mq_tag_set *set)
1615{
1616 struct blk_mq_hw_ctx *hctx;
1617 unsigned int i;
1612 1618
1613 if (set->ops->init_hctx && 1619 /*
1614 set->ops->init_hctx(hctx, set->driver_data, i)) 1620 * Initialize hardware queues
1621 */
1622 queue_for_each_hw_ctx(q, hctx, i) {
1623 if (blk_mq_init_hctx(q, set, hctx, i))
1615 break; 1624 break;
1616 } 1625 }
1617 1626
@@ -1765,6 +1774,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1765 if (!ctx) 1774 if (!ctx)
1766 return ERR_PTR(-ENOMEM); 1775 return ERR_PTR(-ENOMEM);
1767 1776
1777 /*
1778 * If a crashdump is active, then we are potentially in a very
1779 * memory constrained environment. Limit us to 1 queue and
1780 * 64 tags to prevent using too much memory.
1781 */
1782 if (is_kdump_kernel()) {
1783 set->nr_hw_queues = 1;
1784 set->queue_depth = min(64U, set->queue_depth);
1785 }
1786
1768 hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, 1787 hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
1769 set->numa_node); 1788 set->numa_node);
1770 1789
@@ -1783,7 +1802,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1783 if (!hctxs[i]) 1802 if (!hctxs[i])
1784 goto err_hctxs; 1803 goto err_hctxs;
1785 1804
1786 if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL)) 1805 if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
1806 node))
1787 goto err_hctxs; 1807 goto err_hctxs;
1788 1808
1789 atomic_set(&hctxs[i]->nr_active, 0); 1809 atomic_set(&hctxs[i]->nr_active, 0);
@@ -1830,7 +1850,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1830 else 1850 else
1831 blk_queue_make_request(q, blk_sq_make_request); 1851 blk_queue_make_request(q, blk_sq_make_request);
1832 1852
1833 blk_queue_rq_timed_out(q, blk_mq_rq_timed_out);
1834 if (set->timeout) 1853 if (set->timeout)
1835 blk_queue_rq_timeout(q, set->timeout); 1854 blk_queue_rq_timeout(q, set->timeout);
1836 1855
@@ -1842,17 +1861,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1842 if (set->ops->complete) 1861 if (set->ops->complete)
1843 blk_queue_softirq_done(q, set->ops->complete); 1862 blk_queue_softirq_done(q, set->ops->complete);
1844 1863
1845 blk_mq_init_flush(q);
1846 blk_mq_init_cpu_queues(q, set->nr_hw_queues); 1864 blk_mq_init_cpu_queues(q, set->nr_hw_queues);
1847 1865
1848 q->flush_rq = kzalloc(round_up(sizeof(struct request) +
1849 set->cmd_size, cache_line_size()),
1850 GFP_KERNEL);
1851 if (!q->flush_rq)
1852 goto err_hw;
1853
1854 if (blk_mq_init_hw_queues(q, set)) 1866 if (blk_mq_init_hw_queues(q, set))
1855 goto err_flush_rq; 1867 goto err_hw;
1856 1868
1857 mutex_lock(&all_q_mutex); 1869 mutex_lock(&all_q_mutex);
1858 list_add_tail(&q->all_q_node, &all_q_list); 1870 list_add_tail(&q->all_q_node, &all_q_list);
@@ -1864,8 +1876,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1864 1876
1865 return q; 1877 return q;
1866 1878
1867err_flush_rq:
1868 kfree(q->flush_rq);
1869err_hw: 1879err_hw:
1870 blk_cleanup_queue(q); 1880 blk_cleanup_queue(q);
1871err_hctxs: 1881err_hctxs:
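
Timeout handling moves from scanning free tags to iterating busy requests with blk_mq_tag_busy_iter(): blk_mq_check_expired() either fires the per-request timeout handling or records the earliest future deadline so blk_mq_rq_timer() can re-arm the queue timer. The comparison logic looks roughly like the following userspace sketch, with plain time_t instead of jiffies and invented struct names.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct timeout_scan {
        time_t next;            /* earliest deadline still in the future */
        bool next_set;
};

struct rq_sketch {
        bool started;
        time_t deadline;
};

/* returns true if the request should be treated as timed out right now */
static bool check_expired(const struct rq_sketch *rq, time_t now,
                          struct timeout_scan *scan)
{
        if (!rq->started)
                return false;           /* not in flight yet, ignore */

        if (now >= rq->deadline)
                return true;            /* expired: run the timeout handler */

        /* still pending: remember the soonest deadline to re-arm the timer */
        if (!scan->next_set || rq->deadline < scan->next) {
                scan->next = rq->deadline;
                scan->next_set = true;
        }
        return false;
}

int main(void)
{
        time_t now = time(NULL);
        struct timeout_scan scan = { 0, false };
        struct rq_sketch pending = { true, now + 30 };
        struct rq_sketch late = { true, now - 1 };

        printf("pending expired? %d\n", check_expired(&pending, now, &scan));
        printf("late expired?    %d\n", check_expired(&late, now, &scan));
        printf("re-arm timer in %ld s\n", (long)(scan.next - now));
        return 0;
}
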
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ca4964a6295d..d567d5283ffa 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,7 +27,6 @@ struct blk_mq_ctx {
27 27
28void __blk_mq_complete_request(struct request *rq); 28void __blk_mq_complete_request(struct request *rq);
29void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 29void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
30void blk_mq_init_flush(struct request_queue *q);
31void blk_mq_freeze_queue(struct request_queue *q); 30void blk_mq_freeze_queue(struct request_queue *q);
32void blk_mq_free_queue(struct request_queue *q); 31void blk_mq_free_queue(struct request_queue *q);
33void blk_mq_clone_flush_request(struct request *flush_rq, 32void blk_mq_clone_flush_request(struct request *flush_rq,
@@ -60,6 +59,8 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
60extern int blk_mq_sysfs_register(struct request_queue *q); 59extern int blk_mq_sysfs_register(struct request_queue *q);
61extern void blk_mq_sysfs_unregister(struct request_queue *q); 60extern void blk_mq_sysfs_unregister(struct request_queue *q);
62 61
62extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
63
63/* 64/*
64 * Basic implementation of sparser bitmap, allowing the user to spread 65 * Basic implementation of sparser bitmap, allowing the user to spread
65 * the bits over more cachelines. 66 * the bits over more cachelines.
diff --git a/block/blk-settings.c b/block/blk-settings.c
index f1a1795a5683..aa02247d227e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -574,7 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
574 bottom = max(b->physical_block_size, b->io_min) + alignment; 574 bottom = max(b->physical_block_size, b->io_min) + alignment;
575 575
576 /* Verify that top and bottom intervals line up */ 576 /* Verify that top and bottom intervals line up */
577 if (max(top, bottom) & (min(top, bottom) - 1)) { 577 if (max(top, bottom) % min(top, bottom)) {
578 t->misaligned = 1; 578 t->misaligned = 1;
579 ret = -1; 579 ret = -1;
580 } 580 }
@@ -619,7 +619,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
619 619
620 /* Find lowest common alignment_offset */ 620 /* Find lowest common alignment_offset */
621 t->alignment_offset = lcm(t->alignment_offset, alignment) 621 t->alignment_offset = lcm(t->alignment_offset, alignment)
622 & (max(t->physical_block_size, t->io_min) - 1); 622 % max(t->physical_block_size, t->io_min);
623 623
624 /* Verify that new alignment_offset is on a logical block boundary */ 624 /* Verify that new alignment_offset is on a logical block boundary */
625 if (t->alignment_offset & (t->logical_block_size - 1)) { 625 if (t->alignment_offset & (t->logical_block_size - 1)) {
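
The blk_stack_limits() change replaces the bitmask test max(top, bottom) & (min(top, bottom) - 1) with a modulo. The mask form only detects misalignment when the smaller interval is a power of two; modulo is correct for any interval size. The 4096/1536 pair below is an illustrative example, not a value taken from the patch.

#include <stdio.h>

int main(void)
{
        unsigned int top = 4096, bottom = 1536; /* 1536 is not a power of two */

        /* old test: (bottom - 1) is not a proper mask, so this reports 0 ("aligned") */
        printf("mask test:   %u\n", top & (bottom - 1));

        /* new test: 4096 is not a multiple of 1536, so this is nonzero (misaligned) */
        printf("modulo test: %u\n", top % bottom);
        return 0;
}
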
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 521ae9089c50..1fac43408911 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -519,8 +519,8 @@ static void blk_release_queue(struct kobject *kobj)
519 519
520 if (q->mq_ops) 520 if (q->mq_ops)
521 blk_mq_free_queue(q); 521 blk_mq_free_queue(q);
522 522 else
523 kfree(q->flush_rq); 523 blk_free_flush_queue(q->fq);
524 524
525 blk_trace_shutdown(q); 525 blk_trace_shutdown(q);
526 526
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 95a09590ccfd..56c025894cdf 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -90,10 +90,7 @@ static void blk_rq_timed_out(struct request *req)
90 switch (ret) { 90 switch (ret) {
91 case BLK_EH_HANDLED: 91 case BLK_EH_HANDLED:
92 /* Can we use req->errors here? */ 92 /* Can we use req->errors here? */
93 if (q->mq_ops) 93 __blk_complete_request(req);
94 __blk_mq_complete_request(req);
95 else
96 __blk_complete_request(req);
97 break; 94 break;
98 case BLK_EH_RESET_TIMER: 95 case BLK_EH_RESET_TIMER:
99 blk_add_timer(req); 96 blk_add_timer(req);
@@ -113,7 +110,7 @@ static void blk_rq_timed_out(struct request *req)
113 } 110 }
114} 111}
115 112
116void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, 113static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
117 unsigned int *next_set) 114 unsigned int *next_set)
118{ 115{
119 if (time_after_eq(jiffies, rq->deadline)) { 116 if (time_after_eq(jiffies, rq->deadline)) {
@@ -162,7 +159,10 @@ void blk_abort_request(struct request *req)
162 if (blk_mark_rq_complete(req)) 159 if (blk_mark_rq_complete(req))
163 return; 160 return;
164 blk_delete_timer(req); 161 blk_delete_timer(req);
165 blk_rq_timed_out(req); 162 if (req->q->mq_ops)
163 blk_mq_rq_timed_out(req, false);
164 else
165 blk_rq_timed_out(req);
166} 166}
167EXPORT_SYMBOL_GPL(blk_abort_request); 167EXPORT_SYMBOL_GPL(blk_abort_request);
168 168
@@ -190,7 +190,8 @@ void blk_add_timer(struct request *req)
190 struct request_queue *q = req->q; 190 struct request_queue *q = req->q;
191 unsigned long expiry; 191 unsigned long expiry;
192 192
193 if (!q->rq_timed_out_fn) 193 /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
194 if (!q->mq_ops && !q->rq_timed_out_fn)
194 return; 195 return;
195 196
196 BUG_ON(!list_empty(&req->timeout_list)); 197 BUG_ON(!list_empty(&req->timeout_list));
diff --git a/block/blk.h b/block/blk.h
index 6748c4f8d7a1..43b036185712 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,6 +2,8 @@
2#define BLK_INTERNAL_H 2#define BLK_INTERNAL_H
3 3
4#include <linux/idr.h> 4#include <linux/idr.h>
5#include <linux/blk-mq.h>
6#include "blk-mq.h"
5 7
6/* Amount of time in which a process may batch requests */ 8/* Amount of time in which a process may batch requests */
7#define BLK_BATCH_TIME (HZ/50UL) 9#define BLK_BATCH_TIME (HZ/50UL)
@@ -12,16 +14,44 @@
12/* Max future timer expiry for timeouts */ 14/* Max future timer expiry for timeouts */
13#define BLK_MAX_TIMEOUT (5 * HZ) 15#define BLK_MAX_TIMEOUT (5 * HZ)
14 16
17struct blk_flush_queue {
18 unsigned int flush_queue_delayed:1;
19 unsigned int flush_pending_idx:1;
20 unsigned int flush_running_idx:1;
21 unsigned long flush_pending_since;
22 struct list_head flush_queue[2];
23 struct list_head flush_data_in_flight;
24 struct request *flush_rq;
25 spinlock_t mq_flush_lock;
26};
27
15extern struct kmem_cache *blk_requestq_cachep; 28extern struct kmem_cache *blk_requestq_cachep;
16extern struct kmem_cache *request_cachep; 29extern struct kmem_cache *request_cachep;
17extern struct kobj_type blk_queue_ktype; 30extern struct kobj_type blk_queue_ktype;
18extern struct ida blk_queue_ida; 31extern struct ida blk_queue_ida;
19 32
33static inline struct blk_flush_queue *blk_get_flush_queue(
34 struct request_queue *q, struct blk_mq_ctx *ctx)
35{
36 struct blk_mq_hw_ctx *hctx;
37
38 if (!q->mq_ops)
39 return q->fq;
40
41 hctx = q->mq_ops->map_queue(q, ctx->cpu);
42
43 return hctx->fq;
44}
45
20static inline void __blk_get_queue(struct request_queue *q) 46static inline void __blk_get_queue(struct request_queue *q)
21{ 47{
22 kobject_get(&q->kobj); 48 kobject_get(&q->kobj);
23} 49}
24 50
51struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
52 int node, int cmd_size);
53void blk_free_flush_queue(struct blk_flush_queue *q);
54
25int blk_init_rl(struct request_list *rl, struct request_queue *q, 55int blk_init_rl(struct request_list *rl, struct request_queue *q,
26 gfp_t gfp_mask); 56 gfp_t gfp_mask);
27void blk_exit_rl(struct request_list *rl); 57void blk_exit_rl(struct request_list *rl);
@@ -38,8 +68,6 @@ bool __blk_end_bidi_request(struct request *rq, int error,
38 unsigned int nr_bytes, unsigned int bidi_bytes); 68 unsigned int nr_bytes, unsigned int bidi_bytes);
39 69
40void blk_rq_timed_out_timer(unsigned long data); 70void blk_rq_timed_out_timer(unsigned long data);
41void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
42 unsigned int *next_set);
43unsigned long blk_rq_timeout(unsigned long timeout); 71unsigned long blk_rq_timeout(unsigned long timeout);
44void blk_add_timer(struct request *req); 72void blk_add_timer(struct request *req);
45void blk_delete_timer(struct request *); 73void blk_delete_timer(struct request *);
@@ -88,6 +116,7 @@ void blk_insert_flush(struct request *rq);
88static inline struct request *__elv_next_request(struct request_queue *q) 116static inline struct request *__elv_next_request(struct request_queue *q)
89{ 117{
90 struct request *rq; 118 struct request *rq;
119 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
91 120
92 while (1) { 121 while (1) {
93 if (!list_empty(&q->queue_head)) { 122 if (!list_empty(&q->queue_head)) {
@@ -110,9 +139,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
110 * should be restarted later. Please see flush_end_io() for 139 * should be restarted later. Please see flush_end_io() for
111 * details. 140 * details.
112 */ 141 */
113 if (q->flush_pending_idx != q->flush_running_idx && 142 if (fq->flush_pending_idx != fq->flush_running_idx &&
114 !queue_flush_queueable(q)) { 143 !queue_flush_queueable(q)) {
115 q->flush_queue_delayed = 1; 144 fq->flush_queue_delayed = 1;
116 return NULL; 145 return NULL;
117 } 146 }
118 if (unlikely(blk_queue_bypass(q)) || 147 if (unlikely(blk_queue_bypass(q)) ||
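
struct blk_flush_queue and blk_get_flush_queue() give every blk-mq hardware queue its own flush state (hctx->fq) while legacy queues keep a single q->fq. Below is a toy model of that lookup, with invented types and a trivial cpu-to-hardware-queue mapping standing in for q->mq_ops->map_queue().

#include <stdio.h>

struct flush_queue_ref { int id; };

struct hw_ctx_sketch { struct flush_queue_ref fq; };

struct queue_sketch {
        int is_mq;                      /* does the queue use blk-mq? */
        struct flush_queue_ref fq;      /* legacy: one per request queue */
        struct hw_ctx_sketch hctx[2];   /* blk-mq: one per hardware queue */
};

/* pick the flush queue that owns a request submitted from 'cpu' */
static struct flush_queue_ref *get_flush_queue(struct queue_sketch *q, int cpu)
{
        if (!q->is_mq)
                return &q->fq;
        /* stand-in for q->mq_ops->map_queue(q, ctx->cpu) */
        return &q->hctx[cpu % 2].fq;
}

int main(void)
{
        struct queue_sketch q = { .is_mq = 1,
                                  .hctx = { { { 0 } }, { { 1 } } } };

        printf("cpu 3 maps to flush queue %d\n", get_flush_queue(&q, 3)->id);
        return 0;
}
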
diff --git a/block/bsg.c b/block/bsg.c
index ff46addde5d8..276e869e686c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -270,8 +270,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
270 * map scatter-gather elements separately and string them to request 270 * map scatter-gather elements separately and string them to request
271 */ 271 */
272 rq = blk_get_request(q, rw, GFP_KERNEL); 272 rq = blk_get_request(q, rw, GFP_KERNEL);
273 if (!rq) 273 if (IS_ERR(rq))
274 return ERR_PTR(-ENOMEM); 274 return rq;
275 blk_rq_set_block_pc(rq); 275 blk_rq_set_block_pc(rq);
276 276
277 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); 277 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
@@ -285,8 +285,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
285 } 285 }
286 286
287 next_rq = blk_get_request(q, READ, GFP_KERNEL); 287 next_rq = blk_get_request(q, READ, GFP_KERNEL);
288 if (!next_rq) { 288 if (IS_ERR(next_rq)) {
289 ret = -ENOMEM; 289 ret = PTR_ERR(next_rq);
290 next_rq = NULL;
290 goto out; 291 goto out;
291 } 292 }
292 rq->next_rq = next_rq; 293 rq->next_rq = next_rq;
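
blk_get_request() failures are now reported as ERR_PTR-encoded errno values instead of a bare NULL, so callers such as bsg_map_hdr() can propagate the underlying error with PTR_ERR() rather than assuming -ENOMEM. The real macros live in <linux/err.h>; the sketch below is a hedged userspace re-implementation of the idea, where error codes occupy the last page of the address space.

#include <stdio.h>

#define MAX_ERRNO_SKETCH 4095

static inline void *err_ptr(long error)         /* 'error' is a negative errno */
{
        return (void *)error;
}

static inline long ptr_err(const void *ptr)
{
        return (long)ptr;
}

static inline int is_err(const void *ptr)
{
        /* errors are -1..-4095, i.e. the top 4095 byte values of the address space */
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO_SKETCH;
}

/* a stand-in allocator that fails with a specific errno instead of NULL */
static void *get_request_sketch(int fail)
{
        static int dummy_request;

        if (fail)
                return err_ptr(-12);    /* -ENOMEM on Linux */
        return &dummy_request;
}

int main(void)
{
        void *rq = get_request_sketch(1);

        if (is_err(rq))
                printf("request allocation failed: errno %ld\n", -ptr_err(rq));
        return 0;
}
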
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3f31cf9508e6..6f2751d305de 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -299,7 +299,7 @@ struct cfq_io_cq {
299 struct cfq_ttime ttime; 299 struct cfq_ttime ttime;
300 int ioprio; /* the current ioprio */ 300 int ioprio; /* the current ioprio */
301#ifdef CONFIG_CFQ_GROUP_IOSCHED 301#ifdef CONFIG_CFQ_GROUP_IOSCHED
302 uint64_t blkcg_id; /* the current blkcg ID */ 302 uint64_t blkcg_serial_nr; /* the current blkcg serial */
303#endif 303#endif
304}; 304};
305 305
@@ -3547,17 +3547,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3547{ 3547{
3548 struct cfq_data *cfqd = cic_to_cfqd(cic); 3548 struct cfq_data *cfqd = cic_to_cfqd(cic);
3549 struct cfq_queue *sync_cfqq; 3549 struct cfq_queue *sync_cfqq;
3550 uint64_t id; 3550 uint64_t serial_nr;
3551 3551
3552 rcu_read_lock(); 3552 rcu_read_lock();
3553 id = bio_blkcg(bio)->id; 3553 serial_nr = bio_blkcg(bio)->css.serial_nr;
3554 rcu_read_unlock(); 3554 rcu_read_unlock();
3555 3555
3556 /* 3556 /*
3557 * Check whether blkcg has changed. The condition may trigger 3557 * Check whether blkcg has changed. The condition may trigger
3558 * spuriously on a newly created cic but there's no harm. 3558 * spuriously on a newly created cic but there's no harm.
3559 */ 3559 */
3560 if (unlikely(!cfqd) || likely(cic->blkcg_id == id)) 3560 if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
3561 return; 3561 return;
3562 3562
3563 sync_cfqq = cic_to_cfqq(cic, 1); 3563 sync_cfqq = cic_to_cfqq(cic, 1);
@@ -3571,7 +3571,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3571 cfq_put_queue(sync_cfqq); 3571 cfq_put_queue(sync_cfqq);
3572 } 3572 }
3573 3573
3574 cic->blkcg_id = id; 3574 cic->blkcg_serial_nr = serial_nr;
3575} 3575}
3576#else 3576#else
3577static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { } 3577static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { }
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 18b282ce361e..f678c733df40 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
709 if (!arg) 709 if (!arg)
710 return -EINVAL; 710 return -EINVAL;
711 bdi = blk_get_backing_dev_info(bdev); 711 bdi = blk_get_backing_dev_info(bdev);
712 if (bdi == NULL)
713 return -ENOTTY;
714 return compat_put_long(arg, 712 return compat_put_long(arg,
715 (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); 713 (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
716 case BLKROGET: /* compatible */ 714 case BLKROGET: /* compatible */
@@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
731 if (!capable(CAP_SYS_ADMIN)) 729 if (!capable(CAP_SYS_ADMIN))
732 return -EACCES; 730 return -EACCES;
733 bdi = blk_get_backing_dev_info(bdev); 731 bdi = blk_get_backing_dev_info(bdev);
734 if (bdi == NULL)
735 return -ENOTTY;
736 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; 732 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
737 return 0; 733 return 0;
738 case BLKGETSIZE: 734 case BLKGETSIZE:
diff --git a/block/ioctl.c b/block/ioctl.c
index d6cda8147c91..6c7bf903742f 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
356 if (!arg) 356 if (!arg)
357 return -EINVAL; 357 return -EINVAL;
358 bdi = blk_get_backing_dev_info(bdev); 358 bdi = blk_get_backing_dev_info(bdev);
359 if (bdi == NULL)
360 return -ENOTTY;
361 return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); 359 return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
362 case BLKROGET: 360 case BLKROGET:
363 return put_int(arg, bdev_read_only(bdev) != 0); 361 return put_int(arg, bdev_read_only(bdev) != 0);
@@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
386 if(!capable(CAP_SYS_ADMIN)) 384 if(!capable(CAP_SYS_ADMIN))
387 return -EACCES; 385 return -EACCES;
388 bdi = blk_get_backing_dev_info(bdev); 386 bdi = blk_get_backing_dev_info(bdev);
389 if (bdi == NULL)
390 return -ENOTTY;
391 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; 387 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
392 return 0; 388 return 0;
393 case BLKBSZSET: 389 case BLKBSZSET:
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index 76d8ba6379a9..c2c48ec64b27 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -81,7 +81,7 @@ int mac_partition(struct parsed_partitions *state)
81 be32_to_cpu(part->start_block) * (secsize/512), 81 be32_to_cpu(part->start_block) * (secsize/512),
82 be32_to_cpu(part->block_count) * (secsize/512)); 82 be32_to_cpu(part->block_count) * (secsize/512));
83 83
84 if (!strnicmp(part->type, "Linux_RAID", 10)) 84 if (!strncasecmp(part->type, "Linux_RAID", 10))
85 state->parts[slot].flags = ADDPART_FLAG_RAID; 85 state->parts[slot].flags = ADDPART_FLAG_RAID;
86#ifdef CONFIG_PPC_PMAC 86#ifdef CONFIG_PPC_PMAC
87 /* 87 /*
@@ -100,7 +100,7 @@ int mac_partition(struct parsed_partitions *state)
100 goodness++; 100 goodness++;
101 101
102 if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0 102 if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0
103 || (strnicmp(part->type, "Linux", 5) == 0 103 || (strncasecmp(part->type, "Linux", 5) == 0
104 && strcasecmp(part->type, "Linux_swap") != 0)) { 104 && strcasecmp(part->type, "Linux_swap") != 0)) {
105 int i, l; 105 int i, l;
106 106
@@ -109,13 +109,13 @@ int mac_partition(struct parsed_partitions *state)
109 if (strcmp(part->name, "/") == 0) 109 if (strcmp(part->name, "/") == 0)
110 goodness++; 110 goodness++;
111 for (i = 0; i <= l - 4; ++i) { 111 for (i = 0; i <= l - 4; ++i) {
112 if (strnicmp(part->name + i, "root", 112 if (strncasecmp(part->name + i, "root",
113 4) == 0) { 113 4) == 0) {
114 goodness += 2; 114 goodness += 2;
115 break; 115 break;
116 } 116 }
117 } 117 }
118 if (strnicmp(part->name, "swap", 4) == 0) 118 if (strncasecmp(part->name, "swap", 4) == 0)
119 goodness--; 119 goodness--;
120 } 120 }
121 121
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9b8eaeca6a79..abb2e65b24cc 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -316,8 +316,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
316 316
317 ret = -ENOMEM; 317 ret = -ENOMEM;
318 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); 318 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
319 if (!rq) 319 if (IS_ERR(rq))
320 goto out; 320 return PTR_ERR(rq);
321 blk_rq_set_block_pc(rq); 321 blk_rq_set_block_pc(rq);
322 322
323 if (hdr->cmd_len > BLK_MAX_CDB) { 323 if (hdr->cmd_len > BLK_MAX_CDB) {
@@ -387,7 +387,6 @@ out_free_cdb:
387 kfree(rq->cmd); 387 kfree(rq->cmd);
388out_put_request: 388out_put_request:
389 blk_put_request(rq); 389 blk_put_request(rq);
390out:
391 return ret; 390 return ret;
392} 391}
393 392
@@ -457,8 +456,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
457 } 456 }
458 457
459 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); 458 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
460 if (!rq) { 459 if (IS_ERR(rq)) {
461 err = -ENOMEM; 460 err = PTR_ERR(rq);
462 goto error; 461 goto error;
463 } 462 }
464 blk_rq_set_block_pc(rq); 463 blk_rq_set_block_pc(rq);
@@ -548,6 +547,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
548 int err; 547 int err;
549 548
550 rq = blk_get_request(q, WRITE, __GFP_WAIT); 549 rq = blk_get_request(q, WRITE, __GFP_WAIT);
550 if (IS_ERR(rq))
551 return PTR_ERR(rq);
551 blk_rq_set_block_pc(rq); 552 blk_rq_set_block_pc(rq);
552 rq->timeout = BLK_DEFAULT_SG_TIMEOUT; 553 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
553 rq->cmd[0] = cmd; 554 rq->cmd[0] = cmd;
diff --git a/block/t10-pi.c b/block/t10-pi.c
new file mode 100644
index 000000000000..24d6e9715318
--- /dev/null
+++ b/block/t10-pi.c
@@ -0,0 +1,197 @@
1/*
2 * t10_pi.c - Functions for generating and verifying T10 Protection
3 * Information.
4 *
5 * Copyright (C) 2007, 2008, 2014 Oracle Corporation
6 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; see the file COPYING. If not, write to
19 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
20 * USA.
21 *
22 */
23
24#include <linux/t10-pi.h>
25#include <linux/blkdev.h>
26#include <linux/crc-t10dif.h>
27#include <net/checksum.h>
28
29typedef __be16 (csum_fn) (void *, unsigned int);
30
31static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
32static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
33
34static __be16 t10_pi_crc_fn(void *data, unsigned int len)
35{
36 return cpu_to_be16(crc_t10dif(data, len));
37}
38
39static __be16 t10_pi_ip_fn(void *data, unsigned int len)
40{
41 return (__force __be16)ip_compute_csum(data, len);
42}
43
44/*
45 * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
46 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
47 * tag.
48 */
49static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
50 unsigned int type)
51{
52 unsigned int i;
53
54 for (i = 0 ; i < iter->data_size ; i += iter->interval) {
55 struct t10_pi_tuple *pi = iter->prot_buf;
56
57 pi->guard_tag = fn(iter->data_buf, iter->interval);
58 pi->app_tag = 0;
59
60 if (type == 1)
61 pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
62 else
63 pi->ref_tag = 0;
64
65 iter->data_buf += iter->interval;
66 iter->prot_buf += sizeof(struct t10_pi_tuple);
67 iter->seed++;
68 }
69
70 return 0;
71}
72
73static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
74 unsigned int type)
75{
76 unsigned int i;
77
78 for (i = 0 ; i < iter->data_size ; i += iter->interval) {
79 struct t10_pi_tuple *pi = iter->prot_buf;
80 __be16 csum;
81
82 switch (type) {
83 case 1:
84 case 2:
85 if (pi->app_tag == APP_ESCAPE)
86 goto next;
87
88 if (be32_to_cpu(pi->ref_tag) !=
89 lower_32_bits(iter->seed)) {
90 pr_err("%s: ref tag error at location %llu " \
91 "(rcvd %u)\n", iter->disk_name,
92 (unsigned long long)
93 iter->seed, be32_to_cpu(pi->ref_tag));
94 return -EILSEQ;
95 }
96 break;
97 case 3:
98 if (pi->app_tag == APP_ESCAPE &&
99 pi->ref_tag == REF_ESCAPE)
100 goto next;
101 break;
102 }
103
104 csum = fn(iter->data_buf, iter->interval);
105
106 if (pi->guard_tag != csum) {
107 pr_err("%s: guard tag error at sector %llu " \
108 "(rcvd %04x, want %04x)\n", iter->disk_name,
109 (unsigned long long)iter->seed,
110 be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
111 return -EILSEQ;
112 }
113
114next:
115 iter->data_buf += iter->interval;
116 iter->prot_buf += sizeof(struct t10_pi_tuple);
117 iter->seed++;
118 }
119
120 return 0;
121}
122
123static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
124{
125 return t10_pi_generate(iter, t10_pi_crc_fn, 1);
126}
127
128static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
129{
130 return t10_pi_generate(iter, t10_pi_ip_fn, 1);
131}
132
133static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
134{
135 return t10_pi_verify(iter, t10_pi_crc_fn, 1);
136}
137
138static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
139{
140 return t10_pi_verify(iter, t10_pi_ip_fn, 1);
141}
142
143static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
144{
145 return t10_pi_generate(iter, t10_pi_crc_fn, 3);
146}
147
148static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
149{
150 return t10_pi_generate(iter, t10_pi_ip_fn, 3);
151}
152
153static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
154{
155 return t10_pi_verify(iter, t10_pi_crc_fn, 3);
156}
157
158static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
159{
160 return t10_pi_verify(iter, t10_pi_ip_fn, 3);
161}
162
163struct blk_integrity t10_pi_type1_crc = {
164 .name = "T10-DIF-TYPE1-CRC",
165 .generate_fn = t10_pi_type1_generate_crc,
166 .verify_fn = t10_pi_type1_verify_crc,
167 .tuple_size = sizeof(struct t10_pi_tuple),
168 .tag_size = 0,
169};
170EXPORT_SYMBOL(t10_pi_type1_crc);
171
172struct blk_integrity t10_pi_type1_ip = {
173 .name = "T10-DIF-TYPE1-IP",
174 .generate_fn = t10_pi_type1_generate_ip,
175 .verify_fn = t10_pi_type1_verify_ip,
176 .tuple_size = sizeof(struct t10_pi_tuple),
177 .tag_size = 0,
178};
179EXPORT_SYMBOL(t10_pi_type1_ip);
180
181struct blk_integrity t10_pi_type3_crc = {
182 .name = "T10-DIF-TYPE3-CRC",
183 .generate_fn = t10_pi_type3_generate_crc,
184 .verify_fn = t10_pi_type3_verify_crc,
185 .tuple_size = sizeof(struct t10_pi_tuple),
186 .tag_size = 0,
187};
188EXPORT_SYMBOL(t10_pi_type3_crc);
189
190struct blk_integrity t10_pi_type3_ip = {
191 .name = "T10-DIF-TYPE3-IP",
192 .generate_fn = t10_pi_type3_generate_ip,
193 .verify_fn = t10_pi_type3_verify_ip,
194 .tuple_size = sizeof(struct t10_pi_tuple),
195 .tag_size = 0,
196};
197EXPORT_SYMBOL(t10_pi_type3_ip);
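
For reference, a stand-alone user-space sketch of what t10_pi_generate() above computes for Type 1 protection: one 8-byte tuple per data interval, guard tag = CRC-16/T10-DIF (polynomial 0x8BB7, initial value 0) over the interval, ref tag = lower 32 bits of the seed, incremented per interval. The names below (pi_tuple, crc_t10dif_sw, pi_type1_generate) are illustrative, and the bitwise CRC is a slow but equivalent stand-in for the kernel's crc_t10dif():

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>		/* htons/htonl: tags are stored big endian */

struct pi_tuple {		/* same layout as struct t10_pi_tuple */
	uint16_t guard_tag;
	uint16_t app_tag;
	uint32_t ref_tag;
};

/* CRC-16/T10-DIF: poly 0x8BB7, init 0, no reflection, no final xor */
static uint16_t crc_t10dif_sw(const uint8_t *buf, size_t len)
{
	uint16_t crc = 0;

	while (len--) {
		crc ^= (uint16_t)*buf++ << 8;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8BB7 : crc << 1;
	}
	return crc;
}

/* Type 1: guard over each interval, ref tag seeded from the submitter's LBA */
static void pi_type1_generate(const uint8_t *data, size_t data_size,
			      size_t interval, uint32_t seed,
			      struct pi_tuple *prot)
{
	for (size_t i = 0; i < data_size;
	     i += interval, data += interval, prot++, seed++) {
		prot->guard_tag = htons(crc_t10dif_sw(data, interval));
		prot->app_tag = 0;
		prot->ref_tag = htonl(seed);
	}
}

int main(void)
{
	uint8_t sector[512];
	struct pi_tuple pi;

	memset(sector, 0, sizeof(sector));
	pi_type1_generate(sector, sizeof(sector), 512, 0x1000, &pi);
	printf("guard %04x ref %08x\n", ntohs(pi.guard_tag), ntohl(pi.ref_tag));
	return 0;
}
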
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 5c8e7fe07745..6b7e8d0fba99 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -247,7 +247,7 @@ static void mtip_async_complete(struct mtip_port *port,
247 if (unlikely(cmd->unaligned)) 247 if (unlikely(cmd->unaligned))
248 up(&port->cmd_slot_unal); 248 up(&port->cmd_slot_unal);
249 249
250 blk_mq_end_io(rq, status ? -EIO : 0); 250 blk_mq_end_request(rq, status ? -EIO : 0);
251} 251}
252 252
253/* 253/*
@@ -3739,7 +3739,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
3739 int err; 3739 int err;
3740 3740
3741 err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); 3741 err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
3742 blk_mq_end_io(rq, err); 3742 blk_mq_end_request(rq, err);
3743 return 0; 3743 return 0;
3744 } 3744 }
3745 3745
@@ -3775,13 +3775,16 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
3775 return false; 3775 return false;
3776} 3776}
3777 3777
3778static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) 3778static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
3779 bool last)
3779{ 3780{
3780 int ret; 3781 int ret;
3781 3782
3782 if (unlikely(mtip_check_unal_depth(hctx, rq))) 3783 if (unlikely(mtip_check_unal_depth(hctx, rq)))
3783 return BLK_MQ_RQ_QUEUE_BUSY; 3784 return BLK_MQ_RQ_QUEUE_BUSY;
3784 3785
3786 blk_mq_start_request(rq);
3787
3785 ret = mtip_submit_request(hctx, rq); 3788 ret = mtip_submit_request(hctx, rq);
3786 if (likely(!ret)) 3789 if (likely(!ret))
3787 return BLK_MQ_RQ_QUEUE_OK; 3790 return BLK_MQ_RQ_QUEUE_OK;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 00d469c7f9f7..ac50a2931044 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -177,7 +177,7 @@ static void end_cmd(struct nullb_cmd *cmd)
177{ 177{
178 switch (queue_mode) { 178 switch (queue_mode) {
179 case NULL_Q_MQ: 179 case NULL_Q_MQ:
180 blk_mq_end_io(cmd->rq, 0); 180 blk_mq_end_request(cmd->rq, 0);
181 return; 181 return;
182 case NULL_Q_RQ: 182 case NULL_Q_RQ:
183 INIT_LIST_HEAD(&cmd->rq->queuelist); 183 INIT_LIST_HEAD(&cmd->rq->queuelist);
@@ -313,13 +313,16 @@ static void null_request_fn(struct request_queue *q)
313 } 313 }
314} 314}
315 315
316static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) 316static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
317 bool last)
317{ 318{
318 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); 319 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
319 320
320 cmd->rq = rq; 321 cmd->rq = rq;
321 cmd->nq = hctx->driver_data; 322 cmd->nq = hctx->driver_data;
322 323
324 blk_mq_start_request(rq);
325
323 null_handle_cmd(cmd); 326 null_handle_cmd(cmd);
324 return BLK_MQ_RQ_QUEUE_OK; 327 return BLK_MQ_RQ_QUEUE_OK;
325} 328}
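
Both conversions above follow the updated blk-mq contract: ->queue_rq() now takes the "last" hint as an explicit argument instead of testing REQ_END, and the driver is responsible for calling blk_mq_start_request() before handing the request to hardware. A minimal sketch of a conforming handler, with the my_* names standing in for driver-specific pieces (they are not real kernel symbols):

#include <linux/blk-mq.h>

struct my_cmd {
	struct request *rq;
};

/* my_hw_issue() is a placeholder for the driver's actual submission path. */
static int my_hw_issue(void *hw, struct my_cmd *cmd, bool last);

static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
		       bool last)
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->rq = rq;

	blk_mq_start_request(rq);	/* arms the request timeout; must precede submission */

	if (my_hw_issue(hctx->driver_data, cmd, last))	/* 'last' can be used to batch doorbells */
		return BLK_MQ_RQ_QUEUE_BUSY;		/* block layer will requeue */

	return BLK_MQ_RQ_QUEUE_OK;	/* completion later ends with blk_mq_end_request() */
}
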
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index fea7e76a00de..d48715b287e6 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -722,6 +722,8 @@ static int pd_special_command(struct pd_unit *disk,
722 int err = 0; 722 int err = 0;
723 723
724 rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); 724 rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT);
725 if (IS_ERR(rq))
726 return PTR_ERR(rq);
725 727
726 rq->cmd_type = REQ_TYPE_SPECIAL; 728 rq->cmd_type = REQ_TYPE_SPECIAL;
727 rq->special = func; 729 rq->special = func;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 758ac442c5b5..09e628dafd9d 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,6 +704,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
704 704
705 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 705 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
706 WRITE : READ, __GFP_WAIT); 706 WRITE : READ, __GFP_WAIT);
707 if (IS_ERR(rq))
708 return PTR_ERR(rq);
707 blk_rq_set_block_pc(rq); 709 blk_rq_set_block_pc(rq);
708 710
709 if (cgc->buflen) { 711 if (cgc->buflen) {
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index d5e2d12b9d9e..5d552857de41 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -568,7 +568,7 @@ static struct carm_request *carm_get_special(struct carm_host *host)
568 return NULL; 568 return NULL;
569 569
570 rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); 570 rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL);
571 if (!rq) { 571 if (IS_ERR(rq)) {
572 spin_lock_irqsave(&host->lock, flags); 572 spin_lock_irqsave(&host->lock, flags);
573 carm_put_request(host, crq); 573 carm_put_request(host, crq);
574 spin_unlock_irqrestore(&host->lock, flags); 574 spin_unlock_irqrestore(&host->lock, flags);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 930fee886917..c6a27d54ad62 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -129,7 +129,7 @@ static inline void virtblk_request_done(struct request *req)
129 req->errors = (error != 0); 129 req->errors = (error != 0);
130 } 130 }
131 131
132 blk_mq_end_io(req, error); 132 blk_mq_end_request(req, error);
133} 133}
134 134
135static void virtblk_done(struct virtqueue *vq) 135static void virtblk_done(struct virtqueue *vq)
@@ -158,14 +158,14 @@ static void virtblk_done(struct virtqueue *vq)
158 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 158 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
159} 159}
160 160
161static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) 161static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
162 bool last)
162{ 163{
163 struct virtio_blk *vblk = hctx->queue->queuedata; 164 struct virtio_blk *vblk = hctx->queue->queuedata;
164 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 165 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
165 unsigned long flags; 166 unsigned long flags;
166 unsigned int num; 167 unsigned int num;
167 int qid = hctx->queue_num; 168 int qid = hctx->queue_num;
168 const bool last = (req->cmd_flags & REQ_END) != 0;
169 int err; 169 int err;
170 bool notify = false; 170 bool notify = false;
171 171
@@ -199,6 +199,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
199 } 199 }
200 } 200 }
201 201
202 blk_mq_start_request(req);
203
202 num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); 204 num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
203 if (num) { 205 if (num) {
204 if (rq_data_dir(vbr->req) == WRITE) 206 if (rq_data_dir(vbr->req) == WRITE)
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 898b84bba28a..5d28a45d2960 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2180,8 +2180,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2180 len = nr * CD_FRAMESIZE_RAW; 2180 len = nr * CD_FRAMESIZE_RAW;
2181 2181
2182 rq = blk_get_request(q, READ, GFP_KERNEL); 2182 rq = blk_get_request(q, READ, GFP_KERNEL);
2183 if (!rq) { 2183 if (IS_ERR(rq)) {
2184 ret = -ENOMEM; 2184 ret = PTR_ERR(rq);
2185 break; 2185 break;
2186 } 2186 }
2187 blk_rq_set_block_pc(rq); 2187 blk_rq_set_block_pc(rq);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index f41558a0bcd1..ca958604cda2 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -46,7 +46,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
46 * timeout has expired, so power management will be reenabled. 46 * timeout has expired, so power management will be reenabled.
47 */ 47 */
48 rq = blk_get_request(q, READ, GFP_NOWAIT); 48 rq = blk_get_request(q, READ, GFP_NOWAIT);
49 if (unlikely(!rq)) 49 if (IS_ERR(rq))
50 goto out; 50 goto out;
51 51
52 rq->cmd[0] = REQ_UNPARK_HEADS; 52 rq->cmd[0] = REQ_UNPARK_HEADS;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 296619b7426c..3a820f61ce65 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -73,7 +73,6 @@ comment "SCSI support type (disk, tape, CD-ROM)"
73config BLK_DEV_SD 73config BLK_DEV_SD
74 tristate "SCSI disk support" 74 tristate "SCSI disk support"
75 depends on SCSI 75 depends on SCSI
76 select CRC_T10DIF if BLK_DEV_INTEGRITY
77 ---help--- 76 ---help---
78 If you want to use SCSI hard disks, Fibre Channel disks, 77 If you want to use SCSI hard disks, Fibre Channel disks,
79 Serial ATA (SATA) or Parallel ATA (PATA) hard disks, 78 Serial ATA (SATA) or Parallel ATA (PATA) hard disks,
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 7bcf67eec921..e99507ed0e3c 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -115,7 +115,7 @@ static struct request *get_alua_req(struct scsi_device *sdev,
115 115
116 rq = blk_get_request(q, rw, GFP_NOIO); 116 rq = blk_get_request(q, rw, GFP_NOIO);
117 117
118 if (!rq) { 118 if (IS_ERR(rq)) {
119 sdev_printk(KERN_INFO, sdev, 119 sdev_printk(KERN_INFO, sdev,
120 "%s: blk_get_request failed\n", __func__); 120 "%s: blk_get_request failed\n", __func__);
121 return NULL; 121 return NULL;
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index 6f07f7fe3aa1..84765384c47c 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -275,7 +275,7 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
275 275
276 rq = blk_get_request(sdev->request_queue, 276 rq = blk_get_request(sdev->request_queue,
277 (cmd != INQUIRY) ? WRITE : READ, GFP_NOIO); 277 (cmd != INQUIRY) ? WRITE : READ, GFP_NOIO);
278 if (!rq) { 278 if (IS_ERR(rq)) {
279 sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed"); 279 sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed");
280 return NULL; 280 return NULL;
281 } 281 }
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index e9d9fea9e272..4ee2759f5299 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -117,7 +117,7 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h)
117 117
118retry: 118retry:
119 req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); 119 req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO);
120 if (!req) 120 if (IS_ERR(req))
121 return SCSI_DH_RES_TEMP_UNAVAIL; 121 return SCSI_DH_RES_TEMP_UNAVAIL;
122 122
123 blk_rq_set_block_pc(req); 123 blk_rq_set_block_pc(req);
@@ -247,7 +247,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h)
247 struct request *req; 247 struct request *req;
248 248
249 req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC); 249 req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC);
250 if (!req) 250 if (IS_ERR(req))
251 return SCSI_DH_RES_TEMP_UNAVAIL; 251 return SCSI_DH_RES_TEMP_UNAVAIL;
252 252
253 blk_rq_set_block_pc(req); 253 blk_rq_set_block_pc(req);
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 826069db9848..1b5bc9293e37 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -274,7 +274,7 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
274 274
275 rq = blk_get_request(q, rw, GFP_NOIO); 275 rq = blk_get_request(q, rw, GFP_NOIO);
276 276
277 if (!rq) { 277 if (IS_ERR(rq)) {
278 sdev_printk(KERN_INFO, sdev, 278 sdev_printk(KERN_INFO, sdev,
279 "get_rdac_req: blk_get_request failed.\n"); 279 "get_rdac_req: blk_get_request failed.\n");
280 return NULL; 280 return NULL;
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 5f4cbf0c4759..fd19fd8468ac 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1567,8 +1567,8 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1567 struct request *req; 1567 struct request *req;
1568 1568
1569 req = blk_get_request(q, has_write ? WRITE : READ, flags); 1569 req = blk_get_request(q, has_write ? WRITE : READ, flags);
1570 if (unlikely(!req)) 1570 if (IS_ERR(req))
1571 return ERR_PTR(-ENOMEM); 1571 return req;
1572 1572
1573 blk_rq_set_block_pc(req); 1573 blk_rq_set_block_pc(req);
1574 return req; 1574 return req;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 0727ea7cc387..dff37a250d79 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -362,7 +362,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
362 int write = (data_direction == DMA_TO_DEVICE); 362 int write = (data_direction == DMA_TO_DEVICE);
363 363
364 req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); 364 req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL);
365 if (!req) 365 if (IS_ERR(req))
366 return DRIVER_ERROR << 24; 366 return DRIVER_ERROR << 24;
367 367
368 blk_rq_set_block_pc(req); 368 blk_rq_set_block_pc(req);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6b20ef3fee54..9a6f8468225f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1961,6 +1961,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
1961 * request becomes available 1961 * request becomes available
1962 */ 1962 */
1963 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); 1963 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
1964 if (IS_ERR(req))
1965 return;
1964 1966
1965 blk_rq_set_block_pc(req); 1967 blk_rq_set_block_pc(req);
1966 1968
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index db8c449282f9..9eff8a375132 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -221,7 +221,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
221 int ret = DRIVER_ERROR << 24; 221 int ret = DRIVER_ERROR << 24;
222 222
223 req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); 223 req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
224 if (!req) 224 if (IS_ERR(req))
225 return ret; 225 return ret;
226 blk_rq_set_block_pc(req); 226 blk_rq_set_block_pc(req);
227 227
@@ -715,7 +715,7 @@ static bool scsi_end_request(struct request *req, int error,
715 715
716 if (req->mq_ctx) { 716 if (req->mq_ctx) {
717 /* 717 /*
718 * In the MQ case the command gets freed by __blk_mq_end_io, 718 * In the MQ case the command gets freed by __blk_mq_end_request,
719 * so we have to do all cleanup that depends on it earlier. 719 * so we have to do all cleanup that depends on it earlier.
720 * 720 *
721 * We also can't kick the queues from irq context, so we 721 * We also can't kick the queues from irq context, so we
@@ -723,7 +723,7 @@ static bool scsi_end_request(struct request *req, int error,
723 */ 723 */
724 scsi_mq_uninit_cmd(cmd); 724 scsi_mq_uninit_cmd(cmd);
725 725
726 __blk_mq_end_io(req, error); 726 __blk_mq_end_request(req, error);
727 727
728 if (scsi_target(sdev)->single_lun || 728 if (scsi_target(sdev)->single_lun ||
729 !list_empty(&sdev->host->starved_list)) 729 !list_empty(&sdev->host->starved_list))
@@ -1847,6 +1847,8 @@ static int scsi_mq_prep_fn(struct request *req)
1847 next_rq->special = bidi_sdb; 1847 next_rq->special = bidi_sdb;
1848 } 1848 }
1849 1849
1850 blk_mq_start_request(req);
1851
1850 return scsi_setup_cmnd(sdev, req); 1852 return scsi_setup_cmnd(sdev, req);
1851} 1853}
1852 1854
@@ -1856,7 +1858,8 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
1856 blk_mq_complete_request(cmd->request); 1858 blk_mq_complete_request(cmd->request);
1857} 1859}
1858 1860
1859static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) 1861static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
1862 bool last)
1860{ 1863{
1861 struct request_queue *q = req->q; 1864 struct request_queue *q = req->q;
1862 struct scsi_device *sdev = q->queuedata; 1865 struct scsi_device *sdev = q->queuedata;
@@ -1880,11 +1883,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
1880 if (!scsi_host_queue_ready(q, shost, sdev)) 1883 if (!scsi_host_queue_ready(q, shost, sdev))
1881 goto out_dec_target_busy; 1884 goto out_dec_target_busy;
1882 1885
1886
1883 if (!(req->cmd_flags & REQ_DONTPREP)) { 1887 if (!(req->cmd_flags & REQ_DONTPREP)) {
1884 ret = prep_to_mq(scsi_mq_prep_fn(req)); 1888 ret = prep_to_mq(scsi_mq_prep_fn(req));
1885 if (ret) 1889 if (ret)
1886 goto out_dec_host_busy; 1890 goto out_dec_host_busy;
1887 req->cmd_flags |= REQ_DONTPREP; 1891 req->cmd_flags |= REQ_DONTPREP;
1892 } else {
1893 blk_mq_start_request(req);
1888 } 1894 }
1889 1895
1890 scsi_init_cmd_errh(cmd); 1896 scsi_init_cmd_errh(cmd);
@@ -1931,6 +1937,14 @@ out:
1931 return ret; 1937 return ret;
1932} 1938}
1933 1939
1940static enum blk_eh_timer_return scsi_timeout(struct request *req,
1941 bool reserved)
1942{
1943 if (reserved)
1944 return BLK_EH_RESET_TIMER;
1945 return scsi_times_out(req);
1946}
1947
1934static int scsi_init_request(void *data, struct request *rq, 1948static int scsi_init_request(void *data, struct request *rq,
1935 unsigned int hctx_idx, unsigned int request_idx, 1949 unsigned int hctx_idx, unsigned int request_idx,
1936 unsigned int numa_node) 1950 unsigned int numa_node)
@@ -2042,7 +2056,7 @@ static struct blk_mq_ops scsi_mq_ops = {
2042 .map_queue = blk_mq_map_queue, 2056 .map_queue = blk_mq_map_queue,
2043 .queue_rq = scsi_queue_rq, 2057 .queue_rq = scsi_queue_rq,
2044 .complete = scsi_softirq_done, 2058 .complete = scsi_softirq_done,
2045 .timeout = scsi_times_out, 2059 .timeout = scsi_timeout,
2046 .init_request = scsi_init_request, 2060 .init_request = scsi_init_request,
2047 .exit_request = scsi_exit_request, 2061 .exit_request = scsi_exit_request,
2048}; 2062};
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0cb5c9f0c743..7ee86028a535 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -610,29 +610,44 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
610 mutex_unlock(&sd_ref_mutex); 610 mutex_unlock(&sd_ref_mutex);
611} 611}
612 612
613static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif) 613
614{ 614
615 unsigned int prot_op = SCSI_PROT_NORMAL; 615static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
616 unsigned int dix = scsi_prot_sg_count(scmd); 616 unsigned int dix, unsigned int dif)
617 617{
618 if (scmd->sc_data_direction == DMA_FROM_DEVICE) { 618 struct bio *bio = scmd->request->bio;
619 if (dif && dix) 619 unsigned int prot_op = sd_prot_op(rq_data_dir(scmd->request), dix, dif);
620 prot_op = SCSI_PROT_READ_PASS; 620 unsigned int protect = 0;
621 else if (dif && !dix) 621
622 prot_op = SCSI_PROT_READ_STRIP; 622 if (dix) { /* DIX Type 0, 1, 2, 3 */
623 else if (!dif && dix) 623 if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM))
624 prot_op = SCSI_PROT_READ_INSERT; 624 scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM;
625 } else { 625
626 if (dif && dix) 626 if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false)
627 prot_op = SCSI_PROT_WRITE_PASS; 627 scmd->prot_flags |= SCSI_PROT_GUARD_CHECK;
628 else if (dif && !dix) 628 }
629 prot_op = SCSI_PROT_WRITE_INSERT; 629
630 else if (!dif && dix) 630 if (dif != SD_DIF_TYPE3_PROTECTION) { /* DIX/DIF Type 0, 1, 2 */
631 prot_op = SCSI_PROT_WRITE_STRIP; 631 scmd->prot_flags |= SCSI_PROT_REF_INCREMENT;
632
633 if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false)
634 scmd->prot_flags |= SCSI_PROT_REF_CHECK;
635 }
636
637 if (dif) { /* DIX/DIF Type 1, 2, 3 */
638 scmd->prot_flags |= SCSI_PROT_TRANSFER_PI;
639
640 if (bio_integrity_flagged(bio, BIP_DISK_NOCHECK))
641 protect = 3 << 5; /* Disable target PI checking */
642 else
643 protect = 1 << 5; /* Enable target PI checking */
632 } 644 }
633 645
634 scsi_set_prot_op(scmd, prot_op); 646 scsi_set_prot_op(scmd, prot_op);
635 scsi_set_prot_type(scmd, dif); 647 scsi_set_prot_type(scmd, dif);
648 scmd->prot_flags &= sd_prot_flag_mask(prot_op);
649
650 return protect;
636} 651}
637 652
638static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) 653static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
@@ -893,7 +908,8 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
893 sector_t block = blk_rq_pos(rq); 908 sector_t block = blk_rq_pos(rq);
894 sector_t threshold; 909 sector_t threshold;
895 unsigned int this_count = blk_rq_sectors(rq); 910 unsigned int this_count = blk_rq_sectors(rq);
896 int ret, host_dif; 911 unsigned int dif, dix;
912 int ret;
897 unsigned char protect; 913 unsigned char protect;
898 914
899 ret = scsi_init_io(SCpnt, GFP_ATOMIC); 915 ret = scsi_init_io(SCpnt, GFP_ATOMIC);
@@ -995,7 +1011,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
995 SCpnt->cmnd[0] = WRITE_6; 1011 SCpnt->cmnd[0] = WRITE_6;
996 1012
997 if (blk_integrity_rq(rq)) 1013 if (blk_integrity_rq(rq))
998 sd_dif_prepare(rq, block, sdp->sector_size); 1014 sd_dif_prepare(SCpnt);
999 1015
1000 } else if (rq_data_dir(rq) == READ) { 1016 } else if (rq_data_dir(rq) == READ) {
1001 SCpnt->cmnd[0] = READ_6; 1017 SCpnt->cmnd[0] = READ_6;
@@ -1010,14 +1026,15 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
1010 "writing" : "reading", this_count, 1026 "writing" : "reading", this_count,
1011 blk_rq_sectors(rq))); 1027 blk_rq_sectors(rq)));
1012 1028
1013 /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ 1029 dix = scsi_prot_sg_count(SCpnt);
1014 host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); 1030 dif = scsi_host_dif_capable(SCpnt->device->host, sdkp->protection_type);
1015 if (host_dif) 1031
1016 protect = 1 << 5; 1032 if (dif || dix)
1033 protect = sd_setup_protect_cmnd(SCpnt, dix, dif);
1017 else 1034 else
1018 protect = 0; 1035 protect = 0;
1019 1036
1020 if (host_dif == SD_DIF_TYPE2_PROTECTION) { 1037 if (protect && sdkp->protection_type == SD_DIF_TYPE2_PROTECTION) {
1021 SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); 1038 SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC);
1022 1039
1023 if (unlikely(SCpnt->cmnd == NULL)) { 1040 if (unlikely(SCpnt->cmnd == NULL)) {
@@ -1102,10 +1119,6 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
1102 } 1119 }
1103 SCpnt->sdb.length = this_count * sdp->sector_size; 1120 SCpnt->sdb.length = this_count * sdp->sector_size;
1104 1121
1105 /* If DIF or DIX is enabled, tell HBA how to handle request */
1106 if (host_dif || scsi_prot_sg_count(SCpnt))
1107 sd_prot_op(SCpnt, host_dif);
1108
1109 /* 1122 /*
1110 * We shouldn't disconnect in the middle of a sector, so with a dumb 1123 * We shouldn't disconnect in the middle of a sector, so with a dumb
1111 * host adapter, it's safe to assume that we can at least transfer 1124 * host adapter, it's safe to assume that we can at least transfer
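
The protect value returned by sd_setup_protect_cmnd() above ends up in the RDPROTECT/WRPROTECT field (bits 5-7 of CDB byte 1) of the READ/WRITE command: 001b asks the target to check guard and reference tags, 011b transfers PI but disables target checking. A small sketch of that placement (the helper name is illustrative):

/* Sketch only: merge the pre-shifted protect bits into byte 1 of a READ/WRITE CDB. */
static void set_cdb_protect(unsigned char *cdb, unsigned char protect)
{
	cdb[1] = (cdb[1] & ~0xe0) | (protect & 0xe0);	/* protect is already << 5 */
}
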
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4c3ab8377fd3..467377884b63 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -167,6 +167,68 @@ enum sd_dif_target_protection_types {
167}; 167};
168 168
169/* 169/*
170 * Look up the DIX operation based on whether the command is read or
171 * write and whether dix and dif are enabled.
172 */
173static inline unsigned int sd_prot_op(bool write, bool dix, bool dif)
174{
175 /* Lookup table: bit 2 (write), bit 1 (dix), bit 0 (dif) */
176 const unsigned int ops[] = { /* wrt dix dif */
177 SCSI_PROT_NORMAL, /* 0 0 0 */
178 SCSI_PROT_READ_STRIP, /* 0 0 1 */
179 SCSI_PROT_READ_INSERT, /* 0 1 0 */
180 SCSI_PROT_READ_PASS, /* 0 1 1 */
181 SCSI_PROT_NORMAL, /* 1 0 0 */
182 SCSI_PROT_WRITE_INSERT, /* 1 0 1 */
183 SCSI_PROT_WRITE_STRIP, /* 1 1 0 */
184 SCSI_PROT_WRITE_PASS, /* 1 1 1 */
185 };
186
187 return ops[write << 2 | dix << 1 | dif];
188}
189
190/*
191 * Returns a mask of the protection flags that are valid for a given DIX
192 * operation.
193 */
194static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
195{
196 const unsigned int flag_mask[] = {
197 [SCSI_PROT_NORMAL] = 0,
198
199 [SCSI_PROT_READ_STRIP] = SCSI_PROT_TRANSFER_PI |
200 SCSI_PROT_GUARD_CHECK |
201 SCSI_PROT_REF_CHECK |
202 SCSI_PROT_REF_INCREMENT,
203
204 [SCSI_PROT_READ_INSERT] = SCSI_PROT_REF_INCREMENT |
205 SCSI_PROT_IP_CHECKSUM,
206
207 [SCSI_PROT_READ_PASS] = SCSI_PROT_TRANSFER_PI |
208 SCSI_PROT_GUARD_CHECK |
209 SCSI_PROT_REF_CHECK |
210 SCSI_PROT_REF_INCREMENT |
211 SCSI_PROT_IP_CHECKSUM,
212
213 [SCSI_PROT_WRITE_INSERT] = SCSI_PROT_TRANSFER_PI |
214 SCSI_PROT_REF_INCREMENT,
215
216 [SCSI_PROT_WRITE_STRIP] = SCSI_PROT_GUARD_CHECK |
217 SCSI_PROT_REF_CHECK |
218 SCSI_PROT_REF_INCREMENT |
219 SCSI_PROT_IP_CHECKSUM,
220
221 [SCSI_PROT_WRITE_PASS] = SCSI_PROT_TRANSFER_PI |
222 SCSI_PROT_GUARD_CHECK |
223 SCSI_PROT_REF_CHECK |
224 SCSI_PROT_REF_INCREMENT |
225 SCSI_PROT_IP_CHECKSUM,
226 };
227
228 return flag_mask[prot_op];
229}
230
231/*
170 * Data Integrity Field tuple. 232 * Data Integrity Field tuple.
171 */ 233 */
172struct sd_dif_tuple { 234struct sd_dif_tuple {
@@ -178,7 +240,7 @@ struct sd_dif_tuple {
178#ifdef CONFIG_BLK_DEV_INTEGRITY 240#ifdef CONFIG_BLK_DEV_INTEGRITY
179 241
180extern void sd_dif_config_host(struct scsi_disk *); 242extern void sd_dif_config_host(struct scsi_disk *);
181extern void sd_dif_prepare(struct request *rq, sector_t, unsigned int); 243extern void sd_dif_prepare(struct scsi_cmnd *scmd);
182extern void sd_dif_complete(struct scsi_cmnd *, unsigned int); 244extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
183 245
184#else /* CONFIG_BLK_DEV_INTEGRITY */ 246#else /* CONFIG_BLK_DEV_INTEGRITY */
@@ -186,7 +248,7 @@ extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
186static inline void sd_dif_config_host(struct scsi_disk *disk) 248static inline void sd_dif_config_host(struct scsi_disk *disk)
187{ 249{
188} 250}
189static inline int sd_dif_prepare(struct request *rq, sector_t s, unsigned int a) 251static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
190{ 252{
191 return 0; 253 return 0;
192} 254}
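
The sd_prot_op() table above encodes the protection operation as a 3-bit index (write << 2 | dix << 1 | dif). A few spot checks derived directly from the table, written as an illustrative sanity function (not part of this patch):

#include <linux/bug.h>
#include <scsi/scsi_cmnd.h>
#include "sd.h"

/* Illustration only: verify a few sd_prot_op() entries. */
static void __maybe_unused check_sd_prot_op(void)
{
	/* read from a PI-formatted disk, no DIX: HBA strips the PI */
	WARN_ON(sd_prot_op(false, false, true) != SCSI_PROT_READ_STRIP);
	/* read with DIX from an unformatted disk: HBA generates PI for the host */
	WARN_ON(sd_prot_op(false, true, false) != SCSI_PROT_READ_INSERT);
	/* write with DIX and DIF: PI is passed through end to end */
	WARN_ON(sd_prot_op(true, true, true) != SCSI_PROT_WRITE_PASS);
}
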
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index a7a691d0af7d..14c7d42a11c2 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -21,7 +21,7 @@
21 */ 21 */
22 22
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <linux/crc-t10dif.h> 24#include <linux/t10-pi.h>
25 25
26#include <scsi/scsi.h> 26#include <scsi/scsi.h>
27#include <scsi/scsi_cmnd.h> 27#include <scsi/scsi_cmnd.h>
@@ -33,268 +33,8 @@
33#include <scsi/scsi_ioctl.h> 33#include <scsi/scsi_ioctl.h>
34#include <scsi/scsicam.h> 34#include <scsi/scsicam.h>
35 35
36#include <net/checksum.h>
37
38#include "sd.h" 36#include "sd.h"
39 37
40typedef __u16 (csum_fn) (void *, unsigned int);
41
42static __u16 sd_dif_crc_fn(void *data, unsigned int len)
43{
44 return cpu_to_be16(crc_t10dif(data, len));
45}
46
47static __u16 sd_dif_ip_fn(void *data, unsigned int len)
48{
49 return ip_compute_csum(data, len);
50}
51
52/*
53 * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
54 * 16 bit app tag, 32 bit reference tag.
55 */
56static void sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
57{
58 void *buf = bix->data_buf;
59 struct sd_dif_tuple *sdt = bix->prot_buf;
60 sector_t sector = bix->sector;
61 unsigned int i;
62
63 for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
64 sdt->guard_tag = fn(buf, bix->sector_size);
65 sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
66 sdt->app_tag = 0;
67
68 buf += bix->sector_size;
69 sector++;
70 }
71}
72
73static void sd_dif_type1_generate_crc(struct blk_integrity_exchg *bix)
74{
75 sd_dif_type1_generate(bix, sd_dif_crc_fn);
76}
77
78static void sd_dif_type1_generate_ip(struct blk_integrity_exchg *bix)
79{
80 sd_dif_type1_generate(bix, sd_dif_ip_fn);
81}
82
83static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
84{
85 void *buf = bix->data_buf;
86 struct sd_dif_tuple *sdt = bix->prot_buf;
87 sector_t sector = bix->sector;
88 unsigned int i;
89 __u16 csum;
90
91 for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
92 /* Unwritten sectors */
93 if (sdt->app_tag == 0xffff)
94 return 0;
95
96 if (be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
97 printk(KERN_ERR
98 "%s: ref tag error on sector %lu (rcvd %u)\n",
99 bix->disk_name, (unsigned long)sector,
100 be32_to_cpu(sdt->ref_tag));
101 return -EIO;
102 }
103
104 csum = fn(buf, bix->sector_size);
105
106 if (sdt->guard_tag != csum) {
107 printk(KERN_ERR "%s: guard tag error on sector %lu " \
108 "(rcvd %04x, data %04x)\n", bix->disk_name,
109 (unsigned long)sector,
110 be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
111 return -EIO;
112 }
113
114 buf += bix->sector_size;
115 sector++;
116 }
117
118 return 0;
119}
120
121static int sd_dif_type1_verify_crc(struct blk_integrity_exchg *bix)
122{
123 return sd_dif_type1_verify(bix, sd_dif_crc_fn);
124}
125
126static int sd_dif_type1_verify_ip(struct blk_integrity_exchg *bix)
127{
128 return sd_dif_type1_verify(bix, sd_dif_ip_fn);
129}
130
131/*
132 * Functions for interleaving and deinterleaving application tags
133 */
134static void sd_dif_type1_set_tag(void *prot, void *tag_buf, unsigned int sectors)
135{
136 struct sd_dif_tuple *sdt = prot;
137 u8 *tag = tag_buf;
138 unsigned int i, j;
139
140 for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
141 sdt->app_tag = tag[j] << 8 | tag[j+1];
142 BUG_ON(sdt->app_tag == 0xffff);
143 }
144}
145
146static void sd_dif_type1_get_tag(void *prot, void *tag_buf, unsigned int sectors)
147{
148 struct sd_dif_tuple *sdt = prot;
149 u8 *tag = tag_buf;
150 unsigned int i, j;
151
152 for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
153 tag[j] = (sdt->app_tag & 0xff00) >> 8;
154 tag[j+1] = sdt->app_tag & 0xff;
155 }
156}
157
158static struct blk_integrity dif_type1_integrity_crc = {
159 .name = "T10-DIF-TYPE1-CRC",
160 .generate_fn = sd_dif_type1_generate_crc,
161 .verify_fn = sd_dif_type1_verify_crc,
162 .get_tag_fn = sd_dif_type1_get_tag,
163 .set_tag_fn = sd_dif_type1_set_tag,
164 .tuple_size = sizeof(struct sd_dif_tuple),
165 .tag_size = 0,
166};
167
168static struct blk_integrity dif_type1_integrity_ip = {
169 .name = "T10-DIF-TYPE1-IP",
170 .generate_fn = sd_dif_type1_generate_ip,
171 .verify_fn = sd_dif_type1_verify_ip,
172 .get_tag_fn = sd_dif_type1_get_tag,
173 .set_tag_fn = sd_dif_type1_set_tag,
174 .tuple_size = sizeof(struct sd_dif_tuple),
175 .tag_size = 0,
176};
177
178
179/*
180 * Type 3 protection has a 16-bit guard tag and 16 + 32 bits of opaque
181 * tag space.
182 */
183static void sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
184{
185 void *buf = bix->data_buf;
186 struct sd_dif_tuple *sdt = bix->prot_buf;
187 unsigned int i;
188
189 for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
190 sdt->guard_tag = fn(buf, bix->sector_size);
191 sdt->ref_tag = 0;
192 sdt->app_tag = 0;
193
194 buf += bix->sector_size;
195 }
196}
197
198static void sd_dif_type3_generate_crc(struct blk_integrity_exchg *bix)
199{
200 sd_dif_type3_generate(bix, sd_dif_crc_fn);
201}
202
203static void sd_dif_type3_generate_ip(struct blk_integrity_exchg *bix)
204{
205 sd_dif_type3_generate(bix, sd_dif_ip_fn);
206}
207
208static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
209{
210 void *buf = bix->data_buf;
211 struct sd_dif_tuple *sdt = bix->prot_buf;
212 sector_t sector = bix->sector;
213 unsigned int i;
214 __u16 csum;
215
216 for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
217 /* Unwritten sectors */
218 if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff)
219 return 0;
220
221 csum = fn(buf, bix->sector_size);
222
223 if (sdt->guard_tag != csum) {
224 printk(KERN_ERR "%s: guard tag error on sector %lu " \
225 "(rcvd %04x, data %04x)\n", bix->disk_name,
226 (unsigned long)sector,
227 be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
228 return -EIO;
229 }
230
231 buf += bix->sector_size;
232 sector++;
233 }
234
235 return 0;
236}
237
238static int sd_dif_type3_verify_crc(struct blk_integrity_exchg *bix)
239{
240 return sd_dif_type3_verify(bix, sd_dif_crc_fn);
241}
242
243static int sd_dif_type3_verify_ip(struct blk_integrity_exchg *bix)
244{
245 return sd_dif_type3_verify(bix, sd_dif_ip_fn);
246}
247
248static void sd_dif_type3_set_tag(void *prot, void *tag_buf, unsigned int sectors)
249{
250 struct sd_dif_tuple *sdt = prot;
251 u8 *tag = tag_buf;
252 unsigned int i, j;
253
254 for (i = 0, j = 0 ; i < sectors ; i++, j += 6, sdt++) {
255 sdt->app_tag = tag[j] << 8 | tag[j+1];
256 sdt->ref_tag = tag[j+2] << 24 | tag[j+3] << 16 |
257 tag[j+4] << 8 | tag[j+5];
258 }
259}
260
261static void sd_dif_type3_get_tag(void *prot, void *tag_buf, unsigned int sectors)
262{
263 struct sd_dif_tuple *sdt = prot;
264 u8 *tag = tag_buf;
265 unsigned int i, j;
266
267 for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
268 tag[j] = (sdt->app_tag & 0xff00) >> 8;
269 tag[j+1] = sdt->app_tag & 0xff;
270 tag[j+2] = (sdt->ref_tag & 0xff000000) >> 24;
271 tag[j+3] = (sdt->ref_tag & 0xff0000) >> 16;
272 tag[j+4] = (sdt->ref_tag & 0xff00) >> 8;
273 tag[j+5] = sdt->ref_tag & 0xff;
274 BUG_ON(sdt->app_tag == 0xffff || sdt->ref_tag == 0xffffffff);
275 }
276}
277
278static struct blk_integrity dif_type3_integrity_crc = {
279 .name = "T10-DIF-TYPE3-CRC",
280 .generate_fn = sd_dif_type3_generate_crc,
281 .verify_fn = sd_dif_type3_verify_crc,
282 .get_tag_fn = sd_dif_type3_get_tag,
283 .set_tag_fn = sd_dif_type3_set_tag,
284 .tuple_size = sizeof(struct sd_dif_tuple),
285 .tag_size = 0,
286};
287
288static struct blk_integrity dif_type3_integrity_ip = {
289 .name = "T10-DIF-TYPE3-IP",
290 .generate_fn = sd_dif_type3_generate_ip,
291 .verify_fn = sd_dif_type3_verify_ip,
292 .get_tag_fn = sd_dif_type3_get_tag,
293 .set_tag_fn = sd_dif_type3_set_tag,
294 .tuple_size = sizeof(struct sd_dif_tuple),
295 .tag_size = 0,
296};
297
298/* 38/*
299 * Configure exchange of protection information between OS and HBA. 39 * Configure exchange of protection information between OS and HBA.
300 */ 40 */
@@ -316,22 +56,30 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
316 return; 56 return;
317 57
318 /* Enable DMA of protection information */ 58 /* Enable DMA of protection information */
319 if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) 59 if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) {
320 if (type == SD_DIF_TYPE3_PROTECTION) 60 if (type == SD_DIF_TYPE3_PROTECTION)
321 blk_integrity_register(disk, &dif_type3_integrity_ip); 61 blk_integrity_register(disk, &t10_pi_type3_ip);
322 else 62 else
323 blk_integrity_register(disk, &dif_type1_integrity_ip); 63 blk_integrity_register(disk, &t10_pi_type1_ip);
324 else 64
65 disk->integrity->flags |= BLK_INTEGRITY_IP_CHECKSUM;
66 } else
325 if (type == SD_DIF_TYPE3_PROTECTION) 67 if (type == SD_DIF_TYPE3_PROTECTION)
326 blk_integrity_register(disk, &dif_type3_integrity_crc); 68 blk_integrity_register(disk, &t10_pi_type3_crc);
327 else 69 else
328 blk_integrity_register(disk, &dif_type1_integrity_crc); 70 blk_integrity_register(disk, &t10_pi_type1_crc);
329 71
330 sd_printk(KERN_NOTICE, sdkp, 72 sd_printk(KERN_NOTICE, sdkp,
331 "Enabling DIX %s protection\n", disk->integrity->name); 73 "Enabling DIX %s protection\n", disk->integrity->name);
332 74
333 /* Signal to block layer that we support sector tagging */ 75 /* Signal to block layer that we support sector tagging */
334 if (dif && type && sdkp->ATO) { 76 if (dif && type) {
77
78 disk->integrity->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
79
80 if (!sdkp)
81 return;
82
335 if (type == SD_DIF_TYPE3_PROTECTION) 83 if (type == SD_DIF_TYPE3_PROTECTION)
336 disk->integrity->tag_size = sizeof(u16) + sizeof(u32); 84 disk->integrity->tag_size = sizeof(u16) + sizeof(u32);
337 else 85 else
@@ -358,50 +106,49 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
358 * 106 *
359 * Type 3 does not have a reference tag so no remapping is required. 107 * Type 3 does not have a reference tag so no remapping is required.
360 */ 108 */
361void sd_dif_prepare(struct request *rq, sector_t hw_sector, 109void sd_dif_prepare(struct scsi_cmnd *scmd)
362 unsigned int sector_sz)
363{ 110{
364 const int tuple_sz = sizeof(struct sd_dif_tuple); 111 const int tuple_sz = sizeof(struct t10_pi_tuple);
365 struct bio *bio; 112 struct bio *bio;
366 struct scsi_disk *sdkp; 113 struct scsi_disk *sdkp;
367 struct sd_dif_tuple *sdt; 114 struct t10_pi_tuple *pi;
368 u32 phys, virt; 115 u32 phys, virt;
369 116
370 sdkp = rq->bio->bi_bdev->bd_disk->private_data; 117 sdkp = scsi_disk(scmd->request->rq_disk);
371 118
372 if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION) 119 if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION)
373 return; 120 return;
374 121
375 phys = hw_sector & 0xffffffff; 122 phys = scsi_prot_ref_tag(scmd);
376 123
377 __rq_for_each_bio(bio, rq) { 124 __rq_for_each_bio(bio, scmd->request) {
125 struct bio_integrity_payload *bip = bio_integrity(bio);
378 struct bio_vec iv; 126 struct bio_vec iv;
379 struct bvec_iter iter; 127 struct bvec_iter iter;
380 unsigned int j; 128 unsigned int j;
381 129
382 /* Already remapped? */ 130 /* Already remapped? */
383 if (bio_flagged(bio, BIO_MAPPED_INTEGRITY)) 131 if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
384 break; 132 break;
385 133
386 virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff; 134 virt = bip_get_seed(bip) & 0xffffffff;
387 135
388 bip_for_each_vec(iv, bio->bi_integrity, iter) { 136 bip_for_each_vec(iv, bip, iter) {
389 sdt = kmap_atomic(iv.bv_page) 137 pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
390 + iv.bv_offset;
391 138
392 for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) { 139 for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
393 140
394 if (be32_to_cpu(sdt->ref_tag) == virt) 141 if (be32_to_cpu(pi->ref_tag) == virt)
395 sdt->ref_tag = cpu_to_be32(phys); 142 pi->ref_tag = cpu_to_be32(phys);
396 143
397 virt++; 144 virt++;
398 phys++; 145 phys++;
399 } 146 }
400 147
401 kunmap_atomic(sdt); 148 kunmap_atomic(pi);
402 } 149 }
403 150
404 bio->bi_flags |= (1 << BIO_MAPPED_INTEGRITY); 151 bip->bip_flags |= BIP_MAPPED_INTEGRITY;
405 } 152 }
406} 153}
407 154
@@ -411,11 +158,11 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
411 */ 158 */
412void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes) 159void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
413{ 160{
414 const int tuple_sz = sizeof(struct sd_dif_tuple); 161 const int tuple_sz = sizeof(struct t10_pi_tuple);
415 struct scsi_disk *sdkp; 162 struct scsi_disk *sdkp;
416 struct bio *bio; 163 struct bio *bio;
417 struct sd_dif_tuple *sdt; 164 struct t10_pi_tuple *pi;
418 unsigned int j, sectors, sector_sz; 165 unsigned int j, intervals;
419 u32 phys, virt; 166 u32 phys, virt;
420 167
421 sdkp = scsi_disk(scmd->request->rq_disk); 168 sdkp = scsi_disk(scmd->request->rq_disk);
@@ -423,39 +170,35 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
423 if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION || good_bytes == 0) 170 if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION || good_bytes == 0)
424 return; 171 return;
425 172
426 sector_sz = scmd->device->sector_size; 173 intervals = good_bytes / scsi_prot_interval(scmd);
427 sectors = good_bytes / sector_sz; 174 phys = scsi_prot_ref_tag(scmd);
428
429 phys = blk_rq_pos(scmd->request) & 0xffffffff;
430 if (sector_sz == 4096)
431 phys >>= 3;
432 175
433 __rq_for_each_bio(bio, scmd->request) { 176 __rq_for_each_bio(bio, scmd->request) {
177 struct bio_integrity_payload *bip = bio_integrity(bio);
434 struct bio_vec iv; 178 struct bio_vec iv;
435 struct bvec_iter iter; 179 struct bvec_iter iter;
436 180
437 virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff; 181 virt = bip_get_seed(bip) & 0xffffffff;
438 182
439 bip_for_each_vec(iv, bio->bi_integrity, iter) { 183 bip_for_each_vec(iv, bip, iter) {
440 sdt = kmap_atomic(iv.bv_page) 184 pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
441 + iv.bv_offset;
442 185
443 for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) { 186 for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
444 187
445 if (sectors == 0) { 188 if (intervals == 0) {
446 kunmap_atomic(sdt); 189 kunmap_atomic(pi);
447 return; 190 return;
448 } 191 }
449 192
450 if (be32_to_cpu(sdt->ref_tag) == phys) 193 if (be32_to_cpu(pi->ref_tag) == phys)
451 sdt->ref_tag = cpu_to_be32(virt); 194 pi->ref_tag = cpu_to_be32(virt);
452 195
453 virt++; 196 virt++;
454 phys++; 197 phys++;
455 sectors--; 198 intervals--;
456 } 199 }
457 200
458 kunmap_atomic(sdt); 201 kunmap_atomic(pi);
459 } 202 }
460 } 203 }
461} 204}
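
sd_dif_prepare() and sd_dif_complete() above remap Type 1/2 reference tags between the submitter's virtual seed (bip_get_seed()) and the physical reference tag of the target LBA, then swap them back on completion. The same remap over a flat array of tuples, as a self-contained user-space sketch (struct and function names are illustrative):

#include <stdint.h>
#include <arpa/inet.h>

struct pi_tuple {
	uint16_t guard_tag;
	uint16_t app_tag;
	uint32_t ref_tag;	/* big endian on the wire */
};

/*
 * Rewrite ref tags that still carry the virtual seed so they match the
 * physical start LBA; tuples that do not match (already remapped or escape
 * values) are left alone, as in sd_dif_prepare().
 */
static void remap_ref_tags(struct pi_tuple *pi, unsigned int nr_intervals,
			   uint32_t virt, uint32_t phys)
{
	for (unsigned int i = 0; i < nr_intervals; i++, pi++, virt++, phys++)
		if (ntohl(pi->ref_tag) == virt)
			pi->ref_tag = htonl(phys);
}
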
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 01cf88888797..60354449d9ed 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1711,9 +1711,9 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1711 } 1711 }
1712 1712
1713 rq = blk_get_request(q, rw, GFP_ATOMIC); 1713 rq = blk_get_request(q, rw, GFP_ATOMIC);
1714 if (!rq) { 1714 if (IS_ERR(rq)) {
1715 kfree(long_cmdp); 1715 kfree(long_cmdp);
1716 return -ENOMEM; 1716 return PTR_ERR(rq);
1717 } 1717 }
1718 1718
1719 blk_rq_set_block_pc(rq); 1719 blk_rq_set_block_pc(rq);
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index d3fd6e8fb378..4daa372ed381 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -490,7 +490,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
490 490
491 req = blk_get_request(SRpnt->stp->device->request_queue, write, 491 req = blk_get_request(SRpnt->stp->device->request_queue, write,
492 GFP_KERNEL); 492 GFP_KERNEL);
493 if (!req) 493 if (IS_ERR(req))
494 return DRIVER_ERROR << 24; 494 return DRIVER_ERROR << 24;
495 495
496 blk_rq_set_block_pc(req); 496 blk_rq_set_block_pc(req);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 943b1dbe859a..70d9f6dabba0 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1050,7 +1050,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
1050 req = blk_get_request(pdv->pdv_sd->request_queue, 1050 req = blk_get_request(pdv->pdv_sd->request_queue,
1051 (data_direction == DMA_TO_DEVICE), 1051 (data_direction == DMA_TO_DEVICE),
1052 GFP_KERNEL); 1052 GFP_KERNEL);
1053 if (!req) { 1053 if (IS_ERR(req)) {
1054 pr_err("PSCSI: blk_get_request() failed\n"); 1054 pr_err("PSCSI: blk_get_request() failed\n");
1055 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 1055 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1056 goto fail; 1056 goto fail;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e2f3ad0879ce..cc9d4114cda0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode)
50EXPORT_SYMBOL(I_BDEV); 50EXPORT_SYMBOL(I_BDEV);
51 51
52/* 52/*
53 * Move the inode from its current bdi to a new bdi. If the inode is dirty we 53 * Move the inode from its current bdi to a new bdi. Make sure the inode
54 * need to move it onto the dirty list of @dst so that the inode is always on 54 * is clean before moving so that it doesn't linger on the old bdi.
55 * the right list.
56 */ 55 */
57static void bdev_inode_switch_bdi(struct inode *inode, 56static void bdev_inode_switch_bdi(struct inode *inode,
58 struct backing_dev_info *dst) 57 struct backing_dev_info *dst)
59{ 58{
60 struct backing_dev_info *old = inode->i_data.backing_dev_info; 59 while (true) {
61 bool wakeup_bdi = false; 60 spin_lock(&inode->i_lock);
62 61 if (!(inode->i_state & I_DIRTY)) {
63 if (unlikely(dst == old)) /* deadlock avoidance */ 62 inode->i_data.backing_dev_info = dst;
64 return; 63 spin_unlock(&inode->i_lock);
65 bdi_lock_two(&old->wb, &dst->wb); 64 return;
66 spin_lock(&inode->i_lock); 65 }
67 inode->i_data.backing_dev_info = dst; 66 spin_unlock(&inode->i_lock);
68 if (inode->i_state & I_DIRTY) { 67 WARN_ON_ONCE(write_inode_now(inode, true));
69 if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
70 wakeup_bdi = true;
71 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
72 } 68 }
73 spin_unlock(&inode->i_lock);
74 spin_unlock(&old->wb.list_lock);
75 spin_unlock(&dst->wb.list_lock);
76
77 if (wakeup_bdi)
78 bdi_wakeup_thread_delayed(dst);
79} 69}
80 70
81/* Kill _all_ buffers and pagecache , dirty or not.. */ 71/* Kill _all_ buffers and pagecache , dirty or not.. */
@@ -1179,8 +1169,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1179 if (!ret) { 1169 if (!ret) {
1180 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1170 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1181 bdi = blk_get_backing_dev_info(bdev); 1171 bdi = blk_get_backing_dev_info(bdev);
1182 if (bdi == NULL)
1183 bdi = &default_backing_dev_info;
1184 bdev_inode_switch_bdi(bdev->bd_inode, bdi); 1172 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1185 } 1173 }
1186 1174
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fa45e3cae40d..1ad0f47ac850 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1702,7 +1702,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1702 if (!device->bdev) 1702 if (!device->bdev)
1703 continue; 1703 continue;
1704 bdi = blk_get_backing_dev_info(device->bdev); 1704 bdi = blk_get_backing_dev_info(device->bdev);
1705 if (bdi && bdi_congested(bdi, bdi_bits)) { 1705 if (bdi_congested(bdi, bdi_bits)) {
1706 ret = 1; 1706 ret = 1;
1707 break; 1707 break;
1708 } 1708 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index dda4b8667c02..20cffc830468 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -220,11 +220,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
220#else 220#else
221 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); 221 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
222 222
223 if (rw == READ || rw == KERNEL_READ) 223 if (rw == READ)
224 return nfs_file_direct_read(iocb, iter, pos, 224 return nfs_file_direct_read(iocb, iter, pos);
225 rw == READ ? true : false); 225 return nfs_file_direct_write(iocb, iter, pos);
226 return nfs_file_direct_write(iocb, iter, pos,
227 rw == WRITE ? true : false);
228#endif /* CONFIG_NFS_SWAP */ 226#endif /* CONFIG_NFS_SWAP */
229} 227}
230 228
@@ -510,7 +508,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
510 * cache. 508 * cache.
511 */ 509 */
512ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, 510ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
513 loff_t pos, bool uio) 511 loff_t pos)
514{ 512{
515 struct file *file = iocb->ki_filp; 513 struct file *file = iocb->ki_filp;
516 struct address_space *mapping = file->f_mapping; 514 struct address_space *mapping = file->f_mapping;
@@ -879,7 +877,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
879 * is no atomic O_APPEND write facility in the NFS protocol. 877 * is no atomic O_APPEND write facility in the NFS protocol.
880 */ 878 */
881ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, 879ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
882 loff_t pos, bool uio) 880 loff_t pos)
883{ 881{
884 ssize_t result = -EINVAL; 882 ssize_t result = -EINVAL;
885 struct file *file = iocb->ki_filp; 883 struct file *file = iocb->ki_filp;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4ea92ce0537f..2ab6f00dba5b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -172,7 +172,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
172 ssize_t result; 172 ssize_t result;
173 173
174 if (iocb->ki_filp->f_flags & O_DIRECT) 174 if (iocb->ki_filp->f_flags & O_DIRECT)
175 return nfs_file_direct_read(iocb, to, iocb->ki_pos, true); 175 return nfs_file_direct_read(iocb, to, iocb->ki_pos);
176 176
177 dprintk("NFS: read(%pD2, %zu@%lu)\n", 177 dprintk("NFS: read(%pD2, %zu@%lu)\n",
178 iocb->ki_filp, 178 iocb->ki_filp,
@@ -676,7 +676,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
676 return result; 676 return result;
677 677
678 if (file->f_flags & O_DIRECT) 678 if (file->f_flags & O_DIRECT)
679 return nfs_file_direct_write(iocb, from, pos, true); 679 return nfs_file_direct_write(iocb, from, pos);
680 680
681 dprintk("NFS: write(%pD2, %zu@%Ld)\n", 681 dprintk("NFS: write(%pD2, %zu@%Ld)\n",
682 file, count, (long long) pos); 682 file, count, (long long) pos);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 017b6afe340b..24b4ebea0d4d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1670,8 +1670,6 @@ xfs_alloc_buftarg(
1670 btp->bt_dev = bdev->bd_dev; 1670 btp->bt_dev = bdev->bd_dev;
1671 btp->bt_bdev = bdev; 1671 btp->bt_bdev = bdev;
1672 btp->bt_bdi = blk_get_backing_dev_info(bdev); 1672 btp->bt_bdi = blk_get_backing_dev_info(bdev);
1673 if (!btp->bt_bdi)
1674 goto error;
1675 1673
1676 if (xfs_setsize_buftarg_early(btp, bdev)) 1674 if (xfs_setsize_buftarg_early(btp, bdev))
1677 goto error; 1675 goto error;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e488e9459a93..5da6012b7a14 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -28,12 +28,10 @@ struct dentry;
28 * Bits in backing_dev_info.state 28 * Bits in backing_dev_info.state
29 */ 29 */
30enum bdi_state { 30enum bdi_state {
31 BDI_wb_alloc, /* Default embedded wb allocated */
32 BDI_async_congested, /* The async (write) queue is getting full */ 31 BDI_async_congested, /* The async (write) queue is getting full */
33 BDI_sync_congested, /* The sync queue is getting full */ 32 BDI_sync_congested, /* The sync queue is getting full */
34 BDI_registered, /* bdi_register() was done */ 33 BDI_registered, /* bdi_register() was done */
35 BDI_writeback_running, /* Writeback is in progress */ 34 BDI_writeback_running, /* Writeback is in progress */
36 BDI_unused, /* Available bits start here */
37}; 35};
38 36
39typedef int (congested_fn)(void *, int); 37typedef int (congested_fn)(void *, int);
@@ -50,7 +48,6 @@ enum bdi_stat_item {
50 48
51struct bdi_writeback { 49struct bdi_writeback {
52 struct backing_dev_info *bdi; /* our parent bdi */ 50 struct backing_dev_info *bdi; /* our parent bdi */
53 unsigned int nr;
54 51
55 unsigned long last_old_flush; /* last old data flush */ 52 unsigned long last_old_flush; /* last old data flush */
56 53
@@ -124,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi);
124void bdi_writeback_workfn(struct work_struct *work); 121void bdi_writeback_workfn(struct work_struct *work);
125int bdi_has_dirty_io(struct backing_dev_info *bdi); 122int bdi_has_dirty_io(struct backing_dev_info *bdi);
126void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); 123void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
127void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
128 124
129extern spinlock_t bdi_lock; 125extern spinlock_t bdi_lock;
130extern struct list_head bdi_list; 126extern struct list_head bdi_list;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b39e5000ff58..7347f486ceca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -292,7 +292,24 @@ static inline unsigned bio_segments(struct bio *bio)
292 */ 292 */
293#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) 293#define bio_get(bio) atomic_inc(&(bio)->bi_cnt)
294 294
295enum bip_flags {
296 BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
297 BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
298 BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
299 BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
300 BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
301};
302
295#if defined(CONFIG_BLK_DEV_INTEGRITY) 303#if defined(CONFIG_BLK_DEV_INTEGRITY)
304
305static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
306{
307 if (bio->bi_rw & REQ_INTEGRITY)
308 return bio->bi_integrity;
309
310 return NULL;
311}
312
296/* 313/*
297 * bio integrity payload 314 * bio integrity payload
298 */ 315 */
@@ -301,21 +318,40 @@ struct bio_integrity_payload {
301 318
302 struct bvec_iter bip_iter; 319 struct bvec_iter bip_iter;
303 320
304 /* kill - should just use bip_vec */
305 void *bip_buf; /* generated integrity data */
306
307 bio_end_io_t *bip_end_io; /* saved I/O completion fn */ 321 bio_end_io_t *bip_end_io; /* saved I/O completion fn */
308 322
309 unsigned short bip_slab; /* slab the bip came from */ 323 unsigned short bip_slab; /* slab the bip came from */
310 unsigned short bip_vcnt; /* # of integrity bio_vecs */ 324 unsigned short bip_vcnt; /* # of integrity bio_vecs */
311 unsigned short bip_max_vcnt; /* integrity bio_vec slots */ 325 unsigned short bip_max_vcnt; /* integrity bio_vec slots */
312 unsigned bip_owns_buf:1; /* should free bip_buf */ 326 unsigned short bip_flags; /* control flags */
313 327
314 struct work_struct bip_work; /* I/O completion */ 328 struct work_struct bip_work; /* I/O completion */
315 329
316 struct bio_vec *bip_vec; 330 struct bio_vec *bip_vec;
317 struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ 331 struct bio_vec bip_inline_vecs[0];/* embedded bvec array */
318}; 332};
333
334static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
335{
336 struct bio_integrity_payload *bip = bio_integrity(bio);
337
338 if (bip)
339 return bip->bip_flags & flag;
340
341 return false;
342}
343
344static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
345{
346 return bip->bip_iter.bi_sector;
347}
348
349static inline void bip_set_seed(struct bio_integrity_payload *bip,
350 sector_t seed)
351{
352 bip->bip_iter.bi_sector = seed;
353}
354
319#endif /* CONFIG_BLK_DEV_INTEGRITY */ 355#endif /* CONFIG_BLK_DEV_INTEGRITY */
320 356
321extern void bio_trim(struct bio *bio, int offset, int size); 357extern void bio_trim(struct bio *bio, int offset, int size);
@@ -342,6 +378,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
342} 378}
343 379
344extern struct bio_set *bioset_create(unsigned int, unsigned int); 380extern struct bio_set *bioset_create(unsigned int, unsigned int);
381extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
345extern void bioset_free(struct bio_set *); 382extern void bioset_free(struct bio_set *);
346extern mempool_t *biovec_create_pool(int pool_entries); 383extern mempool_t *biovec_create_pool(int pool_entries);
347 384
@@ -353,7 +390,6 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
353extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); 390extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
354 391
355extern struct bio_set *fs_bio_set; 392extern struct bio_set *fs_bio_set;
356unsigned int bio_integrity_tag_size(struct bio *bio);
357 393
358static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 394static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
359{ 395{
@@ -661,14 +697,10 @@ struct biovec_slab {
661 for_each_bio(_bio) \ 697 for_each_bio(_bio) \
662 bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) 698 bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
663 699
664#define bio_integrity(bio) (bio->bi_integrity != NULL)
665
666extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); 700extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
667extern void bio_integrity_free(struct bio *); 701extern void bio_integrity_free(struct bio *);
668extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); 702extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
669extern int bio_integrity_enabled(struct bio *bio); 703extern bool bio_integrity_enabled(struct bio *bio);
670extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
671extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
672extern int bio_integrity_prep(struct bio *); 704extern int bio_integrity_prep(struct bio *);
673extern void bio_integrity_endio(struct bio *, int); 705extern void bio_integrity_endio(struct bio *, int);
674extern void bio_integrity_advance(struct bio *, unsigned int); 706extern void bio_integrity_advance(struct bio *, unsigned int);
@@ -680,14 +712,14 @@ extern void bio_integrity_init(void);
680 712
681#else /* CONFIG_BLK_DEV_INTEGRITY */ 713#else /* CONFIG_BLK_DEV_INTEGRITY */
682 714
683static inline int bio_integrity(struct bio *bio) 715static inline void *bio_integrity(struct bio *bio)
684{ 716{
685 return 0; 717 return NULL;
686} 718}
687 719
688static inline int bio_integrity_enabled(struct bio *bio) 720static inline bool bio_integrity_enabled(struct bio *bio)
689{ 721{
690 return 0; 722 return false;
691} 723}
692 724
693static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) 725static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
@@ -733,6 +765,11 @@ static inline void bio_integrity_init(void)
733 return; 765 return;
734} 766}
735 767
768static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
769{
770 return false;
771}
772
736#endif /* CONFIG_BLK_DEV_INTEGRITY */ 773#endif /* CONFIG_BLK_DEV_INTEGRITY */
737 774
738#endif /* CONFIG_BLOCK */ 775#endif /* CONFIG_BLOCK */
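
Putting the reworked bio.h accessors together: bio_integrity() now returns the payload (or NULL) instead of a boolean, the per-bio control bits move from bio flags and the bip_owns_buf bitfield into bip_flags, and the ref tag seed is carried in bip_iter.bi_sector. A minimal sketch under those assumptions (CONFIG_BLK_DEV_INTEGRITY enabled; the helper name is made up):

    #include <linux/bio.h>
    #include <linux/printk.h>

    static void inspect_bio_integrity(struct bio *bio)
    {
            struct bio_integrity_payload *bip = bio_integrity(bio);

            if (!bip)               /* no REQ_INTEGRITY payload attached */
                    return;

            /* BIP_* flags replace the old BIO_FS_INTEGRITY and
             * BIO_MAPPED_INTEGRITY bio flags. */
            if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM))
                    pr_debug("%s: guard tags use IP checksum\n", __func__);

            /* The ref tag seed now lives in bip_iter.bi_sector. */
            pr_debug("%s: seed sector %llu, %u integrity vecs\n", __func__,
                     (unsigned long long)bip_get_seed(bip), bip->bip_vcnt);
    }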
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c13a0c09faea..c9be1589415a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -4,6 +4,7 @@
4#include <linux/blkdev.h> 4#include <linux/blkdev.h>
5 5
6struct blk_mq_tags; 6struct blk_mq_tags;
7struct blk_flush_queue;
7 8
8struct blk_mq_cpu_notifier { 9struct blk_mq_cpu_notifier {
9 struct list_head list; 10 struct list_head list;
@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {
34 35
35 struct request_queue *queue; 36 struct request_queue *queue;
36 unsigned int queue_num; 37 unsigned int queue_num;
38 struct blk_flush_queue *fq;
37 39
38 void *driver_data; 40 void *driver_data;
39 41
@@ -77,8 +79,9 @@ struct blk_mq_tag_set {
77 struct list_head tag_list; 79 struct list_head tag_list;
78}; 80};
79 81
80typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); 82typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
81typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); 83typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
84typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
82typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); 85typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
83typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); 86typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
84typedef int (init_request_fn)(void *, struct request *, unsigned int, 87typedef int (init_request_fn)(void *, struct request *, unsigned int,
@@ -86,6 +89,9 @@ typedef int (init_request_fn)(void *, struct request *, unsigned int,
86typedef void (exit_request_fn)(void *, struct request *, unsigned int, 89typedef void (exit_request_fn)(void *, struct request *, unsigned int,
87 unsigned int); 90 unsigned int);
88 91
92typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
93 bool);
94
89struct blk_mq_ops { 95struct blk_mq_ops {
90 /* 96 /*
91 * Queue request 97 * Queue request
@@ -100,7 +106,7 @@ struct blk_mq_ops {
100 /* 106 /*
101 * Called on request timeout 107 * Called on request timeout
102 */ 108 */
103 rq_timed_out_fn *timeout; 109 timeout_fn *timeout;
104 110
105 softirq_done_fn *complete; 111 softirq_done_fn *complete;
106 112
@@ -115,6 +121,10 @@ struct blk_mq_ops {
115 /* 121 /*
116 * Called for every command allocated by the block layer to allow 122 * Called for every command allocated by the block layer to allow
117 * the driver to set up driver specific data. 123 * the driver to set up driver specific data.
124 *
125 * Tag greater than or equal to queue_depth is for setting up
126 * flush request.
127 *
118 * Ditto for exit/teardown. 128 * Ditto for exit/teardown.
119 */ 129 */
120 init_request_fn *init_request; 130 init_request_fn *init_request;
@@ -160,8 +170,9 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
160struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); 170struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
161struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); 171struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
162 172
163void blk_mq_end_io(struct request *rq, int error); 173void blk_mq_start_request(struct request *rq);
164void __blk_mq_end_io(struct request *rq, int error); 174void blk_mq_end_request(struct request *rq, int error);
175void __blk_mq_end_request(struct request *rq, int error);
165 176
166void blk_mq_requeue_request(struct request *rq); 177void blk_mq_requeue_request(struct request *rq);
167void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); 178void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
@@ -174,7 +185,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
174void blk_mq_start_hw_queues(struct request_queue *q); 185void blk_mq_start_hw_queues(struct request_queue *q);
175void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); 186void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
176void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 187void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
177void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); 188void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
189 void *priv);
178 190
179/* 191/*
180 * Driver command data is immediately after the request. So subtract request 192 * Driver command data is immediately after the request. So subtract request
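
For drivers, the visible blk-mq changes above are the extra 'last' argument to queue_rq, the blk-mq specific timeout handler type, and the rename of blk_mq_end_io() to blk_mq_end_request() alongside the explicit blk_mq_start_request(). A rough sketch of how a trivial driver wires this up; the names are invented, and the return value assumes the BLK_MQ_RQ_QUEUE_* codes already defined in blk-mq.h for this era:

    #include <linux/blk-mq.h>

    static int demo_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
                             bool last)
    {
            blk_mq_start_request(rq);       /* drivers now start requests explicitly */

            /*
             * Submit rq to the hardware here; 'last' hints that no further
             * requests follow in this batch, so doorbell writes can be batched.
             */

            blk_mq_end_request(rq, 0);      /* was blk_mq_end_io() */
            return BLK_MQ_RQ_QUEUE_OK;
    }

    static enum blk_eh_timer_return demo_timeout(struct request *rq, bool reserved)
    {
            return BLK_EH_RESET_TIMER;      /* give the request more time */
    }

    static struct blk_mq_ops demo_mq_ops = {
            .queue_rq       = demo_queue_rq,
            .map_queue      = blk_mq_map_queue,
            .timeout        = demo_timeout,
    };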
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 66c2167f04a9..445d59231bc4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -78,9 +78,11 @@ struct bio {
78 struct io_context *bi_ioc; 78 struct io_context *bi_ioc;
79 struct cgroup_subsys_state *bi_css; 79 struct cgroup_subsys_state *bi_css;
80#endif 80#endif
81 union {
81#if defined(CONFIG_BLK_DEV_INTEGRITY) 82#if defined(CONFIG_BLK_DEV_INTEGRITY)
82 struct bio_integrity_payload *bi_integrity; /* data integrity */ 83 struct bio_integrity_payload *bi_integrity; /* data integrity */
83#endif 84#endif
85 };
84 86
85 unsigned short bi_vcnt; /* how many bio_vec's */ 87 unsigned short bi_vcnt; /* how many bio_vec's */
86 88
@@ -118,10 +120,8 @@ struct bio {
118#define BIO_USER_MAPPED 6 /* contains user pages */ 120#define BIO_USER_MAPPED 6 /* contains user pages */
119#define BIO_EOPNOTSUPP 7 /* not supported */ 121#define BIO_EOPNOTSUPP 7 /* not supported */
120#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ 122#define BIO_NULL_MAPPED 8 /* contains invalid user pages */
121#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ 123#define BIO_QUIET 9 /* Make BIO Quiet */
122#define BIO_QUIET 10 /* Make BIO Quiet */ 124#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */
123#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
124#define BIO_SNAP_STABLE 12 /* bio data must be snapshotted during write */
125 125
126/* 126/*
127 * Flags starting here get preserved by bio_reset() - this includes 127 * Flags starting here get preserved by bio_reset() - this includes
@@ -162,6 +162,7 @@ enum rq_flag_bits {
162 __REQ_WRITE_SAME, /* write same block many times */ 162 __REQ_WRITE_SAME, /* write same block many times */
163 163
164 __REQ_NOIDLE, /* don't anticipate more IO after this one */ 164 __REQ_NOIDLE, /* don't anticipate more IO after this one */
165 __REQ_INTEGRITY, /* I/O includes block integrity payload */
165 __REQ_FUA, /* forced unit access */ 166 __REQ_FUA, /* forced unit access */
166 __REQ_FLUSH, /* request for cache flush */ 167 __REQ_FLUSH, /* request for cache flush */
167 168
@@ -186,9 +187,7 @@ enum rq_flag_bits {
186 __REQ_FLUSH_SEQ, /* request for flush sequence */ 187 __REQ_FLUSH_SEQ, /* request for flush sequence */
187 __REQ_IO_STAT, /* account I/O stat */ 188 __REQ_IO_STAT, /* account I/O stat */
188 __REQ_MIXED_MERGE, /* merge of different types, fail separately */ 189 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
189 __REQ_KERNEL, /* direct IO to kernel pages */
190 __REQ_PM, /* runtime pm request */ 190 __REQ_PM, /* runtime pm request */
191 __REQ_END, /* last of chain of requests */
192 __REQ_HASHED, /* on IO scheduler merge hash */ 191 __REQ_HASHED, /* on IO scheduler merge hash */
193 __REQ_MQ_INFLIGHT, /* track inflight for MQ */ 192 __REQ_MQ_INFLIGHT, /* track inflight for MQ */
194 __REQ_NR_BITS, /* stops here */ 193 __REQ_NR_BITS, /* stops here */
@@ -204,13 +203,14 @@ enum rq_flag_bits {
204#define REQ_DISCARD (1ULL << __REQ_DISCARD) 203#define REQ_DISCARD (1ULL << __REQ_DISCARD)
205#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME) 204#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME)
206#define REQ_NOIDLE (1ULL << __REQ_NOIDLE) 205#define REQ_NOIDLE (1ULL << __REQ_NOIDLE)
206#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY)
207 207
208#define REQ_FAILFAST_MASK \ 208#define REQ_FAILFAST_MASK \
209 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) 209 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
210#define REQ_COMMON_MASK \ 210#define REQ_COMMON_MASK \
211 (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ 211 (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
212 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ 212 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
213 REQ_SECURE) 213 REQ_SECURE | REQ_INTEGRITY)
214#define REQ_CLONE_MASK REQ_COMMON_MASK 214#define REQ_CLONE_MASK REQ_COMMON_MASK
215 215
216#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) 216#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
@@ -240,9 +240,7 @@ enum rq_flag_bits {
240#define REQ_IO_STAT (1ULL << __REQ_IO_STAT) 240#define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
241#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) 241#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
242#define REQ_SECURE (1ULL << __REQ_SECURE) 242#define REQ_SECURE (1ULL << __REQ_SECURE)
243#define REQ_KERNEL (1ULL << __REQ_KERNEL)
244#define REQ_PM (1ULL << __REQ_PM) 243#define REQ_PM (1ULL << __REQ_PM)
245#define REQ_END (1ULL << __REQ_END)
246#define REQ_HASHED (1ULL << __REQ_HASHED) 244#define REQ_HASHED (1ULL << __REQ_HASHED)
247#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) 245#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
248 246
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 87be398166d3..0207a78a8d82 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -36,6 +36,7 @@ struct request;
36struct sg_io_hdr; 36struct sg_io_hdr;
37struct bsg_job; 37struct bsg_job;
38struct blkcg_gq; 38struct blkcg_gq;
39struct blk_flush_queue;
39 40
40#define BLKDEV_MIN_RQ 4 41#define BLKDEV_MIN_RQ 4
41#define BLKDEV_MAX_RQ 128 /* Default maximum */ 42#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -455,14 +456,7 @@ struct request_queue {
455 */ 456 */
456 unsigned int flush_flags; 457 unsigned int flush_flags;
457 unsigned int flush_not_queueable:1; 458 unsigned int flush_not_queueable:1;
458 unsigned int flush_queue_delayed:1; 459 struct blk_flush_queue *fq;
459 unsigned int flush_pending_idx:1;
460 unsigned int flush_running_idx:1;
461 unsigned long flush_pending_since;
462 struct list_head flush_queue[2];
463 struct list_head flush_data_in_flight;
464 struct request *flush_rq;
465 spinlock_t mq_flush_lock;
466 460
467 struct list_head requeue_list; 461 struct list_head requeue_list;
468 spinlock_t requeue_lock; 462 spinlock_t requeue_lock;
@@ -865,7 +859,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
865 859
866static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 860static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
867{ 861{
868 return bdev->bd_disk->queue; 862 return bdev->bd_disk->queue; /* this is never NULL */
869} 863}
870 864
871/* 865/*
@@ -1285,10 +1279,9 @@ static inline int queue_alignment_offset(struct request_queue *q)
1285static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) 1279static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1286{ 1280{
1287 unsigned int granularity = max(lim->physical_block_size, lim->io_min); 1281 unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1288 unsigned int alignment = (sector << 9) & (granularity - 1); 1282 unsigned int alignment = sector_div(sector, granularity >> 9) << 9;
1289 1283
1290 return (granularity + lim->alignment_offset - alignment) 1284 return (granularity + lim->alignment_offset - alignment) % granularity;
1291 & (granularity - 1);
1292} 1285}
1293 1286
1294static inline int bdev_alignment_offset(struct block_device *bdev) 1287static inline int bdev_alignment_offset(struct block_device *bdev)
@@ -1464,32 +1457,31 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
1464 1457
1465#if defined(CONFIG_BLK_DEV_INTEGRITY) 1458#if defined(CONFIG_BLK_DEV_INTEGRITY)
1466 1459
1467#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ 1460enum blk_integrity_flags {
1468#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ 1461 BLK_INTEGRITY_VERIFY = 1 << 0,
1462 BLK_INTEGRITY_GENERATE = 1 << 1,
1463 BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
1464 BLK_INTEGRITY_IP_CHECKSUM = 1 << 3,
1465};
1469 1466
1470struct blk_integrity_exchg { 1467struct blk_integrity_iter {
1471 void *prot_buf; 1468 void *prot_buf;
1472 void *data_buf; 1469 void *data_buf;
1473 sector_t sector; 1470 sector_t seed;
1474 unsigned int data_size; 1471 unsigned int data_size;
1475 unsigned short sector_size; 1472 unsigned short interval;
1476 const char *disk_name; 1473 const char *disk_name;
1477}; 1474};
1478 1475
1479typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); 1476typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
1480typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
1481typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
1482typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
1483 1477
1484struct blk_integrity { 1478struct blk_integrity {
1485 integrity_gen_fn *generate_fn; 1479 integrity_processing_fn *generate_fn;
1486 integrity_vrfy_fn *verify_fn; 1480 integrity_processing_fn *verify_fn;
1487 integrity_set_tag_fn *set_tag_fn;
1488 integrity_get_tag_fn *get_tag_fn;
1489 1481
1490 unsigned short flags; 1482 unsigned short flags;
1491 unsigned short tuple_size; 1483 unsigned short tuple_size;
1492 unsigned short sector_size; 1484 unsigned short interval;
1493 unsigned short tag_size; 1485 unsigned short tag_size;
1494 1486
1495 const char *name; 1487 const char *name;
@@ -1504,10 +1496,10 @@ extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
1504extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, 1496extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
1505 struct scatterlist *); 1497 struct scatterlist *);
1506extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); 1498extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
1507extern int blk_integrity_merge_rq(struct request_queue *, struct request *, 1499extern bool blk_integrity_merge_rq(struct request_queue *, struct request *,
1508 struct request *); 1500 struct request *);
1509extern int blk_integrity_merge_bio(struct request_queue *, struct request *, 1501extern bool blk_integrity_merge_bio(struct request_queue *, struct request *,
1510 struct bio *); 1502 struct bio *);
1511 1503
1512static inline 1504static inline
1513struct blk_integrity *bdev_get_integrity(struct block_device *bdev) 1505struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
@@ -1520,12 +1512,9 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
1520 return disk->integrity; 1512 return disk->integrity;
1521} 1513}
1522 1514
1523static inline int blk_integrity_rq(struct request *rq) 1515static inline bool blk_integrity_rq(struct request *rq)
1524{ 1516{
1525 if (rq->bio == NULL) 1517 return rq->cmd_flags & REQ_INTEGRITY;
1526 return 0;
1527
1528 return bio_integrity(rq->bio);
1529} 1518}
1530 1519
1531static inline void blk_queue_max_integrity_segments(struct request_queue *q, 1520static inline void blk_queue_max_integrity_segments(struct request_queue *q,
@@ -1590,15 +1579,15 @@ static inline unsigned short queue_max_integrity_segments(struct request_queue *
1590{ 1579{
1591 return 0; 1580 return 0;
1592} 1581}
1593static inline int blk_integrity_merge_rq(struct request_queue *rq, 1582static inline bool blk_integrity_merge_rq(struct request_queue *rq,
1594 struct request *r1, 1583 struct request *r1,
1595 struct request *r2) 1584 struct request *r2)
1596{ 1585{
1597 return 0; 1586 return 0;
1598} 1587}
1599static inline int blk_integrity_merge_bio(struct request_queue *rq, 1588static inline bool blk_integrity_merge_bio(struct request_queue *rq,
1600 struct request *r, 1589 struct request *r,
1601 struct bio *b) 1590 struct bio *b)
1602{ 1591{
1603 return 0; 1592 return 0;
1604} 1593}
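
With blk_integrity_rq() reduced to a REQ_INTEGRITY flag test, a request carries protection information exactly when an integrity payload was attached to its bios. A sketch of the pattern a low-level driver's prep path might follow under that assumption (function and variable names are illustrative):

    #include <linux/blkdev.h>

    static void demo_prep_integrity(struct request_queue *q, struct request *rq,
                                    struct scatterlist *prot_sgl)
    {
            int nents;

            if (!blk_integrity_rq(rq))      /* now just rq->cmd_flags & REQ_INTEGRITY */
                    return;

            /* Count and map the integrity segments for the HBA. */
            nents = blk_rq_count_integrity_sg(q, rq->bio);
            if (nents > queue_max_integrity_segments(q))
                    return;                 /* would need splitting; out of scope here */

            blk_rq_map_integrity_sg(q, rq->bio, prot_sgl);
    }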
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index b3cb71f0d3b0..cf53d0773ce3 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -6,7 +6,8 @@
6#define CRC_T10DIF_DIGEST_SIZE 2 6#define CRC_T10DIF_DIGEST_SIZE 2
7#define CRC_T10DIF_BLOCK_SIZE 1 7#define CRC_T10DIF_BLOCK_SIZE 1
8 8
9__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); 9extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer,
10__u16 crc_t10dif(unsigned char const *, size_t); 10 size_t len);
11extern __u16 crc_t10dif(unsigned char const *, size_t);
11 12
12#endif 13#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ab4f1a10da20..a957d4366c24 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -192,8 +192,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
192#define READ 0 192#define READ 0
193#define WRITE RW_MASK 193#define WRITE RW_MASK
194#define READA RWA_MASK 194#define READA RWA_MASK
195#define KERNEL_READ (READ|REQ_KERNEL)
196#define KERNEL_WRITE (WRITE|REQ_KERNEL)
197 195
198#define READ_SYNC (READ | REQ_SYNC) 196#define READ_SYNC (READ | REQ_SYNC)
199#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) 197#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 28d649054d5f..c72d1ad41ad4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -448,10 +448,10 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file)
448extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); 448extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
449extern ssize_t nfs_file_direct_read(struct kiocb *iocb, 449extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
450 struct iov_iter *iter, 450 struct iov_iter *iter,
451 loff_t pos, bool uio); 451 loff_t pos);
452extern ssize_t nfs_file_direct_write(struct kiocb *iocb, 452extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
453 struct iov_iter *iter, 453 struct iov_iter *iter,
454 loff_t pos, bool uio); 454 loff_t pos);
455 455
456/* 456/*
457 * linux/fs/nfs/dir.c 457 * linux/fs/nfs/dir.c
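
With direct I/O to kernel pages gone (the KERNEL_READ/KERNEL_WRITE definitions are removed from fs.h just above), the 'bool uio' argument was always true and has been dropped from both prototypes. The write-side caller appears near the top of this diff; the read side follows the same shape, roughly (variable names are illustrative):

    if (iocb->ki_filp->f_flags & O_DIRECT)
            return nfs_file_direct_read(iocb, to, iocb->ki_pos);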
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
new file mode 100644
index 000000000000..6a8b9942632d
--- /dev/null
+++ b/include/linux/t10-pi.h
@@ -0,0 +1,22 @@
1#ifndef _LINUX_T10_PI_H
2#define _LINUX_T10_PI_H
3
4#include <linux/types.h>
5#include <linux/blkdev.h>
6
7/*
8 * T10 Protection Information tuple.
9 */
10struct t10_pi_tuple {
11 __be16 guard_tag; /* Checksum */
12 __be16 app_tag; /* Opaque storage */
13 __be32 ref_tag; /* Target LBA or indirect LBA */
14};
15
16
17extern struct blk_integrity t10_pi_type1_crc;
18extern struct blk_integrity t10_pi_type1_ip;
19extern struct blk_integrity t10_pi_type3_crc;
20extern struct blk_integrity t10_pi_type3_ip;
21
22#endif
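
The exported profiles can be attached to a disk with blk_integrity_register(); each one is essentially a pair of integrity_processing_fn callbacks walking t10_pi_tuple records. A rough sketch of what Type 1 CRC generation looks like in terms of the new blk_integrity_iter (this mirrors, but is not copied from, the new block/t10-pi.c):

    #include <linux/kernel.h>
    #include <linux/blkdev.h>
    #include <linux/t10-pi.h>
    #include <linux/crc-t10dif.h>

    static int demo_pi_type1_generate(struct blk_integrity_iter *iter)
    {
            struct t10_pi_tuple *pi = iter->prot_buf;
            void *data = iter->data_buf;
            sector_t seed = iter->seed;
            unsigned int i;

            /* One 8-byte tuple per protection interval of data. */
            for (i = 0; i < iter->data_size; i += iter->interval) {
                    pi->guard_tag = cpu_to_be16(crc_t10dif(data, iter->interval));
                    pi->app_tag = 0;
                    pi->ref_tag = cpu_to_be32(lower_32_bits(seed));

                    data += iter->interval;
                    pi++;
                    seed++;
            }

            return 0;
    }

A verify callback would walk the same tuples in the other direction, comparing the guard and ref tags against recomputed values and returning a negative errno on mismatch.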
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 73f349044941..522a5f27f553 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -10,9 +10,10 @@
10#include <scsi/scsi_device.h> 10#include <scsi/scsi_device.h>
11 11
12struct Scsi_Host; 12struct Scsi_Host;
13struct scsi_device;
14struct scsi_driver; 13struct scsi_driver;
15 14
15#include <scsi/scsi_device.h>
16
16/* 17/*
17 * MAX_COMMAND_SIZE is: 18 * MAX_COMMAND_SIZE is:
18 * The longest fixed-length SCSI CDB as per the SCSI standard. 19 * The longest fixed-length SCSI CDB as per the SCSI standard.
@@ -81,6 +82,7 @@ struct scsi_cmnd {
81 82
82 unsigned char prot_op; 83 unsigned char prot_op;
83 unsigned char prot_type; 84 unsigned char prot_type;
85 unsigned char prot_flags;
84 86
85 unsigned short cmd_len; 87 unsigned short cmd_len;
86 enum dma_data_direction sc_data_direction; 88 enum dma_data_direction sc_data_direction;
@@ -252,6 +254,14 @@ static inline unsigned char scsi_get_prot_op(struct scsi_cmnd *scmd)
252 return scmd->prot_op; 254 return scmd->prot_op;
253} 255}
254 256
257enum scsi_prot_flags {
258 SCSI_PROT_TRANSFER_PI = 1 << 0,
259 SCSI_PROT_GUARD_CHECK = 1 << 1,
260 SCSI_PROT_REF_CHECK = 1 << 2,
261 SCSI_PROT_REF_INCREMENT = 1 << 3,
262 SCSI_PROT_IP_CHECKSUM = 1 << 4,
263};
264
255/* 265/*
256 * The controller usually does not know anything about the target it 266 * The controller usually does not know anything about the target it
257 * is communicating with. However, when DIX is enabled the controller 267 * is communicating with. However, when DIX is enabled the controller
@@ -280,6 +290,17 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
280 return blk_rq_pos(scmd->request); 290 return blk_rq_pos(scmd->request);
281} 291}
282 292
293static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
294{
295 return scmd->device->sector_size;
296}
297
298static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
299{
300 return blk_rq_pos(scmd->request) >>
301 (ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
302}
303
283static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd) 304static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
284{ 305{
285 return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0; 306 return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
@@ -316,17 +337,12 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status)
316static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) 337static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
317{ 338{
318 unsigned int xfer_len = scsi_out(scmd)->length; 339 unsigned int xfer_len = scsi_out(scmd)->length;
319 unsigned int prot_op = scsi_get_prot_op(scmd); 340 unsigned int prot_interval = scsi_prot_interval(scmd);
320 unsigned int sector_size = scmd->device->sector_size;
321 341
322 switch (prot_op) { 342 if (scmd->prot_flags & SCSI_PROT_TRANSFER_PI)
323 case SCSI_PROT_NORMAL: 343 xfer_len += (xfer_len >> ilog2(prot_interval)) * 8;
324 case SCSI_PROT_WRITE_STRIP:
325 case SCSI_PROT_READ_INSERT:
326 return xfer_len;
327 }
328 344
329 return xfer_len + (xfer_len >> ilog2(sector_size)) * 8; 345 return xfer_len;
330} 346}
331 347
332#endif /* _SCSI_SCSI_CMND_H */ 348#endif /* _SCSI_SCSI_CMND_H */
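
The new helpers make the transfer-length math a simple flag test. Worked example, assuming a 512-byte logical block device and an 8-block (4096-byte) write with SCSI_PROT_TRANSFER_PI set: scsi_prot_interval() is 512, so scsi_transfer_length() returns 4096 + (4096 >> 9) * 8 = 4096 + 64 = 4160 bytes, i.e. eight extra 8-byte t10_pi_tuples on the wire; with the flag clear it is just 4096. scsi_prot_ref_tag() likewise scales the request's 512-byte sector position to the protection interval: for a 4096-byte interval it shifts blk_rq_pos() right by ilog2(4096) - 9 = 3 before truncating to 32 bits.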
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 12a992b62576..0ae0df55000b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -40,7 +40,7 @@ LIST_HEAD(bdi_list);
40/* bdi_wq serves all asynchronous writeback tasks */ 40/* bdi_wq serves all asynchronous writeback tasks */
41struct workqueue_struct *bdi_wq; 41struct workqueue_struct *bdi_wq;
42 42
43void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) 43static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
44{ 44{
45 if (wb1 < wb2) { 45 if (wb1 < wb2) {
46 spin_lock(&wb1->list_lock); 46 spin_lock(&wb1->list_lock);
@@ -376,13 +376,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
376 mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); 376 mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
377 flush_delayed_work(&bdi->wb.dwork); 377 flush_delayed_work(&bdi->wb.dwork);
378 WARN_ON(!list_empty(&bdi->work_list)); 378 WARN_ON(!list_empty(&bdi->work_list));
379 379 WARN_ON(delayed_work_pending(&bdi->wb.dwork));
380 /*
381 * This shouldn't be necessary unless @bdi for some reason has
382 * unflushed dirty IO after work_list is drained. Do it anyway
383 * just in case.
384 */
385 cancel_delayed_work_sync(&bdi->wb.dwork);
386} 380}
387 381
388/* 382/*
@@ -402,21 +396,15 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
402 396
403void bdi_unregister(struct backing_dev_info *bdi) 397void bdi_unregister(struct backing_dev_info *bdi)
404{ 398{
405 struct device *dev = bdi->dev; 399 if (bdi->dev) {
406
407 if (dev) {
408 bdi_set_min_ratio(bdi, 0); 400 bdi_set_min_ratio(bdi, 0);
409 trace_writeback_bdi_unregister(bdi); 401 trace_writeback_bdi_unregister(bdi);
410 bdi_prune_sb(bdi); 402 bdi_prune_sb(bdi);
411 403
412 bdi_wb_shutdown(bdi); 404 bdi_wb_shutdown(bdi);
413 bdi_debug_unregister(bdi); 405 bdi_debug_unregister(bdi);
414 406 device_unregister(bdi->dev);
415 spin_lock_bh(&bdi->wb_lock);
416 bdi->dev = NULL; 407 bdi->dev = NULL;
417 spin_unlock_bh(&bdi->wb_lock);
418
419 device_unregister(dev);
420 } 408 }
421} 409}
422EXPORT_SYMBOL(bdi_unregister); 410EXPORT_SYMBOL(bdi_unregister);
@@ -487,8 +475,17 @@ void bdi_destroy(struct backing_dev_info *bdi)
487 int i; 475 int i;
488 476
489 /* 477 /*
490 * Splice our entries to the default_backing_dev_info, if this 478 * Splice our entries to the default_backing_dev_info. This
491 * bdi disappears 479 * condition shouldn't happen. @wb must be empty at this point and
480 * dirty inodes on it might cause other issues. This workaround is
481 * added by ce5f8e779519 ("writeback: splice dirty inode entries to
482 * default bdi on bdi_destroy()") without root-causing the issue.
483 *
484 * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com
485 * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350
486 *
487 * We should probably add WARN_ON() to find out whether it still
488 * happens and track it down if so.
492 */ 489 */
493 if (bdi_has_dirty_io(bdi)) { 490 if (bdi_has_dirty_io(bdi)) {
494 struct bdi_writeback *dst = &default_backing_dev_info.wb; 491 struct bdi_writeback *dst = &default_backing_dev_info.wb;
@@ -503,12 +500,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
503 500
504 bdi_unregister(bdi); 501 bdi_unregister(bdi);
505 502
506 /* 503 WARN_ON(delayed_work_pending(&bdi->wb.dwork));
507 * If bdi_unregister() had already been called earlier, the dwork
508 * could still be pending because bdi_prune_sb() can race with the
509 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
510 */
511 cancel_delayed_work_sync(&bdi->wb.dwork);
512 504
513 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) 505 for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
514 percpu_counter_destroy(&bdi->bdi_stat[i]); 506 percpu_counter_destroy(&bdi->bdi_stat[i]);