Diffstat
-rw-r--r--Documentation/block/biodoc.txt7
-rw-r--r--Documentation/block/biovecs.txt111
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.txt42
-rw-r--r--arch/m68k/emu/nfblock.c13
-rw-r--r--arch/powerpc/sysdev/axonram.c21
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c14
-rw-r--r--block/blk-core.c61
-rw-r--r--block/blk-exec.c4
-rw-r--r--block/blk-flush.c2
-rw-r--r--block/blk-integrity.c40
-rw-r--r--block/blk-lib.c12
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c66
-rw-r--r--block/blk-mq-cpu.c37
-rw-r--r--block/blk-mq.c123
-rw-r--r--block/blk-mq.h3
-rw-r--r--block/blk-settings.c4
-rw-r--r--block/blk-sysfs.c1
-rw-r--r--block/blk-throttle.c14
-rw-r--r--block/cmdline-parser.c18
-rw-r--r--block/elevator.c2
-rw-r--r--block/scsi_ioctl.c6
-rw-r--r--drivers/block/aoe/aoe.h10
-rw-r--r--drivers/block/aoe/aoecmd.c153
-rw-r--r--drivers/block/brd.c16
-rw-r--r--drivers/block/cciss.c4
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_main.c27
-rw-r--r--drivers/block/drbd/drbd_receiver.c19
-rw-r--r--drivers/block/drbd/drbd_req.c6
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_worker.c8
-rw-r--r--drivers/block/floppy.c52
-rw-r--r--drivers/block/loop.c29
-rw-r--r--drivers/block/mg_disk.c2
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c270
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h14
-rw-r--r--drivers/block/nbd.c14
-rw-r--r--drivers/block/null_blk.c5
-rw-r--r--drivers/block/nvme-core.c142
-rw-r--r--drivers/block/paride/pg.c2
-rw-r--r--drivers/block/pktcdvd.c186
-rw-r--r--drivers/block/ps3disk.c17
-rw-r--r--drivers/block/ps3vram.c12
-rw-r--r--drivers/block/rbd.c91
-rw-r--r--drivers/block/rsxx/dev.c6
-rw-r--r--drivers/block/rsxx/dma.c15
-rw-r--r--drivers/block/sx8.c16
-rw-r--r--drivers/block/umem.c53
-rw-r--r--drivers/block/xen-blkback/blkback.c2
-rw-r--r--drivers/block/xen-blkfront.c2
-rw-r--r--drivers/cdrom/gdrom.c4
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c1
-rw-r--r--drivers/md/bcache/Makefile5
-rw-r--r--drivers/md/bcache/alloc.c89
-rw-r--r--drivers/md/bcache/bcache.h84
-rw-r--r--drivers/md/bcache/bset.c904
-rw-r--r--drivers/md/bcache/bset.h440
-rw-r--r--drivers/md/bcache/btree.c682
-rw-r--r--drivers/md/bcache/btree.h62
-rw-r--r--drivers/md/bcache/closure.c90
-rw-r--r--drivers/md/bcache/closure.h355
-rw-r--r--drivers/md/bcache/debug.c268
-rw-r--r--drivers/md/bcache/debug.h27
-rw-r--r--drivers/md/bcache/extents.c616
-rw-r--r--drivers/md/bcache/extents.h13
-rw-r--r--drivers/md/bcache/io.c196
-rw-r--r--drivers/md/bcache/journal.c87
-rw-r--r--drivers/md/bcache/journal.h1
-rw-r--r--drivers/md/bcache/movinggc.c6
-rw-r--r--drivers/md/bcache/request.c197
-rw-r--r--drivers/md/bcache/request.h21
-rw-r--r--drivers/md/bcache/super.c123
-rw-r--r--drivers/md/bcache/sysfs.c79
-rw-r--r--drivers/md/bcache/util.c4
-rw-r--r--drivers/md/bcache/util.h8
-rw-r--r--drivers/md/bcache/writeback.c6
-rw-r--r--drivers/md/bcache/writeback.h2
-rw-r--r--drivers/md/dm-bio-record.h37
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-cache-policy-mq.c4
-rw-r--r--drivers/md/dm-cache-target.c28
-rw-r--r--drivers/md/dm-crypt.c64
-rw-r--r--drivers/md/dm-delay.c7
-rw-r--r--drivers/md/dm-flakey.c7
-rw-r--r--drivers/md/dm-io.c37
-rw-r--r--drivers/md/dm-linear.c3
-rw-r--r--drivers/md/dm-raid1.c20
-rw-r--r--drivers/md/dm-region-hash.c3
-rw-r--r--drivers/md/dm-snap.c19
-rw-r--r--drivers/md/dm-stripe.c13
-rw-r--r--drivers/md/dm-switch.c4
-rw-r--r--drivers/md/dm-thin.c30
-rw-r--r--drivers/md/dm-verity.c62
-rw-r--r--drivers/md/dm.c189
-rw-r--r--drivers/md/faulty.c19
-rw-r--r--drivers/md/linear.c96
-rw-r--r--drivers/md/md.c12
-rw-r--r--drivers/md/multipath.c13
-rw-r--r--drivers/md/raid0.c79
-rw-r--r--drivers/md/raid1.c73
-rw-r--r--drivers/md/raid10.c194
-rw-r--r--drivers/md/raid5.c85
-rw-r--r--drivers/message/fusion/mptsas.c8
-rw-r--r--drivers/net/wireless/ath/ar5523/ar5523.c2
-rw-r--r--drivers/net/wireless/ath/ath5k/phy.c2
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9003_eeprom.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h4
-rw-r--r--drivers/net/wireless/ath/ath9k/htc.h2
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_init.c8
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_main.c63
-rw-r--r--drivers/net/wireless/ath/ath9k/hw.c13
-rw-r--r--drivers/net/wireless/ath/ath9k/init.c8
-rw-r--r--drivers/net/wireless/ath/ath9k/recv.c70
-rw-r--r--drivers/net/wireless/ath/ath9k/xmit.c13
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c14
-rw-r--r--drivers/net/wireless/hostap/hostap_ap.c2
-rw-r--r--drivers/net/wireless/hostap/hostap_proc.c2
-rw-r--r--drivers/net/wireless/libertas/cfg.c2
-rw-r--r--drivers/net/wireless/mwifiex/pcie.c34
-rw-r--r--drivers/net/wireless/mwifiex/usb.c12
-rw-r--r--drivers/net/wireless/mwifiex/wmm.c3
-rw-r--r--drivers/net/wireless/rt2x00/rt2500pci.c5
-rw-r--r--drivers/net/wireless/rt2x00/rt2500usb.c5
-rw-r--r--drivers/net/wireless/rt2x00/rt2800lib.c5
-rw-r--r--drivers/net/wireless/rtl818x/rtl8180/dev.c23
-rw-r--r--drivers/net/wireless/rtl818x/rtl8187/rtl8187.h10
-rw-r--r--drivers/net/wireless/rtlwifi/ps.c2
-rw-r--r--drivers/net/wireless/rtlwifi/rtl8192ce/hw.c18
-rw-r--r--drivers/net/wireless/ti/wl1251/rx.c2
-rw-r--r--drivers/s390/block/dasd_diag.c10
-rw-r--r--drivers/s390/block/dasd_eckd.c48
-rw-r--r--drivers/s390/block/dasd_fba.c26
-rw-r--r--drivers/s390/block/dcssblk.c21
-rw-r--r--drivers/s390/block/scm_blk.c8
-rw-r--r--drivers/s390/block/scm_blk_cluster.c4
-rw-r--r--drivers/s390/block/xpram.c19
-rw-r--r--drivers/scsi/libsas/sas_expander.c8
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_transport.c41
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_transport.c39
-rw-r--r--drivers/scsi/osd/osd_initiator.c2
-rw-r--r--drivers/scsi/sd.c2
-rw-r--r--drivers/scsi/sd_dif.c30
-rw-r--r--drivers/staging/lustre/lustre/llite/lloop.c26
-rw-r--r--drivers/staging/zram/zram_drv.c33
-rw-r--r--drivers/target/target_core_iblock.c2
-rw-r--r--fs/bio-integrity.c170
-rw-r--r--fs/bio.c502
-rw-r--r--fs/btrfs/check-integrity.c8
-rw-r--r--fs/btrfs/compression.c27
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/extent_io.c49
-rw-r--r--fs/btrfs/file-item.c19
-rw-r--r--fs/btrfs/inode.c37
-rw-r--r--fs/btrfs/raid56.c22
-rw-r--r--fs/btrfs/scrub.c12
-rw-r--r--fs/btrfs/volumes.c19
-rw-r--r--fs/buffer.c14
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ext4/page-io.c8
-rw-r--r--fs/f2fs/data.c35
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/jfs/jfs_logmgr.c12
-rw-r--r--fs/jfs/jfs_metapage.c9
-rw-r--r--fs/logfs/dev_bdev.c38
-rw-r--r--fs/mpage.c19
-rw-r--r--fs/nfs/blocklayout/blocklayout.c43
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfsd/acl.h2
-rw-r--r--fs/nfsd/cache.h8
-rw-r--r--fs/nfsd/idmap.h4
-rw-r--r--fs/nfsd/netns.h1
-rw-r--r--fs/nfsd/nfs3xdr.c14
-rw-r--r--fs/nfsd/nfs4acl.c20
-rw-r--r--fs/nfsd/nfs4idmap.c50
-rw-r--r--fs/nfsd/nfs4proc.c57
-rw-r--r--fs/nfsd/nfs4state.c40
-rw-r--r--fs/nfsd/nfs4xdr.c178
-rw-r--r--fs/nfsd/nfscache.c36
-rw-r--r--fs/nfsd/nfssvc.c30
-rw-r--r--fs/nfsd/nfsxdr.c2
-rw-r--r--fs/nfsd/vfs.c40
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--fs/nfsd/xdr3.h3
-rw-r--r--fs/nfsd/xdr4.h4
-rw-r--r--fs/nilfs2/segbuf.c3
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_buf.c4
-rw-r--r--include/linux/bio.h283
-rw-r--r--include/linux/blk-mq.h13
-rw-r--r--include/linux/blk_types.h24
-rw-r--r--include/linux/blkdev.h10
-rw-r--r--include/linux/ceph/messenger.h4
-rw-r--r--include/linux/cmdline-parser.h8
-rw-r--r--include/linux/dm-io.h4
-rw-r--r--include/linux/sunrpc/svc.h6
-rw-r--r--include/trace/events/bcache.h36
-rw-r--r--include/trace/events/block.h26
-rw-r--r--include/trace/events/f2fs.h4
-rw-r--r--include/uapi/linux/bcache.h3
-rw-r--r--include/uapi/linux/fd.h3
-rw-r--r--kernel/power/block_io.c2
-rw-r--r--kernel/trace/blktrace.c15
-rw-r--r--mm/bounce.c44
-rw-r--r--mm/page_io.c10
-rw-r--r--net/ceph/messenger.c43
-rw-r--r--net/mac80211/cfg.c44
-rw-r--r--net/mac80211/ht.c4
-rw-r--r--net/mac80211/ibss.c5
-rw-r--r--net/mac80211/ieee80211_i.h10
-rw-r--r--net/mac80211/iface.c27
-rw-r--r--net/mac80211/mlme.c24
-rw-r--r--net/mac80211/rx.c7
-rw-r--r--net/mac80211/sta_info.c66
-rw-r--r--net/mac80211/sta_info.h7
-rw-r--r--net/mac80211/tx.c17
-rw-r--r--net/mac80211/util.c48
-rw-r--r--net/mac80211/wme.c5
-rw-r--r--net/nfc/nci/core.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c17
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c82
-rw-r--r--net/sunrpc/cache.c4
-rw-r--r--net/sunrpc/netns.h1
-rw-r--r--net/sunrpc/svc.c25
-rw-r--r--net/sunrpc/xprtsock.c7
-rw-r--r--net/wireless/core.c17
-rw-r--r--net/wireless/core.h4
-rw-r--r--net/wireless/nl80211.c32
-rw-r--r--net/wireless/nl80211.h8
-rw-r--r--net/wireless/reg.c12
-rw-r--r--net/wireless/scan.c40
-rw-r--r--net/wireless/sme.c2
237 files changed, 5296 insertions, 5244 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 8df5e8e6dceb..2101e718670d 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -447,14 +447,13 @@ struct bio_vec {
  * main unit of I/O for the block layer and lower layers (ie drivers)
  */
 struct bio {
-       sector_t bi_sector;
        struct bio *bi_next;    /* request queue link */
        struct block_device *bi_bdev;   /* target device */
        unsigned long bi_flags; /* status, command, etc */
        unsigned long bi_rw;    /* low bits: r/w, high: priority */

        unsigned int bi_vcnt;   /* how may bio_vec's */
-       unsigned int bi_idx;    /* current index into bio_vec array */
+       struct bvec_iter bi_iter;       /* current index into bio_vec array */

        unsigned int bi_size;   /* total size in bytes */
        unsigned short bi_phys_segments; /* segments after physaddr coalesce*/
@@ -480,7 +479,7 @@ With this multipage bio design:
 - Code that traverses the req list can find all the segments of a bio
   by using rq_for_each_segment. This handles the fact that a request
   has multiple bios, each of which can have multiple segments.
-- Drivers which can't process a large bio in one shot can use the bi_idx
+- Drivers which can't process a large bio in one shot can use the bi_iter
   field to keep track of the next bio_vec entry to process.
   (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
   [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
@@ -589,7 +588,7 @@ driver should not modify these values. The block layer sets up the
 nr_sectors and current_nr_sectors fields (based on the corresponding
 hard_xxx values and the number of bytes transferred) and updates it on
 every transfer that invokes end_that_request_first. It does the same for the
-buffer, bio, bio->bi_idx fields too.
+buffer, bio, bio->bi_iter fields too.

 The buffer field is just a virtual address mapping of the current segment
 of the i/o buffer in cases where the buffer resides in low-memory. For high
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt
new file mode 100644
index 000000000000..74a32ad52f53
--- /dev/null
+++ b/Documentation/block/biovecs.txt
@@ -0,0 +1,111 @@
+
+Immutable biovecs and biovec iterators:
+=======================================
+
+Kent Overstreet <kmo@daterainc.com>
+
+As of 3.13, biovecs should never be modified after a bio has been submitted.
+Instead, we have a new struct bvec_iter which represents a range of a biovec -
+the iterator will be modified as the bio is completed, not the biovec.
+
+More specifically, old code that needed to partially complete a bio would
+update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
+ended up partway through a biovec, it would increment bv_offset and decrement
+bv_len by the number of bytes completed in that biovec.
+
+In the new scheme of things, everything that must be mutated in order to
+partially complete a bio is segregated into struct bvec_iter: bi_sector,
+bi_size and bi_idx have been moved there; and instead of modifying bv_offset
+and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
+bytes completed in the current bvec.
+
+There are a bunch of new helper macros for hiding the gory details - in
+particular, presenting the illusion of partially completed biovecs so that
+normal code doesn't have to deal with bi_bvec_done.
+
+ * Driver code should no longer refer to biovecs directly; we now have
+   bio_iovec() and bio_iovec_iter() macros that return literal struct biovecs,
+   constructed from the raw biovecs but taking into account bi_bvec_done and
+   bi_size.
+
+   bio_for_each_segment() has been updated to take a bvec_iter argument
+   instead of an integer (that corresponded to bi_idx); for a lot of code the
+   conversion just required changing the types of the arguments to
+   bio_for_each_segment().
+
+ * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
+   wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
+   advances the bio integrity's iter if present.
+
+   There is a lower level advance function - bvec_iter_advance() - which takes
+   a pointer to a biovec, not a bio; this is used by the bio integrity code.
+
+What's all this get us?
+=======================
+
+Having a real iterator, and making biovecs immutable, has a number of
+advantages:
+
+ * Before, iterating over bios was very awkward when you weren't processing
+   exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
+   which copies the contents of one bio into another. Because the biovecs
+   wouldn't necessarily be the same size, the old code was tricky and
+   convoluted - it had to walk two different bios at the same time, keeping
+   both bi_idx and an offset into the current biovec for each.
+
+   The new code is much more straightforward - have a look. This sort of
+   pattern comes up in a lot of places; a lot of drivers were essentially open
+   coding bvec iterators before, and having a common implementation considerably
+   simplifies a lot of code.
+
+ * Before, any code that might need to use the biovec after the bio had been
+   completed (perhaps to copy the data somewhere else, or perhaps to resubmit
+   it somewhere else if there was an error) had to save the entire bvec array
+   - again, this was being done in a fair number of places.
+
+ * Biovecs can be shared between multiple bios - a bvec iter can represent an
+   arbitrary range of an existing biovec, both starting and ending midway
+   through biovecs. This is what enables efficient splitting of arbitrary
+   bios. Note that this means we _only_ use bi_size to determine when we've
+   reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
+   bi_size into account when constructing biovecs.
+
+ * Splitting bios is now much simpler. The old bio_split() didn't even work on
+   bios with more than a single bvec! Now, we can efficiently split arbitrary
+   size bios - because the new bio can share the old bio's biovec.
+
+   Care must be taken to ensure the biovec isn't freed while the split bio is
+   still using it, in case the original bio completes first, though. Using
+   bio_chain() when splitting bios helps with this.
+
+ * Submitting partially completed bios is now perfectly fine - this comes up
+   occasionally in stacking block drivers and various code (e.g. md and
+   bcache) had some ugly workarounds for this.
+
+   It used to be the case that submitting a partially completed bio would work
+   fine to _most_ devices, but since accessing the raw bvec array was the
+   norm, not all drivers would respect bi_idx and those would break. Now,
+   since all drivers _must_ go through the bvec iterator - and have been
+   audited to make sure they are - submitting partially completed bios is
+   perfectly fine.
+
+Other implications:
+===================
+
+ * Almost all usage of bi_idx is now incorrect and has been removed; instead,
+   where previously you would have used bi_idx you'd now use a bvec_iter,
+   probably passing it to one of the helper macros.
+
+   I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
+   now use bio_iter_iovec(), which takes a bvec_iter and returns a
+   literal struct bio_vec - constructed on the fly from the raw biovec but
+   taking into account bi_bvec_done (and bi_size).
+
+ * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
+   doesn't actually own the bio. The reason is twofold: firstly, it's not
+   actually needed for iterating over the bio anymore - we only use bi_size.
+   Secondly, when cloning a bio and reusing (a portion of) the original bio's
+   biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
+   over all the biovecs in the new bio - which is silly as it's not needed.
+
+So, don't use bi_vcnt anymore.
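
A minimal sketch of the iteration pattern described above, modeled on the driver conversions later in this series (nfblock, axonram, simdisk). It is not part of the patch itself; the example_dev structure and example_transfer() helper are hypothetical stand-ins for a real driver's state and transfer routine:

#include <linux/bio.h>
#include <linux/blkdev.h>

static void example_make_request(struct request_queue *q, struct bio *bio)
{
        struct example_dev *dev = q->queuedata;         /* hypothetical driver state */
        struct bio_vec bvec;                            /* a bio_vec by value, not a pointer */
        struct bvec_iter iter;
        sector_t sector = bio->bi_iter.bi_sector;       /* was bio->bi_sector */

        /* bio_for_each_segment() now takes a bvec_iter instead of an integer index */
        bio_for_each_segment(bvec, bio, iter) {
                void *buf = page_address(bvec.bv_page) + bvec.bv_offset;

                /* hypothetical helper: transfer bvec.bv_len bytes starting at 'sector' */
                example_transfer(dev, sector, bvec.bv_len >> 9, buf,
                                 bio_data_dir(bio) == WRITE);
                sector += bvec.bv_len >> 9;
        }

        bio_endio(bio, 0);
}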
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 01c2db769791..b930ad087780 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -5,11 +5,11 @@ Server support for minorversion 1 can be controlled using the
 by reading this file will contain either "+4.1" or "-4.1"
 correspondingly.

-Currently, server support for minorversion 1 is disabled by default.
-It can be enabled at run time by writing the string "+4.1" to
+Currently, server support for minorversion 1 is enabled by default.
+It can be disabled at run time by writing the string "-4.1" to
 the /proc/fs/nfsd/versions control file. Note that to write this
-control file, the nfsd service must be taken down. Use your user-mode
-nfs-utils to set this up; see rpc.nfsd(8)
+control file, the nfsd service must be taken down. You can use rpc.nfsd
+for this; see rpc.nfsd(8).

 (Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
 "-4", respectively. Therefore, code meant to work on both new and old
@@ -29,29 +29,6 @@ are still under development out of tree.
 See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
 for more information.

-The current implementation is intended for developers only: while it
-does support ordinary file operations on clients we have tested against
-(including the linux client), it is incomplete in ways which may limit
-features unexpectedly, cause known bugs in rare cases, or cause
-interoperability problems with future clients. Known issues:
-
-	- gss support is questionable: currently mounts with kerberos
-	  from a linux client are possible, but we aren't really
-	  conformant with the spec (for example, we don't use kerberos
-	  on the backchannel correctly).
-	- We do not support SSV, which provides security for shared
-	  client-server state (thus preventing unauthorized tampering
-	  with locks and opens, for example). It is mandatory for
-	  servers to support this, though no clients use it yet.
-
-In addition, some limitations are inherited from the current NFSv4
-implementation:
-
-	- Incomplete delegation enforcement: if a file is renamed or
-	  unlinked by a local process, a client holding a delegation may
-	  continue to indefinitely allow opens of the file under the old
-	  name.
-
 The table below, taken from the NFSv4.1 document, lists
 the operations that are mandatory to implement (REQ), optional
 (OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -169,6 +146,16 @@ NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
 
 Implementation notes:

+SSV:
+* The spec claims this is mandatory, but we don't actually know of any
+  implementations, so we're ignoring it for now. The server returns
+  NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
+
+GSS on the backchannel:
+* Again, theoretically required but not widely implemented (in
+  particular, the current Linux client doesn't request it). We return
+  NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
+
 DELEGPURGE:
 * mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
   CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
@@ -176,7 +163,6 @@ DELEGPURGE:
   now.

 EXCHANGE_ID:
-* only SP4_NONE state protection supported
 * implementation ids are ignored

 CREATE_SESSION:
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 0721858fbd1e..2d75ae246167 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -62,17 +62,18 @@ struct nfhd_device {
 static void nfhd_make_request(struct request_queue *queue, struct bio *bio)
 {
        struct nfhd_device *dev = queue->queuedata;
-       struct bio_vec *bvec;
-       int i, dir, len, shift;
-       sector_t sec = bio->bi_sector;
+       struct bio_vec bvec;
+       struct bvec_iter iter;
+       int dir, len, shift;
+       sector_t sec = bio->bi_iter.bi_sector;

        dir = bio_data_dir(bio);
        shift = dev->bshift;
-       bio_for_each_segment(bvec, bio, i) {
-               len = bvec->bv_len;
+       bio_for_each_segment(bvec, bio, iter) {
+               len = bvec.bv_len;
                len >>= 9;
                nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
-                               bvec_to_phys(bvec));
+                               bvec_to_phys(&bvec));
                sec += len;
        }
        bio_endio(bio, 0);
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1c16141c031c..47b6b9f81d43 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -109,27 +109,28 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
        struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
        unsigned long phys_mem, phys_end;
        void *user_mem;
-       struct bio_vec *vec;
+       struct bio_vec vec;
        unsigned int transfered;
-       unsigned short idx;
+       struct bvec_iter iter;

-       phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
+       phys_mem = bank->io_addr + (bio->bi_iter.bi_sector <<
+                       AXON_RAM_SECTOR_SHIFT);
        phys_end = bank->io_addr + bank->size;
        transfered = 0;
-       bio_for_each_segment(vec, bio, idx) {
-               if (unlikely(phys_mem + vec->bv_len > phys_end)) {
+       bio_for_each_segment(vec, bio, iter) {
+               if (unlikely(phys_mem + vec.bv_len > phys_end)) {
                        bio_io_error(bio);
                        return;
                }

-               user_mem = page_address(vec->bv_page) + vec->bv_offset;
+               user_mem = page_address(vec.bv_page) + vec.bv_offset;
                if (bio_data_dir(bio) == READ)
-                       memcpy(user_mem, (void *) phys_mem, vec->bv_len);
+                       memcpy(user_mem, (void *) phys_mem, vec.bv_len);
                else
-                       memcpy((void *) phys_mem, user_mem, vec->bv_len);
+                       memcpy((void *) phys_mem, user_mem, vec.bv_len);

-               phys_mem += vec->bv_len;
-               transfered += vec->bv_len;
+               phys_mem += vec.bv_len;
+               transfered += vec.bv_len;
        }
        bio_endio(bio, 0);
 }
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 8c6e819cd8ed..48eebacdf5fe 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -103,18 +103,18 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
 
 static int simdisk_xfer_bio(struct simdisk *dev, struct bio *bio)
 {
-       int i;
-       struct bio_vec *bvec;
-       sector_t sector = bio->bi_sector;
+       struct bio_vec bvec;
+       struct bvec_iter iter;
+       sector_t sector = bio->bi_iter.bi_sector;

-       bio_for_each_segment(bvec, bio, i) {
-               char *buffer = __bio_kmap_atomic(bio, i);
-               unsigned len = bvec->bv_len >> SECTOR_SHIFT;
+       bio_for_each_segment(bvec, bio, iter) {
+               char *buffer = __bio_kmap_atomic(bio, iter);
+               unsigned len = bvec.bv_len >> SECTOR_SHIFT;

                simdisk_transfer(dev, sector, len, buffer,
                                bio_data_dir(bio) == WRITE);
                sector += len;
-               __bio_kunmap_atomic(bio);
+               __bio_kunmap_atomic(buffer);
        }
        return 0;
 }
diff --git a/block/blk-core.c b/block/blk-core.c
index 8bdd0121212a..c00e0bdeab4a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -38,6 +38,7 @@
38 38
39#include "blk.h" 39#include "blk.h"
40#include "blk-cgroup.h" 40#include "blk-cgroup.h"
41#include "blk-mq.h"
41 42
42EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); 43EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
43EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 44EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -130,7 +131,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
130 bio_advance(bio, nbytes); 131 bio_advance(bio, nbytes);
131 132
132 /* don't actually finish bio if it's part of flush sequence */ 133 /* don't actually finish bio if it's part of flush sequence */
133 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) 134 if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
134 bio_endio(bio, error); 135 bio_endio(bio, error);
135} 136}
136 137
@@ -245,7 +246,16 @@ EXPORT_SYMBOL(blk_stop_queue);
245void blk_sync_queue(struct request_queue *q) 246void blk_sync_queue(struct request_queue *q)
246{ 247{
247 del_timer_sync(&q->timeout); 248 del_timer_sync(&q->timeout);
248 cancel_delayed_work_sync(&q->delay_work); 249
250 if (q->mq_ops) {
251 struct blk_mq_hw_ctx *hctx;
252 int i;
253
254 queue_for_each_hw_ctx(q, hctx, i)
255 cancel_delayed_work_sync(&hctx->delayed_work);
256 } else {
257 cancel_delayed_work_sync(&q->delay_work);
258 }
249} 259}
250EXPORT_SYMBOL(blk_sync_queue); 260EXPORT_SYMBOL(blk_sync_queue);
251 261
@@ -497,8 +507,13 @@ void blk_cleanup_queue(struct request_queue *q)
497 * Drain all requests queued before DYING marking. Set DEAD flag to 507 * Drain all requests queued before DYING marking. Set DEAD flag to
498 * prevent that q->request_fn() gets invoked after draining finished. 508 * prevent that q->request_fn() gets invoked after draining finished.
499 */ 509 */
500 spin_lock_irq(lock); 510 if (q->mq_ops) {
501 __blk_drain_queue(q, true); 511 blk_mq_drain_queue(q);
512 spin_lock_irq(lock);
513 } else {
514 spin_lock_irq(lock);
515 __blk_drain_queue(q, true);
516 }
502 queue_flag_set(QUEUE_FLAG_DEAD, q); 517 queue_flag_set(QUEUE_FLAG_DEAD, q);
503 spin_unlock_irq(lock); 518 spin_unlock_irq(lock);
504 519
@@ -1326,7 +1341,7 @@ void blk_add_request_payload(struct request *rq, struct page *page,
1326 bio->bi_io_vec->bv_offset = 0; 1341 bio->bi_io_vec->bv_offset = 0;
1327 bio->bi_io_vec->bv_len = len; 1342 bio->bi_io_vec->bv_len = len;
1328 1343
1329 bio->bi_size = len; 1344 bio->bi_iter.bi_size = len;
1330 bio->bi_vcnt = 1; 1345 bio->bi_vcnt = 1;
1331 bio->bi_phys_segments = 1; 1346 bio->bi_phys_segments = 1;
1332 1347
@@ -1351,7 +1366,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1351 1366
1352 req->biotail->bi_next = bio; 1367 req->biotail->bi_next = bio;
1353 req->biotail = bio; 1368 req->biotail = bio;
1354 req->__data_len += bio->bi_size; 1369 req->__data_len += bio->bi_iter.bi_size;
1355 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); 1370 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1356 1371
1357 blk_account_io_start(req, false); 1372 blk_account_io_start(req, false);
@@ -1380,8 +1395,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
1380 * not touch req->buffer either... 1395 * not touch req->buffer either...
1381 */ 1396 */
1382 req->buffer = bio_data(bio); 1397 req->buffer = bio_data(bio);
1383 req->__sector = bio->bi_sector; 1398 req->__sector = bio->bi_iter.bi_sector;
1384 req->__data_len += bio->bi_size; 1399 req->__data_len += bio->bi_iter.bi_size;
1385 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); 1400 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1386 1401
1387 blk_account_io_start(req, false); 1402 blk_account_io_start(req, false);
@@ -1459,7 +1474,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1459 req->cmd_flags |= REQ_FAILFAST_MASK; 1474 req->cmd_flags |= REQ_FAILFAST_MASK;
1460 1475
1461 req->errors = 0; 1476 req->errors = 0;
1462 req->__sector = bio->bi_sector; 1477 req->__sector = bio->bi_iter.bi_sector;
1463 req->ioprio = bio_prio(bio); 1478 req->ioprio = bio_prio(bio);
1464 blk_rq_bio_prep(req->q, req, bio); 1479 blk_rq_bio_prep(req->q, req, bio);
1465} 1480}
@@ -1583,12 +1598,12 @@ static inline void blk_partition_remap(struct bio *bio)
1583 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1598 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1584 struct hd_struct *p = bdev->bd_part; 1599 struct hd_struct *p = bdev->bd_part;
1585 1600
1586 bio->bi_sector += p->start_sect; 1601 bio->bi_iter.bi_sector += p->start_sect;
1587 bio->bi_bdev = bdev->bd_contains; 1602 bio->bi_bdev = bdev->bd_contains;
1588 1603
1589 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, 1604 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1590 bdev->bd_dev, 1605 bdev->bd_dev,
1591 bio->bi_sector - p->start_sect); 1606 bio->bi_iter.bi_sector - p->start_sect);
1592 } 1607 }
1593} 1608}
1594 1609
@@ -1654,7 +1669,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1654 /* Test device or partition size, when known. */ 1669 /* Test device or partition size, when known. */
1655 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; 1670 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1656 if (maxsector) { 1671 if (maxsector) {
1657 sector_t sector = bio->bi_sector; 1672 sector_t sector = bio->bi_iter.bi_sector;
1658 1673
1659 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 1674 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1660 /* 1675 /*
@@ -1690,7 +1705,7 @@ generic_make_request_checks(struct bio *bio)
1690 "generic_make_request: Trying to access " 1705 "generic_make_request: Trying to access "
1691 "nonexistent block-device %s (%Lu)\n", 1706 "nonexistent block-device %s (%Lu)\n",
1692 bdevname(bio->bi_bdev, b), 1707 bdevname(bio->bi_bdev, b),
1693 (long long) bio->bi_sector); 1708 (long long) bio->bi_iter.bi_sector);
1694 goto end_io; 1709 goto end_io;
1695 } 1710 }
1696 1711
@@ -1704,9 +1719,9 @@ generic_make_request_checks(struct bio *bio)
1704 } 1719 }
1705 1720
1706 part = bio->bi_bdev->bd_part; 1721 part = bio->bi_bdev->bd_part;
1707 if (should_fail_request(part, bio->bi_size) || 1722 if (should_fail_request(part, bio->bi_iter.bi_size) ||
1708 should_fail_request(&part_to_disk(part)->part0, 1723 should_fail_request(&part_to_disk(part)->part0,
1709 bio->bi_size)) 1724 bio->bi_iter.bi_size))
1710 goto end_io; 1725 goto end_io;
1711 1726
1712 /* 1727 /*
@@ -1865,7 +1880,7 @@ void submit_bio(int rw, struct bio *bio)
1865 if (rw & WRITE) { 1880 if (rw & WRITE) {
1866 count_vm_events(PGPGOUT, count); 1881 count_vm_events(PGPGOUT, count);
1867 } else { 1882 } else {
1868 task_io_account_read(bio->bi_size); 1883 task_io_account_read(bio->bi_iter.bi_size);
1869 count_vm_events(PGPGIN, count); 1884 count_vm_events(PGPGIN, count);
1870 } 1885 }
1871 1886
@@ -1874,7 +1889,7 @@ void submit_bio(int rw, struct bio *bio)
1874 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", 1889 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1875 current->comm, task_pid_nr(current), 1890 current->comm, task_pid_nr(current),
1876 (rw & WRITE) ? "WRITE" : "READ", 1891 (rw & WRITE) ? "WRITE" : "READ",
1877 (unsigned long long)bio->bi_sector, 1892 (unsigned long long)bio->bi_iter.bi_sector,
1878 bdevname(bio->bi_bdev, b), 1893 bdevname(bio->bi_bdev, b),
1879 count); 1894 count);
1880 } 1895 }
@@ -2007,7 +2022,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
2007 for (bio = rq->bio; bio; bio = bio->bi_next) { 2022 for (bio = rq->bio; bio; bio = bio->bi_next) {
2008 if ((bio->bi_rw & ff) != ff) 2023 if ((bio->bi_rw & ff) != ff)
2009 break; 2024 break;
2010 bytes += bio->bi_size; 2025 bytes += bio->bi_iter.bi_size;
2011 } 2026 }
2012 2027
2013 /* this could lead to infinite loop */ 2028 /* this could lead to infinite loop */
@@ -2378,9 +2393,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2378 total_bytes = 0; 2393 total_bytes = 0;
2379 while (req->bio) { 2394 while (req->bio) {
2380 struct bio *bio = req->bio; 2395 struct bio *bio = req->bio;
2381 unsigned bio_bytes = min(bio->bi_size, nr_bytes); 2396 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
2382 2397
2383 if (bio_bytes == bio->bi_size) 2398 if (bio_bytes == bio->bi_iter.bi_size)
2384 req->bio = bio->bi_next; 2399 req->bio = bio->bi_next;
2385 2400
2386 req_bio_endio(req, bio, bio_bytes, error); 2401 req_bio_endio(req, bio, bio_bytes, error);
@@ -2728,7 +2743,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2728 rq->nr_phys_segments = bio_phys_segments(q, bio); 2743 rq->nr_phys_segments = bio_phys_segments(q, bio);
2729 rq->buffer = bio_data(bio); 2744 rq->buffer = bio_data(bio);
2730 } 2745 }
2731 rq->__data_len = bio->bi_size; 2746 rq->__data_len = bio->bi_iter.bi_size;
2732 rq->bio = rq->biotail = bio; 2747 rq->bio = rq->biotail = bio;
2733 2748
2734 if (bio->bi_bdev) 2749 if (bio->bi_bdev)
@@ -2746,10 +2761,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2746void rq_flush_dcache_pages(struct request *rq) 2761void rq_flush_dcache_pages(struct request *rq)
2747{ 2762{
2748 struct req_iterator iter; 2763 struct req_iterator iter;
2749 struct bio_vec *bvec; 2764 struct bio_vec bvec;
2750 2765
2751 rq_for_each_segment(bvec, rq, iter) 2766 rq_for_each_segment(bvec, rq, iter)
2752 flush_dcache_page(bvec->bv_page); 2767 flush_dcache_page(bvec.bv_page);
2753} 2768}
2754EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); 2769EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2755#endif 2770#endif
diff --git a/block/blk-exec.c b/block/blk-exec.c
index c3edf9dff566..bbfc072a79c2 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -60,6 +60,10 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
        rq->rq_disk = bd_disk;
        rq->end_io = done;

+       /*
+        * don't check the dying flag for MQ because the request won't
+        * be reused after the dying flag is set
+        */
        if (q->mq_ops) {
                blk_mq_insert_request(q, rq, true);
                return;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index fb6f3c0ffa49..9288aaf35c21 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -548,7 +548,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
548 * copied from blk_rq_pos(rq). 548 * copied from blk_rq_pos(rq).
549 */ 549 */
550 if (error_sector) 550 if (error_sector)
551 *error_sector = bio->bi_sector; 551 *error_sector = bio->bi_iter.bi_sector;
552 552
553 bio_put(bio); 553 bio_put(bio);
554 return ret; 554 return ret;
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 03cf7179e8ef..7fbab84399e6 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -43,30 +43,32 @@ static const char *bi_unsupported_name = "unsupported";
43 */ 43 */
44int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) 44int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
45{ 45{
46 struct bio_vec *iv, *ivprv = NULL; 46 struct bio_vec iv, ivprv = { NULL };
47 unsigned int segments = 0; 47 unsigned int segments = 0;
48 unsigned int seg_size = 0; 48 unsigned int seg_size = 0;
49 unsigned int i = 0; 49 struct bvec_iter iter;
50 int prev = 0;
50 51
51 bio_for_each_integrity_vec(iv, bio, i) { 52 bio_for_each_integrity_vec(iv, bio, iter) {
52 53
53 if (ivprv) { 54 if (prev) {
54 if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) 55 if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
55 goto new_segment; 56 goto new_segment;
56 57
57 if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) 58 if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
58 goto new_segment; 59 goto new_segment;
59 60
60 if (seg_size + iv->bv_len > queue_max_segment_size(q)) 61 if (seg_size + iv.bv_len > queue_max_segment_size(q))
61 goto new_segment; 62 goto new_segment;
62 63
63 seg_size += iv->bv_len; 64 seg_size += iv.bv_len;
64 } else { 65 } else {
65new_segment: 66new_segment:
66 segments++; 67 segments++;
67 seg_size = iv->bv_len; 68 seg_size = iv.bv_len;
68 } 69 }
69 70
71 prev = 1;
70 ivprv = iv; 72 ivprv = iv;
71 } 73 }
72 74
@@ -87,24 +89,25 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg);
87int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, 89int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
88 struct scatterlist *sglist) 90 struct scatterlist *sglist)
89{ 91{
90 struct bio_vec *iv, *ivprv = NULL; 92 struct bio_vec iv, ivprv = { NULL };
91 struct scatterlist *sg = NULL; 93 struct scatterlist *sg = NULL;
92 unsigned int segments = 0; 94 unsigned int segments = 0;
93 unsigned int i = 0; 95 struct bvec_iter iter;
96 int prev = 0;
94 97
95 bio_for_each_integrity_vec(iv, bio, i) { 98 bio_for_each_integrity_vec(iv, bio, iter) {
96 99
97 if (ivprv) { 100 if (prev) {
98 if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) 101 if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
99 goto new_segment; 102 goto new_segment;
100 103
101 if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) 104 if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
102 goto new_segment; 105 goto new_segment;
103 106
104 if (sg->length + iv->bv_len > queue_max_segment_size(q)) 107 if (sg->length + iv.bv_len > queue_max_segment_size(q))
105 goto new_segment; 108 goto new_segment;
106 109
107 sg->length += iv->bv_len; 110 sg->length += iv.bv_len;
108 } else { 111 } else {
109new_segment: 112new_segment:
110 if (!sg) 113 if (!sg)
@@ -114,10 +117,11 @@ new_segment:
114 sg = sg_next(sg); 117 sg = sg_next(sg);
115 } 118 }
116 119
117 sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset); 120 sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset);
118 segments++; 121 segments++;
119 } 122 }
120 123
124 prev = 1;
121 ivprv = iv; 125 ivprv = iv;
122 } 126 }
123 127
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9b5b561cb928..2da76c999ef3 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -108,12 +108,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
108 req_sects = end_sect - sector; 108 req_sects = end_sect - sector;
109 } 109 }
110 110
111 bio->bi_sector = sector; 111 bio->bi_iter.bi_sector = sector;
112 bio->bi_end_io = bio_batch_end_io; 112 bio->bi_end_io = bio_batch_end_io;
113 bio->bi_bdev = bdev; 113 bio->bi_bdev = bdev;
114 bio->bi_private = &bb; 114 bio->bi_private = &bb;
115 115
116 bio->bi_size = req_sects << 9; 116 bio->bi_iter.bi_size = req_sects << 9;
117 nr_sects -= req_sects; 117 nr_sects -= req_sects;
118 sector = end_sect; 118 sector = end_sect;
119 119
@@ -174,7 +174,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
174 break; 174 break;
175 } 175 }
176 176
177 bio->bi_sector = sector; 177 bio->bi_iter.bi_sector = sector;
178 bio->bi_end_io = bio_batch_end_io; 178 bio->bi_end_io = bio_batch_end_io;
179 bio->bi_bdev = bdev; 179 bio->bi_bdev = bdev;
180 bio->bi_private = &bb; 180 bio->bi_private = &bb;
@@ -184,11 +184,11 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
184 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); 184 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
185 185
186 if (nr_sects > max_write_same_sectors) { 186 if (nr_sects > max_write_same_sectors) {
187 bio->bi_size = max_write_same_sectors << 9; 187 bio->bi_iter.bi_size = max_write_same_sectors << 9;
188 nr_sects -= max_write_same_sectors; 188 nr_sects -= max_write_same_sectors;
189 sector += max_write_same_sectors; 189 sector += max_write_same_sectors;
190 } else { 190 } else {
191 bio->bi_size = nr_sects << 9; 191 bio->bi_iter.bi_size = nr_sects << 9;
192 nr_sects = 0; 192 nr_sects = 0;
193 } 193 }
194 194
@@ -240,7 +240,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
240 break; 240 break;
241 } 241 }
242 242
243 bio->bi_sector = sector; 243 bio->bi_iter.bi_sector = sector;
244 bio->bi_bdev = bdev; 244 bio->bi_bdev = bdev;
245 bio->bi_end_io = bio_batch_end_io; 245 bio->bi_end_io = bio_batch_end_io;
246 bio->bi_private = &bb; 246 bio->bi_private = &bb;
diff --git a/block/blk-map.c b/block/blk-map.c
index 623e1cd4cffe..ae4ae1047fd9 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,7 +20,7 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
20 rq->biotail->bi_next = bio; 20 rq->biotail->bi_next = bio;
21 rq->biotail = bio; 21 rq->biotail = bio;
22 22
23 rq->__data_len += bio->bi_size; 23 rq->__data_len += bio->bi_iter.bi_size;
24 } 24 }
25 return 0; 25 return 0;
26} 26}
@@ -76,7 +76,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
76 76
77 ret = blk_rq_append_bio(q, rq, bio); 77 ret = blk_rq_append_bio(q, rq, bio);
78 if (!ret) 78 if (!ret)
79 return bio->bi_size; 79 return bio->bi_iter.bi_size;
80 80
81 /* if it was boucned we must call the end io function */ 81 /* if it was boucned we must call the end io function */
82 bio_endio(bio, 0); 82 bio_endio(bio, 0);
@@ -220,7 +220,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
220 if (IS_ERR(bio)) 220 if (IS_ERR(bio))
221 return PTR_ERR(bio); 221 return PTR_ERR(bio);
222 222
223 if (bio->bi_size != len) { 223 if (bio->bi_iter.bi_size != len) {
224 /* 224 /*
225 * Grab an extra reference to this bio, as bio_unmap_user() 225 * Grab an extra reference to this bio, as bio_unmap_user()
226 * expects to be able to drop it twice as it happens on the 226 * expects to be able to drop it twice as it happens on the
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1ffc58977835..8f8adaa95466 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -12,10 +12,11 @@
12static unsigned int __blk_recalc_rq_segments(struct request_queue *q, 12static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
13 struct bio *bio) 13 struct bio *bio)
14{ 14{
15 struct bio_vec *bv, *bvprv = NULL; 15 struct bio_vec bv, bvprv = { NULL };
16 int cluster, i, high, highprv = 1; 16 int cluster, high, highprv = 1;
17 unsigned int seg_size, nr_phys_segs; 17 unsigned int seg_size, nr_phys_segs;
18 struct bio *fbio, *bbio; 18 struct bio *fbio, *bbio;
19 struct bvec_iter iter;
19 20
20 if (!bio) 21 if (!bio)
21 return 0; 22 return 0;
@@ -25,25 +26,23 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
25 seg_size = 0; 26 seg_size = 0;
26 nr_phys_segs = 0; 27 nr_phys_segs = 0;
27 for_each_bio(bio) { 28 for_each_bio(bio) {
28 bio_for_each_segment(bv, bio, i) { 29 bio_for_each_segment(bv, bio, iter) {
29 /* 30 /*
30 * the trick here is making sure that a high page is 31 * the trick here is making sure that a high page is
31 * never considered part of another segment, since that 32 * never considered part of another segment, since that
32 * might change with the bounce page. 33 * might change with the bounce page.
33 */ 34 */
34 high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q); 35 high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
35 if (high || highprv) 36 if (!high && !highprv && cluster) {
36 goto new_segment; 37 if (seg_size + bv.bv_len
37 if (cluster) {
38 if (seg_size + bv->bv_len
39 > queue_max_segment_size(q)) 38 > queue_max_segment_size(q))
40 goto new_segment; 39 goto new_segment;
41 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) 40 if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
42 goto new_segment; 41 goto new_segment;
43 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 42 if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
44 goto new_segment; 43 goto new_segment;
45 44
46 seg_size += bv->bv_len; 45 seg_size += bv.bv_len;
47 bvprv = bv; 46 bvprv = bv;
48 continue; 47 continue;
49 } 48 }
@@ -54,7 +53,7 @@ new_segment:
54 53
55 nr_phys_segs++; 54 nr_phys_segs++;
56 bvprv = bv; 55 bvprv = bv;
57 seg_size = bv->bv_len; 56 seg_size = bv.bv_len;
58 highprv = high; 57 highprv = high;
59 } 58 }
60 bbio = bio; 59 bbio = bio;
@@ -87,6 +86,9 @@ EXPORT_SYMBOL(blk_recount_segments);
87static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, 86static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
88 struct bio *nxt) 87 struct bio *nxt)
89{ 88{
89 struct bio_vec end_bv = { NULL }, nxt_bv;
90 struct bvec_iter iter;
91
90 if (!blk_queue_cluster(q)) 92 if (!blk_queue_cluster(q))
91 return 0; 93 return 0;
92 94
@@ -97,34 +99,40 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
97 if (!bio_has_data(bio)) 99 if (!bio_has_data(bio))
98 return 1; 100 return 1;
99 101
100 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 102 bio_for_each_segment(end_bv, bio, iter)
103 if (end_bv.bv_len == iter.bi_size)
104 break;
105
106 nxt_bv = bio_iovec(nxt);
107
108 if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
101 return 0; 109 return 0;
102 110
103 /* 111 /*
104 * bio and nxt are contiguous in memory; check if the queue allows 112 * bio and nxt are contiguous in memory; check if the queue allows
105 * these two to be merged into one 113 * these two to be merged into one
106 */ 114 */
107 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 115 if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
108 return 1; 116 return 1;
109 117
110 return 0; 118 return 0;
111} 119}
112 120
113static void 121static inline void
114__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, 122__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
115 struct scatterlist *sglist, struct bio_vec **bvprv, 123 struct scatterlist *sglist, struct bio_vec *bvprv,
116 struct scatterlist **sg, int *nsegs, int *cluster) 124 struct scatterlist **sg, int *nsegs, int *cluster)
117{ 125{
118 126
119 int nbytes = bvec->bv_len; 127 int nbytes = bvec->bv_len;
120 128
121 if (*bvprv && *cluster) { 129 if (*sg && *cluster) {
122 if ((*sg)->length + nbytes > queue_max_segment_size(q)) 130 if ((*sg)->length + nbytes > queue_max_segment_size(q))
123 goto new_segment; 131 goto new_segment;
124 132
125 if (!BIOVEC_PHYS_MERGEABLE(*bvprv, bvec)) 133 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
126 goto new_segment; 134 goto new_segment;
127 if (!BIOVEC_SEG_BOUNDARY(q, *bvprv, bvec)) 135 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
128 goto new_segment; 136 goto new_segment;
129 137
130 (*sg)->length += nbytes; 138 (*sg)->length += nbytes;
@@ -150,7 +158,7 @@ new_segment:
150 sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); 158 sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
151 (*nsegs)++; 159 (*nsegs)++;
152 } 160 }
153 *bvprv = bvec; 161 *bvprv = *bvec;
154} 162}
155 163
156/* 164/*
@@ -160,7 +168,7 @@ new_segment:
160int blk_rq_map_sg(struct request_queue *q, struct request *rq, 168int blk_rq_map_sg(struct request_queue *q, struct request *rq,
161 struct scatterlist *sglist) 169 struct scatterlist *sglist)
162{ 170{
163 struct bio_vec *bvec, *bvprv; 171 struct bio_vec bvec, bvprv = { NULL };
164 struct req_iterator iter; 172 struct req_iterator iter;
165 struct scatterlist *sg; 173 struct scatterlist *sg;
166 int nsegs, cluster; 174 int nsegs, cluster;
@@ -171,10 +179,9 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
171 /* 179 /*
172 * for each bio in rq 180 * for each bio in rq
173 */ 181 */
174 bvprv = NULL;
175 sg = NULL; 182 sg = NULL;
176 rq_for_each_segment(bvec, rq, iter) { 183 rq_for_each_segment(bvec, rq, iter) {
177 __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, 184 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
178 &nsegs, &cluster); 185 &nsegs, &cluster);
179 } /* segments in rq */ 186 } /* segments in rq */
180 187
@@ -223,18 +230,17 @@ EXPORT_SYMBOL(blk_rq_map_sg);
223int blk_bio_map_sg(struct request_queue *q, struct bio *bio, 230int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
224 struct scatterlist *sglist) 231 struct scatterlist *sglist)
225{ 232{
226 struct bio_vec *bvec, *bvprv; 233 struct bio_vec bvec, bvprv = { NULL };
227 struct scatterlist *sg; 234 struct scatterlist *sg;
228 int nsegs, cluster; 235 int nsegs, cluster;
229 unsigned long i; 236 struct bvec_iter iter;
230 237
231 nsegs = 0; 238 nsegs = 0;
232 cluster = blk_queue_cluster(q); 239 cluster = blk_queue_cluster(q);
233 240
234 bvprv = NULL;
235 sg = NULL; 241 sg = NULL;
236 bio_for_each_segment(bvec, bio, i) { 242 bio_for_each_segment(bvec, bio, iter) {
237 __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, 243 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
238 &nsegs, &cluster); 244 &nsegs, &cluster);
239 } /* segments in bio */ 245 } /* segments in bio */
240 246
@@ -543,9 +549,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
543 549
544int blk_try_merge(struct request *rq, struct bio *bio) 550int blk_try_merge(struct request *rq, struct bio *bio)
545{ 551{
546 if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector) 552 if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
547 return ELEVATOR_BACK_MERGE; 553 return ELEVATOR_BACK_MERGE;
548 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector) 554 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
549 return ELEVATOR_FRONT_MERGE; 555 return ELEVATOR_FRONT_MERGE;
550 return ELEVATOR_NO_MERGE; 556 return ELEVATOR_NO_MERGE;
551} 557}
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 0045ace9bdf0..3146befb56aa 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -28,36 +28,6 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
28 return NOTIFY_OK; 28 return NOTIFY_OK;
29} 29}
30 30
31static void blk_mq_cpu_notify(void *data, unsigned long action,
32 unsigned int cpu)
33{
34 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
35 /*
36 * If the CPU goes away, ensure that we run any pending
37 * completions.
38 */
39 struct llist_node *node;
40 struct request *rq;
41
42 local_irq_disable();
43
44 node = llist_del_all(&per_cpu(ipi_lists, cpu));
45 while (node) {
46 struct llist_node *next = node->next;
47
48 rq = llist_entry(node, struct request, ll_list);
49 __blk_mq_end_io(rq, rq->errors);
50 node = next;
51 }
52
53 local_irq_enable();
54 }
55}
56
57static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = {
58 .notifier_call = blk_mq_main_cpu_notify,
59};
60
61void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) 31void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
62{ 32{
63 BUG_ON(!notifier->notify); 33 BUG_ON(!notifier->notify);
@@ -82,12 +52,7 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
82 notifier->data = data; 52 notifier->data = data;
83} 53}
84 54
85static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = {
86 .notify = blk_mq_cpu_notify,
87};
88
89void __init blk_mq_cpu_init(void) 55void __init blk_mq_cpu_init(void)
90{ 56{
91 register_hotcpu_notifier(&blk_mq_main_cpu_notifier); 57 hotcpu_notifier(blk_mq_main_cpu_notify, 0);
92 blk_mq_register_cpu_notifier(&cpu_notifier);
93} 58}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c79126e11030..57039fcd9c93 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -27,8 +27,6 @@ static LIST_HEAD(all_q_list);
27 27
28static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); 28static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
29 29
30DEFINE_PER_CPU(struct llist_head, ipi_lists);
31
32static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, 30static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
33 unsigned int cpu) 31 unsigned int cpu)
34{ 32{
@@ -106,10 +104,13 @@ static int blk_mq_queue_enter(struct request_queue *q)
106 104
107 spin_lock_irq(q->queue_lock); 105 spin_lock_irq(q->queue_lock);
108 ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, 106 ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
109 !blk_queue_bypass(q), *q->queue_lock); 107 !blk_queue_bypass(q) || blk_queue_dying(q),
108 *q->queue_lock);
110 /* inc usage with lock hold to avoid freeze_queue runs here */ 109 /* inc usage with lock hold to avoid freeze_queue runs here */
111 if (!ret) 110 if (!ret && !blk_queue_dying(q))
112 __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); 111 __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
112 else if (blk_queue_dying(q))
113 ret = -ENODEV;
113 spin_unlock_irq(q->queue_lock); 114 spin_unlock_irq(q->queue_lock);
114 115
115 return ret; 116 return ret;
@@ -120,6 +121,22 @@ static void blk_mq_queue_exit(struct request_queue *q)
120 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); 121 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
121} 122}
122 123
124static void __blk_mq_drain_queue(struct request_queue *q)
125{
126 while (true) {
127 s64 count;
128
129 spin_lock_irq(q->queue_lock);
130 count = percpu_counter_sum(&q->mq_usage_counter);
131 spin_unlock_irq(q->queue_lock);
132
133 if (count == 0)
134 break;
135 blk_mq_run_queues(q, false);
136 msleep(10);
137 }
138}
139
123/* 140/*
124 * Guarantee no request is in use, so we can change any data structure of 141 * Guarantee no request is in use, so we can change any data structure of
125 * the queue afterward. 142 * the queue afterward.
@@ -133,21 +150,13 @@ static void blk_mq_freeze_queue(struct request_queue *q)
133 queue_flag_set(QUEUE_FLAG_BYPASS, q); 150 queue_flag_set(QUEUE_FLAG_BYPASS, q);
134 spin_unlock_irq(q->queue_lock); 151 spin_unlock_irq(q->queue_lock);
135 152
136 if (!drain) 153 if (drain)
137 return; 154 __blk_mq_drain_queue(q);
138 155}
139 while (true) {
140 s64 count;
141
142 spin_lock_irq(q->queue_lock);
143 count = percpu_counter_sum(&q->mq_usage_counter);
144 spin_unlock_irq(q->queue_lock);
145 156
146 if (count == 0) 157void blk_mq_drain_queue(struct request_queue *q)
147 break; 158{
148 blk_mq_run_queues(q, false); 159 __blk_mq_drain_queue(q);
149 msleep(10);
150 }
151} 160}
152 161
153static void blk_mq_unfreeze_queue(struct request_queue *q) 162static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -179,6 +188,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
179 188
180 rq->mq_ctx = ctx; 189 rq->mq_ctx = ctx;
181 rq->cmd_flags = rw_flags; 190 rq->cmd_flags = rw_flags;
191 rq->start_time = jiffies;
192 set_start_time_ns(rq);
182 ctx->rq_dispatched[rw_is_sync(rw_flags)]++; 193 ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
183} 194}
184 195
@@ -305,7 +316,7 @@ void blk_mq_complete_request(struct request *rq, int error)
305 struct bio *next = bio->bi_next; 316 struct bio *next = bio->bi_next;
306 317
307 bio->bi_next = NULL; 318 bio->bi_next = NULL;
308 bytes += bio->bi_size; 319 bytes += bio->bi_iter.bi_size;
309 blk_mq_bio_endio(rq, bio, error); 320 blk_mq_bio_endio(rq, bio, error);
310 bio = next; 321 bio = next;
311 } 322 }
@@ -326,55 +337,12 @@ void __blk_mq_end_io(struct request *rq, int error)
326 blk_mq_complete_request(rq, error); 337 blk_mq_complete_request(rq, error);
327} 338}
328 339
329#if defined(CONFIG_SMP) 340static void blk_mq_end_io_remote(void *data)
330
331/*
332 * Called with interrupts disabled.
333 */
334static void ipi_end_io(void *data)
335{
336 struct llist_head *list = &per_cpu(ipi_lists, smp_processor_id());
337 struct llist_node *entry, *next;
338 struct request *rq;
339
340 entry = llist_del_all(list);
341
342 while (entry) {
343 next = entry->next;
344 rq = llist_entry(entry, struct request, ll_list);
345 __blk_mq_end_io(rq, rq->errors);
346 entry = next;
347 }
348}
349
350static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
351 struct request *rq, const int error)
352{ 341{
353 struct call_single_data *data = &rq->csd; 342 struct request *rq = data;
354
355 rq->errors = error;
356 rq->ll_list.next = NULL;
357 343
358 /* 344 __blk_mq_end_io(rq, rq->errors);
359 * If the list is non-empty, an existing IPI must already
360 * be "in flight". If that is the case, we need not schedule
361 * a new one.
362 */
363 if (llist_add(&rq->ll_list, &per_cpu(ipi_lists, ctx->cpu))) {
364 data->func = ipi_end_io;
365 data->flags = 0;
366 __smp_call_function_single(ctx->cpu, data, 0);
367 }
368
369 return true;
370}
371#else /* CONFIG_SMP */
372static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
373 struct request *rq, const int error)
374{
375 return false;
376} 345}
377#endif
378 346
379/* 347/*
380 * End IO on this request on a multiqueue enabled driver. We'll either do 348 * End IO on this request on a multiqueue enabled driver. We'll either do
@@ -390,11 +358,15 @@ void blk_mq_end_io(struct request *rq, int error)
390 return __blk_mq_end_io(rq, error); 358 return __blk_mq_end_io(rq, error);
391 359
392 cpu = get_cpu(); 360 cpu = get_cpu();
393 361 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
394 if (cpu == ctx->cpu || !cpu_online(ctx->cpu) || 362 rq->errors = error;
395 !ipi_remote_cpu(ctx, cpu, rq, error)) 363 rq->csd.func = blk_mq_end_io_remote;
364 rq->csd.info = rq;
365 rq->csd.flags = 0;
366 __smp_call_function_single(ctx->cpu, &rq->csd, 0);
367 } else {
396 __blk_mq_end_io(rq, error); 368 __blk_mq_end_io(rq, error);
397 369 }
398 put_cpu(); 370 put_cpu();
399} 371}
400EXPORT_SYMBOL(blk_mq_end_io); 372EXPORT_SYMBOL(blk_mq_end_io);
@@ -1091,8 +1063,8 @@ static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx)
1091 struct page *page; 1063 struct page *page;
1092 1064
1093 while (!list_empty(&hctx->page_list)) { 1065 while (!list_empty(&hctx->page_list)) {
1094 page = list_first_entry(&hctx->page_list, struct page, list); 1066 page = list_first_entry(&hctx->page_list, struct page, lru);
1095 list_del_init(&page->list); 1067 list_del_init(&page->lru);
1096 __free_pages(page, page->private); 1068 __free_pages(page, page->private);
1097 } 1069 }
1098 1070
@@ -1156,7 +1128,7 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
1156 break; 1128 break;
1157 1129
1158 page->private = this_order; 1130 page->private = this_order;
1159 list_add_tail(&page->list, &hctx->page_list); 1131 list_add_tail(&page->lru, &hctx->page_list);
1160 1132
1161 p = page_address(page); 1133 p = page_address(page);
1162 entries_per_page = order_to_size(this_order) / rq_size; 1134 entries_per_page = order_to_size(this_order) / rq_size;
@@ -1429,7 +1401,6 @@ void blk_mq_free_queue(struct request_queue *q)
1429 int i; 1401 int i;
1430 1402
1431 queue_for_each_hw_ctx(q, hctx, i) { 1403 queue_for_each_hw_ctx(q, hctx, i) {
1432 cancel_delayed_work_sync(&hctx->delayed_work);
1433 kfree(hctx->ctx_map); 1404 kfree(hctx->ctx_map);
1434 kfree(hctx->ctxs); 1405 kfree(hctx->ctxs);
1435 blk_mq_free_rq_map(hctx); 1406 blk_mq_free_rq_map(hctx);
@@ -1451,7 +1422,6 @@ void blk_mq_free_queue(struct request_queue *q)
1451 list_del_init(&q->all_q_node); 1422 list_del_init(&q->all_q_node);
1452 mutex_unlock(&all_q_mutex); 1423 mutex_unlock(&all_q_mutex);
1453} 1424}
1454EXPORT_SYMBOL(blk_mq_free_queue);
1455 1425
1456/* Basically redo blk_mq_init_queue with queue frozen */ 1426/* Basically redo blk_mq_init_queue with queue frozen */
1457static void blk_mq_queue_reinit(struct request_queue *q) 1427static void blk_mq_queue_reinit(struct request_queue *q)
@@ -1495,11 +1465,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
1495 1465
1496static int __init blk_mq_init(void) 1466static int __init blk_mq_init(void)
1497{ 1467{
1498 unsigned int i;
1499
1500 for_each_possible_cpu(i)
1501 init_llist_head(&per_cpu(ipi_lists, i));
1502
1503 blk_mq_cpu_init(); 1468 blk_mq_cpu_init();
1504 1469
1505 /* Must be called after percpu_counter_hotcpu_callback() */ 1470 /* Must be called after percpu_counter_hotcpu_callback() */
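The blk-mq.c hunks above factor the freeze-time wait loop into __blk_mq_drain_queue(): poll the per-queue usage counter, kick the hardware queues, and back off 10ms until the count reaches zero. Below is a minimal userspace sketch of that drain pattern; the names (inflight, kick_workers, drain) are illustrative stand-ins, not kernel API.

/*
 * Userspace analogue of the __blk_mq_drain_queue() pattern above: poll a
 * shared in-flight counter until it drops to zero, nudging workers between
 * polls.  Assumes C11 atomics and POSIX nanosleep().
 */
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static atomic_long inflight;              /* stands in for mq_usage_counter */

static void kick_workers(void)            /* stands in for blk_mq_run_queues() */
{
        atomic_fetch_sub(&inflight, 1);   /* pretend one request completed */
}

static void drain(void)
{
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000 };

        while (atomic_load(&inflight) != 0) {
                kick_workers();
                nanosleep(&ts, NULL);     /* mirrors the msleep(10) back-off */
        }
}

int main(void)
{
        atomic_store(&inflight, 3);       /* three requests "in flight" */
        drain();
        printf("queue drained\n");
        return 0;
}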
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 52bf1f96a2c2..5c3917984b00 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,6 +27,8 @@ void blk_mq_complete_request(struct request *rq, int error);
27void blk_mq_run_request(struct request *rq, bool run_queue, bool async); 27void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
28void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 28void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
29void blk_mq_init_flush(struct request_queue *q); 29void blk_mq_init_flush(struct request_queue *q);
30void blk_mq_drain_queue(struct request_queue *q);
31void blk_mq_free_queue(struct request_queue *q);
30 32
31/* 33/*
32 * CPU hotplug helpers 34 * CPU hotplug helpers
@@ -38,7 +40,6 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
38void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); 40void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
39void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); 41void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
40void blk_mq_cpu_init(void); 42void blk_mq_cpu_init(void);
41DECLARE_PER_CPU(struct llist_head, ipi_lists);
42 43
43/* 44/*
44 * CPU -> queue mappings 45 * CPU -> queue mappings
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 05e826793e4e..5d21239bc859 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -592,6 +592,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
592 ret = -1; 592 ret = -1;
593 } 593 }
594 594
595 t->raid_partial_stripes_expensive =
596 max(t->raid_partial_stripes_expensive,
597 b->raid_partial_stripes_expensive);
598
595 /* Find lowest common alignment_offset */ 599 /* Find lowest common alignment_offset */
596 t->alignment_offset = lcm(t->alignment_offset, alignment) 600 t->alignment_offset = lcm(t->alignment_offset, alignment)
597 & (max(t->physical_block_size, t->io_min) - 1); 601 & (max(t->physical_block_size, t->io_min) - 1);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 97779522472f..8095c4a21fc0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
11 11
12#include "blk.h" 12#include "blk.h"
13#include "blk-cgroup.h" 13#include "blk-cgroup.h"
14#include "blk-mq.h"
14 15
15struct queue_sysfs_entry { 16struct queue_sysfs_entry {
16 struct attribute attr; 17 struct attribute attr;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a760857e6b62..1474c3ab7e72 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -877,14 +877,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
877 do_div(tmp, HZ); 877 do_div(tmp, HZ);
878 bytes_allowed = tmp; 878 bytes_allowed = tmp;
879 879
880 if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) { 880 if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
881 if (wait) 881 if (wait)
882 *wait = 0; 882 *wait = 0;
883 return 1; 883 return 1;
884 } 884 }
885 885
886 /* Calc approx time to dispatch */ 886 /* Calc approx time to dispatch */
887 extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed; 887 extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
888 jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]); 888 jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]);
889 889
890 if (!jiffy_wait) 890 if (!jiffy_wait)
@@ -987,7 +987,7 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
987 bool rw = bio_data_dir(bio); 987 bool rw = bio_data_dir(bio);
988 988
989 /* Charge the bio to the group */ 989 /* Charge the bio to the group */
990 tg->bytes_disp[rw] += bio->bi_size; 990 tg->bytes_disp[rw] += bio->bi_iter.bi_size;
991 tg->io_disp[rw]++; 991 tg->io_disp[rw]++;
992 992
993 /* 993 /*
@@ -1003,8 +1003,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
1003 */ 1003 */
1004 if (!(bio->bi_rw & REQ_THROTTLED)) { 1004 if (!(bio->bi_rw & REQ_THROTTLED)) {
1005 bio->bi_rw |= REQ_THROTTLED; 1005 bio->bi_rw |= REQ_THROTTLED;
1006 throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, 1006 throtl_update_dispatch_stats(tg_to_blkg(tg),
1007 bio->bi_rw); 1007 bio->bi_iter.bi_size, bio->bi_rw);
1008 } 1008 }
1009} 1009}
1010 1010
@@ -1503,7 +1503,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
1503 if (tg) { 1503 if (tg) {
1504 if (!tg->has_rules[rw]) { 1504 if (!tg->has_rules[rw]) {
1505 throtl_update_dispatch_stats(tg_to_blkg(tg), 1505 throtl_update_dispatch_stats(tg_to_blkg(tg),
1506 bio->bi_size, bio->bi_rw); 1506 bio->bi_iter.bi_size, bio->bi_rw);
1507 goto out_unlock_rcu; 1507 goto out_unlock_rcu;
1508 } 1508 }
1509 } 1509 }
@@ -1559,7 +1559,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
1559 /* out-of-limit, queue to @tg */ 1559 /* out-of-limit, queue to @tg */
1560 throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d", 1560 throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
1561 rw == READ ? 'R' : 'W', 1561 rw == READ ? 'R' : 'W',
1562 tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], 1562 tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw],
1563 tg->io_disp[rw], tg->iops[rw], 1563 tg->io_disp[rw], tg->iops[rw],
1564 sq->nr_queued[READ], sq->nr_queued[WRITE]); 1564 sq->nr_queued[READ], sq->nr_queued[WRITE]);
1565 1565
diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c
index cc2637f8674e..9dbc67e42a99 100644
--- a/block/cmdline-parser.c
+++ b/block/cmdline-parser.c
@@ -4,8 +4,7 @@
4 * Written by Cai Zhiyong <caizhiyong@huawei.com> 4 * Written by Cai Zhiyong <caizhiyong@huawei.com>
5 * 5 *
6 */ 6 */
7#include <linux/buffer_head.h> 7#include <linux/export.h>
8#include <linux/module.h>
9#include <linux/cmdline-parser.h> 8#include <linux/cmdline-parser.h>
10 9
11static int parse_subpart(struct cmdline_subpart **subpart, char *partdef) 10static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
@@ -159,6 +158,7 @@ void cmdline_parts_free(struct cmdline_parts **parts)
159 *parts = next_parts; 158 *parts = next_parts;
160 } 159 }
161} 160}
161EXPORT_SYMBOL(cmdline_parts_free);
162 162
163int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline) 163int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
164{ 164{
@@ -206,6 +206,7 @@ fail:
206 cmdline_parts_free(parts); 206 cmdline_parts_free(parts);
207 goto done; 207 goto done;
208} 208}
209EXPORT_SYMBOL(cmdline_parts_parse);
209 210
210struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, 211struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
211 const char *bdev) 212 const char *bdev)
@@ -214,17 +215,17 @@ struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
214 parts = parts->next_parts; 215 parts = parts->next_parts;
215 return parts; 216 return parts;
216} 217}
218EXPORT_SYMBOL(cmdline_parts_find);
217 219
218/* 220/*
219 * add_part() 221 * add_part()
220 * 0 success. 222 * 0 success.
221 * 1 can not add so many partitions. 223 * 1 can not add so many partitions.
222 */ 224 */
223void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, 225int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
224 int slot, 226 int slot,
225 int (*add_part)(int, struct cmdline_subpart *, void *), 227 int (*add_part)(int, struct cmdline_subpart *, void *),
226 void *param) 228 void *param)
227
228{ 229{
229 sector_t from = 0; 230 sector_t from = 0;
230 struct cmdline_subpart *subpart; 231 struct cmdline_subpart *subpart;
@@ -247,4 +248,7 @@ void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
247 if (add_part(slot, subpart, param)) 248 if (add_part(slot, subpart, param))
248 break; 249 break;
249 } 250 }
251
252 return slot;
250} 253}
254EXPORT_SYMBOL(cmdline_parts_set);
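The cmdline-parser hunks above export the helpers and change cmdline_parts_set() to return the next partition slot instead of void, so a caller can resume numbering where the previous walk stopped. A hedged userspace sketch of that return-the-cursor contract follows; parts_set(), add_part() and MAX_SLOTS are made up for illustration.

/*
 * Sketch only: the walker reports the slot it stopped at so the caller can
 * continue filling entries from there, mirroring the new return value of
 * cmdline_parts_set().
 */
#include <stdio.h>

#define MAX_SLOTS 4

static int add_part(int slot, const char *name, void *param)
{
        const char **table = param;

        if (slot >= MAX_SLOTS)
                return 1;               /* cannot add any more partitions */
        table[slot] = name;
        return 0;
}

static int parts_set(const char *const *defs, int ndefs, int slot, void *param)
{
        int i;

        for (i = 0; i < ndefs; i++, slot++)
                if (add_part(slot, defs[i], param))
                        break;
        return slot;                    /* next slot to use */
}

int main(void)
{
        const char *table[MAX_SLOTS] = { 0 };
        const char *first[] = { "boot", "rootfs" };
        const char *second[] = { "data" };
        int i, slot = 0;

        slot = parts_set(first, 2, slot, table);
        slot = parts_set(second, 1, slot, table);   /* continues at slot 2 */

        for (i = 0; i < slot; i++)
                printf("slot %d -> %s\n", i, table[i]);
        return 0;
}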
diff --git a/block/elevator.c b/block/elevator.c
index b7ff2861b6bd..42c45a7d6714 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -440,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
440 /* 440 /*
441 * See if our hash lookup can find a potential backmerge. 441 * See if our hash lookup can find a potential backmerge.
442 */ 442 */
443 __rq = elv_rqhash_find(q, bio->bi_sector); 443 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
444 if (__rq && elv_rq_merge_ok(__rq, bio)) { 444 if (__rq && elv_rq_merge_ok(__rq, bio)) {
445 *req = __rq; 445 *req = __rq;
446 return ELEVATOR_BACK_MERGE; 446 return ELEVATOR_BACK_MERGE;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 625e3e471d65..26487972ac54 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -323,12 +323,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
323 323
324 if (hdr->iovec_count) { 324 if (hdr->iovec_count) {
325 size_t iov_data_len; 325 size_t iov_data_len;
326 struct iovec *iov; 326 struct iovec *iov = NULL;
327 327
328 ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, 328 ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
329 0, NULL, &iov); 329 0, NULL, &iov);
330 if (ret < 0) 330 if (ret < 0) {
331 kfree(iov);
331 goto out; 332 goto out;
333 }
332 334
333 iov_data_len = ret; 335 iov_data_len = ret;
334 ret = 0; 336 ret = 0;
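The sg_io() hunk above initialises iov to NULL and frees it when rw_copy_check_uvector() fails, since the helper may have allocated the vector before reporting the error. A small userspace sketch of the same caller-frees-on-error pattern; parse_vector() is invented for illustration and is not a real API.

/*
 * Sketch: a helper may hand back an allocation through an out-pointer even
 * when it fails, so the caller starts from NULL and frees on the error path.
 */
#include <stdlib.h>
#include <string.h>

static int parse_vector(const char *src, char **out)
{
        *out = malloc(16);                /* allocation happens first ... */
        if (!*out)
                return -1;
        if (strlen(src) >= 16)            /* ... then validation can fail */
                return -1;                /* caller owns *out either way */
        strcpy(*out, src);
        return 0;
}

int main(void)
{
        char *vec = NULL;                 /* mirrors "struct iovec *iov = NULL" */
        int ret = parse_vector("this string is far too long", &vec);

        if (ret < 0) {
                free(vec);                /* mirrors the added kfree(iov) */
                return 1;
        }
        free(vec);
        return 0;
}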
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 14a9d1912318..9220f8e833d0 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,11 +100,8 @@ enum {
100 100
101struct buf { 101struct buf {
102 ulong nframesout; 102 ulong nframesout;
103 ulong resid;
104 ulong bv_resid;
105 sector_t sector;
106 struct bio *bio; 103 struct bio *bio;
107 struct bio_vec *bv; 104 struct bvec_iter iter;
108 struct request *rq; 105 struct request *rq;
109}; 106};
110 107
@@ -120,13 +117,10 @@ struct frame {
120 ulong waited; 117 ulong waited;
121 ulong waited_total; 118 ulong waited_total;
122 struct aoetgt *t; /* parent target I belong to */ 119 struct aoetgt *t; /* parent target I belong to */
123 sector_t lba;
124 struct sk_buff *skb; /* command skb freed on module exit */ 120 struct sk_buff *skb; /* command skb freed on module exit */
125 struct sk_buff *r_skb; /* response skb for async processing */ 121 struct sk_buff *r_skb; /* response skb for async processing */
126 struct buf *buf; 122 struct buf *buf;
127 struct bio_vec *bv; 123 struct bvec_iter iter;
128 ulong bcnt;
129 ulong bv_off;
130 char flags; 124 char flags;
131}; 125};
132 126
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d2515435e23f..8184451b57c0 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -196,8 +196,7 @@ aoe_freetframe(struct frame *f)
196 196
197 t = f->t; 197 t = f->t;
198 f->buf = NULL; 198 f->buf = NULL;
199 f->lba = 0; 199 memset(&f->iter, 0, sizeof(f->iter));
200 f->bv = NULL;
201 f->r_skb = NULL; 200 f->r_skb = NULL;
202 f->flags = 0; 201 f->flags = 0;
203 list_add(&f->head, &t->ffree); 202 list_add(&f->head, &t->ffree);
@@ -295,21 +294,14 @@ newframe(struct aoedev *d)
295} 294}
296 295
297static void 296static void
298skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt) 297skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
299{ 298{
300 int frag = 0; 299 int frag = 0;
301 ulong fcnt; 300 struct bio_vec bv;
302loop: 301
303 fcnt = bv->bv_len - (off - bv->bv_offset); 302 __bio_for_each_segment(bv, bio, iter, iter)
304 if (fcnt > cnt) 303 skb_fill_page_desc(skb, frag++, bv.bv_page,
305 fcnt = cnt; 304 bv.bv_offset, bv.bv_len);
306 skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
307 cnt -= fcnt;
308 if (cnt <= 0)
309 return;
310 bv++;
311 off = bv->bv_offset;
312 goto loop;
313} 305}
314 306
315static void 307static void
@@ -346,12 +338,10 @@ ata_rw_frameinit(struct frame *f)
346 t->nout++; 338 t->nout++;
347 f->waited = 0; 339 f->waited = 0;
348 f->waited_total = 0; 340 f->waited_total = 0;
349 if (f->buf)
350 f->lba = f->buf->sector;
351 341
352 /* set up ata header */ 342 /* set up ata header */
353 ah->scnt = f->bcnt >> 9; 343 ah->scnt = f->iter.bi_size >> 9;
354 put_lba(ah, f->lba); 344 put_lba(ah, f->iter.bi_sector);
355 if (t->d->flags & DEVFL_EXT) { 345 if (t->d->flags & DEVFL_EXT) {
356 ah->aflags |= AOEAFL_EXT; 346 ah->aflags |= AOEAFL_EXT;
357 } else { 347 } else {
@@ -360,11 +350,11 @@ ata_rw_frameinit(struct frame *f)
360 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ 350 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
361 } 351 }
362 if (f->buf && bio_data_dir(f->buf->bio) == WRITE) { 352 if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
363 skb_fillup(skb, f->bv, f->bv_off, f->bcnt); 353 skb_fillup(skb, f->buf->bio, f->iter);
364 ah->aflags |= AOEAFL_WRITE; 354 ah->aflags |= AOEAFL_WRITE;
365 skb->len += f->bcnt; 355 skb->len += f->iter.bi_size;
366 skb->data_len = f->bcnt; 356 skb->data_len = f->iter.bi_size;
367 skb->truesize += f->bcnt; 357 skb->truesize += f->iter.bi_size;
368 t->wpkts++; 358 t->wpkts++;
369 } else { 359 } else {
370 t->rpkts++; 360 t->rpkts++;
@@ -382,7 +372,6 @@ aoecmd_ata_rw(struct aoedev *d)
382 struct buf *buf; 372 struct buf *buf;
383 struct sk_buff *skb; 373 struct sk_buff *skb;
384 struct sk_buff_head queue; 374 struct sk_buff_head queue;
385 ulong bcnt, fbcnt;
386 375
387 buf = nextbuf(d); 376 buf = nextbuf(d);
388 if (buf == NULL) 377 if (buf == NULL)
@@ -390,39 +379,22 @@ aoecmd_ata_rw(struct aoedev *d)
390 f = newframe(d); 379 f = newframe(d);
391 if (f == NULL) 380 if (f == NULL)
392 return 0; 381 return 0;
393 bcnt = d->maxbcnt;
394 if (bcnt == 0)
395 bcnt = DEFAULTBCNT;
396 if (bcnt > buf->resid)
397 bcnt = buf->resid;
398 fbcnt = bcnt;
399 f->bv = buf->bv;
400 f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
401 do {
402 if (fbcnt < buf->bv_resid) {
403 buf->bv_resid -= fbcnt;
404 buf->resid -= fbcnt;
405 break;
406 }
407 fbcnt -= buf->bv_resid;
408 buf->resid -= buf->bv_resid;
409 if (buf->resid == 0) {
410 d->ip.buf = NULL;
411 break;
412 }
413 buf->bv++;
414 buf->bv_resid = buf->bv->bv_len;
415 WARN_ON(buf->bv_resid == 0);
416 } while (fbcnt);
417 382
418 /* initialize the headers & frame */ 383 /* initialize the headers & frame */
419 f->buf = buf; 384 f->buf = buf;
420 f->bcnt = bcnt; 385 f->iter = buf->iter;
421 ata_rw_frameinit(f); 386 f->iter.bi_size = min_t(unsigned long,
387 d->maxbcnt ?: DEFAULTBCNT,
388 f->iter.bi_size);
389 bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
390
391 if (!buf->iter.bi_size)
392 d->ip.buf = NULL;
422 393
423 /* mark all tracking fields and load out */ 394 /* mark all tracking fields and load out */
424 buf->nframesout += 1; 395 buf->nframesout += 1;
425 buf->sector += bcnt >> 9; 396
397 ata_rw_frameinit(f);
426 398
427 skb = skb_clone(f->skb, GFP_ATOMIC); 399 skb = skb_clone(f->skb, GFP_ATOMIC);
428 if (skb) { 400 if (skb) {
@@ -613,10 +585,7 @@ reassign_frame(struct frame *f)
613 skb = nf->skb; 585 skb = nf->skb;
614 nf->skb = f->skb; 586 nf->skb = f->skb;
615 nf->buf = f->buf; 587 nf->buf = f->buf;
616 nf->bcnt = f->bcnt; 588 nf->iter = f->iter;
617 nf->lba = f->lba;
618 nf->bv = f->bv;
619 nf->bv_off = f->bv_off;
620 nf->waited = 0; 589 nf->waited = 0;
621 nf->waited_total = f->waited_total; 590 nf->waited_total = f->waited_total;
622 nf->sent = f->sent; 591 nf->sent = f->sent;
@@ -648,19 +617,19 @@ probe(struct aoetgt *t)
648 } 617 }
649 f->flags |= FFL_PROBE; 618 f->flags |= FFL_PROBE;
650 ifrotate(t); 619 ifrotate(t);
651 f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT; 620 f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
652 ata_rw_frameinit(f); 621 ata_rw_frameinit(f);
653 skb = f->skb; 622 skb = f->skb;
654 for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) { 623 for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
655 if (n < PAGE_SIZE) 624 if (n < PAGE_SIZE)
656 m = n; 625 m = n;
657 else 626 else
658 m = PAGE_SIZE; 627 m = PAGE_SIZE;
659 skb_fill_page_desc(skb, frag, empty_page, 0, m); 628 skb_fill_page_desc(skb, frag, empty_page, 0, m);
660 } 629 }
661 skb->len += f->bcnt; 630 skb->len += f->iter.bi_size;
662 skb->data_len = f->bcnt; 631 skb->data_len = f->iter.bi_size;
663 skb->truesize += f->bcnt; 632 skb->truesize += f->iter.bi_size;
664 633
665 skb = skb_clone(f->skb, GFP_ATOMIC); 634 skb = skb_clone(f->skb, GFP_ATOMIC);
666 if (skb) { 635 if (skb) {
@@ -897,15 +866,15 @@ rqbiocnt(struct request *r)
897static void 866static void
898bio_pageinc(struct bio *bio) 867bio_pageinc(struct bio *bio)
899{ 868{
900 struct bio_vec *bv; 869 struct bio_vec bv;
901 struct page *page; 870 struct page *page;
902 int i; 871 struct bvec_iter iter;
903 872
904 bio_for_each_segment(bv, bio, i) { 873 bio_for_each_segment(bv, bio, iter) {
905 /* Non-zero page count for non-head members of 874 /* Non-zero page count for non-head members of
906 * compound pages is no longer allowed by the kernel. 875 * compound pages is no longer allowed by the kernel.
907 */ 876 */
908 page = compound_trans_head(bv->bv_page); 877 page = compound_trans_head(bv.bv_page);
909 atomic_inc(&page->_count); 878 atomic_inc(&page->_count);
910 } 879 }
911} 880}
@@ -913,12 +882,12 @@ bio_pageinc(struct bio *bio)
913static void 882static void
914bio_pagedec(struct bio *bio) 883bio_pagedec(struct bio *bio)
915{ 884{
916 struct bio_vec *bv;
917 struct page *page; 885 struct page *page;
918 int i; 886 struct bio_vec bv;
887 struct bvec_iter iter;
919 888
920 bio_for_each_segment(bv, bio, i) { 889 bio_for_each_segment(bv, bio, iter) {
921 page = compound_trans_head(bv->bv_page); 890 page = compound_trans_head(bv.bv_page);
922 atomic_dec(&page->_count); 891 atomic_dec(&page->_count);
923 } 892 }
924} 893}
@@ -929,12 +898,8 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
929 memset(buf, 0, sizeof(*buf)); 898 memset(buf, 0, sizeof(*buf));
930 buf->rq = rq; 899 buf->rq = rq;
931 buf->bio = bio; 900 buf->bio = bio;
932 buf->resid = bio->bi_size; 901 buf->iter = bio->bi_iter;
933 buf->sector = bio->bi_sector;
934 bio_pageinc(bio); 902 bio_pageinc(bio);
935 buf->bv = bio_iovec(bio);
936 buf->bv_resid = buf->bv->bv_len;
937 WARN_ON(buf->bv_resid == 0);
938} 903}
939 904
940static struct buf * 905static struct buf *
@@ -1119,24 +1084,18 @@ gettgt(struct aoedev *d, char *addr)
1119} 1084}
1120 1085
1121static void 1086static void
1122bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt) 1087bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
1123{ 1088{
1124 ulong fcnt;
1125 char *p;
1126 int soff = 0; 1089 int soff = 0;
1127loop: 1090 struct bio_vec bv;
1128 fcnt = bv->bv_len - (off - bv->bv_offset); 1091
1129 if (fcnt > cnt) 1092 iter.bi_size = cnt;
1130 fcnt = cnt; 1093
1131 p = page_address(bv->bv_page) + off; 1094 __bio_for_each_segment(bv, bio, iter, iter) {
1132 skb_copy_bits(skb, soff, p, fcnt); 1095 char *p = page_address(bv.bv_page) + bv.bv_offset;
1133 soff += fcnt; 1096 skb_copy_bits(skb, soff, p, bv.bv_len);
1134 cnt -= fcnt; 1097 soff += bv.bv_len;
1135 if (cnt <= 0) 1098 }
1136 return;
1137 bv++;
1138 off = bv->bv_offset;
1139 goto loop;
1140} 1099}
1141 1100
1142void 1101void
@@ -1152,7 +1111,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
1152 do { 1111 do {
1153 bio = rq->bio; 1112 bio = rq->bio;
1154 bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags); 1113 bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
1155 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size)); 1114 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
1156 1115
1157 /* cf. http://lkml.org/lkml/2006/10/31/28 */ 1116 /* cf. http://lkml.org/lkml/2006/10/31/28 */
1158 if (!fastfail) 1117 if (!fastfail)
@@ -1229,7 +1188,15 @@ noskb: if (buf)
1229 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1188 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1230 break; 1189 break;
1231 } 1190 }
1232 bvcpy(f->bv, f->bv_off, skb, n); 1191 if (n > f->iter.bi_size) {
1192 pr_err_ratelimited("%s e%ld.%d. bytes=%ld need=%u\n",
1193 "aoe: too-large data size in read from",
1194 (long) d->aoemajor, d->aoeminor,
1195 n, f->iter.bi_size);
1196 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1197 break;
1198 }
1199 bvcpy(skb, f->buf->bio, f->iter, n);
1233 case ATA_CMD_PIO_WRITE: 1200 case ATA_CMD_PIO_WRITE:
1234 case ATA_CMD_PIO_WRITE_EXT: 1201 case ATA_CMD_PIO_WRITE_EXT:
1235 spin_lock_irq(&d->lock); 1202 spin_lock_irq(&d->lock);
@@ -1272,7 +1239,7 @@ out:
1272 1239
1273 aoe_freetframe(f); 1240 aoe_freetframe(f);
1274 1241
1275 if (buf && --buf->nframesout == 0 && buf->resid == 0) 1242 if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
1276 aoe_end_buf(d, buf); 1243 aoe_end_buf(d, buf);
1277 1244
1278 spin_unlock_irq(&d->lock); 1245 spin_unlock_irq(&d->lock);
@@ -1727,7 +1694,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
1727{ 1694{
1728 if (buf == NULL) 1695 if (buf == NULL)
1729 return; 1696 return;
1730 buf->resid = 0; 1697 buf->iter.bi_size = 0;
1731 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1698 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1732 if (buf->nframesout == 0) 1699 if (buf->nframesout == 0)
1733 aoe_end_buf(d, buf); 1700 aoe_end_buf(d, buf);
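The aoe conversion above replaces the driver's own bv/bv_off/resid/sector bookkeeping with a struct bvec_iter that is copied into each frame and advanced with bio_advance_iter(), leaving the segment list itself untouched. Below is a userspace analogue of that iterator-over-immutable-segments idea; struct seg, struct seg_iter and iter_advance() are illustrative stand-ins, not the kernel types.

/*
 * Sketch: a small caller-owned iterator over a fixed segment array, advanced
 * by byte count the way bio_advance_iter() advances a bvec_iter.
 */
#include <stdio.h>

struct seg {                      /* stands in for struct bio_vec */
        unsigned int len;
};

struct seg_iter {                 /* stands in for struct bvec_iter */
        unsigned int size;        /* bytes remaining, like bi_size */
        unsigned int idx;         /* current segment, like bi_idx */
        unsigned int done;        /* bytes consumed in current segment */
};

static void iter_advance(const struct seg *segs, struct seg_iter *it,
                         unsigned int bytes)
{
        it->size -= bytes;
        while (bytes) {
                unsigned int left = segs[it->idx].len - it->done;
                unsigned int step = bytes < left ? bytes : left;

                it->done += step;
                bytes -= step;
                if (it->done == segs[it->idx].len) {
                        it->idx++;
                        it->done = 0;
                }
        }
}

int main(void)
{
        struct seg segs[] = { { 512 }, { 1024 }, { 512 } };
        struct seg_iter it = { .size = 2048, .idx = 0, .done = 0 };

        iter_advance(segs, &it, 768);   /* split a frame-sized chunk off */
        printf("remaining=%u idx=%u done=%u\n", it.size, it.idx, it.done);
        return 0;
}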
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index d91f1a56e861..e73b85cf0756 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -328,18 +328,18 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
328 struct block_device *bdev = bio->bi_bdev; 328 struct block_device *bdev = bio->bi_bdev;
329 struct brd_device *brd = bdev->bd_disk->private_data; 329 struct brd_device *brd = bdev->bd_disk->private_data;
330 int rw; 330 int rw;
331 struct bio_vec *bvec; 331 struct bio_vec bvec;
332 sector_t sector; 332 sector_t sector;
333 int i; 333 struct bvec_iter iter;
334 int err = -EIO; 334 int err = -EIO;
335 335
336 sector = bio->bi_sector; 336 sector = bio->bi_iter.bi_sector;
337 if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) 337 if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
338 goto out; 338 goto out;
339 339
340 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 340 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
341 err = 0; 341 err = 0;
342 discard_from_brd(brd, sector, bio->bi_size); 342 discard_from_brd(brd, sector, bio->bi_iter.bi_size);
343 goto out; 343 goto out;
344 } 344 }
345 345
@@ -347,10 +347,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
347 if (rw == READA) 347 if (rw == READA)
348 rw = READ; 348 rw = READ;
349 349
350 bio_for_each_segment(bvec, bio, i) { 350 bio_for_each_segment(bvec, bio, iter) {
351 unsigned int len = bvec->bv_len; 351 unsigned int len = bvec.bv_len;
352 err = brd_do_bvec(brd, bvec->bv_page, len, 352 err = brd_do_bvec(brd, bvec.bv_page, len,
353 bvec->bv_offset, rw, sector); 353 bvec.bv_offset, rw, sector);
354 if (err) 354 if (err)
355 break; 355 break;
356 sector += len >> SECTOR_SHIFT; 356 sector += len >> SECTOR_SHIFT;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b35fc4f5237c..036e8ab86c71 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -5004,7 +5004,7 @@ reinit_after_soft_reset:
5004 5004
5005 i = alloc_cciss_hba(pdev); 5005 i = alloc_cciss_hba(pdev);
5006 if (i < 0) 5006 if (i < 0)
5007 return -1; 5007 return -ENOMEM;
5008 5008
5009 h = hba[i]; 5009 h = hba[i];
5010 h->pdev = pdev; 5010 h->pdev = pdev;
@@ -5205,7 +5205,7 @@ clean_no_release_regions:
5205 */ 5205 */
5206 pci_set_drvdata(pdev, NULL); 5206 pci_set_drvdata(pdev, NULL);
5207 free_hba(h); 5207 free_hba(h);
5208 return -1; 5208 return -ENODEV;
5209} 5209}
5210 5210
5211static void cciss_shutdown(struct pci_dev *pdev) 5211static void cciss_shutdown(struct pci_dev *pdev)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 28c73ca320a8..a9b13f2cc420 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -159,7 +159,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
159 159
160 bio = bio_alloc_drbd(GFP_NOIO); 160 bio = bio_alloc_drbd(GFP_NOIO);
161 bio->bi_bdev = bdev->md_bdev; 161 bio->bi_bdev = bdev->md_bdev;
162 bio->bi_sector = sector; 162 bio->bi_iter.bi_sector = sector;
163 err = -EIO; 163 err = -EIO;
164 if (bio_add_page(bio, page, size, 0) != size) 164 if (bio_add_page(bio, page, size, 0) != size)
165 goto out; 165 goto out;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b12c11ec4bd2..597f111df67b 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1028,7 +1028,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
1028 } else 1028 } else
1029 page = b->bm_pages[page_nr]; 1029 page = b->bm_pages[page_nr];
1030 bio->bi_bdev = mdev->ldev->md_bdev; 1030 bio->bi_bdev = mdev->ldev->md_bdev;
1031 bio->bi_sector = on_disk_sector; 1031 bio->bi_iter.bi_sector = on_disk_sector;
1032 /* bio_add_page of a single page to an empty bio will always succeed, 1032 /* bio_add_page of a single page to an empty bio will always succeed,
1033 * according to api. Do we want to assert that? */ 1033 * according to api. Do we want to assert that? */
1034 bio_add_page(bio, page, len, 0); 1034 bio_add_page(bio, page, len, 0);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9e3818b1bc83..929468e1512a 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1537,15 +1537,17 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
1537 1537
1538static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) 1538static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
1539{ 1539{
1540 struct bio_vec *bvec; 1540 struct bio_vec bvec;
1541 int i; 1541 struct bvec_iter iter;
1542
1542 /* hint all but last page with MSG_MORE */ 1543 /* hint all but last page with MSG_MORE */
1543 bio_for_each_segment(bvec, bio, i) { 1544 bio_for_each_segment(bvec, bio, iter) {
1544 int err; 1545 int err;
1545 1546
1546 err = _drbd_no_send_page(mdev, bvec->bv_page, 1547 err = _drbd_no_send_page(mdev, bvec.bv_page,
1547 bvec->bv_offset, bvec->bv_len, 1548 bvec.bv_offset, bvec.bv_len,
1548 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE); 1549 bio_iter_last(bvec, iter)
1550 ? 0 : MSG_MORE);
1549 if (err) 1551 if (err)
1550 return err; 1552 return err;
1551 } 1553 }
@@ -1554,15 +1556,16 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
1554 1556
1555static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) 1557static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
1556{ 1558{
1557 struct bio_vec *bvec; 1559 struct bio_vec bvec;
1558 int i; 1560 struct bvec_iter iter;
1561
1559 /* hint all but last page with MSG_MORE */ 1562 /* hint all but last page with MSG_MORE */
1560 bio_for_each_segment(bvec, bio, i) { 1563 bio_for_each_segment(bvec, bio, iter) {
1561 int err; 1564 int err;
1562 1565
1563 err = _drbd_send_page(mdev, bvec->bv_page, 1566 err = _drbd_send_page(mdev, bvec.bv_page,
1564 bvec->bv_offset, bvec->bv_len, 1567 bvec.bv_offset, bvec.bv_len,
1565 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE); 1568 bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
1566 if (err) 1569 if (err)
1567 return err; 1570 return err;
1568 } 1571 }
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6fa6673b36b3..d073305ffd5e 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1333,7 +1333,7 @@ next_bio:
1333 goto fail; 1333 goto fail;
1334 } 1334 }
1335 /* > peer_req->i.sector, unless this is the first bio */ 1335 /* > peer_req->i.sector, unless this is the first bio */
1336 bio->bi_sector = sector; 1336 bio->bi_iter.bi_sector = sector;
1337 bio->bi_bdev = mdev->ldev->backing_bdev; 1337 bio->bi_bdev = mdev->ldev->backing_bdev;
1338 bio->bi_rw = rw; 1338 bio->bi_rw = rw;
1339 bio->bi_private = peer_req; 1339 bio->bi_private = peer_req;
@@ -1353,7 +1353,7 @@ next_bio:
1353 dev_err(DEV, 1353 dev_err(DEV,
1354 "bio_add_page failed for len=%u, " 1354 "bio_add_page failed for len=%u, "
1355 "bi_vcnt=0 (bi_sector=%llu)\n", 1355 "bi_vcnt=0 (bi_sector=%llu)\n",
1356 len, (unsigned long long)bio->bi_sector); 1356 len, (uint64_t)bio->bi_iter.bi_sector);
1357 err = -ENOSPC; 1357 err = -ENOSPC;
1358 goto fail; 1358 goto fail;
1359 } 1359 }
@@ -1595,9 +1595,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1595static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, 1595static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1596 sector_t sector, int data_size) 1596 sector_t sector, int data_size)
1597{ 1597{
1598 struct bio_vec *bvec; 1598 struct bio_vec bvec;
1599 struct bvec_iter iter;
1599 struct bio *bio; 1600 struct bio *bio;
1600 int dgs, err, i, expect; 1601 int dgs, err, expect;
1601 void *dig_in = mdev->tconn->int_dig_in; 1602 void *dig_in = mdev->tconn->int_dig_in;
1602 void *dig_vv = mdev->tconn->int_dig_vv; 1603 void *dig_vv = mdev->tconn->int_dig_vv;
1603 1604
@@ -1615,13 +1616,13 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1615 mdev->recv_cnt += data_size>>9; 1616 mdev->recv_cnt += data_size>>9;
1616 1617
1617 bio = req->master_bio; 1618 bio = req->master_bio;
1618 D_ASSERT(sector == bio->bi_sector); 1619 D_ASSERT(sector == bio->bi_iter.bi_sector);
1619 1620
1620 bio_for_each_segment(bvec, bio, i) { 1621 bio_for_each_segment(bvec, bio, iter) {
1621 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset; 1622 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1622 expect = min_t(int, data_size, bvec->bv_len); 1623 expect = min_t(int, data_size, bvec.bv_len);
1623 err = drbd_recv_all_warn(mdev->tconn, mapped, expect); 1624 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
1624 kunmap(bvec->bv_page); 1625 kunmap(bvec.bv_page);
1625 if (err) 1626 if (err)
1626 return err; 1627 return err;
1627 data_size -= expect; 1628 data_size -= expect;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index fec7bef44994..104a040f24de 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -77,8 +77,8 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
77 req->epoch = 0; 77 req->epoch = 0;
78 78
79 drbd_clear_interval(&req->i); 79 drbd_clear_interval(&req->i);
80 req->i.sector = bio_src->bi_sector; 80 req->i.sector = bio_src->bi_iter.bi_sector;
81 req->i.size = bio_src->bi_size; 81 req->i.size = bio_src->bi_iter.bi_size;
82 req->i.local = true; 82 req->i.local = true;
83 req->i.waiting = false; 83 req->i.waiting = false;
84 84
@@ -1280,7 +1280,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
1280 /* 1280 /*
1281 * what we "blindly" assume: 1281 * what we "blindly" assume:
1282 */ 1282 */
1283 D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); 1283 D_ASSERT(IS_ALIGNED(bio->bi_iter.bi_size, 512));
1284 1284
1285 inc_ap_bio(mdev); 1285 inc_ap_bio(mdev);
1286 __drbd_make_request(mdev, bio, start_time); 1286 __drbd_make_request(mdev, bio, start_time);
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 978cb1addc98..28e15d91197a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -269,7 +269,7 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
269 269
270/* Short lived temporary struct on the stack. 270/* Short lived temporary struct on the stack.
271 * We could squirrel the error to be returned into 271 * We could squirrel the error to be returned into
272 * bio->bi_size, or similar. But that would be too ugly. */ 272 * bio->bi_iter.bi_size, or similar. But that would be too ugly. */
273struct bio_and_error { 273struct bio_and_error {
274 struct bio *bio; 274 struct bio *bio;
275 int error; 275 int error;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 891c0ecaa292..84d3175d493a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -313,8 +313,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
313{ 313{
314 struct hash_desc desc; 314 struct hash_desc desc;
315 struct scatterlist sg; 315 struct scatterlist sg;
316 struct bio_vec *bvec; 316 struct bio_vec bvec;
317 int i; 317 struct bvec_iter iter;
318 318
319 desc.tfm = tfm; 319 desc.tfm = tfm;
320 desc.flags = 0; 320 desc.flags = 0;
@@ -322,8 +322,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
322 sg_init_table(&sg, 1); 322 sg_init_table(&sg, 1);
323 crypto_hash_init(&desc); 323 crypto_hash_init(&desc);
324 324
325 bio_for_each_segment(bvec, bio, i) { 325 bio_for_each_segment(bvec, bio, iter) {
326 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); 326 sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
327 crypto_hash_update(&desc, &sg, sg.length); 327 crypto_hash_update(&desc, &sg, sg.length);
328 } 328 }
329 crypto_hash_final(&desc, digest); 329 crypto_hash_final(&desc, digest);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 000abe2f105c..2023043ce7c0 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2351,7 +2351,7 @@ static void rw_interrupt(void)
2351/* Compute maximal contiguous buffer size. */ 2351/* Compute maximal contiguous buffer size. */
2352static int buffer_chain_size(void) 2352static int buffer_chain_size(void)
2353{ 2353{
2354 struct bio_vec *bv; 2354 struct bio_vec bv;
2355 int size; 2355 int size;
2356 struct req_iterator iter; 2356 struct req_iterator iter;
2357 char *base; 2357 char *base;
@@ -2360,10 +2360,10 @@ static int buffer_chain_size(void)
2360 size = 0; 2360 size = 0;
2361 2361
2362 rq_for_each_segment(bv, current_req, iter) { 2362 rq_for_each_segment(bv, current_req, iter) {
2363 if (page_address(bv->bv_page) + bv->bv_offset != base + size) 2363 if (page_address(bv.bv_page) + bv.bv_offset != base + size)
2364 break; 2364 break;
2365 2365
2366 size += bv->bv_len; 2366 size += bv.bv_len;
2367 } 2367 }
2368 2368
2369 return size >> 9; 2369 return size >> 9;
@@ -2389,7 +2389,7 @@ static int transfer_size(int ssize, int max_sector, int max_size)
2389static void copy_buffer(int ssize, int max_sector, int max_sector_2) 2389static void copy_buffer(int ssize, int max_sector, int max_sector_2)
2390{ 2390{
2391 int remaining; /* number of transferred 512-byte sectors */ 2391 int remaining; /* number of transferred 512-byte sectors */
2392 struct bio_vec *bv; 2392 struct bio_vec bv;
2393 char *buffer; 2393 char *buffer;
2394 char *dma_buffer; 2394 char *dma_buffer;
2395 int size; 2395 int size;
@@ -2427,10 +2427,10 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
2427 if (!remaining) 2427 if (!remaining)
2428 break; 2428 break;
2429 2429
2430 size = bv->bv_len; 2430 size = bv.bv_len;
2431 SUPBOUND(size, remaining); 2431 SUPBOUND(size, remaining);
2432 2432
2433 buffer = page_address(bv->bv_page) + bv->bv_offset; 2433 buffer = page_address(bv.bv_page) + bv.bv_offset;
2434 if (dma_buffer + size > 2434 if (dma_buffer + size >
2435 floppy_track_buffer + (max_buffer_sectors << 10) || 2435 floppy_track_buffer + (max_buffer_sectors << 10) ||
2436 dma_buffer < floppy_track_buffer) { 2436 dma_buffer < floppy_track_buffer) {
@@ -3691,9 +3691,12 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
3691 if (!(mode & FMODE_NDELAY)) { 3691 if (!(mode & FMODE_NDELAY)) {
3692 if (mode & (FMODE_READ|FMODE_WRITE)) { 3692 if (mode & (FMODE_READ|FMODE_WRITE)) {
3693 UDRS->last_checked = 0; 3693 UDRS->last_checked = 0;
3694 clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
3694 check_disk_change(bdev); 3695 check_disk_change(bdev);
3695 if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) 3696 if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
3696 goto out; 3697 goto out;
3698 if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
3699 goto out;
3697 } 3700 }
3698 res = -EROFS; 3701 res = -EROFS;
3699 if ((mode & FMODE_WRITE) && 3702 if ((mode & FMODE_WRITE) &&
@@ -3746,17 +3749,29 @@ static unsigned int floppy_check_events(struct gendisk *disk,
3746 * a disk in the drive, and whether that disk is writable. 3749 * a disk in the drive, and whether that disk is writable.
3747 */ 3750 */
3748 3751
3749static void floppy_rb0_complete(struct bio *bio, int err) 3752struct rb0_cbdata {
3753 int drive;
3754 struct completion complete;
3755};
3756
3757static void floppy_rb0_cb(struct bio *bio, int err)
3750{ 3758{
3751 complete((struct completion *)bio->bi_private); 3759 struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
3760 int drive = cbdata->drive;
3761
3762 if (err) {
3763 pr_info("floppy: error %d while reading block 0", err);
3764 set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
3765 }
3766 complete(&cbdata->complete);
3752} 3767}
3753 3768
3754static int __floppy_read_block_0(struct block_device *bdev) 3769static int __floppy_read_block_0(struct block_device *bdev, int drive)
3755{ 3770{
3756 struct bio bio; 3771 struct bio bio;
3757 struct bio_vec bio_vec; 3772 struct bio_vec bio_vec;
3758 struct completion complete;
3759 struct page *page; 3773 struct page *page;
3774 struct rb0_cbdata cbdata;
3760 size_t size; 3775 size_t size;
3761 3776
3762 page = alloc_page(GFP_NOIO); 3777 page = alloc_page(GFP_NOIO);
@@ -3769,23 +3784,26 @@ static int __floppy_read_block_0(struct block_device *bdev)
3769 if (!size) 3784 if (!size)
3770 size = 1024; 3785 size = 1024;
3771 3786
3787 cbdata.drive = drive;
3788
3772 bio_init(&bio); 3789 bio_init(&bio);
3773 bio.bi_io_vec = &bio_vec; 3790 bio.bi_io_vec = &bio_vec;
3774 bio_vec.bv_page = page; 3791 bio_vec.bv_page = page;
3775 bio_vec.bv_len = size; 3792 bio_vec.bv_len = size;
3776 bio_vec.bv_offset = 0; 3793 bio_vec.bv_offset = 0;
3777 bio.bi_vcnt = 1; 3794 bio.bi_vcnt = 1;
3778 bio.bi_size = size; 3795 bio.bi_iter.bi_size = size;
3779 bio.bi_bdev = bdev; 3796 bio.bi_bdev = bdev;
3780 bio.bi_sector = 0; 3797 bio.bi_iter.bi_sector = 0;
3781 bio.bi_flags = (1 << BIO_QUIET); 3798 bio.bi_flags = (1 << BIO_QUIET);
3782 init_completion(&complete); 3799 bio.bi_private = &cbdata;
3783 bio.bi_private = &complete; 3800 bio.bi_end_io = floppy_rb0_cb;
3784 bio.bi_end_io = floppy_rb0_complete;
3785 3801
3786 submit_bio(READ, &bio); 3802 submit_bio(READ, &bio);
3787 process_fd_request(); 3803 process_fd_request();
3788 wait_for_completion(&complete); 3804
3805 init_completion(&cbdata.complete);
3806 wait_for_completion(&cbdata.complete);
3789 3807
3790 __free_page(page); 3808 __free_page(page);
3791 3809
@@ -3827,7 +3845,7 @@ static int floppy_revalidate(struct gendisk *disk)
3827 UDRS->generation++; 3845 UDRS->generation++;
3828 if (drive_no_geom(drive)) { 3846 if (drive_no_geom(drive)) {
3829 /* auto-sensing */ 3847 /* auto-sensing */
3830 res = __floppy_read_block_0(opened_bdev[drive]); 3848 res = __floppy_read_block_0(opened_bdev[drive], drive);
3831 } else { 3849 } else {
3832 if (cf) 3850 if (cf)
3833 poll_drive(false, FD_RAW_NEED_DISK); 3851 poll_drive(false, FD_RAW_NEED_DISK);
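The floppy hunks above replace the bare completion in bi_private with an rb0_cbdata carrying the drive number, so floppy_rb0_cb() can record the error (FD_OPEN_SHOULD_FAIL_BIT) before completing. A rough userspace analogue of handing the end-io callback a context struct (result plus completion) is sketched below; the names are invented, and it assumes POSIX threads and semaphores (build with -pthread).

/*
 * Sketch: the waiter passes the callback a small context so the callback can
 * store a status before signalling, instead of signalling a bare completion.
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

struct rb0_ctx {                  /* stands in for struct rb0_cbdata */
        int err;
        sem_t done;               /* stands in for struct completion */
};

static void read_done(struct rb0_ctx *ctx, int err)   /* the "bi_end_io" */
{
        if (err)
                fprintf(stderr, "read of block 0 failed: %d\n", err);
        ctx->err = err;
        sem_post(&ctx->done);
}

static void *fake_io(void *arg)   /* stands in for the submitted bio */
{
        read_done(arg, -5);       /* pretend the read failed with -EIO */
        return NULL;
}

int main(void)
{
        struct rb0_ctx ctx = { .err = 0 };
        pthread_t t;

        sem_init(&ctx.done, 0, 0);        /* set up before kicking off I/O */
        pthread_create(&t, NULL, fake_io, &ctx);
        sem_wait(&ctx.done);              /* wait_for_completion() analogue */
        pthread_join(t, NULL);
        return ctx.err ? 1 : 0;
}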
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c8dac7305244..66e8c3b94ef3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -288,9 +288,10 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
288{ 288{
289 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, 289 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
290 struct page *page); 290 struct page *page);
291 struct bio_vec *bvec; 291 struct bio_vec bvec;
292 struct bvec_iter iter;
292 struct page *page = NULL; 293 struct page *page = NULL;
293 int i, ret = 0; 294 int ret = 0;
294 295
295 if (lo->transfer != transfer_none) { 296 if (lo->transfer != transfer_none) {
296 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 297 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
@@ -302,11 +303,11 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
302 do_lo_send = do_lo_send_direct_write; 303 do_lo_send = do_lo_send_direct_write;
303 } 304 }
304 305
305 bio_for_each_segment(bvec, bio, i) { 306 bio_for_each_segment(bvec, bio, iter) {
306 ret = do_lo_send(lo, bvec, pos, page); 307 ret = do_lo_send(lo, &bvec, pos, page);
307 if (ret < 0) 308 if (ret < 0)
308 break; 309 break;
309 pos += bvec->bv_len; 310 pos += bvec.bv_len;
310 } 311 }
311 if (page) { 312 if (page) {
312 kunmap(page); 313 kunmap(page);
@@ -392,20 +393,20 @@ do_lo_receive(struct loop_device *lo,
392static int 393static int
393lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) 394lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
394{ 395{
395 struct bio_vec *bvec; 396 struct bio_vec bvec;
397 struct bvec_iter iter;
396 ssize_t s; 398 ssize_t s;
397 int i;
398 399
399 bio_for_each_segment(bvec, bio, i) { 400 bio_for_each_segment(bvec, bio, iter) {
400 s = do_lo_receive(lo, bvec, bsize, pos); 401 s = do_lo_receive(lo, &bvec, bsize, pos);
401 if (s < 0) 402 if (s < 0)
402 return s; 403 return s;
403 404
404 if (s != bvec->bv_len) { 405 if (s != bvec.bv_len) {
405 zero_fill_bio(bio); 406 zero_fill_bio(bio);
406 break; 407 break;
407 } 408 }
408 pos += bvec->bv_len; 409 pos += bvec.bv_len;
409 } 410 }
410 return 0; 411 return 0;
411} 412}
@@ -415,7 +416,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
415 loff_t pos; 416 loff_t pos;
416 int ret; 417 int ret;
417 418
418 pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; 419 pos = ((loff_t) bio->bi_iter.bi_sector << 9) + lo->lo_offset;
419 420
420 if (bio_rw(bio) == WRITE) { 421 if (bio_rw(bio) == WRITE) {
421 struct file *file = lo->lo_backing_file; 422 struct file *file = lo->lo_backing_file;
@@ -444,7 +445,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
444 goto out; 445 goto out;
445 } 446 }
446 ret = file->f_op->fallocate(file, mode, pos, 447 ret = file->f_op->fallocate(file, mode, pos,
447 bio->bi_size); 448 bio->bi_iter.bi_size);
448 if (unlikely(ret && ret != -EINVAL && 449 if (unlikely(ret && ret != -EINVAL &&
449 ret != -EOPNOTSUPP)) 450 ret != -EOPNOTSUPP))
450 ret = -EIO; 451 ret = -EIO;
@@ -798,7 +799,7 @@ static void loop_config_discard(struct loop_device *lo)
798 799
799 /* 800 /*
800 * We use punch hole to reclaim the free space used by the 801 * We use punch hole to reclaim the free space used by the
801 * image a.k.a. discard. However we do support discard if 802 * image a.k.a. discard. However we do not support discard if
802 * encryption is enabled, because it may give an attacker 803 * encryption is enabled, because it may give an attacker
803 * useful information. 804 * useful information.
804 */ 805 */
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 7bc363f1ee82..eb59b1241366 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -915,7 +915,7 @@ static int mg_probe(struct platform_device *plat_dev)
915 915
916 /* disk reset */ 916 /* disk reset */
917 if (prv_data->dev_attr == MG_STORAGE_DEV) { 917 if (prv_data->dev_attr == MG_STORAGE_DEV) {
918 /* If POR seq. not yet finised, wait */ 918 /* If POR seq. not yet finished, wait */
919 err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT); 919 err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
920 if (err) 920 if (err)
921 goto probe_err_3b; 921 goto probe_err_3b;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 050c71267f14..516026954be6 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -41,10 +41,31 @@
41#include "mtip32xx.h" 41#include "mtip32xx.h"
42 42
43#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) 43#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
44#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16)) 44
45#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS) 45/* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */
46#define HW_PORT_PRIV_DMA_SZ \ 46#define AHCI_RX_FIS_SZ 0x100
47 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) 47#define AHCI_RX_FIS_OFFSET 0x0
48#define AHCI_IDFY_SZ ATA_SECT_SIZE
49#define AHCI_IDFY_OFFSET 0x400
50#define AHCI_SECTBUF_SZ ATA_SECT_SIZE
51#define AHCI_SECTBUF_OFFSET 0x800
52#define AHCI_SMARTBUF_SZ ATA_SECT_SIZE
53#define AHCI_SMARTBUF_OFFSET 0xC00
54/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
55#define BLOCK_DMA_ALLOC_SZ 4096
56
57/* DMA region containing command table (should be 8192 bytes) */
58#define AHCI_CMD_SLOT_SZ sizeof(struct mtip_cmd_hdr)
59#define AHCI_CMD_TBL_SZ (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
60#define AHCI_CMD_TBL_OFFSET 0x0
61
62/* DMA region per command (contains header and SGL) */
63#define AHCI_CMD_TBL_HDR_SZ 0x80
64#define AHCI_CMD_TBL_HDR_OFFSET 0x0
65#define AHCI_CMD_TBL_SGL_SZ (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
66#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
67#define CMD_DMA_ALLOC_SZ (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
68
48 69
49#define HOST_CAP_NZDMA (1 << 19) 70#define HOST_CAP_NZDMA (1 << 19)
50#define HOST_HSORG 0xFC 71#define HOST_HSORG 0xFC
@@ -899,8 +920,9 @@ static void mtip_handle_tfe(struct driver_data *dd)
899 fail_reason = "thermal shutdown"; 920 fail_reason = "thermal shutdown";
900 } 921 }
901 if (buf[288] == 0xBF) { 922 if (buf[288] == 0xBF) {
923 set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
902 dev_info(&dd->pdev->dev, 924 dev_info(&dd->pdev->dev,
903 "Drive indicates rebuild has failed.\n"); 925 "Drive indicates rebuild has failed. Secure erase required.\n");
904 fail_all_ncq_cmds = 1; 926 fail_all_ncq_cmds = 1;
905 fail_reason = "rebuild failed"; 927 fail_reason = "rebuild failed";
906 } 928 }
@@ -1566,6 +1588,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1566 } 1588 }
1567#endif 1589#endif
1568 1590
1591 /* Check security locked state */
1592 if (port->identify[128] & 0x4)
1593 set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1594 else
1595 clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1596
1569#ifdef MTIP_TRIM /* Disabling TRIM support temporarily */ 1597#ifdef MTIP_TRIM /* Disabling TRIM support temporarily */
1570 /* Demux ID.DRAT & ID.RZAT to determine trim support */ 1598 /* Demux ID.DRAT & ID.RZAT to determine trim support */
1571 if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) 1599 if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
@@ -1887,6 +1915,10 @@ static void mtip_dump_identify(struct mtip_port *port)
1887 strlcpy(cbuf, (char *)(port->identify+27), 41); 1915 strlcpy(cbuf, (char *)(port->identify+27), 41);
1888 dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf); 1916 dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1889 1917
1918 dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
1919 port->identify[128],
1920 port->identify[128] & 0x4 ? "(LOCKED)" : "");
1921
1890 if (mtip_hw_get_capacity(port->dd, &sectors)) 1922 if (mtip_hw_get_capacity(port->dd, &sectors))
1891 dev_info(&port->dd->pdev->dev, 1923 dev_info(&port->dd->pdev->dev,
1892 "Capacity: %llu sectors (%llu MB)\n", 1924 "Capacity: %llu sectors (%llu MB)\n",
@@ -3313,6 +3345,118 @@ st_out:
3313} 3345}
3314 3346
3315/* 3347/*
3348 * DMA region teardown
3349 *
3350 * @dd Pointer to driver_data structure
3351 *
3352 * return value
3353 * None
3354 */
3355static void mtip_dma_free(struct driver_data *dd)
3356{
3357 int i;
3358 struct mtip_port *port = dd->port;
3359
3360 if (port->block1)
3361 dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3362 port->block1, port->block1_dma);
3363
3364 if (port->command_list) {
3365 dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3366 port->command_list, port->command_list_dma);
3367 }
3368
3369 for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3370 if (port->commands[i].command)
3371 dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3372 port->commands[i].command,
3373 port->commands[i].command_dma);
3374 }
3375}
3376
3377/*
3378 * DMA region setup
3379 *
3380 * @dd Pointer to driver_data structure
3381 *
3382 * return value
3383 * -ENOMEM Not enough free DMA region space to initialize driver
3384 */
3385static int mtip_dma_alloc(struct driver_data *dd)
3386{
3387 struct mtip_port *port = dd->port;
3388 int i, rv = 0;
3389 u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
3390
 3391	/* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
3392 port->block1 =
3393 dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3394 &port->block1_dma, GFP_KERNEL);
3395 if (!port->block1)
3396 return -ENOMEM;
3397 memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
3398
3399 /* Allocate dma memory for command list */
3400 port->command_list =
3401 dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3402 &port->command_list_dma, GFP_KERNEL);
3403 if (!port->command_list) {
3404 dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3405 port->block1, port->block1_dma);
3406 port->block1 = NULL;
3407 port->block1_dma = 0;
3408 return -ENOMEM;
3409 }
3410 memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
3411
3412 /* Setup all pointers into first DMA region */
3413 port->rxfis = port->block1 + AHCI_RX_FIS_OFFSET;
3414 port->rxfis_dma = port->block1_dma + AHCI_RX_FIS_OFFSET;
3415 port->identify = port->block1 + AHCI_IDFY_OFFSET;
3416 port->identify_dma = port->block1_dma + AHCI_IDFY_OFFSET;
3417 port->log_buf = port->block1 + AHCI_SECTBUF_OFFSET;
3418 port->log_buf_dma = port->block1_dma + AHCI_SECTBUF_OFFSET;
3419 port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET;
3420 port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
3421
3422 /* Setup per command SGL DMA region */
3423
3424 /* Point the command headers at the command tables */
3425 for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3426 port->commands[i].command =
3427 dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3428 &port->commands[i].command_dma, GFP_KERNEL);
3429 if (!port->commands[i].command) {
3430 rv = -ENOMEM;
3431 mtip_dma_free(dd);
3432 return rv;
3433 }
3434 memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
3435
3436 port->commands[i].command_header = port->command_list +
3437 (sizeof(struct mtip_cmd_hdr) * i);
3438 port->commands[i].command_header_dma =
3439 dd->port->command_list_dma +
3440 (sizeof(struct mtip_cmd_hdr) * i);
3441
3442 if (host_cap_64)
3443 port->commands[i].command_header->ctbau =
3444 __force_bit2int cpu_to_le32(
3445 (port->commands[i].command_dma >> 16) >> 16);
3446
3447 port->commands[i].command_header->ctba =
3448 __force_bit2int cpu_to_le32(
3449 port->commands[i].command_dma & 0xFFFFFFFF);
3450
3451 sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
3452
3453 /* Mark command as currently inactive */
3454 atomic_set(&dd->port->commands[i].active, 0);
3455 }
3456 return 0;
3457}
3458
3459/*
3316 * Called once for each card. 3460 * Called once for each card.
3317 * 3461 *
3318 * @dd Pointer to the driver data structure. 3462 * @dd Pointer to the driver data structure.
@@ -3370,83 +3514,10 @@ static int mtip_hw_init(struct driver_data *dd)
3370 dd->port->mmio = dd->mmio + PORT_OFFSET; 3514 dd->port->mmio = dd->mmio + PORT_OFFSET;
3371 dd->port->dd = dd; 3515 dd->port->dd = dd;
3372 3516
3373 /* Allocate memory for the command list. */ 3517 /* DMA allocations */
3374 dd->port->command_list = 3518 rv = mtip_dma_alloc(dd);
3375 dmam_alloc_coherent(&dd->pdev->dev, 3519 if (rv < 0)
3376 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3377 &dd->port->command_list_dma,
3378 GFP_KERNEL);
3379 if (!dd->port->command_list) {
3380 dev_err(&dd->pdev->dev,
3381 "Memory allocation: command list\n");
3382 rv = -ENOMEM;
3383 goto out1; 3520 goto out1;
3384 }
3385
3386 /* Clear the memory we have allocated. */
3387 memset(dd->port->command_list,
3388 0,
3389 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
3390
3391	/* Setup the address of the RX FIS. */
3392 dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
3393 dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
3394
3395 /* Setup the address of the command tables. */
3396 dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
3397 dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
3398
3399 /* Setup the address of the identify data. */
3400 dd->port->identify = dd->port->command_table +
3401 HW_CMD_TBL_AR_SZ;
3402 dd->port->identify_dma = dd->port->command_tbl_dma +
3403 HW_CMD_TBL_AR_SZ;
3404
3405 /* Setup the address of the sector buffer - for some non-ncq cmds */
3406 dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
3407 dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
3408
3409 /* Setup the address of the log buf - for read log command */
3410 dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE;
3411 dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
3412
3413 /* Setup the address of the smart buf - for smart read data command */
3414 dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE;
3415 dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
3416
3417
3418 /* Point the command headers at the command tables. */
3419 for (i = 0; i < num_command_slots; i++) {
3420 dd->port->commands[i].command_header =
3421 dd->port->command_list +
3422 (sizeof(struct mtip_cmd_hdr) * i);
3423 dd->port->commands[i].command_header_dma =
3424 dd->port->command_list_dma +
3425 (sizeof(struct mtip_cmd_hdr) * i);
3426
3427 dd->port->commands[i].command =
3428 dd->port->command_table + (HW_CMD_TBL_SZ * i);
3429 dd->port->commands[i].command_dma =
3430 dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
3431
3432 if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
3433 dd->port->commands[i].command_header->ctbau =
3434 __force_bit2int cpu_to_le32(
3435 (dd->port->commands[i].command_dma >> 16) >> 16);
3436 dd->port->commands[i].command_header->ctba =
3437 __force_bit2int cpu_to_le32(
3438 dd->port->commands[i].command_dma & 0xFFFFFFFF);
3439
3440 /*
3441 * If this is not done, a bug is reported by the stock
3442 * FC11 i386. Due to the fact that it has lots of kernel
3443 * debugging enabled.
3444 */
3445 sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
3446
3447 /* Mark all commands as currently inactive.*/
3448 atomic_set(&dd->port->commands[i].active, 0);
3449 }
3450 3521
3451 /* Setup the pointers to the extended s_active and CI registers. */ 3522 /* Setup the pointers to the extended s_active and CI registers. */
3452 for (i = 0; i < dd->slot_groups; i++) { 3523 for (i = 0; i < dd->slot_groups; i++) {
@@ -3594,12 +3665,8 @@ out3:
3594 3665
3595out2: 3666out2:
3596 mtip_deinit_port(dd->port); 3667 mtip_deinit_port(dd->port);
3668 mtip_dma_free(dd);
3597 3669
3598 /* Free the command/command header memory. */
3599 dmam_free_coherent(&dd->pdev->dev,
3600 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3601 dd->port->command_list,
3602 dd->port->command_list_dma);
3603out1: 3670out1:
3604	/* Free the memory allocated for the port structure. */	3671	/* Free the memory allocated for the port structure. */
3605 kfree(dd->port); 3672 kfree(dd->port);
@@ -3622,7 +3689,8 @@ static int mtip_hw_exit(struct driver_data *dd)
3622 * saves its state. 3689 * saves its state.
3623 */ 3690 */
3624 if (!dd->sr) { 3691 if (!dd->sr) {
3625 if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) 3692 if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
3693 !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
3626 if (mtip_standby_immediate(dd->port)) 3694 if (mtip_standby_immediate(dd->port))
3627 dev_warn(&dd->pdev->dev, 3695 dev_warn(&dd->pdev->dev,
3628 "STANDBY IMMEDIATE failed\n"); 3696 "STANDBY IMMEDIATE failed\n");
@@ -3641,11 +3709,9 @@ static int mtip_hw_exit(struct driver_data *dd)
3641 irq_set_affinity_hint(dd->pdev->irq, NULL); 3709 irq_set_affinity_hint(dd->pdev->irq, NULL);
3642 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); 3710 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3643 3711
3644 /* Free the command/command header memory. */ 3712 /* Free dma regions */
3645 dmam_free_coherent(&dd->pdev->dev, 3713 mtip_dma_free(dd);
3646 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), 3714
3647 dd->port->command_list,
3648 dd->port->command_list_dma);
3649	/* Free the memory allocated for the port structure. */	3715	/* Free the memory allocated for the port structure. */
3650 kfree(dd->port); 3716 kfree(dd->port);
3651 dd->port = NULL; 3717 dd->port = NULL;
@@ -3962,8 +4028,9 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3962{ 4028{
3963 struct driver_data *dd = queue->queuedata; 4029 struct driver_data *dd = queue->queuedata;
3964 struct scatterlist *sg; 4030 struct scatterlist *sg;
3965 struct bio_vec *bvec; 4031 struct bio_vec bvec;
3966 int i, nents = 0; 4032 struct bvec_iter iter;
4033 int nents = 0;
3967 int tag = 0, unaligned = 0; 4034 int tag = 0, unaligned = 0;
3968 4035
3969 if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { 4036 if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
@@ -3993,7 +4060,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3993 } 4060 }
3994 4061
3995 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 4062 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
3996 bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, 4063 bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector,
3997 bio_sectors(bio))); 4064 bio_sectors(bio)));
3998 return; 4065 return;
3999 } 4066 }
@@ -4006,7 +4073,8 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
4006 4073
4007 if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 && 4074 if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
4008 dd->unal_qdepth) { 4075 dd->unal_qdepth) {
4009 if (bio->bi_sector % 8 != 0) /* Unaligned on 4k boundaries */ 4076 if (bio->bi_iter.bi_sector % 8 != 0)
4077 /* Unaligned on 4k boundaries */
4010 unaligned = 1; 4078 unaligned = 1;
4011 else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */ 4079 else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
4012 unaligned = 1; 4080 unaligned = 1;
@@ -4025,17 +4093,17 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
4025 } 4093 }
4026 4094
4027 /* Create the scatter list for this bio. */ 4095 /* Create the scatter list for this bio. */
4028 bio_for_each_segment(bvec, bio, i) { 4096 bio_for_each_segment(bvec, bio, iter) {
4029 sg_set_page(&sg[nents], 4097 sg_set_page(&sg[nents],
4030 bvec->bv_page, 4098 bvec.bv_page,
4031 bvec->bv_len, 4099 bvec.bv_len,
4032 bvec->bv_offset); 4100 bvec.bv_offset);
4033 nents++; 4101 nents++;
4034 } 4102 }
4035 4103
4036 /* Issue the read/write. */ 4104 /* Issue the read/write. */
4037 mtip_hw_submit_io(dd, 4105 mtip_hw_submit_io(dd,
4038 bio->bi_sector, 4106 bio->bi_iter.bi_sector,
4039 bio_sectors(bio), 4107 bio_sectors(bio),
4040 nents, 4108 nents,
4041 tag, 4109 tag,
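This hunk is part of the immutable-biovec conversion: bio_for_each_segment() is now driven by a struct bvec_iter and hands back struct bio_vec by value instead of a pointer plus an integer index, and the starting sector and size live under bio->bi_iter. A minimal sketch of the new iteration shape, with an illustrative helper rather than the driver's submit path:

#include <linux/bio.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>

static int example_bio_to_sg(struct bio *bio, struct scatterlist *sgl, int max)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	int nents = 0;

	bio_for_each_segment(bvec, bio, iter) {
		if (nents >= max)
			return -EINVAL;
		/* bvec is a value now, so its fields are read with '.', not '->' */
		sg_set_page(&sgl[nents++], bvec.bv_page, bvec.bv_len,
			    bvec.bv_offset);
	}
	return nents;
}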
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 9be7a1582ad3..b52e9a6d6aad 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -69,7 +69,7 @@
69 * Maximum number of scatter gather entries 69 * Maximum number of scatter gather entries
70 * a single command may have. 70 * a single command may have.
71 */ 71 */
72#define MTIP_MAX_SG 128 72#define MTIP_MAX_SG 504
73 73
74/* 74/*
75 * Maximum number of slot groups (Command Issue & s_active registers) 75 * Maximum number of slot groups (Command Issue & s_active registers)
@@ -92,7 +92,7 @@
92 92
93/* Driver name and version strings */ 93/* Driver name and version strings */
94#define MTIP_DRV_NAME "mtip32xx" 94#define MTIP_DRV_NAME "mtip32xx"
95#define MTIP_DRV_VERSION "1.2.6os3" 95#define MTIP_DRV_VERSION "1.3.0"
96 96
97/* Maximum number of minor device numbers per device. */ 97/* Maximum number of minor device numbers per device. */
98#define MTIP_MAX_MINORS 16 98#define MTIP_MAX_MINORS 16
@@ -391,15 +391,13 @@ struct mtip_port {
391 */ 391 */
392 dma_addr_t rxfis_dma; 392 dma_addr_t rxfis_dma;
393 /* 393 /*
394 * Pointer to the beginning of the command table memory as used 394 * Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
395 * by the driver.
396 */ 395 */
397 void *command_table; 396 void *block1;
398 /* 397 /*
399 * Pointer to the beginning of the command table memory as used 398 * DMA address of region for RX Fis, Identify, RLE10, and SMART
400 * by the DMA.
401 */ 399 */
402 dma_addr_t command_tbl_dma; 400 dma_addr_t block1_dma;
403 /* 401 /*
404 * Pointer to the beginning of the identify data memory as used 402 * Pointer to the beginning of the identify data memory as used
405 * by the driver. 403 * by the driver.
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2dc3b5153f0d..55298db36b2d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -271,18 +271,18 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
271 271
272 if (nbd_cmd(req) == NBD_CMD_WRITE) { 272 if (nbd_cmd(req) == NBD_CMD_WRITE) {
273 struct req_iterator iter; 273 struct req_iterator iter;
274 struct bio_vec *bvec; 274 struct bio_vec bvec;
275 /* 275 /*
276 * we are really probing at internals to determine 276 * we are really probing at internals to determine
277 * whether to set MSG_MORE or not... 277 * whether to set MSG_MORE or not...
278 */ 278 */
279 rq_for_each_segment(bvec, req, iter) { 279 rq_for_each_segment(bvec, req, iter) {
280 flags = 0; 280 flags = 0;
281 if (!rq_iter_last(req, iter)) 281 if (!rq_iter_last(bvec, iter))
282 flags = MSG_MORE; 282 flags = MSG_MORE;
283 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", 283 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
284 nbd->disk->disk_name, req, bvec->bv_len); 284 nbd->disk->disk_name, req, bvec.bv_len);
285 result = sock_send_bvec(nbd, bvec, flags); 285 result = sock_send_bvec(nbd, &bvec, flags);
286 if (result <= 0) { 286 if (result <= 0) {
287 dev_err(disk_to_dev(nbd->disk), 287 dev_err(disk_to_dev(nbd->disk),
288 "Send data failed (result %d)\n", 288 "Send data failed (result %d)\n",
@@ -378,10 +378,10 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
378 nbd->disk->disk_name, req); 378 nbd->disk->disk_name, req);
379 if (nbd_cmd(req) == NBD_CMD_READ) { 379 if (nbd_cmd(req) == NBD_CMD_READ) {
380 struct req_iterator iter; 380 struct req_iterator iter;
381 struct bio_vec *bvec; 381 struct bio_vec bvec;
382 382
383 rq_for_each_segment(bvec, req, iter) { 383 rq_for_each_segment(bvec, req, iter) {
384 result = sock_recv_bvec(nbd, bvec); 384 result = sock_recv_bvec(nbd, &bvec);
385 if (result <= 0) { 385 if (result <= 0) {
386 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 386 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
387 result); 387 result);
@@ -389,7 +389,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
389 return req; 389 return req;
390 } 390 }
391 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", 391 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
392 nbd->disk->disk_name, req, bvec->bv_len); 392 nbd->disk->disk_name, req, bvec.bv_len);
393 } 393 }
394 } 394 }
395 return req; 395 return req;
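The nbd conversion is the request-level counterpart of the same change: rq_for_each_segment() also yields the bio_vec by value, and rq_iter_last() is now passed the current bvec and the iterator rather than the request. A small sketch under those assumptions; example_send_segment() is a made-up stand-in for a transport helper such as sock_send_bvec():

#include <linux/blkdev.h>
#include <linux/errno.h>
#include <linux/socket.h>

/* hypothetical transport helper, not a real kernel API */
static int example_send_segment(struct bio_vec *bvec, int flags)
{
	pr_debug("sending %u bytes (flags=%d)\n", bvec->bv_len, flags);
	return bvec->bv_len;
}

static int example_send_request(struct request *req)
{
	struct req_iterator iter;
	struct bio_vec bvec;
	int ret;

	rq_for_each_segment(bvec, req, iter) {
		/* rq_iter_last() now takes the current bvec and the iterator */
		int flags = rq_iter_last(bvec, iter) ? 0 : MSG_MORE;

		ret = example_send_segment(&bvec, flags);
		if (ret <= 0)
			return ret ? ret : -EIO;
	}
	return 0;
}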
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 83a598ebb65a..3107282a9741 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -616,6 +616,11 @@ static int __init null_init(void)
616 irqmode = NULL_IRQ_NONE; 616 irqmode = NULL_IRQ_NONE;
617 } 617 }
618#endif 618#endif
619 if (bs > PAGE_SIZE) {
620 pr_warn("null_blk: invalid block size\n");
621 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
622 bs = PAGE_SIZE;
623 }
619 624
620 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { 625 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
621 if (submit_queues < nr_online_nodes) { 626 if (submit_queues < nr_online_nodes) {
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 26d03fa0bf26..1f14ac403945 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
441 return total_len; 441 return total_len;
442} 442}
443 443
444struct nvme_bio_pair {
445 struct bio b1, b2, *parent;
446 struct bio_vec *bv1, *bv2;
447 int err;
448 atomic_t cnt;
449};
450
451static void nvme_bio_pair_endio(struct bio *bio, int err)
452{
453 struct nvme_bio_pair *bp = bio->bi_private;
454
455 if (err)
456 bp->err = err;
457
458 if (atomic_dec_and_test(&bp->cnt)) {
459 bio_endio(bp->parent, bp->err);
460 kfree(bp->bv1);
461 kfree(bp->bv2);
462 kfree(bp);
463 }
464}
465
466static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
467 int len, int offset)
468{
469 struct nvme_bio_pair *bp;
470
471 BUG_ON(len > bio->bi_size);
472 BUG_ON(idx > bio->bi_vcnt);
473
474 bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
475 if (!bp)
476 return NULL;
477 bp->err = 0;
478
479 bp->b1 = *bio;
480 bp->b2 = *bio;
481
482 bp->b1.bi_size = len;
483 bp->b2.bi_size -= len;
484 bp->b1.bi_vcnt = idx;
485 bp->b2.bi_idx = idx;
486 bp->b2.bi_sector += len >> 9;
487
488 if (offset) {
489 bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
490 GFP_ATOMIC);
491 if (!bp->bv1)
492 goto split_fail_1;
493
494 bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
495 GFP_ATOMIC);
496 if (!bp->bv2)
497 goto split_fail_2;
498
499 memcpy(bp->bv1, bio->bi_io_vec,
500 bio->bi_max_vecs * sizeof(struct bio_vec));
501 memcpy(bp->bv2, bio->bi_io_vec,
502 bio->bi_max_vecs * sizeof(struct bio_vec));
503
504 bp->b1.bi_io_vec = bp->bv1;
505 bp->b2.bi_io_vec = bp->bv2;
506 bp->b2.bi_io_vec[idx].bv_offset += offset;
507 bp->b2.bi_io_vec[idx].bv_len -= offset;
508 bp->b1.bi_io_vec[idx].bv_len = offset;
509 bp->b1.bi_vcnt++;
510 } else
511 bp->bv1 = bp->bv2 = NULL;
512
513 bp->b1.bi_private = bp;
514 bp->b2.bi_private = bp;
515
516 bp->b1.bi_end_io = nvme_bio_pair_endio;
517 bp->b2.bi_end_io = nvme_bio_pair_endio;
518
519 bp->parent = bio;
520 atomic_set(&bp->cnt, 2);
521
522 return bp;
523
524 split_fail_2:
525 kfree(bp->bv1);
526 split_fail_1:
527 kfree(bp);
528 return NULL;
529}
530
531static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, 444static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
532 int idx, int len, int offset) 445 int len)
533{ 446{
534 struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset); 447 struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
535 if (!bp) 448 if (!split)
536 return -ENOMEM; 449 return -ENOMEM;
537 450
451 bio_chain(split, bio);
452
538 if (bio_list_empty(&nvmeq->sq_cong)) 453 if (bio_list_empty(&nvmeq->sq_cong))
539 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); 454 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
540 bio_list_add(&nvmeq->sq_cong, &bp->b1); 455 bio_list_add(&nvmeq->sq_cong, split);
541 bio_list_add(&nvmeq->sq_cong, &bp->b2); 456 bio_list_add(&nvmeq->sq_cong, bio);
542 457
543 return 0; 458 return 0;
544} 459}
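The open-coded nvme_bio_pair machinery is replaced by the generic bio_split()/bio_chain() pair: bio_split() clones the bio and trims the clone down to the first chunk, and bio_chain() ties the parent's completion to the child's. A minimal sketch of that pattern, with a hypothetical requeue list rather than the nvme submission queue:

#include <linux/bio.h>
#include <linux/errno.h>

static int example_split_and_requeue(struct bio *bio, unsigned int sectors,
				     struct bio_list *requeue)
{
	struct bio *split = bio_split(bio, sectors, GFP_ATOMIC, NULL);

	if (!split)
		return -ENOMEM;

	/* the parent bio completes only after the split child completes too */
	bio_chain(split, bio);

	/* hand both pieces back; the split half must be issued first */
	bio_list_add(requeue, split);
	bio_list_add(requeue, bio);
	return 0;
}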
@@ -550,41 +465,44 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
550static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, 465static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
551 struct bio *bio, enum dma_data_direction dma_dir, int psegs) 466 struct bio *bio, enum dma_data_direction dma_dir, int psegs)
552{ 467{
553 struct bio_vec *bvec, *bvprv = NULL; 468 struct bio_vec bvec, bvprv;
469 struct bvec_iter iter;
554 struct scatterlist *sg = NULL; 470 struct scatterlist *sg = NULL;
555 int i, length = 0, nsegs = 0, split_len = bio->bi_size; 471 int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size;
472 int first = 1;
556 473
557 if (nvmeq->dev->stripe_size) 474 if (nvmeq->dev->stripe_size)
558 split_len = nvmeq->dev->stripe_size - 475 split_len = nvmeq->dev->stripe_size -
559 ((bio->bi_sector << 9) & (nvmeq->dev->stripe_size - 1)); 476 ((bio->bi_iter.bi_sector << 9) &
477 (nvmeq->dev->stripe_size - 1));
560 478
561 sg_init_table(iod->sg, psegs); 479 sg_init_table(iod->sg, psegs);
562 bio_for_each_segment(bvec, bio, i) { 480 bio_for_each_segment(bvec, bio, iter) {
563 if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) { 481 if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
564 sg->length += bvec->bv_len; 482 sg->length += bvec.bv_len;
565 } else { 483 } else {
566 if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec)) 484 if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
567 return nvme_split_and_submit(bio, nvmeq, i, 485 return nvme_split_and_submit(bio, nvmeq,
568 length, 0); 486 length);
569 487
570 sg = sg ? sg + 1 : iod->sg; 488 sg = sg ? sg + 1 : iod->sg;
571 sg_set_page(sg, bvec->bv_page, bvec->bv_len, 489 sg_set_page(sg, bvec.bv_page,
572 bvec->bv_offset); 490 bvec.bv_len, bvec.bv_offset);
573 nsegs++; 491 nsegs++;
574 } 492 }
575 493
576 if (split_len - length < bvec->bv_len) 494 if (split_len - length < bvec.bv_len)
577 return nvme_split_and_submit(bio, nvmeq, i, split_len, 495 return nvme_split_and_submit(bio, nvmeq, split_len);
578 split_len - length); 496 length += bvec.bv_len;
579 length += bvec->bv_len;
580 bvprv = bvec; 497 bvprv = bvec;
498 first = 0;
581 } 499 }
582 iod->nents = nsegs; 500 iod->nents = nsegs;
583 sg_mark_end(sg); 501 sg_mark_end(sg);
584 if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0) 502 if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
585 return -ENOMEM; 503 return -ENOMEM;
586 504
587 BUG_ON(length != bio->bi_size); 505 BUG_ON(length != bio->bi_iter.bi_size);
588 return length; 506 return length;
589} 507}
590 508
@@ -608,8 +526,8 @@ static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
608 iod->npages = 0; 526 iod->npages = 0;
609 527
610 range->cattr = cpu_to_le32(0); 528 range->cattr = cpu_to_le32(0);
611 range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift); 529 range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
612 range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector)); 530 range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
613 531
614 memset(cmnd, 0, sizeof(*cmnd)); 532 memset(cmnd, 0, sizeof(*cmnd));
615 cmnd->dsm.opcode = nvme_cmd_dsm; 533 cmnd->dsm.opcode = nvme_cmd_dsm;
@@ -674,7 +592,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
674 } 592 }
675 593
676 result = -ENOMEM; 594 result = -ENOMEM;
677 iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC); 595 iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
678 if (!iod) 596 if (!iod)
679 goto nomem; 597 goto nomem;
680 iod->private = bio; 598 iod->private = bio;
@@ -723,7 +641,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
723 cmnd->rw.nsid = cpu_to_le32(ns->ns_id); 641 cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
724 length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length, 642 length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
725 GFP_ATOMIC); 643 GFP_ATOMIC);
726 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector)); 644 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
727 cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1); 645 cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
728 cmnd->rw.control = cpu_to_le16(control); 646 cmnd->rw.control = cpu_to_le16(control);
729 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 647 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 4a27b1de5fcb..2ce3dfd7e6b9 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -581,7 +581,7 @@ static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count,
581 581
582 if (hdr.magic != PG_MAGIC) 582 if (hdr.magic != PG_MAGIC)
583 return -EINVAL; 583 return -EINVAL;
584 if (hdr.dlen > PG_MAX_DATA) 584 if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
585 return -EINVAL; 585 return -EINVAL;
586 if ((count - hs) > PG_MAX_DATA) 586 if ((count - hs) > PG_MAX_DATA)
587 return -EINVAL; 587 return -EINVAL;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ff8668c5efb1..a2af73db187b 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -651,7 +651,7 @@ static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s
651 651
652 for (;;) { 652 for (;;) {
653 tmp = rb_entry(n, struct pkt_rb_node, rb_node); 653 tmp = rb_entry(n, struct pkt_rb_node, rb_node);
654 if (s <= tmp->bio->bi_sector) 654 if (s <= tmp->bio->bi_iter.bi_sector)
655 next = n->rb_left; 655 next = n->rb_left;
656 else 656 else
657 next = n->rb_right; 657 next = n->rb_right;
@@ -660,12 +660,12 @@ static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s
660 n = next; 660 n = next;
661 } 661 }
662 662
663 if (s > tmp->bio->bi_sector) { 663 if (s > tmp->bio->bi_iter.bi_sector) {
664 tmp = pkt_rbtree_next(tmp); 664 tmp = pkt_rbtree_next(tmp);
665 if (!tmp) 665 if (!tmp)
666 return NULL; 666 return NULL;
667 } 667 }
668 BUG_ON(s > tmp->bio->bi_sector); 668 BUG_ON(s > tmp->bio->bi_iter.bi_sector);
669 return tmp; 669 return tmp;
670} 670}
671 671
@@ -676,13 +676,13 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
676{ 676{
677 struct rb_node **p = &pd->bio_queue.rb_node; 677 struct rb_node **p = &pd->bio_queue.rb_node;
678 struct rb_node *parent = NULL; 678 struct rb_node *parent = NULL;
679 sector_t s = node->bio->bi_sector; 679 sector_t s = node->bio->bi_iter.bi_sector;
680 struct pkt_rb_node *tmp; 680 struct pkt_rb_node *tmp;
681 681
682 while (*p) { 682 while (*p) {
683 parent = *p; 683 parent = *p;
684 tmp = rb_entry(parent, struct pkt_rb_node, rb_node); 684 tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
685 if (s < tmp->bio->bi_sector) 685 if (s < tmp->bio->bi_iter.bi_sector)
686 p = &(*p)->rb_left; 686 p = &(*p)->rb_left;
687 else 687 else
688 p = &(*p)->rb_right; 688 p = &(*p)->rb_right;
@@ -706,7 +706,9 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
706 WRITE : READ, __GFP_WAIT); 706 WRITE : READ, __GFP_WAIT);
707 707
708 if (cgc->buflen) { 708 if (cgc->buflen) {
709 if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT)) 709 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
710 __GFP_WAIT);
711 if (ret)
710 goto out; 712 goto out;
711 } 713 }
712 714
@@ -857,7 +859,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
857 spin_lock(&pd->iosched.lock); 859 spin_lock(&pd->iosched.lock);
858 bio = bio_list_peek(&pd->iosched.write_queue); 860 bio = bio_list_peek(&pd->iosched.write_queue);
859 spin_unlock(&pd->iosched.lock); 861 spin_unlock(&pd->iosched.lock);
860 if (bio && (bio->bi_sector == pd->iosched.last_write)) 862 if (bio && (bio->bi_iter.bi_sector ==
863 pd->iosched.last_write))
861 need_write_seek = 0; 864 need_write_seek = 0;
862 if (need_write_seek && reads_queued) { 865 if (need_write_seek && reads_queued) {
863 if (atomic_read(&pd->cdrw.pending_bios) > 0) { 866 if (atomic_read(&pd->cdrw.pending_bios) > 0) {
@@ -888,7 +891,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
888 continue; 891 continue;
889 892
890 if (bio_data_dir(bio) == READ) 893 if (bio_data_dir(bio) == READ)
891 pd->iosched.successive_reads += bio->bi_size >> 10; 894 pd->iosched.successive_reads +=
895 bio->bi_iter.bi_size >> 10;
892 else { 896 else {
893 pd->iosched.successive_reads = 0; 897 pd->iosched.successive_reads = 0;
894 pd->iosched.last_write = bio_end_sector(bio); 898 pd->iosched.last_write = bio_end_sector(bio);
@@ -978,7 +982,7 @@ static void pkt_end_io_read(struct bio *bio, int err)
978 982
979 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", 983 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
980 bio, (unsigned long long)pkt->sector, 984 bio, (unsigned long long)pkt->sector,
981 (unsigned long long)bio->bi_sector, err); 985 (unsigned long long)bio->bi_iter.bi_sector, err);
982 986
983 if (err) 987 if (err)
984 atomic_inc(&pkt->io_errors); 988 atomic_inc(&pkt->io_errors);
@@ -1026,8 +1030,9 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1026 memset(written, 0, sizeof(written)); 1030 memset(written, 0, sizeof(written));
1027 spin_lock(&pkt->lock); 1031 spin_lock(&pkt->lock);
1028 bio_list_for_each(bio, &pkt->orig_bios) { 1032 bio_list_for_each(bio, &pkt->orig_bios) {
1029 int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9); 1033 int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
1030 int num_frames = bio->bi_size / CD_FRAMESIZE; 1034 (CD_FRAMESIZE >> 9);
1035 int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
1031 pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9); 1036 pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
1032 BUG_ON(first_frame < 0); 1037 BUG_ON(first_frame < 0);
1033 BUG_ON(first_frame + num_frames > pkt->frames); 1038 BUG_ON(first_frame + num_frames > pkt->frames);
@@ -1053,7 +1058,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1053 1058
1054 bio = pkt->r_bios[f]; 1059 bio = pkt->r_bios[f];
1055 bio_reset(bio); 1060 bio_reset(bio);
1056 bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); 1061 bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
1057 bio->bi_bdev = pd->bdev; 1062 bio->bi_bdev = pd->bdev;
1058 bio->bi_end_io = pkt_end_io_read; 1063 bio->bi_end_io = pkt_end_io_read;
1059 bio->bi_private = pkt; 1064 bio->bi_private = pkt;
@@ -1150,8 +1155,8 @@ static int pkt_start_recovery(struct packet_data *pkt)
1150 bio_reset(pkt->bio); 1155 bio_reset(pkt->bio);
1151 pkt->bio->bi_bdev = pd->bdev; 1156 pkt->bio->bi_bdev = pd->bdev;
1152 pkt->bio->bi_rw = REQ_WRITE; 1157 pkt->bio->bi_rw = REQ_WRITE;
1153 pkt->bio->bi_sector = new_sector; 1158 pkt->bio->bi_iter.bi_sector = new_sector;
1154 pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE; 1159 pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
1155 pkt->bio->bi_vcnt = pkt->frames; 1160 pkt->bio->bi_vcnt = pkt->frames;
1156 1161
1157 pkt->bio->bi_end_io = pkt_end_io_packet_write; 1162 pkt->bio->bi_end_io = pkt_end_io_packet_write;
@@ -1213,7 +1218,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
1213 node = first_node; 1218 node = first_node;
1214 while (node) { 1219 while (node) {
1215 bio = node->bio; 1220 bio = node->bio;
1216 zone = get_zone(bio->bi_sector, pd); 1221 zone = get_zone(bio->bi_iter.bi_sector, pd);
1217 list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { 1222 list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
1218 if (p->sector == zone) { 1223 if (p->sector == zone) {
1219 bio = NULL; 1224 bio = NULL;
@@ -1252,14 +1257,14 @@ try_next_bio:
1252 pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone); 1257 pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
1253 while ((node = pkt_rbtree_find(pd, zone)) != NULL) { 1258 while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
1254 bio = node->bio; 1259 bio = node->bio;
1255 pkt_dbg(2, pd, "found zone=%llx\n", 1260 pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
1256 (unsigned long long)get_zone(bio->bi_sector, pd)); 1261 get_zone(bio->bi_iter.bi_sector, pd));
1257 if (get_zone(bio->bi_sector, pd) != zone) 1262 if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
1258 break; 1263 break;
1259 pkt_rbtree_erase(pd, node); 1264 pkt_rbtree_erase(pd, node);
1260 spin_lock(&pkt->lock); 1265 spin_lock(&pkt->lock);
1261 bio_list_add(&pkt->orig_bios, bio); 1266 bio_list_add(&pkt->orig_bios, bio);
1262 pkt->write_size += bio->bi_size / CD_FRAMESIZE; 1267 pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
1263 spin_unlock(&pkt->lock); 1268 spin_unlock(&pkt->lock);
1264 } 1269 }
1265 /* check write congestion marks, and if bio_queue_size is 1270 /* check write congestion marks, and if bio_queue_size is
@@ -1293,7 +1298,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1293 struct bio_vec *bvec = pkt->w_bio->bi_io_vec; 1298 struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
1294 1299
1295 bio_reset(pkt->w_bio); 1300 bio_reset(pkt->w_bio);
1296 pkt->w_bio->bi_sector = pkt->sector; 1301 pkt->w_bio->bi_iter.bi_sector = pkt->sector;
1297 pkt->w_bio->bi_bdev = pd->bdev; 1302 pkt->w_bio->bi_bdev = pd->bdev;
1298 pkt->w_bio->bi_end_io = pkt_end_io_packet_write; 1303 pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
1299 pkt->w_bio->bi_private = pkt; 1304 pkt->w_bio->bi_private = pkt;
@@ -2335,75 +2340,29 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
2335 pkt_bio_finished(pd); 2340 pkt_bio_finished(pd);
2336} 2341}
2337 2342
2338static void pkt_make_request(struct request_queue *q, struct bio *bio) 2343static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
2339{ 2344{
2340 struct pktcdvd_device *pd; 2345 struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
2341 char b[BDEVNAME_SIZE]; 2346 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
2347
2348 psd->pd = pd;
2349 psd->bio = bio;
2350 cloned_bio->bi_bdev = pd->bdev;
2351 cloned_bio->bi_private = psd;
2352 cloned_bio->bi_end_io = pkt_end_io_read_cloned;
2353 pd->stats.secs_r += bio_sectors(bio);
2354 pkt_queue_bio(pd, cloned_bio);
2355}
2356
2357static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
2358{
2359 struct pktcdvd_device *pd = q->queuedata;
2342 sector_t zone; 2360 sector_t zone;
2343 struct packet_data *pkt; 2361 struct packet_data *pkt;
2344 int was_empty, blocked_bio; 2362 int was_empty, blocked_bio;
2345 struct pkt_rb_node *node; 2363 struct pkt_rb_node *node;
2346 2364
2347 pd = q->queuedata; 2365 zone = get_zone(bio->bi_iter.bi_sector, pd);
2348 if (!pd) {
2349 pr_err("%s incorrect request queue\n",
2350 bdevname(bio->bi_bdev, b));
2351 goto end_io;
2352 }
2353
2354 /*
2355 * Clone READ bios so we can have our own bi_end_io callback.
2356 */
2357 if (bio_data_dir(bio) == READ) {
2358 struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
2359 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
2360
2361 psd->pd = pd;
2362 psd->bio = bio;
2363 cloned_bio->bi_bdev = pd->bdev;
2364 cloned_bio->bi_private = psd;
2365 cloned_bio->bi_end_io = pkt_end_io_read_cloned;
2366 pd->stats.secs_r += bio_sectors(bio);
2367 pkt_queue_bio(pd, cloned_bio);
2368 return;
2369 }
2370
2371 if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
2372 pkt_notice(pd, "WRITE for ro device (%llu)\n",
2373 (unsigned long long)bio->bi_sector);
2374 goto end_io;
2375 }
2376
2377 if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
2378 pkt_err(pd, "wrong bio size\n");
2379 goto end_io;
2380 }
2381
2382 blk_queue_bounce(q, &bio);
2383
2384 zone = get_zone(bio->bi_sector, pd);
2385 pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
2386 (unsigned long long)bio->bi_sector,
2387 (unsigned long long)bio_end_sector(bio));
2388
2389 /* Check if we have to split the bio */
2390 {
2391 struct bio_pair *bp;
2392 sector_t last_zone;
2393 int first_sectors;
2394
2395 last_zone = get_zone(bio_end_sector(bio) - 1, pd);
2396 if (last_zone != zone) {
2397 BUG_ON(last_zone != zone + pd->settings.size);
2398 first_sectors = last_zone - bio->bi_sector;
2399 bp = bio_split(bio, first_sectors);
2400 BUG_ON(!bp);
2401 pkt_make_request(q, &bp->bio1);
2402 pkt_make_request(q, &bp->bio2);
2403 bio_pair_release(bp);
2404 return;
2405 }
2406 }
2407 2366
2408 /* 2367 /*
2409 * If we find a matching packet in state WAITING or READ_WAIT, we can 2368 * If we find a matching packet in state WAITING or READ_WAIT, we can
@@ -2417,7 +2376,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2417 if ((pkt->state == PACKET_WAITING_STATE) || 2376 if ((pkt->state == PACKET_WAITING_STATE) ||
2418 (pkt->state == PACKET_READ_WAIT_STATE)) { 2377 (pkt->state == PACKET_READ_WAIT_STATE)) {
2419 bio_list_add(&pkt->orig_bios, bio); 2378 bio_list_add(&pkt->orig_bios, bio);
2420 pkt->write_size += bio->bi_size / CD_FRAMESIZE; 2379 pkt->write_size +=
2380 bio->bi_iter.bi_size / CD_FRAMESIZE;
2421 if ((pkt->write_size >= pkt->frames) && 2381 if ((pkt->write_size >= pkt->frames) &&
2422 (pkt->state == PACKET_WAITING_STATE)) { 2382 (pkt->state == PACKET_WAITING_STATE)) {
2423 atomic_inc(&pkt->run_sm); 2383 atomic_inc(&pkt->run_sm);
@@ -2476,6 +2436,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2476 */ 2436 */
2477 wake_up(&pd->wqueue); 2437 wake_up(&pd->wqueue);
2478 } 2438 }
2439}
2440
2441static void pkt_make_request(struct request_queue *q, struct bio *bio)
2442{
2443 struct pktcdvd_device *pd;
2444 char b[BDEVNAME_SIZE];
2445 struct bio *split;
2446
2447 pd = q->queuedata;
2448 if (!pd) {
2449 pr_err("%s incorrect request queue\n",
2450 bdevname(bio->bi_bdev, b));
2451 goto end_io;
2452 }
2453
2454 pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
2455 (unsigned long long)bio->bi_iter.bi_sector,
2456 (unsigned long long)bio_end_sector(bio));
2457
2458 /*
2459 * Clone READ bios so we can have our own bi_end_io callback.
2460 */
2461 if (bio_data_dir(bio) == READ) {
2462 pkt_make_request_read(pd, bio);
2463 return;
2464 }
2465
2466 if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
2467 pkt_notice(pd, "WRITE for ro device (%llu)\n",
2468 (unsigned long long)bio->bi_iter.bi_sector);
2469 goto end_io;
2470 }
2471
2472 if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
2473 pkt_err(pd, "wrong bio size\n");
2474 goto end_io;
2475 }
2476
2477 blk_queue_bounce(q, &bio);
2478
2479 do {
2480 sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
2481 sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
2482
2483 if (last_zone != zone) {
2484 BUG_ON(last_zone != zone + pd->settings.size);
2485
2486 split = bio_split(bio, last_zone -
2487 bio->bi_iter.bi_sector,
2488 GFP_NOIO, fs_bio_set);
2489 bio_chain(split, bio);
2490 } else {
2491 split = bio;
2492 }
2493
2494 pkt_make_request_write(q, split);
2495 } while (split != bio);
2496
2479 return; 2497 return;
2480end_io: 2498end_io:
2481 bio_io_error(bio); 2499 bio_io_error(bio);
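pkt_make_request() now uses the same split-and-chain helpers in a loop, peeling one packet zone's worth off the front of the bio per iteration until the remainder fits inside a single zone. A sketch of the loop shape, assuming a power-of-two zone size and a made-up per-piece handler in place of pkt_make_request_write():

#include <linux/bio.h>
#include <linux/blkdev.h>

static void example_handle_piece(struct bio *piece)
{
	/* stand-in for the real per-zone write path */
	pr_debug("piece: %llu + %u sectors\n",
		 (unsigned long long)piece->bi_iter.bi_sector,
		 bio_sectors(piece));
}

static void example_split_on_boundary(struct bio *bio, sector_t zone_sectors)
{
	struct bio *split;

	/* zone_sectors is assumed to be a power of two, purely for illustration */
	do {
		sector_t zone = bio->bi_iter.bi_sector & ~(zone_sectors - 1);
		sector_t last = (bio_end_sector(bio) - 1) & ~(zone_sectors - 1);

		if (last != zone) {
			/* carve off the part that fits in the current zone */
			split = bio_split(bio, zone + zone_sectors -
					  bio->bi_iter.bi_sector,
					  GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);
		} else {
			split = bio;
		}

		example_handle_piece(split);
	} while (split != bio);
}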
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d754a88d7585..c120d70d3fb3 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -94,26 +94,25 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
94{ 94{
95 unsigned int offset = 0; 95 unsigned int offset = 0;
96 struct req_iterator iter; 96 struct req_iterator iter;
97 struct bio_vec *bvec; 97 struct bio_vec bvec;
98 unsigned int i = 0; 98 unsigned int i = 0;
99 size_t size; 99 size_t size;
100 void *buf; 100 void *buf;
101 101
102 rq_for_each_segment(bvec, req, iter) { 102 rq_for_each_segment(bvec, req, iter) {
103 unsigned long flags; 103 unsigned long flags;
104 dev_dbg(&dev->sbd.core, 104 dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %lu\n",
105 "%s:%u: bio %u: %u segs %u sectors from %lu\n", 105 __func__, __LINE__, i, bio_sectors(iter.bio),
106 __func__, __LINE__, i, bio_segments(iter.bio), 106 iter.bio->bi_iter.bi_sector);
107 bio_sectors(iter.bio), iter.bio->bi_sector);
108 107
109 size = bvec->bv_len; 108 size = bvec.bv_len;
110 buf = bvec_kmap_irq(bvec, &flags); 109 buf = bvec_kmap_irq(&bvec, &flags);
111 if (gather) 110 if (gather)
112 memcpy(dev->bounce_buf+offset, buf, size); 111 memcpy(dev->bounce_buf+offset, buf, size);
113 else 112 else
114 memcpy(buf, dev->bounce_buf+offset, size); 113 memcpy(buf, dev->bounce_buf+offset, size);
115 offset += size; 114 offset += size;
116 flush_kernel_dcache_page(bvec->bv_page); 115 flush_kernel_dcache_page(bvec.bv_page);
117 bvec_kunmap_irq(buf, &flags); 116 bvec_kunmap_irq(buf, &flags);
118 i++; 117 i++;
119 } 118 }
@@ -130,7 +129,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
130 129
131#ifdef DEBUG 130#ifdef DEBUG
132 unsigned int n = 0; 131 unsigned int n = 0;
133 struct bio_vec *bv; 132 struct bio_vec bv;
134 struct req_iterator iter; 133 struct req_iterator iter;
135 134
136 rq_for_each_segment(bv, req, iter) 135 rq_for_each_segment(bv, req, iter)
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 06a2e53e5f37..ef45cfb98fd2 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -553,16 +553,16 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
553 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); 553 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
554 int write = bio_data_dir(bio) == WRITE; 554 int write = bio_data_dir(bio) == WRITE;
555 const char *op = write ? "write" : "read"; 555 const char *op = write ? "write" : "read";
556 loff_t offset = bio->bi_sector << 9; 556 loff_t offset = bio->bi_iter.bi_sector << 9;
557 int error = 0; 557 int error = 0;
558 struct bio_vec *bvec; 558 struct bio_vec bvec;
559 unsigned int i; 559 struct bvec_iter iter;
560 struct bio *next; 560 struct bio *next;
561 561
562 bio_for_each_segment(bvec, bio, i) { 562 bio_for_each_segment(bvec, bio, iter) {
563 /* PS3 is ppc64, so we don't handle highmem */ 563 /* PS3 is ppc64, so we don't handle highmem */
564 char *ptr = page_address(bvec->bv_page) + bvec->bv_offset; 564 char *ptr = page_address(bvec.bv_page) + bvec.bv_offset;
565 size_t len = bvec->bv_len, retlen; 565 size_t len = bvec.bv_len, retlen;
566 566
567 dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op, 567 dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
568 len, offset); 568 len, offset);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 16cab6635163..b365e0dfccb6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1156,23 +1156,23 @@ static void bio_chain_put(struct bio *chain)
1156 */ 1156 */
1157static void zero_bio_chain(struct bio *chain, int start_ofs) 1157static void zero_bio_chain(struct bio *chain, int start_ofs)
1158{ 1158{
1159 struct bio_vec *bv; 1159 struct bio_vec bv;
1160 struct bvec_iter iter;
1160 unsigned long flags; 1161 unsigned long flags;
1161 void *buf; 1162 void *buf;
1162 int i;
1163 int pos = 0; 1163 int pos = 0;
1164 1164
1165 while (chain) { 1165 while (chain) {
1166 bio_for_each_segment(bv, chain, i) { 1166 bio_for_each_segment(bv, chain, iter) {
1167 if (pos + bv->bv_len > start_ofs) { 1167 if (pos + bv.bv_len > start_ofs) {
1168 int remainder = max(start_ofs - pos, 0); 1168 int remainder = max(start_ofs - pos, 0);
1169 buf = bvec_kmap_irq(bv, &flags); 1169 buf = bvec_kmap_irq(&bv, &flags);
1170 memset(buf + remainder, 0, 1170 memset(buf + remainder, 0,
1171 bv->bv_len - remainder); 1171 bv.bv_len - remainder);
1172 flush_dcache_page(bv->bv_page); 1172 flush_dcache_page(bv.bv_page);
1173 bvec_kunmap_irq(buf, &flags); 1173 bvec_kunmap_irq(buf, &flags);
1174 } 1174 }
1175 pos += bv->bv_len; 1175 pos += bv.bv_len;
1176 } 1176 }
1177 1177
1178 chain = chain->bi_next; 1178 chain = chain->bi_next;
@@ -1220,74 +1220,14 @@ static struct bio *bio_clone_range(struct bio *bio_src,
1220 unsigned int len, 1220 unsigned int len,
1221 gfp_t gfpmask) 1221 gfp_t gfpmask)
1222{ 1222{
1223 struct bio_vec *bv;
1224 unsigned int resid;
1225 unsigned short idx;
1226 unsigned int voff;
1227 unsigned short end_idx;
1228 unsigned short vcnt;
1229 struct bio *bio; 1223 struct bio *bio;
1230 1224
1231 /* Handle the easy case for the caller */ 1225 bio = bio_clone(bio_src, gfpmask);
1232
1233 if (!offset && len == bio_src->bi_size)
1234 return bio_clone(bio_src, gfpmask);
1235
1236 if (WARN_ON_ONCE(!len))
1237 return NULL;
1238 if (WARN_ON_ONCE(len > bio_src->bi_size))
1239 return NULL;
1240 if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
1241 return NULL;
1242
1243 /* Find first affected segment... */
1244
1245 resid = offset;
1246 bio_for_each_segment(bv, bio_src, idx) {
1247 if (resid < bv->bv_len)
1248 break;
1249 resid -= bv->bv_len;
1250 }
1251 voff = resid;
1252
1253 /* ...and the last affected segment */
1254
1255 resid += len;
1256 __bio_for_each_segment(bv, bio_src, end_idx, idx) {
1257 if (resid <= bv->bv_len)
1258 break;
1259 resid -= bv->bv_len;
1260 }
1261 vcnt = end_idx - idx + 1;
1262
1263 /* Build the clone */
1264
1265 bio = bio_alloc(gfpmask, (unsigned int) vcnt);
1266 if (!bio) 1226 if (!bio)
1267 return NULL; /* ENOMEM */ 1227 return NULL; /* ENOMEM */
1268 1228
1269 bio->bi_bdev = bio_src->bi_bdev; 1229 bio_advance(bio, offset);
1270 bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); 1230 bio->bi_iter.bi_size = len;
1271 bio->bi_rw = bio_src->bi_rw;
1272 bio->bi_flags |= 1 << BIO_CLONED;
1273
1274 /*
1275 * Copy over our part of the bio_vec, then update the first
1276 * and last (or only) entries.
1277 */
1278 memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
1279 vcnt * sizeof (struct bio_vec));
1280 bio->bi_io_vec[0].bv_offset += voff;
1281 if (vcnt > 1) {
1282 bio->bi_io_vec[0].bv_len -= voff;
1283 bio->bi_io_vec[vcnt - 1].bv_len = resid;
1284 } else {
1285 bio->bi_io_vec[0].bv_len = len;
1286 }
1287
1288 bio->bi_vcnt = vcnt;
1289 bio->bi_size = len;
1290 bio->bi_idx = 0;
1291 1231
1292 return bio; 1232 return bio;
1293} 1233}
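With the iterator carrying the residual offset and size, rbd's hand-rolled bio_clone_range() collapses to clone, advance, trim; bio_trim(), used in the xen-blkfront hunk further down, does the same thing in sector units. A minimal sketch, assuming a byte-granular offset and length as in the rbd caller:

#include <linux/bio.h>

static struct bio *example_clone_range(struct bio *src, unsigned int offset,
				       unsigned int len, gfp_t gfp)
{
	struct bio *clone = bio_clone(src, gfp);

	if (!clone)
		return NULL;	/* ENOMEM */

	bio_advance(clone, offset);	/* offset and len are in bytes */
	clone->bi_iter.bi_size = len;

	return clone;
}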
@@ -1318,7 +1258,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
1318 1258
1319 /* Build up a chain of clone bios up to the limit */ 1259 /* Build up a chain of clone bios up to the limit */
1320 1260
1321 if (!bi || off >= bi->bi_size || !len) 1261 if (!bi || off >= bi->bi_iter.bi_size || !len)
1322 return NULL; /* Nothing to clone */ 1262 return NULL; /* Nothing to clone */
1323 1263
1324 end = &chain; 1264 end = &chain;
@@ -1330,7 +1270,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
1330 rbd_warn(NULL, "bio_chain exhausted with %u left", len); 1270 rbd_warn(NULL, "bio_chain exhausted with %u left", len);
1331 goto out_err; /* EINVAL; ran out of bio's */ 1271 goto out_err; /* EINVAL; ran out of bio's */
1332 } 1272 }
1333 bi_size = min_t(unsigned int, bi->bi_size - off, len); 1273 bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
1334 bio = bio_clone_range(bi, off, bi_size, gfpmask); 1274 bio = bio_clone_range(bi, off, bi_size, gfpmask);
1335 if (!bio) 1275 if (!bio)
1336 goto out_err; /* ENOMEM */ 1276 goto out_err; /* ENOMEM */
@@ -1339,7 +1279,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
1339 end = &bio->bi_next; 1279 end = &bio->bi_next;
1340 1280
1341 off += bi_size; 1281 off += bi_size;
1342 if (off == bi->bi_size) { 1282 if (off == bi->bi_iter.bi_size) {
1343 bi = bi->bi_next; 1283 bi = bi->bi_next;
1344 off = 0; 1284 off = 0;
1345 } 1285 }
@@ -2227,7 +2167,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2227 2167
2228 if (type == OBJ_REQUEST_BIO) { 2168 if (type == OBJ_REQUEST_BIO) {
2229 bio_list = data_desc; 2169 bio_list = data_desc;
2230 rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT); 2170 rbd_assert(img_offset ==
2171 bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
2231 } else { 2172 } else {
2232 rbd_assert(type == OBJ_REQUEST_PAGES); 2173 rbd_assert(type == OBJ_REQUEST_PAGES);
2233 pages = data_desc; 2174 pages = data_desc;
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 2284f5d3a54a..2839d37e5af7 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -174,7 +174,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
174 if (!card) 174 if (!card)
175 goto req_err; 175 goto req_err;
176 176
177 if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk)) 177 if (bio_end_sector(bio) > get_capacity(card->gendisk))
178 goto req_err; 178 goto req_err;
179 179
180 if (unlikely(card->halt)) { 180 if (unlikely(card->halt)) {
@@ -187,7 +187,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
187 goto req_err; 187 goto req_err;
188 } 188 }
189 189
190 if (bio->bi_size == 0) { 190 if (bio->bi_iter.bi_size == 0) {
191 dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); 191 dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
192 goto req_err; 192 goto req_err;
193 } 193 }
@@ -208,7 +208,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
208 208
209 dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", 209 dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
210 bio_data_dir(bio) ? 'W' : 'R', bio_meta, 210 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
211 (u64)bio->bi_sector << 9, bio->bi_size); 211 (u64)bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
212 212
213 st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, 213 st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
214 bio_dma_done_cb, bio_meta); 214 bio_dma_done_cb, bio_meta);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index fc88ba3e1bd2..cf8cd293abb5 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -684,7 +684,8 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
684 void *cb_data) 684 void *cb_data)
685{ 685{
686 struct list_head dma_list[RSXX_MAX_TARGETS]; 686 struct list_head dma_list[RSXX_MAX_TARGETS];
687 struct bio_vec *bvec; 687 struct bio_vec bvec;
688 struct bvec_iter iter;
688 unsigned long long addr8; 689 unsigned long long addr8;
689 unsigned int laddr; 690 unsigned int laddr;
690 unsigned int bv_len; 691 unsigned int bv_len;
@@ -696,7 +697,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
696 int st; 697 int st;
697 int i; 698 int i;
698 699
699 addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ 700 addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
700 atomic_set(n_dmas, 0); 701 atomic_set(n_dmas, 0);
701 702
702 for (i = 0; i < card->n_targets; i++) { 703 for (i = 0; i < card->n_targets; i++) {
@@ -705,7 +706,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
705 } 706 }
706 707
707 if (bio->bi_rw & REQ_DISCARD) { 708 if (bio->bi_rw & REQ_DISCARD) {
708 bv_len = bio->bi_size; 709 bv_len = bio->bi_iter.bi_size;
709 710
710 while (bv_len > 0) { 711 while (bv_len > 0) {
711 tgt = rsxx_get_dma_tgt(card, addr8); 712 tgt = rsxx_get_dma_tgt(card, addr8);
@@ -722,9 +723,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
722 bv_len -= RSXX_HW_BLK_SIZE; 723 bv_len -= RSXX_HW_BLK_SIZE;
723 } 724 }
724 } else { 725 } else {
725 bio_for_each_segment(bvec, bio, i) { 726 bio_for_each_segment(bvec, bio, iter) {
726 bv_len = bvec->bv_len; 727 bv_len = bvec.bv_len;
727 bv_off = bvec->bv_offset; 728 bv_off = bvec.bv_offset;
728 729
729 while (bv_len > 0) { 730 while (bv_len > 0) {
730 tgt = rsxx_get_dma_tgt(card, addr8); 731 tgt = rsxx_get_dma_tgt(card, addr8);
@@ -736,7 +737,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
736 st = rsxx_queue_dma(card, &dma_list[tgt], 737 st = rsxx_queue_dma(card, &dma_list[tgt],
737 bio_data_dir(bio), 738 bio_data_dir(bio),
738 dma_off, dma_len, 739 dma_off, dma_len,
739 laddr, bvec->bv_page, 740 laddr, bvec.bv_page,
740 bv_off, cb, cb_data); 741 bv_off, cb, cb_data);
741 if (st) 742 if (st)
742 goto bvec_err; 743 goto bvec_err;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 3fb6ab4c8b4e..d5e2d12b9d9e 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1744,20 +1744,6 @@ static void carm_remove_one (struct pci_dev *pdev)
1744 kfree(host); 1744 kfree(host);
1745 pci_release_regions(pdev); 1745 pci_release_regions(pdev);
1746 pci_disable_device(pdev); 1746 pci_disable_device(pdev);
1747 pci_set_drvdata(pdev, NULL);
1748} 1747}
1749 1748
1750static int __init carm_init(void) 1749module_pci_driver(carm_driver);
1751{
1752 return pci_register_driver(&carm_driver);
1753}
1754
1755static void __exit carm_exit(void)
1756{
1757 pci_unregister_driver(&carm_driver);
1758}
1759
1760module_init(carm_init);
1761module_exit(carm_exit);
1762
1763
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index ad70868f8a96..4cf81b5bf0f7 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -108,8 +108,7 @@ struct cardinfo {
108 * have been written 108 * have been written
109 */ 109 */
110 struct bio *bio, *currentbio, **biotail; 110 struct bio *bio, *currentbio, **biotail;
111 int current_idx; 111 struct bvec_iter current_iter;
112 sector_t current_sector;
113 112
114 struct request_queue *queue; 113 struct request_queue *queue;
115 114
@@ -118,7 +117,7 @@ struct cardinfo {
118 struct mm_dma_desc *desc; 117 struct mm_dma_desc *desc;
119 int cnt, headcnt; 118 int cnt, headcnt;
120 struct bio *bio, **biotail; 119 struct bio *bio, **biotail;
121 int idx; 120 struct bvec_iter iter;
122 } mm_pages[2]; 121 } mm_pages[2];
123#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc)) 122#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
124 123
@@ -344,16 +343,13 @@ static int add_bio(struct cardinfo *card)
344 dma_addr_t dma_handle; 343 dma_addr_t dma_handle;
345 int offset; 344 int offset;
346 struct bio *bio; 345 struct bio *bio;
347 struct bio_vec *vec; 346 struct bio_vec vec;
348 int idx;
349 int rw; 347 int rw;
350 int len;
351 348
352 bio = card->currentbio; 349 bio = card->currentbio;
353 if (!bio && card->bio) { 350 if (!bio && card->bio) {
354 card->currentbio = card->bio; 351 card->currentbio = card->bio;
355 card->current_idx = card->bio->bi_idx; 352 card->current_iter = card->bio->bi_iter;
356 card->current_sector = card->bio->bi_sector;
357 card->bio = card->bio->bi_next; 353 card->bio = card->bio->bi_next;
358 if (card->bio == NULL) 354 if (card->bio == NULL)
359 card->biotail = &card->bio; 355 card->biotail = &card->bio;
@@ -362,18 +358,17 @@ static int add_bio(struct cardinfo *card)
362 } 358 }
363 if (!bio) 359 if (!bio)
364 return 0; 360 return 0;
365 idx = card->current_idx;
366 361
367 rw = bio_rw(bio); 362 rw = bio_rw(bio);
368 if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) 363 if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
369 return 0; 364 return 0;
370 365
371 vec = bio_iovec_idx(bio, idx); 366 vec = bio_iter_iovec(bio, card->current_iter);
372 len = vec->bv_len; 367
373 dma_handle = pci_map_page(card->dev, 368 dma_handle = pci_map_page(card->dev,
374 vec->bv_page, 369 vec.bv_page,
375 vec->bv_offset, 370 vec.bv_offset,
376 len, 371 vec.bv_len,
377 (rw == READ) ? 372 (rw == READ) ?
378 PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); 373 PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
379 374
@@ -381,7 +376,7 @@ static int add_bio(struct cardinfo *card)
381 desc = &p->desc[p->cnt]; 376 desc = &p->desc[p->cnt];
382 p->cnt++; 377 p->cnt++;
383 if (p->bio == NULL) 378 if (p->bio == NULL)
384 p->idx = idx; 379 p->iter = card->current_iter;
385 if ((p->biotail) != &bio->bi_next) { 380 if ((p->biotail) != &bio->bi_next) {
386 *(p->biotail) = bio; 381 *(p->biotail) = bio;
387 p->biotail = &(bio->bi_next); 382 p->biotail = &(bio->bi_next);
@@ -391,8 +386,8 @@ static int add_bio(struct cardinfo *card)
391 desc->data_dma_handle = dma_handle; 386 desc->data_dma_handle = dma_handle;
392 387
393 desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle); 388 desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
394 desc->local_addr = cpu_to_le64(card->current_sector << 9); 389 desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9);
395 desc->transfer_size = cpu_to_le32(len); 390 desc->transfer_size = cpu_to_le32(vec.bv_len);
396 offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc)); 391 offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
397 desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset)); 392 desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
398 desc->zero1 = desc->zero2 = 0; 393 desc->zero1 = desc->zero2 = 0;
@@ -407,10 +402,9 @@ static int add_bio(struct cardinfo *card)
407 desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); 402 desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
408 desc->sem_control_bits = desc->control_bits; 403 desc->sem_control_bits = desc->control_bits;
409 404
410 card->current_sector += (len >> 9); 405
411 idx++; 406 bio_advance_iter(bio, &card->current_iter, vec.bv_len);
412 card->current_idx = idx; 407 if (!card->current_iter.bi_size)
413 if (idx >= bio->bi_vcnt)
414 card->currentbio = NULL; 408 card->currentbio = NULL;
415 409
416 return 1; 410 return 1;
@@ -439,23 +433,25 @@ static void process_page(unsigned long data)
439 struct mm_dma_desc *desc = &page->desc[page->headcnt]; 433 struct mm_dma_desc *desc = &page->desc[page->headcnt];
440 int control = le32_to_cpu(desc->sem_control_bits); 434 int control = le32_to_cpu(desc->sem_control_bits);
441 int last = 0; 435 int last = 0;
442 int idx; 436 struct bio_vec vec;
443 437
444 if (!(control & DMASCR_DMA_COMPLETE)) { 438 if (!(control & DMASCR_DMA_COMPLETE)) {
445 control = dma_status; 439 control = dma_status;
446 last = 1; 440 last = 1;
447 } 441 }
442
448 page->headcnt++; 443 page->headcnt++;
449 idx = page->idx; 444 vec = bio_iter_iovec(bio, page->iter);
450 page->idx++; 445 bio_advance_iter(bio, &page->iter, vec.bv_len);
451 if (page->idx >= bio->bi_vcnt) { 446
447 if (!page->iter.bi_size) {
452 page->bio = bio->bi_next; 448 page->bio = bio->bi_next;
453 if (page->bio) 449 if (page->bio)
454 page->idx = page->bio->bi_idx; 450 page->iter = page->bio->bi_iter;
455 } 451 }
456 452
457 pci_unmap_page(card->dev, desc->data_dma_handle, 453 pci_unmap_page(card->dev, desc->data_dma_handle,
458 bio_iovec_idx(bio, idx)->bv_len, 454 vec.bv_len,
459 (control & DMASCR_TRANSFER_READ) ? 455 (control & DMASCR_TRANSFER_READ) ?
460 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); 456 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
461 if (control & DMASCR_HARD_ERROR) { 457 if (control & DMASCR_HARD_ERROR) {
@@ -532,7 +528,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
532{ 528{
533 struct cardinfo *card = q->queuedata; 529 struct cardinfo *card = q->queuedata;
534 pr_debug("mm_make_request %llu %u\n", 530 pr_debug("mm_make_request %llu %u\n",
535 (unsigned long long)bio->bi_sector, bio->bi_size); 531 (unsigned long long)bio->bi_iter.bi_sector,
532 bio->bi_iter.bi_size);
536 533
537 spin_lock_irq(&card->lock); 534 spin_lock_irq(&card->lock);
538 *card->biotail = bio; 535 *card->biotail = bio;
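umem keeps a private struct bvec_iter per card and per page set instead of an integer index: it snapshots bi_iter when it picks up a bio, pulls single segments with bio_iter_iovec(), and advances its own copy with bio_advance_iter(), leaving the bio's iterator untouched for the completion side. A minimal sketch of one step of that walk:

#include <linux/bio.h>

static bool example_consume_one_segment(struct bio *bio, struct bvec_iter *pos)
{
	struct bio_vec vec = bio_iter_iovec(bio, *pos);

	/* ... map vec.bv_page and program the hardware for vec.bv_len bytes ... */

	bio_advance_iter(bio, pos, vec.bv_len);

	/* true once this bio has been fully consumed */
	return !pos->bi_size;
}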
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6620b73d0490..4b97b86da926 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1257,7 +1257,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
1257 bio->bi_bdev = preq.bdev; 1257 bio->bi_bdev = preq.bdev;
1258 bio->bi_private = pending_req; 1258 bio->bi_private = pending_req;
1259 bio->bi_end_io = end_block_io_op; 1259 bio->bi_end_io = end_block_io_op;
1260 bio->bi_sector = preq.sector_number; 1260 bio->bi_iter.bi_sector = preq.sector_number;
1261 } 1261 }
1262 1262
1263 preq.sector_number += seg[i].nsec; 1263 preq.sector_number += seg[i].nsec;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f9c43f91f03e..8dcfb54f1603 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1547,7 +1547,7 @@ static int blkif_recover(struct blkfront_info *info)
1547 for (i = 0; i < pending; i++) { 1547 for (i = 0; i < pending; i++) {
1548 offset = (i * segs * PAGE_SIZE) >> 9; 1548 offset = (i * segs * PAGE_SIZE) >> 9;
1549 size = min((unsigned int)(segs * PAGE_SIZE) >> 9, 1549 size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
1550 (unsigned int)(bio->bi_size >> 9) - offset); 1550 (unsigned int)bio_sectors(bio) - offset);
1551 cloned_bio = bio_clone(bio, GFP_NOIO); 1551 cloned_bio = bio_clone(bio, GFP_NOIO);
1552 BUG_ON(cloned_bio == NULL); 1552 BUG_ON(cloned_bio == NULL);
1553 bio_trim(cloned_bio, offset, size); 1553 bio_trim(cloned_bio, offset, size);
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 5980cb9af857..51e75ad96422 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -561,11 +561,11 @@ static int gdrom_set_interrupt_handlers(void)
561 int err; 561 int err;
562 562
563 err = request_irq(HW_EVENT_GDROM_CMD, gdrom_command_interrupt, 563 err = request_irq(HW_EVENT_GDROM_CMD, gdrom_command_interrupt,
564 IRQF_DISABLED, "gdrom_command", &gd); 564 0, "gdrom_command", &gd);
565 if (err) 565 if (err)
566 return err; 566 return err;
567 err = request_irq(HW_EVENT_GDROM_DMA, gdrom_dma_interrupt, 567 err = request_irq(HW_EVENT_GDROM_DMA, gdrom_dma_interrupt,
568 IRQF_DISABLED, "gdrom_dma", &gd); 568 0, "gdrom_dma", &gd);
569 if (err) 569 if (err)
570 free_irq(HW_EVENT_GDROM_CMD, &gd); 570 free_irq(HW_EVENT_GDROM_CMD, &gd);
571 return err; 571 return err;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 671c3852d359..03f41896d090 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2724,6 +2724,7 @@ static struct platform_driver ipmi_driver = {
2724static int ipmi_parisc_probe(struct parisc_device *dev) 2724static int ipmi_parisc_probe(struct parisc_device *dev)
2725{ 2725{
2726 struct smi_info *info; 2726 struct smi_info *info;
2727 int rv;
2727 2728
2728 info = smi_info_alloc(); 2729 info = smi_info_alloc();
2729 2730
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 0e9c82523be6..c488b846f831 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -1,7 +1,8 @@
1 1
2obj-$(CONFIG_BCACHE) += bcache.o 2obj-$(CONFIG_BCACHE) += bcache.o
3 3
4bcache-y := alloc.o btree.o bset.o io.o journal.o writeback.o\ 4bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
5 movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o 5 io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
6 util.o writeback.o
6 7
7CFLAGS_request.o += -Iblock 8CFLAGS_request.o += -Iblock
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4c9852d92b0a..c0d37d082443 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -132,10 +132,16 @@ bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
132{ 132{
133 BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b)); 133 BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
134 134
135 if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] && 135 if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
136 CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) 136 unsigned i;
137 return false; 137
138 for (i = 0; i < RESERVE_NONE; i++)
139 if (!fifo_full(&ca->free[i]))
140 goto add;
138 141
142 return false;
143 }
144add:
139 b->prio = 0; 145 b->prio = 0;
140 146
141 if (can_inc_bucket_gen(b) && 147 if (can_inc_bucket_gen(b) &&
@@ -162,8 +168,21 @@ static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
162 fifo_push(&ca->free_inc, b - ca->buckets); 168 fifo_push(&ca->free_inc, b - ca->buckets);
163} 169}
164 170
165#define bucket_prio(b) \ 171/*
166 (((unsigned) (b->prio - ca->set->min_prio)) * GC_SECTORS_USED(b)) 172 * Determines what order we're going to reuse buckets, smallest bucket_prio()
173 * first: we also take into account the number of sectors of live data in that
174 * bucket, and in order for that multiply to make sense we have to scale bucket prios.
175 *
176 * Thus, we scale the bucket priorities so that the bucket with the smallest
177 * prio is worth 1/8th of what INITIAL_PRIO is worth.
178 */
179
180#define bucket_prio(b) \
181({ \
182 unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
183 \
184 (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
185})
167 186
168#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) 187#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r))
169#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) 188#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r))
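The rewritten bucket_prio() above adds a floor of (INITIAL_PRIO - min_prio) / 8 before multiplying by the live-sector count, so the coldest bucket no longer collapses to a weight of zero. A standalone sketch of the arithmetic, using plain unsigned ints instead of the kernel types and hard-coding INITIAL_PRIO to the 32768U defined later in bcache.h:

#include <stdio.h>

#define INITIAL_PRIO	32768U

/* Mirrors the new bucket_prio(): the bucket sitting at min_prio is still
 * worth roughly 1/8th of INITIAL_PRIO per live sector. */
static unsigned bucket_prio(unsigned prio, unsigned min_prio, unsigned sectors_used)
{
	unsigned floor = (INITIAL_PRIO - min_prio) / 8;

	return (prio - min_prio + floor) * sectors_used;
}

int main(void)
{
	unsigned min_prio = 1000;

	printf("cold bucket: %u\n", bucket_prio(1000, min_prio, 100));	/* 397100 */
	printf("warm bucket: %u\n", bucket_prio(9000, min_prio, 100));	/* 1197100 */
	return 0;
}

With min_prio = 1000 the floor works out to 3971, so even the bucket sitting exactly at min_prio is still ordered by how much live data it holds, which is what the heap comparisons below the macro rely on.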
@@ -304,6 +323,21 @@ do { \
304 __set_current_state(TASK_RUNNING); \ 323 __set_current_state(TASK_RUNNING); \
305} while (0) 324} while (0)
306 325
326static int bch_allocator_push(struct cache *ca, long bucket)
327{
328 unsigned i;
329
330 /* Prios/gens are actually the most important reserve */
331 if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
332 return true;
333
334 for (i = 0; i < RESERVE_NR; i++)
335 if (fifo_push(&ca->free[i], bucket))
336 return true;
337
338 return false;
339}
340
307static int bch_allocator_thread(void *arg) 341static int bch_allocator_thread(void *arg)
308{ 342{
309 struct cache *ca = arg; 343 struct cache *ca = arg;
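bch_allocator_push() in the hunk above offers a freed bucket to the RESERVE_PRIO fifo first and only then to the remaining reserves in enum order, so prio/gen writes never starve behind ordinary allocations. A userspace sketch of that push order, with a tiny fixed-size queue standing in for the kernel's DECLARE_FIFO lists (the queue type and its four-slot size are invented for the example; the RESERVE_* names are the ones this patch introduces):

#include <stdbool.h>
#include <stdio.h>

enum { RESERVE_BTREE, RESERVE_PRIO, RESERVE_MOVINGGC, RESERVE_NONE, RESERVE_NR };

struct fifo { long slot[4]; unsigned used; };

static bool fifo_push(struct fifo *f, long v)
{
	if (f->used == 4)
		return false;
	f->slot[f->used++] = v;
	return true;
}

/* Same ordering as bch_allocator_push(): prios/gens first, then the rest. */
static bool allocator_push(struct fifo free[RESERVE_NR], long bucket)
{
	unsigned i;

	if (fifo_push(&free[RESERVE_PRIO], bucket))
		return true;

	for (i = 0; i < RESERVE_NR; i++)
		if (fifo_push(&free[i], bucket))
			return true;

	return false;	/* every reserve is full: the allocator thread would wait */
}

int main(void)
{
	struct fifo free[RESERVE_NR] = { 0 };
	long b;

	for (b = 0; b < 20 && allocator_push(free, b); b++)
		;
	printf("pushed %ld buckets before blocking\n", b);	/* 16 with 4 reserves of 4 */
	return 0;
}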
@@ -336,9 +370,7 @@ static int bch_allocator_thread(void *arg)
336 mutex_lock(&ca->set->bucket_lock); 370 mutex_lock(&ca->set->bucket_lock);
337 } 371 }
338 372
339 allocator_wait(ca, !fifo_full(&ca->free)); 373 allocator_wait(ca, bch_allocator_push(ca, bucket));
340
341 fifo_push(&ca->free, bucket);
342 wake_up(&ca->set->bucket_wait); 374 wake_up(&ca->set->bucket_wait);
343 } 375 }
344 376
@@ -365,34 +397,29 @@ static int bch_allocator_thread(void *arg)
365 } 397 }
366} 398}
367 399
368long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait) 400long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
369{ 401{
370 DEFINE_WAIT(w); 402 DEFINE_WAIT(w);
371 struct bucket *b; 403 struct bucket *b;
372 long r; 404 long r;
373 405
374 /* fastpath */ 406 /* fastpath */
375 if (fifo_used(&ca->free) > ca->watermark[watermark]) { 407 if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
376 fifo_pop(&ca->free, r); 408 fifo_pop(&ca->free[reserve], r))
377 goto out; 409 goto out;
378 }
379 410
380 if (!wait) 411 if (!wait)
381 return -1; 412 return -1;
382 413
383 while (1) { 414 do {
384 if (fifo_used(&ca->free) > ca->watermark[watermark]) {
385 fifo_pop(&ca->free, r);
386 break;
387 }
388
389 prepare_to_wait(&ca->set->bucket_wait, &w, 415 prepare_to_wait(&ca->set->bucket_wait, &w,
390 TASK_UNINTERRUPTIBLE); 416 TASK_UNINTERRUPTIBLE);
391 417
392 mutex_unlock(&ca->set->bucket_lock); 418 mutex_unlock(&ca->set->bucket_lock);
393 schedule(); 419 schedule();
394 mutex_lock(&ca->set->bucket_lock); 420 mutex_lock(&ca->set->bucket_lock);
395 } 421 } while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
422 !fifo_pop(&ca->free[reserve], r));
396 423
397 finish_wait(&ca->set->bucket_wait, &w); 424 finish_wait(&ca->set->bucket_wait, &w);
398out: 425out:
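On the consuming side the watermark comparisons are gone: bch_bucket_alloc() now pops from the shared RESERVE_NONE fifo first, falls back to the caller's own reserve, and only sleeps on bucket_wait when both come up empty. A sketch of just that pop order, self-contained and with the wait loop elided (-1 stands for the would-have-to-wait case; the fifo here is the same toy as in the previous sketch):

#include <stdbool.h>
#include <stdio.h>

enum { RESERVE_BTREE, RESERVE_PRIO, RESERVE_MOVINGGC, RESERVE_NONE, RESERVE_NR };

struct fifo { long slot[4]; unsigned used; };

static bool fifo_pop(struct fifo *f, long *v)
{
	if (!f->used)
		return false;
	*v = f->slot[--f->used];
	return true;
}

/* Pop order from bch_bucket_alloc(): the general pool first, then the
 * reserve the caller asked for; -1 means the real code would sleep. */
static long bucket_alloc(struct fifo free[RESERVE_NR], unsigned reserve)
{
	long r;

	if (fifo_pop(&free[RESERVE_NONE], &r) || fifo_pop(&free[reserve], &r))
		return r;
	return -1;
}

int main(void)
{
	struct fifo free[RESERVE_NR] = { 0 };

	free[RESERVE_PRIO] = (struct fifo) { .slot = { 7 }, .used = 1 };

	/* The general pool is empty, so a prio allocation uses its own reserve... */
	printf("prio alloc -> bucket %ld\n", bucket_alloc(free, RESERVE_PRIO));
	/* ...while a plain data allocation has nothing left and would block. */
	printf("data alloc -> %ld\n", bucket_alloc(free, RESERVE_NONE));
	return 0;
}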
@@ -401,12 +428,14 @@ out:
401 if (expensive_debug_checks(ca->set)) { 428 if (expensive_debug_checks(ca->set)) {
402 size_t iter; 429 size_t iter;
403 long i; 430 long i;
431 unsigned j;
404 432
405 for (iter = 0; iter < prio_buckets(ca) * 2; iter++) 433 for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
406 BUG_ON(ca->prio_buckets[iter] == (uint64_t) r); 434 BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
407 435
408 fifo_for_each(i, &ca->free, iter) 436 for (j = 0; j < RESERVE_NR; j++)
409 BUG_ON(i == r); 437 fifo_for_each(i, &ca->free[j], iter)
438 BUG_ON(i == r);
410 fifo_for_each(i, &ca->free_inc, iter) 439 fifo_for_each(i, &ca->free_inc, iter)
411 BUG_ON(i == r); 440 BUG_ON(i == r);
412 fifo_for_each(i, &ca->unused, iter) 441 fifo_for_each(i, &ca->unused, iter)
@@ -419,7 +448,7 @@ out:
419 448
420 SET_GC_SECTORS_USED(b, ca->sb.bucket_size); 449 SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
421 450
422 if (watermark <= WATERMARK_METADATA) { 451 if (reserve <= RESERVE_PRIO) {
423 SET_GC_MARK(b, GC_MARK_METADATA); 452 SET_GC_MARK(b, GC_MARK_METADATA);
424 SET_GC_MOVE(b, 0); 453 SET_GC_MOVE(b, 0);
425 b->prio = BTREE_PRIO; 454 b->prio = BTREE_PRIO;
@@ -445,7 +474,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
445 } 474 }
446} 475}
447 476
448int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, 477int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
449 struct bkey *k, int n, bool wait) 478 struct bkey *k, int n, bool wait)
450{ 479{
451 int i; 480 int i;
@@ -459,7 +488,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
459 488
460 for (i = 0; i < n; i++) { 489 for (i = 0; i < n; i++) {
461 struct cache *ca = c->cache_by_alloc[i]; 490 struct cache *ca = c->cache_by_alloc[i];
462 long b = bch_bucket_alloc(ca, watermark, wait); 491 long b = bch_bucket_alloc(ca, reserve, wait);
463 492
464 if (b == -1) 493 if (b == -1)
465 goto err; 494 goto err;
@@ -478,12 +507,12 @@ err:
478 return -1; 507 return -1;
479} 508}
480 509
481int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, 510int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
482 struct bkey *k, int n, bool wait) 511 struct bkey *k, int n, bool wait)
483{ 512{
484 int ret; 513 int ret;
485 mutex_lock(&c->bucket_lock); 514 mutex_lock(&c->bucket_lock);
486 ret = __bch_bucket_alloc_set(c, watermark, k, n, wait); 515 ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
487 mutex_unlock(&c->bucket_lock); 516 mutex_unlock(&c->bucket_lock);
488 return ret; 517 return ret;
489} 518}
@@ -573,8 +602,8 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
573 602
574 while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) { 603 while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
575 unsigned watermark = write_prio 604 unsigned watermark = write_prio
576 ? WATERMARK_MOVINGGC 605 ? RESERVE_MOVINGGC
577 : WATERMARK_NONE; 606 : RESERVE_NONE;
578 607
579 spin_unlock(&c->data_bucket_lock); 608 spin_unlock(&c->data_bucket_lock);
580 609
@@ -689,7 +718,7 @@ int bch_cache_allocator_init(struct cache *ca)
689 * Then 8 for btree allocations 718 * Then 8 for btree allocations
690 * Then half for the moving garbage collector 719 * Then half for the moving garbage collector
691 */ 720 */
692 721#if 0
693 ca->watermark[WATERMARK_PRIO] = 0; 722 ca->watermark[WATERMARK_PRIO] = 0;
694 723
695 ca->watermark[WATERMARK_METADATA] = prio_buckets(ca); 724 ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
@@ -699,6 +728,6 @@ int bch_cache_allocator_init(struct cache *ca)
699 728
700 ca->watermark[WATERMARK_NONE] = ca->free.size / 2 + 729 ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
701 ca->watermark[WATERMARK_MOVINGGC]; 730 ca->watermark[WATERMARK_MOVINGGC];
702 731#endif
703 return 0; 732 return 0;
704} 733}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 754f43177483..0c707e4f4eaf 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -187,6 +187,7 @@
187#include <linux/types.h> 187#include <linux/types.h>
188#include <linux/workqueue.h> 188#include <linux/workqueue.h>
189 189
190#include "bset.h"
190#include "util.h" 191#include "util.h"
191#include "closure.h" 192#include "closure.h"
192 193
@@ -280,7 +281,6 @@ struct bcache_device {
280 unsigned long sectors_dirty_last; 281 unsigned long sectors_dirty_last;
281 long sectors_dirty_derivative; 282 long sectors_dirty_derivative;
282 283
283 mempool_t *unaligned_bvec;
284 struct bio_set *bio_split; 284 struct bio_set *bio_split;
285 285
286 unsigned data_csum:1; 286 unsigned data_csum:1;
@@ -310,7 +310,8 @@ struct cached_dev {
310 struct cache_sb sb; 310 struct cache_sb sb;
311 struct bio sb_bio; 311 struct bio sb_bio;
312 struct bio_vec sb_bv[1]; 312 struct bio_vec sb_bv[1];
313 struct closure_with_waitlist sb_write; 313 struct closure sb_write;
314 struct semaphore sb_write_mutex;
314 315
315 /* Refcount on the cache set. Always nonzero when we're caching. */ 316 /* Refcount on the cache set. Always nonzero when we're caching. */
316 atomic_t count; 317 atomic_t count;
@@ -383,12 +384,12 @@ struct cached_dev {
383 unsigned writeback_rate_p_term_inverse; 384 unsigned writeback_rate_p_term_inverse;
384}; 385};
385 386
386enum alloc_watermarks { 387enum alloc_reserve {
387 WATERMARK_PRIO, 388 RESERVE_BTREE,
388 WATERMARK_METADATA, 389 RESERVE_PRIO,
389 WATERMARK_MOVINGGC, 390 RESERVE_MOVINGGC,
390 WATERMARK_NONE, 391 RESERVE_NONE,
391 WATERMARK_MAX 392 RESERVE_NR,
392}; 393};
393 394
394struct cache { 395struct cache {
@@ -400,8 +401,6 @@ struct cache {
400 struct kobject kobj; 401 struct kobject kobj;
401 struct block_device *bdev; 402 struct block_device *bdev;
402 403
403 unsigned watermark[WATERMARK_MAX];
404
405 struct task_struct *alloc_thread; 404 struct task_struct *alloc_thread;
406 405
407 struct closure prio; 406 struct closure prio;
@@ -430,7 +429,7 @@ struct cache {
430 * because all the data they contained was overwritten), so we only 429 * because all the data they contained was overwritten), so we only
431 * need to discard them before they can be moved to the free list. 430 * need to discard them before they can be moved to the free list.
432 */ 431 */
433 DECLARE_FIFO(long, free); 432 DECLARE_FIFO(long, free)[RESERVE_NR];
434 DECLARE_FIFO(long, free_inc); 433 DECLARE_FIFO(long, free_inc);
435 DECLARE_FIFO(long, unused); 434 DECLARE_FIFO(long, unused);
436 435
@@ -515,7 +514,8 @@ struct cache_set {
515 uint64_t cached_dev_sectors; 514 uint64_t cached_dev_sectors;
516 struct closure caching; 515 struct closure caching;
517 516
518 struct closure_with_waitlist sb_write; 517 struct closure sb_write;
518 struct semaphore sb_write_mutex;
519 519
520 mempool_t *search; 520 mempool_t *search;
521 mempool_t *bio_meta; 521 mempool_t *bio_meta;
@@ -630,13 +630,15 @@ struct cache_set {
630 630
631#ifdef CONFIG_BCACHE_DEBUG 631#ifdef CONFIG_BCACHE_DEBUG
632 struct btree *verify_data; 632 struct btree *verify_data;
633 struct bset *verify_ondisk;
633 struct mutex verify_lock; 634 struct mutex verify_lock;
634#endif 635#endif
635 636
636 unsigned nr_uuids; 637 unsigned nr_uuids;
637 struct uuid_entry *uuids; 638 struct uuid_entry *uuids;
638 BKEY_PADDED(uuid_bucket); 639 BKEY_PADDED(uuid_bucket);
639 struct closure_with_waitlist uuid_write; 640 struct closure uuid_write;
641 struct semaphore uuid_write_mutex;
640 642
641 /* 643 /*
642 * A btree node on disk could have too many bsets for an iterator to fit 644 * A btree node on disk could have too many bsets for an iterator to fit
@@ -644,13 +646,7 @@ struct cache_set {
644 */ 646 */
645 mempool_t *fill_iter; 647 mempool_t *fill_iter;
646 648
647 /* 649 struct bset_sort_state sort;
648 * btree_sort() is a merge sort and requires temporary space - single
649 * element mempool
650 */
651 struct mutex sort_lock;
652 struct bset *sort;
653 unsigned sort_crit_factor;
654 650
655 /* List of buckets we're currently writing data to */ 651 /* List of buckets we're currently writing data to */
656 struct list_head data_buckets; 652 struct list_head data_buckets;
@@ -666,7 +662,6 @@ struct cache_set {
666 unsigned congested_read_threshold_us; 662 unsigned congested_read_threshold_us;
667 unsigned congested_write_threshold_us; 663 unsigned congested_write_threshold_us;
668 664
669 struct time_stats sort_time;
670 struct time_stats btree_gc_time; 665 struct time_stats btree_gc_time;
671 struct time_stats btree_split_time; 666 struct time_stats btree_split_time;
672 struct time_stats btree_read_time; 667 struct time_stats btree_read_time;
@@ -684,9 +679,9 @@ struct cache_set {
684 unsigned error_decay; 679 unsigned error_decay;
685 680
686 unsigned short journal_delay_ms; 681 unsigned short journal_delay_ms;
682 bool expensive_debug_checks;
687 unsigned verify:1; 683 unsigned verify:1;
688 unsigned key_merging_disabled:1; 684 unsigned key_merging_disabled:1;
689 unsigned expensive_debug_checks:1;
690 unsigned gc_always_rewrite:1; 685 unsigned gc_always_rewrite:1;
691 unsigned shrinker_disabled:1; 686 unsigned shrinker_disabled:1;
692 unsigned copy_gc_enabled:1; 687 unsigned copy_gc_enabled:1;
@@ -708,13 +703,8 @@ struct bbio {
708 struct bio bio; 703 struct bio bio;
709}; 704};
710 705
711static inline unsigned local_clock_us(void)
712{
713 return local_clock() >> 10;
714}
715
716#define BTREE_PRIO USHRT_MAX 706#define BTREE_PRIO USHRT_MAX
717#define INITIAL_PRIO 32768 707#define INITIAL_PRIO 32768U
718 708
719#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE) 709#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE)
720#define btree_blocks(b) \ 710#define btree_blocks(b) \
@@ -727,21 +717,6 @@ static inline unsigned local_clock_us(void)
727#define bucket_bytes(c) ((c)->sb.bucket_size << 9) 717#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
728#define block_bytes(c) ((c)->sb.block_size << 9) 718#define block_bytes(c) ((c)->sb.block_size << 9)
729 719
730#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t))
731#define set_bytes(i) __set_bytes(i, i->keys)
732
733#define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
734#define set_blocks(i, c) __set_blocks(i, (i)->keys, c)
735
736#define node(i, j) ((struct bkey *) ((i)->d + (j)))
737#define end(i) node(i, (i)->keys)
738
739#define index(i, b) \
740 ((size_t) (((void *) i - (void *) (b)->sets[0].data) / \
741 block_bytes(b->c)))
742
743#define btree_data_space(b) (PAGE_SIZE << (b)->page_order)
744
745#define prios_per_bucket(c) \ 720#define prios_per_bucket(c) \
746 ((bucket_bytes(c) - sizeof(struct prio_set)) / \ 721 ((bucket_bytes(c) - sizeof(struct prio_set)) / \
747 sizeof(struct bucket_disk)) 722 sizeof(struct bucket_disk))
@@ -784,20 +759,34 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
784 return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr); 759 return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
785} 760}
786 761
787/* Btree key macros */ 762static inline uint8_t gen_after(uint8_t a, uint8_t b)
763{
764 uint8_t r = a - b;
765 return r > 128U ? 0 : r;
766}
788 767
789static inline void bkey_init(struct bkey *k) 768static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
769 unsigned i)
790{ 770{
791 *k = ZERO_KEY; 771 return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
792} 772}
793 773
774static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
775 unsigned i)
776{
777 return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
778}
779
780/* Btree key macros */
781
794/* 782/*
795 * This is used for various on disk data structures - cache_sb, prio_set, bset, 783 * This is used for various on disk data structures - cache_sb, prio_set, bset,
796 * jset: The checksum is _always_ the first 8 bytes of these structs 784 * jset: The checksum is _always_ the first 8 bytes of these structs
797 */ 785 */
798#define csum_set(i) \ 786#define csum_set(i) \
799 bch_crc64(((void *) (i)) + sizeof(uint64_t), \ 787 bch_crc64(((void *) (i)) + sizeof(uint64_t), \
800 ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t))) 788 ((void *) bset_bkey_last(i)) - \
789 (((void *) (i)) + sizeof(uint64_t)))
801 790
802/* Error handling macros */ 791/* Error handling macros */
803 792
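The gen_after()/ptr_stale() helpers moved into bcache.h in this hunk compare 8-bit bucket generations that are allowed to wrap, so staleness is the modular difference, clamped to zero when the key's generation is ahead of (or equal to) the bucket's. The same arithmetic checked in isolation:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Identical arithmetic to the gen_after() helper above. */
static uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;

	return r > 128 ? 0 : r;
}

int main(void)
{
	assert(gen_after(10, 10) == 0);		/* same generation: not stale */
	assert(gen_after(12, 10) == 2);		/* bucket reused twice since the key */
	assert(gen_after(3, 250) == 9);		/* survives the 8-bit wrap: 250..255, 0..3 */
	assert(gen_after(10, 12) == 0);		/* key ahead of bucket: clamped to 0 */
	printf("gen_after wraparound checks passed\n");
	return 0;
}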
@@ -902,7 +891,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
902void bch_bbio_free(struct bio *, struct cache_set *); 891void bch_bbio_free(struct bio *, struct cache_set *);
903struct bio *bch_bbio_alloc(struct cache_set *); 892struct bio *bch_bbio_alloc(struct cache_set *);
904 893
905struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
906void bch_generic_make_request(struct bio *, struct bio_split_pool *); 894void bch_generic_make_request(struct bio *, struct bio_split_pool *);
907void __bch_submit_bbio(struct bio *, struct cache_set *); 895void __bch_submit_bbio(struct bio *, struct cache_set *);
908void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); 896void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 7d388b8bb50e..4f6b5940e609 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -5,30 +5,134 @@
5 * Copyright 2012 Google, Inc. 5 * Copyright 2012 Google, Inc.
6 */ 6 */
7 7
8#include "bcache.h" 8#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
9#include "btree.h"
10#include "debug.h"
11 9
10#include "util.h"
11#include "bset.h"
12
13#include <linux/console.h>
12#include <linux/random.h> 14#include <linux/random.h>
13#include <linux/prefetch.h> 15#include <linux/prefetch.h>
14 16
17#ifdef CONFIG_BCACHE_DEBUG
18
19void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
20{
21 struct bkey *k, *next;
22
23 for (k = i->start; k < bset_bkey_last(i); k = next) {
24 next = bkey_next(k);
25
26 printk(KERN_ERR "block %u key %zi/%u: ", set,
27 (uint64_t *) k - i->d, i->keys);
28
29 if (b->ops->key_dump)
30 b->ops->key_dump(b, k);
31 else
32 printk("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
33
34 if (next < bset_bkey_last(i) &&
35 bkey_cmp(k, b->ops->is_extents ?
36 &START_KEY(next) : next) > 0)
37 printk(KERN_ERR "Key skipped backwards\n");
38 }
39}
40
41void bch_dump_bucket(struct btree_keys *b)
42{
43 unsigned i;
44
45 console_lock();
46 for (i = 0; i <= b->nsets; i++)
47 bch_dump_bset(b, b->set[i].data,
48 bset_sector_offset(b, b->set[i].data));
49 console_unlock();
50}
51
52int __bch_count_data(struct btree_keys *b)
53{
54 unsigned ret = 0;
55 struct btree_iter iter;
56 struct bkey *k;
57
58 if (b->ops->is_extents)
59 for_each_key(b, k, &iter)
60 ret += KEY_SIZE(k);
61 return ret;
62}
63
64void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
65{
66 va_list args;
67 struct bkey *k, *p = NULL;
68 struct btree_iter iter;
69 const char *err;
70
71 for_each_key(b, k, &iter) {
72 if (b->ops->is_extents) {
73 err = "Keys out of order";
74 if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
75 goto bug;
76
77 if (bch_ptr_invalid(b, k))
78 continue;
79
80 err = "Overlapping keys";
81 if (p && bkey_cmp(p, &START_KEY(k)) > 0)
82 goto bug;
83 } else {
84 if (bch_ptr_bad(b, k))
85 continue;
86
87 err = "Duplicate keys";
88 if (p && !bkey_cmp(p, k))
89 goto bug;
90 }
91 p = k;
92 }
93#if 0
94 err = "Key larger than btree node key";
95 if (p && bkey_cmp(p, &b->key) > 0)
96 goto bug;
97#endif
98 return;
99bug:
100 bch_dump_bucket(b);
101
102 va_start(args, fmt);
103 vprintk(fmt, args);
104 va_end(args);
105
106 panic("bch_check_keys error: %s:\n", err);
107}
108
109static void bch_btree_iter_next_check(struct btree_iter *iter)
110{
111 struct bkey *k = iter->data->k, *next = bkey_next(k);
112
113 if (next < iter->data->end &&
114 bkey_cmp(k, iter->b->ops->is_extents ?
115 &START_KEY(next) : next) > 0) {
116 bch_dump_bucket(iter->b);
117 panic("Key skipped backwards\n");
118 }
119}
120
121#else
122
123static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
124
125#endif
126
15/* Keylists */ 127/* Keylists */
16 128
17int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) 129int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
18{ 130{
19 size_t oldsize = bch_keylist_nkeys(l); 131 size_t oldsize = bch_keylist_nkeys(l);
20 size_t newsize = oldsize + 2 + nptrs; 132 size_t newsize = oldsize + u64s;
21 uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p; 133 uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
22 uint64_t *new_keys; 134 uint64_t *new_keys;
23 135
24 /* The journalling code doesn't handle the case where the keys to insert
25 * is bigger than an empty write: If we just return -ENOMEM here,
26 * bio_insert() and bio_invalidate() will insert the keys created so far
27 * and finish the rest when the keylist is empty.
28 */
29 if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
30 return -ENOMEM;
31
32 newsize = roundup_pow_of_two(newsize); 136 newsize = roundup_pow_of_two(newsize);
33 137
34 if (newsize <= KEYLIST_INLINE || 138 if (newsize <= KEYLIST_INLINE ||
@@ -71,136 +175,6 @@ void bch_keylist_pop_front(struct keylist *l)
71 bch_keylist_bytes(l)); 175 bch_keylist_bytes(l));
72} 176}
73 177
74/* Pointer validation */
75
76static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
77{
78 unsigned i;
79
80 for (i = 0; i < KEY_PTRS(k); i++)
81 if (ptr_available(c, k, i)) {
82 struct cache *ca = PTR_CACHE(c, k, i);
83 size_t bucket = PTR_BUCKET_NR(c, k, i);
84 size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
85
86 if (KEY_SIZE(k) + r > c->sb.bucket_size ||
87 bucket < ca->sb.first_bucket ||
88 bucket >= ca->sb.nbuckets)
89 return true;
90 }
91
92 return false;
93}
94
95bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
96{
97 char buf[80];
98
99 if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
100 goto bad;
101
102 if (__ptr_invalid(c, k))
103 goto bad;
104
105 return false;
106bad:
107 bch_bkey_to_text(buf, sizeof(buf), k);
108 cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
109 return true;
110}
111
112bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
113{
114 char buf[80];
115
116 if (!KEY_SIZE(k))
117 return true;
118
119 if (KEY_SIZE(k) > KEY_OFFSET(k))
120 goto bad;
121
122 if (__ptr_invalid(c, k))
123 goto bad;
124
125 return false;
126bad:
127 bch_bkey_to_text(buf, sizeof(buf), k);
128 cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
129 return true;
130}
131
132static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
133 unsigned ptr)
134{
135 struct bucket *g = PTR_BUCKET(b->c, k, ptr);
136 char buf[80];
137
138 if (mutex_trylock(&b->c->bucket_lock)) {
139 if (b->level) {
140 if (KEY_DIRTY(k) ||
141 g->prio != BTREE_PRIO ||
142 (b->c->gc_mark_valid &&
143 GC_MARK(g) != GC_MARK_METADATA))
144 goto err;
145
146 } else {
147 if (g->prio == BTREE_PRIO)
148 goto err;
149
150 if (KEY_DIRTY(k) &&
151 b->c->gc_mark_valid &&
152 GC_MARK(g) != GC_MARK_DIRTY)
153 goto err;
154 }
155 mutex_unlock(&b->c->bucket_lock);
156 }
157
158 return false;
159err:
160 mutex_unlock(&b->c->bucket_lock);
161 bch_bkey_to_text(buf, sizeof(buf), k);
162 btree_bug(b,
163"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
164 buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
165 g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
166 return true;
167}
168
169bool bch_ptr_bad(struct btree *b, const struct bkey *k)
170{
171 struct bucket *g;
172 unsigned i, stale;
173
174 if (!bkey_cmp(k, &ZERO_KEY) ||
175 !KEY_PTRS(k) ||
176 bch_ptr_invalid(b, k))
177 return true;
178
179 for (i = 0; i < KEY_PTRS(k); i++) {
180 if (!ptr_available(b->c, k, i))
181 return true;
182
183 g = PTR_BUCKET(b->c, k, i);
184 stale = ptr_stale(b->c, k, i);
185
186 btree_bug_on(stale > 96, b,
187 "key too stale: %i, need_gc %u",
188 stale, b->c->need_gc);
189
190 btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
191 b, "stale dirty pointer");
192
193 if (stale)
194 return true;
195
196 if (expensive_debug_checks(b->c) &&
197 ptr_bad_expensive_checks(b, k, i))
198 return true;
199 }
200
201 return false;
202}
203
204/* Key/pointer manipulation */ 178/* Key/pointer manipulation */
205 179
206void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, 180void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
@@ -255,56 +229,138 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
255 return true; 229 return true;
256} 230}
257 231
258static uint64_t merge_chksums(struct bkey *l, struct bkey *r) 232/* Auxiliary search trees */
233
234/* 32 bits total: */
235#define BKEY_MID_BITS 3
236#define BKEY_EXPONENT_BITS 7
237#define BKEY_MANTISSA_BITS (32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
238#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
239
240struct bkey_float {
241 unsigned exponent:BKEY_EXPONENT_BITS;
242 unsigned m:BKEY_MID_BITS;
243 unsigned mantissa:BKEY_MANTISSA_BITS;
244} __packed;
245
246/*
247 * BSET_CACHELINE was originally intended to match the hardware cacheline size -
248 * it used to be 64, but I realized the lookup code would touch slightly less
249 * memory if it was 128.
250 *
251 * It defines the number of bytes (in struct bset) per struct bkey_float in
252 * the auxiliary search tree - when we're done searching the bset_float tree we
253 * have this many bytes left that we do a linear search over.
254 *
255 * Since (after level 5) every level of the bset_tree is on a new cacheline,
256 * we're touching one fewer cacheline in the bset tree in exchange for one more
257 * cacheline in the linear search - but the linear search might stop before it
258 * gets to the second cacheline.
259 */
260
261#define BSET_CACHELINE 128
262
263/* Space required for the btree node keys */
264static inline size_t btree_keys_bytes(struct btree_keys *b)
259{ 265{
260 return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) & 266 return PAGE_SIZE << b->page_order;
261 ~((uint64_t)1 << 63);
262} 267}
263 268
264/* Tries to merge l and r: l should be lower than r 269static inline size_t btree_keys_cachelines(struct btree_keys *b)
265 * Returns true if we were able to merge. If we did merge, l will be the merged
266 * key, r will be untouched.
267 */
268bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
269{ 270{
270 unsigned i; 271 return btree_keys_bytes(b) / BSET_CACHELINE;
272}
271 273
272 if (key_merging_disabled(b->c)) 274/* Space required for the auxiliary search trees */
273 return false; 275static inline size_t bset_tree_bytes(struct btree_keys *b)
276{
277 return btree_keys_cachelines(b) * sizeof(struct bkey_float);
278}
274 279
275 if (KEY_PTRS(l) != KEY_PTRS(r) || 280/* Space required for the prev pointers */
276 KEY_DIRTY(l) != KEY_DIRTY(r) || 281static inline size_t bset_prev_bytes(struct btree_keys *b)
277 bkey_cmp(l, &START_KEY(r))) 282{
278 return false; 283 return btree_keys_cachelines(b) * sizeof(uint8_t);
284}
279 285
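The three sizing helpers on the new side of this hunk budget one packed struct bkey_float plus one uint8_t of prev-pointer per BSET_CACHELINE (128 bytes) of key space. A standalone sketch of that arithmetic for a hypothetical page_order-3 node on 4 KiB pages (the numbers are chosen for the example, not taken from a real cache set):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define BSET_CACHELINE	128

/* Same field widths as the bkey_float defined earlier in this file. */
struct bkey_float {
	unsigned exponent:7;
	unsigned m:3;
	unsigned mantissa:22;
} __attribute__((packed));

int main(void)
{
	unsigned page_order = 3;			/* a 32 KiB btree node */
	size_t keys_bytes = PAGE_SIZE << page_order;	/* btree_keys_bytes() */
	size_t cachelines = keys_bytes / BSET_CACHELINE;/* btree_keys_cachelines() */

	printf("key space       : %zu bytes\n", keys_bytes);
	printf("lookup entries  : %zu\n", cachelines);
	printf("bset_tree_bytes : %zu\n", cachelines * sizeof(struct bkey_float));
	printf("bset_prev_bytes : %zu\n", cachelines * sizeof(unsigned char));
	return 0;
}

So a 32 KiB node spends roughly 1 KiB on the auxiliary tree and 256 bytes on prev pointers, which is the memory-versus-locality trade the BSET_CACHELINE comment above is describing.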
280 for (i = 0; i < KEY_PTRS(l); i++) 286/* Memory allocation */
281 if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
282 PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
283 return false;
284 287
285 /* Keys with no pointers aren't restricted to one bucket and could 288void bch_btree_keys_free(struct btree_keys *b)
286 * overflow KEY_SIZE 289{
287 */ 290 struct bset_tree *t = b->set;
288 if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
289 SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
290 SET_KEY_SIZE(l, USHRT_MAX);
291 291
292 bch_cut_front(l, r); 292 if (bset_prev_bytes(b) < PAGE_SIZE)
293 return false; 293 kfree(t->prev);
294 } 294 else
295 free_pages((unsigned long) t->prev,
296 get_order(bset_prev_bytes(b)));
295 297
296 if (KEY_CSUM(l)) { 298 if (bset_tree_bytes(b) < PAGE_SIZE)
297 if (KEY_CSUM(r)) 299 kfree(t->tree);
298 l->ptr[KEY_PTRS(l)] = merge_chksums(l, r); 300 else
299 else 301 free_pages((unsigned long) t->tree,
300 SET_KEY_CSUM(l, 0); 302 get_order(bset_tree_bytes(b)));
301 }
302 303
303 SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r)); 304 free_pages((unsigned long) t->data, b->page_order);
304 SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
305 305
306 return true; 306 t->prev = NULL;
307 t->tree = NULL;
308 t->data = NULL;
309}
310EXPORT_SYMBOL(bch_btree_keys_free);
311
312int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
313{
314 struct bset_tree *t = b->set;
315
316 BUG_ON(t->data);
317
318 b->page_order = page_order;
319
320 t->data = (void *) __get_free_pages(gfp, b->page_order);
321 if (!t->data)
322 goto err;
323
324 t->tree = bset_tree_bytes(b) < PAGE_SIZE
325 ? kmalloc(bset_tree_bytes(b), gfp)
326 : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
327 if (!t->tree)
328 goto err;
329
330 t->prev = bset_prev_bytes(b) < PAGE_SIZE
331 ? kmalloc(bset_prev_bytes(b), gfp)
332 : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
333 if (!t->prev)
334 goto err;
335
336 return 0;
337err:
338 bch_btree_keys_free(b);
339 return -ENOMEM;
307} 340}
341EXPORT_SYMBOL(bch_btree_keys_alloc);
342
343void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
344 bool *expensive_debug_checks)
345{
346 unsigned i;
347
348 b->ops = ops;
349 b->expensive_debug_checks = expensive_debug_checks;
350 b->nsets = 0;
351 b->last_set_unwritten = 0;
352
353 /* XXX: shouldn't be needed */
354 for (i = 0; i < MAX_BSETS; i++)
355 b->set[i].size = 0;
356 /*
357 * Second loop starts at 1 because b->keys[0]->data is the memory we
358 * allocated
359 */
360 for (i = 1; i < MAX_BSETS; i++)
361 b->set[i].data = NULL;
362}
363EXPORT_SYMBOL(bch_btree_keys_init);
308 364
309/* Binary tree stuff for auxiliary search trees */ 365/* Binary tree stuff for auxiliary search trees */
310 366
@@ -455,9 +511,11 @@ static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
455 return ((void *) k - (void *) t->data) / BSET_CACHELINE; 511 return ((void *) k - (void *) t->data) / BSET_CACHELINE;
456} 512}
457 513
458static unsigned bkey_to_cacheline_offset(struct bkey *k) 514static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
515 unsigned cacheline,
516 struct bkey *k)
459{ 517{
460 return ((size_t) k & (BSET_CACHELINE - 1)) / sizeof(uint64_t); 518 return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
461} 519}
462 520
463static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j) 521static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
@@ -504,7 +562,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
504 : tree_to_prev_bkey(t, j >> ffs(j)); 562 : tree_to_prev_bkey(t, j >> ffs(j));
505 563
506 struct bkey *r = is_power_of_2(j + 1) 564 struct bkey *r = is_power_of_2(j + 1)
507 ? node(t->data, t->data->keys - bkey_u64s(&t->end)) 565 ? bset_bkey_idx(t->data, t->data->keys - bkey_u64s(&t->end))
508 : tree_to_bkey(t, j >> (ffz(j) + 1)); 566 : tree_to_bkey(t, j >> (ffz(j) + 1));
509 567
510 BUG_ON(m < l || m > r); 568 BUG_ON(m < l || m > r);
@@ -528,9 +586,9 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
528 f->exponent = 127; 586 f->exponent = 127;
529} 587}
530 588
531static void bset_alloc_tree(struct btree *b, struct bset_tree *t) 589static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
532{ 590{
533 if (t != b->sets) { 591 if (t != b->set) {
534 unsigned j = roundup(t[-1].size, 592 unsigned j = roundup(t[-1].size,
535 64 / sizeof(struct bkey_float)); 593 64 / sizeof(struct bkey_float));
536 594
@@ -538,33 +596,54 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
538 t->prev = t[-1].prev + j; 596 t->prev = t[-1].prev + j;
539 } 597 }
540 598
541 while (t < b->sets + MAX_BSETS) 599 while (t < b->set + MAX_BSETS)
542 t++->size = 0; 600 t++->size = 0;
543} 601}
544 602
545static void bset_build_unwritten_tree(struct btree *b) 603static void bch_bset_build_unwritten_tree(struct btree_keys *b)
546{ 604{
547 struct bset_tree *t = b->sets + b->nsets; 605 struct bset_tree *t = bset_tree_last(b);
606
607 BUG_ON(b->last_set_unwritten);
608 b->last_set_unwritten = 1;
548 609
549 bset_alloc_tree(b, t); 610 bset_alloc_tree(b, t);
550 611
551 if (t->tree != b->sets->tree + bset_tree_space(b)) { 612 if (t->tree != b->set->tree + btree_keys_cachelines(b)) {
552 t->prev[0] = bkey_to_cacheline_offset(t->data->start); 613 t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
553 t->size = 1; 614 t->size = 1;
554 } 615 }
555} 616}
556 617
557static void bset_build_written_tree(struct btree *b) 618void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
619{
620 if (i != b->set->data) {
621 b->set[++b->nsets].data = i;
622 i->seq = b->set->data->seq;
623 } else
624 get_random_bytes(&i->seq, sizeof(uint64_t));
625
626 i->magic = magic;
627 i->version = 0;
628 i->keys = 0;
629
630 bch_bset_build_unwritten_tree(b);
631}
632EXPORT_SYMBOL(bch_bset_init_next);
633
634void bch_bset_build_written_tree(struct btree_keys *b)
558{ 635{
559 struct bset_tree *t = b->sets + b->nsets; 636 struct bset_tree *t = bset_tree_last(b);
560 struct bkey *k = t->data->start; 637 struct bkey *prev = NULL, *k = t->data->start;
561 unsigned j, cacheline = 1; 638 unsigned j, cacheline = 1;
562 639
640 b->last_set_unwritten = 0;
641
563 bset_alloc_tree(b, t); 642 bset_alloc_tree(b, t);
564 643
565 t->size = min_t(unsigned, 644 t->size = min_t(unsigned,
566 bkey_to_cacheline(t, end(t->data)), 645 bkey_to_cacheline(t, bset_bkey_last(t->data)),
567 b->sets->tree + bset_tree_space(b) - t->tree); 646 b->set->tree + btree_keys_cachelines(b) - t->tree);
568 647
569 if (t->size < 2) { 648 if (t->size < 2) {
570 t->size = 0; 649 t->size = 0;
@@ -577,16 +656,14 @@ static void bset_build_written_tree(struct btree *b)
577 for (j = inorder_next(0, t->size); 656 for (j = inorder_next(0, t->size);
578 j; 657 j;
579 j = inorder_next(j, t->size)) { 658 j = inorder_next(j, t->size)) {
580 while (bkey_to_cacheline(t, k) != cacheline) 659 while (bkey_to_cacheline(t, k) < cacheline)
581 k = bkey_next(k); 660 prev = k, k = bkey_next(k);
582 661
583 t->prev[j] = bkey_u64s(k); 662 t->prev[j] = bkey_u64s(prev);
584 k = bkey_next(k); 663 t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
585 cacheline++;
586 t->tree[j].m = bkey_to_cacheline_offset(k);
587 } 664 }
588 665
589 while (bkey_next(k) != end(t->data)) 666 while (bkey_next(k) != bset_bkey_last(t->data))
590 k = bkey_next(k); 667 k = bkey_next(k);
591 668
592 t->end = *k; 669 t->end = *k;
@@ -597,14 +674,17 @@ static void bset_build_written_tree(struct btree *b)
597 j = inorder_next(j, t->size)) 674 j = inorder_next(j, t->size))
598 make_bfloat(t, j); 675 make_bfloat(t, j);
599} 676}
677EXPORT_SYMBOL(bch_bset_build_written_tree);
600 678
601void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k) 679/* Insert */
680
681void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
602{ 682{
603 struct bset_tree *t; 683 struct bset_tree *t;
604 unsigned inorder, j = 1; 684 unsigned inorder, j = 1;
605 685
606 for (t = b->sets; t <= &b->sets[b->nsets]; t++) 686 for (t = b->set; t <= bset_tree_last(b); t++)
607 if (k < end(t->data)) 687 if (k < bset_bkey_last(t->data))
608 goto found_set; 688 goto found_set;
609 689
610 BUG(); 690 BUG();
@@ -617,7 +697,7 @@ found_set:
617 if (k == t->data->start) 697 if (k == t->data->start)
618 goto fix_left; 698 goto fix_left;
619 699
620 if (bkey_next(k) == end(t->data)) { 700 if (bkey_next(k) == bset_bkey_last(t->data)) {
621 t->end = *k; 701 t->end = *k;
622 goto fix_right; 702 goto fix_right;
623 } 703 }
@@ -642,10 +722,12 @@ fix_right: do {
642 j = j * 2 + 1; 722 j = j * 2 + 1;
643 } while (j < t->size); 723 } while (j < t->size);
644} 724}
725EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
645 726
646void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) 727static void bch_bset_fix_lookup_table(struct btree_keys *b,
728 struct bset_tree *t,
729 struct bkey *k)
647{ 730{
648 struct bset_tree *t = &b->sets[b->nsets];
649 unsigned shift = bkey_u64s(k); 731 unsigned shift = bkey_u64s(k);
650 unsigned j = bkey_to_cacheline(t, k); 732 unsigned j = bkey_to_cacheline(t, k);
651 733
@@ -657,8 +739,8 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
657 * lookup table for the first key that is strictly greater than k: 739 * lookup table for the first key that is strictly greater than k:
658 * it's either k's cacheline or the next one 740 * it's either k's cacheline or the next one
659 */ 741 */
660 if (j < t->size && 742 while (j < t->size &&
661 table_to_bkey(t, j) <= k) 743 table_to_bkey(t, j) <= k)
662 j++; 744 j++;
663 745
664 /* Adjust all the lookup table entries, and find a new key for any that 746 /* Adjust all the lookup table entries, and find a new key for any that
@@ -673,54 +755,124 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
673 while (k < cacheline_to_bkey(t, j, 0)) 755 while (k < cacheline_to_bkey(t, j, 0))
674 k = bkey_next(k); 756 k = bkey_next(k);
675 757
676 t->prev[j] = bkey_to_cacheline_offset(k); 758 t->prev[j] = bkey_to_cacheline_offset(t, j, k);
677 } 759 }
678 } 760 }
679 761
680 if (t->size == b->sets->tree + bset_tree_space(b) - t->tree) 762 if (t->size == b->set->tree + btree_keys_cachelines(b) - t->tree)
681 return; 763 return;
682 764
683 /* Possibly add a new entry to the end of the lookup table */ 765 /* Possibly add a new entry to the end of the lookup table */
684 766
685 for (k = table_to_bkey(t, t->size - 1); 767 for (k = table_to_bkey(t, t->size - 1);
686 k != end(t->data); 768 k != bset_bkey_last(t->data);
687 k = bkey_next(k)) 769 k = bkey_next(k))
688 if (t->size == bkey_to_cacheline(t, k)) { 770 if (t->size == bkey_to_cacheline(t, k)) {
689 t->prev[t->size] = bkey_to_cacheline_offset(k); 771 t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
690 t->size++; 772 t->size++;
691 } 773 }
692} 774}
693 775
694void bch_bset_init_next(struct btree *b) 776/*
777 * Tries to merge l and r: l should be lower than r
778 * Returns true if we were able to merge. If we did merge, l will be the merged
779 * key, r will be untouched.
780 */
781bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
695{ 782{
696 struct bset *i = write_block(b); 783 if (!b->ops->key_merge)
784 return false;
697 785
698 if (i != b->sets[0].data) { 786 /*
699 b->sets[++b->nsets].data = i; 787 * Generic header checks
700 i->seq = b->sets[0].data->seq; 788 * Assumes left and right are in order
701 } else 789 * Left and right must be exactly aligned
702 get_random_bytes(&i->seq, sizeof(uint64_t)); 790 */
791 if (!bch_bkey_equal_header(l, r) ||
792 bkey_cmp(l, &START_KEY(r)))
793 return false;
703 794
704 i->magic = bset_magic(&b->c->sb); 795 return b->ops->key_merge(b, l, r);
705 i->version = 0; 796}
706 i->keys = 0; 797EXPORT_SYMBOL(bch_bkey_try_merge);
707 798
708 bset_build_unwritten_tree(b); 799void bch_bset_insert(struct btree_keys *b, struct bkey *where,
800 struct bkey *insert)
801{
802 struct bset_tree *t = bset_tree_last(b);
803
804 BUG_ON(!b->last_set_unwritten);
805 BUG_ON(bset_byte_offset(b, t->data) +
806 __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
807 PAGE_SIZE << b->page_order);
808
809 memmove((uint64_t *) where + bkey_u64s(insert),
810 where,
811 (void *) bset_bkey_last(t->data) - (void *) where);
812
813 t->data->keys += bkey_u64s(insert);
814 bkey_copy(where, insert);
815 bch_bset_fix_lookup_table(b, t, where);
709} 816}
817EXPORT_SYMBOL(bch_bset_insert);
818
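bch_bset_insert() above makes room for the new key by memmove()ing everything from the insertion point to the end of the unwritten set up by bkey_u64s(insert) words, copying the key into the gap, and then repairing the lookup table. The move itself, reduced to a flat array of u64s with a fixed two-word key width (a simplification for the demo; real bkeys are variable length):

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

#define KEY_U64S 2	/* pretend every key is two 64-bit words */

/* Shift the tail up by one key and copy the new key into the gap -
 * the same memmove pattern bch_bset_insert() uses. */
static void flat_insert(uint64_t *set, unsigned *nkeys, unsigned at,
			const uint64_t *key)
{
	uint64_t *where = set + at * KEY_U64S;
	uint64_t *last = set + *nkeys * KEY_U64S;

	memmove(where + KEY_U64S, where, (last - where) * sizeof(uint64_t));
	memcpy(where, key, KEY_U64S * sizeof(uint64_t));
	*nkeys += 1;
}

int main(void)
{
	uint64_t set[8] = { 10, 100, 30, 300 };	/* two keys: (10,100) and (30,300) */
	unsigned nkeys = 2;
	uint64_t key[KEY_U64S] = { 20, 200 };
	unsigned i;

	flat_insert(set, &nkeys, 1, key);	/* insert between the two */

	for (i = 0; i < nkeys * KEY_U64S; i++)
		printf("%" PRIu64 " ", set[i]);
	printf("\n");				/* 10 100 20 200 30 300 */
	return 0;
}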
819unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
820 struct bkey *replace_key)
821{
822 unsigned status = BTREE_INSERT_STATUS_NO_INSERT;
823 struct bset *i = bset_tree_last(b)->data;
824 struct bkey *m, *prev = NULL;
825 struct btree_iter iter;
826
827 BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
828
829 m = bch_btree_iter_init(b, &iter, b->ops->is_extents
830 ? PRECEDING_KEY(&START_KEY(k))
831 : PRECEDING_KEY(k));
832
833 if (b->ops->insert_fixup(b, k, &iter, replace_key))
834 return status;
835
836 status = BTREE_INSERT_STATUS_INSERT;
837
838 while (m != bset_bkey_last(i) &&
839 bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
840 prev = m, m = bkey_next(m);
841
842 /* prev is in the tree, if we merge we're done */
843 status = BTREE_INSERT_STATUS_BACK_MERGE;
844 if (prev &&
845 bch_bkey_try_merge(b, prev, k))
846 goto merged;
847#if 0
848 status = BTREE_INSERT_STATUS_OVERWROTE;
849 if (m != bset_bkey_last(i) &&
850 KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
851 goto copy;
852#endif
853 status = BTREE_INSERT_STATUS_FRONT_MERGE;
854 if (m != bset_bkey_last(i) &&
855 bch_bkey_try_merge(b, k, m))
856 goto copy;
857
858 bch_bset_insert(b, m, k);
859copy: bkey_copy(m, k);
860merged:
861 return status;
862}
863EXPORT_SYMBOL(bch_btree_insert_key);
864
865/* Lookup */
710 866
711struct bset_search_iter { 867struct bset_search_iter {
712 struct bkey *l, *r; 868 struct bkey *l, *r;
713}; 869};
714 870
715static struct bset_search_iter bset_search_write_set(struct btree *b, 871static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
716 struct bset_tree *t,
717 const struct bkey *search) 872 const struct bkey *search)
718{ 873{
719 unsigned li = 0, ri = t->size; 874 unsigned li = 0, ri = t->size;
720 875
721 BUG_ON(!b->nsets &&
722 t->size < bkey_to_cacheline(t, end(t->data)));
723
724 while (li + 1 != ri) { 876 while (li + 1 != ri) {
725 unsigned m = (li + ri) >> 1; 877 unsigned m = (li + ri) >> 1;
726 878
@@ -732,12 +884,11 @@ static struct bset_search_iter bset_search_write_set(struct btree *b,
732 884
733 return (struct bset_search_iter) { 885 return (struct bset_search_iter) {
734 table_to_bkey(t, li), 886 table_to_bkey(t, li),
735 ri < t->size ? table_to_bkey(t, ri) : end(t->data) 887 ri < t->size ? table_to_bkey(t, ri) : bset_bkey_last(t->data)
736 }; 888 };
737} 889}
738 890
739static struct bset_search_iter bset_search_tree(struct btree *b, 891static struct bset_search_iter bset_search_tree(struct bset_tree *t,
740 struct bset_tree *t,
741 const struct bkey *search) 892 const struct bkey *search)
742{ 893{
743 struct bkey *l, *r; 894 struct bkey *l, *r;
@@ -784,7 +935,7 @@ static struct bset_search_iter bset_search_tree(struct btree *b,
784 f = &t->tree[inorder_next(j, t->size)]; 935 f = &t->tree[inorder_next(j, t->size)];
785 r = cacheline_to_bkey(t, inorder, f->m); 936 r = cacheline_to_bkey(t, inorder, f->m);
786 } else 937 } else
787 r = end(t->data); 938 r = bset_bkey_last(t->data);
788 } else { 939 } else {
789 r = cacheline_to_bkey(t, inorder, f->m); 940 r = cacheline_to_bkey(t, inorder, f->m);
790 941
@@ -798,7 +949,7 @@ static struct bset_search_iter bset_search_tree(struct btree *b,
798 return (struct bset_search_iter) {l, r}; 949 return (struct bset_search_iter) {l, r};
799} 950}
800 951
801struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, 952struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
802 const struct bkey *search) 953 const struct bkey *search)
803{ 954{
804 struct bset_search_iter i; 955 struct bset_search_iter i;
@@ -820,7 +971,7 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
820 971
821 if (unlikely(!t->size)) { 972 if (unlikely(!t->size)) {
822 i.l = t->data->start; 973 i.l = t->data->start;
823 i.r = end(t->data); 974 i.r = bset_bkey_last(t->data);
824 } else if (bset_written(b, t)) { 975 } else if (bset_written(b, t)) {
825 /* 976 /*
826 * Each node in the auxiliary search tree covers a certain range 977 * Each node in the auxiliary search tree covers a certain range
@@ -830,23 +981,27 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
830 */ 981 */
831 982
832 if (unlikely(bkey_cmp(search, &t->end) >= 0)) 983 if (unlikely(bkey_cmp(search, &t->end) >= 0))
833 return end(t->data); 984 return bset_bkey_last(t->data);
834 985
835 if (unlikely(bkey_cmp(search, t->data->start) < 0)) 986 if (unlikely(bkey_cmp(search, t->data->start) < 0))
836 return t->data->start; 987 return t->data->start;
837 988
838 i = bset_search_tree(b, t, search); 989 i = bset_search_tree(t, search);
839 } else 990 } else {
840 i = bset_search_write_set(b, t, search); 991 BUG_ON(!b->nsets &&
992 t->size < bkey_to_cacheline(t, bset_bkey_last(t->data)));
841 993
842 if (expensive_debug_checks(b->c)) { 994 i = bset_search_write_set(t, search);
995 }
996
997 if (btree_keys_expensive_checks(b)) {
843 BUG_ON(bset_written(b, t) && 998 BUG_ON(bset_written(b, t) &&
844 i.l != t->data->start && 999 i.l != t->data->start &&
845 bkey_cmp(tree_to_prev_bkey(t, 1000 bkey_cmp(tree_to_prev_bkey(t,
846 inorder_to_tree(bkey_to_cacheline(t, i.l), t)), 1001 inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
847 search) > 0); 1002 search) > 0);
848 1003
849 BUG_ON(i.r != end(t->data) && 1004 BUG_ON(i.r != bset_bkey_last(t->data) &&
850 bkey_cmp(i.r, search) <= 0); 1005 bkey_cmp(i.r, search) <= 0);
851 } 1006 }
852 1007
@@ -856,22 +1011,17 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
856 1011
857 return i.l; 1012 return i.l;
858} 1013}
1014EXPORT_SYMBOL(__bch_bset_search);
859 1015
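Whichever path it takes (the auxiliary tree for written sets, the lookup-table binary search for the set still being built), the boundary checks and BUG_ON()s above read as one contract: __bch_bset_search() returns the first key that sorts strictly after the search key, or the end of the set when there is none. That contract on plain ints (an illustration of the semantics as read from the checks above, not the kernel code):

#include <assert.h>
#include <stdio.h>

/* First element strictly greater than search, or "end" if there is none. */
static const int *upper_bound(const int *start, const int *end, int search)
{
	while (start < end) {
		const int *mid = start + (end - start) / 2;

		if (*mid <= search)
			start = mid + 1;
		else
			end = mid;
	}
	return start;
}

int main(void)
{
	int keys[] = { 2, 4, 4, 8, 16 };
	const int *end = keys + 5;

	assert(*upper_bound(keys, end, 3) == 4);
	assert(*upper_bound(keys, end, 4) == 8);	/* strictly greater, not equal */
	assert(upper_bound(keys, end, 16) == end);	/* nothing sorts after 16 */
	printf("search contract holds\n");
	return 0;
}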
860/* Btree iterator */ 1016/* Btree iterator */
861 1017
862/* 1018typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
863 * Returns true if l > r - unless l == r, in which case returns true if l is 1019 struct btree_iter_set);
864 * older than r. 1020
865 *
866 * Necessary for btree_sort_fixup() - if there are multiple keys that compare
867 * equal in different sets, we have to process them newest to oldest.
868 */
869static inline bool btree_iter_cmp(struct btree_iter_set l, 1021static inline bool btree_iter_cmp(struct btree_iter_set l,
870 struct btree_iter_set r) 1022 struct btree_iter_set r)
871{ 1023{
872 int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); 1024 return bkey_cmp(l.k, r.k) > 0;
873
874 return c ? c > 0 : l.k < r.k;
875} 1025}
876 1026
877static inline bool btree_iter_end(struct btree_iter *iter) 1027static inline bool btree_iter_end(struct btree_iter *iter)
@@ -888,8 +1038,10 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
888 btree_iter_cmp)); 1038 btree_iter_cmp));
889} 1039}
890 1040
891struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, 1041static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
892 struct bkey *search, struct bset_tree *start) 1042 struct btree_iter *iter,
1043 struct bkey *search,
1044 struct bset_tree *start)
893{ 1045{
894 struct bkey *ret = NULL; 1046 struct bkey *ret = NULL;
895 iter->size = ARRAY_SIZE(iter->data); 1047 iter->size = ARRAY_SIZE(iter->data);
@@ -899,15 +1051,24 @@ struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter,
899 iter->b = b; 1051 iter->b = b;
900#endif 1052#endif
901 1053
902 for (; start <= &b->sets[b->nsets]; start++) { 1054 for (; start <= bset_tree_last(b); start++) {
903 ret = bch_bset_search(b, start, search); 1055 ret = bch_bset_search(b, start, search);
904 bch_btree_iter_push(iter, ret, end(start->data)); 1056 bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
905 } 1057 }
906 1058
907 return ret; 1059 return ret;
908} 1060}
909 1061
910struct bkey *bch_btree_iter_next(struct btree_iter *iter) 1062struct bkey *bch_btree_iter_init(struct btree_keys *b,
1063 struct btree_iter *iter,
1064 struct bkey *search)
1065{
1066 return __bch_btree_iter_init(b, iter, search, b->set);
1067}
1068EXPORT_SYMBOL(bch_btree_iter_init);
1069
1070static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
1071 btree_iter_cmp_fn *cmp)
911{ 1072{
912 struct btree_iter_set unused; 1073 struct btree_iter_set unused;
913 struct bkey *ret = NULL; 1074 struct bkey *ret = NULL;
@@ -924,16 +1085,23 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
924 } 1085 }
925 1086
926 if (iter->data->k == iter->data->end) 1087 if (iter->data->k == iter->data->end)
927 heap_pop(iter, unused, btree_iter_cmp); 1088 heap_pop(iter, unused, cmp);
928 else 1089 else
929 heap_sift(iter, 0, btree_iter_cmp); 1090 heap_sift(iter, 0, cmp);
930 } 1091 }
931 1092
932 return ret; 1093 return ret;
933} 1094}
934 1095
1096struct bkey *bch_btree_iter_next(struct btree_iter *iter)
1097{
1098 return __bch_btree_iter_next(iter, btree_iter_cmp);
1099
1100}
1101EXPORT_SYMBOL(bch_btree_iter_next);
1102
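A btree node can hold several sorted bsets at once, and the iterator above is a small binary heap of (current key, end-of-set) pairs that pops the globally smallest key on each step; the comparison is now supplied per key type (btree_iter_cmp here, an extent-aware one elsewhere). A self-contained k-way merge with the same shape, on plain ints with a fixed comparison:

#include <stdio.h>

struct iter_set { const int *k, *end; };

struct iter {
	unsigned used;
	struct iter_set data[4];
};

/* "l sorts after r" - the same sense as btree_iter_cmp(), so data[0] is smallest. */
static int iter_cmp(struct iter_set l, struct iter_set r)
{
	return *l.k > *r.k;
}

static void sift(struct iter *it, unsigned i)
{
	for (;;) {
		unsigned l = 2 * i + 1, r = l + 1, best = i;
		struct iter_set tmp;

		if (l < it->used && iter_cmp(it->data[best], it->data[l]))
			best = l;
		if (r < it->used && iter_cmp(it->data[best], it->data[r]))
			best = r;
		if (best == i)
			break;
		tmp = it->data[i];
		it->data[i] = it->data[best];
		it->data[best] = tmp;
		i = best;
	}
}

static const int *iter_next(struct iter *it)
{
	const int *ret;

	if (!it->used)
		return NULL;

	ret = it->data[0].k++;
	if (it->data[0].k == it->data[0].end)
		it->data[0] = it->data[--it->used];
	sift(it, 0);
	return ret;
}

int main(void)
{
	int a[] = { 1, 4, 9 }, b[] = { 2, 3, 10 }, c[] = { 5 };
	struct iter it = { 3, { { a, a + 3 }, { b, b + 3 }, { c, c + 1 } } };
	const int *k;
	int i;

	/* Heapify, then drain: prints the merge of all three sets in order. */
	for (i = it.used / 2 - 1; i >= 0; i--)
		sift(&it, i);
	while ((k = iter_next(&it)))
		printf("%d ", *k);
	printf("\n");	/* 1 2 3 4 5 9 10 */
	return 0;
}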
935struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, 1103struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
936 struct btree *b, ptr_filter_fn fn) 1104 struct btree_keys *b, ptr_filter_fn fn)
937{ 1105{
938 struct bkey *ret; 1106 struct bkey *ret;
939 1107
@@ -946,70 +1114,58 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
946 1114
947/* Mergesort */ 1115/* Mergesort */
948 1116
949static void sort_key_next(struct btree_iter *iter, 1117void bch_bset_sort_state_free(struct bset_sort_state *state)
950 struct btree_iter_set *i)
951{ 1118{
952 i->k = bkey_next(i->k); 1119 if (state->pool)
953 1120 mempool_destroy(state->pool);
954 if (i->k == i->end)
955 *i = iter->data[--iter->used];
956} 1121}
957 1122
958static void btree_sort_fixup(struct btree_iter *iter) 1123int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
959{ 1124{
960 while (iter->used > 1) { 1125 spin_lock_init(&state->time.lock);
961 struct btree_iter_set *top = iter->data, *i = top + 1;
962 1126
963 if (iter->used > 2 && 1127 state->page_order = page_order;
964 btree_iter_cmp(i[0], i[1])) 1128 state->crit_factor = int_sqrt(1 << page_order);
965 i++;
966 1129
967 if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) 1130 state->pool = mempool_create_page_pool(1, page_order);
968 break; 1131 if (!state->pool)
969 1132 return -ENOMEM;
970 if (!KEY_SIZE(i->k)) {
971 sort_key_next(iter, i);
972 heap_sift(iter, i - top, btree_iter_cmp);
973 continue;
974 }
975
976 if (top->k > i->k) {
977 if (bkey_cmp(top->k, i->k) >= 0)
978 sort_key_next(iter, i);
979 else
980 bch_cut_front(top->k, i->k);
981 1133
982 heap_sift(iter, i - top, btree_iter_cmp); 1134 return 0;
983 } else {
984 /* can't happen because of comparison func */
985 BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
986 bch_cut_back(&START_KEY(i->k), top->k);
987 }
988 }
989} 1135}
1136EXPORT_SYMBOL(bch_bset_sort_state_init);
990 1137
991static void btree_mergesort(struct btree *b, struct bset *out, 1138static void btree_mergesort(struct btree_keys *b, struct bset *out,
992 struct btree_iter *iter, 1139 struct btree_iter *iter,
993 bool fixup, bool remove_stale) 1140 bool fixup, bool remove_stale)
994{ 1141{
1142 int i;
995 struct bkey *k, *last = NULL; 1143 struct bkey *k, *last = NULL;
996 bool (*bad)(struct btree *, const struct bkey *) = remove_stale 1144 BKEY_PADDED(k) tmp;
1145 bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
997 ? bch_ptr_bad 1146 ? bch_ptr_bad
998 : bch_ptr_invalid; 1147 : bch_ptr_invalid;
999 1148
1149 /* Heapify the iterator, using our comparison function */
1150 for (i = iter->used / 2 - 1; i >= 0; --i)
1151 heap_sift(iter, i, b->ops->sort_cmp);
1152
1000 while (!btree_iter_end(iter)) { 1153 while (!btree_iter_end(iter)) {
1001 if (fixup && !b->level) 1154 if (b->ops->sort_fixup && fixup)
1002 btree_sort_fixup(iter); 1155 k = b->ops->sort_fixup(iter, &tmp.k);
1156 else
1157 k = NULL;
1158
1159 if (!k)
1160 k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
1003 1161
1004 k = bch_btree_iter_next(iter);
1005 if (bad(b, k)) 1162 if (bad(b, k))
1006 continue; 1163 continue;
1007 1164
1008 if (!last) { 1165 if (!last) {
1009 last = out->start; 1166 last = out->start;
1010 bkey_copy(last, k); 1167 bkey_copy(last, k);
1011 } else if (b->level || 1168 } else if (!bch_bkey_try_merge(b, last, k)) {
1012 !bch_bkey_try_merge(b, last, k)) {
1013 last = bkey_next(last); 1169 last = bkey_next(last);
1014 bkey_copy(last, k); 1170 bkey_copy(last, k);
1015 } 1171 }
@@ -1020,27 +1176,27 @@ static void btree_mergesort(struct btree *b, struct bset *out,
1020 pr_debug("sorted %i keys", out->keys); 1176 pr_debug("sorted %i keys", out->keys);
1021} 1177}
1022 1178
1023static void __btree_sort(struct btree *b, struct btree_iter *iter, 1179static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
1024 unsigned start, unsigned order, bool fixup) 1180 unsigned start, unsigned order, bool fixup,
1181 struct bset_sort_state *state)
1025{ 1182{
1026 uint64_t start_time; 1183 uint64_t start_time;
1027 bool remove_stale = !b->written; 1184 bool used_mempool = false;
1028 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, 1185 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
1029 order); 1186 order);
1030 if (!out) { 1187 if (!out) {
1031 mutex_lock(&b->c->sort_lock); 1188 BUG_ON(order > state->page_order);
1032 out = b->c->sort; 1189
1033 order = ilog2(bucket_pages(b->c)); 1190 out = page_address(mempool_alloc(state->pool, GFP_NOIO));
1191 used_mempool = true;
1192 order = state->page_order;
1034 } 1193 }
1035 1194
1036 start_time = local_clock(); 1195 start_time = local_clock();
1037 1196
1038 btree_mergesort(b, out, iter, fixup, remove_stale); 1197 btree_mergesort(b, out, iter, fixup, false);
1039 b->nsets = start; 1198 b->nsets = start;
1040 1199
1041 if (!fixup && !start && b->written)
1042 bch_btree_verify(b, out);
1043
1044 if (!start && order == b->page_order) { 1200 if (!start && order == b->page_order) {
1045 /* 1201 /*
1046 * Our temporary buffer is the same size as the btree node's 1202 * Our temporary buffer is the same size as the btree node's
@@ -1048,84 +1204,76 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
1048 * memcpy() 1204 * memcpy()
1049 */ 1205 */
1050 1206
1051 out->magic = bset_magic(&b->c->sb); 1207 out->magic = b->set->data->magic;
1052 out->seq = b->sets[0].data->seq; 1208 out->seq = b->set->data->seq;
1053 out->version = b->sets[0].data->version; 1209 out->version = b->set->data->version;
1054 swap(out, b->sets[0].data); 1210 swap(out, b->set->data);
1055
1056 if (b->c->sort == b->sets[0].data)
1057 b->c->sort = out;
1058 } else { 1211 } else {
1059 b->sets[start].data->keys = out->keys; 1212 b->set[start].data->keys = out->keys;
1060 memcpy(b->sets[start].data->start, out->start, 1213 memcpy(b->set[start].data->start, out->start,
1061 (void *) end(out) - (void *) out->start); 1214 (void *) bset_bkey_last(out) - (void *) out->start);
1062 } 1215 }
1063 1216
1064 if (out == b->c->sort) 1217 if (used_mempool)
1065 mutex_unlock(&b->c->sort_lock); 1218 mempool_free(virt_to_page(out), state->pool);
1066 else 1219 else
1067 free_pages((unsigned long) out, order); 1220 free_pages((unsigned long) out, order);
1068 1221
1069 if (b->written) 1222 bch_bset_build_written_tree(b);
1070 bset_build_written_tree(b);
1071 1223
1072 if (!start) 1224 if (!start)
1073 bch_time_stats_update(&b->c->sort_time, start_time); 1225 bch_time_stats_update(&state->time, start_time);
1074} 1226}
1075 1227
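__btree_sort() above needs a scratch buffer as large as the bsets it is merging; it tries a normal page allocation first and, when that fails under memory pressure, falls back to the one-element mempool sized when bch_bset_sort_state_init() ran, which is why the BUG_ON insists the requested order never exceeds state->page_order. The shape of that fallback in miniature (malloc standing in for the page allocator and a static buffer for the mempool; unlike mempool_alloc(), which sleeps until its element is returned, this sketch simply reports failure):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE	4096UL
#define POOL_ORDER	3		/* the "mempool" holds one 8-page buffer */

static unsigned char emergency[PAGE_SIZE << POOL_ORDER];
static bool emergency_in_use;

static void *sort_buf_get(unsigned order, bool *used_pool)
{
	void *buf = malloc(PAGE_SIZE << order);	/* ordinary allocation first */

	if (buf) {
		*used_pool = false;
		return buf;
	}

	/* Fallback: the preallocated buffer always covers order <= POOL_ORDER. */
	if (order > POOL_ORDER || emergency_in_use)
		return NULL;
	emergency_in_use = true;
	*used_pool = true;
	return emergency;
}

static void sort_buf_put(void *buf, bool used_pool)
{
	if (used_pool)
		emergency_in_use = false;
	else
		free(buf);
}

int main(void)
{
	bool used_pool;
	void *buf = sort_buf_get(2, &used_pool);

	printf("got %s buffer\n", used_pool ? "emergency" : "heap");
	sort_buf_put(buf, used_pool);
	return 0;
}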
1076void bch_btree_sort_partial(struct btree *b, unsigned start) 1228void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
1229 struct bset_sort_state *state)
1077{ 1230{
1078 size_t order = b->page_order, keys = 0; 1231 size_t order = b->page_order, keys = 0;
1079 struct btree_iter iter; 1232 struct btree_iter iter;
1080 int oldsize = bch_count_data(b); 1233 int oldsize = bch_count_data(b);
1081 1234
1082 __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); 1235 __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
1083
1084 BUG_ON(b->sets[b->nsets].data == write_block(b) &&
1085 (b->sets[b->nsets].size || b->nsets));
1086
1087 1236
1088 if (start) { 1237 if (start) {
1089 unsigned i; 1238 unsigned i;
1090 1239
1091 for (i = start; i <= b->nsets; i++) 1240 for (i = start; i <= b->nsets; i++)
1092 keys += b->sets[i].data->keys; 1241 keys += b->set[i].data->keys;
1093 1242
1094 order = roundup_pow_of_two(__set_bytes(b->sets->data, 1243 order = get_order(__set_bytes(b->set->data, keys));
1095 keys)) / PAGE_SIZE;
1096 if (order)
1097 order = ilog2(order);
1098 } 1244 }
1099 1245
1100 __btree_sort(b, &iter, start, order, false); 1246 __btree_sort(b, &iter, start, order, false, state);
1101 1247
1102 EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); 1248 EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
1103} 1249}
1250EXPORT_SYMBOL(bch_btree_sort_partial);
1104 1251
1105void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) 1252void bch_btree_sort_and_fix_extents(struct btree_keys *b,
1253 struct btree_iter *iter,
1254 struct bset_sort_state *state)
1106{ 1255{
1107 BUG_ON(!b->written); 1256 __btree_sort(b, iter, 0, b->page_order, true, state);
1108 __btree_sort(b, iter, 0, b->page_order, true);
1109} 1257}
1110 1258
1111void bch_btree_sort_into(struct btree *b, struct btree *new) 1259void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
1260 struct bset_sort_state *state)
1112{ 1261{
1113 uint64_t start_time = local_clock(); 1262 uint64_t start_time = local_clock();
1114 1263
1115 struct btree_iter iter; 1264 struct btree_iter iter;
1116 bch_btree_iter_init(b, &iter, NULL); 1265 bch_btree_iter_init(b, &iter, NULL);
1117 1266
1118 btree_mergesort(b, new->sets->data, &iter, false, true); 1267 btree_mergesort(b, new->set->data, &iter, false, true);
1119 1268
1120 bch_time_stats_update(&b->c->sort_time, start_time); 1269 bch_time_stats_update(&state->time, start_time);
1121 1270
1122 bkey_copy_key(&new->key, &b->key); 1271 new->set->size = 0; // XXX: why?
1123 new->sets->size = 0;
1124} 1272}
1125 1273
1126#define SORT_CRIT (4096 / sizeof(uint64_t)) 1274#define SORT_CRIT (4096 / sizeof(uint64_t))
1127 1275
1128void bch_btree_sort_lazy(struct btree *b) 1276void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
1129{ 1277{
1130 unsigned crit = SORT_CRIT; 1278 unsigned crit = SORT_CRIT;
1131 int i; 1279 int i;
@@ -1134,50 +1282,32 @@ void bch_btree_sort_lazy(struct btree *b)
1134 if (!b->nsets) 1282 if (!b->nsets)
1135 goto out; 1283 goto out;
1136 1284
1137 /* If not a leaf node, always sort */
1138 if (b->level) {
1139 bch_btree_sort(b);
1140 return;
1141 }
1142
1143 for (i = b->nsets - 1; i >= 0; --i) { 1285 for (i = b->nsets - 1; i >= 0; --i) {
1144 crit *= b->c->sort_crit_factor; 1286 crit *= state->crit_factor;
1145 1287
1146 if (b->sets[i].data->keys < crit) { 1288 if (b->set[i].data->keys < crit) {
1147 bch_btree_sort_partial(b, i); 1289 bch_btree_sort_partial(b, i, state);
1148 return; 1290 return;
1149 } 1291 }
1150 } 1292 }
1151 1293
1152 /* Sort if we'd overflow */ 1294 /* Sort if we'd overflow */
1153 if (b->nsets + 1 == MAX_BSETS) { 1295 if (b->nsets + 1 == MAX_BSETS) {
1154 bch_btree_sort(b); 1296 bch_btree_sort(b, state);
1155 return; 1297 return;
1156 } 1298 }
1157 1299
1158out: 1300out:
1159 bset_build_written_tree(b); 1301 bch_bset_build_written_tree(b);
1160} 1302}
1303EXPORT_SYMBOL(bch_btree_sort_lazy);
1161 1304
1162/* Sysfs stuff */ 1305void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
1163
1164struct bset_stats {
1165 struct btree_op op;
1166 size_t nodes;
1167 size_t sets_written, sets_unwritten;
1168 size_t bytes_written, bytes_unwritten;
1169 size_t floats, failed;
1170};
1171
1172static int btree_bset_stats(struct btree_op *op, struct btree *b)
1173{ 1306{
1174 struct bset_stats *stats = container_of(op, struct bset_stats, op);
1175 unsigned i; 1307 unsigned i;
1176 1308
1177 stats->nodes++;
1178
1179 for (i = 0; i <= b->nsets; i++) { 1309 for (i = 0; i <= b->nsets; i++) {
1180 struct bset_tree *t = &b->sets[i]; 1310 struct bset_tree *t = &b->set[i];
1181 size_t bytes = t->data->keys * sizeof(uint64_t); 1311 size_t bytes = t->data->keys * sizeof(uint64_t);
1182 size_t j; 1312 size_t j;
1183 1313
@@ -1195,32 +1325,4 @@ static int btree_bset_stats(struct btree_op *op, struct btree *b)
1195 stats->bytes_unwritten += bytes; 1325 stats->bytes_unwritten += bytes;
1196 } 1326 }
1197 } 1327 }
1198
1199 return MAP_CONTINUE;
1200}
1201
1202int bch_bset_print_stats(struct cache_set *c, char *buf)
1203{
1204 struct bset_stats t;
1205 int ret;
1206
1207 memset(&t, 0, sizeof(struct bset_stats));
1208 bch_btree_op_init(&t.op, -1);
1209
1210 ret = bch_btree_map_nodes(&t.op, c, &ZERO_KEY, btree_bset_stats);
1211 if (ret < 0)
1212 return ret;
1213
1214 return snprintf(buf, PAGE_SIZE,
1215 "btree nodes: %zu\n"
1216 "written sets: %zu\n"
1217 "unwritten sets: %zu\n"
1218 "written key bytes: %zu\n"
1219 "unwritten key bytes: %zu\n"
1220 "floats: %zu\n"
1221 "failed: %zu\n",
1222 t.nodes,
1223 t.sets_written, t.sets_unwritten,
1224 t.bytes_written, t.bytes_unwritten,
1225 t.floats, t.failed);
1226} 1328}
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 1d3c24f9fa0e..003260f4ddf6 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,7 +1,11 @@
1#ifndef _BCACHE_BSET_H 1#ifndef _BCACHE_BSET_H
2#define _BCACHE_BSET_H 2#define _BCACHE_BSET_H
3 3
4#include <linux/slab.h> 4#include <linux/bcache.h>
5#include <linux/kernel.h>
6#include <linux/types.h>
7
8#include "util.h" /* for time_stats */
5 9
6/* 10/*
7 * BKEYS: 11 * BKEYS:
@@ -142,20 +146,13 @@
142 * first key in that range of bytes again. 146 * first key in that range of bytes again.
143 */ 147 */
144 148
145/* Btree key comparison/iteration */ 149struct btree_keys;
150struct btree_iter;
151struct btree_iter_set;
152struct bkey_float;
146 153
147#define MAX_BSETS 4U 154#define MAX_BSETS 4U
148 155
149struct btree_iter {
150 size_t size, used;
151#ifdef CONFIG_BCACHE_DEBUG
152 struct btree *b;
153#endif
154 struct btree_iter_set {
155 struct bkey *k, *end;
156 } data[MAX_BSETS];
157};
158
159struct bset_tree { 156struct bset_tree {
160 /* 157 /*
161 * We construct a binary tree in an array as if the array 158 * We construct a binary tree in an array as if the array
@@ -165,14 +162,14 @@ struct bset_tree {
165 */ 162 */
166 163
167 /* size of the binary tree and prev array */ 164 /* size of the binary tree and prev array */
168 unsigned size; 165 unsigned size;
169 166
170 /* function of size - precalculated for to_inorder() */ 167 /* function of size - precalculated for to_inorder() */
171 unsigned extra; 168 unsigned extra;
172 169
173 /* copy of the last key in the set */ 170 /* copy of the last key in the set */
174 struct bkey end; 171 struct bkey end;
175 struct bkey_float *tree; 172 struct bkey_float *tree;
176 173
177 /* 174 /*
178 * The nodes in the bset tree point to specific keys - this 175 * The nodes in the bset tree point to specific keys - this
@@ -182,12 +179,219 @@ struct bset_tree {
182 * to keep bkey_float to 4 bytes and prev isn't used in the fast 179 * to keep bkey_float to 4 bytes and prev isn't used in the fast
183 * path. 180 * path.
184 */ 181 */
185 uint8_t *prev; 182 uint8_t *prev;
186 183
187 /* The actual btree node, with pointers to each sorted set */ 184 /* The actual btree node, with pointers to each sorted set */
188 struct bset *data; 185 struct bset *data;
186};
187
188struct btree_keys_ops {
189 bool (*sort_cmp)(struct btree_iter_set,
190 struct btree_iter_set);
191 struct bkey *(*sort_fixup)(struct btree_iter *, struct bkey *);
192 bool (*insert_fixup)(struct btree_keys *, struct bkey *,
193 struct btree_iter *, struct bkey *);
194 bool (*key_invalid)(struct btree_keys *,
195 const struct bkey *);
196 bool (*key_bad)(struct btree_keys *, const struct bkey *);
197 bool (*key_merge)(struct btree_keys *,
198 struct bkey *, struct bkey *);
199 void (*key_to_text)(char *, size_t, const struct bkey *);
200 void (*key_dump)(struct btree_keys *, const struct bkey *);
201
202 /*
203 * Only used for deciding whether to use START_KEY(k) or just the key
204 * itself in a couple places
205 */
206 bool is_extents;
207};
208
209struct btree_keys {
210 const struct btree_keys_ops *ops;
211 uint8_t page_order;
212 uint8_t nsets;
213 unsigned last_set_unwritten:1;
214 bool *expensive_debug_checks;
215
216 /*
217 * Sets of sorted keys - the real btree node - plus a binary search tree
218 *
219 * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
220 * to the memory we have allocated for this btree node. Additionally,
221 * set[0]->data points to the entire btree node as it exists on disk.
222 */
223 struct bset_tree set[MAX_BSETS];
224};
225
226static inline struct bset_tree *bset_tree_last(struct btree_keys *b)
227{
228 return b->set + b->nsets;
229}
230
231static inline bool bset_written(struct btree_keys *b, struct bset_tree *t)
232{
233 return t <= b->set + b->nsets - b->last_set_unwritten;
234}
235
236static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
237{
238 return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
239}
240
241static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i)
242{
243 return ((size_t) i) - ((size_t) b->set->data);
244}
245
246static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i)
247{
248 return bset_byte_offset(b, i) >> 9;
249}
250
251#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t))
252#define set_bytes(i) __set_bytes(i, i->keys)
253
254#define __set_blocks(i, k, block_bytes) \
255 DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
256#define set_blocks(i, block_bytes) \
257 __set_blocks(i, (i)->keys, block_bytes)
258
259static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
260{
261 struct bset_tree *t = bset_tree_last(b);
262
263 BUG_ON((PAGE_SIZE << b->page_order) <
264 (bset_byte_offset(b, t->data) + set_bytes(t->data)));
265
266 if (!b->last_set_unwritten)
267 return 0;
268
269 return ((PAGE_SIZE << b->page_order) -
270 (bset_byte_offset(b, t->data) + set_bytes(t->data))) /
271 sizeof(u64);
272}
273
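/*
 * Worked example for bch_btree_keys_u64s_remaining() (illustrative only,
 * not part of this patch): assume page_order = 2, i.e. a 16 KiB btree
 * node, with an unwritten last set that starts 12 KiB into the node and
 * whose set_bytes() is 1 KiB.  The remaining key space is then
 * (16384 - (12288 + 1024)) / 8 = 384 u64s.  If last_set_unwritten is 0
 * the helper returns 0, since a written set may not grow.
 */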
274static inline struct bset *bset_next_set(struct btree_keys *b,
275 unsigned block_bytes)
276{
277 struct bset *i = bset_tree_last(b)->data;
278
279 return ((void *) i) + roundup(set_bytes(i), block_bytes);
280}
281
282void bch_btree_keys_free(struct btree_keys *);
283int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
284void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
285 bool *);
286
287void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t);
288void bch_bset_build_written_tree(struct btree_keys *);
289void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
290bool bch_bkey_try_merge(struct btree_keys *, struct bkey *, struct bkey *);
291void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *);
292unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *,
293 struct bkey *);
294
295enum {
296 BTREE_INSERT_STATUS_NO_INSERT = 0,
297 BTREE_INSERT_STATUS_INSERT,
298 BTREE_INSERT_STATUS_BACK_MERGE,
299 BTREE_INSERT_STATUS_OVERWROTE,
300 BTREE_INSERT_STATUS_FRONT_MERGE,
189}; 301};
190 302
303/* Btree key iteration */
304
305struct btree_iter {
306 size_t size, used;
307#ifdef CONFIG_BCACHE_DEBUG
308 struct btree_keys *b;
309#endif
310 struct btree_iter_set {
311 struct bkey *k, *end;
312 } data[MAX_BSETS];
313};
314
315typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *);
316
317struct bkey *bch_btree_iter_next(struct btree_iter *);
318struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
319 struct btree_keys *, ptr_filter_fn);
320
321void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
322struct bkey *bch_btree_iter_init(struct btree_keys *, struct btree_iter *,
323 struct bkey *);
324
325struct bkey *__bch_bset_search(struct btree_keys *, struct bset_tree *,
326 const struct bkey *);
327
328/*
329 * Returns the first key that is strictly greater than search
330 */
331static inline struct bkey *bch_bset_search(struct btree_keys *b,
332 struct bset_tree *t,
333 const struct bkey *search)
334{
335 return search ? __bch_bset_search(b, t, search) : t->data->start;
336}
337
338#define for_each_key_filter(b, k, iter, filter) \
339 for (bch_btree_iter_init((b), (iter), NULL); \
340 ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
341
342#define for_each_key(b, k, iter) \
343 for (bch_btree_iter_init((b), (iter), NULL); \
344 ((k) = bch_btree_iter_next(iter));)
345
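/*
 * Minimal usage sketch for the iterator macros above (illustrative only;
 * example_walk_keys() is not part of this patch and assumes it is placed
 * after the bch_ptr_bad() definition below, with an already-initialized
 * struct btree_keys).  for_each_key() visits every key of every set in
 * sorted order; for_each_key_filter() additionally skips keys for which
 * the filter returns true, e.g. bch_ptr_bad() to skip stale pointers.
 */
static inline void example_walk_keys(struct btree_keys *b)
{
	struct btree_iter iter;
	struct bkey *k;

	for_each_key(b, k, &iter)
		pr_debug("key: %llu u64s\n",
			 (unsigned long long) bkey_u64s(k));

	for_each_key_filter(b, k, &iter, bch_ptr_bad)
		pr_debug("good key, %llu sectors\n",
			 (unsigned long long) KEY_SIZE(k));
}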
346/* Sorting */
347
348struct bset_sort_state {
349 mempool_t *pool;
350
351 unsigned page_order;
352 unsigned crit_factor;
353
354 struct time_stats time;
355};
356
357void bch_bset_sort_state_free(struct bset_sort_state *);
358int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
359void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
360void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
361 struct bset_sort_state *);
362void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *,
363 struct bset_sort_state *);
364void bch_btree_sort_partial(struct btree_keys *, unsigned,
365 struct bset_sort_state *);
366
367static inline void bch_btree_sort(struct btree_keys *b,
368 struct bset_sort_state *state)
369{
370 bch_btree_sort_partial(b, 0, state);
371}
372
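/*
 * Illustrative lifecycle sketch for the sort state (not part of this
 * patch; in practice the state would live in the cache set and be
 * initialized once with the btree node page order, then reused for
 * every sort).
 */
static inline int example_sort_state_use(struct btree_keys *b,
					 unsigned page_order)
{
	struct bset_sort_state state;
	int ret;

	ret = bch_bset_sort_state_init(&state, page_order);
	if (ret)
		return ret;

	/* merge small sets opportunistically, or everything on overflow */
	bch_btree_sort_lazy(b, &state);

	/* full resort of all sets into set[0] */
	bch_btree_sort(b, &state);

	bch_bset_sort_state_free(&state);
	return 0;
}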
373struct bset_stats {
374 size_t sets_written, sets_unwritten;
375 size_t bytes_written, bytes_unwritten;
376 size_t floats, failed;
377};
378
379void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
380
381/* Bkey utility code */
382
383#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
384
385static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
386{
387 return bkey_idx(i->start, idx);
388}
389
390static inline void bkey_init(struct bkey *k)
391{
392 *k = ZERO_KEY;
393}
394
191static __always_inline int64_t bkey_cmp(const struct bkey *l, 395static __always_inline int64_t bkey_cmp(const struct bkey *l,
192 const struct bkey *r) 396 const struct bkey *r)
193{ 397{
@@ -196,6 +400,62 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
196 : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); 400 : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
197} 401}
198 402
403void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
404 unsigned);
405bool __bch_cut_front(const struct bkey *, struct bkey *);
406bool __bch_cut_back(const struct bkey *, struct bkey *);
407
408static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
409{
410 BUG_ON(bkey_cmp(where, k) > 0);
411 return __bch_cut_front(where, k);
412}
413
414static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
415{
416 BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
417 return __bch_cut_back(where, k);
418}
419
420#define PRECEDING_KEY(_k) \
421({ \
422 struct bkey *_ret = NULL; \
423 \
424 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
425 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
426 \
427 if (!_ret->low) \
428 _ret->high--; \
429 _ret->low--; \
430 } \
431 \
432 _ret; \
433})
434
435static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
436{
437 return b->ops->key_invalid(b, k);
438}
439
440static inline bool bch_ptr_bad(struct btree_keys *b, const struct bkey *k)
441{
442 return b->ops->key_bad(b, k);
443}
444
445static inline void bch_bkey_to_text(struct btree_keys *b, char *buf,
446 size_t size, const struct bkey *k)
447{
448 return b->ops->key_to_text(buf, size, k);
449}
450
451static inline bool bch_bkey_equal_header(const struct bkey *l,
452 const struct bkey *r)
453{
454 return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
455 KEY_PTRS(l) == KEY_PTRS(r) &&
456 KEY_CSUM(l) == KEY_CSUM(r));
457}
458
199/* Keylists */ 459/* Keylists */
200 460
201struct keylist { 461struct keylist {
@@ -257,136 +517,44 @@ static inline size_t bch_keylist_bytes(struct keylist *l)
257 517
258struct bkey *bch_keylist_pop(struct keylist *); 518struct bkey *bch_keylist_pop(struct keylist *);
259void bch_keylist_pop_front(struct keylist *); 519void bch_keylist_pop_front(struct keylist *);
260int bch_keylist_realloc(struct keylist *, int, struct cache_set *); 520int __bch_keylist_realloc(struct keylist *, unsigned);
261
262void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
263 unsigned);
264bool __bch_cut_front(const struct bkey *, struct bkey *);
265bool __bch_cut_back(const struct bkey *, struct bkey *);
266 521
267static inline bool bch_cut_front(const struct bkey *where, struct bkey *k) 522/* Debug stuff */
268{
269 BUG_ON(bkey_cmp(where, k) > 0);
270 return __bch_cut_front(where, k);
271}
272 523
273static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) 524#ifdef CONFIG_BCACHE_DEBUG
274{
275 BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
276 return __bch_cut_back(where, k);
277}
278
279const char *bch_ptr_status(struct cache_set *, const struct bkey *);
280bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
281bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
282
283bool bch_ptr_bad(struct btree *, const struct bkey *);
284
285static inline uint8_t gen_after(uint8_t a, uint8_t b)
286{
287 uint8_t r = a - b;
288 return r > 128U ? 0 : r;
289}
290
291static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
292 unsigned i)
293{
294 return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
295}
296
297static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
298 unsigned i)
299{
300 return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
301}
302
303
304typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
305
306struct bkey *bch_btree_iter_next(struct btree_iter *);
307struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
308 struct btree *, ptr_filter_fn);
309
310void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
311struct bkey *__bch_btree_iter_init(struct btree *, struct btree_iter *,
312 struct bkey *, struct bset_tree *);
313
314/* 32 bits total: */
315#define BKEY_MID_BITS 3
316#define BKEY_EXPONENT_BITS 7
317#define BKEY_MANTISSA_BITS 22
318#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
319
320struct bkey_float {
321 unsigned exponent:BKEY_EXPONENT_BITS;
322 unsigned m:BKEY_MID_BITS;
323 unsigned mantissa:BKEY_MANTISSA_BITS;
324} __packed;
325
326/*
327 * BSET_CACHELINE was originally intended to match the hardware cacheline size -
328 * it used to be 64, but I realized the lookup code would touch slightly less
329 * memory if it was 128.
330 *
331 * It defines the number of bytes (in struct bset) per struct bkey_float in
332 * the auxiliary search tree - when we're done searching the bset_float tree we
333 * have this many bytes left that we do a linear search over.
334 *
335 * Since (after level 5) every level of the bset_tree is on a new cacheline,
336 * we're touching one fewer cacheline in the bset tree in exchange for one more
337 * cacheline in the linear search - but the linear search might stop before it
338 * gets to the second cacheline.
339 */
340
341#define BSET_CACHELINE 128
342#define bset_tree_space(b) (btree_data_space(b) / BSET_CACHELINE)
343 525
344#define bset_tree_bytes(b) (bset_tree_space(b) * sizeof(struct bkey_float)) 526int __bch_count_data(struct btree_keys *);
345#define bset_prev_bytes(b) (bset_tree_space(b) * sizeof(uint8_t)) 527void __bch_check_keys(struct btree_keys *, const char *, ...);
528void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
529void bch_dump_bucket(struct btree_keys *);
346 530
347void bch_bset_init_next(struct btree *); 531#else
348 532
349void bch_bset_fix_invalidated_key(struct btree *, struct bkey *); 533static inline int __bch_count_data(struct btree_keys *b) { return -1; }
350void bch_bset_fix_lookup_table(struct btree *, struct bkey *); 534static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
535static inline void bch_dump_bucket(struct btree_keys *b) {}
536void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
351 537
352struct bkey *__bch_bset_search(struct btree *, struct bset_tree *, 538#endif
353 const struct bkey *);
354 539
355/* 540static inline bool btree_keys_expensive_checks(struct btree_keys *b)
356 * Returns the first key that is strictly greater than search
357 */
358static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
359 const struct bkey *search)
360{ 541{
361 return search ? __bch_bset_search(b, t, search) : t->data->start; 542#ifdef CONFIG_BCACHE_DEBUG
543 return *b->expensive_debug_checks;
544#else
545 return false;
546#endif
362} 547}
363 548
364#define PRECEDING_KEY(_k) \ 549static inline int bch_count_data(struct btree_keys *b)
365({ \
366 struct bkey *_ret = NULL; \
367 \
368 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
369 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
370 \
371 if (!_ret->low) \
372 _ret->high--; \
373 _ret->low--; \
374 } \
375 \
376 _ret; \
377})
378
379bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
380void bch_btree_sort_lazy(struct btree *);
381void bch_btree_sort_into(struct btree *, struct btree *);
382void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
383void bch_btree_sort_partial(struct btree *, unsigned);
384
385static inline void bch_btree_sort(struct btree *b)
386{ 550{
387 bch_btree_sort_partial(b, 0); 551 return btree_keys_expensive_checks(b) ? __bch_count_data(b) : -1;
388} 552}
389 553
390int bch_bset_print_stats(struct cache_set *, char *); 554#define bch_check_keys(b, ...) \
555do { \
556 if (btree_keys_expensive_checks(b)) \
557 __bch_check_keys(b, __VA_ARGS__); \
558} while (0)
391 559
392#endif 560#endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 31bb53fcc67a..98cc0a810a36 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -23,7 +23,7 @@
23#include "bcache.h" 23#include "bcache.h"
24#include "btree.h" 24#include "btree.h"
25#include "debug.h" 25#include "debug.h"
26#include "writeback.h" 26#include "extents.h"
27 27
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/bitops.h> 29#include <linux/bitops.h>
@@ -89,13 +89,6 @@
89 * Test module load/unload 89 * Test module load/unload
90 */ 90 */
91 91
92enum {
93 BTREE_INSERT_STATUS_INSERT,
94 BTREE_INSERT_STATUS_BACK_MERGE,
95 BTREE_INSERT_STATUS_OVERWROTE,
96 BTREE_INSERT_STATUS_FRONT_MERGE,
97};
98
99#define MAX_NEED_GC 64 92#define MAX_NEED_GC 64
100#define MAX_SAVE_PRIO 72 93#define MAX_SAVE_PRIO 72
101 94
@@ -106,14 +99,6 @@ enum {
106 99
107static struct workqueue_struct *btree_io_wq; 100static struct workqueue_struct *btree_io_wq;
108 101
109static inline bool should_split(struct btree *b)
110{
111 struct bset *i = write_block(b);
112 return b->written >= btree_blocks(b) ||
113 (b->written + __set_blocks(i, i->keys + 15, b->c)
114 > btree_blocks(b));
115}
116
117#define insert_lock(s, b) ((b)->level <= (s)->lock) 102#define insert_lock(s, b) ((b)->level <= (s)->lock)
118 103
119/* 104/*
@@ -167,6 +152,8 @@ static inline bool should_split(struct btree *b)
167 _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ 152 _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
168 } \ 153 } \
169 rw_unlock(_w, _b); \ 154 rw_unlock(_w, _b); \
155 if (_r == -EINTR) \
156 schedule(); \
170 bch_cannibalize_unlock(c); \ 157 bch_cannibalize_unlock(c); \
171 if (_r == -ENOSPC) { \ 158 if (_r == -ENOSPC) { \
172 wait_event((c)->try_wait, \ 159 wait_event((c)->try_wait, \
@@ -175,9 +162,15 @@ static inline bool should_split(struct btree *b)
175 } \ 162 } \
176 } while (_r == -EINTR); \ 163 } while (_r == -EINTR); \
177 \ 164 \
165 finish_wait(&(c)->bucket_wait, &(op)->wait); \
178 _r; \ 166 _r; \
179}) 167})
180 168
169static inline struct bset *write_block(struct btree *b)
170{
171 return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
172}
173
181/* Btree key manipulation */ 174/* Btree key manipulation */
182 175
183void bkey_put(struct cache_set *c, struct bkey *k) 176void bkey_put(struct cache_set *c, struct bkey *k)
@@ -194,16 +187,16 @@ void bkey_put(struct cache_set *c, struct bkey *k)
194static uint64_t btree_csum_set(struct btree *b, struct bset *i) 187static uint64_t btree_csum_set(struct btree *b, struct bset *i)
195{ 188{
196 uint64_t crc = b->key.ptr[0]; 189 uint64_t crc = b->key.ptr[0];
197 void *data = (void *) i + 8, *end = end(i); 190 void *data = (void *) i + 8, *end = bset_bkey_last(i);
198 191
199 crc = bch_crc64_update(crc, data, end - data); 192 crc = bch_crc64_update(crc, data, end - data);
200 return crc ^ 0xffffffffffffffffULL; 193 return crc ^ 0xffffffffffffffffULL;
201} 194}
202 195
203static void bch_btree_node_read_done(struct btree *b) 196void bch_btree_node_read_done(struct btree *b)
204{ 197{
205 const char *err = "bad btree header"; 198 const char *err = "bad btree header";
206 struct bset *i = b->sets[0].data; 199 struct bset *i = btree_bset_first(b);
207 struct btree_iter *iter; 200 struct btree_iter *iter;
208 201
209 iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); 202 iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
@@ -211,21 +204,22 @@ static void bch_btree_node_read_done(struct btree *b)
211 iter->used = 0; 204 iter->used = 0;
212 205
213#ifdef CONFIG_BCACHE_DEBUG 206#ifdef CONFIG_BCACHE_DEBUG
214 iter->b = b; 207 iter->b = &b->keys;
215#endif 208#endif
216 209
217 if (!i->seq) 210 if (!i->seq)
218 goto err; 211 goto err;
219 212
220 for (; 213 for (;
221 b->written < btree_blocks(b) && i->seq == b->sets[0].data->seq; 214 b->written < btree_blocks(b) && i->seq == b->keys.set[0].data->seq;
222 i = write_block(b)) { 215 i = write_block(b)) {
223 err = "unsupported bset version"; 216 err = "unsupported bset version";
224 if (i->version > BCACHE_BSET_VERSION) 217 if (i->version > BCACHE_BSET_VERSION)
225 goto err; 218 goto err;
226 219
227 err = "bad btree header"; 220 err = "bad btree header";
228 if (b->written + set_blocks(i, b->c) > btree_blocks(b)) 221 if (b->written + set_blocks(i, block_bytes(b->c)) >
222 btree_blocks(b))
229 goto err; 223 goto err;
230 224
231 err = "bad magic"; 225 err = "bad magic";
@@ -245,39 +239,40 @@ static void bch_btree_node_read_done(struct btree *b)
245 } 239 }
246 240
247 err = "empty set"; 241 err = "empty set";
248 if (i != b->sets[0].data && !i->keys) 242 if (i != b->keys.set[0].data && !i->keys)
249 goto err; 243 goto err;
250 244
251 bch_btree_iter_push(iter, i->start, end(i)); 245 bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
252 246
253 b->written += set_blocks(i, b->c); 247 b->written += set_blocks(i, block_bytes(b->c));
254 } 248 }
255 249
256 err = "corrupted btree"; 250 err = "corrupted btree";
257 for (i = write_block(b); 251 for (i = write_block(b);
258 index(i, b) < btree_blocks(b); 252 bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key);
259 i = ((void *) i) + block_bytes(b->c)) 253 i = ((void *) i) + block_bytes(b->c))
260 if (i->seq == b->sets[0].data->seq) 254 if (i->seq == b->keys.set[0].data->seq)
261 goto err; 255 goto err;
262 256
263 bch_btree_sort_and_fix_extents(b, iter); 257 bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
264 258
265 i = b->sets[0].data; 259 i = b->keys.set[0].data;
266 err = "short btree key"; 260 err = "short btree key";
267 if (b->sets[0].size && 261 if (b->keys.set[0].size &&
268 bkey_cmp(&b->key, &b->sets[0].end) < 0) 262 bkey_cmp(&b->key, &b->keys.set[0].end) < 0)
269 goto err; 263 goto err;
270 264
271 if (b->written < btree_blocks(b)) 265 if (b->written < btree_blocks(b))
272 bch_bset_init_next(b); 266 bch_bset_init_next(&b->keys, write_block(b),
267 bset_magic(&b->c->sb));
273out: 268out:
274 mempool_free(iter, b->c->fill_iter); 269 mempool_free(iter, b->c->fill_iter);
275 return; 270 return;
276err: 271err:
277 set_btree_node_io_error(b); 272 set_btree_node_io_error(b);
278 bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys", 273 bch_cache_set_error(b->c, "%s at bucket %zu, block %u, %u keys",
279 err, PTR_BUCKET_NR(b->c, &b->key, 0), 274 err, PTR_BUCKET_NR(b->c, &b->key, 0),
280 index(i, b), i->keys); 275 bset_block_offset(b, i), i->keys);
281 goto out; 276 goto out;
282} 277}
283 278
@@ -287,7 +282,7 @@ static void btree_node_read_endio(struct bio *bio, int error)
287 closure_put(cl); 282 closure_put(cl);
288} 283}
289 284
290void bch_btree_node_read(struct btree *b) 285static void bch_btree_node_read(struct btree *b)
291{ 286{
292 uint64_t start_time = local_clock(); 287 uint64_t start_time = local_clock();
293 struct closure cl; 288 struct closure cl;
@@ -299,11 +294,11 @@ void bch_btree_node_read(struct btree *b)
299 294
300 bio = bch_bbio_alloc(b->c); 295 bio = bch_bbio_alloc(b->c);
301 bio->bi_rw = REQ_META|READ_SYNC; 296 bio->bi_rw = REQ_META|READ_SYNC;
302 bio->bi_size = KEY_SIZE(&b->key) << 9; 297 bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
303 bio->bi_end_io = btree_node_read_endio; 298 bio->bi_end_io = btree_node_read_endio;
304 bio->bi_private = &cl; 299 bio->bi_private = &cl;
305 300
306 bch_bio_map(bio, b->sets[0].data); 301 bch_bio_map(bio, b->keys.set[0].data);
307 302
308 bch_submit_bbio(bio, b->c, &b->key, 0); 303 bch_submit_bbio(bio, b->c, &b->key, 0);
309 closure_sync(&cl); 304 closure_sync(&cl);
@@ -340,9 +335,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
340 w->journal = NULL; 335 w->journal = NULL;
341} 336}
342 337
338static void btree_node_write_unlock(struct closure *cl)
339{
340 struct btree *b = container_of(cl, struct btree, io);
341
342 up(&b->io_mutex);
343}
344
343static void __btree_node_write_done(struct closure *cl) 345static void __btree_node_write_done(struct closure *cl)
344{ 346{
345 struct btree *b = container_of(cl, struct btree, io.cl); 347 struct btree *b = container_of(cl, struct btree, io);
346 struct btree_write *w = btree_prev_write(b); 348 struct btree_write *w = btree_prev_write(b);
347 349
348 bch_bbio_free(b->bio, b->c); 350 bch_bbio_free(b->bio, b->c);
@@ -353,16 +355,16 @@ static void __btree_node_write_done(struct closure *cl)
353 queue_delayed_work(btree_io_wq, &b->work, 355 queue_delayed_work(btree_io_wq, &b->work,
354 msecs_to_jiffies(30000)); 356 msecs_to_jiffies(30000));
355 357
356 closure_return(cl); 358 closure_return_with_destructor(cl, btree_node_write_unlock);
357} 359}
358 360
359static void btree_node_write_done(struct closure *cl) 361static void btree_node_write_done(struct closure *cl)
360{ 362{
361 struct btree *b = container_of(cl, struct btree, io.cl); 363 struct btree *b = container_of(cl, struct btree, io);
362 struct bio_vec *bv; 364 struct bio_vec *bv;
363 int n; 365 int n;
364 366
365 __bio_for_each_segment(bv, b->bio, n, 0) 367 bio_for_each_segment_all(bv, b->bio, n)
366 __free_page(bv->bv_page); 368 __free_page(bv->bv_page);
367 369
368 __btree_node_write_done(cl); 370 __btree_node_write_done(cl);
@@ -371,7 +373,7 @@ static void btree_node_write_done(struct closure *cl)
371static void btree_node_write_endio(struct bio *bio, int error) 373static void btree_node_write_endio(struct bio *bio, int error)
372{ 374{
373 struct closure *cl = bio->bi_private; 375 struct closure *cl = bio->bi_private;
374 struct btree *b = container_of(cl, struct btree, io.cl); 376 struct btree *b = container_of(cl, struct btree, io);
375 377
376 if (error) 378 if (error)
377 set_btree_node_io_error(b); 379 set_btree_node_io_error(b);
@@ -382,8 +384,8 @@ static void btree_node_write_endio(struct bio *bio, int error)
382 384
383static void do_btree_node_write(struct btree *b) 385static void do_btree_node_write(struct btree *b)
384{ 386{
385 struct closure *cl = &b->io.cl; 387 struct closure *cl = &b->io;
386 struct bset *i = b->sets[b->nsets].data; 388 struct bset *i = btree_bset_last(b);
387 BKEY_PADDED(key) k; 389 BKEY_PADDED(key) k;
388 390
389 i->version = BCACHE_BSET_VERSION; 391 i->version = BCACHE_BSET_VERSION;
@@ -395,7 +397,7 @@ static void do_btree_node_write(struct btree *b)
395 b->bio->bi_end_io = btree_node_write_endio; 397 b->bio->bi_end_io = btree_node_write_endio;
396 b->bio->bi_private = cl; 398 b->bio->bi_private = cl;
397 b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; 399 b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA;
398 b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); 400 b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c));
399 bch_bio_map(b->bio, i); 401 bch_bio_map(b->bio, i);
400 402
401 /* 403 /*
@@ -414,14 +416,15 @@ static void do_btree_node_write(struct btree *b)
414 */ 416 */
415 417
416 bkey_copy(&k.key, &b->key); 418 bkey_copy(&k.key, &b->key);
417 SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); 419 SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
420 bset_sector_offset(&b->keys, i));
418 421
419 if (!bio_alloc_pages(b->bio, GFP_NOIO)) { 422 if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
420 int j; 423 int j;
421 struct bio_vec *bv; 424 struct bio_vec *bv;
422 void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); 425 void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
423 426
424 bio_for_each_segment(bv, b->bio, j) 427 bio_for_each_segment_all(bv, b->bio, j)
425 memcpy(page_address(bv->bv_page), 428 memcpy(page_address(bv->bv_page),
426 base + j * PAGE_SIZE, PAGE_SIZE); 429 base + j * PAGE_SIZE, PAGE_SIZE);
427 430
@@ -435,40 +438,54 @@ static void do_btree_node_write(struct btree *b)
435 bch_submit_bbio(b->bio, b->c, &k.key, 0); 438 bch_submit_bbio(b->bio, b->c, &k.key, 0);
436 439
437 closure_sync(cl); 440 closure_sync(cl);
438 __btree_node_write_done(cl); 441 continue_at_nobarrier(cl, __btree_node_write_done, NULL);
439 } 442 }
440} 443}
441 444
442void bch_btree_node_write(struct btree *b, struct closure *parent) 445void bch_btree_node_write(struct btree *b, struct closure *parent)
443{ 446{
444 struct bset *i = b->sets[b->nsets].data; 447 struct bset *i = btree_bset_last(b);
445 448
446 trace_bcache_btree_write(b); 449 trace_bcache_btree_write(b);
447 450
448 BUG_ON(current->bio_list); 451 BUG_ON(current->bio_list);
449 BUG_ON(b->written >= btree_blocks(b)); 452 BUG_ON(b->written >= btree_blocks(b));
450 BUG_ON(b->written && !i->keys); 453 BUG_ON(b->written && !i->keys);
451 BUG_ON(b->sets->data->seq != i->seq); 454 BUG_ON(btree_bset_first(b)->seq != i->seq);
452 bch_check_keys(b, "writing"); 455 bch_check_keys(&b->keys, "writing");
453 456
454 cancel_delayed_work(&b->work); 457 cancel_delayed_work(&b->work);
455 458
456 /* If caller isn't waiting for write, parent refcount is cache set */ 459 /* If caller isn't waiting for write, parent refcount is cache set */
457 closure_lock(&b->io, parent ?: &b->c->cl); 460 down(&b->io_mutex);
461 closure_init(&b->io, parent ?: &b->c->cl);
458 462
459 clear_bit(BTREE_NODE_dirty, &b->flags); 463 clear_bit(BTREE_NODE_dirty, &b->flags);
460 change_bit(BTREE_NODE_write_idx, &b->flags); 464 change_bit(BTREE_NODE_write_idx, &b->flags);
461 465
462 do_btree_node_write(b); 466 do_btree_node_write(b);
463 467
464 b->written += set_blocks(i, b->c); 468 atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size,
465 atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
466 &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); 469 &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
467 470
468 bch_btree_sort_lazy(b); 471 b->written += set_blocks(i, block_bytes(b->c));
472
473 /* If not a leaf node, always sort */
474 if (b->level && b->keys.nsets)
475 bch_btree_sort(&b->keys, &b->c->sort);
476 else
477 bch_btree_sort_lazy(&b->keys, &b->c->sort);
478
479 /*
480 * do verify if there was more than one set initially (i.e. we did a
481 * sort) and we sorted down to a single set:
482 */
483 if (i != b->keys.set->data && !b->keys.nsets)
484 bch_btree_verify(b);
469 485
470 if (b->written < btree_blocks(b)) 486 if (b->written < btree_blocks(b))
471 bch_bset_init_next(b); 487 bch_bset_init_next(&b->keys, write_block(b),
488 bset_magic(&b->c->sb));
472} 489}
473 490
474static void bch_btree_node_write_sync(struct btree *b) 491static void bch_btree_node_write_sync(struct btree *b)
@@ -493,7 +510,7 @@ static void btree_node_write_work(struct work_struct *w)
493 510
494static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) 511static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
495{ 512{
496 struct bset *i = b->sets[b->nsets].data; 513 struct bset *i = btree_bset_last(b);
497 struct btree_write *w = btree_current_write(b); 514 struct btree_write *w = btree_current_write(b);
498 515
499 BUG_ON(!b->written); 516 BUG_ON(!b->written);
@@ -528,24 +545,6 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
528 * mca -> memory cache 545 * mca -> memory cache
529 */ 546 */
530 547
531static void mca_reinit(struct btree *b)
532{
533 unsigned i;
534
535 b->flags = 0;
536 b->written = 0;
537 b->nsets = 0;
538
539 for (i = 0; i < MAX_BSETS; i++)
540 b->sets[i].size = 0;
541 /*
542 * Second loop starts at 1 because b->sets[0]->data is the memory we
543 * allocated
544 */
545 for (i = 1; i < MAX_BSETS; i++)
546 b->sets[i].data = NULL;
547}
548
549#define mca_reserve(c) (((c->root && c->root->level) \ 548#define mca_reserve(c) (((c->root && c->root->level) \
550 ? c->root->level : 1) * 8 + 16) 549 ? c->root->level : 1) * 8 + 16)
551#define mca_can_free(c) \ 550#define mca_can_free(c) \
@@ -553,28 +552,12 @@ static void mca_reinit(struct btree *b)
553 552
554static void mca_data_free(struct btree *b) 553static void mca_data_free(struct btree *b)
555{ 554{
556 struct bset_tree *t = b->sets; 555 BUG_ON(b->io_mutex.count != 1);
557 BUG_ON(!closure_is_unlocked(&b->io.cl));
558 556
559 if (bset_prev_bytes(b) < PAGE_SIZE) 557 bch_btree_keys_free(&b->keys);
560 kfree(t->prev);
561 else
562 free_pages((unsigned long) t->prev,
563 get_order(bset_prev_bytes(b)));
564 558
565 if (bset_tree_bytes(b) < PAGE_SIZE)
566 kfree(t->tree);
567 else
568 free_pages((unsigned long) t->tree,
569 get_order(bset_tree_bytes(b)));
570
571 free_pages((unsigned long) t->data, b->page_order);
572
573 t->prev = NULL;
574 t->tree = NULL;
575 t->data = NULL;
576 list_move(&b->list, &b->c->btree_cache_freed);
577 b->c->bucket_cache_used--; 559 b->c->bucket_cache_used--;
560 list_move(&b->list, &b->c->btree_cache_freed);
578} 561}
579 562
580static void mca_bucket_free(struct btree *b) 563static void mca_bucket_free(struct btree *b)
@@ -593,34 +576,16 @@ static unsigned btree_order(struct bkey *k)
593 576
594static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) 577static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
595{ 578{
596 struct bset_tree *t = b->sets; 579 if (!bch_btree_keys_alloc(&b->keys,
597 BUG_ON(t->data); 580 max_t(unsigned,
598 581 ilog2(b->c->btree_pages),
599 b->page_order = max_t(unsigned, 582 btree_order(k)),
600 ilog2(b->c->btree_pages), 583 gfp)) {
601 btree_order(k)); 584 b->c->bucket_cache_used++;
602 585 list_move(&b->list, &b->c->btree_cache);
603 t->data = (void *) __get_free_pages(gfp, b->page_order); 586 } else {
604 if (!t->data) 587 list_move(&b->list, &b->c->btree_cache_freed);
605 goto err; 588 }
606
607 t->tree = bset_tree_bytes(b) < PAGE_SIZE
608 ? kmalloc(bset_tree_bytes(b), gfp)
609 : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
610 if (!t->tree)
611 goto err;
612
613 t->prev = bset_prev_bytes(b) < PAGE_SIZE
614 ? kmalloc(bset_prev_bytes(b), gfp)
615 : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
616 if (!t->prev)
617 goto err;
618
619 list_move(&b->list, &b->c->btree_cache);
620 b->c->bucket_cache_used++;
621 return;
622err:
623 mca_data_free(b);
624} 589}
625 590
626static struct btree *mca_bucket_alloc(struct cache_set *c, 591static struct btree *mca_bucket_alloc(struct cache_set *c,
@@ -635,7 +600,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
635 INIT_LIST_HEAD(&b->list); 600 INIT_LIST_HEAD(&b->list);
636 INIT_DELAYED_WORK(&b->work, btree_node_write_work); 601 INIT_DELAYED_WORK(&b->work, btree_node_write_work);
637 b->c = c; 602 b->c = c;
638 closure_init_unlocked(&b->io); 603 sema_init(&b->io_mutex, 1);
639 604
640 mca_data_alloc(b, k, gfp); 605 mca_data_alloc(b, k, gfp);
641 return b; 606 return b;
@@ -651,24 +616,31 @@ static int mca_reap(struct btree *b, unsigned min_order, bool flush)
651 if (!down_write_trylock(&b->lock)) 616 if (!down_write_trylock(&b->lock))
652 return -ENOMEM; 617 return -ENOMEM;
653 618
654 BUG_ON(btree_node_dirty(b) && !b->sets[0].data); 619 BUG_ON(btree_node_dirty(b) && !b->keys.set[0].data);
655 620
656 if (b->page_order < min_order || 621 if (b->keys.page_order < min_order)
657 (!flush && 622 goto out_unlock;
658 (btree_node_dirty(b) || 623
659 atomic_read(&b->io.cl.remaining) != -1))) { 624 if (!flush) {
660 rw_unlock(true, b); 625 if (btree_node_dirty(b))
661 return -ENOMEM; 626 goto out_unlock;
627
628 if (down_trylock(&b->io_mutex))
629 goto out_unlock;
630 up(&b->io_mutex);
662 } 631 }
663 632
664 if (btree_node_dirty(b)) 633 if (btree_node_dirty(b))
665 bch_btree_node_write_sync(b); 634 bch_btree_node_write_sync(b);
666 635
667 /* wait for any in flight btree write */ 636 /* wait for any in flight btree write */
668 closure_wait_event(&b->io.wait, &cl, 637 down(&b->io_mutex);
669 atomic_read(&b->io.cl.remaining) == -1); 638 up(&b->io_mutex);
670 639
671 return 0; 640 return 0;
641out_unlock:
642 rw_unlock(true, b);
643 return -ENOMEM;
672} 644}
673 645
674static unsigned long bch_mca_scan(struct shrinker *shrink, 646static unsigned long bch_mca_scan(struct shrinker *shrink,
@@ -714,14 +686,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
714 } 686 }
715 } 687 }
716 688
717 /*
718 * Can happen right when we first start up, before we've read in any
719 * btree nodes
720 */
721 if (list_empty(&c->btree_cache))
722 goto out;
723
724 for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { 689 for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
690 if (list_empty(&c->btree_cache))
691 goto out;
692
725 b = list_first_entry(&c->btree_cache, struct btree, list); 693 b = list_first_entry(&c->btree_cache, struct btree, list);
726 list_rotate_left(&c->btree_cache); 694 list_rotate_left(&c->btree_cache);
727 695
@@ -767,6 +735,8 @@ void bch_btree_cache_free(struct cache_set *c)
767#ifdef CONFIG_BCACHE_DEBUG 735#ifdef CONFIG_BCACHE_DEBUG
768 if (c->verify_data) 736 if (c->verify_data)
769 list_move(&c->verify_data->list, &c->btree_cache); 737 list_move(&c->verify_data->list, &c->btree_cache);
738
739 free_pages((unsigned long) c->verify_ondisk, ilog2(bucket_pages(c)));
770#endif 740#endif
771 741
772 list_splice(&c->btree_cache_freeable, 742 list_splice(&c->btree_cache_freeable,
@@ -807,10 +777,13 @@ int bch_btree_cache_alloc(struct cache_set *c)
807#ifdef CONFIG_BCACHE_DEBUG 777#ifdef CONFIG_BCACHE_DEBUG
808 mutex_init(&c->verify_lock); 778 mutex_init(&c->verify_lock);
809 779
780 c->verify_ondisk = (void *)
781 __get_free_pages(GFP_KERNEL, ilog2(bucket_pages(c)));
782
810 c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); 783 c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL);
811 784
812 if (c->verify_data && 785 if (c->verify_data &&
813 c->verify_data->sets[0].data) 786 c->verify_data->keys.set->data)
814 list_del_init(&c->verify_data->list); 787 list_del_init(&c->verify_data->list);
815 else 788 else
816 c->verify_data = NULL; 789 c->verify_data = NULL;
@@ -908,7 +881,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
908 list_for_each_entry(b, &c->btree_cache_freed, list) 881 list_for_each_entry(b, &c->btree_cache_freed, list)
909 if (!mca_reap(b, 0, false)) { 882 if (!mca_reap(b, 0, false)) {
910 mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO); 883 mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO);
911 if (!b->sets[0].data) 884 if (!b->keys.set[0].data)
912 goto err; 885 goto err;
913 else 886 else
914 goto out; 887 goto out;
@@ -919,10 +892,10 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
919 goto err; 892 goto err;
920 893
921 BUG_ON(!down_write_trylock(&b->lock)); 894 BUG_ON(!down_write_trylock(&b->lock));
922 if (!b->sets->data) 895 if (!b->keys.set->data)
923 goto err; 896 goto err;
924out: 897out:
925 BUG_ON(!closure_is_unlocked(&b->io.cl)); 898 BUG_ON(b->io_mutex.count != 1);
926 899
927 bkey_copy(&b->key, k); 900 bkey_copy(&b->key, k);
928 list_move(&b->list, &c->btree_cache); 901 list_move(&b->list, &c->btree_cache);
@@ -930,10 +903,17 @@ out:
930 hlist_add_head_rcu(&b->hash, mca_hash(c, k)); 903 hlist_add_head_rcu(&b->hash, mca_hash(c, k));
931 904
932 lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); 905 lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_);
933 b->level = level;
934 b->parent = (void *) ~0UL; 906 b->parent = (void *) ~0UL;
907 b->flags = 0;
908 b->written = 0;
909 b->level = level;
935 910
936 mca_reinit(b); 911 if (!b->level)
912 bch_btree_keys_init(&b->keys, &bch_extent_keys_ops,
913 &b->c->expensive_debug_checks);
914 else
915 bch_btree_keys_init(&b->keys, &bch_btree_keys_ops,
916 &b->c->expensive_debug_checks);
937 917
938 return b; 918 return b;
939err: 919err:
@@ -994,13 +974,13 @@ retry:
994 974
995 b->accessed = 1; 975 b->accessed = 1;
996 976
997 for (; i <= b->nsets && b->sets[i].size; i++) { 977 for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
998 prefetch(b->sets[i].tree); 978 prefetch(b->keys.set[i].tree);
999 prefetch(b->sets[i].data); 979 prefetch(b->keys.set[i].data);
1000 } 980 }
1001 981
1002 for (; i <= b->nsets; i++) 982 for (; i <= b->keys.nsets; i++)
1003 prefetch(b->sets[i].data); 983 prefetch(b->keys.set[i].data);
1004 984
1005 if (btree_node_io_error(b)) { 985 if (btree_node_io_error(b)) {
1006 rw_unlock(write, b); 986 rw_unlock(write, b);
@@ -1063,7 +1043,7 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)
1063 1043
1064 mutex_lock(&c->bucket_lock); 1044 mutex_lock(&c->bucket_lock);
1065retry: 1045retry:
1066 if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, wait)) 1046 if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
1067 goto err; 1047 goto err;
1068 1048
1069 bkey_put(c, &k.key); 1049 bkey_put(c, &k.key);
@@ -1080,7 +1060,7 @@ retry:
1080 } 1060 }
1081 1061
1082 b->accessed = 1; 1062 b->accessed = 1;
1083 bch_bset_init_next(b); 1063 bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
1084 1064
1085 mutex_unlock(&c->bucket_lock); 1065 mutex_unlock(&c->bucket_lock);
1086 1066
@@ -1098,8 +1078,10 @@ err:
1098static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) 1078static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
1099{ 1079{
1100 struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); 1080 struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
1101 if (!IS_ERR_OR_NULL(n)) 1081 if (!IS_ERR_OR_NULL(n)) {
1102 bch_btree_sort_into(b, n); 1082 bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
1083 bkey_copy_key(&n->key, &b->key);
1084 }
1103 1085
1104 return n; 1086 return n;
1105} 1087}
@@ -1120,6 +1102,28 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
1120 atomic_inc(&b->c->prio_blocked); 1102 atomic_inc(&b->c->prio_blocked);
1121} 1103}
1122 1104
1105static int btree_check_reserve(struct btree *b, struct btree_op *op)
1106{
1107 struct cache_set *c = b->c;
1108 struct cache *ca;
1109 unsigned i, reserve = c->root->level * 2 + 1;
1110 int ret = 0;
1111
1112 mutex_lock(&c->bucket_lock);
1113
1114 for_each_cache(ca, c, i)
1115 if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
1116 if (op)
1117 prepare_to_wait(&c->bucket_wait, &op->wait,
1118 TASK_UNINTERRUPTIBLE);
1119 ret = -EINTR;
1120 break;
1121 }
1122
1123 mutex_unlock(&c->bucket_lock);
1124 return ret;
1125}
1126
1123/* Garbage collection */ 1127/* Garbage collection */
1124 1128
1125uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) 1129uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
@@ -1183,11 +1187,11 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
1183 1187
1184 gc->nodes++; 1188 gc->nodes++;
1185 1189
1186 for_each_key_filter(b, k, &iter, bch_ptr_invalid) { 1190 for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
1187 stale = max(stale, btree_mark_key(b, k)); 1191 stale = max(stale, btree_mark_key(b, k));
1188 keys++; 1192 keys++;
1189 1193
1190 if (bch_ptr_bad(b, k)) 1194 if (bch_ptr_bad(&b->keys, k))
1191 continue; 1195 continue;
1192 1196
1193 gc->key_bytes += bkey_u64s(k); 1197 gc->key_bytes += bkey_u64s(k);
@@ -1197,9 +1201,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
1197 gc->data += KEY_SIZE(k); 1201 gc->data += KEY_SIZE(k);
1198 } 1202 }
1199 1203
1200 for (t = b->sets; t <= &b->sets[b->nsets]; t++) 1204 for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++)
1201 btree_bug_on(t->size && 1205 btree_bug_on(t->size &&
1202 bset_written(b, t) && 1206 bset_written(&b->keys, t) &&
1203 bkey_cmp(&b->key, &t->end) < 0, 1207 bkey_cmp(&b->key, &t->end) < 0,
1204 b, "found short btree key in gc"); 1208 b, "found short btree key in gc");
1205 1209
@@ -1243,7 +1247,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1243 blocks = btree_default_blocks(b->c) * 2 / 3; 1247 blocks = btree_default_blocks(b->c) * 2 / 3;
1244 1248
1245 if (nodes < 2 || 1249 if (nodes < 2 ||
1246 __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1)) 1250 __set_blocks(b->keys.set[0].data, keys,
1251 block_bytes(b->c)) > blocks * (nodes - 1))
1247 return 0; 1252 return 0;
1248 1253
1249 for (i = 0; i < nodes; i++) { 1254 for (i = 0; i < nodes; i++) {
@@ -1253,18 +1258,19 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1253 } 1258 }
1254 1259
1255 for (i = nodes - 1; i > 0; --i) { 1260 for (i = nodes - 1; i > 0; --i) {
1256 struct bset *n1 = new_nodes[i]->sets->data; 1261 struct bset *n1 = btree_bset_first(new_nodes[i]);
1257 struct bset *n2 = new_nodes[i - 1]->sets->data; 1262 struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
1258 struct bkey *k, *last = NULL; 1263 struct bkey *k, *last = NULL;
1259 1264
1260 keys = 0; 1265 keys = 0;
1261 1266
1262 if (i > 1) { 1267 if (i > 1) {
1263 for (k = n2->start; 1268 for (k = n2->start;
1264 k < end(n2); 1269 k < bset_bkey_last(n2);
1265 k = bkey_next(k)) { 1270 k = bkey_next(k)) {
1266 if (__set_blocks(n1, n1->keys + keys + 1271 if (__set_blocks(n1, n1->keys + keys +
1267 bkey_u64s(k), b->c) > blocks) 1272 bkey_u64s(k),
1273 block_bytes(b->c)) > blocks)
1268 break; 1274 break;
1269 1275
1270 last = k; 1276 last = k;
@@ -1280,7 +1286,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1280 * though) 1286 * though)
1281 */ 1287 */
1282 if (__set_blocks(n1, n1->keys + n2->keys, 1288 if (__set_blocks(n1, n1->keys + n2->keys,
1283 b->c) > btree_blocks(new_nodes[i])) 1289 block_bytes(b->c)) >
1290 btree_blocks(new_nodes[i]))
1284 goto out_nocoalesce; 1291 goto out_nocoalesce;
1285 1292
1286 keys = n2->keys; 1293 keys = n2->keys;
@@ -1288,27 +1295,28 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1288 last = &r->b->key; 1295 last = &r->b->key;
1289 } 1296 }
1290 1297
1291 BUG_ON(__set_blocks(n1, n1->keys + keys, 1298 BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) >
1292 b->c) > btree_blocks(new_nodes[i])); 1299 btree_blocks(new_nodes[i]));
1293 1300
1294 if (last) 1301 if (last)
1295 bkey_copy_key(&new_nodes[i]->key, last); 1302 bkey_copy_key(&new_nodes[i]->key, last);
1296 1303
1297 memcpy(end(n1), 1304 memcpy(bset_bkey_last(n1),
1298 n2->start, 1305 n2->start,
1299 (void *) node(n2, keys) - (void *) n2->start); 1306 (void *) bset_bkey_idx(n2, keys) - (void *) n2->start);
1300 1307
1301 n1->keys += keys; 1308 n1->keys += keys;
1302 r[i].keys = n1->keys; 1309 r[i].keys = n1->keys;
1303 1310
1304 memmove(n2->start, 1311 memmove(n2->start,
1305 node(n2, keys), 1312 bset_bkey_idx(n2, keys),
1306 (void *) end(n2) - (void *) node(n2, keys)); 1313 (void *) bset_bkey_last(n2) -
1314 (void *) bset_bkey_idx(n2, keys));
1307 1315
1308 n2->keys -= keys; 1316 n2->keys -= keys;
1309 1317
1310 if (bch_keylist_realloc(keylist, 1318 if (__bch_keylist_realloc(keylist,
1311 KEY_PTRS(&new_nodes[i]->key), b->c)) 1319 bkey_u64s(&new_nodes[i]->key)))
1312 goto out_nocoalesce; 1320 goto out_nocoalesce;
1313 1321
1314 bch_btree_node_write(new_nodes[i], &cl); 1322 bch_btree_node_write(new_nodes[i], &cl);
@@ -1316,7 +1324,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1316 } 1324 }
1317 1325
1318 for (i = 0; i < nodes; i++) { 1326 for (i = 0; i < nodes; i++) {
1319 if (bch_keylist_realloc(keylist, KEY_PTRS(&r[i].b->key), b->c)) 1327 if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key)))
1320 goto out_nocoalesce; 1328 goto out_nocoalesce;
1321 1329
1322 make_btree_freeing_key(r[i].b, keylist->top); 1330 make_btree_freeing_key(r[i].b, keylist->top);
@@ -1324,7 +1332,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1324 } 1332 }
1325 1333
1326 /* We emptied out this node */ 1334 /* We emptied out this node */
1327 BUG_ON(new_nodes[0]->sets->data->keys); 1335 BUG_ON(btree_bset_first(new_nodes[0])->keys);
1328 btree_node_free(new_nodes[0]); 1336 btree_node_free(new_nodes[0]);
1329 rw_unlock(true, new_nodes[0]); 1337 rw_unlock(true, new_nodes[0]);
1330 1338
@@ -1370,7 +1378,7 @@ static unsigned btree_gc_count_keys(struct btree *b)
1370 struct btree_iter iter; 1378 struct btree_iter iter;
1371 unsigned ret = 0; 1379 unsigned ret = 0;
1372 1380
1373 for_each_key_filter(b, k, &iter, bch_ptr_bad) 1381 for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
1374 ret += bkey_u64s(k); 1382 ret += bkey_u64s(k);
1375 1383
1376 return ret; 1384 return ret;
@@ -1390,13 +1398,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
1390 struct gc_merge_info *last = r + GC_MERGE_NODES - 1; 1398 struct gc_merge_info *last = r + GC_MERGE_NODES - 1;
1391 1399
1392 bch_keylist_init(&keys); 1400 bch_keylist_init(&keys);
1393 bch_btree_iter_init(b, &iter, &b->c->gc_done); 1401 bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
1394 1402
1395 for (i = 0; i < GC_MERGE_NODES; i++) 1403 for (i = 0; i < GC_MERGE_NODES; i++)
1396 r[i].b = ERR_PTR(-EINTR); 1404 r[i].b = ERR_PTR(-EINTR);
1397 1405
1398 while (1) { 1406 while (1) {
1399 k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); 1407 k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
1400 if (k) { 1408 if (k) {
1401 r->b = bch_btree_node_get(b->c, k, b->level - 1, true); 1409 r->b = bch_btree_node_get(b->c, k, b->level - 1, true);
1402 if (IS_ERR(r->b)) { 1410 if (IS_ERR(r->b)) {
@@ -1416,7 +1424,8 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
1416 1424
1417 if (!IS_ERR(last->b)) { 1425 if (!IS_ERR(last->b)) {
1418 should_rewrite = btree_gc_mark_node(last->b, gc); 1426 should_rewrite = btree_gc_mark_node(last->b, gc);
1419 if (should_rewrite) { 1427 if (should_rewrite &&
1428 !btree_check_reserve(b, NULL)) {
1420 n = btree_node_alloc_replacement(last->b, 1429 n = btree_node_alloc_replacement(last->b,
1421 false); 1430 false);
1422 1431
@@ -1705,7 +1714,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op,
1705 struct bucket *g; 1714 struct bucket *g;
1706 struct btree_iter iter; 1715 struct btree_iter iter;
1707 1716
1708 for_each_key_filter(b, k, &iter, bch_ptr_invalid) { 1717 for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
1709 for (i = 0; i < KEY_PTRS(k); i++) { 1718 for (i = 0; i < KEY_PTRS(k); i++) {
1710 if (!ptr_available(b->c, k, i)) 1719 if (!ptr_available(b->c, k, i))
1711 continue; 1720 continue;
@@ -1728,10 +1737,11 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op,
1728 } 1737 }
1729 1738
1730 if (b->level) { 1739 if (b->level) {
1731 bch_btree_iter_init(b, &iter, NULL); 1740 bch_btree_iter_init(&b->keys, &iter, NULL);
1732 1741
1733 do { 1742 do {
1734 k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); 1743 k = bch_btree_iter_next_filter(&iter, &b->keys,
1744 bch_ptr_bad);
1735 if (k) 1745 if (k)
1736 btree_node_prefetch(b->c, k, b->level - 1); 1746 btree_node_prefetch(b->c, k, b->level - 1);
1737 1747
@@ -1774,235 +1784,36 @@ err:
1774 1784
1775/* Btree insertion */ 1785/* Btree insertion */
1776 1786
1777static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert) 1787static bool btree_insert_key(struct btree *b, struct bkey *k,
1778{ 1788 struct bkey *replace_key)
1779 struct bset *i = b->sets[b->nsets].data;
1780
1781 memmove((uint64_t *) where + bkey_u64s(insert),
1782 where,
1783 (void *) end(i) - (void *) where);
1784
1785 i->keys += bkey_u64s(insert);
1786 bkey_copy(where, insert);
1787 bch_bset_fix_lookup_table(b, where);
1788}
1789
1790static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
1791 struct btree_iter *iter,
1792 struct bkey *replace_key)
1793{ 1789{
1794 void subtract_dirty(struct bkey *k, uint64_t offset, int sectors) 1790 unsigned status;
1795 {
1796 if (KEY_DIRTY(k))
1797 bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
1798 offset, -sectors);
1799 }
1800
1801 uint64_t old_offset;
1802 unsigned old_size, sectors_found = 0;
1803
1804 while (1) {
1805 struct bkey *k = bch_btree_iter_next(iter);
1806 if (!k ||
1807 bkey_cmp(&START_KEY(k), insert) >= 0)
1808 break;
1809
1810 if (bkey_cmp(k, &START_KEY(insert)) <= 0)
1811 continue;
1812
1813 old_offset = KEY_START(k);
1814 old_size = KEY_SIZE(k);
1815
1816 /*
1817 * We might overlap with 0 size extents; we can't skip these
1818 * because if they're in the set we're inserting to we have to
1819 * adjust them so they don't overlap with the key we're
1820 * inserting. But we don't want to check them for replace
1821 * operations.
1822 */
1823
1824 if (replace_key && KEY_SIZE(k)) {
1825 /*
1826 * k might have been split since we inserted/found the
1827 * key we're replacing
1828 */
1829 unsigned i;
1830 uint64_t offset = KEY_START(k) -
1831 KEY_START(replace_key);
1832
1833 /* But it must be a subset of the replace key */
1834 if (KEY_START(k) < KEY_START(replace_key) ||
1835 KEY_OFFSET(k) > KEY_OFFSET(replace_key))
1836 goto check_failed;
1837
1838 /* We didn't find a key that we were supposed to */
1839 if (KEY_START(k) > KEY_START(insert) + sectors_found)
1840 goto check_failed;
1841
1842 if (KEY_PTRS(k) != KEY_PTRS(replace_key) ||
1843 KEY_DIRTY(k) != KEY_DIRTY(replace_key))
1844 goto check_failed;
1845
1846 /* skip past gen */
1847 offset <<= 8;
1848
1849 BUG_ON(!KEY_PTRS(replace_key));
1850 1791
1851 for (i = 0; i < KEY_PTRS(replace_key); i++) 1792 BUG_ON(bkey_cmp(k, &b->key) > 0);
1852 if (k->ptr[i] != replace_key->ptr[i] + offset)
1853 goto check_failed;
1854
1855 sectors_found = KEY_OFFSET(k) - KEY_START(insert);
1856 }
1857
1858 if (bkey_cmp(insert, k) < 0 &&
1859 bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
1860 /*
1861 * We overlapped in the middle of an existing key: that
1862 * means we have to split the old key. But we have to do
1863 * slightly different things depending on whether the
1864 * old key has been written out yet.
1865 */
1866
1867 struct bkey *top;
1868
1869 subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
1870
1871 if (bkey_written(b, k)) {
1872 /*
1873 * We insert a new key to cover the top of the
1874 * old key, and the old key is modified in place
1875 * to represent the bottom split.
1876 *
1877 * It's completely arbitrary whether the new key
1878 * is the top or the bottom, but it has to match
1879 * up with what btree_sort_fixup() does - it
1880 * doesn't check for this kind of overlap, it
1881 * depends on us inserting a new key for the top
1882 * here.
1883 */
1884 top = bch_bset_search(b, &b->sets[b->nsets],
1885 insert);
1886 shift_keys(b, top, k);
1887 } else {
1888 BKEY_PADDED(key) temp;
1889 bkey_copy(&temp.key, k);
1890 shift_keys(b, k, &temp.key);
1891 top = bkey_next(k);
1892 }
1893
1894 bch_cut_front(insert, top);
1895 bch_cut_back(&START_KEY(insert), k);
1896 bch_bset_fix_invalidated_key(b, k);
1897 return false;
1898 }
1899
1900 if (bkey_cmp(insert, k) < 0) {
1901 bch_cut_front(insert, k);
1902 } else {
1903 if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
1904 old_offset = KEY_START(insert);
1905
1906 if (bkey_written(b, k) &&
1907 bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
1908 /*
1909 * Completely overwrote, so we don't have to
1910 * invalidate the binary search tree
1911 */
1912 bch_cut_front(k, k);
1913 } else {
1914 __bch_cut_back(&START_KEY(insert), k);
1915 bch_bset_fix_invalidated_key(b, k);
1916 }
1917 }
1918
1919 subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
1920 }
1921 1793
1922check_failed: 1794 status = bch_btree_insert_key(&b->keys, k, replace_key);
1923 if (replace_key) { 1795 if (status != BTREE_INSERT_STATUS_NO_INSERT) {
1924 if (!sectors_found) { 1796 bch_check_keys(&b->keys, "%u for %s", status,
1925 return true; 1797 replace_key ? "replace" : "insert");
1926 } else if (sectors_found < KEY_SIZE(insert)) {
1927 SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
1928 (KEY_SIZE(insert) - sectors_found));
1929 SET_KEY_SIZE(insert, sectors_found);
1930 }
1931 }
1932 1798
1933 return false; 1799 trace_bcache_btree_insert_key(b, k, replace_key != NULL,
1800 status);
1801 return true;
1802 } else
1803 return false;
1934} 1804}
1935 1805
1936static bool btree_insert_key(struct btree *b, struct btree_op *op, 1806static size_t insert_u64s_remaining(struct btree *b)
1937 struct bkey *k, struct bkey *replace_key)
1938{ 1807{
1939 struct bset *i = b->sets[b->nsets].data; 1808 ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys);
1940 struct bkey *m, *prev;
1941 unsigned status = BTREE_INSERT_STATUS_INSERT;
1942
1943 BUG_ON(bkey_cmp(k, &b->key) > 0);
1944 BUG_ON(b->level && !KEY_PTRS(k));
1945 BUG_ON(!b->level && !KEY_OFFSET(k));
1946
1947 if (!b->level) {
1948 struct btree_iter iter;
1949
1950 /*
1951 * bset_search() returns the first key that is strictly greater
1952 * than the search key - but for back merging, we want to find
1953 * the previous key.
1954 */
1955 prev = NULL;
1956 m = bch_btree_iter_init(b, &iter, PRECEDING_KEY(&START_KEY(k)));
1957 1809
1958 if (fix_overlapping_extents(b, k, &iter, replace_key)) { 1810 /*
1959 op->insert_collision = true; 1811 * Might land in the middle of an existing extent and have to split it
1960 return false; 1812 */
1961 } 1813 if (b->keys.ops->is_extents)
1962 1814 ret -= KEY_MAX_U64S;
1963 if (KEY_DIRTY(k))
1964 bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
1965 KEY_START(k), KEY_SIZE(k));
1966
1967 while (m != end(i) &&
1968 bkey_cmp(k, &START_KEY(m)) > 0)
1969 prev = m, m = bkey_next(m);
1970
1971 if (key_merging_disabled(b->c))
1972 goto insert;
1973
1974 /* prev is in the tree, if we merge we're done */
1975 status = BTREE_INSERT_STATUS_BACK_MERGE;
1976 if (prev &&
1977 bch_bkey_try_merge(b, prev, k))
1978 goto merged;
1979
1980 status = BTREE_INSERT_STATUS_OVERWROTE;
1981 if (m != end(i) &&
1982 KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
1983 goto copy;
1984
1985 status = BTREE_INSERT_STATUS_FRONT_MERGE;
1986 if (m != end(i) &&
1987 bch_bkey_try_merge(b, k, m))
1988 goto copy;
1989 } else {
1990 BUG_ON(replace_key);
1991 m = bch_bset_search(b, &b->sets[b->nsets], k);
1992 }
1993
1994insert: shift_keys(b, m, k);
1995copy: bkey_copy(m, k);
1996merged:
1997 bch_check_keys(b, "%u for %s", status,
1998 replace_key ? "replace" : "insert");
1999
2000 if (b->level && !KEY_OFFSET(k))
2001 btree_current_write(b)->prio_blocked++;
2002
2003 trace_bcache_btree_insert_key(b, k, replace_key != NULL, status);
2004 1815
2005 return true; 1816 return max(ret, 0L);
2006} 1817}
2007 1818
2008static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, 1819static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
@@ -2010,21 +1821,19 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
2010 struct bkey *replace_key) 1821 struct bkey *replace_key)
2011{ 1822{
2012 bool ret = false; 1823 bool ret = false;
2013 int oldsize = bch_count_data(b); 1824 int oldsize = bch_count_data(&b->keys);
2014 1825
2015 while (!bch_keylist_empty(insert_keys)) { 1826 while (!bch_keylist_empty(insert_keys)) {
2016 struct bset *i = write_block(b);
2017 struct bkey *k = insert_keys->keys; 1827 struct bkey *k = insert_keys->keys;
2018 1828
2019 if (b->written + __set_blocks(i, i->keys + bkey_u64s(k), b->c) 1829 if (bkey_u64s(k) > insert_u64s_remaining(b))
2020 > btree_blocks(b))
2021 break; 1830 break;
2022 1831
2023 if (bkey_cmp(k, &b->key) <= 0) { 1832 if (bkey_cmp(k, &b->key) <= 0) {
2024 if (!b->level) 1833 if (!b->level)
2025 bkey_put(b->c, k); 1834 bkey_put(b->c, k);
2026 1835
2027 ret |= btree_insert_key(b, op, k, replace_key); 1836 ret |= btree_insert_key(b, k, replace_key);
2028 bch_keylist_pop_front(insert_keys); 1837 bch_keylist_pop_front(insert_keys);
2029 } else if (bkey_cmp(&START_KEY(k), &b->key) < 0) { 1838 } else if (bkey_cmp(&START_KEY(k), &b->key) < 0) {
2030 BKEY_PADDED(key) temp; 1839 BKEY_PADDED(key) temp;
@@ -2033,16 +1842,19 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
2033 bch_cut_back(&b->key, &temp.key); 1842 bch_cut_back(&b->key, &temp.key);
2034 bch_cut_front(&b->key, insert_keys->keys); 1843 bch_cut_front(&b->key, insert_keys->keys);
2035 1844
2036 ret |= btree_insert_key(b, op, &temp.key, replace_key); 1845 ret |= btree_insert_key(b, &temp.key, replace_key);
2037 break; 1846 break;
2038 } else { 1847 } else {
2039 break; 1848 break;
2040 } 1849 }
2041 } 1850 }
2042 1851
1852 if (!ret)
1853 op->insert_collision = true;
1854
2043 BUG_ON(!bch_keylist_empty(insert_keys) && b->level); 1855 BUG_ON(!bch_keylist_empty(insert_keys) && b->level);
2044 1856
2045 BUG_ON(bch_count_data(b) < oldsize); 1857 BUG_ON(bch_count_data(&b->keys) < oldsize);
2046 return ret; 1858 return ret;
2047} 1859}
2048 1860
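The loop above now admits a key only while bkey_u64s(k) fits in insert_u64s_remaining(b); the old arithmetic on __set_blocks() is gone. A minimal sketch of the same accounting, assuming only the bset.h helpers named in the hunk (bch_btree_keys_u64s_remaining(), KEY_MAX_U64S); the function name is illustrative:

	/* Sketch: space left for inserts in one node, in 64-bit words.
	 * Extent nodes keep KEY_MAX_U64S in reserve because an insert
	 * landing in the middle of an existing extent splits it and so
	 * adds one extra key. */
	static size_t sketch_insert_u64s_remaining(struct btree *b)
	{
		ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys);

		if (b->keys.ops->is_extents)
			ret -= KEY_MAX_U64S;	/* room for a split */

		return max(ret, 0L);
	}

	/* caller pattern, mirroring bch_btree_insert_keys():
	 *	if (bkey_u64s(k) > sketch_insert_u64s_remaining(b))
	 *		break;
	 */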
@@ -2059,16 +1871,21 @@ static int btree_split(struct btree *b, struct btree_op *op,
2059 closure_init_stack(&cl); 1871 closure_init_stack(&cl);
2060 bch_keylist_init(&parent_keys); 1872 bch_keylist_init(&parent_keys);
2061 1873
1874 if (!b->level &&
1875 btree_check_reserve(b, op))
1876 return -EINTR;
1877
2062 n1 = btree_node_alloc_replacement(b, true); 1878 n1 = btree_node_alloc_replacement(b, true);
2063 if (IS_ERR(n1)) 1879 if (IS_ERR(n1))
2064 goto err; 1880 goto err;
2065 1881
2066 split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; 1882 split = set_blocks(btree_bset_first(n1),
1883 block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
2067 1884
2068 if (split) { 1885 if (split) {
2069 unsigned keys = 0; 1886 unsigned keys = 0;
2070 1887
2071 trace_bcache_btree_node_split(b, n1->sets[0].data->keys); 1888 trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
2072 1889
2073 n2 = bch_btree_node_alloc(b->c, b->level, true); 1890 n2 = bch_btree_node_alloc(b->c, b->level, true);
2074 if (IS_ERR(n2)) 1891 if (IS_ERR(n2))
@@ -2087,18 +1904,20 @@ static int btree_split(struct btree *b, struct btree_op *op,
2087 * search tree yet 1904 * search tree yet
2088 */ 1905 */
2089 1906
2090 while (keys < (n1->sets[0].data->keys * 3) / 5) 1907 while (keys < (btree_bset_first(n1)->keys * 3) / 5)
2091 keys += bkey_u64s(node(n1->sets[0].data, keys)); 1908 keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1),
1909 keys));
2092 1910
2093 bkey_copy_key(&n1->key, node(n1->sets[0].data, keys)); 1911 bkey_copy_key(&n1->key,
2094 keys += bkey_u64s(node(n1->sets[0].data, keys)); 1912 bset_bkey_idx(btree_bset_first(n1), keys));
1913 keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys));
2095 1914
2096 n2->sets[0].data->keys = n1->sets[0].data->keys - keys; 1915 btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys;
2097 n1->sets[0].data->keys = keys; 1916 btree_bset_first(n1)->keys = keys;
2098 1917
2099 memcpy(n2->sets[0].data->start, 1918 memcpy(btree_bset_first(n2)->start,
2100 end(n1->sets[0].data), 1919 bset_bkey_last(btree_bset_first(n1)),
2101 n2->sets[0].data->keys * sizeof(uint64_t)); 1920 btree_bset_first(n2)->keys * sizeof(uint64_t));
2102 1921
2103 bkey_copy_key(&n2->key, &b->key); 1922 bkey_copy_key(&n2->key, &b->key);
2104 1923
@@ -2106,7 +1925,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
2106 bch_btree_node_write(n2, &cl); 1925 bch_btree_node_write(n2, &cl);
2107 rw_unlock(true, n2); 1926 rw_unlock(true, n2);
2108 } else { 1927 } else {
2109 trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); 1928 trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);
2110 1929
2111 bch_btree_insert_keys(n1, op, insert_keys, replace_key); 1930 bch_btree_insert_keys(n1, op, insert_keys, replace_key);
2112 } 1931 }
@@ -2149,18 +1968,21 @@ static int btree_split(struct btree *b, struct btree_op *op,
2149 1968
2150 return 0; 1969 return 0;
2151err_free2: 1970err_free2:
1971 bkey_put(b->c, &n2->key);
2152 btree_node_free(n2); 1972 btree_node_free(n2);
2153 rw_unlock(true, n2); 1973 rw_unlock(true, n2);
2154err_free1: 1974err_free1:
1975 bkey_put(b->c, &n1->key);
2155 btree_node_free(n1); 1976 btree_node_free(n1);
2156 rw_unlock(true, n1); 1977 rw_unlock(true, n1);
2157err: 1978err:
1979 WARN(1, "bcache: btree split failed");
1980
2158 if (n3 == ERR_PTR(-EAGAIN) || 1981 if (n3 == ERR_PTR(-EAGAIN) ||
2159 n2 == ERR_PTR(-EAGAIN) || 1982 n2 == ERR_PTR(-EAGAIN) ||
2160 n1 == ERR_PTR(-EAGAIN)) 1983 n1 == ERR_PTR(-EAGAIN))
2161 return -EAGAIN; 1984 return -EAGAIN;
2162 1985
2163 pr_warn("couldn't split");
2164 return -ENOMEM; 1986 return -ENOMEM;
2165} 1987}
2166 1988
@@ -2171,7 +1993,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
2171{ 1993{
2172 BUG_ON(b->level && replace_key); 1994 BUG_ON(b->level && replace_key);
2173 1995
2174 if (should_split(b)) { 1996 if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) {
2175 if (current->bio_list) { 1997 if (current->bio_list) {
2176 op->lock = b->c->root->level + 1; 1998 op->lock = b->c->root->level + 1;
2177 return -EAGAIN; 1999 return -EAGAIN;
@@ -2180,11 +2002,13 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
2180 return -EINTR; 2002 return -EINTR;
2181 } else { 2003 } else {
2182 /* Invalidated all iterators */ 2004 /* Invalidated all iterators */
2183 return btree_split(b, op, insert_keys, replace_key) ?: 2005 int ret = btree_split(b, op, insert_keys, replace_key);
2184 -EINTR; 2006
2007 return bch_keylist_empty(insert_keys) ?
2008 0 : ret ?: -EINTR;
2185 } 2009 }
2186 } else { 2010 } else {
2187 BUG_ON(write_block(b) != b->sets[b->nsets].data); 2011 BUG_ON(write_block(b) != btree_bset_last(b));
2188 2012
2189 if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { 2013 if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
2190 if (!b->level) 2014 if (!b->level)
@@ -2323,9 +2147,9 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
2323 struct bkey *k; 2147 struct bkey *k;
2324 struct btree_iter iter; 2148 struct btree_iter iter;
2325 2149
2326 bch_btree_iter_init(b, &iter, from); 2150 bch_btree_iter_init(&b->keys, &iter, from);
2327 2151
2328 while ((k = bch_btree_iter_next_filter(&iter, b, 2152 while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
2329 bch_ptr_bad))) { 2153 bch_ptr_bad))) {
2330 ret = btree(map_nodes_recurse, k, b, 2154 ret = btree(map_nodes_recurse, k, b,
2331 op, from, fn, flags); 2155 op, from, fn, flags);
@@ -2356,9 +2180,9 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
2356 struct bkey *k; 2180 struct bkey *k;
2357 struct btree_iter iter; 2181 struct btree_iter iter;
2358 2182
2359 bch_btree_iter_init(b, &iter, from); 2183 bch_btree_iter_init(&b->keys, &iter, from);
2360 2184
2361 while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) { 2185 while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
2362 ret = !b->level 2186 ret = !b->level
2363 ? fn(op, b, k) 2187 ? fn(op, b, k)
2364 : btree(map_keys_recurse, k, b, op, from, fn, flags); 2188 : btree(map_keys_recurse, k, b, op, from, fn, flags);
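Every iterator call in btree.c now passes the embedded &b->keys rather than the btree node itself. A hedged sketch of the resulting walk over one node's good keys; the helper name is illustrative, the calls are exactly the ones used in the hunks above:

	static void sketch_walk_node(struct btree *b)
	{
		struct btree_iter iter;
		struct bkey *k;

		bch_btree_iter_init(&b->keys, &iter, NULL);

		while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
						       bch_ptr_bad))) {
			/* k walks the node in sorted order, with stale
			 * or invalid pointers already filtered out */
		}
	}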
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 767e75570896..af065e97e55c 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -130,20 +130,12 @@ struct btree {
130 unsigned long flags; 130 unsigned long flags;
131 uint16_t written; /* would be nice to kill */ 131 uint16_t written; /* would be nice to kill */
132 uint8_t level; 132 uint8_t level;
133 uint8_t nsets; 133
134 uint8_t page_order; 134 struct btree_keys keys;
135
136 /*
137 * Set of sorted keys - the real btree node - plus a binary search tree
138 *
139 * sets[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
140 * to the memory we have allocated for this btree node. Additionally,
141 * set[0]->data points to the entire btree node as it exists on disk.
142 */
143 struct bset_tree sets[MAX_BSETS];
144 135
145 /* For outstanding btree writes, used as a lock - protects write_idx */ 136 /* For outstanding btree writes, used as a lock - protects write_idx */
146 struct closure_with_waitlist io; 137 struct closure io;
138 struct semaphore io_mutex;
147 139
148 struct list_head list; 140 struct list_head list;
149 struct delayed_work work; 141 struct delayed_work work;
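struct btree now pairs a plain closure with a semaphore instead of the old closure_with_waitlist. A minimal sketch of excluding concurrent node I/O with that semaphore - the same down()/up() pattern bch_btree_verify() adopts later in this diff; the function name is illustrative:

	static void sketch_with_node_io_excluded(struct btree *b)
	{
		down(&b->io_mutex);	/* wait out any in-flight node I/O */

		/* ... inspect b->keys safely here ... */

		up(&b->io_mutex);
	}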
@@ -179,24 +171,19 @@ static inline struct btree_write *btree_prev_write(struct btree *b)
179 return b->writes + (btree_node_write_idx(b) ^ 1); 171 return b->writes + (btree_node_write_idx(b) ^ 1);
180} 172}
181 173
182static inline unsigned bset_offset(struct btree *b, struct bset *i) 174static inline struct bset *btree_bset_first(struct btree *b)
183{ 175{
184 return (((size_t) i) - ((size_t) b->sets->data)) >> 9; 176 return b->keys.set->data;
185} 177}
186 178
187static inline struct bset *write_block(struct btree *b) 179static inline struct bset *btree_bset_last(struct btree *b)
188{ 180{
189 return ((void *) b->sets[0].data) + b->written * block_bytes(b->c); 181 return bset_tree_last(&b->keys)->data;
190} 182}
191 183
192static inline bool bset_written(struct btree *b, struct bset_tree *t) 184static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
193{ 185{
194 return t->data < write_block(b); 186 return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
195}
196
197static inline bool bkey_written(struct btree *b, struct bkey *k)
198{
199 return k < write_block(b)->start;
200} 187}
201 188
202static inline void set_gc_sectors(struct cache_set *c) 189static inline void set_gc_sectors(struct cache_set *c)
@@ -204,21 +191,6 @@ static inline void set_gc_sectors(struct cache_set *c)
204 atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16); 191 atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
205} 192}
206 193
207static inline struct bkey *bch_btree_iter_init(struct btree *b,
208 struct btree_iter *iter,
209 struct bkey *search)
210{
211 return __bch_btree_iter_init(b, iter, search, b->sets);
212}
213
214static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
215{
216 if (b->level)
217 return bch_btree_ptr_invalid(b->c, k);
218 else
219 return bch_extent_ptr_invalid(b->c, k);
220}
221
222void bkey_put(struct cache_set *c, struct bkey *k); 194void bkey_put(struct cache_set *c, struct bkey *k);
223 195
224/* Looping macros */ 196/* Looping macros */
@@ -229,17 +201,12 @@ void bkey_put(struct cache_set *c, struct bkey *k);
229 iter++) \ 201 iter++) \
230 hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash) 202 hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash)
231 203
232#define for_each_key_filter(b, k, iter, filter) \
233 for (bch_btree_iter_init((b), (iter), NULL); \
234 ((k) = bch_btree_iter_next_filter((iter), b, filter));)
235
236#define for_each_key(b, k, iter) \
237 for (bch_btree_iter_init((b), (iter), NULL); \
238 ((k) = bch_btree_iter_next(iter));)
239
240/* Recursing down the btree */ 204/* Recursing down the btree */
241 205
242struct btree_op { 206struct btree_op {
207 /* for waiting on btree reserve in btree_split() */
208 wait_queue_t wait;
209
243 /* Btree level at which we start taking write locks */ 210 /* Btree level at which we start taking write locks */
244 short lock; 211 short lock;
245 212
@@ -249,6 +216,7 @@ struct btree_op {
249static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) 216static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
250{ 217{
251 memset(op, 0, sizeof(struct btree_op)); 218 memset(op, 0, sizeof(struct btree_op));
219 init_wait(&op->wait);
252 op->lock = write_lock_level; 220 op->lock = write_lock_level;
253} 221}
254 222
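btree_op gains a wait queue entry so btree_split() can sleep on the node reserve, and bch_btree_op_init() now initializes it alongside the lock level. A short usage sketch, assuming the SHRT_MAX convention bcache uses elsewhere for "only take write locks when forced":

	struct btree_op op;

	bch_btree_op_init(&op, SHRT_MAX);	/* zeroes op, inits op.wait */
	/* ... hand &op to the map/insert paths as before ... */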
@@ -267,7 +235,7 @@ static inline void rw_unlock(bool w, struct btree *b)
267 (w ? up_write : up_read)(&b->lock); 235 (w ? up_write : up_read)(&b->lock);
268} 236}
269 237
270void bch_btree_node_read(struct btree *); 238void bch_btree_node_read_done(struct btree *);
271void bch_btree_node_write(struct btree *, struct closure *); 239void bch_btree_node_write(struct btree *, struct closure *);
272 240
273void bch_btree_set_root(struct btree *); 241void bch_btree_set_root(struct btree *);
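With sets[] gone from struct btree, code that used to poke at b->sets[...] goes through the accessors added above. A small conversion sketch (old expression in the comment, new one in the code), assuming only the helpers defined in this hunk:

	static void sketch_bset_accessors(struct btree *b)
	{
		/* was: b->sets[0].data */
		struct bset *first = btree_bset_first(b);

		/* was: b->sets[b->nsets].data */
		struct bset *last = btree_bset_last(b);

		/* was: bset_offset()-style arithmetic */
		unsigned block = bset_block_offset(b, last);

		(void) first;
		(void) block;
	}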
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index dfff2410322e..7a228de95fd7 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -11,19 +11,6 @@
11 11
12#include "closure.h" 12#include "closure.h"
13 13
14#define CL_FIELD(type, field) \
15 case TYPE_ ## type: \
16 return &container_of(cl, struct type, cl)->field
17
18static struct closure_waitlist *closure_waitlist(struct closure *cl)
19{
20 switch (cl->type) {
21 CL_FIELD(closure_with_waitlist, wait);
22 default:
23 return NULL;
24 }
25}
26
27static inline void closure_put_after_sub(struct closure *cl, int flags) 14static inline void closure_put_after_sub(struct closure *cl, int flags)
28{ 15{
29 int r = flags & CLOSURE_REMAINING_MASK; 16 int r = flags & CLOSURE_REMAINING_MASK;
@@ -42,17 +29,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
42 closure_queue(cl); 29 closure_queue(cl);
43 } else { 30 } else {
44 struct closure *parent = cl->parent; 31 struct closure *parent = cl->parent;
45 struct closure_waitlist *wait = closure_waitlist(cl);
46 closure_fn *destructor = cl->fn; 32 closure_fn *destructor = cl->fn;
47 33
48 closure_debug_destroy(cl); 34 closure_debug_destroy(cl);
49 35
50 smp_mb();
51 atomic_set(&cl->remaining, -1);
52
53 if (wait)
54 closure_wake_up(wait);
55
56 if (destructor) 36 if (destructor)
57 destructor(cl); 37 destructor(cl);
58 38
@@ -69,19 +49,18 @@ void closure_sub(struct closure *cl, int v)
69} 49}
70EXPORT_SYMBOL(closure_sub); 50EXPORT_SYMBOL(closure_sub);
71 51
52/**
53 * closure_put - decrement a closure's refcount
54 */
72void closure_put(struct closure *cl) 55void closure_put(struct closure *cl)
73{ 56{
74 closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); 57 closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
75} 58}
76EXPORT_SYMBOL(closure_put); 59EXPORT_SYMBOL(closure_put);
77 60
78static void set_waiting(struct closure *cl, unsigned long f) 61/**
79{ 62 * closure_wake_up - wake up all closures on a wait list, without memory barrier
80#ifdef CONFIG_BCACHE_CLOSURES_DEBUG 63 */
81 cl->waiting_on = f;
82#endif
83}
84
85void __closure_wake_up(struct closure_waitlist *wait_list) 64void __closure_wake_up(struct closure_waitlist *wait_list)
86{ 65{
87 struct llist_node *list; 66 struct llist_node *list;
@@ -106,27 +85,34 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
106 cl = container_of(reverse, struct closure, list); 85 cl = container_of(reverse, struct closure, list);
107 reverse = llist_next(reverse); 86 reverse = llist_next(reverse);
108 87
109 set_waiting(cl, 0); 88 closure_set_waiting(cl, 0);
110 closure_sub(cl, CLOSURE_WAITING + 1); 89 closure_sub(cl, CLOSURE_WAITING + 1);
111 } 90 }
112} 91}
113EXPORT_SYMBOL(__closure_wake_up); 92EXPORT_SYMBOL(__closure_wake_up);
114 93
115bool closure_wait(struct closure_waitlist *list, struct closure *cl) 94/**
95 * closure_wait - add a closure to a waitlist
96 *
97 * @waitlist will own a ref on @cl, which will be released when
98 * closure_wake_up() is called on @waitlist.
99 *
100 */
101bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
116{ 102{
117 if (atomic_read(&cl->remaining) & CLOSURE_WAITING) 103 if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
118 return false; 104 return false;
119 105
120 set_waiting(cl, _RET_IP_); 106 closure_set_waiting(cl, _RET_IP_);
121 atomic_add(CLOSURE_WAITING + 1, &cl->remaining); 107 atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
122 llist_add(&cl->list, &list->list); 108 llist_add(&cl->list, &waitlist->list);
123 109
124 return true; 110 return true;
125} 111}
126EXPORT_SYMBOL(closure_wait); 112EXPORT_SYMBOL(closure_wait);
127 113
128/** 114/**
129 * closure_sync() - sleep until a closure has nothing left to wait on 115 * closure_sync - sleep until a closure has nothing left to wait on
130 * 116 *
131 * Sleeps until the refcount hits 1 - the thread that's running the closure owns 117 * Sleeps until the refcount hits 1 - the thread that's running the closure owns
132 * the last refcount. 118 * the last refcount.
@@ -148,46 +134,6 @@ void closure_sync(struct closure *cl)
148} 134}
149EXPORT_SYMBOL(closure_sync); 135EXPORT_SYMBOL(closure_sync);
150 136
151/**
152 * closure_trylock() - try to acquire the closure, without waiting
153 * @cl: closure to lock
154 *
155 * Returns true if the closure was successfully locked.
156 */
157bool closure_trylock(struct closure *cl, struct closure *parent)
158{
159 if (atomic_cmpxchg(&cl->remaining, -1,
160 CLOSURE_REMAINING_INITIALIZER) != -1)
161 return false;
162
163 smp_mb();
164
165 cl->parent = parent;
166 if (parent)
167 closure_get(parent);
168
169 closure_set_ret_ip(cl);
170 closure_debug_create(cl);
171 return true;
172}
173EXPORT_SYMBOL(closure_trylock);
174
175void __closure_lock(struct closure *cl, struct closure *parent,
176 struct closure_waitlist *wait_list)
177{
178 struct closure wait;
179 closure_init_stack(&wait);
180
181 while (1) {
182 if (closure_trylock(cl, parent))
183 return;
184
185 closure_wait_event(wait_list, &wait,
186 atomic_read(&cl->remaining) == -1);
187 }
188}
189EXPORT_SYMBOL(__closure_lock);
190
191#ifdef CONFIG_BCACHE_CLOSURES_DEBUG 137#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
192 138
193static LIST_HEAD(closure_list); 139static LIST_HEAD(closure_list);
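closure_wait() and __closure_wake_up() keep their behaviour after the lock code is removed: waiting takes a CLOSURE_WAITING ref that the wake-up path drops. A hedged sketch of the asynchronous wait pattern; the waitlist, the _done handler and system_wq are illustrative, the closure calls are the ones documented above:

	static void sketch_event_done(struct closure *cl)
	{
		/* runs once closure_wake_up() has dropped the waiting ref
		 * and every other ref on cl is gone */
	}

	static void sketch_wait_for_event(struct closure_waitlist *list,
					  struct closure *cl)
	{
		if (!closure_wait(list, cl))
			return;		/* already parked on a waitlist */

		/* resume in sketch_event_done() after the wake-up */
		continue_at(cl, sketch_event_done, system_wq);
	}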
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 9762f1be3304..7ef7461912be 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -72,30 +72,6 @@
72 * closure - _always_ use continue_at(). Doing so consistently will help 72 * closure - _always_ use continue_at(). Doing so consistently will help
73 * eliminate an entire class of particularly pernicious races. 73 * eliminate an entire class of particularly pernicious races.
74 * 74 *
75 * For a closure to wait on an arbitrary event, we need to introduce waitlists:
76 *
77 * struct closure_waitlist list;
78 * closure_wait_event(list, cl, condition);
79 * closure_wake_up(wait_list);
80 *
81 * These work analogously to wait_event() and wake_up() - except that instead of
82 * operating on the current thread (for wait_event()) and lists of threads, they
83 * operate on an explicit closure and lists of closures.
84 *
85 * Because it's a closure we can now wait either synchronously or
86 * asynchronously. closure_wait_event() returns the current value of the
87 * condition, and if it returned false continue_at() or closure_sync() can be
88 * used to wait for it to become true.
89 *
90 * It's useful for waiting on things when you can't sleep in the context in
91 * which you must check the condition (perhaps a spinlock held, or you might be
92 * beneath generic_make_request() - in which case you can't sleep on IO).
93 *
94 * closure_wait_event() will wait either synchronously or asynchronously,
95 * depending on whether the closure is in blocking mode or not. You can pick a
96 * mode explicitly with closure_wait_event_sync() and
97 * closure_wait_event_async(), which do just what you might expect.
98 *
99 * Lastly, you might have a wait list dedicated to a specific event, and have no 75 * Lastly, you might have a wait list dedicated to a specific event, and have no
100 * need for specifying the condition - you just want to wait until someone runs 76 * need for specifying the condition - you just want to wait until someone runs
101 * closure_wake_up() on the appropriate wait list. In that case, just use 77 * closure_wake_up() on the appropriate wait list. In that case, just use
@@ -121,40 +97,6 @@
121 * All this implies that a closure should typically be embedded in a particular 97 * All this implies that a closure should typically be embedded in a particular
122 * struct (which its refcount will normally control the lifetime of), and that 98 * struct (which its refcount will normally control the lifetime of), and that
123 * struct can very much be thought of as a stack frame. 99 * struct can very much be thought of as a stack frame.
124 *
125 * Locking:
126 *
127 * Closures are based on work items but they can be thought of as more like
128 * threads - in that like threads and unlike work items they have a well
129 * defined lifetime; they are created (with closure_init()) and eventually
130 * complete after a continue_at(cl, NULL, NULL).
131 *
132 * Suppose you've got some larger structure with a closure embedded in it that's
133 * used for periodically doing garbage collection. You only want one garbage
134 * collection happening at a time, so the natural thing to do is protect it with
135 * a lock. However, it's difficult to use a lock protecting a closure correctly
136 * because the unlock should come after the last continue_at() (additionally, if
137 * you're using the closure asynchronously a mutex won't work since a mutex has
138 * to be unlocked by the same process that locked it).
139 *
140 * So to make it less error prone and more efficient, we also have the ability
141 * to use closures as locks:
142 *
143 * closure_init_unlocked();
144 * closure_trylock();
145 *
146 * That's all we need for trylock() - the last closure_put() implicitly unlocks
147 * it for you. But for closure_lock(), we also need a wait list:
148 *
149 * struct closure_with_waitlist frobnicator_cl;
150 *
151 * closure_init_unlocked(&frobnicator_cl);
152 * closure_lock(&frobnicator_cl);
153 *
154 * A closure_with_waitlist embeds a closure and a wait list - much like struct
155 * delayed_work embeds a work item and a timer_list. The important thing is, use
156 * it exactly like you would a regular closure and closure_put() will magically
157 * handle everything for you.
158 */ 100 */
159 101
160struct closure; 102struct closure;
@@ -164,12 +106,6 @@ struct closure_waitlist {
164 struct llist_head list; 106 struct llist_head list;
165}; 107};
166 108
167enum closure_type {
168 TYPE_closure = 0,
169 TYPE_closure_with_waitlist = 1,
170 MAX_CLOSURE_TYPE = 1,
171};
172
173enum closure_state { 109enum closure_state {
174 /* 110 /*
175 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by 111 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
@@ -224,8 +160,6 @@ struct closure {
224 160
225 atomic_t remaining; 161 atomic_t remaining;
226 162
227 enum closure_type type;
228
229#ifdef CONFIG_BCACHE_CLOSURES_DEBUG 163#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
230#define CLOSURE_MAGIC_DEAD 0xc054dead 164#define CLOSURE_MAGIC_DEAD 0xc054dead
231#define CLOSURE_MAGIC_ALIVE 0xc054a11e 165#define CLOSURE_MAGIC_ALIVE 0xc054a11e
@@ -237,34 +171,12 @@ struct closure {
237#endif 171#endif
238}; 172};
239 173
240struct closure_with_waitlist {
241 struct closure cl;
242 struct closure_waitlist wait;
243};
244
245extern unsigned invalid_closure_type(void);
246
247#define __CLOSURE_TYPE(cl, _t) \
248 __builtin_types_compatible_p(typeof(cl), struct _t) \
249 ? TYPE_ ## _t : \
250
251#define __closure_type(cl) \
252( \
253 __CLOSURE_TYPE(cl, closure) \
254 __CLOSURE_TYPE(cl, closure_with_waitlist) \
255 invalid_closure_type() \
256)
257
258void closure_sub(struct closure *cl, int v); 174void closure_sub(struct closure *cl, int v);
259void closure_put(struct closure *cl); 175void closure_put(struct closure *cl);
260void __closure_wake_up(struct closure_waitlist *list); 176void __closure_wake_up(struct closure_waitlist *list);
261bool closure_wait(struct closure_waitlist *list, struct closure *cl); 177bool closure_wait(struct closure_waitlist *list, struct closure *cl);
262void closure_sync(struct closure *cl); 178void closure_sync(struct closure *cl);
263 179
264bool closure_trylock(struct closure *cl, struct closure *parent);
265void __closure_lock(struct closure *cl, struct closure *parent,
266 struct closure_waitlist *wait_list);
267
268#ifdef CONFIG_BCACHE_CLOSURES_DEBUG 180#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
269 181
270void closure_debug_init(void); 182void closure_debug_init(void);
@@ -293,134 +205,97 @@ static inline void closure_set_ret_ip(struct closure *cl)
293#endif 205#endif
294} 206}
295 207
296static inline void closure_get(struct closure *cl) 208static inline void closure_set_waiting(struct closure *cl, unsigned long f)
297{ 209{
298#ifdef CONFIG_BCACHE_CLOSURES_DEBUG 210#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
299 BUG_ON((atomic_inc_return(&cl->remaining) & 211 cl->waiting_on = f;
300 CLOSURE_REMAINING_MASK) <= 1);
301#else
302 atomic_inc(&cl->remaining);
303#endif 212#endif
304} 213}
305 214
306static inline void closure_set_stopped(struct closure *cl) 215static inline void __closure_end_sleep(struct closure *cl)
307{ 216{
308 atomic_sub(CLOSURE_RUNNING, &cl->remaining); 217 __set_current_state(TASK_RUNNING);
218
219 if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
220 atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
309} 221}
310 222
311static inline bool closure_is_unlocked(struct closure *cl) 223static inline void __closure_start_sleep(struct closure *cl)
312{ 224{
313 return atomic_read(&cl->remaining) == -1; 225 closure_set_ip(cl);
226 cl->task = current;
227 set_current_state(TASK_UNINTERRUPTIBLE);
228
229 if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
230 atomic_add(CLOSURE_SLEEPING, &cl->remaining);
314} 231}
315 232
316static inline void do_closure_init(struct closure *cl, struct closure *parent, 233static inline void closure_set_stopped(struct closure *cl)
317 bool running)
318{ 234{
319 cl->parent = parent; 235 atomic_sub(CLOSURE_RUNNING, &cl->remaining);
320 if (parent) 236}
321 closure_get(parent);
322
323 if (running) {
324 closure_debug_create(cl);
325 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
326 } else
327 atomic_set(&cl->remaining, -1);
328 237
238static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
239 struct workqueue_struct *wq)
240{
241 BUG_ON(object_is_on_stack(cl));
329 closure_set_ip(cl); 242 closure_set_ip(cl);
243 cl->fn = fn;
244 cl->wq = wq;
245 /* between atomic_dec() in closure_put() */
246 smp_mb__before_atomic_dec();
330} 247}
331 248
332/* 249static inline void closure_queue(struct closure *cl)
333 * Hack to get at the embedded closure if there is one, by doing an unsafe cast: 250{
334 * the result of __closure_type() is thrown away, it's used merely for type 251 struct workqueue_struct *wq = cl->wq;
335 * checking. 252 if (wq) {
336 */ 253 INIT_WORK(&cl->work, cl->work.func);
337#define __to_internal_closure(cl) \ 254 BUG_ON(!queue_work(wq, &cl->work));
338({ \ 255 } else
339 BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE); \ 256 cl->fn(cl);
340 (struct closure *) cl; \ 257}
341})
342
343#define closure_init_type(cl, parent, running) \
344do { \
345 struct closure *_cl = __to_internal_closure(cl); \
346 _cl->type = __closure_type(*(cl)); \
347 do_closure_init(_cl, parent, running); \
348} while (0)
349 258
350/** 259/**
351 * __closure_init() - Initialize a closure, skipping the memset() 260 * closure_get - increment a closure's refcount
352 *
353 * May be used instead of closure_init() when memory has already been zeroed.
354 */ 261 */
355#define __closure_init(cl, parent) \ 262static inline void closure_get(struct closure *cl)
356 closure_init_type(cl, parent, true) 263{
264#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
265 BUG_ON((atomic_inc_return(&cl->remaining) &
266 CLOSURE_REMAINING_MASK) <= 1);
267#else
268 atomic_inc(&cl->remaining);
269#endif
270}
357 271
358/** 272/**
359 * closure_init() - Initialize a closure, setting the refcount to 1 273 * closure_init - Initialize a closure, setting the refcount to 1
360 * @cl: closure to initialize 274 * @cl: closure to initialize
361 * @parent: parent of the new closure. cl will take a refcount on it for its 275 * @parent: parent of the new closure. cl will take a refcount on it for its
362 * lifetime; may be NULL. 276 * lifetime; may be NULL.
363 */ 277 */
364#define closure_init(cl, parent) \ 278static inline void closure_init(struct closure *cl, struct closure *parent)
365do { \
366 memset((cl), 0, sizeof(*(cl))); \
367 __closure_init(cl, parent); \
368} while (0)
369
370static inline void closure_init_stack(struct closure *cl)
371{ 279{
372 memset(cl, 0, sizeof(struct closure)); 280 memset(cl, 0, sizeof(struct closure));
373 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK); 281 cl->parent = parent;
374} 282 if (parent)
375 283 closure_get(parent);
376/**
377 * closure_init_unlocked() - Initialize a closure but leave it unlocked.
378 * @cl: closure to initialize
379 *
380 * For when the closure will be used as a lock. The closure may not be used
381 * until after a closure_lock() or closure_trylock().
382 */
383#define closure_init_unlocked(cl) \
384do { \
385 memset((cl), 0, sizeof(*(cl))); \
386 closure_init_type(cl, NULL, false); \
387} while (0)
388
389/**
390 * closure_lock() - lock and initialize a closure.
391 * @cl: the closure to lock
392 * @parent: the new parent for this closure
393 *
394 * The closure must be of one of the types that has a waitlist (otherwise we
395 * wouldn't be able to sleep on contention).
396 *
397 * @parent has exactly the same meaning as in closure_init(); if non null, the
398 * closure will take a reference on @parent which will be released when it is
399 * unlocked.
400 */
401#define closure_lock(cl, parent) \
402 __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
403 284
404static inline void __closure_end_sleep(struct closure *cl) 285 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
405{
406 __set_current_state(TASK_RUNNING);
407 286
408 if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING) 287 closure_debug_create(cl);
409 atomic_sub(CLOSURE_SLEEPING, &cl->remaining); 288 closure_set_ip(cl);
410} 289}
411 290
412static inline void __closure_start_sleep(struct closure *cl) 291static inline void closure_init_stack(struct closure *cl)
413{ 292{
414 closure_set_ip(cl); 293 memset(cl, 0, sizeof(struct closure));
415 cl->task = current; 294 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
416 set_current_state(TASK_UNINTERRUPTIBLE);
417
418 if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
419 atomic_add(CLOSURE_SLEEPING, &cl->remaining);
420} 295}
421 296
422/** 297/**
423 * closure_wake_up() - wake up all closures on a wait list. 298 * closure_wake_up - wake up all closures on a wait list.
424 */ 299 */
425static inline void closure_wake_up(struct closure_waitlist *list) 300static inline void closure_wake_up(struct closure_waitlist *list)
426{ 301{
@@ -428,69 +303,19 @@ static inline void closure_wake_up(struct closure_waitlist *list)
428 __closure_wake_up(list); 303 __closure_wake_up(list);
429} 304}
430 305
431/* 306/**
432 * Wait on an event, synchronously or asynchronously - analogous to wait_event() 307 * continue_at - jump to another function with barrier
433 * but for closures. 308 *
434 * 309 * After @cl is no longer waiting on anything (i.e. all outstanding refs have
435 * The loop is oddly structured so as to avoid a race; we must check the 310 * been dropped with closure_put()), it will resume execution at @fn running out
436 * condition again after we've added ourself to the waitlist. We know if we were 311 * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
437 * already on the waitlist because closure_wait() returns false; thus, we only 312 *
438 * schedule or break if closure_wait() returns false. If it returns true, we 313 * NOTE: This macro expands to a return in the calling function!
439 * just loop again - rechecking the condition. 314 *
440 * 315 * This is because after calling continue_at() you no longer have a ref on @cl,
441 * The __closure_wake_up() is necessary because we may race with the event 316 * and whatever @cl owns may be freed out from under you - a running closure fn
442 * becoming true; i.e. we see event false -> wait -> recheck condition, but the 317 * has a ref on its own closure which continue_at() drops.
443 * thread that made the event true may have called closure_wake_up() before we
444 * added ourself to the wait list.
445 *
446 * We have to call closure_sync() at the end instead of just
447 * __closure_end_sleep() because a different thread might've called
448 * closure_wake_up() before us and gotten preempted before they dropped the
449 * refcount on our closure. If this was a stack allocated closure, that would be
450 * bad.
451 */ 318 */
452#define closure_wait_event(list, cl, condition) \
453({ \
454 typeof(condition) ret; \
455 \
456 while (1) { \
457 ret = (condition); \
458 if (ret) { \
459 __closure_wake_up(list); \
460 closure_sync(cl); \
461 break; \
462 } \
463 \
464 __closure_start_sleep(cl); \
465 \
466 if (!closure_wait(list, cl)) \
467 schedule(); \
468 } \
469 \
470 ret; \
471})
472
473static inline void closure_queue(struct closure *cl)
474{
475 struct workqueue_struct *wq = cl->wq;
476 if (wq) {
477 INIT_WORK(&cl->work, cl->work.func);
478 BUG_ON(!queue_work(wq, &cl->work));
479 } else
480 cl->fn(cl);
481}
482
483static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
484 struct workqueue_struct *wq)
485{
486 BUG_ON(object_is_on_stack(cl));
487 closure_set_ip(cl);
488 cl->fn = fn;
489 cl->wq = wq;
490 /* between atomic_dec() in closure_put() */
491 smp_mb__before_atomic_dec();
492}
493
494#define continue_at(_cl, _fn, _wq) \ 319#define continue_at(_cl, _fn, _wq) \
495do { \ 320do { \
496 set_closure_fn(_cl, _fn, _wq); \ 321 set_closure_fn(_cl, _fn, _wq); \
@@ -498,8 +323,28 @@ do { \
498 return; \ 323 return; \
499} while (0) 324} while (0)
500 325
326/**
327 * closure_return - finish execution of a closure
328 *
329 * This is used to indicate that @cl is finished: when all outstanding refs on
330 * @cl have been dropped @cl's ref on its parent closure (as passed to
331 * closure_init()) will be dropped, if one was specified - thus this can be
332 * thought of as returning to the parent closure.
333 */
501#define closure_return(_cl) continue_at((_cl), NULL, NULL) 334#define closure_return(_cl) continue_at((_cl), NULL, NULL)
502 335
336/**
337 * continue_at_nobarrier - jump to another function without barrier
338 *
339 * Causes @fn to be executed out of @cl, in @wq context (or called directly if
340 * @wq is NULL).
341 *
342 * NOTE: like continue_at(), this macro expands to a return in the caller!
343 *
344 * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
345 * thus it's not safe to touch anything protected by @cl after a
346 * continue_at_nobarrier().
347 */
503#define continue_at_nobarrier(_cl, _fn, _wq) \ 348#define continue_at_nobarrier(_cl, _fn, _wq) \
504do { \ 349do { \
505 set_closure_fn(_cl, _fn, _wq); \ 350 set_closure_fn(_cl, _fn, _wq); \
@@ -507,6 +352,15 @@ do { \
507 return; \ 352 return; \
508} while (0) 353} while (0)
509 354
355/**
356 * closure_return_with_destructor - finish execution of a closure, with destructor
357 *
358 * Works like closure_return(), except @destructor will be called when all
359 * outstanding refs on @cl have been dropped; @destructor may be used to safely
360 * free the memory occupied by @cl, and it is called with the ref on the parent
361 * closure still held - so @destructor could safely return an item to a
362 * freelist protected by @cl's parent.
363 */
510#define closure_return_with_destructor(_cl, _destructor) \ 364#define closure_return_with_destructor(_cl, _destructor) \
511do { \ 365do { \
512 set_closure_fn(_cl, _destructor, NULL); \ 366 set_closure_fn(_cl, _destructor, NULL); \
@@ -514,6 +368,13 @@ do { \
514 return; \ 368 return; \
515} while (0) 369} while (0)
516 370
371/**
372 * closure_call - execute @fn out of a new, uninitialized closure
373 *
374 * Typically used when running out of one closure, and we want to run @fn
375 * asynchronously out of a new closure - @parent will then wait for @cl to
376 * finish.
377 */
517static inline void closure_call(struct closure *cl, closure_fn fn, 378static inline void closure_call(struct closure *cl, closure_fn fn,
518 struct workqueue_struct *wq, 379 struct workqueue_struct *wq,
519 struct closure *parent) 380 struct closure *parent)
@@ -522,12 +383,4 @@ static inline void closure_call(struct closure *cl, closure_fn fn,
522 continue_at_nobarrier(cl, fn, wq); 383 continue_at_nobarrier(cl, fn, wq);
523} 384}
524 385
525static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
526 struct workqueue_struct *wq,
527 struct closure *parent)
528{
529 if (closure_trylock(cl, parent))
530 continue_at_nobarrier(cl, fn, wq);
531}
532
533#endif /* _LINUX_CLOSURE_H */ 386#endif /* _LINUX_CLOSURE_H */
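Taken together, the kerneldoc added above covers the whole post-patch closure API. A compact lifecycle sketch assuming a heap-allocated struct (set_closure_fn() forbids on-stack closures); my_op, the my_op_* helpers and system_wq are illustrative names, not part of the patch:

	struct my_op {
		struct closure	cl;
		/* ... request state ... */
	};

	static void my_op_free(struct closure *cl)
	{
		/* destructor: runs once the last ref on cl is dropped,
		 * with the ref on the parent closure still held */
		kfree(container_of(cl, struct my_op, cl));
	}

	static void my_op_done(struct closure *cl)
	{
		/* finish the operation, then tear the closure down */
		closure_return_with_destructor(cl, my_op_free);
	}

	static void my_op_start(struct my_op *op, struct closure *parent)
	{
		closure_init(&op->cl, parent);

		/* async work issued here takes refs with closure_get()
		 * and drops them with closure_put() on completion */

		continue_at(&op->cl, my_op_done, system_wq);
	}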
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 264fcfbd6290..8b1f1d5c1819 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -8,6 +8,7 @@
8#include "bcache.h" 8#include "bcache.h"
9#include "btree.h" 9#include "btree.h"
10#include "debug.h" 10#include "debug.h"
11#include "extents.h"
11 12
12#include <linux/console.h> 13#include <linux/console.h>
13#include <linux/debugfs.h> 14#include <linux/debugfs.h>
@@ -17,163 +18,96 @@
17 18
18static struct dentry *debug; 19static struct dentry *debug;
19 20
20const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
21{
22 unsigned i;
23
24 for (i = 0; i < KEY_PTRS(k); i++)
25 if (ptr_available(c, k, i)) {
26 struct cache *ca = PTR_CACHE(c, k, i);
27 size_t bucket = PTR_BUCKET_NR(c, k, i);
28 size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
29
30 if (KEY_SIZE(k) + r > c->sb.bucket_size)
31 return "bad, length too big";
32 if (bucket < ca->sb.first_bucket)
33 return "bad, short offset";
34 if (bucket >= ca->sb.nbuckets)
35 return "bad, offset past end of device";
36 if (ptr_stale(c, k, i))
37 return "stale";
38 }
39
40 if (!bkey_cmp(k, &ZERO_KEY))
41 return "bad, null key";
42 if (!KEY_PTRS(k))
43 return "bad, no pointers";
44 if (!KEY_SIZE(k))
45 return "zeroed key";
46 return "";
47}
48
49int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
50{
51 unsigned i = 0;
52 char *out = buf, *end = buf + size;
53
54#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
55
56 p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));
57
58 if (KEY_PTRS(k))
59 while (1) {
60 p("%llu:%llu gen %llu",
61 PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));
62
63 if (++i == KEY_PTRS(k))
64 break;
65
66 p(", ");
67 }
68
69 p("]");
70
71 if (KEY_DIRTY(k))
72 p(" dirty");
73 if (KEY_CSUM(k))
74 p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
75#undef p
76 return out - buf;
77}
78
79#ifdef CONFIG_BCACHE_DEBUG 21#ifdef CONFIG_BCACHE_DEBUG
80 22
81static void dump_bset(struct btree *b, struct bset *i) 23#define for_each_written_bset(b, start, i) \
82{ 24 for (i = (start); \
83 struct bkey *k, *next; 25 (void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\
84 unsigned j; 26 i->seq == (start)->seq; \
85 char buf[80]; 27 i = (void *) i + set_blocks(i, block_bytes(b->c)) * \
86 28 block_bytes(b->c))
87 for (k = i->start; k < end(i); k = next) {
88 next = bkey_next(k);
89
90 bch_bkey_to_text(buf, sizeof(buf), k);
91 printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
92 (uint64_t *) k - i->d, i->keys, buf);
93
94 for (j = 0; j < KEY_PTRS(k); j++) {
95 size_t n = PTR_BUCKET_NR(b->c, k, j);
96 printk(" bucket %zu", n);
97
98 if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
99 printk(" prio %i",
100 PTR_BUCKET(b->c, k, j)->prio);
101 }
102 29
103 printk(" %s\n", bch_ptr_status(b->c, k)); 30void bch_btree_verify(struct btree *b)
104
105 if (next < end(i) &&
106 bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0)
107 printk(KERN_ERR "Key skipped backwards\n");
108 }
109}
110
111static void bch_dump_bucket(struct btree *b)
112{
113 unsigned i;
114
115 console_lock();
116 for (i = 0; i <= b->nsets; i++)
117 dump_bset(b, b->sets[i].data);
118 console_unlock();
119}
120
121void bch_btree_verify(struct btree *b, struct bset *new)
122{ 31{
123 struct btree *v = b->c->verify_data; 32 struct btree *v = b->c->verify_data;
124 struct closure cl; 33 struct bset *ondisk, *sorted, *inmemory;
125 closure_init_stack(&cl); 34 struct bio *bio;
126 35
127 if (!b->c->verify) 36 if (!b->c->verify || !b->c->verify_ondisk)
128 return; 37 return;
129 38
130 closure_wait_event(&b->io.wait, &cl, 39 down(&b->io_mutex);
131 atomic_read(&b->io.cl.remaining) == -1);
132
133 mutex_lock(&b->c->verify_lock); 40 mutex_lock(&b->c->verify_lock);
134 41
42 ondisk = b->c->verify_ondisk;
43 sorted = b->c->verify_data->keys.set->data;
44 inmemory = b->keys.set->data;
45
135 bkey_copy(&v->key, &b->key); 46 bkey_copy(&v->key, &b->key);
136 v->written = 0; 47 v->written = 0;
137 v->level = b->level; 48 v->level = b->level;
49 v->keys.ops = b->keys.ops;
50
51 bio = bch_bbio_alloc(b->c);
52 bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev;
53 bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
54 bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9;
55 bch_bio_map(bio, sorted);
138 56
139 bch_btree_node_read(v); 57 submit_bio_wait(REQ_META|READ_SYNC, bio);
140 closure_wait_event(&v->io.wait, &cl, 58 bch_bbio_free(bio, b->c);
141 atomic_read(&b->io.cl.remaining) == -1);
142 59
143 if (new->keys != v->sets[0].data->keys || 60 memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9);
144 memcmp(new->start, 61
145 v->sets[0].data->start, 62 bch_btree_node_read_done(v);
146 (void *) end(new) - (void *) new->start)) { 63 sorted = v->keys.set->data;
147 unsigned i, j; 64
65 if (inmemory->keys != sorted->keys ||
66 memcmp(inmemory->start,
67 sorted->start,
68 (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
69 struct bset *i;
70 unsigned j;
148 71
149 console_lock(); 72 console_lock();
150 73
151 printk(KERN_ERR "*** original memory node:\n"); 74 printk(KERN_ERR "*** in memory:\n");
152 for (i = 0; i <= b->nsets; i++) 75 bch_dump_bset(&b->keys, inmemory, 0);
153 dump_bset(b, b->sets[i].data);
154 76
155 printk(KERN_ERR "*** sorted memory node:\n"); 77 printk(KERN_ERR "*** read back in:\n");
156 dump_bset(b, new); 78 bch_dump_bset(&v->keys, sorted, 0);
157 79
158 printk(KERN_ERR "*** on disk node:\n"); 80 for_each_written_bset(b, ondisk, i) {
159 dump_bset(v, v->sets[0].data); 81 unsigned block = ((void *) i - (void *) ondisk) /
82 block_bytes(b->c);
83
84 printk(KERN_ERR "*** on disk block %u:\n", block);
85 bch_dump_bset(&b->keys, i, block);
86 }
160 87
161 for (j = 0; j < new->keys; j++) 88 printk(KERN_ERR "*** block %zu not written\n",
162 if (new->d[j] != v->sets[0].data->d[j]) 89 ((void *) i - (void *) ondisk) / block_bytes(b->c));
90
91 for (j = 0; j < inmemory->keys; j++)
92 if (inmemory->d[j] != sorted->d[j])
163 break; 93 break;
164 94
95 printk(KERN_ERR "b->written %u\n", b->written);
96
165 console_unlock(); 97 console_unlock();
166 panic("verify failed at %u\n", j); 98 panic("verify failed at %u\n", j);
167 } 99 }
168 100
169 mutex_unlock(&b->c->verify_lock); 101 mutex_unlock(&b->c->verify_lock);
102 up(&b->io_mutex);
170} 103}
171 104
172void bch_data_verify(struct cached_dev *dc, struct bio *bio) 105void bch_data_verify(struct cached_dev *dc, struct bio *bio)
173{ 106{
174 char name[BDEVNAME_SIZE]; 107 char name[BDEVNAME_SIZE];
175 struct bio *check; 108 struct bio *check;
176 struct bio_vec *bv; 109 struct bio_vec bv, *bv2;
110 struct bvec_iter iter;
177 int i; 111 int i;
178 112
179 check = bio_clone(bio, GFP_NOIO); 113 check = bio_clone(bio, GFP_NOIO);
@@ -185,95 +119,27 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
185 119
186 submit_bio_wait(READ_SYNC, check); 120 submit_bio_wait(READ_SYNC, check);
187 121
188 bio_for_each_segment(bv, bio, i) { 122 bio_for_each_segment(bv, bio, iter) {
189 void *p1 = kmap_atomic(bv->bv_page); 123 void *p1 = kmap_atomic(bv.bv_page);
190 void *p2 = page_address(check->bi_io_vec[i].bv_page); 124 void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page);
191 125
192 cache_set_err_on(memcmp(p1 + bv->bv_offset, 126 cache_set_err_on(memcmp(p1 + bv.bv_offset,
193 p2 + bv->bv_offset, 127 p2 + bv.bv_offset,
194 bv->bv_len), 128 bv.bv_len),
195 dc->disk.c, 129 dc->disk.c,
196 "verify failed at dev %s sector %llu", 130 "verify failed at dev %s sector %llu",
197 bdevname(dc->bdev, name), 131 bdevname(dc->bdev, name),
198 (uint64_t) bio->bi_sector); 132 (uint64_t) bio->bi_iter.bi_sector);
199 133
200 kunmap_atomic(p1); 134 kunmap_atomic(p1);
201 } 135 }
202 136
203 bio_for_each_segment_all(bv, check, i) 137 bio_for_each_segment_all(bv2, check, i)
204 __free_page(bv->bv_page); 138 __free_page(bv2->bv_page);
205out_put: 139out_put:
206 bio_put(check); 140 bio_put(check);
207} 141}
208 142
209int __bch_count_data(struct btree *b)
210{
211 unsigned ret = 0;
212 struct btree_iter iter;
213 struct bkey *k;
214
215 if (!b->level)
216 for_each_key(b, k, &iter)
217 ret += KEY_SIZE(k);
218 return ret;
219}
220
221void __bch_check_keys(struct btree *b, const char *fmt, ...)
222{
223 va_list args;
224 struct bkey *k, *p = NULL;
225 struct btree_iter iter;
226 const char *err;
227
228 for_each_key(b, k, &iter) {
229 if (!b->level) {
230 err = "Keys out of order";
231 if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
232 goto bug;
233
234 if (bch_ptr_invalid(b, k))
235 continue;
236
237 err = "Overlapping keys";
238 if (p && bkey_cmp(p, &START_KEY(k)) > 0)
239 goto bug;
240 } else {
241 if (bch_ptr_bad(b, k))
242 continue;
243
244 err = "Duplicate keys";
245 if (p && !bkey_cmp(p, k))
246 goto bug;
247 }
248 p = k;
249 }
250
251 err = "Key larger than btree node key";
252 if (p && bkey_cmp(p, &b->key) > 0)
253 goto bug;
254
255 return;
256bug:
257 bch_dump_bucket(b);
258
259 va_start(args, fmt);
260 vprintk(fmt, args);
261 va_end(args);
262
263 panic("bcache error: %s:\n", err);
264}
265
266void bch_btree_iter_next_check(struct btree_iter *iter)
267{
268 struct bkey *k = iter->data->k, *next = bkey_next(k);
269
270 if (next < iter->data->end &&
271 bkey_cmp(k, iter->b->level ? next : &START_KEY(next)) > 0) {
272 bch_dump_bucket(iter->b);
273 panic("Key skipped backwards\n");
274 }
275}
276
277#endif 143#endif
278 144
279#ifdef CONFIG_DEBUG_FS 145#ifdef CONFIG_DEBUG_FS
@@ -320,7 +186,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
320 if (!w) 186 if (!w)
321 break; 187 break;
322 188
323 bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key); 189 bch_extent_to_text(kbuf, sizeof(kbuf), &w->key);
324 i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf); 190 i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
325 bch_keybuf_del(&i->keys, w); 191 bch_keybuf_del(&i->keys, w);
326 } 192 }
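bch_data_verify() above also absorbs the immutable-biovec conversion from this series: bio_for_each_segment() now yields struct bio_vec by value and tracks its position in a struct bvec_iter. A hedged sketch of the new iteration idiom outside bcache; the function name is illustrative:

	static void sketch_walk_bio(struct bio *bio)
	{
		struct bio_vec bv;	/* copied out, not a pointer */
		struct bvec_iter iter;

		bio_for_each_segment(bv, bio, iter) {
			void *p = kmap_atomic(bv.bv_page);

			/* bv.bv_offset/bv.bv_len describe this segment;
			 * iter.bi_sector is where it lands on disk */

			kunmap_atomic(p);
		}
	}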
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index 2ede60e31874..1f63c195d247 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -1,47 +1,30 @@
1#ifndef _BCACHE_DEBUG_H 1#ifndef _BCACHE_DEBUG_H
2#define _BCACHE_DEBUG_H 2#define _BCACHE_DEBUG_H
3 3
4/* Btree/bkey debug printing */ 4struct bio;
5 5struct cached_dev;
6int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k); 6struct cache_set;
7 7
8#ifdef CONFIG_BCACHE_DEBUG 8#ifdef CONFIG_BCACHE_DEBUG
9 9
10void bch_btree_verify(struct btree *, struct bset *); 10void bch_btree_verify(struct btree *);
11void bch_data_verify(struct cached_dev *, struct bio *); 11void bch_data_verify(struct cached_dev *, struct bio *);
12int __bch_count_data(struct btree *);
13void __bch_check_keys(struct btree *, const char *, ...);
14void bch_btree_iter_next_check(struct btree_iter *);
15 12
16#define EBUG_ON(cond) BUG_ON(cond)
17#define expensive_debug_checks(c) ((c)->expensive_debug_checks) 13#define expensive_debug_checks(c) ((c)->expensive_debug_checks)
18#define key_merging_disabled(c) ((c)->key_merging_disabled) 14#define key_merging_disabled(c) ((c)->key_merging_disabled)
19#define bypass_torture_test(d) ((d)->bypass_torture_test) 15#define bypass_torture_test(d) ((d)->bypass_torture_test)
20 16
21#else /* DEBUG */ 17#else /* DEBUG */
22 18
23static inline void bch_btree_verify(struct btree *b, struct bset *i) {} 19static inline void bch_btree_verify(struct btree *b) {}
24static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {} 20static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
25static inline int __bch_count_data(struct btree *b) { return -1; }
26static inline void __bch_check_keys(struct btree *b, const char *fmt, ...) {}
27static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
28 21
29#define EBUG_ON(cond) do { if (cond); } while (0)
30#define expensive_debug_checks(c) 0 22#define expensive_debug_checks(c) 0
31#define key_merging_disabled(c) 0 23#define key_merging_disabled(c) 0
32#define bypass_torture_test(d) 0 24#define bypass_torture_test(d) 0
33 25
34#endif 26#endif
35 27
36#define bch_count_data(b) \
37 (expensive_debug_checks((b)->c) ? __bch_count_data(b) : -1)
38
39#define bch_check_keys(b, ...) \
40do { \
41 if (expensive_debug_checks((b)->c)) \
42 __bch_check_keys(b, __VA_ARGS__); \
43} while (0)
44
45#ifdef CONFIG_DEBUG_FS 28#ifdef CONFIG_DEBUG_FS
46void bch_debug_init_cache_set(struct cache_set *); 29void bch_debug_init_cache_set(struct cache_set *);
47#else 30#else
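
The wrapper macros removed from debug.h above (bch_count_data(), bch_check_keys(), EBUG_ON()) all follow the same pattern: an expensive consistency check is compiled in but only executed when the cache set's expensive_debug_checks knob is enabled. A minimal standalone sketch of that guard pattern, using hypothetical types in place of the real bcache structures:

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the bcache structures; not the real types. */
struct cache_set { bool expensive_debug_checks; };
struct btree     { struct cache_set *c; int nkeys; };

static void __check_keys(struct btree *b)
{
	/* Imagine a full walk over every key in the node here. */
	printf("checking %d keys\n", b->nkeys);
}

/* The expensive walk only runs when the runtime knob is set. */
#define check_keys(b)					\
do {							\
	if ((b)->c->expensive_debug_checks)		\
		__check_keys(b);			\
} while (0)

int main(void)
{
	struct cache_set c = { .expensive_debug_checks = true };
	struct btree b = { .c = &c, .nkeys = 42 };

	check_keys(&b);
	return 0;
}
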
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
new file mode 100644
index 000000000000..c3ead586dc27
--- /dev/null
+++ b/drivers/md/bcache/extents.c
@@ -0,0 +1,616 @@
1/*
2 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
3 *
4 * Uses a block device as cache for other block devices; optimized for SSDs.
5 * All allocation is done in buckets, which should match the erase block size
 6 * of the device.
 7 *
 8 * Buckets containing cached data are kept on a heap sorted by priority;
 9 * bucket priority is increased on cache hit, and periodically all the buckets
 10 * on the heap have their priority scaled down. This currently is just used as
 11 * an LRU but in the future should allow for more intelligent heuristics.
 12 *
 13 * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
 14 * counter. Garbage collection is used to remove stale pointers.
15 *
16 * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
17 * as keys are inserted we only sort the pages that have not yet been written.
18 * When garbage collection is run, we resort the entire node.
19 *
20 * All configuration is done via sysfs; see Documentation/bcache.txt.
21 */
22
23#include "bcache.h"
24#include "btree.h"
25#include "debug.h"
26#include "extents.h"
27#include "writeback.h"
28
29static void sort_key_next(struct btree_iter *iter,
30 struct btree_iter_set *i)
31{
32 i->k = bkey_next(i->k);
33
34 if (i->k == i->end)
35 *i = iter->data[--iter->used];
36}
37
38static bool bch_key_sort_cmp(struct btree_iter_set l,
39 struct btree_iter_set r)
40{
41 int64_t c = bkey_cmp(l.k, r.k);
42
43 return c ? c > 0 : l.k < r.k;
44}
45
46static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
47{
48 unsigned i;
49
50 for (i = 0; i < KEY_PTRS(k); i++)
51 if (ptr_available(c, k, i)) {
52 struct cache *ca = PTR_CACHE(c, k, i);
53 size_t bucket = PTR_BUCKET_NR(c, k, i);
54 size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
55
56 if (KEY_SIZE(k) + r > c->sb.bucket_size ||
57 bucket < ca->sb.first_bucket ||
58 bucket >= ca->sb.nbuckets)
59 return true;
60 }
61
62 return false;
63}
64
65/* Common among btree and extent ptrs */
66
67static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
68{
69 unsigned i;
70
71 for (i = 0; i < KEY_PTRS(k); i++)
72 if (ptr_available(c, k, i)) {
73 struct cache *ca = PTR_CACHE(c, k, i);
74 size_t bucket = PTR_BUCKET_NR(c, k, i);
75 size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
76
77 if (KEY_SIZE(k) + r > c->sb.bucket_size)
78 return "bad, length too big";
79 if (bucket < ca->sb.first_bucket)
80 return "bad, short offset";
81 if (bucket >= ca->sb.nbuckets)
82 return "bad, offset past end of device";
83 if (ptr_stale(c, k, i))
84 return "stale";
85 }
86
87 if (!bkey_cmp(k, &ZERO_KEY))
88 return "bad, null key";
89 if (!KEY_PTRS(k))
90 return "bad, no pointers";
91 if (!KEY_SIZE(k))
92 return "zeroed key";
93 return "";
94}
95
96void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
97{
98 unsigned i = 0;
99 char *out = buf, *end = buf + size;
100
101#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
102
103 p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));
104
105 for (i = 0; i < KEY_PTRS(k); i++) {
106 if (i)
107 p(", ");
108
109 if (PTR_DEV(k, i) == PTR_CHECK_DEV)
110 p("check dev");
111 else
112 p("%llu:%llu gen %llu", PTR_DEV(k, i),
113 PTR_OFFSET(k, i), PTR_GEN(k, i));
114 }
115
116 p("]");
117
118 if (KEY_DIRTY(k))
119 p(" dirty");
120 if (KEY_CSUM(k))
121 p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
122#undef p
123}
124
125static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
126{
127 struct btree *b = container_of(keys, struct btree, keys);
128 unsigned j;
129 char buf[80];
130
131 bch_extent_to_text(buf, sizeof(buf), k);
132 printk(" %s", buf);
133
134 for (j = 0; j < KEY_PTRS(k); j++) {
135 size_t n = PTR_BUCKET_NR(b->c, k, j);
136 printk(" bucket %zu", n);
137
138 if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
139 printk(" prio %i",
140 PTR_BUCKET(b->c, k, j)->prio);
141 }
142
143 printk(" %s\n", bch_ptr_status(b->c, k));
144}
145
146/* Btree ptrs */
147
148bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
149{
150 char buf[80];
151
152 if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
153 goto bad;
154
155 if (__ptr_invalid(c, k))
156 goto bad;
157
158 return false;
159bad:
160 bch_extent_to_text(buf, sizeof(buf), k);
161 cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
162 return true;
163}
164
165static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
166{
167 struct btree *b = container_of(bk, struct btree, keys);
168 return __bch_btree_ptr_invalid(b->c, k);
169}
170
171static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
172{
173 unsigned i;
174 char buf[80];
175 struct bucket *g;
176
177 if (mutex_trylock(&b->c->bucket_lock)) {
178 for (i = 0; i < KEY_PTRS(k); i++)
179 if (ptr_available(b->c, k, i)) {
180 g = PTR_BUCKET(b->c, k, i);
181
182 if (KEY_DIRTY(k) ||
183 g->prio != BTREE_PRIO ||
184 (b->c->gc_mark_valid &&
185 GC_MARK(g) != GC_MARK_METADATA))
186 goto err;
187 }
188
189 mutex_unlock(&b->c->bucket_lock);
190 }
191
192 return false;
193err:
194 mutex_unlock(&b->c->bucket_lock);
195 bch_extent_to_text(buf, sizeof(buf), k);
196 btree_bug(b,
197"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
198 buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
199 g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
200 return true;
201}
202
203static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
204{
205 struct btree *b = container_of(bk, struct btree, keys);
206 unsigned i;
207
208 if (!bkey_cmp(k, &ZERO_KEY) ||
209 !KEY_PTRS(k) ||
210 bch_ptr_invalid(bk, k))
211 return true;
212
213 for (i = 0; i < KEY_PTRS(k); i++)
214 if (!ptr_available(b->c, k, i) ||
215 ptr_stale(b->c, k, i))
216 return true;
217
218 if (expensive_debug_checks(b->c) &&
219 btree_ptr_bad_expensive(b, k))
220 return true;
221
222 return false;
223}
224
225static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
226 struct bkey *insert,
227 struct btree_iter *iter,
228 struct bkey *replace_key)
229{
230 struct btree *b = container_of(bk, struct btree, keys);
231
232 if (!KEY_OFFSET(insert))
233 btree_current_write(b)->prio_blocked++;
234
235 return false;
236}
237
238const struct btree_keys_ops bch_btree_keys_ops = {
239 .sort_cmp = bch_key_sort_cmp,
240 .insert_fixup = bch_btree_ptr_insert_fixup,
241 .key_invalid = bch_btree_ptr_invalid,
242 .key_bad = bch_btree_ptr_bad,
243 .key_to_text = bch_extent_to_text,
244 .key_dump = bch_bkey_dump,
245};
246
247/* Extents */
248
249/*
250 * Returns true if l > r - unless l == r, in which case returns true if l is
251 * older than r.
252 *
253 * Necessary for btree_sort_fixup() - if there are multiple keys that compare
254 * equal in different sets, we have to process them newest to oldest.
255 */
256static bool bch_extent_sort_cmp(struct btree_iter_set l,
257 struct btree_iter_set r)
258{
259 int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
260
261 return c ? c > 0 : l.k < r.k;
262}
263
264static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
265 struct bkey *tmp)
266{
267 while (iter->used > 1) {
268 struct btree_iter_set *top = iter->data, *i = top + 1;
269
270 if (iter->used > 2 &&
271 bch_extent_sort_cmp(i[0], i[1]))
272 i++;
273
274 if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
275 break;
276
277 if (!KEY_SIZE(i->k)) {
278 sort_key_next(iter, i);
279 heap_sift(iter, i - top, bch_extent_sort_cmp);
280 continue;
281 }
282
283 if (top->k > i->k) {
284 if (bkey_cmp(top->k, i->k) >= 0)
285 sort_key_next(iter, i);
286 else
287 bch_cut_front(top->k, i->k);
288
289 heap_sift(iter, i - top, bch_extent_sort_cmp);
290 } else {
291 /* can't happen because of comparison func */
292 BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
293
294 if (bkey_cmp(i->k, top->k) < 0) {
295 bkey_copy(tmp, top->k);
296
297 bch_cut_back(&START_KEY(i->k), tmp);
298 bch_cut_front(i->k, top->k);
299 heap_sift(iter, 0, bch_extent_sort_cmp);
300
301 return tmp;
302 } else {
303 bch_cut_back(&START_KEY(i->k), top->k);
304 }
305 }
306 }
307
308 return NULL;
309}
310
311static bool bch_extent_insert_fixup(struct btree_keys *b,
312 struct bkey *insert,
313 struct btree_iter *iter,
314 struct bkey *replace_key)
315{
316 struct cache_set *c = container_of(b, struct btree, keys)->c;
317
318 void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
319 {
320 if (KEY_DIRTY(k))
321 bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
322 offset, -sectors);
323 }
324
325 uint64_t old_offset;
326 unsigned old_size, sectors_found = 0;
327
328 BUG_ON(!KEY_OFFSET(insert));
329 BUG_ON(!KEY_SIZE(insert));
330
331 while (1) {
332 struct bkey *k = bch_btree_iter_next(iter);
333 if (!k)
334 break;
335
336 if (bkey_cmp(&START_KEY(k), insert) >= 0) {
337 if (KEY_SIZE(k))
338 break;
339 else
340 continue;
341 }
342
343 if (bkey_cmp(k, &START_KEY(insert)) <= 0)
344 continue;
345
346 old_offset = KEY_START(k);
347 old_size = KEY_SIZE(k);
348
349 /*
350 * We might overlap with 0 size extents; we can't skip these
351 * because if they're in the set we're inserting to we have to
352 * adjust them so they don't overlap with the key we're
353 * inserting. But we don't want to check them for replace
354 * operations.
355 */
356
357 if (replace_key && KEY_SIZE(k)) {
358 /*
359 * k might have been split since we inserted/found the
360 * key we're replacing
361 */
362 unsigned i;
363 uint64_t offset = KEY_START(k) -
364 KEY_START(replace_key);
365
366 /* But it must be a subset of the replace key */
367 if (KEY_START(k) < KEY_START(replace_key) ||
368 KEY_OFFSET(k) > KEY_OFFSET(replace_key))
369 goto check_failed;
370
371 /* We didn't find a key that we were supposed to */
372 if (KEY_START(k) > KEY_START(insert) + sectors_found)
373 goto check_failed;
374
375 if (!bch_bkey_equal_header(k, replace_key))
376 goto check_failed;
377
378 /* skip past gen */
379 offset <<= 8;
380
381 BUG_ON(!KEY_PTRS(replace_key));
382
383 for (i = 0; i < KEY_PTRS(replace_key); i++)
384 if (k->ptr[i] != replace_key->ptr[i] + offset)
385 goto check_failed;
386
387 sectors_found = KEY_OFFSET(k) - KEY_START(insert);
388 }
389
390 if (bkey_cmp(insert, k) < 0 &&
391 bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
392 /*
393 * We overlapped in the middle of an existing key: that
394 * means we have to split the old key. But we have to do
395 * slightly different things depending on whether the
396 * old key has been written out yet.
397 */
398
399 struct bkey *top;
400
401 subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
402
403 if (bkey_written(b, k)) {
404 /*
405 * We insert a new key to cover the top of the
406 * old key, and the old key is modified in place
407 * to represent the bottom split.
408 *
409 * It's completely arbitrary whether the new key
410 * is the top or the bottom, but it has to match
411 * up with what btree_sort_fixup() does - it
412 * doesn't check for this kind of overlap, it
413 * depends on us inserting a new key for the top
414 * here.
415 */
416 top = bch_bset_search(b, bset_tree_last(b),
417 insert);
418 bch_bset_insert(b, top, k);
419 } else {
420 BKEY_PADDED(key) temp;
421 bkey_copy(&temp.key, k);
422 bch_bset_insert(b, k, &temp.key);
423 top = bkey_next(k);
424 }
425
426 bch_cut_front(insert, top);
427 bch_cut_back(&START_KEY(insert), k);
428 bch_bset_fix_invalidated_key(b, k);
429 goto out;
430 }
431
432 if (bkey_cmp(insert, k) < 0) {
433 bch_cut_front(insert, k);
434 } else {
435 if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
436 old_offset = KEY_START(insert);
437
438 if (bkey_written(b, k) &&
439 bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
440 /*
441 * Completely overwrote, so we don't have to
442 * invalidate the binary search tree
443 */
444 bch_cut_front(k, k);
445 } else {
446 __bch_cut_back(&START_KEY(insert), k);
447 bch_bset_fix_invalidated_key(b, k);
448 }
449 }
450
451 subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
452 }
453
454check_failed:
455 if (replace_key) {
456 if (!sectors_found) {
457 return true;
458 } else if (sectors_found < KEY_SIZE(insert)) {
459 SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
460 (KEY_SIZE(insert) - sectors_found));
461 SET_KEY_SIZE(insert, sectors_found);
462 }
463 }
464out:
465 if (KEY_DIRTY(insert))
466 bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
467 KEY_START(insert),
468 KEY_SIZE(insert));
469
470 return false;
471}
472
473static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
474{
475 struct btree *b = container_of(bk, struct btree, keys);
476 char buf[80];
477
478 if (!KEY_SIZE(k))
479 return true;
480
481 if (KEY_SIZE(k) > KEY_OFFSET(k))
482 goto bad;
483
484 if (__ptr_invalid(b->c, k))
485 goto bad;
486
487 return false;
488bad:
489 bch_extent_to_text(buf, sizeof(buf), k);
490 cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
491 return true;
492}
493
494static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
495 unsigned ptr)
496{
497 struct bucket *g = PTR_BUCKET(b->c, k, ptr);
498 char buf[80];
499
500 if (mutex_trylock(&b->c->bucket_lock)) {
501 if (b->c->gc_mark_valid &&
502 ((GC_MARK(g) != GC_MARK_DIRTY &&
503 KEY_DIRTY(k)) ||
504 GC_MARK(g) == GC_MARK_METADATA))
505 goto err;
506
507 if (g->prio == BTREE_PRIO)
508 goto err;
509
510 mutex_unlock(&b->c->bucket_lock);
511 }
512
513 return false;
514err:
515 mutex_unlock(&b->c->bucket_lock);
516 bch_extent_to_text(buf, sizeof(buf), k);
517 btree_bug(b,
518"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
519 buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
520 g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
521 return true;
522}
523
524static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
525{
526 struct btree *b = container_of(bk, struct btree, keys);
527 struct bucket *g;
528 unsigned i, stale;
529
530 if (!KEY_PTRS(k) ||
531 bch_extent_invalid(bk, k))
532 return true;
533
534 for (i = 0; i < KEY_PTRS(k); i++)
535 if (!ptr_available(b->c, k, i))
536 return true;
537
538 if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
539 return false;
540
541 for (i = 0; i < KEY_PTRS(k); i++) {
542 g = PTR_BUCKET(b->c, k, i);
543 stale = ptr_stale(b->c, k, i);
544
545 btree_bug_on(stale > 96, b,
546 "key too stale: %i, need_gc %u",
547 stale, b->c->need_gc);
548
549 btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
550 b, "stale dirty pointer");
551
552 if (stale)
553 return true;
554
555 if (expensive_debug_checks(b->c) &&
556 bch_extent_bad_expensive(b, k, i))
557 return true;
558 }
559
560 return false;
561}
562
563static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
564{
565 return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
566 ~((uint64_t)1 << 63);
567}
568
569static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r)
570{
571 struct btree *b = container_of(bk, struct btree, keys);
572 unsigned i;
573
574 if (key_merging_disabled(b->c))
575 return false;
576
577 for (i = 0; i < KEY_PTRS(l); i++)
578 if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
579 PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
580 return false;
581
582 /* Keys with no pointers aren't restricted to one bucket and could
583 * overflow KEY_SIZE
584 */
585 if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
586 SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
587 SET_KEY_SIZE(l, USHRT_MAX);
588
589 bch_cut_front(l, r);
590 return false;
591 }
592
593 if (KEY_CSUM(l)) {
594 if (KEY_CSUM(r))
595 l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
596 else
597 SET_KEY_CSUM(l, 0);
598 }
599
600 SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
601 SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
602
603 return true;
604}
605
606const struct btree_keys_ops bch_extent_keys_ops = {
607 .sort_cmp = bch_extent_sort_cmp,
608 .sort_fixup = bch_extent_sort_fixup,
609 .insert_fixup = bch_extent_insert_fixup,
610 .key_invalid = bch_extent_invalid,
611 .key_bad = bch_extent_bad,
612 .key_merge = bch_extent_merge,
613 .key_to_text = bch_extent_to_text,
614 .key_dump = bch_bkey_dump,
615 .is_extents = true,
616};
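
bch_extent_insert_fixup() above has to deal with a new extent that overlaps the front, the back, or the middle of an existing key; the middle case splits the old key into a bottom half (bch_cut_back()) and a top half (bch_cut_front()). A small userspace sketch of the same end-offset/size arithmetic, with toy types rather than real bkeys:

#include <stdio.h>
#include <stdint.h>

/* Toy extent: like a bkey it stores its end sector and length; start is derived. */
struct extent { uint64_t offset; uint64_t size; };

static uint64_t ext_start(const struct extent *e) { return e->offset - e->size; }

/* Drop the part of @e that lies before @where (bch_cut_front analogue). */
static void cut_front(struct extent *e, uint64_t where)
{
	if (where > ext_start(e))
		e->size = e->offset > where ? e->offset - where : 0;
}

/* Drop the part of @e that lies after @where (bch_cut_back analogue). */
static void cut_back(struct extent *e, uint64_t where)
{
	if (where < e->offset) {
		uint64_t start = ext_start(e);

		e->offset = where;
		e->size = where > start ? where - start : 0;
	}
}

int main(void)
{
	struct extent old = { .offset = 100, .size = 50 };	/* covers 50..100 */
	struct extent ins = { .offset = 80,  .size = 10 };	/* covers 70..80  */

	/* The insert lands in the middle of the old extent: split the old key. */
	struct extent bottom = old, top = old;

	cut_back(&bottom, ext_start(&ins));	/* bottom now covers 50..70  */
	cut_front(&top, ins.offset);		/* top now covers    80..100 */

	printf("bottom %llu..%llu, top %llu..%llu\n",
	       (unsigned long long)ext_start(&bottom), (unsigned long long)bottom.offset,
	       (unsigned long long)ext_start(&top), (unsigned long long)top.offset);
	return 0;
}
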
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
new file mode 100644
index 000000000000..e4e23409782d
--- /dev/null
+++ b/drivers/md/bcache/extents.h
@@ -0,0 +1,13 @@
1#ifndef _BCACHE_EXTENTS_H
2#define _BCACHE_EXTENTS_H
3
4extern const struct btree_keys_ops bch_btree_keys_ops;
5extern const struct btree_keys_ops bch_extent_keys_ops;
6
7struct bkey;
8struct cache_set;
9
10void bch_extent_to_text(char *, size_t, const struct bkey *);
11bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
12
13#endif /* _BCACHE_EXTENTS_H */
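
extents.h exports the two operation tables defined in extents.c so that generic btree code can be driven entirely through a const vtable: one table for interior (btree pointer) nodes, one for leaf nodes holding extents. A standalone sketch of that const-ops-table dispatch, with a hypothetical key type and callbacks rather than the real btree_keys_ops:

#include <stdbool.h>
#include <stdio.h>

struct key { unsigned size; };

struct key_ops {
	bool (*key_invalid)(const struct key *);
	void (*key_to_text)(char *, size_t, const struct key *);
};

static bool extent_invalid(const struct key *k) { return k->size == 0; }

static void extent_to_text(char *buf, size_t n, const struct key *k)
{
	snprintf(buf, n, "extent len %u", k->size);
}

static const struct key_ops extent_key_ops = {
	.key_invalid	= extent_invalid,
	.key_to_text	= extent_to_text,
};

/* Generic code only sees the ops table, never the concrete key format. */
static void dump_key(const struct key_ops *ops, const struct key *k)
{
	char buf[32];

	ops->key_to_text(buf, sizeof(buf), k);
	printf("%s%s\n", buf, ops->key_invalid(k) ? " (invalid)" : "");
}

int main(void)
{
	struct key k = { .size = 8 };

	dump_key(&extent_key_ops, &k);
	return 0;
}
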
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9056632995b1..fa028fa82df4 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -11,178 +11,40 @@
11 11
12#include <linux/blkdev.h> 12#include <linux/blkdev.h>
13 13
14static void bch_bi_idx_hack_endio(struct bio *bio, int error)
15{
16 struct bio *p = bio->bi_private;
17
18 bio_endio(p, error);
19 bio_put(bio);
20}
21
22static void bch_generic_make_request_hack(struct bio *bio)
23{
24 if (bio->bi_idx) {
25 struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio));
26
27 memcpy(clone->bi_io_vec,
28 bio_iovec(bio),
29 bio_segments(bio) * sizeof(struct bio_vec));
30
31 clone->bi_sector = bio->bi_sector;
32 clone->bi_bdev = bio->bi_bdev;
33 clone->bi_rw = bio->bi_rw;
34 clone->bi_vcnt = bio_segments(bio);
35 clone->bi_size = bio->bi_size;
36
37 clone->bi_private = bio;
38 clone->bi_end_io = bch_bi_idx_hack_endio;
39
40 bio = clone;
41 }
42
43 /*
44 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
45 * bios might have had more than that (before we split them per device
46 * limitations).
47 *
48 * To be taken out once immutable bvec stuff is in.
49 */
50 bio->bi_max_vecs = bio->bi_vcnt;
51
52 generic_make_request(bio);
53}
54
55/**
56 * bch_bio_split - split a bio
57 * @bio: bio to split
58 * @sectors: number of sectors to split from the front of @bio
59 * @gfp: gfp mask
60 * @bs: bio set to allocate from
61 *
62 * Allocates and returns a new bio which represents @sectors from the start of
63 * @bio, and updates @bio to represent the remaining sectors.
64 *
65 * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
66 * unchanged.
67 *
68 * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
 69 * bvec boundary; it is the caller's responsibility to ensure that @bio is not
70 * freed before the split.
71 */
72struct bio *bch_bio_split(struct bio *bio, int sectors,
73 gfp_t gfp, struct bio_set *bs)
74{
75 unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9;
76 struct bio_vec *bv;
77 struct bio *ret = NULL;
78
79 BUG_ON(sectors <= 0);
80
81 if (sectors >= bio_sectors(bio))
82 return bio;
83
84 if (bio->bi_rw & REQ_DISCARD) {
85 ret = bio_alloc_bioset(gfp, 1, bs);
86 if (!ret)
87 return NULL;
88 idx = 0;
89 goto out;
90 }
91
92 bio_for_each_segment(bv, bio, idx) {
93 vcnt = idx - bio->bi_idx;
94
95 if (!nbytes) {
96 ret = bio_alloc_bioset(gfp, vcnt, bs);
97 if (!ret)
98 return NULL;
99
100 memcpy(ret->bi_io_vec, bio_iovec(bio),
101 sizeof(struct bio_vec) * vcnt);
102
103 break;
104 } else if (nbytes < bv->bv_len) {
105 ret = bio_alloc_bioset(gfp, ++vcnt, bs);
106 if (!ret)
107 return NULL;
108
109 memcpy(ret->bi_io_vec, bio_iovec(bio),
110 sizeof(struct bio_vec) * vcnt);
111
112 ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
113 bv->bv_offset += nbytes;
114 bv->bv_len -= nbytes;
115 break;
116 }
117
118 nbytes -= bv->bv_len;
119 }
120out:
121 ret->bi_bdev = bio->bi_bdev;
122 ret->bi_sector = bio->bi_sector;
123 ret->bi_size = sectors << 9;
124 ret->bi_rw = bio->bi_rw;
125 ret->bi_vcnt = vcnt;
126 ret->bi_max_vecs = vcnt;
127
128 bio->bi_sector += sectors;
129 bio->bi_size -= sectors << 9;
130 bio->bi_idx = idx;
131
132 if (bio_integrity(bio)) {
133 if (bio_integrity_clone(ret, bio, gfp)) {
134 bio_put(ret);
135 return NULL;
136 }
137
138 bio_integrity_trim(ret, 0, bio_sectors(ret));
139 bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
140 }
141
142 return ret;
143}
144
145static unsigned bch_bio_max_sectors(struct bio *bio) 14static unsigned bch_bio_max_sectors(struct bio *bio)
146{ 15{
147 unsigned ret = bio_sectors(bio);
148 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 16 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
149 unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES, 17 struct bio_vec bv;
150 queue_max_segments(q)); 18 struct bvec_iter iter;
19 unsigned ret = 0, seg = 0;
151 20
152 if (bio->bi_rw & REQ_DISCARD) 21 if (bio->bi_rw & REQ_DISCARD)
153 return min(ret, q->limits.max_discard_sectors); 22 return min(bio_sectors(bio), q->limits.max_discard_sectors);
154 23
155 if (bio_segments(bio) > max_segments || 24 bio_for_each_segment(bv, bio, iter) {
156 q->merge_bvec_fn) { 25 struct bvec_merge_data bvm = {
157 struct bio_vec *bv; 26 .bi_bdev = bio->bi_bdev,
158 int i, seg = 0; 27 .bi_sector = bio->bi_iter.bi_sector,
159 28 .bi_size = ret << 9,
160 ret = 0; 29 .bi_rw = bio->bi_rw,
161 30 };
162 bio_for_each_segment(bv, bio, i) { 31
163 struct bvec_merge_data bvm = { 32 if (seg == min_t(unsigned, BIO_MAX_PAGES,
164 .bi_bdev = bio->bi_bdev, 33 queue_max_segments(q)))
165 .bi_sector = bio->bi_sector, 34 break;
166 .bi_size = ret << 9,
167 .bi_rw = bio->bi_rw,
168 };
169
170 if (seg == max_segments)
171 break;
172 35
173 if (q->merge_bvec_fn && 36 if (q->merge_bvec_fn &&
174 q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) 37 q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len)
175 break; 38 break;
176 39
177 seg++; 40 seg++;
178 ret += bv->bv_len >> 9; 41 ret += bv.bv_len >> 9;
179 }
180 } 42 }
181 43
182 ret = min(ret, queue_max_sectors(q)); 44 ret = min(ret, queue_max_sectors(q));
183 45
184 WARN_ON(!ret); 46 WARN_ON(!ret);
185 ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9); 47 ret = max_t(int, ret, bio_iovec(bio).bv_len >> 9);
186 48
187 return ret; 49 return ret;
188} 50}
@@ -193,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl)
193 55
194 s->bio->bi_end_io = s->bi_end_io; 56 s->bio->bi_end_io = s->bi_end_io;
195 s->bio->bi_private = s->bi_private; 57 s->bio->bi_private = s->bi_private;
196 bio_endio(s->bio, 0); 58 bio_endio_nodec(s->bio, 0);
197 59
198 closure_debug_destroy(&s->cl); 60 closure_debug_destroy(&s->cl);
199 mempool_free(s, s->p->bio_split_hook); 61 mempool_free(s, s->p->bio_split_hook);
@@ -232,19 +94,19 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
232 bio_get(bio); 94 bio_get(bio);
233 95
234 do { 96 do {
235 n = bch_bio_split(bio, bch_bio_max_sectors(bio), 97 n = bio_next_split(bio, bch_bio_max_sectors(bio),
236 GFP_NOIO, s->p->bio_split); 98 GFP_NOIO, s->p->bio_split);
237 99
238 n->bi_end_io = bch_bio_submit_split_endio; 100 n->bi_end_io = bch_bio_submit_split_endio;
239 n->bi_private = &s->cl; 101 n->bi_private = &s->cl;
240 102
241 closure_get(&s->cl); 103 closure_get(&s->cl);
242 bch_generic_make_request_hack(n); 104 generic_make_request(n);
243 } while (n != bio); 105 } while (n != bio);
244 106
245 continue_at(&s->cl, bch_bio_submit_split_done, NULL); 107 continue_at(&s->cl, bch_bio_submit_split_done, NULL);
246submit: 108submit:
247 bch_generic_make_request_hack(bio); 109 generic_make_request(bio);
248} 110}
249 111
250/* Bios with headers */ 112/* Bios with headers */
@@ -272,8 +134,8 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
272{ 134{
273 struct bbio *b = container_of(bio, struct bbio, bio); 135 struct bbio *b = container_of(bio, struct bbio, bio);
274 136
275 bio->bi_sector = PTR_OFFSET(&b->key, 0); 137 bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
276 bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; 138 bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev;
277 139
278 b->submit_time_us = local_clock_us(); 140 b->submit_time_us = local_clock_us();
279 closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0)); 141 closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
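
The deletions in io.c are possible because the immutable biovec work lets bio_for_each_segment() walk a bio through a separate struct bvec_iter and hand back struct bio_vec by value, so splitting or advancing a bio no longer means copying or editing the segment array; bio_next_split() and friends just move an iterator. A rough userspace model of that idea, with toy types that are only meant to illustrate the shape of the API:

#include <stdio.h>
#include <stdint.h>

struct seg      { uint32_t len; };		/* stand-in for a bio_vec    */
struct seg_iter { uint32_t idx, done; };	/* position within segs[idx] */

struct toybio {
	const struct seg *segs;
	uint32_t          nsegs;
	struct seg_iter   iter;			/* advancing only touches this */
};

/* Bytes left in the current segment, 0 when the walk is finished. */
static uint32_t cur_left(const struct toybio *b)
{
	return b->iter.idx < b->nsegs ? b->segs[b->iter.idx].len - b->iter.done : 0;
}

static void advance(struct toybio *b, uint32_t bytes)
{
	while (bytes && b->iter.idx < b->nsegs) {
		uint32_t step = cur_left(b) < bytes ? cur_left(b) : bytes;

		b->iter.done += step;
		bytes        -= step;

		if (b->iter.done == b->segs[b->iter.idx].len) {
			b->iter.idx++;
			b->iter.done = 0;
		}
	}
}

int main(void)
{
	const struct seg segs[] = { { 4096 }, { 4096 }, { 2048 } };
	struct toybio bio = { .segs = segs, .nsegs = 3 };

	/* "Splitting" is just a saved copy of the iterator; segs[] never changes. */
	struct seg_iter front = bio.iter;

	advance(&bio, 6144);
	printf("front starts at seg %u, remainder at seg %u offset %u\n",
	       front.idx, bio.iter.idx, bio.iter.done);
	return 0;
}
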
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ecdaa671bd50..18039affc306 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -44,17 +44,17 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
44 44
45 closure_init_stack(&cl); 45 closure_init_stack(&cl);
46 46
47 pr_debug("reading %llu", (uint64_t) bucket); 47 pr_debug("reading %u", bucket_index);
48 48
49 while (offset < ca->sb.bucket_size) { 49 while (offset < ca->sb.bucket_size) {
50reread: left = ca->sb.bucket_size - offset; 50reread: left = ca->sb.bucket_size - offset;
51 len = min_t(unsigned, left, PAGE_SECTORS * 8); 51 len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS);
52 52
53 bio_reset(bio); 53 bio_reset(bio);
54 bio->bi_sector = bucket + offset; 54 bio->bi_iter.bi_sector = bucket + offset;
55 bio->bi_bdev = ca->bdev; 55 bio->bi_bdev = ca->bdev;
56 bio->bi_rw = READ; 56 bio->bi_rw = READ;
57 bio->bi_size = len << 9; 57 bio->bi_iter.bi_size = len << 9;
58 58
59 bio->bi_end_io = journal_read_endio; 59 bio->bi_end_io = journal_read_endio;
60 bio->bi_private = &cl; 60 bio->bi_private = &cl;
@@ -74,19 +74,28 @@ reread: left = ca->sb.bucket_size - offset;
74 struct list_head *where; 74 struct list_head *where;
75 size_t blocks, bytes = set_bytes(j); 75 size_t blocks, bytes = set_bytes(j);
76 76
77 if (j->magic != jset_magic(&ca->sb)) 77 if (j->magic != jset_magic(&ca->sb)) {
78 pr_debug("%u: bad magic", bucket_index);
78 return ret; 79 return ret;
80 }
79 81
80 if (bytes > left << 9) 82 if (bytes > left << 9 ||
83 bytes > PAGE_SIZE << JSET_BITS) {
84 pr_info("%u: too big, %zu bytes, offset %u",
85 bucket_index, bytes, offset);
81 return ret; 86 return ret;
87 }
82 88
83 if (bytes > len << 9) 89 if (bytes > len << 9)
84 goto reread; 90 goto reread;
85 91
86 if (j->csum != csum_set(j)) 92 if (j->csum != csum_set(j)) {
93 pr_info("%u: bad csum, %zu bytes, offset %u",
94 bucket_index, bytes, offset);
87 return ret; 95 return ret;
96 }
88 97
89 blocks = set_blocks(j, ca->set); 98 blocks = set_blocks(j, block_bytes(ca->set));
90 99
91 while (!list_empty(list)) { 100 while (!list_empty(list)) {
92 i = list_first_entry(list, 101 i = list_first_entry(list,
@@ -275,7 +284,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
275 } 284 }
276 285
277 for (k = i->j.start; 286 for (k = i->j.start;
278 k < end(&i->j); 287 k < bset_bkey_last(&i->j);
279 k = bkey_next(k)) { 288 k = bkey_next(k)) {
280 unsigned j; 289 unsigned j;
281 290
@@ -313,7 +322,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
313 n, i->j.seq - 1, start, end); 322 n, i->j.seq - 1, start, end);
314 323
315 for (k = i->j.start; 324 for (k = i->j.start;
316 k < end(&i->j); 325 k < bset_bkey_last(&i->j);
317 k = bkey_next(k)) { 326 k = bkey_next(k)) {
318 trace_bcache_journal_replay_key(k); 327 trace_bcache_journal_replay_key(k);
319 328
@@ -437,13 +446,13 @@ static void do_journal_discard(struct cache *ca)
437 atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); 446 atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
438 447
439 bio_init(bio); 448 bio_init(bio);
440 bio->bi_sector = bucket_to_sector(ca->set, 449 bio->bi_iter.bi_sector = bucket_to_sector(ca->set,
441 ca->sb.d[ja->discard_idx]); 450 ca->sb.d[ja->discard_idx]);
442 bio->bi_bdev = ca->bdev; 451 bio->bi_bdev = ca->bdev;
443 bio->bi_rw = REQ_WRITE|REQ_DISCARD; 452 bio->bi_rw = REQ_WRITE|REQ_DISCARD;
444 bio->bi_max_vecs = 1; 453 bio->bi_max_vecs = 1;
445 bio->bi_io_vec = bio->bi_inline_vecs; 454 bio->bi_io_vec = bio->bi_inline_vecs;
446 bio->bi_size = bucket_bytes(ca); 455 bio->bi_iter.bi_size = bucket_bytes(ca);
447 bio->bi_end_io = journal_discard_endio; 456 bio->bi_end_io = journal_discard_endio;
448 457
449 closure_get(&ca->set->cl); 458 closure_get(&ca->set->cl);
@@ -555,6 +564,14 @@ static void journal_write_done(struct closure *cl)
555 continue_at_nobarrier(cl, journal_write, system_wq); 564 continue_at_nobarrier(cl, journal_write, system_wq);
556} 565}
557 566
567static void journal_write_unlock(struct closure *cl)
568{
569 struct cache_set *c = container_of(cl, struct cache_set, journal.io);
570
571 c->journal.io_in_flight = 0;
572 spin_unlock(&c->journal.lock);
573}
574
558static void journal_write_unlocked(struct closure *cl) 575static void journal_write_unlocked(struct closure *cl)
559 __releases(c->journal.lock) 576 __releases(c->journal.lock)
560{ 577{
@@ -562,22 +579,15 @@ static void journal_write_unlocked(struct closure *cl)
562 struct cache *ca; 579 struct cache *ca;
563 struct journal_write *w = c->journal.cur; 580 struct journal_write *w = c->journal.cur;
564 struct bkey *k = &c->journal.key; 581 struct bkey *k = &c->journal.key;
565 unsigned i, sectors = set_blocks(w->data, c) * c->sb.block_size; 582 unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
583 c->sb.block_size;
566 584
567 struct bio *bio; 585 struct bio *bio;
568 struct bio_list list; 586 struct bio_list list;
569 bio_list_init(&list); 587 bio_list_init(&list);
570 588
571 if (!w->need_write) { 589 if (!w->need_write) {
572 /* 590 closure_return_with_destructor(cl, journal_write_unlock);
573 * XXX: have to unlock closure before we unlock journal lock,
574 * else we race with bch_journal(). But this way we race
575 * against cache set unregister. Doh.
576 */
577 set_closure_fn(cl, NULL, NULL);
578 closure_sub(cl, CLOSURE_RUNNING + 1);
579 spin_unlock(&c->journal.lock);
580 return;
581 } else if (journal_full(&c->journal)) { 591 } else if (journal_full(&c->journal)) {
582 journal_reclaim(c); 592 journal_reclaim(c);
583 spin_unlock(&c->journal.lock); 593 spin_unlock(&c->journal.lock);
@@ -586,7 +596,7 @@ static void journal_write_unlocked(struct closure *cl)
586 continue_at(cl, journal_write, system_wq); 596 continue_at(cl, journal_write, system_wq);
587 } 597 }
588 598
589 c->journal.blocks_free -= set_blocks(w->data, c); 599 c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
590 600
591 w->data->btree_level = c->root->level; 601 w->data->btree_level = c->root->level;
592 602
@@ -608,10 +618,10 @@ static void journal_write_unlocked(struct closure *cl)
608 atomic_long_add(sectors, &ca->meta_sectors_written); 618 atomic_long_add(sectors, &ca->meta_sectors_written);
609 619
610 bio_reset(bio); 620 bio_reset(bio);
611 bio->bi_sector = PTR_OFFSET(k, i); 621 bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
612 bio->bi_bdev = ca->bdev; 622 bio->bi_bdev = ca->bdev;
613 bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; 623 bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
614 bio->bi_size = sectors << 9; 624 bio->bi_iter.bi_size = sectors << 9;
615 625
616 bio->bi_end_io = journal_write_endio; 626 bio->bi_end_io = journal_write_endio;
617 bio->bi_private = w; 627 bio->bi_private = w;
@@ -653,10 +663,12 @@ static void journal_try_write(struct cache_set *c)
653 663
654 w->need_write = true; 664 w->need_write = true;
655 665
656 if (closure_trylock(cl, &c->cl)) 666 if (!c->journal.io_in_flight) {
657 journal_write_unlocked(cl); 667 c->journal.io_in_flight = 1;
658 else 668 closure_call(cl, journal_write_unlocked, NULL, &c->cl);
669 } else {
659 spin_unlock(&c->journal.lock); 670 spin_unlock(&c->journal.lock);
671 }
660} 672}
661 673
662static struct journal_write *journal_wait_for_write(struct cache_set *c, 674static struct journal_write *journal_wait_for_write(struct cache_set *c,
@@ -664,6 +676,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
664{ 676{
665 size_t sectors; 677 size_t sectors;
666 struct closure cl; 678 struct closure cl;
679 bool wait = false;
667 680
668 closure_init_stack(&cl); 681 closure_init_stack(&cl);
669 682
@@ -673,16 +686,19 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
673 struct journal_write *w = c->journal.cur; 686 struct journal_write *w = c->journal.cur;
674 687
675 sectors = __set_blocks(w->data, w->data->keys + nkeys, 688 sectors = __set_blocks(w->data, w->data->keys + nkeys,
676 c) * c->sb.block_size; 689 block_bytes(c)) * c->sb.block_size;
677 690
678 if (sectors <= min_t(size_t, 691 if (sectors <= min_t(size_t,
679 c->journal.blocks_free * c->sb.block_size, 692 c->journal.blocks_free * c->sb.block_size,
680 PAGE_SECTORS << JSET_BITS)) 693 PAGE_SECTORS << JSET_BITS))
681 return w; 694 return w;
682 695
683 /* XXX: tracepoint */ 696 if (wait)
697 closure_wait(&c->journal.wait, &cl);
698
684 if (!journal_full(&c->journal)) { 699 if (!journal_full(&c->journal)) {
685 trace_bcache_journal_entry_full(c); 700 if (wait)
701 trace_bcache_journal_entry_full(c);
686 702
687 /* 703 /*
688 * XXX: If we were inserting so many keys that they 704 * XXX: If we were inserting so many keys that they
@@ -692,12 +708,11 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
692 */ 708 */
693 BUG_ON(!w->data->keys); 709 BUG_ON(!w->data->keys);
694 710
695 closure_wait(&w->wait, &cl);
696 journal_try_write(c); /* unlocks */ 711 journal_try_write(c); /* unlocks */
697 } else { 712 } else {
698 trace_bcache_journal_full(c); 713 if (wait)
714 trace_bcache_journal_full(c);
699 715
700 closure_wait(&c->journal.wait, &cl);
701 journal_reclaim(c); 716 journal_reclaim(c);
702 spin_unlock(&c->journal.lock); 717 spin_unlock(&c->journal.lock);
703 718
@@ -706,6 +721,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
706 721
707 closure_sync(&cl); 722 closure_sync(&cl);
708 spin_lock(&c->journal.lock); 723 spin_lock(&c->journal.lock);
724 wait = true;
709 } 725 }
710} 726}
711 727
@@ -736,7 +752,7 @@ atomic_t *bch_journal(struct cache_set *c,
736 752
737 w = journal_wait_for_write(c, bch_keylist_nkeys(keys)); 753 w = journal_wait_for_write(c, bch_keylist_nkeys(keys));
738 754
739 memcpy(end(w->data), keys->keys, bch_keylist_bytes(keys)); 755 memcpy(bset_bkey_last(w->data), keys->keys, bch_keylist_bytes(keys));
740 w->data->keys += bch_keylist_nkeys(keys); 756 w->data->keys += bch_keylist_nkeys(keys);
741 757
742 ret = &fifo_back(&c->journal.pin); 758 ret = &fifo_back(&c->journal.pin);
@@ -780,7 +796,6 @@ int bch_journal_alloc(struct cache_set *c)
780{ 796{
781 struct journal *j = &c->journal; 797 struct journal *j = &c->journal;
782 798
783 closure_init_unlocked(&j->io);
784 spin_lock_init(&j->lock); 799 spin_lock_init(&j->lock);
785 INIT_DELAYED_WORK(&j->work, journal_write_work); 800 INIT_DELAYED_WORK(&j->work, journal_write_work);
786 801
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index a6472fda94b2..9180c4465075 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -104,6 +104,7 @@ struct journal {
104 /* used when waiting because the journal was full */ 104 /* used when waiting because the journal was full */
105 struct closure_waitlist wait; 105 struct closure_waitlist wait;
106 struct closure io; 106 struct closure io;
107 int io_in_flight;
107 struct delayed_work work; 108 struct delayed_work work;
108 109
109 /* Number of blocks free in the bucket(s) we're currently writing to */ 110 /* Number of blocks free in the bucket(s) we're currently writing to */
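
The journal changes replace closure_trylock() with an explicit io_in_flight flag in struct journal: journal_try_write() only starts a write when no other journal write is outstanding, and the new journal_write_unlock() clears the flag under journal.lock from the write path, closing the unlock-ordering race noted in the deleted XXX comment. A rough single-threaded sketch of that guard, with toy types and a synchronous stand-in for the asynchronous completion:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the io_in_flight handshake; not the real struct journal. */
struct journal {
	pthread_mutex_t lock;
	bool io_in_flight;
	bool need_write;
};

/* Runs from the write's completion path (journal_write_unlock() analogue). */
static void journal_write_done(struct journal *j)
{
	pthread_mutex_lock(&j->lock);
	j->io_in_flight = false;
	pthread_mutex_unlock(&j->lock);
}

static void journal_try_write(struct journal *j)
{
	pthread_mutex_lock(&j->lock);

	if (j->io_in_flight || !j->need_write) {
		pthread_mutex_unlock(&j->lock);
		return;
	}

	j->io_in_flight = true;
	j->need_write = false;
	pthread_mutex_unlock(&j->lock);

	printf("submitting journal write\n");
	/*
	 * In the real code the I/O is asynchronous and the completion
	 * closure clears io_in_flight; here it is faked synchronously.
	 */
	journal_write_done(j);
}

int main(void)
{
	struct journal j = { PTHREAD_MUTEX_INITIALIZER, false, true };

	journal_try_write(&j);	/* starts a write            */
	journal_try_write(&j);	/* nothing pending, no write */
	return 0;
}
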
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index f2f0998c4a91..9eb60d102de8 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -86,7 +86,7 @@ static void moving_init(struct moving_io *io)
86 bio_get(bio); 86 bio_get(bio);
87 bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); 87 bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
88 88
89 bio->bi_size = KEY_SIZE(&io->w->key) << 9; 89 bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
90 bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key), 90 bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key),
91 PAGE_SECTORS); 91 PAGE_SECTORS);
92 bio->bi_private = &io->cl; 92 bio->bi_private = &io->cl;
@@ -102,7 +102,7 @@ static void write_moving(struct closure *cl)
102 if (!op->error) { 102 if (!op->error) {
103 moving_init(io); 103 moving_init(io);
104 104
105 io->bio.bio.bi_sector = KEY_START(&io->w->key); 105 io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
106 op->write_prio = 1; 106 op->write_prio = 1;
107 op->bio = &io->bio.bio; 107 op->bio = &io->bio.bio;
108 108
@@ -211,7 +211,7 @@ void bch_moving_gc(struct cache_set *c)
211 for_each_cache(ca, c, i) { 211 for_each_cache(ca, c, i) {
212 unsigned sectors_to_move = 0; 212 unsigned sectors_to_move = 0;
213 unsigned reserve_sectors = ca->sb.bucket_size * 213 unsigned reserve_sectors = ca->sb.bucket_size *
214 min(fifo_used(&ca->free), ca->free.size / 2); 214 fifo_used(&ca->free[RESERVE_MOVINGGC]);
215 215
216 ca->heap.used = 0; 216 ca->heap.used = 0;
217 217
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 61bcfc21d2a0..72cd213f213f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -197,14 +197,14 @@ static bool verify(struct cached_dev *dc, struct bio *bio)
197 197
198static void bio_csum(struct bio *bio, struct bkey *k) 198static void bio_csum(struct bio *bio, struct bkey *k)
199{ 199{
200 struct bio_vec *bv; 200 struct bio_vec bv;
201 struct bvec_iter iter;
201 uint64_t csum = 0; 202 uint64_t csum = 0;
202 int i;
203 203
204 bio_for_each_segment(bv, bio, i) { 204 bio_for_each_segment(bv, bio, iter) {
205 void *d = kmap(bv->bv_page) + bv->bv_offset; 205 void *d = kmap(bv.bv_page) + bv.bv_offset;
206 csum = bch_crc64_update(csum, d, bv->bv_len); 206 csum = bch_crc64_update(csum, d, bv.bv_len);
207 kunmap(bv->bv_page); 207 kunmap(bv.bv_page);
208 } 208 }
209 209
210 k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1); 210 k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
@@ -254,26 +254,44 @@ static void bch_data_insert_keys(struct closure *cl)
254 closure_return(cl); 254 closure_return(cl);
255} 255}
256 256
257static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
258 struct cache_set *c)
259{
260 size_t oldsize = bch_keylist_nkeys(l);
261 size_t newsize = oldsize + u64s;
262
263 /*
264 * The journalling code doesn't handle the case where the keys to insert
 265 * are bigger than an empty write: if we just return -ENOMEM here,
266 * bio_insert() and bio_invalidate() will insert the keys created so far
267 * and finish the rest when the keylist is empty.
268 */
269 if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
270 return -ENOMEM;
271
272 return __bch_keylist_realloc(l, u64s);
273}
274
257static void bch_data_invalidate(struct closure *cl) 275static void bch_data_invalidate(struct closure *cl)
258{ 276{
259 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); 277 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
260 struct bio *bio = op->bio; 278 struct bio *bio = op->bio;
261 279
262 pr_debug("invalidating %i sectors from %llu", 280 pr_debug("invalidating %i sectors from %llu",
263 bio_sectors(bio), (uint64_t) bio->bi_sector); 281 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
264 282
265 while (bio_sectors(bio)) { 283 while (bio_sectors(bio)) {
266 unsigned sectors = min(bio_sectors(bio), 284 unsigned sectors = min(bio_sectors(bio),
267 1U << (KEY_SIZE_BITS - 1)); 285 1U << (KEY_SIZE_BITS - 1));
268 286
269 if (bch_keylist_realloc(&op->insert_keys, 0, op->c)) 287 if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
270 goto out; 288 goto out;
271 289
272 bio->bi_sector += sectors; 290 bio->bi_iter.bi_sector += sectors;
273 bio->bi_size -= sectors << 9; 291 bio->bi_iter.bi_size -= sectors << 9;
274 292
275 bch_keylist_add(&op->insert_keys, 293 bch_keylist_add(&op->insert_keys,
276 &KEY(op->inode, bio->bi_sector, sectors)); 294 &KEY(op->inode, bio->bi_iter.bi_sector, sectors));
277 } 295 }
278 296
279 op->insert_data_done = true; 297 op->insert_data_done = true;
@@ -356,21 +374,21 @@ static void bch_data_insert_start(struct closure *cl)
356 374
357 /* 1 for the device pointer and 1 for the chksum */ 375 /* 1 for the device pointer and 1 for the chksum */
358 if (bch_keylist_realloc(&op->insert_keys, 376 if (bch_keylist_realloc(&op->insert_keys,
359 1 + (op->csum ? 1 : 0), 377 3 + (op->csum ? 1 : 0),
360 op->c)) 378 op->c))
361 continue_at(cl, bch_data_insert_keys, bcache_wq); 379 continue_at(cl, bch_data_insert_keys, bcache_wq);
362 380
363 k = op->insert_keys.top; 381 k = op->insert_keys.top;
364 bkey_init(k); 382 bkey_init(k);
365 SET_KEY_INODE(k, op->inode); 383 SET_KEY_INODE(k, op->inode);
366 SET_KEY_OFFSET(k, bio->bi_sector); 384 SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);
367 385
368 if (!bch_alloc_sectors(op->c, k, bio_sectors(bio), 386 if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
369 op->write_point, op->write_prio, 387 op->write_point, op->write_prio,
370 op->writeback)) 388 op->writeback))
371 goto err; 389 goto err;
372 390
373 n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); 391 n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
374 392
375 n->bi_end_io = bch_data_insert_endio; 393 n->bi_end_io = bch_data_insert_endio;
376 n->bi_private = cl; 394 n->bi_private = cl;
@@ -521,7 +539,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
521 (bio->bi_rw & REQ_WRITE))) 539 (bio->bi_rw & REQ_WRITE)))
522 goto skip; 540 goto skip;
523 541
524 if (bio->bi_sector & (c->sb.block_size - 1) || 542 if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
525 bio_sectors(bio) & (c->sb.block_size - 1)) { 543 bio_sectors(bio) & (c->sb.block_size - 1)) {
526 pr_debug("skipping unaligned io"); 544 pr_debug("skipping unaligned io");
527 goto skip; 545 goto skip;
@@ -545,8 +563,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
545 563
546 spin_lock(&dc->io_lock); 564 spin_lock(&dc->io_lock);
547 565
548 hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash) 566 hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
549 if (i->last == bio->bi_sector && 567 if (i->last == bio->bi_iter.bi_sector &&
550 time_before(jiffies, i->jiffies)) 568 time_before(jiffies, i->jiffies))
551 goto found; 569 goto found;
552 570
@@ -555,8 +573,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
555 add_sequential(task); 573 add_sequential(task);
556 i->sequential = 0; 574 i->sequential = 0;
557found: 575found:
558 if (i->sequential + bio->bi_size > i->sequential) 576 if (i->sequential + bio->bi_iter.bi_size > i->sequential)
559 i->sequential += bio->bi_size; 577 i->sequential += bio->bi_iter.bi_size;
560 578
561 i->last = bio_end_sector(bio); 579 i->last = bio_end_sector(bio);
562 i->jiffies = jiffies + msecs_to_jiffies(5000); 580 i->jiffies = jiffies + msecs_to_jiffies(5000);
@@ -596,16 +614,13 @@ struct search {
596 /* Stack frame for bio_complete */ 614 /* Stack frame for bio_complete */
597 struct closure cl; 615 struct closure cl;
598 616
599 struct bcache_device *d;
600
601 struct bbio bio; 617 struct bbio bio;
602 struct bio *orig_bio; 618 struct bio *orig_bio;
603 struct bio *cache_miss; 619 struct bio *cache_miss;
620 struct bcache_device *d;
604 621
605 unsigned insert_bio_sectors; 622 unsigned insert_bio_sectors;
606
607 unsigned recoverable:1; 623 unsigned recoverable:1;
608 unsigned unaligned_bvec:1;
609 unsigned write:1; 624 unsigned write:1;
610 unsigned read_dirty_data:1; 625 unsigned read_dirty_data:1;
611 626
@@ -630,7 +645,8 @@ static void bch_cache_read_endio(struct bio *bio, int error)
630 645
631 if (error) 646 if (error)
632 s->iop.error = error; 647 s->iop.error = error;
633 else if (ptr_stale(s->iop.c, &b->key, 0)) { 648 else if (!KEY_DIRTY(&b->key) &&
649 ptr_stale(s->iop.c, &b->key, 0)) {
634 atomic_long_inc(&s->iop.c->cache_read_races); 650 atomic_long_inc(&s->iop.c->cache_read_races);
635 s->iop.error = -EINTR; 651 s->iop.error = -EINTR;
636 } 652 }
@@ -649,15 +665,15 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
649 struct bkey *bio_key; 665 struct bkey *bio_key;
650 unsigned ptr; 666 unsigned ptr;
651 667
652 if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_sector, 0)) <= 0) 668 if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
653 return MAP_CONTINUE; 669 return MAP_CONTINUE;
654 670
655 if (KEY_INODE(k) != s->iop.inode || 671 if (KEY_INODE(k) != s->iop.inode ||
656 KEY_START(k) > bio->bi_sector) { 672 KEY_START(k) > bio->bi_iter.bi_sector) {
657 unsigned bio_sectors = bio_sectors(bio); 673 unsigned bio_sectors = bio_sectors(bio);
658 unsigned sectors = KEY_INODE(k) == s->iop.inode 674 unsigned sectors = KEY_INODE(k) == s->iop.inode
659 ? min_t(uint64_t, INT_MAX, 675 ? min_t(uint64_t, INT_MAX,
660 KEY_START(k) - bio->bi_sector) 676 KEY_START(k) - bio->bi_iter.bi_sector)
661 : INT_MAX; 677 : INT_MAX;
662 678
663 int ret = s->d->cache_miss(b, s, bio, sectors); 679 int ret = s->d->cache_miss(b, s, bio, sectors);
@@ -679,14 +695,14 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
679 if (KEY_DIRTY(k)) 695 if (KEY_DIRTY(k))
680 s->read_dirty_data = true; 696 s->read_dirty_data = true;
681 697
682 n = bch_bio_split(bio, min_t(uint64_t, INT_MAX, 698 n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
683 KEY_OFFSET(k) - bio->bi_sector), 699 KEY_OFFSET(k) - bio->bi_iter.bi_sector),
684 GFP_NOIO, s->d->bio_split); 700 GFP_NOIO, s->d->bio_split);
685 701
686 bio_key = &container_of(n, struct bbio, bio)->key; 702 bio_key = &container_of(n, struct bbio, bio)->key;
687 bch_bkey_copy_single_ptr(bio_key, k, ptr); 703 bch_bkey_copy_single_ptr(bio_key, k, ptr);
688 704
689 bch_cut_front(&KEY(s->iop.inode, n->bi_sector, 0), bio_key); 705 bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
690 bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key); 706 bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);
691 707
692 n->bi_end_io = bch_cache_read_endio; 708 n->bi_end_io = bch_cache_read_endio;
@@ -711,10 +727,13 @@ static void cache_lookup(struct closure *cl)
711{ 727{
712 struct search *s = container_of(cl, struct search, iop.cl); 728 struct search *s = container_of(cl, struct search, iop.cl);
713 struct bio *bio = &s->bio.bio; 729 struct bio *bio = &s->bio.bio;
730 int ret;
714 731
715 int ret = bch_btree_map_keys(&s->op, s->iop.c, 732 bch_btree_op_init(&s->op, -1);
716 &KEY(s->iop.inode, bio->bi_sector, 0), 733
717 cache_lookup_fn, MAP_END_KEY); 734 ret = bch_btree_map_keys(&s->op, s->iop.c,
735 &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
736 cache_lookup_fn, MAP_END_KEY);
718 if (ret == -EAGAIN) 737 if (ret == -EAGAIN)
719 continue_at(cl, cache_lookup, bcache_wq); 738 continue_at(cl, cache_lookup, bcache_wq);
720 739
@@ -755,13 +774,15 @@ static void bio_complete(struct search *s)
755 } 774 }
756} 775}
757 776
758static void do_bio_hook(struct search *s) 777static void do_bio_hook(struct search *s, struct bio *orig_bio)
759{ 778{
760 struct bio *bio = &s->bio.bio; 779 struct bio *bio = &s->bio.bio;
761 memcpy(bio, s->orig_bio, sizeof(struct bio));
762 780
781 bio_init(bio);
782 __bio_clone_fast(bio, orig_bio);
763 bio->bi_end_io = request_endio; 783 bio->bi_end_io = request_endio;
764 bio->bi_private = &s->cl; 784 bio->bi_private = &s->cl;
785
765 atomic_set(&bio->bi_cnt, 3); 786 atomic_set(&bio->bi_cnt, 3);
766} 787}
767 788
@@ -773,43 +794,36 @@ static void search_free(struct closure *cl)
773 if (s->iop.bio) 794 if (s->iop.bio)
774 bio_put(s->iop.bio); 795 bio_put(s->iop.bio);
775 796
776 if (s->unaligned_bvec)
777 mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec);
778
779 closure_debug_destroy(cl); 797 closure_debug_destroy(cl);
780 mempool_free(s, s->d->c->search); 798 mempool_free(s, s->d->c->search);
781} 799}
782 800
783static struct search *search_alloc(struct bio *bio, struct bcache_device *d) 801static inline struct search *search_alloc(struct bio *bio,
802 struct bcache_device *d)
784{ 803{
785 struct search *s; 804 struct search *s;
786 struct bio_vec *bv;
787 805
788 s = mempool_alloc(d->c->search, GFP_NOIO); 806 s = mempool_alloc(d->c->search, GFP_NOIO);
789 memset(s, 0, offsetof(struct search, iop.insert_keys));
790 807
791 __closure_init(&s->cl, NULL); 808 closure_init(&s->cl, NULL);
809 do_bio_hook(s, bio);
792 810
793 s->iop.inode = d->id;
794 s->iop.c = d->c;
795 s->d = d;
796 s->op.lock = -1;
797 s->iop.write_point = hash_long((unsigned long) current, 16);
798 s->orig_bio = bio; 811 s->orig_bio = bio;
799 s->write = (bio->bi_rw & REQ_WRITE) != 0; 812 s->cache_miss = NULL;
800 s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; 813 s->d = d;
801 s->recoverable = 1; 814 s->recoverable = 1;
815 s->write = (bio->bi_rw & REQ_WRITE) != 0;
816 s->read_dirty_data = 0;
802 s->start_time = jiffies; 817 s->start_time = jiffies;
803 do_bio_hook(s);
804 818
805 if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) { 819 s->iop.c = d->c;
806 bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO); 820 s->iop.bio = NULL;
807 memcpy(bv, bio_iovec(bio), 821 s->iop.inode = d->id;
808 sizeof(struct bio_vec) * bio_segments(bio)); 822 s->iop.write_point = hash_long((unsigned long) current, 16);
809 823 s->iop.write_prio = 0;
810 s->bio.bio.bi_io_vec = bv; 824 s->iop.error = 0;
811 s->unaligned_bvec = 1; 825 s->iop.flags = 0;
812 } 826 s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
813 827
814 return s; 828 return s;
815} 829}
@@ -849,26 +863,13 @@ static void cached_dev_read_error(struct closure *cl)
849{ 863{
850 struct search *s = container_of(cl, struct search, cl); 864 struct search *s = container_of(cl, struct search, cl);
851 struct bio *bio = &s->bio.bio; 865 struct bio *bio = &s->bio.bio;
852 struct bio_vec *bv;
853 int i;
854 866
855 if (s->recoverable) { 867 if (s->recoverable) {
856 /* Retry from the backing device: */ 868 /* Retry from the backing device: */
857 trace_bcache_read_retry(s->orig_bio); 869 trace_bcache_read_retry(s->orig_bio);
858 870
859 s->iop.error = 0; 871 s->iop.error = 0;
860 bv = s->bio.bio.bi_io_vec; 872 do_bio_hook(s, s->orig_bio);
861 do_bio_hook(s);
862 s->bio.bio.bi_io_vec = bv;
863
864 if (!s->unaligned_bvec)
865 bio_for_each_segment(bv, s->orig_bio, i)
866 bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
867 else
868 memcpy(s->bio.bio.bi_io_vec,
869 bio_iovec(s->orig_bio),
870 sizeof(struct bio_vec) *
871 bio_segments(s->orig_bio));
872 873
873 /* XXX: invalidate cache */ 874 /* XXX: invalidate cache */
874 875
@@ -893,9 +894,9 @@ static void cached_dev_read_done(struct closure *cl)
893 894
894 if (s->iop.bio) { 895 if (s->iop.bio) {
895 bio_reset(s->iop.bio); 896 bio_reset(s->iop.bio);
896 s->iop.bio->bi_sector = s->cache_miss->bi_sector; 897 s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
897 s->iop.bio->bi_bdev = s->cache_miss->bi_bdev; 898 s->iop.bio->bi_bdev = s->cache_miss->bi_bdev;
898 s->iop.bio->bi_size = s->insert_bio_sectors << 9; 899 s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
899 bch_bio_map(s->iop.bio, NULL); 900 bch_bio_map(s->iop.bio, NULL);
900 901
901 bio_copy_data(s->cache_miss, s->iop.bio); 902 bio_copy_data(s->cache_miss, s->iop.bio);
@@ -904,8 +905,7 @@ static void cached_dev_read_done(struct closure *cl)
904 s->cache_miss = NULL; 905 s->cache_miss = NULL;
905 } 906 }
906 907
907 if (verify(dc, &s->bio.bio) && s->recoverable && 908 if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
908 !s->unaligned_bvec && !s->read_dirty_data)
909 bch_data_verify(dc, s->orig_bio); 909 bch_data_verify(dc, s->orig_bio);
910 910
911 bio_complete(s); 911 bio_complete(s);
@@ -945,7 +945,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
945 struct bio *miss, *cache_bio; 945 struct bio *miss, *cache_bio;
946 946
947 if (s->cache_miss || s->iop.bypass) { 947 if (s->cache_miss || s->iop.bypass) {
948 miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); 948 miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
949 ret = miss == bio ? MAP_DONE : MAP_CONTINUE; 949 ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
950 goto out_submit; 950 goto out_submit;
951 } 951 }
@@ -959,7 +959,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
959 s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada); 959 s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
960 960
961 s->iop.replace_key = KEY(s->iop.inode, 961 s->iop.replace_key = KEY(s->iop.inode,
962 bio->bi_sector + s->insert_bio_sectors, 962 bio->bi_iter.bi_sector + s->insert_bio_sectors,
963 s->insert_bio_sectors); 963 s->insert_bio_sectors);
964 964
965 ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key); 965 ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
@@ -968,7 +968,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
968 968
969 s->iop.replace = true; 969 s->iop.replace = true;
970 970
971 miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); 971 miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
972 972
973 /* btree_search_recurse()'s btree iterator is no good anymore */ 973 /* btree_search_recurse()'s btree iterator is no good anymore */
974 ret = miss == bio ? MAP_DONE : -EINTR; 974 ret = miss == bio ? MAP_DONE : -EINTR;
@@ -979,9 +979,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
979 if (!cache_bio) 979 if (!cache_bio)
980 goto out_submit; 980 goto out_submit;
981 981
982 cache_bio->bi_sector = miss->bi_sector; 982 cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
983 cache_bio->bi_bdev = miss->bi_bdev; 983 cache_bio->bi_bdev = miss->bi_bdev;
984 cache_bio->bi_size = s->insert_bio_sectors << 9; 984 cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
985 985
986 cache_bio->bi_end_io = request_endio; 986 cache_bio->bi_end_io = request_endio;
987 cache_bio->bi_private = &s->cl; 987 cache_bio->bi_private = &s->cl;
@@ -1031,7 +1031,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
1031{ 1031{
1032 struct closure *cl = &s->cl; 1032 struct closure *cl = &s->cl;
1033 struct bio *bio = &s->bio.bio; 1033 struct bio *bio = &s->bio.bio;
1034 struct bkey start = KEY(dc->disk.id, bio->bi_sector, 0); 1034 struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
1035 struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0); 1035 struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);
1036 1036
1037 bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end); 1037 bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);
@@ -1087,8 +1087,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
1087 closure_bio_submit(flush, cl, s->d); 1087 closure_bio_submit(flush, cl, s->d);
1088 } 1088 }
1089 } else { 1089 } else {
1090 s->iop.bio = bio_clone_bioset(bio, GFP_NOIO, 1090 s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
1091 dc->disk.bio_split);
1092 1091
1093 closure_bio_submit(bio, cl, s->d); 1092 closure_bio_submit(bio, cl, s->d);
1094 } 1093 }
@@ -1126,13 +1125,13 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
1126 part_stat_unlock(); 1125 part_stat_unlock();
1127 1126
1128 bio->bi_bdev = dc->bdev; 1127 bio->bi_bdev = dc->bdev;
1129 bio->bi_sector += dc->sb.data_offset; 1128 bio->bi_iter.bi_sector += dc->sb.data_offset;
1130 1129
1131 if (cached_dev_get(dc)) { 1130 if (cached_dev_get(dc)) {
1132 s = search_alloc(bio, d); 1131 s = search_alloc(bio, d);
1133 trace_bcache_request_start(s->d, bio); 1132 trace_bcache_request_start(s->d, bio);
1134 1133
1135 if (!bio->bi_size) { 1134 if (!bio->bi_iter.bi_size) {
1136 /* 1135 /*
1137 * can't call bch_journal_meta from under 1136 * can't call bch_journal_meta from under
1138 * generic_make_request 1137 * generic_make_request
@@ -1204,24 +1203,24 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
1204static int flash_dev_cache_miss(struct btree *b, struct search *s, 1203static int flash_dev_cache_miss(struct btree *b, struct search *s,
1205 struct bio *bio, unsigned sectors) 1204 struct bio *bio, unsigned sectors)
1206{ 1205{
1207 struct bio_vec *bv; 1206 struct bio_vec bv;
1208 int i; 1207 struct bvec_iter iter;
1209 1208
1210 /* Zero fill bio */ 1209 /* Zero fill bio */
1211 1210
1212 bio_for_each_segment(bv, bio, i) { 1211 bio_for_each_segment(bv, bio, iter) {
1213 unsigned j = min(bv->bv_len >> 9, sectors); 1212 unsigned j = min(bv.bv_len >> 9, sectors);
1214 1213
1215 void *p = kmap(bv->bv_page); 1214 void *p = kmap(bv.bv_page);
1216 memset(p + bv->bv_offset, 0, j << 9); 1215 memset(p + bv.bv_offset, 0, j << 9);
1217 kunmap(bv->bv_page); 1216 kunmap(bv.bv_page);
1218 1217
1219 sectors -= j; 1218 sectors -= j;
1220 } 1219 }
1221 1220
1222 bio_advance(bio, min(sectors << 9, bio->bi_size)); 1221 bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size));
1223 1222
1224 if (!bio->bi_size) 1223 if (!bio->bi_iter.bi_size)
1225 return MAP_DONE; 1224 return MAP_DONE;
1226 1225
1227 return MAP_CONTINUE; 1226 return MAP_CONTINUE;
@@ -1255,7 +1254,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
1255 1254
1256 trace_bcache_request_start(s->d, bio); 1255 trace_bcache_request_start(s->d, bio);
1257 1256
1258 if (!bio->bi_size) { 1257 if (!bio->bi_iter.bi_size) {
1259 /* 1258 /*
1260 * can't call bch_journal_meta from under 1259 * can't call bch_journal_meta from under
1261 * generic_make_request 1260 * generic_make_request
@@ -1265,7 +1264,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
1265 bcache_wq); 1264 bcache_wq);
1266 } else if (rw) { 1265 } else if (rw) {
1267 bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, 1266 bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
1268 &KEY(d->id, bio->bi_sector, 0), 1267 &KEY(d->id, bio->bi_iter.bi_sector, 0),
1269 &KEY(d->id, bio_end_sector(bio), 0)); 1268 &KEY(d->id, bio_end_sector(bio), 0));
1270 1269
1271 s->iop.bypass = (bio->bi_rw & REQ_DISCARD) != 0; 1270 s->iop.bypass = (bio->bi_rw & REQ_DISCARD) != 0;
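The request.c hunks above follow the immutable-biovec conversion: a bio's position (sector, remaining size, current vec index) now lives in an embedded struct bvec_iter, and bio_for_each_segment() yields a struct bio_vec by value computed from that iterator instead of handing out a pointer plus integer index. The sketch below is a minimal userspace model of that iteration style, using simplified stand-in types that are not the kernel's definitions; it only illustrates why the flash_dev_cache_miss() zero-fill loop switches from a bio_vec pointer and int counter to a value bio_vec and a bvec_iter.

        /* Userspace model of segment iteration driven by an iterator (illustrative only). */
        #include <stdio.h>
        #include <string.h>

        struct bio_vec { char *bv_base; unsigned bv_len; unsigned bv_offset; };
        struct bvec_iter { unsigned bi_size; unsigned bi_idx; unsigned bi_bvec_done; };

        struct bio {
                struct bio_vec *bi_io_vec;
                unsigned bi_vcnt;
                struct bvec_iter bi_iter;
        };

        /* Compute the current segment as a value, clipped to what the iterator has left. */
        static struct bio_vec bio_iter_iovec(struct bio *bio, struct bvec_iter iter)
        {
                struct bio_vec bv = bio->bi_io_vec[iter.bi_idx];

                bv.bv_offset += iter.bi_bvec_done;
                bv.bv_len -= iter.bi_bvec_done;
                if (bv.bv_len > iter.bi_size)
                        bv.bv_len = iter.bi_size;
                return bv;
        }

        static void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, unsigned bytes)
        {
                iter->bi_size -= bytes;
                iter->bi_bvec_done += bytes;
                if (iter->bi_bvec_done == bio->bi_io_vec[iter->bi_idx].bv_len) {
                        iter->bi_bvec_done = 0;
                        iter->bi_idx++;
                }
        }

        int main(void)
        {
                char a[8] = "AAAAAAAA", b[4] = "BBBB";
                struct bio_vec vecs[] = { { a, 8, 0 }, { b, 4, 0 } };
                struct bio bio = { vecs, 2, { 12, 0, 0 } };
                struct bvec_iter iter = bio.bi_iter;    /* walk a private copy of the iterator */

                while (iter.bi_size) {
                        struct bio_vec bv = bio_iter_iovec(&bio, iter);

                        /* Zero this segment, as the cache-miss path does. */
                        memset(bv.bv_base + bv.bv_offset, 0, bv.bv_len);
                        printf("cleared %u bytes in segment %u\n", bv.bv_len, iter.bi_idx);
                        bio_advance_iter(&bio, &iter, bv.bv_len);
                }
                return 0;
        }

Because the caller only advances a copy of the iterator, the bio itself is left untouched, which is the property the conversion relies on throughout these drivers.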
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 2cd65bf073c2..39f21dbedc38 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -13,17 +13,22 @@ struct data_insert_op {
13 uint16_t write_prio; 13 uint16_t write_prio;
14 short error; 14 short error;
15 15
16 unsigned bypass:1; 16 union {
17 unsigned writeback:1; 17 uint16_t flags;
18 unsigned flush_journal:1;
19 unsigned csum:1;
20 18
21 unsigned replace:1; 19 struct {
22 unsigned replace_collision:1; 20 unsigned bypass:1;
21 unsigned writeback:1;
22 unsigned flush_journal:1;
23 unsigned csum:1;
23 24
24 unsigned insert_data_done:1; 25 unsigned replace:1;
26 unsigned replace_collision:1;
27
28 unsigned insert_data_done:1;
29 };
30 };
25 31
26 /* Anything past this point won't get zeroed in search_alloc() */
27 struct keylist insert_keys; 32 struct keylist insert_keys;
28 BKEY_PADDED(replace_key); 33 BKEY_PADDED(replace_key);
29}; 34};
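The request.h change packs the per-request flag bits into an anonymous struct nested in a union with a single uint16_t, so the whole set can be read or cleared in one store instead of bit by bit; the dropped comment about search_alloc() suggests that bulk reset is the point, though that is an inference rather than something the hunk states. A minimal sketch of the same layout trick (C11 anonymous members, invented flag names; note that the bit order inside the word is implementation-defined):

        #include <stdint.h>
        #include <stdio.h>

        struct op_flags {
                union {
                        uint16_t flags;         /* whole-word view */
                        struct {                /* per-bit view of the same storage */
                                unsigned bypass:1;
                                unsigned writeback:1;
                                unsigned replace:1;
                        };
                };
        };

        int main(void)
        {
                struct op_flags op = { 0 };

                op.writeback = 1;
                op.replace = 1;
                printf("packed: 0x%04x\n", (unsigned)op.flags);

                op.flags = 0;                   /* reset every flag in one assignment */
                printf("after reset: bypass=%u writeback=%u replace=%u\n",
                       op.bypass, op.writeback, op.replace);
                return 0;
        }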
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c57bfa071a57..24a3a1546caa 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -9,6 +9,7 @@
9#include "bcache.h" 9#include "bcache.h"
10#include "btree.h" 10#include "btree.h"
11#include "debug.h" 11#include "debug.h"
12#include "extents.h"
12#include "request.h" 13#include "request.h"
13#include "writeback.h" 14#include "writeback.h"
14 15
@@ -225,7 +226,7 @@ static void write_bdev_super_endio(struct bio *bio, int error)
225 struct cached_dev *dc = bio->bi_private; 226 struct cached_dev *dc = bio->bi_private;
226 /* XXX: error checking */ 227 /* XXX: error checking */
227 228
228 closure_put(&dc->sb_write.cl); 229 closure_put(&dc->sb_write);
229} 230}
230 231
231static void __write_super(struct cache_sb *sb, struct bio *bio) 232static void __write_super(struct cache_sb *sb, struct bio *bio)
@@ -233,9 +234,9 @@ static void __write_super(struct cache_sb *sb, struct bio *bio)
233 struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page); 234 struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page);
234 unsigned i; 235 unsigned i;
235 236
236 bio->bi_sector = SB_SECTOR; 237 bio->bi_iter.bi_sector = SB_SECTOR;
237 bio->bi_rw = REQ_SYNC|REQ_META; 238 bio->bi_rw = REQ_SYNC|REQ_META;
238 bio->bi_size = SB_SIZE; 239 bio->bi_iter.bi_size = SB_SIZE;
239 bch_bio_map(bio, NULL); 240 bch_bio_map(bio, NULL);
240 241
241 out->offset = cpu_to_le64(sb->offset); 242 out->offset = cpu_to_le64(sb->offset);
@@ -263,12 +264,20 @@ static void __write_super(struct cache_sb *sb, struct bio *bio)
263 submit_bio(REQ_WRITE, bio); 264 submit_bio(REQ_WRITE, bio);
264} 265}
265 266
267static void bch_write_bdev_super_unlock(struct closure *cl)
268{
269 struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
270
271 up(&dc->sb_write_mutex);
272}
273
266void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) 274void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
267{ 275{
268 struct closure *cl = &dc->sb_write.cl; 276 struct closure *cl = &dc->sb_write;
269 struct bio *bio = &dc->sb_bio; 277 struct bio *bio = &dc->sb_bio;
270 278
271 closure_lock(&dc->sb_write, parent); 279 down(&dc->sb_write_mutex);
280 closure_init(cl, parent);
272 281
273 bio_reset(bio); 282 bio_reset(bio);
274 bio->bi_bdev = dc->bdev; 283 bio->bi_bdev = dc->bdev;
@@ -278,7 +287,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
278 closure_get(cl); 287 closure_get(cl);
279 __write_super(&dc->sb, bio); 288 __write_super(&dc->sb, bio);
280 289
281 closure_return(cl); 290 closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
282} 291}
283 292
284static void write_super_endio(struct bio *bio, int error) 293static void write_super_endio(struct bio *bio, int error)
@@ -286,16 +295,24 @@ static void write_super_endio(struct bio *bio, int error)
286 struct cache *ca = bio->bi_private; 295 struct cache *ca = bio->bi_private;
287 296
288 bch_count_io_errors(ca, error, "writing superblock"); 297 bch_count_io_errors(ca, error, "writing superblock");
289 closure_put(&ca->set->sb_write.cl); 298 closure_put(&ca->set->sb_write);
299}
300
301static void bcache_write_super_unlock(struct closure *cl)
302{
303 struct cache_set *c = container_of(cl, struct cache_set, sb_write);
304
305 up(&c->sb_write_mutex);
290} 306}
291 307
292void bcache_write_super(struct cache_set *c) 308void bcache_write_super(struct cache_set *c)
293{ 309{
294 struct closure *cl = &c->sb_write.cl; 310 struct closure *cl = &c->sb_write;
295 struct cache *ca; 311 struct cache *ca;
296 unsigned i; 312 unsigned i;
297 313
298 closure_lock(&c->sb_write, &c->cl); 314 down(&c->sb_write_mutex);
315 closure_init(cl, &c->cl);
299 316
300 c->sb.seq++; 317 c->sb.seq++;
301 318
@@ -317,7 +334,7 @@ void bcache_write_super(struct cache_set *c)
317 __write_super(&ca->sb, bio); 334 __write_super(&ca->sb, bio);
318 } 335 }
319 336
320 closure_return(cl); 337 closure_return_with_destructor(cl, bcache_write_super_unlock);
321} 338}
322 339
323/* UUID io */ 340/* UUID io */
@@ -325,29 +342,37 @@ void bcache_write_super(struct cache_set *c)
325static void uuid_endio(struct bio *bio, int error) 342static void uuid_endio(struct bio *bio, int error)
326{ 343{
327 struct closure *cl = bio->bi_private; 344 struct closure *cl = bio->bi_private;
328 struct cache_set *c = container_of(cl, struct cache_set, uuid_write.cl); 345 struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
329 346
330 cache_set_err_on(error, c, "accessing uuids"); 347 cache_set_err_on(error, c, "accessing uuids");
331 bch_bbio_free(bio, c); 348 bch_bbio_free(bio, c);
332 closure_put(cl); 349 closure_put(cl);
333} 350}
334 351
352static void uuid_io_unlock(struct closure *cl)
353{
354 struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
355
356 up(&c->uuid_write_mutex);
357}
358
335static void uuid_io(struct cache_set *c, unsigned long rw, 359static void uuid_io(struct cache_set *c, unsigned long rw,
336 struct bkey *k, struct closure *parent) 360 struct bkey *k, struct closure *parent)
337{ 361{
338 struct closure *cl = &c->uuid_write.cl; 362 struct closure *cl = &c->uuid_write;
339 struct uuid_entry *u; 363 struct uuid_entry *u;
340 unsigned i; 364 unsigned i;
341 char buf[80]; 365 char buf[80];
342 366
343 BUG_ON(!parent); 367 BUG_ON(!parent);
344 closure_lock(&c->uuid_write, parent); 368 down(&c->uuid_write_mutex);
369 closure_init(cl, parent);
345 370
346 for (i = 0; i < KEY_PTRS(k); i++) { 371 for (i = 0; i < KEY_PTRS(k); i++) {
347 struct bio *bio = bch_bbio_alloc(c); 372 struct bio *bio = bch_bbio_alloc(c);
348 373
349 bio->bi_rw = REQ_SYNC|REQ_META|rw; 374 bio->bi_rw = REQ_SYNC|REQ_META|rw;
350 bio->bi_size = KEY_SIZE(k) << 9; 375 bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
351 376
352 bio->bi_end_io = uuid_endio; 377 bio->bi_end_io = uuid_endio;
353 bio->bi_private = cl; 378 bio->bi_private = cl;
@@ -359,7 +384,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
359 break; 384 break;
360 } 385 }
361 386
362 bch_bkey_to_text(buf, sizeof(buf), k); 387 bch_extent_to_text(buf, sizeof(buf), k);
363 pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf); 388 pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
364 389
365 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) 390 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
@@ -368,14 +393,14 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
368 u - c->uuids, u->uuid, u->label, 393 u - c->uuids, u->uuid, u->label,
369 u->first_reg, u->last_reg, u->invalidated); 394 u->first_reg, u->last_reg, u->invalidated);
370 395
371 closure_return(cl); 396 closure_return_with_destructor(cl, uuid_io_unlock);
372} 397}
373 398
374static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) 399static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
375{ 400{
376 struct bkey *k = &j->uuid_bucket; 401 struct bkey *k = &j->uuid_bucket;
377 402
378 if (bch_btree_ptr_invalid(c, k)) 403 if (__bch_btree_ptr_invalid(c, k))
379 return "bad uuid pointer"; 404 return "bad uuid pointer";
380 405
381 bkey_copy(&c->uuid_bucket, k); 406 bkey_copy(&c->uuid_bucket, k);
@@ -420,7 +445,7 @@ static int __uuid_write(struct cache_set *c)
420 445
421 lockdep_assert_held(&bch_register_lock); 446 lockdep_assert_held(&bch_register_lock);
422 447
423 if (bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, true)) 448 if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
424 return 1; 449 return 1;
425 450
426 SET_KEY_SIZE(&k.key, c->sb.bucket_size); 451 SET_KEY_SIZE(&k.key, c->sb.bucket_size);
@@ -503,10 +528,10 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
503 528
504 closure_init_stack(cl); 529 closure_init_stack(cl);
505 530
506 bio->bi_sector = bucket * ca->sb.bucket_size; 531 bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size;
507 bio->bi_bdev = ca->bdev; 532 bio->bi_bdev = ca->bdev;
508 bio->bi_rw = REQ_SYNC|REQ_META|rw; 533 bio->bi_rw = REQ_SYNC|REQ_META|rw;
509 bio->bi_size = bucket_bytes(ca); 534 bio->bi_iter.bi_size = bucket_bytes(ca);
510 535
511 bio->bi_end_io = prio_endio; 536 bio->bi_end_io = prio_endio;
512 bio->bi_private = ca; 537 bio->bi_private = ca;
@@ -538,8 +563,8 @@ void bch_prio_write(struct cache *ca)
538 atomic_long_add(ca->sb.bucket_size * prio_buckets(ca), 563 atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
539 &ca->meta_sectors_written); 564 &ca->meta_sectors_written);
540 565
541 pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), 566 //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
542 fifo_used(&ca->free_inc), fifo_used(&ca->unused)); 567 // fifo_used(&ca->free_inc), fifo_used(&ca->unused));
543 568
544 for (i = prio_buckets(ca) - 1; i >= 0; --i) { 569 for (i = prio_buckets(ca) - 1; i >= 0; --i) {
545 long bucket; 570 long bucket;
@@ -558,7 +583,7 @@ void bch_prio_write(struct cache *ca)
558 p->magic = pset_magic(&ca->sb); 583 p->magic = pset_magic(&ca->sb);
559 p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8); 584 p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
560 585
561 bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, true); 586 bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
562 BUG_ON(bucket == -1); 587 BUG_ON(bucket == -1);
563 588
564 mutex_unlock(&ca->set->bucket_lock); 589 mutex_unlock(&ca->set->bucket_lock);
@@ -739,8 +764,6 @@ static void bcache_device_free(struct bcache_device *d)
739 } 764 }
740 765
741 bio_split_pool_free(&d->bio_split_hook); 766 bio_split_pool_free(&d->bio_split_hook);
742 if (d->unaligned_bvec)
743 mempool_destroy(d->unaligned_bvec);
744 if (d->bio_split) 767 if (d->bio_split)
745 bioset_free(d->bio_split); 768 bioset_free(d->bio_split);
746 if (is_vmalloc_addr(d->full_dirty_stripes)) 769 if (is_vmalloc_addr(d->full_dirty_stripes))
@@ -793,8 +816,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
793 return minor; 816 return minor;
794 817
795 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 818 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
796 !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
797 sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
798 bio_split_pool_init(&d->bio_split_hook) || 819 bio_split_pool_init(&d->bio_split_hook) ||
799 !(d->disk = alloc_disk(1))) { 820 !(d->disk = alloc_disk(1))) {
800 ida_simple_remove(&bcache_minor, minor); 821 ida_simple_remove(&bcache_minor, minor);
@@ -1102,7 +1123,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
1102 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); 1123 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
1103 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); 1124 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
1104 INIT_WORK(&dc->detach, cached_dev_detach_finish); 1125 INIT_WORK(&dc->detach, cached_dev_detach_finish);
1105 closure_init_unlocked(&dc->sb_write); 1126 sema_init(&dc->sb_write_mutex, 1);
1106 INIT_LIST_HEAD(&dc->io_lru); 1127 INIT_LIST_HEAD(&dc->io_lru);
1107 spin_lock_init(&dc->io_lock); 1128 spin_lock_init(&dc->io_lock);
1108 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); 1129 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
@@ -1114,6 +1135,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
1114 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); 1135 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
1115 } 1136 }
1116 1137
1138 dc->disk.stripe_size = q->limits.io_opt >> 9;
1139
1140 if (dc->disk.stripe_size)
1141 dc->partial_stripes_expensive =
1142 q->limits.raid_partial_stripes_expensive;
1143
1117 ret = bcache_device_init(&dc->disk, block_size, 1144 ret = bcache_device_init(&dc->disk, block_size,
1118 dc->bdev->bd_part->nr_sects - dc->sb.data_offset); 1145 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
1119 if (ret) 1146 if (ret)
@@ -1325,8 +1352,8 @@ static void cache_set_free(struct closure *cl)
1325 if (ca) 1352 if (ca)
1326 kobject_put(&ca->kobj); 1353 kobject_put(&ca->kobj);
1327 1354
1355 bch_bset_sort_state_free(&c->sort);
1328 free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); 1356 free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
1329 free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
1330 1357
1331 if (c->bio_split) 1358 if (c->bio_split)
1332 bioset_free(c->bio_split); 1359 bioset_free(c->bio_split);
@@ -1451,21 +1478,17 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1451 c->block_bits = ilog2(sb->block_size); 1478 c->block_bits = ilog2(sb->block_size);
1452 c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); 1479 c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
1453 1480
1454 c->btree_pages = c->sb.bucket_size / PAGE_SECTORS; 1481 c->btree_pages = bucket_pages(c);
1455 if (c->btree_pages > BTREE_MAX_PAGES) 1482 if (c->btree_pages > BTREE_MAX_PAGES)
1456 c->btree_pages = max_t(int, c->btree_pages / 4, 1483 c->btree_pages = max_t(int, c->btree_pages / 4,
1457 BTREE_MAX_PAGES); 1484 BTREE_MAX_PAGES);
1458 1485
1459 c->sort_crit_factor = int_sqrt(c->btree_pages); 1486 sema_init(&c->sb_write_mutex, 1);
1460
1461 closure_init_unlocked(&c->sb_write);
1462 mutex_init(&c->bucket_lock); 1487 mutex_init(&c->bucket_lock);
1463 init_waitqueue_head(&c->try_wait); 1488 init_waitqueue_head(&c->try_wait);
1464 init_waitqueue_head(&c->bucket_wait); 1489 init_waitqueue_head(&c->bucket_wait);
1465 closure_init_unlocked(&c->uuid_write); 1490 sema_init(&c->uuid_write_mutex, 1);
1466 mutex_init(&c->sort_lock);
1467 1491
1468 spin_lock_init(&c->sort_time.lock);
1469 spin_lock_init(&c->btree_gc_time.lock); 1492 spin_lock_init(&c->btree_gc_time.lock);
1470 spin_lock_init(&c->btree_split_time.lock); 1493 spin_lock_init(&c->btree_split_time.lock);
1471 spin_lock_init(&c->btree_read_time.lock); 1494 spin_lock_init(&c->btree_read_time.lock);
@@ -1493,11 +1516,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1493 bucket_pages(c))) || 1516 bucket_pages(c))) ||
1494 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || 1517 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
1495 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 1518 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
1496 !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
1497 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || 1519 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
1498 bch_journal_alloc(c) || 1520 bch_journal_alloc(c) ||
1499 bch_btree_cache_alloc(c) || 1521 bch_btree_cache_alloc(c) ||
1500 bch_open_buckets_alloc(c)) 1522 bch_open_buckets_alloc(c) ||
1523 bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
1501 goto err; 1524 goto err;
1502 1525
1503 c->congested_read_threshold_us = 2000; 1526 c->congested_read_threshold_us = 2000;
@@ -1553,7 +1576,7 @@ static void run_cache_set(struct cache_set *c)
1553 k = &j->btree_root; 1576 k = &j->btree_root;
1554 1577
1555 err = "bad btree root"; 1578 err = "bad btree root";
1556 if (bch_btree_ptr_invalid(c, k)) 1579 if (__bch_btree_ptr_invalid(c, k))
1557 goto err; 1580 goto err;
1558 1581
1559 err = "error reading btree root"; 1582 err = "error reading btree root";
@@ -1747,6 +1770,7 @@ err:
1747void bch_cache_release(struct kobject *kobj) 1770void bch_cache_release(struct kobject *kobj)
1748{ 1771{
1749 struct cache *ca = container_of(kobj, struct cache, kobj); 1772 struct cache *ca = container_of(kobj, struct cache, kobj);
1773 unsigned i;
1750 1774
1751 if (ca->set) 1775 if (ca->set)
1752 ca->set->cache[ca->sb.nr_this_dev] = NULL; 1776 ca->set->cache[ca->sb.nr_this_dev] = NULL;
@@ -1760,7 +1784,9 @@ void bch_cache_release(struct kobject *kobj)
1760 free_heap(&ca->heap); 1784 free_heap(&ca->heap);
1761 free_fifo(&ca->unused); 1785 free_fifo(&ca->unused);
1762 free_fifo(&ca->free_inc); 1786 free_fifo(&ca->free_inc);
1763 free_fifo(&ca->free); 1787
1788 for (i = 0; i < RESERVE_NR; i++)
1789 free_fifo(&ca->free[i]);
1764 1790
1765 if (ca->sb_bio.bi_inline_vecs[0].bv_page) 1791 if (ca->sb_bio.bi_inline_vecs[0].bv_page)
1766 put_page(ca->sb_bio.bi_io_vec[0].bv_page); 1792 put_page(ca->sb_bio.bi_io_vec[0].bv_page);
@@ -1786,10 +1812,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
1786 ca->journal.bio.bi_max_vecs = 8; 1812 ca->journal.bio.bi_max_vecs = 8;
1787 ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; 1813 ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
1788 1814
1789 free = roundup_pow_of_two(ca->sb.nbuckets) >> 9; 1815 free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
1790 free = max_t(size_t, free, (prio_buckets(ca) + 8) * 2);
1791 1816
1792 if (!init_fifo(&ca->free, free, GFP_KERNEL) || 1817 if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
1818 !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
1819 !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
1820 !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
1793 !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || 1821 !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
1794 !init_fifo(&ca->unused, free << 2, GFP_KERNEL) || 1822 !init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
1795 !init_heap(&ca->heap, free << 3, GFP_KERNEL) || 1823 !init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
@@ -2034,7 +2062,8 @@ static void bcache_exit(void)
2034 kobject_put(bcache_kobj); 2062 kobject_put(bcache_kobj);
2035 if (bcache_wq) 2063 if (bcache_wq)
2036 destroy_workqueue(bcache_wq); 2064 destroy_workqueue(bcache_wq);
2037 unregister_blkdev(bcache_major, "bcache"); 2065 if (bcache_major)
2066 unregister_blkdev(bcache_major, "bcache");
2038 unregister_reboot_notifier(&reboot); 2067 unregister_reboot_notifier(&reboot);
2039} 2068}
2040 2069
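Several super.c hunks above replace the old closure_lock()/closure_return() pairing with an explicit semaphore taken before closure_init() and released from a destructor handed to closure_return_with_destructor(), so only one superblock or uuid write is in flight at a time and the lock drops exactly when the last reference to the closure goes away. The sketch below models that shape in plain C: a pthread mutex standing in for the semaphore, a refcount standing in for the closure, and a destructor callback that unlocks once the count hits zero. All names are invented and this is not the bcache closure machinery, only the locking shape.

        #include <stdio.h>
        #include <stdatomic.h>
        #include <pthread.h>

        struct op {
                atomic_int ref;
                void (*destructor)(struct op *);
        };

        static pthread_mutex_t write_mutex = PTHREAD_MUTEX_INITIALIZER;

        static void op_get(struct op *op) { atomic_fetch_add(&op->ref, 1); }

        static void op_put(struct op *op)
        {
                if (atomic_fetch_sub(&op->ref, 1) == 1)
                        op->destructor(op);     /* last reference gone */
        }

        static void write_unlock(struct op *op)
        {
                /* Runs exactly once, after the submitted "I/O" completed. */
                pthread_mutex_unlock(&write_mutex);
                printf("write finished, lock released\n");
        }

        static void fake_io_complete(struct op *op) { op_put(op); }    /* plays the endio role */

        static void start_write(struct op *op)
        {
                pthread_mutex_lock(&write_mutex);       /* serialize writers, like sb_write_mutex */
                atomic_store(&op->ref, 1);
                op->destructor = write_unlock;

                op_get(op);                     /* reference held by the in-flight I/O */
                fake_io_complete(op);           /* completion drops it */

                op_put(op);                     /* submitter's reference; destructor fires here */
        }

        int main(void)
        {
                struct op op;

                start_write(&op);
                return 0;
        }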
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index a1f85612f0b3..c6ab69333a6d 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -102,7 +102,6 @@ rw_attribute(bypass_torture_test);
102rw_attribute(key_merging_disabled); 102rw_attribute(key_merging_disabled);
103rw_attribute(gc_always_rewrite); 103rw_attribute(gc_always_rewrite);
104rw_attribute(expensive_debug_checks); 104rw_attribute(expensive_debug_checks);
105rw_attribute(freelist_percent);
106rw_attribute(cache_replacement_policy); 105rw_attribute(cache_replacement_policy);
107rw_attribute(btree_shrinker_disabled); 106rw_attribute(btree_shrinker_disabled);
108rw_attribute(copy_gc_enabled); 107rw_attribute(copy_gc_enabled);
@@ -401,6 +400,48 @@ static struct attribute *bch_flash_dev_files[] = {
401}; 400};
402KTYPE(bch_flash_dev); 401KTYPE(bch_flash_dev);
403 402
403struct bset_stats_op {
404 struct btree_op op;
405 size_t nodes;
406 struct bset_stats stats;
407};
408
409static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
410{
411 struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);
412
413 op->nodes++;
414 bch_btree_keys_stats(&b->keys, &op->stats);
415
416 return MAP_CONTINUE;
417}
418
419int bch_bset_print_stats(struct cache_set *c, char *buf)
420{
421 struct bset_stats_op op;
422 int ret;
423
424 memset(&op, 0, sizeof(op));
425 bch_btree_op_init(&op.op, -1);
426
427 ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, btree_bset_stats);
428 if (ret < 0)
429 return ret;
430
431 return snprintf(buf, PAGE_SIZE,
432 "btree nodes: %zu\n"
433 "written sets: %zu\n"
434 "unwritten sets: %zu\n"
435 "written key bytes: %zu\n"
436 "unwritten key bytes: %zu\n"
437 "floats: %zu\n"
438 "failed: %zu\n",
439 op.nodes,
440 op.stats.sets_written, op.stats.sets_unwritten,
441 op.stats.bytes_written, op.stats.bytes_unwritten,
442 op.stats.floats, op.stats.failed);
443}
444
404SHOW(__bch_cache_set) 445SHOW(__bch_cache_set)
405{ 446{
406 unsigned root_usage(struct cache_set *c) 447 unsigned root_usage(struct cache_set *c)
@@ -419,7 +460,7 @@ lock_root:
419 rw_lock(false, b, b->level); 460 rw_lock(false, b, b->level);
420 } while (b != c->root); 461 } while (b != c->root);
421 462
422 for_each_key_filter(b, k, &iter, bch_ptr_bad) 463 for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
423 bytes += bkey_bytes(k); 464 bytes += bkey_bytes(k);
424 465
425 rw_unlock(false, b); 466 rw_unlock(false, b);
@@ -434,7 +475,7 @@ lock_root:
434 475
435 mutex_lock(&c->bucket_lock); 476 mutex_lock(&c->bucket_lock);
436 list_for_each_entry(b, &c->btree_cache, list) 477 list_for_each_entry(b, &c->btree_cache, list)
437 ret += 1 << (b->page_order + PAGE_SHIFT); 478 ret += 1 << (b->keys.page_order + PAGE_SHIFT);
438 479
439 mutex_unlock(&c->bucket_lock); 480 mutex_unlock(&c->bucket_lock);
440 return ret; 481 return ret;
@@ -491,7 +532,7 @@ lock_root:
491 532
492 sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); 533 sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
493 sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); 534 sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
494 sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); 535 sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
495 sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); 536 sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
496 sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); 537 sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
497 538
@@ -711,9 +752,6 @@ SHOW(__bch_cache)
711 sysfs_print(io_errors, 752 sysfs_print(io_errors,
712 atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT); 753 atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);
713 754
714 sysfs_print(freelist_percent, ca->free.size * 100 /
715 ((size_t) ca->sb.nbuckets));
716
717 if (attr == &sysfs_cache_replacement_policy) 755 if (attr == &sysfs_cache_replacement_policy)
718 return bch_snprint_string_list(buf, PAGE_SIZE, 756 return bch_snprint_string_list(buf, PAGE_SIZE,
719 cache_replacement_policies, 757 cache_replacement_policies,
@@ -820,32 +858,6 @@ STORE(__bch_cache)
820 } 858 }
821 } 859 }
822 860
823 if (attr == &sysfs_freelist_percent) {
824 DECLARE_FIFO(long, free);
825 long i;
826 size_t p = strtoul_or_return(buf);
827
828 p = clamp_t(size_t,
829 ((size_t) ca->sb.nbuckets * p) / 100,
830 roundup_pow_of_two(ca->sb.nbuckets) >> 9,
831 ca->sb.nbuckets / 2);
832
833 if (!init_fifo_exact(&free, p, GFP_KERNEL))
834 return -ENOMEM;
835
836 mutex_lock(&ca->set->bucket_lock);
837
838 fifo_move(&free, &ca->free);
839 fifo_swap(&free, &ca->free);
840
841 mutex_unlock(&ca->set->bucket_lock);
842
843 while (fifo_pop(&free, i))
844 atomic_dec(&ca->buckets[i].pin);
845
846 free_fifo(&free);
847 }
848
849 if (attr == &sysfs_clear_stats) { 861 if (attr == &sysfs_clear_stats) {
850 atomic_long_set(&ca->sectors_written, 0); 862 atomic_long_set(&ca->sectors_written, 0);
851 atomic_long_set(&ca->btree_sectors_written, 0); 863 atomic_long_set(&ca->btree_sectors_written, 0);
@@ -869,7 +881,6 @@ static struct attribute *bch_cache_files[] = {
869 &sysfs_metadata_written, 881 &sysfs_metadata_written,
870 &sysfs_io_errors, 882 &sysfs_io_errors,
871 &sysfs_clear_stats, 883 &sysfs_clear_stats,
872 &sysfs_freelist_percent,
873 &sysfs_cache_replacement_policy, 884 &sysfs_cache_replacement_policy,
874 NULL 885 NULL
875}; 886};
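The new bch_bset_print_stats() in sysfs.c wraps a struct btree_op inside a larger stats struct and recovers the wrapper in the per-node callback with container_of(), so bch_btree_map_nodes() stays generic while the caller accumulates whatever totals it wants. A small self-contained sketch of that embed-and-recover pattern; the flat "tree" walk and field names here are invented purely for illustration.

        #include <stddef.h>
        #include <stdio.h>

        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))

        struct walk_op {                        /* generic part the walker knows about */
                int (*fn)(struct walk_op *op, int node_value);
        };

        struct stats_op {                       /* caller's extension wrapping the generic op */
                struct walk_op op;
                size_t nodes;
                long sum;
        };

        static void walk_nodes(struct walk_op *op, const int *values, size_t n)
        {
                for (size_t i = 0; i < n; i++)
                        op->fn(op, values[i]);  /* walker only ever sees the embedded op */
        }

        static int collect_stats(struct walk_op *w, int node_value)
        {
                struct stats_op *op = container_of(w, struct stats_op, op);

                op->nodes++;
                op->sum += node_value;
                return 0;
        }

        int main(void)
        {
                const int tree[] = { 3, 7, 11 };
                struct stats_op op = { .op.fn = collect_stats };

                walk_nodes(&op.op, tree, 3);
                printf("nodes: %zu, sum: %ld\n", op.nodes, op.sum);
                return 0;
        }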
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index bb37618e7664..db3ae4c2b223 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -224,10 +224,10 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
224 224
225void bch_bio_map(struct bio *bio, void *base) 225void bch_bio_map(struct bio *bio, void *base)
226{ 226{
227 size_t size = bio->bi_size; 227 size_t size = bio->bi_iter.bi_size;
228 struct bio_vec *bv = bio->bi_io_vec; 228 struct bio_vec *bv = bio->bi_io_vec;
229 229
230 BUG_ON(!bio->bi_size); 230 BUG_ON(!bio->bi_iter.bi_size);
231 BUG_ON(bio->bi_vcnt); 231 BUG_ON(bio->bi_vcnt);
232 232
233 bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0; 233 bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1030c6020e98..ac7d0d1f70d7 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -2,6 +2,7 @@
2#ifndef _BCACHE_UTIL_H 2#ifndef _BCACHE_UTIL_H
3#define _BCACHE_UTIL_H 3#define _BCACHE_UTIL_H
4 4
5#include <linux/blkdev.h>
5#include <linux/errno.h> 6#include <linux/errno.h>
6#include <linux/kernel.h> 7#include <linux/kernel.h>
7#include <linux/llist.h> 8#include <linux/llist.h>
@@ -17,11 +18,13 @@ struct closure;
17 18
18#ifdef CONFIG_BCACHE_DEBUG 19#ifdef CONFIG_BCACHE_DEBUG
19 20
21#define EBUG_ON(cond) BUG_ON(cond)
20#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) 22#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0)
21#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i) 23#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i)
22 24
23#else /* DEBUG */ 25#else /* DEBUG */
24 26
27#define EBUG_ON(cond) do { if (cond); } while (0)
25#define atomic_dec_bug(v) atomic_dec(v) 28#define atomic_dec_bug(v) atomic_dec(v)
26#define atomic_inc_bug(v, i) atomic_inc(v) 29#define atomic_inc_bug(v, i) atomic_inc(v)
27 30
@@ -391,6 +394,11 @@ struct time_stats {
391 394
392void bch_time_stats_update(struct time_stats *stats, uint64_t time); 395void bch_time_stats_update(struct time_stats *stats, uint64_t time);
393 396
397static inline unsigned local_clock_us(void)
398{
399 return local_clock() >> 10;
400}
401
394#define NSEC_PER_ns 1L 402#define NSEC_PER_ns 1L
395#define NSEC_PER_us NSEC_PER_USEC 403#define NSEC_PER_us NSEC_PER_USEC
396#define NSEC_PER_ms NSEC_PER_MSEC 404#define NSEC_PER_ms NSEC_PER_MSEC
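util.h gains EBUG_ON(), which is a hard BUG_ON() under CONFIG_BCACHE_DEBUG and otherwise expands to a statement that still compiles (and evaluates) the condition without acting on it, so the expression cannot rot in non-debug builds; it also adds local_clock_us(), which approximates nanoseconds-to-microseconds with a shift by 10, dividing by 1024 instead of 1000, which is close enough for coarse timing. A tiny userspace sketch of both idioms, with an invented MYDEBUG switch standing in for the config option:

        #include <assert.h>
        #include <stdio.h>

        /* Debug-only assertion: real check when built with -DMYDEBUG, otherwise
         * the condition is still compiled but nothing happens. */
        #ifdef MYDEBUG
        #define EBUG_ON(cond)   assert(!(cond))
        #else
        #define EBUG_ON(cond)   do { if (cond); } while (0)
        #endif

        /* Cheap ns -> us conversion by shifting: off by about 2.4%, fine for rough stats. */
        static inline unsigned long ns_to_us_approx(unsigned long ns)
        {
                return ns >> 10;
        }

        int main(void)
        {
                unsigned long ns = 1000000;     /* 1 ms */

                EBUG_ON(ns == 0);               /* no-op unless built with -DMYDEBUG */
                printf("approx %lu us (exact would be 1000)\n", ns_to_us_approx(ns));
                return 0;
        }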
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6c44fe059c27..f4300e4c0114 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -111,7 +111,7 @@ static void dirty_init(struct keybuf_key *w)
111 if (!io->dc->writeback_percent) 111 if (!io->dc->writeback_percent)
112 bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); 112 bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
113 113
114 bio->bi_size = KEY_SIZE(&w->key) << 9; 114 bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
115 bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS); 115 bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS);
116 bio->bi_private = w; 116 bio->bi_private = w;
117 bio->bi_io_vec = bio->bi_inline_vecs; 117 bio->bi_io_vec = bio->bi_inline_vecs;
@@ -184,7 +184,7 @@ static void write_dirty(struct closure *cl)
184 184
185 dirty_init(w); 185 dirty_init(w);
186 io->bio.bi_rw = WRITE; 186 io->bio.bi_rw = WRITE;
187 io->bio.bi_sector = KEY_START(&w->key); 187 io->bio.bi_iter.bi_sector = KEY_START(&w->key);
188 io->bio.bi_bdev = io->dc->bdev; 188 io->bio.bi_bdev = io->dc->bdev;
189 io->bio.bi_end_io = dirty_endio; 189 io->bio.bi_end_io = dirty_endio;
190 190
@@ -253,7 +253,7 @@ static void read_dirty(struct cached_dev *dc)
253 io->dc = dc; 253 io->dc = dc;
254 254
255 dirty_init(w); 255 dirty_init(w);
256 io->bio.bi_sector = PTR_OFFSET(&w->key, 0); 256 io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
257 io->bio.bi_bdev = PTR_CACHE(dc->disk.c, 257 io->bio.bi_bdev = PTR_CACHE(dc->disk.c,
258 &w->key, 0)->bdev; 258 &w->key, 0)->bdev;
259 io->bio.bi_rw = READ; 259 io->bio.bi_rw = READ;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index c9ddcf4614b9..e2f8598937ac 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -50,7 +50,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
50 return false; 50 return false;
51 51
52 if (dc->partial_stripes_expensive && 52 if (dc->partial_stripes_expensive &&
53 bcache_dev_stripe_dirty(dc, bio->bi_sector, 53 bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
54 bio_sectors(bio))) 54 bio_sectors(bio)))
55 return true; 55 return true;
56 56
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index 3a8cfa2645c7..dd3646111561 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -17,55 +17,24 @@
17 * original bio state. 17 * original bio state.
18 */ 18 */
19 19
20struct dm_bio_vec_details {
21#if PAGE_SIZE < 65536
22 __u16 bv_len;
23 __u16 bv_offset;
24#else
25 unsigned bv_len;
26 unsigned bv_offset;
27#endif
28};
29
30struct dm_bio_details { 20struct dm_bio_details {
31 sector_t bi_sector;
32 struct block_device *bi_bdev; 21 struct block_device *bi_bdev;
33 unsigned int bi_size;
34 unsigned short bi_idx;
35 unsigned long bi_flags; 22 unsigned long bi_flags;
36 struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES]; 23 struct bvec_iter bi_iter;
37}; 24};
38 25
39static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) 26static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
40{ 27{
41 unsigned i;
42
43 bd->bi_sector = bio->bi_sector;
44 bd->bi_bdev = bio->bi_bdev; 28 bd->bi_bdev = bio->bi_bdev;
45 bd->bi_size = bio->bi_size;
46 bd->bi_idx = bio->bi_idx;
47 bd->bi_flags = bio->bi_flags; 29 bd->bi_flags = bio->bi_flags;
48 30 bd->bi_iter = bio->bi_iter;
49 for (i = 0; i < bio->bi_vcnt; i++) {
50 bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
51 bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
52 }
53} 31}
54 32
55static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) 33static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
56{ 34{
57 unsigned i;
58
59 bio->bi_sector = bd->bi_sector;
60 bio->bi_bdev = bd->bi_bdev; 35 bio->bi_bdev = bd->bi_bdev;
61 bio->bi_size = bd->bi_size;
62 bio->bi_idx = bd->bi_idx;
63 bio->bi_flags = bd->bi_flags; 36 bio->bi_flags = bd->bi_flags;
64 37 bio->bi_iter = bd->bi_iter;
65 for (i = 0; i < bio->bi_vcnt; i++) {
66 bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
67 bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
68 }
69} 38}
70 39
71#endif 40#endif
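dm-bio-record.h shrinks because, with the whole position held in one struct bvec_iter, saving and restoring a bio around a retried I/O becomes a plain struct assignment instead of copying every bio_vec's length and offset. A minimal model of snapshotting value-type iteration state, with stand-in types that are not the kernel's:

        #include <stdio.h>

        struct cursor { unsigned size; unsigned idx; unsigned done; }; /* stands in for bvec_iter */

        struct record { struct cursor saved; };

        static void record_save(struct record *r, const struct cursor *c)    { r->saved = *c; }
        static void record_restore(const struct record *r, struct cursor *c) { *c = r->saved; }

        int main(void)
        {
                struct cursor cur = { 4096, 0, 0 };
                struct record rec;

                record_save(&rec, &cur);        /* before handing the I/O down */
                cur.size = 0;                   /* completion consumed the iterator */
                cur.idx = 3;

                record_restore(&rec, &cur);     /* retry path: put it back in one assignment */
                printf("size=%u idx=%u done=%u\n", cur.size, cur.idx, cur.done);
                return 0;
        }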
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 9ed42125514b..66c5d130c8c2 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -540,7 +540,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
540 bio_init(&b->bio); 540 bio_init(&b->bio);
541 b->bio.bi_io_vec = b->bio_vec; 541 b->bio.bi_io_vec = b->bio_vec;
542 b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; 542 b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
543 b->bio.bi_sector = block << b->c->sectors_per_block_bits; 543 b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
544 b->bio.bi_bdev = b->c->bdev; 544 b->bio.bi_bdev = b->c->bdev;
545 b->bio.bi_end_io = end_io; 545 b->bio.bi_end_io = end_io;
546 546
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 930e8c3d73e9..1e018e986610 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -72,7 +72,7 @@ static enum io_pattern iot_pattern(struct io_tracker *t)
72 72
73static void iot_update_stats(struct io_tracker *t, struct bio *bio) 73static void iot_update_stats(struct io_tracker *t, struct bio *bio)
74{ 74{
75 if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1) 75 if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
76 t->nr_seq_samples++; 76 t->nr_seq_samples++;
77 else { 77 else {
78 /* 78 /*
@@ -87,7 +87,7 @@ static void iot_update_stats(struct io_tracker *t, struct bio *bio)
87 t->nr_rand_samples++; 87 t->nr_rand_samples++;
88 } 88 }
89 89
90 t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1); 90 t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
91} 91}
92 92
93static void iot_check_for_pattern_switch(struct io_tracker *t) 93static void iot_check_for_pattern_switch(struct io_tracker *t)
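The mq policy's I/O tracker classifies traffic as sequential or random by comparing each bio's start sector with where the previous one ended; the hunk above only rewrites "start + length - 1" as bio_end_sector() minus one. A short sketch of that classification with invented numbers:

        #include <stdio.h>

        struct io_tracker {
                unsigned long last_end;         /* last sector touched */
                unsigned seq, rand;
        };

        /* An I/O is "sequential" if it starts right after the previous one ended. */
        static void track(struct io_tracker *t, unsigned long start, unsigned long nr_sectors)
        {
                if (start == t->last_end + 1)
                        t->seq++;
                else
                        t->rand++;
                t->last_end = start + nr_sectors - 1;
        }

        int main(void)
        {
                struct io_tracker t = { 0 };

                track(&t, 1, 8);        /* starts right after sector 0: sequential */
                track(&t, 9, 8);        /* follows on: sequential */
                track(&t, 100, 8);      /* jump: random */
                printf("sequential=%u random=%u\n", t.seq, t.rand);
                return 0;
        }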
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 09334c275c79..ffd472e015ca 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -85,6 +85,12 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
85{ 85{
86 bio->bi_end_io = h->bi_end_io; 86 bio->bi_end_io = h->bi_end_io;
87 bio->bi_private = h->bi_private; 87 bio->bi_private = h->bi_private;
88
89 /*
90 * Must bump bi_remaining to allow bio to complete with
91 * restored bi_end_io.
92 */
93 atomic_inc(&bio->bi_remaining);
88} 94}
89 95
90/*----------------------------------------------------------------*/ 96/*----------------------------------------------------------------*/
@@ -664,15 +670,17 @@ static void remap_to_origin(struct cache *cache, struct bio *bio)
664static void remap_to_cache(struct cache *cache, struct bio *bio, 670static void remap_to_cache(struct cache *cache, struct bio *bio,
665 dm_cblock_t cblock) 671 dm_cblock_t cblock)
666{ 672{
667 sector_t bi_sector = bio->bi_sector; 673 sector_t bi_sector = bio->bi_iter.bi_sector;
668 674
669 bio->bi_bdev = cache->cache_dev->bdev; 675 bio->bi_bdev = cache->cache_dev->bdev;
670 if (!block_size_is_power_of_two(cache)) 676 if (!block_size_is_power_of_two(cache))
671 bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) + 677 bio->bi_iter.bi_sector =
672 sector_div(bi_sector, cache->sectors_per_block); 678 (from_cblock(cblock) * cache->sectors_per_block) +
679 sector_div(bi_sector, cache->sectors_per_block);
673 else 680 else
674 bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) | 681 bio->bi_iter.bi_sector =
675 (bi_sector & (cache->sectors_per_block - 1)); 682 (from_cblock(cblock) << cache->sectors_per_block_shift) |
683 (bi_sector & (cache->sectors_per_block - 1));
676} 684}
677 685
678static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) 686static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
@@ -712,7 +720,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
712 720
713static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) 721static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
714{ 722{
715 sector_t block_nr = bio->bi_sector; 723 sector_t block_nr = bio->bi_iter.bi_sector;
716 724
717 if (!block_size_is_power_of_two(cache)) 725 if (!block_size_is_power_of_two(cache))
718 (void) sector_div(block_nr, cache->sectors_per_block); 726 (void) sector_div(block_nr, cache->sectors_per_block);
@@ -1027,7 +1035,7 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
1027static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) 1035static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1028{ 1036{
1029 return (bio_data_dir(bio) == WRITE) && 1037 return (bio_data_dir(bio) == WRITE) &&
1030 (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); 1038 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1031} 1039}
1032 1040
1033static void avoid_copy(struct dm_cache_migration *mg) 1041static void avoid_copy(struct dm_cache_migration *mg)
@@ -1252,7 +1260,7 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
1252 size_t pb_data_size = get_per_bio_data_size(cache); 1260 size_t pb_data_size = get_per_bio_data_size(cache);
1253 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1261 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1254 1262
1255 BUG_ON(bio->bi_size); 1263 BUG_ON(bio->bi_iter.bi_size);
1256 if (!pb->req_nr) 1264 if (!pb->req_nr)
1257 remap_to_origin(cache, bio); 1265 remap_to_origin(cache, bio);
1258 else 1266 else
@@ -1275,9 +1283,9 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
1275 */ 1283 */
1276static void process_discard_bio(struct cache *cache, struct bio *bio) 1284static void process_discard_bio(struct cache *cache, struct bio *bio)
1277{ 1285{
1278 dm_block_t start_block = dm_sector_div_up(bio->bi_sector, 1286 dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
1279 cache->discard_block_size); 1287 cache->discard_block_size);
1280 dm_block_t end_block = bio->bi_sector + bio_sectors(bio); 1288 dm_block_t end_block = bio_end_sector(bio);
1281 dm_block_t b; 1289 dm_block_t b;
1282 1290
1283 end_block = block_div(end_block, cache->discard_block_size); 1291 end_block = block_div(end_block, cache->discard_block_size);
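remap_to_cache() places a bio at "cache block times block size, plus the offset within the block", using a shift and mask when the block size is a power of two and a real division otherwise. The sketch below shows the two remap paths with invented sizes; the real code also preserves the rest of the bvec_iter, which this ignores.

        #include <stdio.h>
        #include <stdbool.h>

        typedef unsigned long long sector_t;

        /* Remap an origin sector into the cache device, given the cache block it maps to. */
        static sector_t remap(sector_t sector, sector_t cblock,
                              sector_t sectors_per_block, bool power_of_two)
        {
                if (power_of_two) {
                        unsigned shift = 0;

                        while (((sector_t)1 << shift) < sectors_per_block)
                                shift++;
                        return (cblock << shift) | (sector & (sectors_per_block - 1));
                }
                /* Non-power-of-two block size: the offset within the block needs a modulo. */
                return cblock * sectors_per_block + sector % sectors_per_block;
        }

        int main(void)
        {
                /* 128-sector cache blocks (power of two): origin sector 1000 -> block 42. */
                printf("%llu\n", remap(1000, 42, 128, true));   /* 42*128 + 1000%128 = 5480 */

                /* 100-sector cache blocks: same origin sector, same cache block. */
                printf("%llu\n", remap(1000, 42, 100, false));  /* 42*100 + 0 = 4200 */
                return 0;
        }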
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 81b0fa660452..784695d22fde 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -39,10 +39,8 @@ struct convert_context {
39 struct completion restart; 39 struct completion restart;
40 struct bio *bio_in; 40 struct bio *bio_in;
41 struct bio *bio_out; 41 struct bio *bio_out;
42 unsigned int offset_in; 42 struct bvec_iter iter_in;
43 unsigned int offset_out; 43 struct bvec_iter iter_out;
44 unsigned int idx_in;
45 unsigned int idx_out;
46 sector_t cc_sector; 44 sector_t cc_sector;
47 atomic_t cc_pending; 45 atomic_t cc_pending;
48}; 46};
@@ -826,10 +824,10 @@ static void crypt_convert_init(struct crypt_config *cc,
826{ 824{
827 ctx->bio_in = bio_in; 825 ctx->bio_in = bio_in;
828 ctx->bio_out = bio_out; 826 ctx->bio_out = bio_out;
829 ctx->offset_in = 0; 827 if (bio_in)
830 ctx->offset_out = 0; 828 ctx->iter_in = bio_in->bi_iter;
831 ctx->idx_in = bio_in ? bio_in->bi_idx : 0; 829 if (bio_out)
832 ctx->idx_out = bio_out ? bio_out->bi_idx : 0; 830 ctx->iter_out = bio_out->bi_iter;
833 ctx->cc_sector = sector + cc->iv_offset; 831 ctx->cc_sector = sector + cc->iv_offset;
834 init_completion(&ctx->restart); 832 init_completion(&ctx->restart);
835} 833}
@@ -857,8 +855,8 @@ static int crypt_convert_block(struct crypt_config *cc,
857 struct convert_context *ctx, 855 struct convert_context *ctx,
858 struct ablkcipher_request *req) 856 struct ablkcipher_request *req)
859{ 857{
860 struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); 858 struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
861 struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); 859 struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
862 struct dm_crypt_request *dmreq; 860 struct dm_crypt_request *dmreq;
863 u8 *iv; 861 u8 *iv;
864 int r; 862 int r;
@@ -869,24 +867,15 @@ static int crypt_convert_block(struct crypt_config *cc,
869 dmreq->iv_sector = ctx->cc_sector; 867 dmreq->iv_sector = ctx->cc_sector;
870 dmreq->ctx = ctx; 868 dmreq->ctx = ctx;
871 sg_init_table(&dmreq->sg_in, 1); 869 sg_init_table(&dmreq->sg_in, 1);
872 sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, 870 sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
873 bv_in->bv_offset + ctx->offset_in); 871 bv_in.bv_offset);
874 872
875 sg_init_table(&dmreq->sg_out, 1); 873 sg_init_table(&dmreq->sg_out, 1);
876 sg_set_page(&dmreq->sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT, 874 sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
877 bv_out->bv_offset + ctx->offset_out); 875 bv_out.bv_offset);
878 876
879 ctx->offset_in += 1 << SECTOR_SHIFT; 877 bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
880 if (ctx->offset_in >= bv_in->bv_len) { 878 bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
881 ctx->offset_in = 0;
882 ctx->idx_in++;
883 }
884
885 ctx->offset_out += 1 << SECTOR_SHIFT;
886 if (ctx->offset_out >= bv_out->bv_len) {
887 ctx->offset_out = 0;
888 ctx->idx_out++;
889 }
890 879
891 if (cc->iv_gen_ops) { 880 if (cc->iv_gen_ops) {
892 r = cc->iv_gen_ops->generator(cc, iv, dmreq); 881 r = cc->iv_gen_ops->generator(cc, iv, dmreq);
@@ -937,8 +926,7 @@ static int crypt_convert(struct crypt_config *cc,
937 926
938 atomic_set(&ctx->cc_pending, 1); 927 atomic_set(&ctx->cc_pending, 1);
939 928
940 while(ctx->idx_in < ctx->bio_in->bi_vcnt && 929 while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
941 ctx->idx_out < ctx->bio_out->bi_vcnt) {
942 930
943 crypt_alloc_req(cc, ctx); 931 crypt_alloc_req(cc, ctx);
944 932
@@ -1021,7 +1009,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
1021 size -= len; 1009 size -= len;
1022 } 1010 }
1023 1011
1024 if (!clone->bi_size) { 1012 if (!clone->bi_iter.bi_size) {
1025 bio_put(clone); 1013 bio_put(clone);
1026 return NULL; 1014 return NULL;
1027 } 1015 }
@@ -1161,7 +1149,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
1161 crypt_inc_pending(io); 1149 crypt_inc_pending(io);
1162 1150
1163 clone_init(io, clone); 1151 clone_init(io, clone);
1164 clone->bi_sector = cc->start + io->sector; 1152 clone->bi_iter.bi_sector = cc->start + io->sector;
1165 1153
1166 generic_make_request(clone); 1154 generic_make_request(clone);
1167 return 0; 1155 return 0;
@@ -1207,9 +1195,9 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1207 } 1195 }
1208 1196
1209 /* crypt_convert should have filled the clone bio */ 1197 /* crypt_convert should have filled the clone bio */
1210 BUG_ON(io->ctx.idx_out < clone->bi_vcnt); 1198 BUG_ON(io->ctx.iter_out.bi_size);
1211 1199
1212 clone->bi_sector = cc->start + io->sector; 1200 clone->bi_iter.bi_sector = cc->start + io->sector;
1213 1201
1214 if (async) 1202 if (async)
1215 kcryptd_queue_io(io); 1203 kcryptd_queue_io(io);
@@ -1224,7 +1212,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1224 struct dm_crypt_io *new_io; 1212 struct dm_crypt_io *new_io;
1225 int crypt_finished; 1213 int crypt_finished;
1226 unsigned out_of_pages = 0; 1214 unsigned out_of_pages = 0;
1227 unsigned remaining = io->base_bio->bi_size; 1215 unsigned remaining = io->base_bio->bi_iter.bi_size;
1228 sector_t sector = io->sector; 1216 sector_t sector = io->sector;
1229 int r; 1217 int r;
1230 1218
@@ -1246,9 +1234,9 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1246 } 1234 }
1247 1235
1248 io->ctx.bio_out = clone; 1236 io->ctx.bio_out = clone;
1249 io->ctx.idx_out = 0; 1237 io->ctx.iter_out = clone->bi_iter;
1250 1238
1251 remaining -= clone->bi_size; 1239 remaining -= clone->bi_iter.bi_size;
1252 sector += bio_sectors(clone); 1240 sector += bio_sectors(clone);
1253 1241
1254 crypt_inc_pending(io); 1242 crypt_inc_pending(io);
@@ -1290,8 +1278,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1290 crypt_inc_pending(new_io); 1278 crypt_inc_pending(new_io);
1291 crypt_convert_init(cc, &new_io->ctx, NULL, 1279 crypt_convert_init(cc, &new_io->ctx, NULL,
1292 io->base_bio, sector); 1280 io->base_bio, sector);
1293 new_io->ctx.idx_in = io->ctx.idx_in; 1281 new_io->ctx.iter_in = io->ctx.iter_in;
1294 new_io->ctx.offset_in = io->ctx.offset_in;
1295 1282
1296 /* 1283 /*
1297 * Fragments after the first use the base_io 1284 * Fragments after the first use the base_io
@@ -1869,11 +1856,12 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
1869 if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { 1856 if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
1870 bio->bi_bdev = cc->dev->bdev; 1857 bio->bi_bdev = cc->dev->bdev;
1871 if (bio_sectors(bio)) 1858 if (bio_sectors(bio))
1872 bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); 1859 bio->bi_iter.bi_sector = cc->start +
1860 dm_target_offset(ti, bio->bi_iter.bi_sector);
1873 return DM_MAPIO_REMAPPED; 1861 return DM_MAPIO_REMAPPED;
1874 } 1862 }
1875 1863
1876 io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector)); 1864 io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
1877 1865
1878 if (bio_data_dir(io->base_bio) == READ) { 1866 if (bio_data_dir(io->base_bio) == READ) {
1879 if (kcryptd_io_read(io, GFP_NOWAIT)) 1867 if (kcryptd_io_read(io, GFP_NOWAIT))
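dm-crypt used to track its position in the input and output bios with explicit index and offset pairs; after this change it keeps two bvec_iters and advances each by one 512-byte sector per crypto request, and "done" is simply both iterators running out of bytes. Below is a userspace model of walking two buffers in 512-byte lockstep, with a plain memcpy standing in for the cipher; sizes and names are invented for illustration.

        #include <stdio.h>
        #include <string.h>

        #define SECTOR 512

        struct iter { unsigned char *base; unsigned size; };    /* stand-in for a bvec_iter */

        static void advance(struct iter *it, unsigned bytes)
        {
                it->base += bytes;
                it->size -= bytes;
        }

        /* "Encrypt" one sector from in to out; here just a copy. */
        static void convert_sector(struct iter *in, struct iter *out)
        {
                memcpy(out->base, in->base, SECTOR);
                advance(in, SECTOR);
                advance(out, SECTOR);
        }

        int main(void)
        {
                unsigned char src[4 * SECTOR], dst[4 * SECTOR];
                struct iter in = { src, sizeof(src) }, out = { dst, sizeof(dst) };

                memset(src, 0xab, sizeof(src));

                /* Loop while both sides still have bytes, as crypt_convert() now does. */
                while (in.size && out.size)
                        convert_sector(&in, &out);

                printf("copied %zu bytes, leftover in=%u out=%u\n",
                       sizeof(dst), in.size, out.size);
                return 0;
        }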
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index a8a511c053a5..42c3a27a14cc 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -277,14 +277,15 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
277 if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { 277 if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
278 bio->bi_bdev = dc->dev_write->bdev; 278 bio->bi_bdev = dc->dev_write->bdev;
279 if (bio_sectors(bio)) 279 if (bio_sectors(bio))
280 bio->bi_sector = dc->start_write + 280 bio->bi_iter.bi_sector = dc->start_write +
281 dm_target_offset(ti, bio->bi_sector); 281 dm_target_offset(ti, bio->bi_iter.bi_sector);
282 282
283 return delay_bio(dc, dc->write_delay, bio); 283 return delay_bio(dc, dc->write_delay, bio);
284 } 284 }
285 285
286 bio->bi_bdev = dc->dev_read->bdev; 286 bio->bi_bdev = dc->dev_read->bdev;
287 bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector); 287 bio->bi_iter.bi_sector = dc->start_read +
288 dm_target_offset(ti, bio->bi_iter.bi_sector);
288 289
289 return delay_bio(dc, dc->read_delay, bio); 290 return delay_bio(dc, dc->read_delay, bio);
290} 291}
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index c80a0ec5f126..b257e46876d3 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -248,7 +248,8 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
248 248
249 bio->bi_bdev = fc->dev->bdev; 249 bio->bi_bdev = fc->dev->bdev;
250 if (bio_sectors(bio)) 250 if (bio_sectors(bio))
251 bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); 251 bio->bi_iter.bi_sector =
252 flakey_map_sector(ti, bio->bi_iter.bi_sector);
252} 253}
253 254
254static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) 255static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
@@ -265,8 +266,8 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
265 DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " 266 DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
266 "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", 267 "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
267 bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, 268 bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
268 (bio_data_dir(bio) == WRITE) ? 'w' : 'r', 269 (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_rw,
269 bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); 270 (unsigned long long)bio->bi_iter.bi_sector, bio_bytes);
270 } 271 }
271} 272}
272 273
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2a20986a2fec..b2b8a10e8427 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -201,26 +201,29 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse
201/* 201/*
202 * Functions for getting the pages from a bvec. 202 * Functions for getting the pages from a bvec.
203 */ 203 */
204static void bvec_get_page(struct dpages *dp, 204static void bio_get_page(struct dpages *dp,
205 struct page **p, unsigned long *len, unsigned *offset) 205 struct page **p, unsigned long *len, unsigned *offset)
206{ 206{
207 struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; 207 struct bio *bio = dp->context_ptr;
208 *p = bvec->bv_page; 208 struct bio_vec bvec = bio_iovec(bio);
209 *len = bvec->bv_len; 209 *p = bvec.bv_page;
210 *offset = bvec->bv_offset; 210 *len = bvec.bv_len;
211 *offset = bvec.bv_offset;
211} 212}
212 213
213static void bvec_next_page(struct dpages *dp) 214static void bio_next_page(struct dpages *dp)
214{ 215{
215 struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; 216 struct bio *bio = dp->context_ptr;
216 dp->context_ptr = bvec + 1; 217 struct bio_vec bvec = bio_iovec(bio);
218
219 bio_advance(bio, bvec.bv_len);
217} 220}
218 221
219static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) 222static void bio_dp_init(struct dpages *dp, struct bio *bio)
220{ 223{
221 dp->get_page = bvec_get_page; 224 dp->get_page = bio_get_page;
222 dp->next_page = bvec_next_page; 225 dp->next_page = bio_next_page;
223 dp->context_ptr = bvec; 226 dp->context_ptr = bio;
224} 227}
225 228
226/* 229/*
@@ -304,14 +307,14 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
304 dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); 307 dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
305 308
306 bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); 309 bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
307 bio->bi_sector = where->sector + (where->count - remaining); 310 bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
308 bio->bi_bdev = where->bdev; 311 bio->bi_bdev = where->bdev;
309 bio->bi_end_io = endio; 312 bio->bi_end_io = endio;
310 store_io_and_region_in_bio(bio, io, region); 313 store_io_and_region_in_bio(bio, io, region);
311 314
312 if (rw & REQ_DISCARD) { 315 if (rw & REQ_DISCARD) {
313 num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); 316 num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
314 bio->bi_size = num_sectors << SECTOR_SHIFT; 317 bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
315 remaining -= num_sectors; 318 remaining -= num_sectors;
316 } else if (rw & REQ_WRITE_SAME) { 319 } else if (rw & REQ_WRITE_SAME) {
317 /* 320 /*
@@ -320,7 +323,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
320 dp->get_page(dp, &page, &len, &offset); 323 dp->get_page(dp, &page, &len, &offset);
321 bio_add_page(bio, page, logical_block_size, offset); 324 bio_add_page(bio, page, logical_block_size, offset);
322 num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); 325 num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
323 bio->bi_size = num_sectors << SECTOR_SHIFT; 326 bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
324 327
325 offset = 0; 328 offset = 0;
326 remaining -= num_sectors; 329 remaining -= num_sectors;
@@ -457,8 +460,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
457 list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); 460 list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
458 break; 461 break;
459 462
460 case DM_IO_BVEC: 463 case DM_IO_BIO:
461 bvec_dp_init(dp, io_req->mem.ptr.bvec); 464 bio_dp_init(dp, io_req->mem.ptr.bio);
462 break; 465 break;
463 466
464 case DM_IO_VMA: 467 case DM_IO_VMA:
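dm-io's dpages abstraction exposes just two hooks, get_page() for the current page, length and offset, and next_page() to move on; the hunks above make the bio-backed variant carry the bio itself and step it forward with bio_advance() instead of walking a raw bio_vec array. The sketch below reproduces that two-hook "page source" shape over a flat buffer; every name and size here is invented for illustration.

        #include <stdio.h>

        struct dpages {
                void (*get)(struct dpages *dp, unsigned char **p, unsigned *len);
                void (*next)(struct dpages *dp);
                void *ctx;
                unsigned pos, total, chunk;
        };

        static void buf_get(struct dpages *dp, unsigned char **p, unsigned *len)
        {
                unsigned left = dp->total - dp->pos;

                *p = (unsigned char *)dp->ctx + dp->pos;
                *len = left < dp->chunk ? left : dp->chunk;
        }

        static void buf_next(struct dpages *dp)
        {
                unsigned char *p;
                unsigned len;

                buf_get(dp, &p, &len);
                dp->pos += len;                 /* advance past what get() just exposed */
        }

        int main(void)
        {
                unsigned char data[10] = "0123456789";
                struct dpages dp = { buf_get, buf_next, data, 0, sizeof(data), 4 };

                while (dp.pos < dp.total) {
                        unsigned char *p;
                        unsigned len;

                        dp.get(&dp, &p, &len);
                        printf("chunk of %u bytes starting with '%c'\n", len, *p);
                        dp.next(&dp);
                }
                return 0;
        }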
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 4f99d267340c..53e848c10939 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -85,7 +85,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
85 85
86 bio->bi_bdev = lc->dev->bdev; 86 bio->bi_bdev = lc->dev->bdev;
87 if (bio_sectors(bio)) 87 if (bio_sectors(bio))
88 bio->bi_sector = linear_map_sector(ti, bio->bi_sector); 88 bio->bi_iter.bi_sector =
89 linear_map_sector(ti, bio->bi_iter.bi_sector);
89} 90}
90 91
91static int linear_map(struct dm_target *ti, struct bio *bio) 92static int linear_map(struct dm_target *ti, struct bio *bio)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9584443c5614..f284e0bfb25f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -432,7 +432,7 @@ static int mirror_available(struct mirror_set *ms, struct bio *bio)
432 region_t region = dm_rh_bio_to_region(ms->rh, bio); 432 region_t region = dm_rh_bio_to_region(ms->rh, bio);
433 433
434 if (log->type->in_sync(log, region, 0)) 434 if (log->type->in_sync(log, region, 0))
435 return choose_mirror(ms, bio->bi_sector) ? 1 : 0; 435 return choose_mirror(ms, bio->bi_iter.bi_sector) ? 1 : 0;
436 436
437 return 0; 437 return 0;
438} 438}
@@ -442,15 +442,15 @@ static int mirror_available(struct mirror_set *ms, struct bio *bio)
442 */ 442 */
443static sector_t map_sector(struct mirror *m, struct bio *bio) 443static sector_t map_sector(struct mirror *m, struct bio *bio)
444{ 444{
445 if (unlikely(!bio->bi_size)) 445 if (unlikely(!bio->bi_iter.bi_size))
446 return 0; 446 return 0;
447 return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector); 447 return m->offset + dm_target_offset(m->ms->ti, bio->bi_iter.bi_sector);
448} 448}
449 449
450static void map_bio(struct mirror *m, struct bio *bio) 450static void map_bio(struct mirror *m, struct bio *bio)
451{ 451{
452 bio->bi_bdev = m->dev->bdev; 452 bio->bi_bdev = m->dev->bdev;
453 bio->bi_sector = map_sector(m, bio); 453 bio->bi_iter.bi_sector = map_sector(m, bio);
454} 454}
455 455
456static void map_region(struct dm_io_region *io, struct mirror *m, 456static void map_region(struct dm_io_region *io, struct mirror *m,
@@ -526,8 +526,8 @@ static void read_async_bio(struct mirror *m, struct bio *bio)
526 struct dm_io_region io; 526 struct dm_io_region io;
527 struct dm_io_request io_req = { 527 struct dm_io_request io_req = {
528 .bi_rw = READ, 528 .bi_rw = READ,
529 .mem.type = DM_IO_BVEC, 529 .mem.type = DM_IO_BIO,
530 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, 530 .mem.ptr.bio = bio,
531 .notify.fn = read_callback, 531 .notify.fn = read_callback,
532 .notify.context = bio, 532 .notify.context = bio,
533 .client = m->ms->io_client, 533 .client = m->ms->io_client,
@@ -559,7 +559,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads)
559 * We can only read balance if the region is in sync. 559 * We can only read balance if the region is in sync.
560 */ 560 */
561 if (likely(region_in_sync(ms, region, 1))) 561 if (likely(region_in_sync(ms, region, 1)))
562 m = choose_mirror(ms, bio->bi_sector); 562 m = choose_mirror(ms, bio->bi_iter.bi_sector);
563 else if (m && atomic_read(&m->error_count)) 563 else if (m && atomic_read(&m->error_count))
564 m = NULL; 564 m = NULL;
565 565
@@ -629,8 +629,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
629 struct mirror *m; 629 struct mirror *m;
630 struct dm_io_request io_req = { 630 struct dm_io_request io_req = {
631 .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), 631 .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
632 .mem.type = DM_IO_BVEC, 632 .mem.type = DM_IO_BIO,
633 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, 633 .mem.ptr.bio = bio,
634 .notify.fn = write_callback, 634 .notify.fn = write_callback,
635 .notify.context = bio, 635 .notify.context = bio,
636 .client = ms->io_client, 636 .client = ms->io_client,
@@ -1181,7 +1181,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1181 * The region is in-sync and we can perform reads directly. 1181 * The region is in-sync and we can perform reads directly.
1182 * Store enough information so we can retry if it fails. 1182 * Store enough information so we can retry if it fails.
1183 */ 1183 */
1184 m = choose_mirror(ms, bio->bi_sector); 1184 m = choose_mirror(ms, bio->bi_iter.bi_sector);
1185 if (unlikely(!m)) 1185 if (unlikely(!m))
1186 return -EIO; 1186 return -EIO;
1187 1187
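With the iterator carried inside the bio itself, dm-io no longer needs to be handed a raw bvec array plus starting index; the request simply points at the bio and dm-io walks bio->bi_iter on its own, which is what the DM_IO_BVEC to DM_IO_BIO change above is about. A sketch of the new request setup, following the dm-raid1 hunk (the map_region()/dm_io() call sequence is assumed from the dm-io API of this era and is not shown in the hunk):

static void read_async_bio(struct mirror *m, struct bio *bio)
{
        struct dm_io_region io;
        struct dm_io_request io_req = {
                .bi_rw = READ,
                .mem.type = DM_IO_BIO,  /* dm-io iterates bio->bi_iter itself */
                .mem.ptr.bio = bio,
                .notify.fn = read_callback,
                .notify.context = bio,
                .client = m->ms->io_client,
        };

        map_region(&io, m, bio);        /* fills io.bdev / io.sector / io.count */
        BUG_ON(dm_io(&io_req, 1, &io, NULL));
}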
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 69732e03eb34..b929fd5f4984 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -126,7 +126,8 @@ EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
126 126
127region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio) 127region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
128{ 128{
129 return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin); 129 return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
130 rh->target_begin);
130} 131}
131EXPORT_SYMBOL_GPL(dm_rh_bio_to_region); 132EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
132 133
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 717718558bd9..ebddef5237e4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1438,6 +1438,7 @@ out:
1438 if (full_bio) { 1438 if (full_bio) {
1439 full_bio->bi_end_io = pe->full_bio_end_io; 1439 full_bio->bi_end_io = pe->full_bio_end_io;
1440 full_bio->bi_private = pe->full_bio_private; 1440 full_bio->bi_private = pe->full_bio_private;
1441 atomic_inc(&full_bio->bi_remaining);
1441 } 1442 }
1442 free_pending_exception(pe); 1443 free_pending_exception(pe);
1443 1444
@@ -1619,11 +1620,10 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
1619 struct bio *bio, chunk_t chunk) 1620 struct bio *bio, chunk_t chunk)
1620{ 1621{
1621 bio->bi_bdev = s->cow->bdev; 1622 bio->bi_bdev = s->cow->bdev;
1622 bio->bi_sector = chunk_to_sector(s->store, 1623 bio->bi_iter.bi_sector =
1623 dm_chunk_number(e->new_chunk) + 1624 chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
1624 (chunk - e->old_chunk)) + 1625 (chunk - e->old_chunk)) +
1625 (bio->bi_sector & 1626 (bio->bi_iter.bi_sector & s->store->chunk_mask);
1626 s->store->chunk_mask);
1627} 1627}
1628 1628
1629static int snapshot_map(struct dm_target *ti, struct bio *bio) 1629static int snapshot_map(struct dm_target *ti, struct bio *bio)
@@ -1641,7 +1641,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1641 return DM_MAPIO_REMAPPED; 1641 return DM_MAPIO_REMAPPED;
1642 } 1642 }
1643 1643
1644 chunk = sector_to_chunk(s->store, bio->bi_sector); 1644 chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
1645 1645
1646 /* Full snapshots are not usable */ 1646 /* Full snapshots are not usable */
1647 /* To get here the table must be live so s->active is always set. */ 1647 /* To get here the table must be live so s->active is always set. */
@@ -1702,7 +1702,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1702 r = DM_MAPIO_SUBMITTED; 1702 r = DM_MAPIO_SUBMITTED;
1703 1703
1704 if (!pe->started && 1704 if (!pe->started &&
1705 bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { 1705 bio->bi_iter.bi_size ==
1706 (s->store->chunk_size << SECTOR_SHIFT)) {
1706 pe->started = 1; 1707 pe->started = 1;
1707 up_write(&s->lock); 1708 up_write(&s->lock);
1708 start_full_bio(pe, bio); 1709 start_full_bio(pe, bio);
@@ -1758,7 +1759,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
1758 return DM_MAPIO_REMAPPED; 1759 return DM_MAPIO_REMAPPED;
1759 } 1760 }
1760 1761
1761 chunk = sector_to_chunk(s->store, bio->bi_sector); 1762 chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
1762 1763
1763 down_write(&s->lock); 1764 down_write(&s->lock);
1764 1765
@@ -2095,7 +2096,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
2095 down_read(&_origins_lock); 2096 down_read(&_origins_lock);
2096 o = __lookup_origin(origin->bdev); 2097 o = __lookup_origin(origin->bdev);
2097 if (o) 2098 if (o)
2098 r = __origin_write(&o->snapshots, bio->bi_sector, bio); 2099 r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
2099 up_read(&_origins_lock); 2100 up_read(&_origins_lock);
2100 2101
2101 return r; 2102 return r;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 73c1712dad96..d1600d2aa2e2 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -259,13 +259,15 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
259{ 259{
260 sector_t begin, end; 260 sector_t begin, end;
261 261
262 stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin); 262 stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
263 target_stripe, &begin);
263 stripe_map_range_sector(sc, bio_end_sector(bio), 264 stripe_map_range_sector(sc, bio_end_sector(bio),
264 target_stripe, &end); 265 target_stripe, &end);
265 if (begin < end) { 266 if (begin < end) {
266 bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; 267 bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
267 bio->bi_sector = begin + sc->stripe[target_stripe].physical_start; 268 bio->bi_iter.bi_sector = begin +
268 bio->bi_size = to_bytes(end - begin); 269 sc->stripe[target_stripe].physical_start;
270 bio->bi_iter.bi_size = to_bytes(end - begin);
269 return DM_MAPIO_REMAPPED; 271 return DM_MAPIO_REMAPPED;
270 } else { 272 } else {
271 /* The range doesn't map to the target stripe */ 273 /* The range doesn't map to the target stripe */
@@ -293,9 +295,10 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
293 return stripe_map_range(sc, bio, target_bio_nr); 295 return stripe_map_range(sc, bio, target_bio_nr);
294 } 296 }
295 297
296 stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); 298 stripe_map_sector(sc, bio->bi_iter.bi_sector,
299 &stripe, &bio->bi_iter.bi_sector);
297 300
298 bio->bi_sector += sc->stripe[stripe].physical_start; 301 bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
299 bio->bi_bdev = sc->stripe[stripe].dev->bdev; 302 bio->bi_bdev = sc->stripe[stripe].dev->bdev;
300 303
301 return DM_MAPIO_REMAPPED; 304 return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index ff9ac4be4721..09a688b3d48c 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -311,11 +311,11 @@ error:
311static int switch_map(struct dm_target *ti, struct bio *bio) 311static int switch_map(struct dm_target *ti, struct bio *bio)
312{ 312{
313 struct switch_ctx *sctx = ti->private; 313 struct switch_ctx *sctx = ti->private;
314 sector_t offset = dm_target_offset(ti, bio->bi_sector); 314 sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
315 unsigned path_nr = switch_get_path_nr(sctx, offset); 315 unsigned path_nr = switch_get_path_nr(sctx, offset);
316 316
317 bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev; 317 bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev;
318 bio->bi_sector = sctx->path_list[path_nr].start + offset; 318 bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;
319 319
320 return DM_MAPIO_REMAPPED; 320 return DM_MAPIO_REMAPPED;
321} 321}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 726228b33a01..faaf944597ab 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -414,7 +414,7 @@ static bool block_size_is_power_of_two(struct pool *pool)
414static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) 414static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
415{ 415{
416 struct pool *pool = tc->pool; 416 struct pool *pool = tc->pool;
417 sector_t block_nr = bio->bi_sector; 417 sector_t block_nr = bio->bi_iter.bi_sector;
418 418
419 if (block_size_is_power_of_two(pool)) 419 if (block_size_is_power_of_two(pool))
420 block_nr >>= pool->sectors_per_block_shift; 420 block_nr >>= pool->sectors_per_block_shift;
@@ -427,14 +427,15 @@ static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
427static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) 427static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
428{ 428{
429 struct pool *pool = tc->pool; 429 struct pool *pool = tc->pool;
430 sector_t bi_sector = bio->bi_sector; 430 sector_t bi_sector = bio->bi_iter.bi_sector;
431 431
432 bio->bi_bdev = tc->pool_dev->bdev; 432 bio->bi_bdev = tc->pool_dev->bdev;
433 if (block_size_is_power_of_two(pool)) 433 if (block_size_is_power_of_two(pool))
434 bio->bi_sector = (block << pool->sectors_per_block_shift) | 434 bio->bi_iter.bi_sector =
435 (bi_sector & (pool->sectors_per_block - 1)); 435 (block << pool->sectors_per_block_shift) |
436 (bi_sector & (pool->sectors_per_block - 1));
436 else 437 else
437 bio->bi_sector = (block * pool->sectors_per_block) + 438 bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
438 sector_div(bi_sector, pool->sectors_per_block); 439 sector_div(bi_sector, pool->sectors_per_block);
439} 440}
440 441
@@ -612,8 +613,10 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
612 613
613static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) 614static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
614{ 615{
615 if (m->bio) 616 if (m->bio) {
616 m->bio->bi_end_io = m->saved_bi_end_io; 617 m->bio->bi_end_io = m->saved_bi_end_io;
618 atomic_inc(&m->bio->bi_remaining);
619 }
617 cell_error(m->tc->pool, m->cell); 620 cell_error(m->tc->pool, m->cell);
618 list_del(&m->list); 621 list_del(&m->list);
619 mempool_free(m, m->tc->pool->mapping_pool); 622 mempool_free(m, m->tc->pool->mapping_pool);
@@ -627,8 +630,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
627 int r; 630 int r;
628 631
629 bio = m->bio; 632 bio = m->bio;
630 if (bio) 633 if (bio) {
631 bio->bi_end_io = m->saved_bi_end_io; 634 bio->bi_end_io = m->saved_bi_end_io;
635 atomic_inc(&bio->bi_remaining);
636 }
632 637
633 if (m->err) { 638 if (m->err) {
634 cell_error(pool, m->cell); 639 cell_error(pool, m->cell);
@@ -731,7 +736,8 @@ static void process_prepared(struct pool *pool, struct list_head *head,
731 */ 736 */
732static int io_overlaps_block(struct pool *pool, struct bio *bio) 737static int io_overlaps_block(struct pool *pool, struct bio *bio)
733{ 738{
734 return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT); 739 return bio->bi_iter.bi_size ==
740 (pool->sectors_per_block << SECTOR_SHIFT);
735} 741}
736 742
737static int io_overwrites_block(struct pool *pool, struct bio *bio) 743static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1136,7 +1142,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
1136 if (bio_detain(pool, &key, bio, &cell)) 1142 if (bio_detain(pool, &key, bio, &cell))
1137 return; 1143 return;
1138 1144
1139 if (bio_data_dir(bio) == WRITE && bio->bi_size) 1145 if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
1140 break_sharing(tc, bio, block, &key, lookup_result, cell); 1146 break_sharing(tc, bio, block, &key, lookup_result, cell);
1141 else { 1147 else {
1142 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); 1148 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -1159,7 +1165,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
1159 /* 1165 /*
1160 * Remap empty bios (flushes) immediately, without provisioning. 1166 * Remap empty bios (flushes) immediately, without provisioning.
1161 */ 1167 */
1162 if (!bio->bi_size) { 1168 if (!bio->bi_iter.bi_size) {
1163 inc_all_io_entry(pool, bio); 1169 inc_all_io_entry(pool, bio);
1164 cell_defer_no_holder(tc, cell); 1170 cell_defer_no_holder(tc, cell);
1165 1171
@@ -1258,7 +1264,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
1258 r = dm_thin_find_block(tc->td, block, 1, &lookup_result); 1264 r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
1259 switch (r) { 1265 switch (r) {
1260 case 0: 1266 case 0:
1261 if (lookup_result.shared && (rw == WRITE) && bio->bi_size) 1267 if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size)
1262 handle_unserviceable_bio(tc->pool, bio); 1268 handle_unserviceable_bio(tc->pool, bio);
1263 else { 1269 else {
1264 inc_all_io_entry(tc->pool, bio); 1270 inc_all_io_entry(tc->pool, bio);
@@ -2939,7 +2945,7 @@ out_unlock:
2939 2945
2940static int thin_map(struct dm_target *ti, struct bio *bio) 2946static int thin_map(struct dm_target *ti, struct bio *bio)
2941{ 2947{
2942 bio->bi_sector = dm_target_offset(ti, bio->bi_sector); 2948 bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
2943 2949
2944 return thin_bio_map(ti, bio); 2950 return thin_bio_map(ti, bio);
2945} 2951}
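The two atomic_inc(&bio->bi_remaining) additions in dm-thin (and the matching one in dm-snap above) follow from the new bio_chain() accounting: bio_endio() now drops a bi_remaining reference, so a driver that hijacked a bio's bi_end_io and will cause bio_endio() to run on that bio a second time has to take the reference back first. Roughly, as a sketch:

/* Earlier the driver hooked the completion, along the lines of:
 *     m->bio = bio;
 *     m->saved_bi_end_io = bio->bi_end_io;
 *     bio->bi_end_io = overwrite_endio;
 * When handing the bio back so bio_endio() will be called on it again,
 * restore the hook and re-take the reference that call will drop:
 */
if (m->bio) {
        m->bio->bi_end_io = m->saved_bi_end_io;
        atomic_inc(&m->bio->bi_remaining);
}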
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 4b7941db3aff..796007a5e0e1 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -73,15 +73,10 @@ struct dm_verity_io {
73 sector_t block; 73 sector_t block;
74 unsigned n_blocks; 74 unsigned n_blocks;
75 75
76 /* saved bio vector */ 76 struct bvec_iter iter;
77 struct bio_vec *io_vec;
78 unsigned io_vec_size;
79 77
80 struct work_struct work; 78 struct work_struct work;
81 79
82 /* A space for short vectors; longer vectors are allocated separately. */
83 struct bio_vec io_vec_inline[DM_VERITY_IO_VEC_INLINE];
84
85 /* 80 /*
86 * Three variably-size fields follow this struct: 81 * Three variably-size fields follow this struct:
87 * 82 *
@@ -284,9 +279,10 @@ release_ret_r:
284static int verity_verify_io(struct dm_verity_io *io) 279static int verity_verify_io(struct dm_verity_io *io)
285{ 280{
286 struct dm_verity *v = io->v; 281 struct dm_verity *v = io->v;
282 struct bio *bio = dm_bio_from_per_bio_data(io,
283 v->ti->per_bio_data_size);
287 unsigned b; 284 unsigned b;
288 int i; 285 int i;
289 unsigned vector = 0, offset = 0;
290 286
291 for (b = 0; b < io->n_blocks; b++) { 287 for (b = 0; b < io->n_blocks; b++) {
292 struct shash_desc *desc; 288 struct shash_desc *desc;
@@ -336,31 +332,22 @@ test_block_hash:
336 } 332 }
337 333
338 todo = 1 << v->data_dev_block_bits; 334 todo = 1 << v->data_dev_block_bits;
339 do { 335 while (io->iter.bi_size) {
340 struct bio_vec *bv;
341 u8 *page; 336 u8 *page;
342 unsigned len; 337 struct bio_vec bv = bio_iter_iovec(bio, io->iter);
343 338
344 BUG_ON(vector >= io->io_vec_size); 339 page = kmap_atomic(bv.bv_page);
345 bv = &io->io_vec[vector]; 340 r = crypto_shash_update(desc, page + bv.bv_offset,
346 page = kmap_atomic(bv->bv_page); 341 bv.bv_len);
347 len = bv->bv_len - offset;
348 if (likely(len >= todo))
349 len = todo;
350 r = crypto_shash_update(desc,
351 page + bv->bv_offset + offset, len);
352 kunmap_atomic(page); 342 kunmap_atomic(page);
343
353 if (r < 0) { 344 if (r < 0) {
354 DMERR("crypto_shash_update failed: %d", r); 345 DMERR("crypto_shash_update failed: %d", r);
355 return r; 346 return r;
356 } 347 }
357 offset += len; 348
358 if (likely(offset == bv->bv_len)) { 349 bio_advance_iter(bio, &io->iter, bv.bv_len);
359 offset = 0; 350 }
360 vector++;
361 }
362 todo -= len;
363 } while (todo);
364 351
365 if (!v->version) { 352 if (!v->version) {
366 r = crypto_shash_update(desc, v->salt, v->salt_size); 353 r = crypto_shash_update(desc, v->salt, v->salt_size);
@@ -383,8 +370,6 @@ test_block_hash:
383 return -EIO; 370 return -EIO;
384 } 371 }
385 } 372 }
386 BUG_ON(vector != io->io_vec_size);
387 BUG_ON(offset);
388 373
389 return 0; 374 return 0;
390} 375}
@@ -400,10 +385,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
400 bio->bi_end_io = io->orig_bi_end_io; 385 bio->bi_end_io = io->orig_bi_end_io;
401 bio->bi_private = io->orig_bi_private; 386 bio->bi_private = io->orig_bi_private;
402 387
403 if (io->io_vec != io->io_vec_inline) 388 bio_endio_nodec(bio, error);
404 mempool_free(io->io_vec, v->vec_mempool);
405
406 bio_endio(bio, error);
407} 389}
408 390
409static void verity_work(struct work_struct *w) 391static void verity_work(struct work_struct *w)
@@ -493,9 +475,9 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
493 struct dm_verity_io *io; 475 struct dm_verity_io *io;
494 476
495 bio->bi_bdev = v->data_dev->bdev; 477 bio->bi_bdev = v->data_dev->bdev;
496 bio->bi_sector = verity_map_sector(v, bio->bi_sector); 478 bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
497 479
498 if (((unsigned)bio->bi_sector | bio_sectors(bio)) & 480 if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
499 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) { 481 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
500 DMERR_LIMIT("unaligned io"); 482 DMERR_LIMIT("unaligned io");
501 return -EIO; 483 return -EIO;
@@ -514,18 +496,12 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
514 io->v = v; 496 io->v = v;
515 io->orig_bi_end_io = bio->bi_end_io; 497 io->orig_bi_end_io = bio->bi_end_io;
516 io->orig_bi_private = bio->bi_private; 498 io->orig_bi_private = bio->bi_private;
517 io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); 499 io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
518 io->n_blocks = bio->bi_size >> v->data_dev_block_bits; 500 io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
519 501
520 bio->bi_end_io = verity_end_io; 502 bio->bi_end_io = verity_end_io;
521 bio->bi_private = io; 503 bio->bi_private = io;
522 io->io_vec_size = bio_segments(bio); 504 io->iter = bio->bi_iter;
523 if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
524 io->io_vec = io->io_vec_inline;
525 else
526 io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO);
527 memcpy(io->io_vec, bio_iovec(bio),
528 io->io_vec_size * sizeof(struct bio_vec));
529 505
530 verity_submit_prefetch(v, io); 506 verity_submit_prefetch(v, io);
531 507
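dm-verity no longer snapshots the bio's vector into a private array; it keeps a struct bvec_iter of its own and walks it with bio_iter_iovec() plus bio_advance_iter(), which also copes with a bvec that straddles a hash block. A sketch of that walk for one data block (crypto setup omitted; unlike the hunk above this clamps each update to the block size, and it assumes io->iter still covers the blocks being verified):

/* Hash one data block's worth of the bio through the saved iterator. */
unsigned todo = 1 << v->data_dev_block_bits;

while (todo) {
        struct bio_vec bv = bio_iter_iovec(bio, io->iter);
        unsigned len = min(bv.bv_len, todo);
        u8 *page = kmap_atomic(bv.bv_page);
        int r = crypto_shash_update(desc, page + bv.bv_offset, len);

        kunmap_atomic(page);
        if (r < 0)
                return r;

        bio_advance_iter(bio, &io->iter, len);  /* move the private iterator */
        todo -= len;
}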
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b49c76284241..8c53b09b9a2c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -575,7 +575,7 @@ static void start_io_acct(struct dm_io *io)
575 atomic_inc_return(&md->pending[rw])); 575 atomic_inc_return(&md->pending[rw]));
576 576
577 if (unlikely(dm_stats_used(&md->stats))) 577 if (unlikely(dm_stats_used(&md->stats)))
578 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector, 578 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
579 bio_sectors(bio), false, 0, &io->stats_aux); 579 bio_sectors(bio), false, 0, &io->stats_aux);
580} 580}
581 581
@@ -593,7 +593,7 @@ static void end_io_acct(struct dm_io *io)
593 part_stat_unlock(); 593 part_stat_unlock();
594 594
595 if (unlikely(dm_stats_used(&md->stats))) 595 if (unlikely(dm_stats_used(&md->stats)))
596 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector, 596 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
597 bio_sectors(bio), true, duration, &io->stats_aux); 597 bio_sectors(bio), true, duration, &io->stats_aux);
598 598
599 /* 599 /*
@@ -742,7 +742,7 @@ static void dec_pending(struct dm_io *io, int error)
742 if (io_error == DM_ENDIO_REQUEUE) 742 if (io_error == DM_ENDIO_REQUEUE)
743 return; 743 return;
744 744
745 if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { 745 if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
746 /* 746 /*
747 * Preflush done for flush with data, reissue 747 * Preflush done for flush with data, reissue
748 * without REQ_FLUSH. 748 * without REQ_FLUSH.
@@ -797,7 +797,7 @@ static void end_clone_bio(struct bio *clone, int error)
797 struct dm_rq_clone_bio_info *info = clone->bi_private; 797 struct dm_rq_clone_bio_info *info = clone->bi_private;
798 struct dm_rq_target_io *tio = info->tio; 798 struct dm_rq_target_io *tio = info->tio;
799 struct bio *bio = info->orig; 799 struct bio *bio = info->orig;
800 unsigned int nr_bytes = info->orig->bi_size; 800 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
801 801
802 bio_put(clone); 802 bio_put(clone);
803 803
@@ -1128,7 +1128,7 @@ static void __map_bio(struct dm_target_io *tio)
1128 * this io. 1128 * this io.
1129 */ 1129 */
1130 atomic_inc(&tio->io->io_count); 1130 atomic_inc(&tio->io->io_count);
1131 sector = clone->bi_sector; 1131 sector = clone->bi_iter.bi_sector;
1132 r = ti->type->map(ti, clone); 1132 r = ti->type->map(ti, clone);
1133 if (r == DM_MAPIO_REMAPPED) { 1133 if (r == DM_MAPIO_REMAPPED) {
1134 /* the bio has been remapped so dispatch it */ 1134 /* the bio has been remapped so dispatch it */
@@ -1155,76 +1155,32 @@ struct clone_info {
1155 struct dm_io *io; 1155 struct dm_io *io;
1156 sector_t sector; 1156 sector_t sector;
1157 sector_t sector_count; 1157 sector_t sector_count;
1158 unsigned short idx;
1159}; 1158};
1160 1159
1161static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) 1160static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
1162{ 1161{
1163 bio->bi_sector = sector; 1162 bio->bi_iter.bi_sector = sector;
1164 bio->bi_size = to_bytes(len); 1163 bio->bi_iter.bi_size = to_bytes(len);
1165}
1166
1167static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count)
1168{
1169 bio->bi_idx = idx;
1170 bio->bi_vcnt = idx + bv_count;
1171 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
1172}
1173
1174static void clone_bio_integrity(struct bio *bio, struct bio *clone,
1175 unsigned short idx, unsigned len, unsigned offset,
1176 unsigned trim)
1177{
1178 if (!bio_integrity(bio))
1179 return;
1180
1181 bio_integrity_clone(clone, bio, GFP_NOIO);
1182
1183 if (trim)
1184 bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len);
1185}
1186
1187/*
1188 * Creates a little bio that just does part of a bvec.
1189 */
1190static void clone_split_bio(struct dm_target_io *tio, struct bio *bio,
1191 sector_t sector, unsigned short idx,
1192 unsigned offset, unsigned len)
1193{
1194 struct bio *clone = &tio->clone;
1195 struct bio_vec *bv = bio->bi_io_vec + idx;
1196
1197 *clone->bi_io_vec = *bv;
1198
1199 bio_setup_sector(clone, sector, len);
1200
1201 clone->bi_bdev = bio->bi_bdev;
1202 clone->bi_rw = bio->bi_rw;
1203 clone->bi_vcnt = 1;
1204 clone->bi_io_vec->bv_offset = offset;
1205 clone->bi_io_vec->bv_len = clone->bi_size;
1206 clone->bi_flags |= 1 << BIO_CLONED;
1207
1208 clone_bio_integrity(bio, clone, idx, len, offset, 1);
1209} 1164}
1210 1165
1211/* 1166/*
1212 * Creates a bio that consists of range of complete bvecs. 1167 * Creates a bio that consists of range of complete bvecs.
1213 */ 1168 */
1214static void clone_bio(struct dm_target_io *tio, struct bio *bio, 1169static void clone_bio(struct dm_target_io *tio, struct bio *bio,
1215 sector_t sector, unsigned short idx, 1170 sector_t sector, unsigned len)
1216 unsigned short bv_count, unsigned len)
1217{ 1171{
1218 struct bio *clone = &tio->clone; 1172 struct bio *clone = &tio->clone;
1219 unsigned trim = 0;
1220 1173
1221 __bio_clone(clone, bio); 1174 __bio_clone_fast(clone, bio);
1222 bio_setup_sector(clone, sector, len); 1175
1223 bio_setup_bv(clone, idx, bv_count); 1176 if (bio_integrity(bio))
1177 bio_integrity_clone(clone, bio, GFP_NOIO);
1178
1179 bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
1180 clone->bi_iter.bi_size = to_bytes(len);
1224 1181
1225 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) 1182 if (bio_integrity(bio))
1226 trim = 1; 1183 bio_integrity_trim(clone, 0, len);
1227 clone_bio_integrity(bio, clone, idx, len, 0, trim);
1228} 1184}
1229 1185
1230static struct dm_target_io *alloc_tio(struct clone_info *ci, 1186static struct dm_target_io *alloc_tio(struct clone_info *ci,
@@ -1257,7 +1213,7 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
1257 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush 1213 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
1258 * and discard, so no need for concern about wasted bvec allocations. 1214 * and discard, so no need for concern about wasted bvec allocations.
1259 */ 1215 */
1260 __bio_clone(clone, ci->bio); 1216 __bio_clone_fast(clone, ci->bio);
1261 if (len) 1217 if (len)
1262 bio_setup_sector(clone, ci->sector, len); 1218 bio_setup_sector(clone, ci->sector, len);
1263 1219
@@ -1286,10 +1242,7 @@ static int __send_empty_flush(struct clone_info *ci)
1286} 1242}
1287 1243
1288static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, 1244static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
1289 sector_t sector, int nr_iovecs, 1245 sector_t sector, unsigned len)
1290 unsigned short idx, unsigned short bv_count,
1291 unsigned offset, unsigned len,
1292 unsigned split_bvec)
1293{ 1246{
1294 struct bio *bio = ci->bio; 1247 struct bio *bio = ci->bio;
1295 struct dm_target_io *tio; 1248 struct dm_target_io *tio;
@@ -1303,11 +1256,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
1303 num_target_bios = ti->num_write_bios(ti, bio); 1256 num_target_bios = ti->num_write_bios(ti, bio);
1304 1257
1305 for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { 1258 for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
1306 tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); 1259 tio = alloc_tio(ci, ti, 0, target_bio_nr);
1307 if (split_bvec) 1260 clone_bio(tio, bio, sector, len);
1308 clone_split_bio(tio, bio, sector, idx, offset, len);
1309 else
1310 clone_bio(tio, bio, sector, idx, bv_count, len);
1311 __map_bio(tio); 1261 __map_bio(tio);
1312 } 1262 }
1313} 1263}
@@ -1379,68 +1329,13 @@ static int __send_write_same(struct clone_info *ci)
1379} 1329}
1380 1330
1381/* 1331/*
1382 * Find maximum number of sectors / bvecs we can process with a single bio.
1383 */
1384static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx)
1385{
1386 struct bio *bio = ci->bio;
1387 sector_t bv_len, total_len = 0;
1388
1389 for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) {
1390 bv_len = to_sector(bio->bi_io_vec[*idx].bv_len);
1391
1392 if (bv_len > max)
1393 break;
1394
1395 max -= bv_len;
1396 total_len += bv_len;
1397 }
1398
1399 return total_len;
1400}
1401
1402static int __split_bvec_across_targets(struct clone_info *ci,
1403 struct dm_target *ti, sector_t max)
1404{
1405 struct bio *bio = ci->bio;
1406 struct bio_vec *bv = bio->bi_io_vec + ci->idx;
1407 sector_t remaining = to_sector(bv->bv_len);
1408 unsigned offset = 0;
1409 sector_t len;
1410
1411 do {
1412 if (offset) {
1413 ti = dm_table_find_target(ci->map, ci->sector);
1414 if (!dm_target_is_valid(ti))
1415 return -EIO;
1416
1417 max = max_io_len(ci->sector, ti);
1418 }
1419
1420 len = min(remaining, max);
1421
1422 __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0,
1423 bv->bv_offset + offset, len, 1);
1424
1425 ci->sector += len;
1426 ci->sector_count -= len;
1427 offset += to_bytes(len);
1428 } while (remaining -= len);
1429
1430 ci->idx++;
1431
1432 return 0;
1433}
1434
1435/*
1436 * Select the correct strategy for processing a non-flush bio. 1332 * Select the correct strategy for processing a non-flush bio.
1437 */ 1333 */
1438static int __split_and_process_non_flush(struct clone_info *ci) 1334static int __split_and_process_non_flush(struct clone_info *ci)
1439{ 1335{
1440 struct bio *bio = ci->bio; 1336 struct bio *bio = ci->bio;
1441 struct dm_target *ti; 1337 struct dm_target *ti;
1442 sector_t len, max; 1338 unsigned len;
1443 int idx;
1444 1339
1445 if (unlikely(bio->bi_rw & REQ_DISCARD)) 1340 if (unlikely(bio->bi_rw & REQ_DISCARD))
1446 return __send_discard(ci); 1341 return __send_discard(ci);
@@ -1451,41 +1346,14 @@ static int __split_and_process_non_flush(struct clone_info *ci)
1451 if (!dm_target_is_valid(ti)) 1346 if (!dm_target_is_valid(ti))
1452 return -EIO; 1347 return -EIO;
1453 1348
1454 max = max_io_len(ci->sector, ti); 1349 len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
1455
1456 /*
1457 * Optimise for the simple case where we can do all of
1458 * the remaining io with a single clone.
1459 */
1460 if (ci->sector_count <= max) {
1461 __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
1462 ci->idx, bio->bi_vcnt - ci->idx, 0,
1463 ci->sector_count, 0);
1464 ci->sector_count = 0;
1465 return 0;
1466 }
1467
1468 /*
1469 * There are some bvecs that don't span targets.
1470 * Do as many of these as possible.
1471 */
1472 if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
1473 len = __len_within_target(ci, max, &idx);
1474
1475 __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
1476 ci->idx, idx - ci->idx, 0, len, 0);
1477 1350
1478 ci->sector += len; 1351 __clone_and_map_data_bio(ci, ti, ci->sector, len);
1479 ci->sector_count -= len;
1480 ci->idx = idx;
1481 1352
1482 return 0; 1353 ci->sector += len;
1483 } 1354 ci->sector_count -= len;
1484 1355
1485 /* 1356 return 0;
1486 * Handle a bvec that must be split between two or more targets.
1487 */
1488 return __split_bvec_across_targets(ci, ti, max);
1489} 1357}
1490 1358
1491/* 1359/*
@@ -1510,8 +1378,7 @@ static void __split_and_process_bio(struct mapped_device *md,
1510 ci.io->bio = bio; 1378 ci.io->bio = bio;
1511 ci.io->md = md; 1379 ci.io->md = md;
1512 spin_lock_init(&ci.io->endio_lock); 1380 spin_lock_init(&ci.io->endio_lock);
1513 ci.sector = bio->bi_sector; 1381 ci.sector = bio->bi_iter.bi_sector;
1514 ci.idx = bio->bi_idx;
1515 1382
1516 start_io_acct(ci.io); 1383 start_io_acct(ci.io);
1517 1384
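The dm core clone path collapses in the same way: instead of copying bvecs by hand (and special-casing a clone that needs only part of one bvec), a clone now shares the parent's vector via __bio_clone_fast() and is narrowed purely by adjusting its iterator. The resulting helper, as in the hunk above, with comments added:

/*
 * Sketch: build a clone covering 'len' sectors of 'bio' starting at
 * absolute sector 'sector'.  The clone shares bio's bvec array; only
 * clone->bi_iter changes, so nothing is copied or split.
 */
static void clone_bio(struct dm_target_io *tio, struct bio *bio,
                      sector_t sector, unsigned len)
{
        struct bio *clone = &tio->clone;

        __bio_clone_fast(clone, bio);   /* share bi_io_vec, copy bi_iter */

        if (bio_integrity(bio))
                bio_integrity_clone(clone, bio, GFP_NOIO);

        /* advance to 'sector', then cap the clone at 'len' sectors */
        bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
        clone->bi_iter.bi_size = to_bytes(len);

        if (bio_integrity(bio))
                bio_integrity_trim(clone, 0, len);
}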
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 3193aefe982b..e8b4574956c7 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -74,8 +74,8 @@ static void faulty_fail(struct bio *bio, int error)
74{ 74{
75 struct bio *b = bio->bi_private; 75 struct bio *b = bio->bi_private;
76 76
77 b->bi_size = bio->bi_size; 77 b->bi_iter.bi_size = bio->bi_iter.bi_size;
78 b->bi_sector = bio->bi_sector; 78 b->bi_iter.bi_sector = bio->bi_iter.bi_sector;
79 79
80 bio_put(bio); 80 bio_put(bio);
81 81
@@ -185,26 +185,31 @@ static void make_request(struct mddev *mddev, struct bio *bio)
185 return; 185 return;
186 } 186 }
187 187
188 if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE)) 188 if (check_sector(conf, bio->bi_iter.bi_sector,
189 bio_end_sector(bio), WRITE))
189 failit = 1; 190 failit = 1;
190 if (check_mode(conf, WritePersistent)) { 191 if (check_mode(conf, WritePersistent)) {
191 add_sector(conf, bio->bi_sector, WritePersistent); 192 add_sector(conf, bio->bi_iter.bi_sector,
193 WritePersistent);
192 failit = 1; 194 failit = 1;
193 } 195 }
194 if (check_mode(conf, WriteTransient)) 196 if (check_mode(conf, WriteTransient))
195 failit = 1; 197 failit = 1;
196 } else { 198 } else {
197 /* read request */ 199 /* read request */
198 if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ)) 200 if (check_sector(conf, bio->bi_iter.bi_sector,
201 bio_end_sector(bio), READ))
199 failit = 1; 202 failit = 1;
200 if (check_mode(conf, ReadTransient)) 203 if (check_mode(conf, ReadTransient))
201 failit = 1; 204 failit = 1;
202 if (check_mode(conf, ReadPersistent)) { 205 if (check_mode(conf, ReadPersistent)) {
203 add_sector(conf, bio->bi_sector, ReadPersistent); 206 add_sector(conf, bio->bi_iter.bi_sector,
207 ReadPersistent);
204 failit = 1; 208 failit = 1;
205 } 209 }
206 if (check_mode(conf, ReadFixable)) { 210 if (check_mode(conf, ReadFixable)) {
207 add_sector(conf, bio->bi_sector, ReadFixable); 211 add_sector(conf, bio->bi_iter.bi_sector,
212 ReadFixable);
208 failit = 1; 213 failit = 1;
209 } 214 }
210 } 215 }
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index f03fabd2b37b..56f534b4a2d2 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -288,65 +288,65 @@ static int linear_stop (struct mddev *mddev)
288 288
289static void linear_make_request(struct mddev *mddev, struct bio *bio) 289static void linear_make_request(struct mddev *mddev, struct bio *bio)
290{ 290{
291 char b[BDEVNAME_SIZE];
291 struct dev_info *tmp_dev; 292 struct dev_info *tmp_dev;
292 sector_t start_sector; 293 struct bio *split;
294 sector_t start_sector, end_sector, data_offset;
293 295
294 if (unlikely(bio->bi_rw & REQ_FLUSH)) { 296 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
295 md_flush_request(mddev, bio); 297 md_flush_request(mddev, bio);
296 return; 298 return;
297 } 299 }
298 300
299 rcu_read_lock(); 301 do {
300 tmp_dev = which_dev(mddev, bio->bi_sector); 302 rcu_read_lock();
301 start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
302
303
304 if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)
305 || (bio->bi_sector < start_sector))) {
306 char b[BDEVNAME_SIZE];
307
308 printk(KERN_ERR
309 "md/linear:%s: make_request: Sector %llu out of bounds on "
310 "dev %s: %llu sectors, offset %llu\n",
311 mdname(mddev),
312 (unsigned long long)bio->bi_sector,
313 bdevname(tmp_dev->rdev->bdev, b),
314 (unsigned long long)tmp_dev->rdev->sectors,
315 (unsigned long long)start_sector);
316 rcu_read_unlock();
317 bio_io_error(bio);
318 return;
319 }
320 if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
321 /* This bio crosses a device boundary, so we have to
322 * split it.
323 */
324 struct bio_pair *bp;
325 sector_t end_sector = tmp_dev->end_sector;
326 303
327 rcu_read_unlock(); 304 tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
328 305 start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
329 bp = bio_split(bio, end_sector - bio->bi_sector); 306 end_sector = tmp_dev->end_sector;
307 data_offset = tmp_dev->rdev->data_offset;
308 bio->bi_bdev = tmp_dev->rdev->bdev;
330 309
331 linear_make_request(mddev, &bp->bio1); 310 rcu_read_unlock();
332 linear_make_request(mddev, &bp->bio2);
333 bio_pair_release(bp);
334 return;
335 }
336
337 bio->bi_bdev = tmp_dev->rdev->bdev;
338 bio->bi_sector = bio->bi_sector - start_sector
339 + tmp_dev->rdev->data_offset;
340 rcu_read_unlock();
341 311
342 if (unlikely((bio->bi_rw & REQ_DISCARD) && 312 if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
343 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { 313 bio->bi_iter.bi_sector < start_sector))
344 /* Just ignore it */ 314 goto out_of_bounds;
345 bio_endio(bio, 0); 315
346 return; 316 if (unlikely(bio_end_sector(bio) > end_sector)) {
347 } 317 /* This bio crosses a device boundary, so we have to
318 * split it.
319 */
320 split = bio_split(bio, end_sector -
321 bio->bi_iter.bi_sector,
322 GFP_NOIO, fs_bio_set);
323 bio_chain(split, bio);
324 } else {
325 split = bio;
326 }
348 327
349 generic_make_request(bio); 328 split->bi_iter.bi_sector = split->bi_iter.bi_sector -
329 start_sector + data_offset;
330
331 if (unlikely((split->bi_rw & REQ_DISCARD) &&
332 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
333 /* Just ignore it */
334 bio_endio(split, 0);
335 } else
336 generic_make_request(split);
337 } while (split != bio);
338 return;
339
340out_of_bounds:
341 printk(KERN_ERR
342 "md/linear:%s: make_request: Sector %llu out of bounds on "
343 "dev %s: %llu sectors, offset %llu\n",
344 mdname(mddev),
345 (unsigned long long)bio->bi_iter.bi_sector,
346 bdevname(tmp_dev->rdev->bdev, b),
347 (unsigned long long)tmp_dev->rdev->sectors,
348 (unsigned long long)start_sector);
349 bio_io_error(bio);
350} 350}
351 351
352static void linear_status (struct seq_file *seq, struct mddev *mddev) 352static void linear_status (struct seq_file *seq, struct mddev *mddev)
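The rewritten linear_make_request() is the new canonical shape for a stacking driver whose request crosses an internal boundary: carve off the leading piece with bio_split(), tie its completion to the parent with bio_chain(), submit it, and loop on what remains. Stripped to its skeleton (max_sectors_here() is a hypothetical stand-in for the per-device or per-chunk limit; error and discard handling omitted):

static void make_request_split(struct mddev *mddev, struct bio *bio)
{
        struct bio *split;

        do {
                sector_t sectors = max_sectors_here(bio);       /* hypothetical helper */

                if (sectors < bio_sectors(bio)) {
                        /* front piece; it completes into 'bio' via bio_chain() */
                        split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
                        bio_chain(split, bio);
                } else {
                        split = bio;    /* last (or only) piece */
                }

                /* remap split->bi_bdev / split->bi_iter.bi_sector here ... */
                generic_make_request(split);
        } while (split != bio);
}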
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 40c531359a15..4ad5cc4e63e8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -393,7 +393,7 @@ static void md_submit_flush_data(struct work_struct *ws)
393 struct mddev *mddev = container_of(ws, struct mddev, flush_work); 393 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
394 struct bio *bio = mddev->flush_bio; 394 struct bio *bio = mddev->flush_bio;
395 395
396 if (bio->bi_size == 0) 396 if (bio->bi_iter.bi_size == 0)
397 /* an empty barrier - all done */ 397 /* an empty barrier - all done */
398 bio_endio(bio, 0); 398 bio_endio(bio, 0);
399 else { 399 else {
@@ -754,7 +754,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
754 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); 754 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
755 755
756 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev; 756 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
757 bio->bi_sector = sector; 757 bio->bi_iter.bi_sector = sector;
758 bio_add_page(bio, page, size, 0); 758 bio_add_page(bio, page, size, 0);
759 bio->bi_private = rdev; 759 bio->bi_private = rdev;
760 bio->bi_end_io = super_written; 760 bio->bi_end_io = super_written;
@@ -782,18 +782,16 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
782 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); 782 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
783 int ret; 783 int ret;
784 784
785 rw |= REQ_SYNC;
786
787 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? 785 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
788 rdev->meta_bdev : rdev->bdev; 786 rdev->meta_bdev : rdev->bdev;
789 if (metadata_op) 787 if (metadata_op)
790 bio->bi_sector = sector + rdev->sb_start; 788 bio->bi_iter.bi_sector = sector + rdev->sb_start;
791 else if (rdev->mddev->reshape_position != MaxSector && 789 else if (rdev->mddev->reshape_position != MaxSector &&
792 (rdev->mddev->reshape_backwards == 790 (rdev->mddev->reshape_backwards ==
793 (sector >= rdev->mddev->reshape_position))) 791 (sector >= rdev->mddev->reshape_position)))
794 bio->bi_sector = sector + rdev->new_data_offset; 792 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
795 else 793 else
796 bio->bi_sector = sector + rdev->data_offset; 794 bio->bi_iter.bi_sector = sector + rdev->data_offset;
797 bio_add_page(bio, page, size, 0); 795 bio_add_page(bio, page, size, 0);
798 submit_bio_wait(rw, bio); 796 submit_bio_wait(rw, bio);
799 797
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 1642eae75a33..849ad39f547b 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -100,7 +100,7 @@ static void multipath_end_request(struct bio *bio, int error)
100 md_error (mp_bh->mddev, rdev); 100 md_error (mp_bh->mddev, rdev);
101 printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n", 101 printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n",
102 bdevname(rdev->bdev,b), 102 bdevname(rdev->bdev,b),
103 (unsigned long long)bio->bi_sector); 103 (unsigned long long)bio->bi_iter.bi_sector);
104 multipath_reschedule_retry(mp_bh); 104 multipath_reschedule_retry(mp_bh);
105 } else 105 } else
106 multipath_end_bh_io(mp_bh, error); 106 multipath_end_bh_io(mp_bh, error);
@@ -132,7 +132,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
132 multipath = conf->multipaths + mp_bh->path; 132 multipath = conf->multipaths + mp_bh->path;
133 133
134 mp_bh->bio = *bio; 134 mp_bh->bio = *bio;
135 mp_bh->bio.bi_sector += multipath->rdev->data_offset; 135 mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
136 mp_bh->bio.bi_bdev = multipath->rdev->bdev; 136 mp_bh->bio.bi_bdev = multipath->rdev->bdev;
137 mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; 137 mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
138 mp_bh->bio.bi_end_io = multipath_end_request; 138 mp_bh->bio.bi_end_io = multipath_end_request;
@@ -355,21 +355,22 @@ static void multipathd(struct md_thread *thread)
355 spin_unlock_irqrestore(&conf->device_lock, flags); 355 spin_unlock_irqrestore(&conf->device_lock, flags);
356 356
357 bio = &mp_bh->bio; 357 bio = &mp_bh->bio;
358 bio->bi_sector = mp_bh->master_bio->bi_sector; 358 bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
359 359
360 if ((mp_bh->path = multipath_map (conf))<0) { 360 if ((mp_bh->path = multipath_map (conf))<0) {
361 printk(KERN_ALERT "multipath: %s: unrecoverable IO read" 361 printk(KERN_ALERT "multipath: %s: unrecoverable IO read"
362 " error for block %llu\n", 362 " error for block %llu\n",
363 bdevname(bio->bi_bdev,b), 363 bdevname(bio->bi_bdev,b),
364 (unsigned long long)bio->bi_sector); 364 (unsigned long long)bio->bi_iter.bi_sector);
365 multipath_end_bh_io(mp_bh, -EIO); 365 multipath_end_bh_io(mp_bh, -EIO);
366 } else { 366 } else {
367 printk(KERN_ERR "multipath: %s: redirecting sector %llu" 367 printk(KERN_ERR "multipath: %s: redirecting sector %llu"
368 " to another IO path\n", 368 " to another IO path\n",
369 bdevname(bio->bi_bdev,b), 369 bdevname(bio->bi_bdev,b),
370 (unsigned long long)bio->bi_sector); 370 (unsigned long long)bio->bi_iter.bi_sector);
371 *bio = *(mp_bh->master_bio); 371 *bio = *(mp_bh->master_bio);
372 bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; 372 bio->bi_iter.bi_sector +=
373 conf->multipaths[mp_bh->path].rdev->data_offset;
373 bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; 374 bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
374 bio->bi_rw |= REQ_FAILFAST_TRANSPORT; 375 bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
375 bio->bi_end_io = multipath_end_request; 376 bio->bi_end_io = multipath_end_request;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c4d420b7d2f4..407a99e46f69 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -501,10 +501,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
501 unsigned int chunk_sects, struct bio *bio) 501 unsigned int chunk_sects, struct bio *bio)
502{ 502{
503 if (likely(is_power_of_2(chunk_sects))) { 503 if (likely(is_power_of_2(chunk_sects))) {
504 return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) 504 return chunk_sects >=
505 ((bio->bi_iter.bi_sector & (chunk_sects-1))
505 + bio_sectors(bio)); 506 + bio_sectors(bio));
506 } else{ 507 } else{
507 sector_t sector = bio->bi_sector; 508 sector_t sector = bio->bi_iter.bi_sector;
508 return chunk_sects >= (sector_div(sector, chunk_sects) 509 return chunk_sects >= (sector_div(sector, chunk_sects)
509 + bio_sectors(bio)); 510 + bio_sectors(bio));
510 } 511 }
@@ -512,64 +513,44 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
512 513
513static void raid0_make_request(struct mddev *mddev, struct bio *bio) 514static void raid0_make_request(struct mddev *mddev, struct bio *bio)
514{ 515{
515 unsigned int chunk_sects;
516 sector_t sector_offset;
517 struct strip_zone *zone; 516 struct strip_zone *zone;
518 struct md_rdev *tmp_dev; 517 struct md_rdev *tmp_dev;
518 struct bio *split;
519 519
520 if (unlikely(bio->bi_rw & REQ_FLUSH)) { 520 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
521 md_flush_request(mddev, bio); 521 md_flush_request(mddev, bio);
522 return; 522 return;
523 } 523 }
524 524
525 chunk_sects = mddev->chunk_sectors; 525 do {
526 if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { 526 sector_t sector = bio->bi_iter.bi_sector;
527 sector_t sector = bio->bi_sector; 527 unsigned chunk_sects = mddev->chunk_sectors;
528 struct bio_pair *bp;
529 /* Sanity check -- queue functions should prevent this happening */
530 if (bio_segments(bio) > 1)
531 goto bad_map;
532 /* This is a one page bio that upper layers
533 * refuse to split for us, so we need to split it.
534 */
535 if (likely(is_power_of_2(chunk_sects)))
536 bp = bio_split(bio, chunk_sects - (sector &
537 (chunk_sects-1)));
538 else
539 bp = bio_split(bio, chunk_sects -
540 sector_div(sector, chunk_sects));
541 raid0_make_request(mddev, &bp->bio1);
542 raid0_make_request(mddev, &bp->bio2);
543 bio_pair_release(bp);
544 return;
545 }
546 528
547 sector_offset = bio->bi_sector; 529 unsigned sectors = chunk_sects -
548 zone = find_zone(mddev->private, &sector_offset); 530 (likely(is_power_of_2(chunk_sects))
549 tmp_dev = map_sector(mddev, zone, bio->bi_sector, 531 ? (sector & (chunk_sects-1))
550 &sector_offset); 532 : sector_div(sector, chunk_sects));
551 bio->bi_bdev = tmp_dev->bdev;
552 bio->bi_sector = sector_offset + zone->dev_start +
553 tmp_dev->data_offset;
554
555 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
556 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
557 /* Just ignore it */
558 bio_endio(bio, 0);
559 return;
560 }
561 533
562 generic_make_request(bio); 534 if (sectors < bio_sectors(bio)) {
563 return; 535 split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
564 536 bio_chain(split, bio);
565bad_map: 537 } else {
566 printk("md/raid0:%s: make_request bug: can't convert block across chunks" 538 split = bio;
567 " or bigger than %dk %llu %d\n", 539 }
568 mdname(mddev), chunk_sects / 2,
569 (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
570 540
571 bio_io_error(bio); 541 zone = find_zone(mddev->private, &sector);
572 return; 542 tmp_dev = map_sector(mddev, zone, sector, &sector);
543 split->bi_bdev = tmp_dev->bdev;
544 split->bi_iter.bi_sector = sector + zone->dev_start +
545 tmp_dev->data_offset;
546
547 if (unlikely((split->bi_rw & REQ_DISCARD) &&
548 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
549 /* Just ignore it */
550 bio_endio(split, 0);
551 } else
552 generic_make_request(split);
553 } while (split != bio);
573} 554}
574 555
575static void raid0_status(struct seq_file *seq, struct mddev *mddev) 556static void raid0_status(struct seq_file *seq, struct mddev *mddev)
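raid0 feeds the same split-and-chain loop; its split size is the distance to the next chunk boundary. Note that sector_div() divides its first argument in place and returns the remainder, which is why the non-power-of-two branch in the hunk works on a local copy of bi_sector. As a small sketch:

/* Sectors remaining until the next chunk boundary, per the raid0 hunk.
 * 'sector' must be a local copy of bio->bi_iter.bi_sector because
 * sector_div() modifies it in place. */
static unsigned sectors_to_chunk_end(sector_t sector, unsigned chunk_sects)
{
        if (likely(is_power_of_2(chunk_sects)))
                return chunk_sects - (sector & (chunk_sects - 1));

        /* sector_div() returns sector % chunk_sects */
        return chunk_sects - sector_div(sector, chunk_sects);
}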
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a49cfcc7a343..fd3a2a14b587 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -229,7 +229,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
229 int done; 229 int done;
230 struct r1conf *conf = r1_bio->mddev->private; 230 struct r1conf *conf = r1_bio->mddev->private;
231 sector_t start_next_window = r1_bio->start_next_window; 231 sector_t start_next_window = r1_bio->start_next_window;
232 sector_t bi_sector = bio->bi_sector; 232 sector_t bi_sector = bio->bi_iter.bi_sector;
233 233
234 if (bio->bi_phys_segments) { 234 if (bio->bi_phys_segments) {
235 unsigned long flags; 235 unsigned long flags;
@@ -265,9 +265,8 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
265 if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { 265 if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
266 pr_debug("raid1: sync end %s on sectors %llu-%llu\n", 266 pr_debug("raid1: sync end %s on sectors %llu-%llu\n",
267 (bio_data_dir(bio) == WRITE) ? "write" : "read", 267 (bio_data_dir(bio) == WRITE) ? "write" : "read",
268 (unsigned long long) bio->bi_sector, 268 (unsigned long long) bio->bi_iter.bi_sector,
269 (unsigned long long) bio->bi_sector + 269 (unsigned long long) bio_end_sector(bio) - 1);
270 bio_sectors(bio) - 1);
271 270
272 call_bio_endio(r1_bio); 271 call_bio_endio(r1_bio);
273 } 272 }
@@ -466,9 +465,8 @@ static void raid1_end_write_request(struct bio *bio, int error)
466 struct bio *mbio = r1_bio->master_bio; 465 struct bio *mbio = r1_bio->master_bio;
467 pr_debug("raid1: behind end write sectors" 466 pr_debug("raid1: behind end write sectors"
468 " %llu-%llu\n", 467 " %llu-%llu\n",
469 (unsigned long long) mbio->bi_sector, 468 (unsigned long long) mbio->bi_iter.bi_sector,
470 (unsigned long long) mbio->bi_sector + 469 (unsigned long long) bio_end_sector(mbio) - 1);
471 bio_sectors(mbio) - 1);
472 call_bio_endio(r1_bio); 470 call_bio_endio(r1_bio);
473 } 471 }
474 } 472 }
@@ -875,7 +873,7 @@ static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
875 else if ((conf->next_resync - RESYNC_WINDOW_SECTORS 873 else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
876 >= bio_end_sector(bio)) || 874 >= bio_end_sector(bio)) ||
877 (conf->next_resync + NEXT_NORMALIO_DISTANCE 875 (conf->next_resync + NEXT_NORMALIO_DISTANCE
878 <= bio->bi_sector)) 876 <= bio->bi_iter.bi_sector))
879 wait = false; 877 wait = false;
880 else 878 else
881 wait = true; 879 wait = true;
@@ -913,14 +911,14 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
913 911
914 if (bio && bio_data_dir(bio) == WRITE) { 912 if (bio && bio_data_dir(bio) == WRITE) {
915 if (conf->next_resync + NEXT_NORMALIO_DISTANCE 913 if (conf->next_resync + NEXT_NORMALIO_DISTANCE
916 <= bio->bi_sector) { 914 <= bio->bi_iter.bi_sector) {
917 if (conf->start_next_window == MaxSector) 915 if (conf->start_next_window == MaxSector)
918 conf->start_next_window = 916 conf->start_next_window =
919 conf->next_resync + 917 conf->next_resync +
920 NEXT_NORMALIO_DISTANCE; 918 NEXT_NORMALIO_DISTANCE;
921 919
922 if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE) 920 if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
923 <= bio->bi_sector) 921 <= bio->bi_iter.bi_sector)
924 conf->next_window_requests++; 922 conf->next_window_requests++;
925 else 923 else
926 conf->current_window_requests++; 924 conf->current_window_requests++;
@@ -1027,7 +1025,8 @@ do_sync_io:
1027 if (bvecs[i].bv_page) 1025 if (bvecs[i].bv_page)
1028 put_page(bvecs[i].bv_page); 1026 put_page(bvecs[i].bv_page);
1029 kfree(bvecs); 1027 kfree(bvecs);
1030 pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); 1028 pr_debug("%dB behind alloc failed, doing sync I/O\n",
1029 bio->bi_iter.bi_size);
1031} 1030}
1032 1031
1033struct raid1_plug_cb { 1032struct raid1_plug_cb {
@@ -1107,7 +1106,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1107 1106
1108 if (bio_data_dir(bio) == WRITE && 1107 if (bio_data_dir(bio) == WRITE &&
1109 bio_end_sector(bio) > mddev->suspend_lo && 1108 bio_end_sector(bio) > mddev->suspend_lo &&
1110 bio->bi_sector < mddev->suspend_hi) { 1109 bio->bi_iter.bi_sector < mddev->suspend_hi) {
1111 /* As the suspend_* range is controlled by 1110 /* As the suspend_* range is controlled by
1112 * userspace, we want an interruptible 1111 * userspace, we want an interruptible
1113 * wait. 1112 * wait.
@@ -1118,7 +1117,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1118 prepare_to_wait(&conf->wait_barrier, 1117 prepare_to_wait(&conf->wait_barrier,
1119 &w, TASK_INTERRUPTIBLE); 1118 &w, TASK_INTERRUPTIBLE);
1120 if (bio_end_sector(bio) <= mddev->suspend_lo || 1119 if (bio_end_sector(bio) <= mddev->suspend_lo ||
1121 bio->bi_sector >= mddev->suspend_hi) 1120 bio->bi_iter.bi_sector >= mddev->suspend_hi)
1122 break; 1121 break;
1123 schedule(); 1122 schedule();
1124 } 1123 }
@@ -1140,7 +1139,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1140 r1_bio->sectors = bio_sectors(bio); 1139 r1_bio->sectors = bio_sectors(bio);
1141 r1_bio->state = 0; 1140 r1_bio->state = 0;
1142 r1_bio->mddev = mddev; 1141 r1_bio->mddev = mddev;
1143 r1_bio->sector = bio->bi_sector; 1142 r1_bio->sector = bio->bi_iter.bi_sector;
1144 1143
1145 /* We might need to issue multiple reads to different 1144 /* We might need to issue multiple reads to different
1146 * devices if there are bad blocks around, so we keep 1145 * devices if there are bad blocks around, so we keep
@@ -1180,12 +1179,13 @@ read_again:
1180 r1_bio->read_disk = rdisk; 1179 r1_bio->read_disk = rdisk;
1181 1180
1182 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); 1181 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1183 bio_trim(read_bio, r1_bio->sector - bio->bi_sector, 1182 bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
1184 max_sectors); 1183 max_sectors);
1185 1184
1186 r1_bio->bios[rdisk] = read_bio; 1185 r1_bio->bios[rdisk] = read_bio;
1187 1186
1188 read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; 1187 read_bio->bi_iter.bi_sector = r1_bio->sector +
1188 mirror->rdev->data_offset;
1189 read_bio->bi_bdev = mirror->rdev->bdev; 1189 read_bio->bi_bdev = mirror->rdev->bdev;
1190 read_bio->bi_end_io = raid1_end_read_request; 1190 read_bio->bi_end_io = raid1_end_read_request;
1191 read_bio->bi_rw = READ | do_sync; 1191 read_bio->bi_rw = READ | do_sync;
@@ -1197,7 +1197,7 @@ read_again:
1197 */ 1197 */
1198 1198
1199 sectors_handled = (r1_bio->sector + max_sectors 1199 sectors_handled = (r1_bio->sector + max_sectors
1200 - bio->bi_sector); 1200 - bio->bi_iter.bi_sector);
1201 r1_bio->sectors = max_sectors; 1201 r1_bio->sectors = max_sectors;
1202 spin_lock_irq(&conf->device_lock); 1202 spin_lock_irq(&conf->device_lock);
1203 if (bio->bi_phys_segments == 0) 1203 if (bio->bi_phys_segments == 0)
@@ -1218,7 +1218,8 @@ read_again:
1218 r1_bio->sectors = bio_sectors(bio) - sectors_handled; 1218 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1219 r1_bio->state = 0; 1219 r1_bio->state = 0;
1220 r1_bio->mddev = mddev; 1220 r1_bio->mddev = mddev;
1221 r1_bio->sector = bio->bi_sector + sectors_handled; 1221 r1_bio->sector = bio->bi_iter.bi_sector +
1222 sectors_handled;
1222 goto read_again; 1223 goto read_again;
1223 } else 1224 } else
1224 generic_make_request(read_bio); 1225 generic_make_request(read_bio);
@@ -1321,7 +1322,7 @@ read_again:
1321 if (r1_bio->bios[j]) 1322 if (r1_bio->bios[j])
1322 rdev_dec_pending(conf->mirrors[j].rdev, mddev); 1323 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
1323 r1_bio->state = 0; 1324 r1_bio->state = 0;
1324 allow_barrier(conf, start_next_window, bio->bi_sector); 1325 allow_barrier(conf, start_next_window, bio->bi_iter.bi_sector);
1325 md_wait_for_blocked_rdev(blocked_rdev, mddev); 1326 md_wait_for_blocked_rdev(blocked_rdev, mddev);
1326 start_next_window = wait_barrier(conf, bio); 1327 start_next_window = wait_barrier(conf, bio);
1327 /* 1328 /*
@@ -1348,7 +1349,7 @@ read_again:
1348 bio->bi_phys_segments++; 1349 bio->bi_phys_segments++;
1349 spin_unlock_irq(&conf->device_lock); 1350 spin_unlock_irq(&conf->device_lock);
1350 } 1351 }
1351 sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector; 1352 sectors_handled = r1_bio->sector + max_sectors - bio->bi_iter.bi_sector;
1352 1353
1353 atomic_set(&r1_bio->remaining, 1); 1354 atomic_set(&r1_bio->remaining, 1);
1354 atomic_set(&r1_bio->behind_remaining, 0); 1355 atomic_set(&r1_bio->behind_remaining, 0);
@@ -1360,7 +1361,7 @@ read_again:
1360 continue; 1361 continue;
1361 1362
1362 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); 1363 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1363 bio_trim(mbio, r1_bio->sector - bio->bi_sector, max_sectors); 1364 bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
1364 1365
1365 if (first_clone) { 1366 if (first_clone) {
1366 /* do behind I/O ? 1367 /* do behind I/O ?
@@ -1394,7 +1395,7 @@ read_again:
1394 1395
1395 r1_bio->bios[i] = mbio; 1396 r1_bio->bios[i] = mbio;
1396 1397
1397 mbio->bi_sector = (r1_bio->sector + 1398 mbio->bi_iter.bi_sector = (r1_bio->sector +
1398 conf->mirrors[i].rdev->data_offset); 1399 conf->mirrors[i].rdev->data_offset);
1399 mbio->bi_bdev = conf->mirrors[i].rdev->bdev; 1400 mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1400 mbio->bi_end_io = raid1_end_write_request; 1401 mbio->bi_end_io = raid1_end_write_request;
@@ -1434,7 +1435,7 @@ read_again:
1434 r1_bio->sectors = bio_sectors(bio) - sectors_handled; 1435 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1435 r1_bio->state = 0; 1436 r1_bio->state = 0;
1436 r1_bio->mddev = mddev; 1437 r1_bio->mddev = mddev;
1437 r1_bio->sector = bio->bi_sector + sectors_handled; 1438 r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1438 goto retry_write; 1439 goto retry_write;
1439 } 1440 }
1440 1441
@@ -1958,14 +1959,14 @@ static int process_checks(struct r1bio *r1_bio)
1958 /* fixup the bio for reuse */ 1959 /* fixup the bio for reuse */
1959 bio_reset(b); 1960 bio_reset(b);
1960 b->bi_vcnt = vcnt; 1961 b->bi_vcnt = vcnt;
1961 b->bi_size = r1_bio->sectors << 9; 1962 b->bi_iter.bi_size = r1_bio->sectors << 9;
1962 b->bi_sector = r1_bio->sector + 1963 b->bi_iter.bi_sector = r1_bio->sector +
1963 conf->mirrors[i].rdev->data_offset; 1964 conf->mirrors[i].rdev->data_offset;
1964 b->bi_bdev = conf->mirrors[i].rdev->bdev; 1965 b->bi_bdev = conf->mirrors[i].rdev->bdev;
1965 b->bi_end_io = end_sync_read; 1966 b->bi_end_io = end_sync_read;
1966 b->bi_private = r1_bio; 1967 b->bi_private = r1_bio;
1967 1968
1968 size = b->bi_size; 1969 size = b->bi_iter.bi_size;
1969 for (j = 0; j < vcnt ; j++) { 1970 for (j = 0; j < vcnt ; j++) {
1970 struct bio_vec *bi; 1971 struct bio_vec *bi;
1971 bi = &b->bi_io_vec[j]; 1972 bi = &b->bi_io_vec[j];
@@ -2220,11 +2221,11 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
2220 } 2221 }
2221 2222
2222 wbio->bi_rw = WRITE; 2223 wbio->bi_rw = WRITE;
2223 wbio->bi_sector = r1_bio->sector; 2224 wbio->bi_iter.bi_sector = r1_bio->sector;
2224 wbio->bi_size = r1_bio->sectors << 9; 2225 wbio->bi_iter.bi_size = r1_bio->sectors << 9;
2225 2226
2226 bio_trim(wbio, sector - r1_bio->sector, sectors); 2227 bio_trim(wbio, sector - r1_bio->sector, sectors);
2227 wbio->bi_sector += rdev->data_offset; 2228 wbio->bi_iter.bi_sector += rdev->data_offset;
2228 wbio->bi_bdev = rdev->bdev; 2229 wbio->bi_bdev = rdev->bdev;
2229 if (submit_bio_wait(WRITE, wbio) == 0) 2230 if (submit_bio_wait(WRITE, wbio) == 0)
2230 /* failure! */ 2231 /* failure! */
@@ -2338,7 +2339,8 @@ read_more:
2338 } 2339 }
2339 r1_bio->read_disk = disk; 2340 r1_bio->read_disk = disk;
2340 bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); 2341 bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
2341 bio_trim(bio, r1_bio->sector - bio->bi_sector, max_sectors); 2342 bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector,
2343 max_sectors);
2342 r1_bio->bios[r1_bio->read_disk] = bio; 2344 r1_bio->bios[r1_bio->read_disk] = bio;
2343 rdev = conf->mirrors[disk].rdev; 2345 rdev = conf->mirrors[disk].rdev;
2344 printk_ratelimited(KERN_ERR 2346 printk_ratelimited(KERN_ERR
@@ -2347,7 +2349,7 @@ read_more:
2347 mdname(mddev), 2349 mdname(mddev),
2348 (unsigned long long)r1_bio->sector, 2350 (unsigned long long)r1_bio->sector,
2349 bdevname(rdev->bdev, b)); 2351 bdevname(rdev->bdev, b));
2350 bio->bi_sector = r1_bio->sector + rdev->data_offset; 2352 bio->bi_iter.bi_sector = r1_bio->sector + rdev->data_offset;
2351 bio->bi_bdev = rdev->bdev; 2353 bio->bi_bdev = rdev->bdev;
2352 bio->bi_end_io = raid1_end_read_request; 2354 bio->bi_end_io = raid1_end_read_request;
2353 bio->bi_rw = READ | do_sync; 2355 bio->bi_rw = READ | do_sync;
@@ -2356,7 +2358,7 @@ read_more:
2356 /* Drat - have to split this up more */ 2358 /* Drat - have to split this up more */
2357 struct bio *mbio = r1_bio->master_bio; 2359 struct bio *mbio = r1_bio->master_bio;
2358 int sectors_handled = (r1_bio->sector + max_sectors 2360 int sectors_handled = (r1_bio->sector + max_sectors
2359 - mbio->bi_sector); 2361 - mbio->bi_iter.bi_sector);
2360 r1_bio->sectors = max_sectors; 2362 r1_bio->sectors = max_sectors;
2361 spin_lock_irq(&conf->device_lock); 2363 spin_lock_irq(&conf->device_lock);
2362 if (mbio->bi_phys_segments == 0) 2364 if (mbio->bi_phys_segments == 0)
@@ -2374,7 +2376,8 @@ read_more:
2374 r1_bio->state = 0; 2376 r1_bio->state = 0;
2375 set_bit(R1BIO_ReadError, &r1_bio->state); 2377 set_bit(R1BIO_ReadError, &r1_bio->state);
2376 r1_bio->mddev = mddev; 2378 r1_bio->mddev = mddev;
2377 r1_bio->sector = mbio->bi_sector + sectors_handled; 2379 r1_bio->sector = mbio->bi_iter.bi_sector +
2380 sectors_handled;
2378 2381
2379 goto read_more; 2382 goto read_more;
2380 } else 2383 } else
@@ -2598,7 +2601,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2598 } 2601 }
2599 if (bio->bi_end_io) { 2602 if (bio->bi_end_io) {
2600 atomic_inc(&rdev->nr_pending); 2603 atomic_inc(&rdev->nr_pending);
2601 bio->bi_sector = sector_nr + rdev->data_offset; 2604 bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
2602 bio->bi_bdev = rdev->bdev; 2605 bio->bi_bdev = rdev->bdev;
2603 bio->bi_private = r1_bio; 2606 bio->bi_private = r1_bio;
2604 } 2607 }
@@ -2698,7 +2701,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2698 continue; 2701 continue;
2699 /* remove last page from this bio */ 2702 /* remove last page from this bio */
2700 bio->bi_vcnt--; 2703 bio->bi_vcnt--;
2701 bio->bi_size -= len; 2704 bio->bi_iter.bi_size -= len;
2702 bio->bi_flags &= ~(1<< BIO_SEG_VALID); 2705 bio->bi_flags &= ~(1<< BIO_SEG_VALID);
2703 } 2706 }
2704 goto bio_full; 2707 goto bio_full;
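Most of the raid1.c hunks above are the mechanical part of the immutable-biovec conversion: the bio's current position now lives in the embedded struct bvec_iter, so every bi_sector/bi_size access becomes bi_iter.bi_sector/bi_iter.bi_size while the surrounding arithmetic stays the same. The sketch below is a minimal standalone C model (simplified stand-in structs and invented numbers, not the kernel's types) of how sectors_handled and the follow-up r1_bio are derived in the read_again/retry_write paths when a request can only be partially serviced.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* simplified stand-ins for the kernel structures, for illustration only */
struct bvec_iter { sector_t bi_sector; unsigned int bi_size; };
struct bio       { struct bvec_iter bi_iter; };
struct r1bio     { sector_t sector; int sectors; };

static unsigned int bio_sectors(const struct bio *bio)
{
    return bio->bi_iter.bi_size >> 9;        /* 512-byte sectors */
}

int main(void)
{
    struct bio bio = { .bi_iter = { .bi_sector = 1000, .bi_size = 64 << 9 } };
    struct r1bio r1 = { .sector = bio.bi_iter.bi_sector,
                        .sectors = (int)bio_sectors(&bio) };
    int max_sectors = 24;   /* e.g. clipped by a bad block on one mirror */

    /* same arithmetic as the read_again/retry_write paths above */
    int sectors_handled = r1.sector + max_sectors - bio.bi_iter.bi_sector;
    r1.sectors = max_sectors;

    /* a second r1_bio is set up to cover the remainder */
    sector_t next_sector = bio.bi_iter.bi_sector + sectors_handled;
    int remaining = (int)bio_sectors(&bio) - sectors_handled;

    printf("handled %d sectors, next r1_bio at %llu for %d sectors\n",
           sectors_handled, (unsigned long long)next_sector, remaining);
    return 0;
}
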
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8d39d63281b9..33fc408e5eac 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1152 kfree(plug); 1152 kfree(plug);
1153} 1153}
1154 1154
1155static void make_request(struct mddev *mddev, struct bio * bio) 1155static void __make_request(struct mddev *mddev, struct bio *bio)
1156{ 1156{
1157 struct r10conf *conf = mddev->private; 1157 struct r10conf *conf = mddev->private;
1158 struct r10bio *r10_bio; 1158 struct r10bio *r10_bio;
1159 struct bio *read_bio; 1159 struct bio *read_bio;
1160 int i; 1160 int i;
1161 sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
1162 int chunk_sects = chunk_mask + 1;
1163 const int rw = bio_data_dir(bio); 1161 const int rw = bio_data_dir(bio);
1164 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); 1162 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
1165 const unsigned long do_fua = (bio->bi_rw & REQ_FUA); 1163 const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,88 +1172,27 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1174 int max_sectors; 1172 int max_sectors;
1175 int sectors; 1173 int sectors;
1176 1174
1177 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
1178 md_flush_request(mddev, bio);
1179 return;
1180 }
1181
1182 /* If this request crosses a chunk boundary, we need to
1183 * split it. This will only happen for 1 PAGE (or less) requests.
1184 */
1185 if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
1186 > chunk_sects
1187 && (conf->geo.near_copies < conf->geo.raid_disks
1188 || conf->prev.near_copies < conf->prev.raid_disks))) {
1189 struct bio_pair *bp;
1190 /* Sanity check -- queue functions should prevent this happening */
1191 if (bio_segments(bio) > 1)
1192 goto bad_map;
1193 /* This is a one page bio that upper layers
1194 * refuse to split for us, so we need to split it.
1195 */
1196 bp = bio_split(bio,
1197 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
1198
1199 /* Each of these 'make_request' calls will call 'wait_barrier'.
1200 * If the first succeeds but the second blocks due to the resync
1201 * thread raising the barrier, we will deadlock because the
1202 * IO to the underlying device will be queued in generic_make_request
1203 * and will never complete, so will never reduce nr_pending.
1204 * So increment nr_waiting here so no new raise_barriers will
1205 * succeed, and so the second wait_barrier cannot block.
1206 */
1207 spin_lock_irq(&conf->resync_lock);
1208 conf->nr_waiting++;
1209 spin_unlock_irq(&conf->resync_lock);
1210
1211 make_request(mddev, &bp->bio1);
1212 make_request(mddev, &bp->bio2);
1213
1214 spin_lock_irq(&conf->resync_lock);
1215 conf->nr_waiting--;
1216 wake_up(&conf->wait_barrier);
1217 spin_unlock_irq(&conf->resync_lock);
1218
1219 bio_pair_release(bp);
1220 return;
1221 bad_map:
1222 printk("md/raid10:%s: make_request bug: can't convert block across chunks"
1223 " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
1224 (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
1225
1226 bio_io_error(bio);
1227 return;
1228 }
1229
1230 md_write_start(mddev, bio);
1231
1232 /*
1233 * Register the new request and wait if the reconstruction
1234 * thread has put up a bar for new requests.
1235 * Continue immediately if no resync is active currently.
1236 */
1237 wait_barrier(conf);
1238
1239 sectors = bio_sectors(bio); 1175 sectors = bio_sectors(bio);
1240 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 1176 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1241 bio->bi_sector < conf->reshape_progress && 1177 bio->bi_iter.bi_sector < conf->reshape_progress &&
1242 bio->bi_sector + sectors > conf->reshape_progress) { 1178 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
1243 /* IO spans the reshape position. Need to wait for 1179 /* IO spans the reshape position. Need to wait for
1244 * reshape to pass 1180 * reshape to pass
1245 */ 1181 */
1246 allow_barrier(conf); 1182 allow_barrier(conf);
1247 wait_event(conf->wait_barrier, 1183 wait_event(conf->wait_barrier,
1248 conf->reshape_progress <= bio->bi_sector || 1184 conf->reshape_progress <= bio->bi_iter.bi_sector ||
1249 conf->reshape_progress >= bio->bi_sector + sectors); 1185 conf->reshape_progress >= bio->bi_iter.bi_sector +
1186 sectors);
1250 wait_barrier(conf); 1187 wait_barrier(conf);
1251 } 1188 }
1252 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 1189 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1253 bio_data_dir(bio) == WRITE && 1190 bio_data_dir(bio) == WRITE &&
1254 (mddev->reshape_backwards 1191 (mddev->reshape_backwards
1255 ? (bio->bi_sector < conf->reshape_safe && 1192 ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
1256 bio->bi_sector + sectors > conf->reshape_progress) 1193 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
1257 : (bio->bi_sector + sectors > conf->reshape_safe && 1194 : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
1258 bio->bi_sector < conf->reshape_progress))) { 1195 bio->bi_iter.bi_sector < conf->reshape_progress))) {
1259 /* Need to update reshape_position in metadata */ 1196 /* Need to update reshape_position in metadata */
1260 mddev->reshape_position = conf->reshape_progress; 1197 mddev->reshape_position = conf->reshape_progress;
1261 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1198 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1273,7 +1210,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1273 r10_bio->sectors = sectors; 1210 r10_bio->sectors = sectors;
1274 1211
1275 r10_bio->mddev = mddev; 1212 r10_bio->mddev = mddev;
1276 r10_bio->sector = bio->bi_sector; 1213 r10_bio->sector = bio->bi_iter.bi_sector;
1277 r10_bio->state = 0; 1214 r10_bio->state = 0;
1278 1215
1279 /* We might need to issue multiple reads to different 1216 /* We might need to issue multiple reads to different
@@ -1302,13 +1239,13 @@ read_again:
1302 slot = r10_bio->read_slot; 1239 slot = r10_bio->read_slot;
1303 1240
1304 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); 1241 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1305 bio_trim(read_bio, r10_bio->sector - bio->bi_sector, 1242 bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
1306 max_sectors); 1243 max_sectors);
1307 1244
1308 r10_bio->devs[slot].bio = read_bio; 1245 r10_bio->devs[slot].bio = read_bio;
1309 r10_bio->devs[slot].rdev = rdev; 1246 r10_bio->devs[slot].rdev = rdev;
1310 1247
1311 read_bio->bi_sector = r10_bio->devs[slot].addr + 1248 read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
1312 choose_data_offset(r10_bio, rdev); 1249 choose_data_offset(r10_bio, rdev);
1313 read_bio->bi_bdev = rdev->bdev; 1250 read_bio->bi_bdev = rdev->bdev;
1314 read_bio->bi_end_io = raid10_end_read_request; 1251 read_bio->bi_end_io = raid10_end_read_request;
@@ -1320,7 +1257,7 @@ read_again:
1320 * need another r10_bio. 1257 * need another r10_bio.
1321 */ 1258 */
1322 sectors_handled = (r10_bio->sector + max_sectors 1259 sectors_handled = (r10_bio->sector + max_sectors
1323 - bio->bi_sector); 1260 - bio->bi_iter.bi_sector);
1324 r10_bio->sectors = max_sectors; 1261 r10_bio->sectors = max_sectors;
1325 spin_lock_irq(&conf->device_lock); 1262 spin_lock_irq(&conf->device_lock);
1326 if (bio->bi_phys_segments == 0) 1263 if (bio->bi_phys_segments == 0)
@@ -1341,7 +1278,8 @@ read_again:
1341 r10_bio->sectors = bio_sectors(bio) - sectors_handled; 1278 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1342 r10_bio->state = 0; 1279 r10_bio->state = 0;
1343 r10_bio->mddev = mddev; 1280 r10_bio->mddev = mddev;
1344 r10_bio->sector = bio->bi_sector + sectors_handled; 1281 r10_bio->sector = bio->bi_iter.bi_sector +
1282 sectors_handled;
1345 goto read_again; 1283 goto read_again;
1346 } else 1284 } else
1347 generic_make_request(read_bio); 1285 generic_make_request(read_bio);
@@ -1499,7 +1437,8 @@ retry_write:
1499 bio->bi_phys_segments++; 1437 bio->bi_phys_segments++;
1500 spin_unlock_irq(&conf->device_lock); 1438 spin_unlock_irq(&conf->device_lock);
1501 } 1439 }
1502 sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector; 1440 sectors_handled = r10_bio->sector + max_sectors -
1441 bio->bi_iter.bi_sector;
1503 1442
1504 atomic_set(&r10_bio->remaining, 1); 1443 atomic_set(&r10_bio->remaining, 1);
1505 bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); 1444 bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
@@ -1510,11 +1449,11 @@ retry_write:
1510 if (r10_bio->devs[i].bio) { 1449 if (r10_bio->devs[i].bio) {
1511 struct md_rdev *rdev = conf->mirrors[d].rdev; 1450 struct md_rdev *rdev = conf->mirrors[d].rdev;
1512 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); 1451 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1513 bio_trim(mbio, r10_bio->sector - bio->bi_sector, 1452 bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
1514 max_sectors); 1453 max_sectors);
1515 r10_bio->devs[i].bio = mbio; 1454 r10_bio->devs[i].bio = mbio;
1516 1455
1517 mbio->bi_sector = (r10_bio->devs[i].addr+ 1456 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
1518 choose_data_offset(r10_bio, 1457 choose_data_offset(r10_bio,
1519 rdev)); 1458 rdev));
1520 mbio->bi_bdev = rdev->bdev; 1459 mbio->bi_bdev = rdev->bdev;
@@ -1553,11 +1492,11 @@ retry_write:
1553 rdev = conf->mirrors[d].rdev; 1492 rdev = conf->mirrors[d].rdev;
1554 } 1493 }
1555 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); 1494 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1556 bio_trim(mbio, r10_bio->sector - bio->bi_sector, 1495 bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
1557 max_sectors); 1496 max_sectors);
1558 r10_bio->devs[i].repl_bio = mbio; 1497 r10_bio->devs[i].repl_bio = mbio;
1559 1498
1560 mbio->bi_sector = (r10_bio->devs[i].addr + 1499 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
1561 choose_data_offset( 1500 choose_data_offset(
1562 r10_bio, rdev)); 1501 r10_bio, rdev));
1563 mbio->bi_bdev = rdev->bdev; 1502 mbio->bi_bdev = rdev->bdev;
@@ -1591,11 +1530,57 @@ retry_write:
1591 r10_bio->sectors = bio_sectors(bio) - sectors_handled; 1530 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1592 1531
1593 r10_bio->mddev = mddev; 1532 r10_bio->mddev = mddev;
1594 r10_bio->sector = bio->bi_sector + sectors_handled; 1533 r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1595 r10_bio->state = 0; 1534 r10_bio->state = 0;
1596 goto retry_write; 1535 goto retry_write;
1597 } 1536 }
1598 one_write_done(r10_bio); 1537 one_write_done(r10_bio);
1538}
1539
1540static void make_request(struct mddev *mddev, struct bio *bio)
1541{
1542 struct r10conf *conf = mddev->private;
1543 sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
1544 int chunk_sects = chunk_mask + 1;
1545
1546 struct bio *split;
1547
1548 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
1549 md_flush_request(mddev, bio);
1550 return;
1551 }
1552
1553 md_write_start(mddev, bio);
1554
1555 /*
1556 * Register the new request and wait if the reconstruction
1557 * thread has put up a bar for new requests.
1558 * Continue immediately if no resync is active currently.
1559 */
1560 wait_barrier(conf);
1561
1562 do {
1563
1564 /*
1565 * If this request crosses a chunk boundary, we need to split
1566 * it.
1567 */
1568 if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
1569 bio_sectors(bio) > chunk_sects
1570 && (conf->geo.near_copies < conf->geo.raid_disks
1571 || conf->prev.near_copies <
1572 conf->prev.raid_disks))) {
1573 split = bio_split(bio, chunk_sects -
1574 (bio->bi_iter.bi_sector &
1575 (chunk_sects - 1)),
1576 GFP_NOIO, fs_bio_set);
1577 bio_chain(split, bio);
1578 } else {
1579 split = bio;
1580 }
1581
1582 __make_request(mddev, split);
1583 } while (split != bio);
1599 1584
1600 /* In case raid10d snuck in to freeze_array */ 1585 /* In case raid10d snuck in to freeze_array */
1601 wake_up(&conf->wait_barrier); 1586 wake_up(&conf->wait_barrier);
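The rewritten make_request() above no longer recurses via bio_pair; it loops, carving a chunk-aligned front piece off with bio_split(), tying it to the parent with bio_chain() so the parent completes only when every piece does, and handing each piece to __make_request(). That also removes the old nr_waiting workaround that guarded the recursive calls against the resync barrier. A minimal sketch of just the boundary arithmetic (plain C with illustrative values, not the kernel helpers):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

int main(void)
{
    int chunk_sects = 128;                  /* chunk size in sectors (power of two) */
    sector_t chunk_mask = chunk_sects - 1;

    sector_t sector = 1000;                 /* bio->bi_iter.bi_sector */
    unsigned int sectors = 300;             /* bio_sectors(bio) */

    /* mirrors the do/while split loop in the new make_request() */
    while ((sector & chunk_mask) + sectors > (unsigned int)chunk_sects) {
        unsigned int split = chunk_sects - (unsigned int)(sector & chunk_mask);

        printf("split off %u sectors at %llu (stays within one chunk)\n",
               split, (unsigned long long)sector);

        /* in the kernel the front piece is bio_split() off, bio_chain()ed
         * to the parent and submitted; here we just advance the rest */
        sector += split;
        sectors -= split;
    }
    printf("final piece: %u sectors at %llu\n",
           sectors, (unsigned long long)sector);
    return 0;
}
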
@@ -2124,10 +2109,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2124 bio_reset(tbio); 2109 bio_reset(tbio);
2125 2110
2126 tbio->bi_vcnt = vcnt; 2111 tbio->bi_vcnt = vcnt;
2127 tbio->bi_size = r10_bio->sectors << 9; 2112 tbio->bi_iter.bi_size = r10_bio->sectors << 9;
2128 tbio->bi_rw = WRITE; 2113 tbio->bi_rw = WRITE;
2129 tbio->bi_private = r10_bio; 2114 tbio->bi_private = r10_bio;
2130 tbio->bi_sector = r10_bio->devs[i].addr; 2115 tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
2131 2116
2132 for (j=0; j < vcnt ; j++) { 2117 for (j=0; j < vcnt ; j++) {
2133 tbio->bi_io_vec[j].bv_offset = 0; 2118 tbio->bi_io_vec[j].bv_offset = 0;
@@ -2144,7 +2129,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2144 atomic_inc(&r10_bio->remaining); 2129 atomic_inc(&r10_bio->remaining);
2145 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio)); 2130 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
2146 2131
2147 tbio->bi_sector += conf->mirrors[d].rdev->data_offset; 2132 tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
2148 tbio->bi_bdev = conf->mirrors[d].rdev->bdev; 2133 tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
2149 generic_make_request(tbio); 2134 generic_make_request(tbio);
2150 } 2135 }
@@ -2614,8 +2599,8 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
2614 sectors = sect_to_write; 2599 sectors = sect_to_write;
2615 /* Write at 'sector' for 'sectors' */ 2600 /* Write at 'sector' for 'sectors' */
2616 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); 2601 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
2617 bio_trim(wbio, sector - bio->bi_sector, sectors); 2602 bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
2618 wbio->bi_sector = (r10_bio->devs[i].addr+ 2603 wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
2619 choose_data_offset(r10_bio, rdev) + 2604 choose_data_offset(r10_bio, rdev) +
2620 (sector - r10_bio->sector)); 2605 (sector - r10_bio->sector));
2621 wbio->bi_bdev = rdev->bdev; 2606 wbio->bi_bdev = rdev->bdev;
@@ -2687,10 +2672,10 @@ read_more:
2687 (unsigned long long)r10_bio->sector); 2672 (unsigned long long)r10_bio->sector);
2688 bio = bio_clone_mddev(r10_bio->master_bio, 2673 bio = bio_clone_mddev(r10_bio->master_bio,
2689 GFP_NOIO, mddev); 2674 GFP_NOIO, mddev);
2690 bio_trim(bio, r10_bio->sector - bio->bi_sector, max_sectors); 2675 bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
2691 r10_bio->devs[slot].bio = bio; 2676 r10_bio->devs[slot].bio = bio;
2692 r10_bio->devs[slot].rdev = rdev; 2677 r10_bio->devs[slot].rdev = rdev;
2693 bio->bi_sector = r10_bio->devs[slot].addr 2678 bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
2694 + choose_data_offset(r10_bio, rdev); 2679 + choose_data_offset(r10_bio, rdev);
2695 bio->bi_bdev = rdev->bdev; 2680 bio->bi_bdev = rdev->bdev;
2696 bio->bi_rw = READ | do_sync; 2681 bio->bi_rw = READ | do_sync;
@@ -2701,7 +2686,7 @@ read_more:
2701 struct bio *mbio = r10_bio->master_bio; 2686 struct bio *mbio = r10_bio->master_bio;
2702 int sectors_handled = 2687 int sectors_handled =
2703 r10_bio->sector + max_sectors 2688 r10_bio->sector + max_sectors
2704 - mbio->bi_sector; 2689 - mbio->bi_iter.bi_sector;
2705 r10_bio->sectors = max_sectors; 2690 r10_bio->sectors = max_sectors;
2706 spin_lock_irq(&conf->device_lock); 2691 spin_lock_irq(&conf->device_lock);
2707 if (mbio->bi_phys_segments == 0) 2692 if (mbio->bi_phys_segments == 0)
@@ -2719,7 +2704,7 @@ read_more:
2719 set_bit(R10BIO_ReadError, 2704 set_bit(R10BIO_ReadError,
2720 &r10_bio->state); 2705 &r10_bio->state);
2721 r10_bio->mddev = mddev; 2706 r10_bio->mddev = mddev;
2722 r10_bio->sector = mbio->bi_sector 2707 r10_bio->sector = mbio->bi_iter.bi_sector
2723 + sectors_handled; 2708 + sectors_handled;
2724 2709
2725 goto read_more; 2710 goto read_more;
@@ -3157,7 +3142,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3157 bio->bi_end_io = end_sync_read; 3142 bio->bi_end_io = end_sync_read;
3158 bio->bi_rw = READ; 3143 bio->bi_rw = READ;
3159 from_addr = r10_bio->devs[j].addr; 3144 from_addr = r10_bio->devs[j].addr;
3160 bio->bi_sector = from_addr + rdev->data_offset; 3145 bio->bi_iter.bi_sector = from_addr +
3146 rdev->data_offset;
3161 bio->bi_bdev = rdev->bdev; 3147 bio->bi_bdev = rdev->bdev;
3162 atomic_inc(&rdev->nr_pending); 3148 atomic_inc(&rdev->nr_pending);
3163 /* and we write to 'i' (if not in_sync) */ 3149 /* and we write to 'i' (if not in_sync) */
@@ -3181,7 +3167,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3181 bio->bi_private = r10_bio; 3167 bio->bi_private = r10_bio;
3182 bio->bi_end_io = end_sync_write; 3168 bio->bi_end_io = end_sync_write;
3183 bio->bi_rw = WRITE; 3169 bio->bi_rw = WRITE;
3184 bio->bi_sector = to_addr 3170 bio->bi_iter.bi_sector = to_addr
3185 + rdev->data_offset; 3171 + rdev->data_offset;
3186 bio->bi_bdev = rdev->bdev; 3172 bio->bi_bdev = rdev->bdev;
3187 atomic_inc(&r10_bio->remaining); 3173 atomic_inc(&r10_bio->remaining);
@@ -3210,7 +3196,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3210 bio->bi_private = r10_bio; 3196 bio->bi_private = r10_bio;
3211 bio->bi_end_io = end_sync_write; 3197 bio->bi_end_io = end_sync_write;
3212 bio->bi_rw = WRITE; 3198 bio->bi_rw = WRITE;
3213 bio->bi_sector = to_addr + rdev->data_offset; 3199 bio->bi_iter.bi_sector = to_addr +
3200 rdev->data_offset;
3214 bio->bi_bdev = rdev->bdev; 3201 bio->bi_bdev = rdev->bdev;
3215 atomic_inc(&r10_bio->remaining); 3202 atomic_inc(&r10_bio->remaining);
3216 break; 3203 break;
@@ -3328,7 +3315,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3328 bio->bi_private = r10_bio; 3315 bio->bi_private = r10_bio;
3329 bio->bi_end_io = end_sync_read; 3316 bio->bi_end_io = end_sync_read;
3330 bio->bi_rw = READ; 3317 bio->bi_rw = READ;
3331 bio->bi_sector = sector + 3318 bio->bi_iter.bi_sector = sector +
3332 conf->mirrors[d].rdev->data_offset; 3319 conf->mirrors[d].rdev->data_offset;
3333 bio->bi_bdev = conf->mirrors[d].rdev->bdev; 3320 bio->bi_bdev = conf->mirrors[d].rdev->bdev;
3334 count++; 3321 count++;
@@ -3350,7 +3337,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3350 bio->bi_private = r10_bio; 3337 bio->bi_private = r10_bio;
3351 bio->bi_end_io = end_sync_write; 3338 bio->bi_end_io = end_sync_write;
3352 bio->bi_rw = WRITE; 3339 bio->bi_rw = WRITE;
3353 bio->bi_sector = sector + 3340 bio->bi_iter.bi_sector = sector +
3354 conf->mirrors[d].replacement->data_offset; 3341 conf->mirrors[d].replacement->data_offset;
3355 bio->bi_bdev = conf->mirrors[d].replacement->bdev; 3342 bio->bi_bdev = conf->mirrors[d].replacement->bdev;
3356 count++; 3343 count++;
@@ -3397,7 +3384,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3397 bio2 = bio2->bi_next) { 3384 bio2 = bio2->bi_next) {
3398 /* remove last page from this bio */ 3385 /* remove last page from this bio */
3399 bio2->bi_vcnt--; 3386 bio2->bi_vcnt--;
3400 bio2->bi_size -= len; 3387 bio2->bi_iter.bi_size -= len;
3401 bio2->bi_flags &= ~(1<< BIO_SEG_VALID); 3388 bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
3402 } 3389 }
3403 goto bio_full; 3390 goto bio_full;
@@ -4418,7 +4405,7 @@ read_more:
4418 read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev); 4405 read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
4419 4406
4420 read_bio->bi_bdev = rdev->bdev; 4407 read_bio->bi_bdev = rdev->bdev;
4421 read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr 4408 read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
4422 + rdev->data_offset); 4409 + rdev->data_offset);
4423 read_bio->bi_private = r10_bio; 4410 read_bio->bi_private = r10_bio;
4424 read_bio->bi_end_io = end_sync_read; 4411 read_bio->bi_end_io = end_sync_read;
@@ -4426,7 +4413,7 @@ read_more:
4426 read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); 4413 read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
4427 read_bio->bi_flags |= 1 << BIO_UPTODATE; 4414 read_bio->bi_flags |= 1 << BIO_UPTODATE;
4428 read_bio->bi_vcnt = 0; 4415 read_bio->bi_vcnt = 0;
4429 read_bio->bi_size = 0; 4416 read_bio->bi_iter.bi_size = 0;
4430 r10_bio->master_bio = read_bio; 4417 r10_bio->master_bio = read_bio;
4431 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; 4418 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
4432 4419
@@ -4452,7 +4439,8 @@ read_more:
4452 4439
4453 bio_reset(b); 4440 bio_reset(b);
4454 b->bi_bdev = rdev2->bdev; 4441 b->bi_bdev = rdev2->bdev;
4455 b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; 4442 b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
4443 rdev2->new_data_offset;
4456 b->bi_private = r10_bio; 4444 b->bi_private = r10_bio;
4457 b->bi_end_io = end_reshape_write; 4445 b->bi_end_io = end_reshape_write;
4458 b->bi_rw = WRITE; 4446 b->bi_rw = WRITE;
@@ -4479,7 +4467,7 @@ read_more:
4479 bio2 = bio2->bi_next) { 4467 bio2 = bio2->bi_next) {
4480 /* Remove last page from this bio */ 4468 /* Remove last page from this bio */
4481 bio2->bi_vcnt--; 4469 bio2->bi_vcnt--;
4482 bio2->bi_size -= len; 4470 bio2->bi_iter.bi_size -= len;
4483 bio2->bi_flags &= ~(1<<BIO_SEG_VALID); 4471 bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
4484 } 4472 }
4485 goto bio_full; 4473 goto bio_full;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 03f82ab87d9e..f1feadeb7bb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -133,7 +133,7 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
133static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) 133static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
134{ 134{
135 int sectors = bio_sectors(bio); 135 int sectors = bio_sectors(bio);
136 if (bio->bi_sector + sectors < sector + STRIPE_SECTORS) 136 if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
137 return bio->bi_next; 137 return bio->bi_next;
138 else 138 else
139 return NULL; 139 return NULL;
@@ -225,7 +225,7 @@ static void return_io(struct bio *return_bi)
225 225
226 return_bi = bi->bi_next; 226 return_bi = bi->bi_next;
227 bi->bi_next = NULL; 227 bi->bi_next = NULL;
228 bi->bi_size = 0; 228 bi->bi_iter.bi_size = 0;
229 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), 229 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
230 bi, 0); 230 bi, 0);
231 bio_endio(bi, 0); 231 bio_endio(bi, 0);
@@ -852,10 +852,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
852 bi->bi_rw, i); 852 bi->bi_rw, i);
853 atomic_inc(&sh->count); 853 atomic_inc(&sh->count);
854 if (use_new_offset(conf, sh)) 854 if (use_new_offset(conf, sh))
855 bi->bi_sector = (sh->sector 855 bi->bi_iter.bi_sector = (sh->sector
856 + rdev->new_data_offset); 856 + rdev->new_data_offset);
857 else 857 else
858 bi->bi_sector = (sh->sector 858 bi->bi_iter.bi_sector = (sh->sector
859 + rdev->data_offset); 859 + rdev->data_offset);
860 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) 860 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
861 bi->bi_rw |= REQ_NOMERGE; 861 bi->bi_rw |= REQ_NOMERGE;
@@ -863,7 +863,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
863 bi->bi_vcnt = 1; 863 bi->bi_vcnt = 1;
864 bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 864 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
865 bi->bi_io_vec[0].bv_offset = 0; 865 bi->bi_io_vec[0].bv_offset = 0;
866 bi->bi_size = STRIPE_SIZE; 866 bi->bi_iter.bi_size = STRIPE_SIZE;
867 /* 867 /*
868 * If this is discard request, set bi_vcnt 0. We don't 868 * If this is discard request, set bi_vcnt 0. We don't
869 * want to confuse SCSI because SCSI will replace payload 869 * want to confuse SCSI because SCSI will replace payload
@@ -899,15 +899,15 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
899 rbi->bi_rw, i); 899 rbi->bi_rw, i);
900 atomic_inc(&sh->count); 900 atomic_inc(&sh->count);
901 if (use_new_offset(conf, sh)) 901 if (use_new_offset(conf, sh))
902 rbi->bi_sector = (sh->sector 902 rbi->bi_iter.bi_sector = (sh->sector
903 + rrdev->new_data_offset); 903 + rrdev->new_data_offset);
904 else 904 else
905 rbi->bi_sector = (sh->sector 905 rbi->bi_iter.bi_sector = (sh->sector
906 + rrdev->data_offset); 906 + rrdev->data_offset);
907 rbi->bi_vcnt = 1; 907 rbi->bi_vcnt = 1;
908 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; 908 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
909 rbi->bi_io_vec[0].bv_offset = 0; 909 rbi->bi_io_vec[0].bv_offset = 0;
910 rbi->bi_size = STRIPE_SIZE; 910 rbi->bi_iter.bi_size = STRIPE_SIZE;
911 /* 911 /*
912 * If this is discard request, set bi_vcnt 0. We don't 912 * If this is discard request, set bi_vcnt 0. We don't
913 * want to confuse SCSI because SCSI will replace payload 913 * want to confuse SCSI because SCSI will replace payload
@@ -935,24 +935,24 @@ static struct dma_async_tx_descriptor *
935async_copy_data(int frombio, struct bio *bio, struct page *page, 935async_copy_data(int frombio, struct bio *bio, struct page *page,
936 sector_t sector, struct dma_async_tx_descriptor *tx) 936 sector_t sector, struct dma_async_tx_descriptor *tx)
937{ 937{
938 struct bio_vec *bvl; 938 struct bio_vec bvl;
939 struct bvec_iter iter;
939 struct page *bio_page; 940 struct page *bio_page;
940 int i;
941 int page_offset; 941 int page_offset;
942 struct async_submit_ctl submit; 942 struct async_submit_ctl submit;
943 enum async_tx_flags flags = 0; 943 enum async_tx_flags flags = 0;
944 944
945 if (bio->bi_sector >= sector) 945 if (bio->bi_iter.bi_sector >= sector)
946 page_offset = (signed)(bio->bi_sector - sector) * 512; 946 page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
947 else 947 else
948 page_offset = (signed)(sector - bio->bi_sector) * -512; 948 page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;
949 949
950 if (frombio) 950 if (frombio)
951 flags |= ASYNC_TX_FENCE; 951 flags |= ASYNC_TX_FENCE;
952 init_async_submit(&submit, flags, tx, NULL, NULL, NULL); 952 init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
953 953
954 bio_for_each_segment(bvl, bio, i) { 954 bio_for_each_segment(bvl, bio, iter) {
955 int len = bvl->bv_len; 955 int len = bvl.bv_len;
956 int clen; 956 int clen;
957 int b_offset = 0; 957 int b_offset = 0;
958 958
@@ -968,8 +968,8 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
968 clen = len; 968 clen = len;
969 969
970 if (clen > 0) { 970 if (clen > 0) {
971 b_offset += bvl->bv_offset; 971 b_offset += bvl.bv_offset;
972 bio_page = bvl->bv_page; 972 bio_page = bvl.bv_page;
973 if (frombio) 973 if (frombio)
974 tx = async_memcpy(page, bio_page, page_offset, 974 tx = async_memcpy(page, bio_page, page_offset,
975 b_offset, clen, &submit); 975 b_offset, clen, &submit);
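In async_copy_data() the segment walk now uses the by-value iterator (struct bio_vec bvl plus struct bvec_iter iter) instead of indexing bi_io_vec with an integer, so the bio's vector can stay immutable while progress is tracked in the iterator. The clipping of each segment against the stripe page is unchanged; the following is a standalone C approximation of that clen/b_offset logic, with invented segment sizes and the async_memcpy() calls replaced by a byte counter.

#include <stdio.h>

#define STRIPE_SIZE 4096                 /* one stripe page, as in raid5 */

struct bio_vec { unsigned int bv_len; unsigned int bv_offset; };

int main(void)
{
    /* segments of a bio that starts 1024 bytes before the stripe page */
    struct bio_vec vecs[] = { { 2048, 0 }, { 4096, 0 }, { 1024, 0 } };
    int page_offset = -1024;             /* (bio sector - dev sector) * 512 */
    unsigned int copied = 0;

    for (unsigned int i = 0; i < sizeof(vecs) / sizeof(vecs[0]); i++) {
        struct bio_vec bvl = vecs[i];    /* by value, like the new iterator */
        int len = bvl.bv_len;
        int clen;
        int b_offset = 0;

        /* clip the segment to the [0, STRIPE_SIZE) window of the page,
         * following the same steps as async_copy_data() above */
        if (page_offset < 0) {
            b_offset = -page_offset;
            page_offset += b_offset;
            len -= b_offset;
        }
        if (len > 0 && page_offset + len > STRIPE_SIZE)
            clen = STRIPE_SIZE - page_offset;
        else
            clen = len;

        if (clen > 0)
            copied += clen;              /* the kernel issues async_memcpy() here */

        if (clen < len)                  /* hit the end of the stripe page */
            break;
        page_offset += len;
    }
    printf("copied %u of %u bytes into the stripe page\n", copied, STRIPE_SIZE);
    return 0;
}
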
@@ -1012,7 +1012,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
1012 BUG_ON(!dev->read); 1012 BUG_ON(!dev->read);
1013 rbi = dev->read; 1013 rbi = dev->read;
1014 dev->read = NULL; 1014 dev->read = NULL;
1015 while (rbi && rbi->bi_sector < 1015 while (rbi && rbi->bi_iter.bi_sector <
1016 dev->sector + STRIPE_SECTORS) { 1016 dev->sector + STRIPE_SECTORS) {
1017 rbi2 = r5_next_bio(rbi, dev->sector); 1017 rbi2 = r5_next_bio(rbi, dev->sector);
1018 if (!raid5_dec_bi_active_stripes(rbi)) { 1018 if (!raid5_dec_bi_active_stripes(rbi)) {
@@ -1048,7 +1048,7 @@ static void ops_run_biofill(struct stripe_head *sh)
1048 dev->read = rbi = dev->toread; 1048 dev->read = rbi = dev->toread;
1049 dev->toread = NULL; 1049 dev->toread = NULL;
1050 spin_unlock_irq(&sh->stripe_lock); 1050 spin_unlock_irq(&sh->stripe_lock);
1051 while (rbi && rbi->bi_sector < 1051 while (rbi && rbi->bi_iter.bi_sector <
1052 dev->sector + STRIPE_SECTORS) { 1052 dev->sector + STRIPE_SECTORS) {
1053 tx = async_copy_data(0, rbi, dev->page, 1053 tx = async_copy_data(0, rbi, dev->page,
1054 dev->sector, tx); 1054 dev->sector, tx);
@@ -1390,7 +1390,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1390 wbi = dev->written = chosen; 1390 wbi = dev->written = chosen;
1391 spin_unlock_irq(&sh->stripe_lock); 1391 spin_unlock_irq(&sh->stripe_lock);
1392 1392
1393 while (wbi && wbi->bi_sector < 1393 while (wbi && wbi->bi_iter.bi_sector <
1394 dev->sector + STRIPE_SECTORS) { 1394 dev->sector + STRIPE_SECTORS) {
1395 if (wbi->bi_rw & REQ_FUA) 1395 if (wbi->bi_rw & REQ_FUA)
1396 set_bit(R5_WantFUA, &dev->flags); 1396 set_bit(R5_WantFUA, &dev->flags);
@@ -2615,7 +2615,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2615 int firstwrite=0; 2615 int firstwrite=0;
2616 2616
2617 pr_debug("adding bi b#%llu to stripe s#%llu\n", 2617 pr_debug("adding bi b#%llu to stripe s#%llu\n",
2618 (unsigned long long)bi->bi_sector, 2618 (unsigned long long)bi->bi_iter.bi_sector,
2619 (unsigned long long)sh->sector); 2619 (unsigned long long)sh->sector);
2620 2620
2621 /* 2621 /*
@@ -2633,12 +2633,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2633 firstwrite = 1; 2633 firstwrite = 1;
2634 } else 2634 } else
2635 bip = &sh->dev[dd_idx].toread; 2635 bip = &sh->dev[dd_idx].toread;
2636 while (*bip && (*bip)->bi_sector < bi->bi_sector) { 2636 while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
2637 if (bio_end_sector(*bip) > bi->bi_sector) 2637 if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
2638 goto overlap; 2638 goto overlap;
2639 bip = & (*bip)->bi_next; 2639 bip = & (*bip)->bi_next;
2640 } 2640 }
2641 if (*bip && (*bip)->bi_sector < bio_end_sector(bi)) 2641 if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
2642 goto overlap; 2642 goto overlap;
2643 2643
2644 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); 2644 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2652,7 +2652,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2652 sector_t sector = sh->dev[dd_idx].sector; 2652 sector_t sector = sh->dev[dd_idx].sector;
2653 for (bi=sh->dev[dd_idx].towrite; 2653 for (bi=sh->dev[dd_idx].towrite;
2654 sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && 2654 sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
2655 bi && bi->bi_sector <= sector; 2655 bi && bi->bi_iter.bi_sector <= sector;
2656 bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { 2656 bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
2657 if (bio_end_sector(bi) >= sector) 2657 if (bio_end_sector(bi) >= sector)
2658 sector = bio_end_sector(bi); 2658 sector = bio_end_sector(bi);
@@ -2662,7 +2662,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2662 } 2662 }
2663 2663
2664 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", 2664 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
2665 (unsigned long long)(*bip)->bi_sector, 2665 (unsigned long long)(*bip)->bi_iter.bi_sector,
2666 (unsigned long long)sh->sector, dd_idx); 2666 (unsigned long long)sh->sector, dd_idx);
2667 spin_unlock_irq(&sh->stripe_lock); 2667 spin_unlock_irq(&sh->stripe_lock);
2668 2668
@@ -2737,7 +2737,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2737 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 2737 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
2738 wake_up(&conf->wait_for_overlap); 2738 wake_up(&conf->wait_for_overlap);
2739 2739
2740 while (bi && bi->bi_sector < 2740 while (bi && bi->bi_iter.bi_sector <
2741 sh->dev[i].sector + STRIPE_SECTORS) { 2741 sh->dev[i].sector + STRIPE_SECTORS) {
2742 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 2742 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
2743 clear_bit(BIO_UPTODATE, &bi->bi_flags); 2743 clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2756,7 +2756,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2756 bi = sh->dev[i].written; 2756 bi = sh->dev[i].written;
2757 sh->dev[i].written = NULL; 2757 sh->dev[i].written = NULL;
2758 if (bi) bitmap_end = 1; 2758 if (bi) bitmap_end = 1;
2759 while (bi && bi->bi_sector < 2759 while (bi && bi->bi_iter.bi_sector <
2760 sh->dev[i].sector + STRIPE_SECTORS) { 2760 sh->dev[i].sector + STRIPE_SECTORS) {
2761 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 2761 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
2762 clear_bit(BIO_UPTODATE, &bi->bi_flags); 2762 clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2780,7 +2780,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2780 spin_unlock_irq(&sh->stripe_lock); 2780 spin_unlock_irq(&sh->stripe_lock);
2781 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 2781 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
2782 wake_up(&conf->wait_for_overlap); 2782 wake_up(&conf->wait_for_overlap);
2783 while (bi && bi->bi_sector < 2783 while (bi && bi->bi_iter.bi_sector <
2784 sh->dev[i].sector + STRIPE_SECTORS) { 2784 sh->dev[i].sector + STRIPE_SECTORS) {
2785 struct bio *nextbi = 2785 struct bio *nextbi =
2786 r5_next_bio(bi, sh->dev[i].sector); 2786 r5_next_bio(bi, sh->dev[i].sector);
@@ -3004,7 +3004,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
3004 clear_bit(R5_UPTODATE, &dev->flags); 3004 clear_bit(R5_UPTODATE, &dev->flags);
3005 wbi = dev->written; 3005 wbi = dev->written;
3006 dev->written = NULL; 3006 dev->written = NULL;
3007 while (wbi && wbi->bi_sector < 3007 while (wbi && wbi->bi_iter.bi_sector <
3008 dev->sector + STRIPE_SECTORS) { 3008 dev->sector + STRIPE_SECTORS) {
3009 wbi2 = r5_next_bio(wbi, dev->sector); 3009 wbi2 = r5_next_bio(wbi, dev->sector);
3010 if (!raid5_dec_bi_active_stripes(wbi)) { 3010 if (!raid5_dec_bi_active_stripes(wbi)) {
@@ -4096,7 +4096,7 @@ static int raid5_mergeable_bvec(struct request_queue *q,
4096 4096
4097static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) 4097static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
4098{ 4098{
4099 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 4099 sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
4100 unsigned int chunk_sectors = mddev->chunk_sectors; 4100 unsigned int chunk_sectors = mddev->chunk_sectors;
4101 unsigned int bio_sectors = bio_sectors(bio); 4101 unsigned int bio_sectors = bio_sectors(bio);
4102 4102
@@ -4233,9 +4233,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4233 /* 4233 /*
4234 * compute position 4234 * compute position
4235 */ 4235 */
4236 align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector, 4236 align_bi->bi_iter.bi_sector =
4237 0, 4237 raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
4238 &dd_idx, NULL); 4238 0, &dd_idx, NULL);
4239 4239
4240 end_sector = bio_end_sector(align_bi); 4240 end_sector = bio_end_sector(align_bi);
4241 rcu_read_lock(); 4241 rcu_read_lock();
@@ -4260,7 +4260,8 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4260 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); 4260 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
4261 4261
4262 if (!bio_fits_rdev(align_bi) || 4262 if (!bio_fits_rdev(align_bi) ||
4263 is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi), 4263 is_badblock(rdev, align_bi->bi_iter.bi_sector,
4264 bio_sectors(align_bi),
4264 &first_bad, &bad_sectors)) { 4265 &first_bad, &bad_sectors)) {
4265 /* too big in some way, or has a known bad block */ 4266 /* too big in some way, or has a known bad block */
4266 bio_put(align_bi); 4267 bio_put(align_bi);
@@ -4269,7 +4270,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4269 } 4270 }
4270 4271
4271 /* No reshape active, so we can trust rdev->data_offset */ 4272 /* No reshape active, so we can trust rdev->data_offset */
4272 align_bi->bi_sector += rdev->data_offset; 4273 align_bi->bi_iter.bi_sector += rdev->data_offset;
4273 4274
4274 spin_lock_irq(&conf->device_lock); 4275 spin_lock_irq(&conf->device_lock);
4275 wait_event_lock_irq(conf->wait_for_stripe, 4276 wait_event_lock_irq(conf->wait_for_stripe,
@@ -4281,7 +4282,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4281 if (mddev->gendisk) 4282 if (mddev->gendisk)
4282 trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), 4283 trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
4283 align_bi, disk_devt(mddev->gendisk), 4284 align_bi, disk_devt(mddev->gendisk),
4284 raid_bio->bi_sector); 4285 raid_bio->bi_iter.bi_sector);
4285 generic_make_request(align_bi); 4286 generic_make_request(align_bi);
4286 return 1; 4287 return 1;
4287 } else { 4288 } else {
@@ -4464,8 +4465,8 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
4464 /* Skip discard while reshape is happening */ 4465 /* Skip discard while reshape is happening */
4465 return; 4466 return;
4466 4467
4467 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 4468 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
4468 last_sector = bi->bi_sector + (bi->bi_size>>9); 4469 last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
4469 4470
4470 bi->bi_next = NULL; 4471 bi->bi_next = NULL;
4471 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 4472 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -4569,7 +4570,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4569 return; 4570 return;
4570 } 4571 }
4571 4572
4572 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 4573 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
4573 last_sector = bio_end_sector(bi); 4574 last_sector = bio_end_sector(bi);
4574 bi->bi_next = NULL; 4575 bi->bi_next = NULL;
4575 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 4576 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -5053,7 +5054,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
5053 int remaining; 5054 int remaining;
5054 int handled = 0; 5055 int handled = 0;
5055 5056
5056 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 5057 logical_sector = raid_bio->bi_iter.bi_sector &
5058 ~((sector_t)STRIPE_SECTORS-1);
5057 sector = raid5_compute_sector(conf, logical_sector, 5059 sector = raid5_compute_sector(conf, logical_sector,
5058 0, &dd_idx, NULL); 5060 0, &dd_idx, NULL);
5059 last_sector = bio_end_sector(raid_bio); 5061 last_sector = bio_end_sector(raid_bio);
@@ -6101,6 +6103,7 @@ static int run(struct mddev *mddev)
6101 blk_queue_io_min(mddev->queue, chunk_size); 6103 blk_queue_io_min(mddev->queue, chunk_size);
6102 blk_queue_io_opt(mddev->queue, chunk_size * 6104 blk_queue_io_opt(mddev->queue, chunk_size *
6103 (conf->raid_disks - conf->max_degraded)); 6105 (conf->raid_disks - conf->max_degraded));
6106 mddev->queue->limits.raid_partial_stripes_expensive = 1;
6104 /* 6107 /*
6105 * We can only discard a whole stripe. It doesn't make sense to 6108 * We can only discard a whole stripe. It doesn't make sense to
6106 * discard data disk but write parity disk 6109 * discard data disk but write parity disk
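Elsewhere in raid5.c, make_request(), make_discard_request() and retry_aligned_read() all round the starting sector down to a stripe boundary and then walk the request one STRIPE_SECTORS step at a time; only the source of the start and length changed (bi_iter.bi_sector and bi_iter.bi_size). A small standalone model of that rounding and walk, with illustrative numbers:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

#define STRIPE_SECTORS 8        /* 4KiB stripe unit in 512-byte sectors */

int main(void)
{
    struct { sector_t bi_sector; unsigned int bi_size; } bi_iter =
        { .bi_sector = 1003, .bi_size = 20 << 9 };   /* 20 sectors */

    /* round down to a stripe boundary, as in make_request() above */
    sector_t logical_sector = bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS - 1);
    sector_t last_sector = bi_iter.bi_sector + (bi_iter.bi_size >> 9);

    for (; logical_sector < last_sector; logical_sector += STRIPE_SECTORS)
        printf("stripe covering sectors [%llu, %llu)\n",
               (unsigned long long)logical_sector,
               (unsigned long long)(logical_sector + STRIPE_SECTORS));
    return 0;
}
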
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index dd239bdbfcb4..00d339c361fc 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2235 } 2235 }
2236 2236
2237 /* do we need to support multiple segments? */ 2237 /* do we need to support multiple segments? */
2238 if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { 2238 if (bio_multiple_segments(req->bio) ||
2239 printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n", 2239 bio_multiple_segments(rsp->bio)) {
2240 ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req), 2240 printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u, rsp %u\n",
2241 bio_segments(rsp->bio), blk_rq_bytes(rsp)); 2241 ioc->name, __func__, blk_rq_bytes(req), blk_rq_bytes(rsp));
2242 return -EINVAL; 2242 return -EINVAL;
2243 } 2243 }
2244 2244
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 8aa20df55e50..507d9a9ee69a 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -1764,7 +1764,7 @@ static struct usb_device_id ar5523_id_table[] = {
1764 AR5523_DEVICE_UG(0x07d1, 0x3a07), /* D-Link / WUA-2340 rev A1 */ 1764 AR5523_DEVICE_UG(0x07d1, 0x3a07), /* D-Link / WUA-2340 rev A1 */
1765 AR5523_DEVICE_UG(0x1690, 0x0712), /* Gigaset / AR5523 */ 1765 AR5523_DEVICE_UG(0x1690, 0x0712), /* Gigaset / AR5523 */
1766 AR5523_DEVICE_UG(0x1690, 0x0710), /* Gigaset / SMCWUSBTG */ 1766 AR5523_DEVICE_UG(0x1690, 0x0710), /* Gigaset / SMCWUSBTG */
1767 AR5523_DEVICE_UG(0x129b, 0x160c), /* Gigaset / USB stick 108 1767 AR5523_DEVICE_UG(0x129b, 0x160b), /* Gigaset / USB stick 108
1768 (CyberTAN Technology) */ 1768 (CyberTAN Technology) */
1769 AR5523_DEVICE_UG(0x16ab, 0x7801), /* Globalsun / AR5523_1 */ 1769 AR5523_DEVICE_UG(0x16ab, 0x7801), /* Globalsun / AR5523_1 */
1770 AR5523_DEVICE_UX(0x16ab, 0x7811), /* Globalsun / AR5523_2 */ 1770 AR5523_DEVICE_UX(0x16ab, 0x7811), /* Globalsun / AR5523_2 */
diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index d6bc7cb61bfb..1a2973b7acf2 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -110,7 +110,7 @@ ath5k_hw_radio_revision(struct ath5k_hw *ah, enum ieee80211_band band)
110 ath5k_hw_reg_write(ah, 0x00010000, AR5K_PHY(0x20)); 110 ath5k_hw_reg_write(ah, 0x00010000, AR5K_PHY(0x20));
111 111
112 if (ah->ah_version == AR5K_AR5210) { 112 if (ah->ah_version == AR5K_AR5210) {
113 srev = ath5k_hw_reg_read(ah, AR5K_PHY(256) >> 28) & 0xf; 113 srev = (ath5k_hw_reg_read(ah, AR5K_PHY(256)) >> 28) & 0xf;
114 ret = (u16)ath5k_hw_bitswap(srev, 4) + 1; 114 ret = (u16)ath5k_hw_bitswap(srev, 4) + 1;
115 } else { 115 } else {
116 srev = (ath5k_hw_reg_read(ah, AR5K_PHY(0x100)) >> 24) & 0xff; 116 srev = (ath5k_hw_reg_read(ah, AR5K_PHY(0x100)) >> 24) & 0xff;
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
index 25243cbc07f0..b8daff78b9d1 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
@@ -5065,6 +5065,10 @@ static u16 ar9003_hw_get_max_edge_power(struct ar9300_eeprom *eep,
5065 break; 5065 break;
5066 } 5066 }
5067 } 5067 }
5068
5069 if (is2GHz && !twiceMaxEdgePower)
5070 twiceMaxEdgePower = 60;
5071
5068 return twiceMaxEdgePower; 5072 return twiceMaxEdgePower;
5069} 5073}
5070 5074
diff --git a/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h b/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h
index 1cc13569b17b..1b6b4d0cfa97 100644
--- a/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h
@@ -57,7 +57,7 @@ static const u32 ar9462_2p0_baseband_postamble[][5] = {
57 {0x00009e14, 0x37b95d5e, 0x37b9605e, 0x3236605e, 0x32365a5e}, 57 {0x00009e14, 0x37b95d5e, 0x37b9605e, 0x3236605e, 0x32365a5e},
58 {0x00009e18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, 58 {0x00009e18, 0x00000000, 0x00000000, 0x00000000, 0x00000000},
59 {0x00009e1c, 0x0001cf9c, 0x0001cf9c, 0x00021f9c, 0x00021f9c}, 59 {0x00009e1c, 0x0001cf9c, 0x0001cf9c, 0x00021f9c, 0x00021f9c},
60 {0x00009e20, 0x000003b5, 0x000003b5, 0x000003ce, 0x000003ce}, 60 {0x00009e20, 0x000003a5, 0x000003a5, 0x000003a5, 0x000003a5},
61 {0x00009e2c, 0x0000001c, 0x0000001c, 0x00000021, 0x00000021}, 61 {0x00009e2c, 0x0000001c, 0x0000001c, 0x00000021, 0x00000021},
62 {0x00009e3c, 0xcf946220, 0xcf946220, 0xcfd5c782, 0xcfd5c282}, 62 {0x00009e3c, 0xcf946220, 0xcf946220, 0xcfd5c782, 0xcfd5c282},
63 {0x00009e44, 0x62321e27, 0x62321e27, 0xfe291e27, 0xfe291e27}, 63 {0x00009e44, 0x62321e27, 0x62321e27, 0xfe291e27, 0xfe291e27},
@@ -96,7 +96,7 @@ static const u32 ar9462_2p0_baseband_postamble[][5] = {
96 {0x0000ae04, 0x001c0000, 0x001c0000, 0x001c0000, 0x00100000}, 96 {0x0000ae04, 0x001c0000, 0x001c0000, 0x001c0000, 0x00100000},
97 {0x0000ae18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, 97 {0x0000ae18, 0x00000000, 0x00000000, 0x00000000, 0x00000000},
98 {0x0000ae1c, 0x0000019c, 0x0000019c, 0x0000019c, 0x0000019c}, 98 {0x0000ae1c, 0x0000019c, 0x0000019c, 0x0000019c, 0x0000019c},
99 {0x0000ae20, 0x000001b5, 0x000001b5, 0x000001ce, 0x000001ce}, 99 {0x0000ae20, 0x000001a6, 0x000001a6, 0x000001aa, 0x000001aa},
100 {0x0000b284, 0x00000000, 0x00000000, 0x00000550, 0x00000550}, 100 {0x0000b284, 0x00000000, 0x00000000, 0x00000550, 0x00000550},
101}; 101};
102 102
diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
index 58da3468d1f0..99a203174f45 100644
--- a/drivers/net/wireless/ath/ath9k/htc.h
+++ b/drivers/net/wireless/ath/ath9k/htc.h
@@ -262,6 +262,8 @@ enum tid_aggr_state {
262struct ath9k_htc_sta { 262struct ath9k_htc_sta {
263 u8 index; 263 u8 index;
264 enum tid_aggr_state tid_state[ATH9K_HTC_MAX_TID]; 264 enum tid_aggr_state tid_state[ATH9K_HTC_MAX_TID];
265 struct work_struct rc_update_work;
266 struct ath9k_htc_priv *htc_priv;
265}; 267};
266 268
267#define ATH9K_HTC_RXBUF 256 269#define ATH9K_HTC_RXBUF 256
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index f4e1de20d99c..c57d6b859c04 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -34,6 +34,10 @@ static int ath9k_htc_btcoex_enable;
34module_param_named(btcoex_enable, ath9k_htc_btcoex_enable, int, 0444); 34module_param_named(btcoex_enable, ath9k_htc_btcoex_enable, int, 0444);
35MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence"); 35MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence");
36 36
37static int ath9k_ps_enable;
38module_param_named(ps_enable, ath9k_ps_enable, int, 0444);
39MODULE_PARM_DESC(ps_enable, "Enable WLAN PowerSave");
40
37#define CHAN2G(_freq, _idx) { \ 41#define CHAN2G(_freq, _idx) { \
38 .center_freq = (_freq), \ 42 .center_freq = (_freq), \
39 .hw_value = (_idx), \ 43 .hw_value = (_idx), \
@@ -725,12 +729,14 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv,
725 IEEE80211_HW_SPECTRUM_MGMT | 729 IEEE80211_HW_SPECTRUM_MGMT |
726 IEEE80211_HW_HAS_RATE_CONTROL | 730 IEEE80211_HW_HAS_RATE_CONTROL |
727 IEEE80211_HW_RX_INCLUDES_FCS | 731 IEEE80211_HW_RX_INCLUDES_FCS |
728 IEEE80211_HW_SUPPORTS_PS |
729 IEEE80211_HW_PS_NULLFUNC_STACK | 732 IEEE80211_HW_PS_NULLFUNC_STACK |
730 IEEE80211_HW_REPORTS_TX_ACK_STATUS | 733 IEEE80211_HW_REPORTS_TX_ACK_STATUS |
731 IEEE80211_HW_MFP_CAPABLE | 734 IEEE80211_HW_MFP_CAPABLE |
732 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING; 735 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING;
733 736
737 if (ath9k_ps_enable)
738 hw->flags |= IEEE80211_HW_SUPPORTS_PS;
739
734 hw->wiphy->interface_modes = 740 hw->wiphy->interface_modes =
735 BIT(NL80211_IFTYPE_STATION) | 741 BIT(NL80211_IFTYPE_STATION) |
736 BIT(NL80211_IFTYPE_ADHOC) | 742 BIT(NL80211_IFTYPE_ADHOC) |
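This hunk (and the matching one in init.c below) stops advertising IEEE80211_HW_SUPPORTS_PS unconditionally and gates it behind a new read-only ps_enable module parameter, making powersave opt-in. For reference, a minimal out-of-tree module skeleton using the same module_param pattern; the module and symbol names here are made up for illustration.

#include <linux/module.h>
#include <linux/moduleparam.h>

static int example_ps_enable;
module_param_named(ps_enable, example_ps_enable, int, 0444);
MODULE_PARM_DESC(ps_enable, "Enable WLAN PowerSave");

static int __init example_init(void)
{
    pr_info("example: powersave %s\n",
            example_ps_enable ? "enabled" : "disabled");
    /* a real driver would set IEEE80211_HW_SUPPORTS_PS here when enabled */
    return 0;
}

static void __exit example_exit(void)
{
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
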
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index 608d739d1378..c9254a61ca52 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -1270,18 +1270,50 @@ static void ath9k_htc_configure_filter(struct ieee80211_hw *hw,
1270 mutex_unlock(&priv->mutex); 1270 mutex_unlock(&priv->mutex);
1271} 1271}
1272 1272
1273static void ath9k_htc_sta_rc_update_work(struct work_struct *work)
1274{
1275 struct ath9k_htc_sta *ista =
1276 container_of(work, struct ath9k_htc_sta, rc_update_work);
1277 struct ieee80211_sta *sta =
1278 container_of((void *)ista, struct ieee80211_sta, drv_priv);
1279 struct ath9k_htc_priv *priv = ista->htc_priv;
1280 struct ath_common *common = ath9k_hw_common(priv->ah);
1281 struct ath9k_htc_target_rate trate;
1282
1283 mutex_lock(&priv->mutex);
1284 ath9k_htc_ps_wakeup(priv);
1285
1286 memset(&trate, 0, sizeof(struct ath9k_htc_target_rate));
1287 ath9k_htc_setup_rate(priv, sta, &trate);
1288 if (!ath9k_htc_send_rate_cmd(priv, &trate))
1289 ath_dbg(common, CONFIG,
1290 "Supported rates for sta: %pM updated, rate caps: 0x%X\n",
1291 sta->addr, be32_to_cpu(trate.capflags));
1292 else
1293 ath_dbg(common, CONFIG,
1294 "Unable to update supported rates for sta: %pM\n",
1295 sta->addr);
1296
1297 ath9k_htc_ps_restore(priv);
1298 mutex_unlock(&priv->mutex);
1299}
1300
1273static int ath9k_htc_sta_add(struct ieee80211_hw *hw, 1301static int ath9k_htc_sta_add(struct ieee80211_hw *hw,
1274 struct ieee80211_vif *vif, 1302 struct ieee80211_vif *vif,
1275 struct ieee80211_sta *sta) 1303 struct ieee80211_sta *sta)
1276{ 1304{
1277 struct ath9k_htc_priv *priv = hw->priv; 1305 struct ath9k_htc_priv *priv = hw->priv;
1306 struct ath9k_htc_sta *ista = (struct ath9k_htc_sta *) sta->drv_priv;
1278 int ret; 1307 int ret;
1279 1308
1280 mutex_lock(&priv->mutex); 1309 mutex_lock(&priv->mutex);
1281 ath9k_htc_ps_wakeup(priv); 1310 ath9k_htc_ps_wakeup(priv);
1282 ret = ath9k_htc_add_station(priv, vif, sta); 1311 ret = ath9k_htc_add_station(priv, vif, sta);
1283 if (!ret) 1312 if (!ret) {
1313 INIT_WORK(&ista->rc_update_work, ath9k_htc_sta_rc_update_work);
1314 ista->htc_priv = priv;
1284 ath9k_htc_init_rate(priv, sta); 1315 ath9k_htc_init_rate(priv, sta);
1316 }
1285 ath9k_htc_ps_restore(priv); 1317 ath9k_htc_ps_restore(priv);
1286 mutex_unlock(&priv->mutex); 1318 mutex_unlock(&priv->mutex);
1287 1319
@@ -1293,12 +1325,13 @@ static int ath9k_htc_sta_remove(struct ieee80211_hw *hw,
1293 struct ieee80211_sta *sta) 1325 struct ieee80211_sta *sta)
1294{ 1326{
1295 struct ath9k_htc_priv *priv = hw->priv; 1327 struct ath9k_htc_priv *priv = hw->priv;
1296 struct ath9k_htc_sta *ista; 1328 struct ath9k_htc_sta *ista = (struct ath9k_htc_sta *) sta->drv_priv;
1297 int ret; 1329 int ret;
1298 1330
1331 cancel_work_sync(&ista->rc_update_work);
1332
1299 mutex_lock(&priv->mutex); 1333 mutex_lock(&priv->mutex);
1300 ath9k_htc_ps_wakeup(priv); 1334 ath9k_htc_ps_wakeup(priv);
1301 ista = (struct ath9k_htc_sta *) sta->drv_priv;
1302 htc_sta_drain(priv->htc, ista->index); 1335 htc_sta_drain(priv->htc, ista->index);
1303 ret = ath9k_htc_remove_station(priv, vif, sta); 1336 ret = ath9k_htc_remove_station(priv, vif, sta);
1304 ath9k_htc_ps_restore(priv); 1337 ath9k_htc_ps_restore(priv);
@@ -1311,28 +1344,12 @@ static void ath9k_htc_sta_rc_update(struct ieee80211_hw *hw,
1311 struct ieee80211_vif *vif, 1344 struct ieee80211_vif *vif,
1312 struct ieee80211_sta *sta, u32 changed) 1345 struct ieee80211_sta *sta, u32 changed)
1313{ 1346{
1314 struct ath9k_htc_priv *priv = hw->priv; 1347 struct ath9k_htc_sta *ista = (struct ath9k_htc_sta *) sta->drv_priv;
1315 struct ath_common *common = ath9k_hw_common(priv->ah);
1316 struct ath9k_htc_target_rate trate;
1317
1318 mutex_lock(&priv->mutex);
1319 ath9k_htc_ps_wakeup(priv);
1320 1348
1321 if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) { 1349 if (!(changed & IEEE80211_RC_SUPP_RATES_CHANGED))
1322 memset(&trate, 0, sizeof(struct ath9k_htc_target_rate)); 1350 return;
1323 ath9k_htc_setup_rate(priv, sta, &trate);
1324 if (!ath9k_htc_send_rate_cmd(priv, &trate))
1325 ath_dbg(common, CONFIG,
1326 "Supported rates for sta: %pM updated, rate caps: 0x%X\n",
1327 sta->addr, be32_to_cpu(trate.capflags));
1328 else
1329 ath_dbg(common, CONFIG,
1330 "Unable to update supported rates for sta: %pM\n",
1331 sta->addr);
1332 }
1333 1351
1334 ath9k_htc_ps_restore(priv); 1352 schedule_work(&ista->rc_update_work);
1335 mutex_unlock(&priv->mutex);
1336} 1353}
1337 1354
1338static int ath9k_htc_conf_tx(struct ieee80211_hw *hw, 1355static int ath9k_htc_conf_tx(struct ieee80211_hw *hw,
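The sta_rc_update callback can be invoked from contexts where the driver must not sleep, yet the rate-control update needs priv->mutex and firmware commands; the patch therefore moves that work into a per-station work_struct, initialised in sta_add, scheduled from sta_rc_update, and flushed with cancel_work_sync() before the station is torn down. A stripped-down sketch of that defer-to-workqueue pattern (names invented, error handling omitted):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>

struct example_sta {
    struct work_struct rc_update_work;
    struct mutex lock;
};

static void example_rc_update_work(struct work_struct *work)
{
    struct example_sta *sta =
        container_of(work, struct example_sta, rc_update_work);

    /* safe to sleep here: take locks, talk to firmware, etc. */
    mutex_lock(&sta->lock);
    pr_info("example: rate update for sta %p\n", sta);
    mutex_unlock(&sta->lock);
}

static struct example_sta example_sta;

static int __init example_init(void)
{
    mutex_init(&example_sta.lock);
    INIT_WORK(&example_sta.rc_update_work, example_rc_update_work);

    /* an atomic caller (like sta_rc_update) only schedules the work */
    schedule_work(&example_sta.rc_update_work);
    return 0;
}

static void __exit example_exit(void)
{
    /* like sta_remove: make sure the work is finished before teardown */
    cancel_work_sync(&example_sta.rc_update_work);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
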
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index fbf43c05713f..303ce27964c1 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -1316,7 +1316,7 @@ static bool ath9k_hw_set_reset(struct ath_hw *ah, int type)
1316 if (AR_SREV_9300_20_OR_LATER(ah)) 1316 if (AR_SREV_9300_20_OR_LATER(ah))
1317 udelay(50); 1317 udelay(50);
1318 else if (AR_SREV_9100(ah)) 1318 else if (AR_SREV_9100(ah))
1319 udelay(10000); 1319 mdelay(10);
1320 else 1320 else
1321 udelay(100); 1321 udelay(100);
1322 1322
@@ -1534,7 +1534,7 @@ EXPORT_SYMBOL(ath9k_hw_check_nav);
1534bool ath9k_hw_check_alive(struct ath_hw *ah) 1534bool ath9k_hw_check_alive(struct ath_hw *ah)
1535{ 1535{
1536 int count = 50; 1536 int count = 50;
1537 u32 reg; 1537 u32 reg, last_val;
1538 1538
1539 if (AR_SREV_9300(ah)) 1539 if (AR_SREV_9300(ah))
1540 return !ath9k_hw_detect_mac_hang(ah); 1540 return !ath9k_hw_detect_mac_hang(ah);
@@ -1542,9 +1542,13 @@ bool ath9k_hw_check_alive(struct ath_hw *ah)
1542 if (AR_SREV_9285_12_OR_LATER(ah)) 1542 if (AR_SREV_9285_12_OR_LATER(ah))
1543 return true; 1543 return true;
1544 1544
1545 last_val = REG_READ(ah, AR_OBS_BUS_1);
1545 do { 1546 do {
1546 reg = REG_READ(ah, AR_OBS_BUS_1); 1547 reg = REG_READ(ah, AR_OBS_BUS_1);
1548 if (reg != last_val)
1549 return true;
1547 1550
1551 last_val = reg;
1548 if ((reg & 0x7E7FFFEF) == 0x00702400) 1552 if ((reg & 0x7E7FFFEF) == 0x00702400)
1549 continue; 1553 continue;
1550 1554
@@ -1556,6 +1560,8 @@ bool ath9k_hw_check_alive(struct ath_hw *ah)
1556 default: 1560 default:
1557 return true; 1561 return true;
1558 } 1562 }
1563
1564 udelay(1);
1559 } while (count-- > 0); 1565 } while (count-- > 0);
1560 1566
1561 return false; 1567 return false;
@@ -2051,9 +2057,8 @@ static bool ath9k_hw_set_power_awake(struct ath_hw *ah)
2051 2057
2052 REG_SET_BIT(ah, AR_RTC_FORCE_WAKE, 2058 REG_SET_BIT(ah, AR_RTC_FORCE_WAKE,
2053 AR_RTC_FORCE_WAKE_EN); 2059 AR_RTC_FORCE_WAKE_EN);
2054
2055 if (AR_SREV_9100(ah)) 2060 if (AR_SREV_9100(ah))
2056 udelay(10000); 2061 mdelay(10);
2057 else 2062 else
2058 udelay(50); 2063 udelay(50);
2059 2064
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index c36de303c8f3..1fc2e5a26b52 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -57,6 +57,10 @@ static int ath9k_bt_ant_diversity;
57module_param_named(bt_ant_diversity, ath9k_bt_ant_diversity, int, 0444); 57module_param_named(bt_ant_diversity, ath9k_bt_ant_diversity, int, 0444);
58MODULE_PARM_DESC(bt_ant_diversity, "Enable WLAN/BT RX antenna diversity"); 58MODULE_PARM_DESC(bt_ant_diversity, "Enable WLAN/BT RX antenna diversity");
59 59
60static int ath9k_ps_enable;
61module_param_named(ps_enable, ath9k_ps_enable, int, 0444);
62MODULE_PARM_DESC(ps_enable, "Enable WLAN PowerSave");
63
60bool is_ath9k_unloaded; 64bool is_ath9k_unloaded;
61/* We use the hw_value as an index into our private channel structure */ 65/* We use the hw_value as an index into our private channel structure */
62 66
@@ -903,13 +907,15 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
903 hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | 907 hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
904 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | 908 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
905 IEEE80211_HW_SIGNAL_DBM | 909 IEEE80211_HW_SIGNAL_DBM |
906 IEEE80211_HW_SUPPORTS_PS |
907 IEEE80211_HW_PS_NULLFUNC_STACK | 910 IEEE80211_HW_PS_NULLFUNC_STACK |
908 IEEE80211_HW_SPECTRUM_MGMT | 911 IEEE80211_HW_SPECTRUM_MGMT |
909 IEEE80211_HW_REPORTS_TX_ACK_STATUS | 912 IEEE80211_HW_REPORTS_TX_ACK_STATUS |
910 IEEE80211_HW_SUPPORTS_RC_TABLE | 913 IEEE80211_HW_SUPPORTS_RC_TABLE |
911 IEEE80211_HW_SUPPORTS_HT_CCK_RATES; 914 IEEE80211_HW_SUPPORTS_HT_CCK_RATES;
912 915
916 if (ath9k_ps_enable)
917 hw->flags |= IEEE80211_HW_SUPPORTS_PS;
918
913 if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_HT) { 919 if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_HT) {
914 hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; 920 hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION;
915 921
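[Editor's note — minimal sketch with placeholder names, not the driver's actual code. The init.c hunk above makes power save opt-in: a read-only module parameter gates whether the capability flag is advertised. The generic pattern looks like this.]

    #include <linux/module.h>
    #include <net/mac80211.h>

    static int example_feature_enable;
    module_param_named(feature_enable, example_feature_enable, int, 0444);
    MODULE_PARM_DESC(feature_enable, "Enable the optional feature (default: off)");

    static void example_set_capab(struct ieee80211_hw *hw)
    {
            /* Advertise the capability only when the user asked for it. */
            if (example_feature_enable)
                    hw->flags |= IEEE80211_HW_SUPPORTS_PS;
    }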
diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index a0ebdd000fc2..82e340d3ec60 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -732,11 +732,18 @@ static struct ath_rxbuf *ath_get_next_rx_buf(struct ath_softc *sc,
732 return NULL; 732 return NULL;
733 733
734 /* 734 /*
735 * mark descriptor as zero-length and set the 'more' 735 * Re-check previous descriptor, in case it has been filled
736 * flag to ensure that both buffers get discarded 736 * in the mean time.
737 */ 737 */
738 rs->rs_datalen = 0; 738 ret = ath9k_hw_rxprocdesc(ah, ds, rs);
739 rs->rs_more = true; 739 if (ret == -EINPROGRESS) {
740 /*
741 * mark descriptor as zero-length and set the 'more'
742 * flag to ensure that both buffers get discarded
743 */
744 rs->rs_datalen = 0;
745 rs->rs_more = true;
746 }
740 } 747 }
741 748
742 list_del(&bf->list); 749 list_del(&bf->list);
@@ -985,32 +992,32 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc,
985 struct ath_common *common = ath9k_hw_common(ah); 992 struct ath_common *common = ath9k_hw_common(ah);
986 struct ieee80211_hdr *hdr; 993 struct ieee80211_hdr *hdr;
987 bool discard_current = sc->rx.discard_next; 994 bool discard_current = sc->rx.discard_next;
988 int ret = 0;
989 995
990 /* 996 /*
991 * Discard corrupt descriptors which are marked in 997 * Discard corrupt descriptors which are marked in
992 * ath_get_next_rx_buf(). 998 * ath_get_next_rx_buf().
993 */ 999 */
994 sc->rx.discard_next = rx_stats->rs_more;
995 if (discard_current) 1000 if (discard_current)
996 return -EINVAL; 1001 goto corrupt;
1002
1003 sc->rx.discard_next = false;
997 1004
998 /* 1005 /*
999 * Discard zero-length packets. 1006 * Discard zero-length packets.
1000 */ 1007 */
1001 if (!rx_stats->rs_datalen) { 1008 if (!rx_stats->rs_datalen) {
1002 RX_STAT_INC(rx_len_err); 1009 RX_STAT_INC(rx_len_err);
1003 return -EINVAL; 1010 goto corrupt;
1004 } 1011 }
1005 1012
1006 /* 1013 /*
1007 * rs_status follows rs_datalen so if rs_datalen is too large 1014 * rs_status follows rs_datalen so if rs_datalen is too large
1008 * we can take a hint that hardware corrupted it, so ignore 1015 * we can take a hint that hardware corrupted it, so ignore
1009 * those frames. 1016 * those frames.
1010 */ 1017 */
1011 if (rx_stats->rs_datalen > (common->rx_bufsize - ah->caps.rx_status_len)) { 1018 if (rx_stats->rs_datalen > (common->rx_bufsize - ah->caps.rx_status_len)) {
1012 RX_STAT_INC(rx_len_err); 1019 RX_STAT_INC(rx_len_err);
1013 return -EINVAL; 1020 goto corrupt;
1014 } 1021 }
1015 1022
1016 /* Only use status info from the last fragment */ 1023 /* Only use status info from the last fragment */
@@ -1024,10 +1031,8 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc,
1024 * This is different from the other corrupt descriptor 1031 * This is different from the other corrupt descriptor
1025 * condition handled above. 1032 * condition handled above.
1026 */ 1033 */
1027 if (rx_stats->rs_status & ATH9K_RXERR_CORRUPT_DESC) { 1034 if (rx_stats->rs_status & ATH9K_RXERR_CORRUPT_DESC)
1028 ret = -EINVAL; 1035 goto corrupt;
1029 goto exit;
1030 }
1031 1036
1032 hdr = (struct ieee80211_hdr *) (skb->data + ah->caps.rx_status_len); 1037 hdr = (struct ieee80211_hdr *) (skb->data + ah->caps.rx_status_len);
1033 1038
@@ -1043,18 +1048,15 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc,
1043 if (ath_process_fft(sc, hdr, rx_stats, rx_status->mactime)) 1048 if (ath_process_fft(sc, hdr, rx_stats, rx_status->mactime))
1044 RX_STAT_INC(rx_spectral); 1049 RX_STAT_INC(rx_spectral);
1045 1050
1046 ret = -EINVAL; 1051 return -EINVAL;
1047 goto exit;
1048 } 1052 }
1049 1053
1050 /* 1054 /*
1051 * everything but the rate is checked here, the rate check is done 1055 * everything but the rate is checked here, the rate check is done
1052 * separately to avoid doing two lookups for a rate for each frame. 1056 * separately to avoid doing two lookups for a rate for each frame.
1053 */ 1057 */
1054 if (!ath9k_rx_accept(common, hdr, rx_status, rx_stats, decrypt_error)) { 1058 if (!ath9k_rx_accept(common, hdr, rx_status, rx_stats, decrypt_error))
1055 ret = -EINVAL; 1059 return -EINVAL;
1056 goto exit;
1057 }
1058 1060
1059 if (ath_is_mybeacon(common, hdr)) { 1061 if (ath_is_mybeacon(common, hdr)) {
1060 RX_STAT_INC(rx_beacons); 1062 RX_STAT_INC(rx_beacons);
@@ -1064,15 +1066,11 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc,
1064 /* 1066 /*
1065 * This shouldn't happen, but have a safety check anyway. 1067 * This shouldn't happen, but have a safety check anyway.
1066 */ 1068 */
1067 if (WARN_ON(!ah->curchan)) { 1069 if (WARN_ON(!ah->curchan))
1068 ret = -EINVAL; 1070 return -EINVAL;
1069 goto exit;
1070 }
1071 1071
1072 if (ath9k_process_rate(common, hw, rx_stats, rx_status)) { 1072 if (ath9k_process_rate(common, hw, rx_stats, rx_status))
1073 ret =-EINVAL; 1073 return -EINVAL;
1074 goto exit;
1075 }
1076 1074
1077 ath9k_process_rssi(common, hw, rx_stats, rx_status); 1075 ath9k_process_rssi(common, hw, rx_stats, rx_status);
1078 1076
@@ -1087,9 +1085,11 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc,
1087 sc->rx.num_pkts++; 1085 sc->rx.num_pkts++;
1088#endif 1086#endif
1089 1087
1090exit: 1088 return 0;
1091 sc->rx.discard_next = false; 1089
1092 return ret; 1090corrupt:
1091 sc->rx.discard_next = rx_stats->rs_more;
1092 return -EINVAL;
1093} 1093}
1094 1094
1095static void ath9k_rx_skb_postprocess(struct ath_common *common, 1095static void ath9k_rx_skb_postprocess(struct ath_common *common,
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 0a75e2f68c9d..f042a18c8495 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -1444,14 +1444,16 @@ void ath_tx_aggr_sleep(struct ieee80211_sta *sta, struct ath_softc *sc,
1444 for (tidno = 0, tid = &an->tid[tidno]; 1444 for (tidno = 0, tid = &an->tid[tidno];
1445 tidno < IEEE80211_NUM_TIDS; tidno++, tid++) { 1445 tidno < IEEE80211_NUM_TIDS; tidno++, tid++) {
1446 1446
1447 if (!tid->sched)
1448 continue;
1449
1450 ac = tid->ac; 1447 ac = tid->ac;
1451 txq = ac->txq; 1448 txq = ac->txq;
1452 1449
1453 ath_txq_lock(sc, txq); 1450 ath_txq_lock(sc, txq);
1454 1451
1452 if (!tid->sched) {
1453 ath_txq_unlock(sc, txq);
1454 continue;
1455 }
1456
1455 buffered = ath_tid_has_buffered(tid); 1457 buffered = ath_tid_has_buffered(tid);
1456 1458
1457 tid->sched = false; 1459 tid->sched = false;
@@ -2184,14 +2186,15 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb,
2184 txq->stopped = true; 2186 txq->stopped = true;
2185 } 2187 }
2186 2188
2189 if (txctl->an)
2190 tid = ath_get_skb_tid(sc, txctl->an, skb);
2191
2187 if (info->flags & IEEE80211_TX_CTL_PS_RESPONSE) { 2192 if (info->flags & IEEE80211_TX_CTL_PS_RESPONSE) {
2188 ath_txq_unlock(sc, txq); 2193 ath_txq_unlock(sc, txq);
2189 txq = sc->tx.uapsdq; 2194 txq = sc->tx.uapsdq;
2190 ath_txq_lock(sc, txq); 2195 ath_txq_lock(sc, txq);
2191 } else if (txctl->an && 2196 } else if (txctl->an &&
2192 ieee80211_is_data_present(hdr->frame_control)) { 2197 ieee80211_is_data_present(hdr->frame_control)) {
2193 tid = ath_get_skb_tid(sc, txctl->an, skb);
2194
2195 WARN_ON(tid->ac->txq != txctl->txq); 2198 WARN_ON(tid->ac->txq != txctl->txq);
2196 2199
2197 if (info->flags & IEEE80211_TX_CTL_CLEAR_PS_FILT) 2200 if (info->flags & IEEE80211_TX_CTL_CLEAR_PS_FILT)
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
index 3e991897d7ca..119ee6eaf1c3 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
@@ -457,7 +457,6 @@ struct brcmf_sdio {
457 457
458 u8 tx_hdrlen; /* sdio bus header length for tx packet */ 458 u8 tx_hdrlen; /* sdio bus header length for tx packet */
459 bool txglom; /* host tx glomming enable flag */ 459 bool txglom; /* host tx glomming enable flag */
460 struct sk_buff *txglom_sgpad; /* scatter-gather padding buffer */
461 u16 head_align; /* buffer pointer alignment */ 460 u16 head_align; /* buffer pointer alignment */
462 u16 sgentry_align; /* scatter-gather buffer alignment */ 461 u16 sgentry_align; /* scatter-gather buffer alignment */
463}; 462};
@@ -1944,9 +1943,8 @@ static int brcmf_sdio_txpkt_prep_sg(struct brcmf_sdio *bus,
1944 if (lastfrm && chain_pad) 1943 if (lastfrm && chain_pad)
1945 tail_pad += blksize - chain_pad; 1944 tail_pad += blksize - chain_pad;
1946 if (skb_tailroom(pkt) < tail_pad && pkt->len > blksize) { 1945 if (skb_tailroom(pkt) < tail_pad && pkt->len > blksize) {
1947 pkt_pad = bus->txglom_sgpad; 1946 pkt_pad = brcmu_pkt_buf_get_skb(tail_pad + tail_chop +
1948 if (pkt_pad == NULL) 1947 bus->head_align);
1949 brcmu_pkt_buf_get_skb(tail_pad + tail_chop);
1950 if (pkt_pad == NULL) 1948 if (pkt_pad == NULL)
1951 return -ENOMEM; 1949 return -ENOMEM;
1952 ret = brcmf_sdio_txpkt_hdalign(bus, pkt_pad); 1950 ret = brcmf_sdio_txpkt_hdalign(bus, pkt_pad);
@@ -1957,6 +1955,7 @@ static int brcmf_sdio_txpkt_prep_sg(struct brcmf_sdio *bus,
1957 tail_chop); 1955 tail_chop);
1958 *(u32 *)(pkt_pad->cb) = ALIGN_SKB_FLAG + tail_chop; 1956 *(u32 *)(pkt_pad->cb) = ALIGN_SKB_FLAG + tail_chop;
1959 skb_trim(pkt, pkt->len - tail_chop); 1957 skb_trim(pkt, pkt->len - tail_chop);
1958 skb_trim(pkt_pad, tail_pad + tail_chop);
1960 __skb_queue_after(pktq, pkt, pkt_pad); 1959 __skb_queue_after(pktq, pkt, pkt_pad);
1961 } else { 1960 } else {
1962 ntail = pkt->data_len + tail_pad - 1961 ntail = pkt->data_len + tail_pad -
@@ -2011,7 +2010,7 @@ brcmf_sdio_txpkt_prep(struct brcmf_sdio *bus, struct sk_buff_head *pktq,
2011 return ret; 2010 return ret;
2012 head_pad = (u16)ret; 2011 head_pad = (u16)ret;
2013 if (head_pad) 2012 if (head_pad)
2014 memset(pkt_next->data, 0, head_pad + bus->tx_hdrlen); 2013 memset(pkt_next->data + bus->tx_hdrlen, 0, head_pad);
2015 2014
2016 total_len += pkt_next->len; 2015 total_len += pkt_next->len;
2017 2016
@@ -3486,10 +3485,6 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
3486 bus->txglom = false; 3485 bus->txglom = false;
3487 value = 1; 3486 value = 1;
3488 pad_size = bus->sdiodev->func[2]->cur_blksize << 1; 3487 pad_size = bus->sdiodev->func[2]->cur_blksize << 1;
3489 bus->txglom_sgpad = brcmu_pkt_buf_get_skb(pad_size);
3490 if (!bus->txglom_sgpad)
3491 brcmf_err("allocating txglom padding skb failed, reduced performance\n");
3492
3493 err = brcmf_iovar_data_set(bus->sdiodev->dev, "bus:rxglom", 3488 err = brcmf_iovar_data_set(bus->sdiodev->dev, "bus:rxglom",
3494 &value, sizeof(u32)); 3489 &value, sizeof(u32));
3495 if (err < 0) { 3490 if (err < 0) {
@@ -4053,7 +4048,6 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus)
4053 brcmf_sdio_chip_detach(&bus->ci); 4048 brcmf_sdio_chip_detach(&bus->ci);
4054 } 4049 }
4055 4050
4056 brcmu_pkt_buf_free_skb(bus->txglom_sgpad);
4057 kfree(bus->rxbuf); 4051 kfree(bus->rxbuf);
4058 kfree(bus->hdrbuf); 4052 kfree(bus->hdrbuf);
4059 kfree(bus); 4053 kfree(bus);
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index d36e252d2ccb..596525528f50 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -147,7 +147,7 @@ static void ap_free_sta(struct ap_data *ap, struct sta_info *sta)
147 147
148 if (!sta->ap && sta->u.sta.challenge) 148 if (!sta->ap && sta->u.sta.challenge)
149 kfree(sta->u.sta.challenge); 149 kfree(sta->u.sta.challenge);
150 del_timer(&sta->timer); 150 del_timer_sync(&sta->timer);
151#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ 151#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
152 152
153 kfree(sta); 153 kfree(sta);
diff --git a/drivers/net/wireless/hostap/hostap_proc.c b/drivers/net/wireless/hostap/hostap_proc.c
index aa7ad3a7a69b..4e5c0f8c9496 100644
--- a/drivers/net/wireless/hostap/hostap_proc.c
+++ b/drivers/net/wireless/hostap/hostap_proc.c
@@ -496,7 +496,7 @@ void hostap_init_proc(local_info_t *local)
496 496
497void hostap_remove_proc(local_info_t *local) 497void hostap_remove_proc(local_info_t *local)
498{ 498{
499 remove_proc_subtree(local->ddev->name, hostap_proc); 499 proc_remove(local->proc);
500} 500}
501 501
502 502
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 32f75007a825..cb6d189bc3e6 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -621,7 +621,7 @@ static int lbs_ret_scan(struct lbs_private *priv, unsigned long dummy,
621 id = *pos++; 621 id = *pos++;
622 elen = *pos++; 622 elen = *pos++;
623 left -= 2; 623 left -= 2;
624 if (elen > left || elen == 0) { 624 if (elen > left) {
625 lbs_deb_scan("scan response: invalid IE fmt\n"); 625 lbs_deb_scan("scan response: invalid IE fmt\n");
626 goto done; 626 goto done;
627 } 627 }
diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c
index 03688aa14e8a..7fe7b53fb17a 100644
--- a/drivers/net/wireless/mwifiex/pcie.c
+++ b/drivers/net/wireless/mwifiex/pcie.c
@@ -1211,6 +1211,12 @@ static int mwifiex_pcie_process_recv_data(struct mwifiex_adapter *adapter)
1211 rd_index = card->rxbd_rdptr & reg->rx_mask; 1211 rd_index = card->rxbd_rdptr & reg->rx_mask;
1212 skb_data = card->rx_buf_list[rd_index]; 1212 skb_data = card->rx_buf_list[rd_index];
1213 1213
1214 /* If skb allocation was failed earlier for Rx packet,
1215 * rx_buf_list[rd_index] would have been left with a NULL.
1216 */
1217 if (!skb_data)
1218 return -ENOMEM;
1219
1214 MWIFIEX_SKB_PACB(skb_data, &buf_pa); 1220 MWIFIEX_SKB_PACB(skb_data, &buf_pa);
1215 pci_unmap_single(card->dev, buf_pa, MWIFIEX_RX_DATA_BUF_SIZE, 1221 pci_unmap_single(card->dev, buf_pa, MWIFIEX_RX_DATA_BUF_SIZE,
1216 PCI_DMA_FROMDEVICE); 1222 PCI_DMA_FROMDEVICE);
@@ -1525,6 +1531,14 @@ static int mwifiex_pcie_process_cmd_complete(struct mwifiex_adapter *adapter)
1525 if (adapter->ps_state == PS_STATE_SLEEP_CFM) { 1531 if (adapter->ps_state == PS_STATE_SLEEP_CFM) {
1526 mwifiex_process_sleep_confirm_resp(adapter, skb->data, 1532 mwifiex_process_sleep_confirm_resp(adapter, skb->data,
1527 skb->len); 1533 skb->len);
1534 mwifiex_pcie_enable_host_int(adapter);
1535 if (mwifiex_write_reg(adapter,
1536 PCIE_CPU_INT_EVENT,
1537 CPU_INTR_SLEEP_CFM_DONE)) {
1538 dev_warn(adapter->dev,
1539 "Write register failed\n");
1540 return -1;
1541 }
1528 while (reg->sleep_cookie && (count++ < 10) && 1542 while (reg->sleep_cookie && (count++ < 10) &&
1529 mwifiex_pcie_ok_to_access_hw(adapter)) 1543 mwifiex_pcie_ok_to_access_hw(adapter))
1530 usleep_range(50, 60); 1544 usleep_range(50, 60);
@@ -1993,23 +2007,9 @@ static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter)
1993 adapter->int_status |= pcie_ireg; 2007 adapter->int_status |= pcie_ireg;
1994 spin_unlock_irqrestore(&adapter->int_lock, flags); 2008 spin_unlock_irqrestore(&adapter->int_lock, flags);
1995 2009
1996 if (pcie_ireg & HOST_INTR_CMD_DONE) { 2010 if (!adapter->pps_uapsd_mode &&
1997 if ((adapter->ps_state == PS_STATE_SLEEP_CFM) || 2011 adapter->ps_state == PS_STATE_SLEEP &&
1998 (adapter->ps_state == PS_STATE_SLEEP)) { 2012 mwifiex_pcie_ok_to_access_hw(adapter)) {
1999 mwifiex_pcie_enable_host_int(adapter);
2000 if (mwifiex_write_reg(adapter,
2001 PCIE_CPU_INT_EVENT,
2002 CPU_INTR_SLEEP_CFM_DONE)
2003 ) {
2004 dev_warn(adapter->dev,
2005 "Write register failed\n");
2006 return;
2007
2008 }
2009 }
2010 } else if (!adapter->pps_uapsd_mode &&
2011 adapter->ps_state == PS_STATE_SLEEP &&
2012 mwifiex_pcie_ok_to_access_hw(adapter)) {
2013 /* Potentially for PCIe we could get other 2013 /* Potentially for PCIe we could get other
2014 * interrupts like shared. Don't change power 2014 * interrupts like shared. Don't change power
2015 * state until cookie is set */ 2015 * state until cookie is set */
diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c
index e8ebbd4bc3cd..208748804a55 100644
--- a/drivers/net/wireless/mwifiex/usb.c
+++ b/drivers/net/wireless/mwifiex/usb.c
@@ -22,8 +22,6 @@
22 22
23#define USB_VERSION "1.0" 23#define USB_VERSION "1.0"
24 24
25static const char usbdriver_name[] = "usb8xxx";
26
27static struct mwifiex_if_ops usb_ops; 25static struct mwifiex_if_ops usb_ops;
28static struct semaphore add_remove_card_sem; 26static struct semaphore add_remove_card_sem;
29static struct usb_card_rec *usb_card; 27static struct usb_card_rec *usb_card;
@@ -527,13 +525,6 @@ static int mwifiex_usb_resume(struct usb_interface *intf)
527 MWIFIEX_BSS_ROLE_ANY), 525 MWIFIEX_BSS_ROLE_ANY),
528 MWIFIEX_ASYNC_CMD); 526 MWIFIEX_ASYNC_CMD);
529 527
530#ifdef CONFIG_PM
531 /* Resume handler may be called due to remote wakeup,
532 * force to exit suspend anyway
533 */
534 usb_disable_autosuspend(card->udev);
535#endif /* CONFIG_PM */
536
537 return 0; 528 return 0;
538} 529}
539 530
@@ -567,13 +558,12 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf)
567} 558}
568 559
569static struct usb_driver mwifiex_usb_driver = { 560static struct usb_driver mwifiex_usb_driver = {
570 .name = usbdriver_name, 561 .name = "mwifiex_usb",
571 .probe = mwifiex_usb_probe, 562 .probe = mwifiex_usb_probe,
572 .disconnect = mwifiex_usb_disconnect, 563 .disconnect = mwifiex_usb_disconnect,
573 .id_table = mwifiex_usb_table, 564 .id_table = mwifiex_usb_table,
574 .suspend = mwifiex_usb_suspend, 565 .suspend = mwifiex_usb_suspend,
575 .resume = mwifiex_usb_resume, 566 .resume = mwifiex_usb_resume,
576 .supports_autosuspend = 1,
577}; 567};
578 568
579static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) 569static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter)
diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c
index 13eaeed03898..981cf6e7c73b 100644
--- a/drivers/net/wireless/mwifiex/wmm.c
+++ b/drivers/net/wireless/mwifiex/wmm.c
@@ -559,7 +559,8 @@ mwifiex_clean_txrx(struct mwifiex_private *priv)
559 mwifiex_wmm_delete_all_ralist(priv); 559 mwifiex_wmm_delete_all_ralist(priv);
560 memcpy(tos_to_tid, ac_to_tid, sizeof(tos_to_tid)); 560 memcpy(tos_to_tid, ac_to_tid, sizeof(tos_to_tid));
561 561
562 if (priv->adapter->if_ops.clean_pcie_ring) 562 if (priv->adapter->if_ops.clean_pcie_ring &&
563 !priv->adapter->surprise_removed)
563 priv->adapter->if_ops.clean_pcie_ring(priv->adapter); 564 priv->adapter->if_ops.clean_pcie_ring(priv->adapter);
564 spin_unlock_irqrestore(&priv->wmm.ra_list_spinlock, flags); 565 spin_unlock_irqrestore(&priv->wmm.ra_list_spinlock, flags);
565} 566}
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index abc5f56f29fe..2f1cd929c6f6 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -1877,6 +1877,11 @@ static int rt2500pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
1877 EEPROM_MAC_ADDR_0)); 1877 EEPROM_MAC_ADDR_0));
1878 1878
1879 /* 1879 /*
1880 * Disable powersaving as default.
1881 */
1882 rt2x00dev->hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
1883
1884 /*
1880 * Initialize hw_mode information. 1885 * Initialize hw_mode information.
1881 */ 1886 */
1882 spec->supported_bands = SUPPORT_BAND_2GHZ; 1887 spec->supported_bands = SUPPORT_BAND_2GHZ;
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 9f16824cd1bc..d849d590de25 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1706,6 +1706,11 @@ static int rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
1706 IEEE80211_HW_SUPPORTS_PS | 1706 IEEE80211_HW_SUPPORTS_PS |
1707 IEEE80211_HW_PS_NULLFUNC_STACK; 1707 IEEE80211_HW_PS_NULLFUNC_STACK;
1708 1708
1709 /*
1710 * Disable powersaving as default.
1711 */
1712 rt2x00dev->hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
1713
1709 SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev); 1714 SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev);
1710 SET_IEEE80211_PERM_ADDR(rt2x00dev->hw, 1715 SET_IEEE80211_PERM_ADDR(rt2x00dev->hw,
1711 rt2x00_eeprom_addr(rt2x00dev, 1716 rt2x00_eeprom_addr(rt2x00dev,
diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index b8f5b06006c4..7f8b5d156c8c 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -7458,10 +7458,9 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
7458 u32 reg; 7458 u32 reg;
7459 7459
7460 /* 7460 /*
7461 * Disable powersaving as default on PCI devices. 7461 * Disable powersaving as default.
7462 */ 7462 */
7463 if (rt2x00_is_pci(rt2x00dev) || rt2x00_is_soc(rt2x00dev)) 7463 rt2x00dev->hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
7464 rt2x00dev->hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
7465 7464
7466 /* 7465 /*
7467 * Initialize all hw fields. 7466 * Initialize all hw fields.
diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c
index 8ec17aad0e52..3867d1470b36 100644
--- a/drivers/net/wireless/rtl818x/rtl8180/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c
@@ -107,6 +107,7 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev)
107 struct rtl8180_priv *priv = dev->priv; 107 struct rtl8180_priv *priv = dev->priv;
108 unsigned int count = 32; 108 unsigned int count = 32;
109 u8 signal, agc, sq; 109 u8 signal, agc, sq;
110 dma_addr_t mapping;
110 111
111 while (count--) { 112 while (count--) {
112 struct rtl8180_rx_desc *entry = &priv->rx_ring[priv->rx_idx]; 113 struct rtl8180_rx_desc *entry = &priv->rx_ring[priv->rx_idx];
@@ -128,6 +129,17 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev)
128 if (unlikely(!new_skb)) 129 if (unlikely(!new_skb))
129 goto done; 130 goto done;
130 131
132 mapping = pci_map_single(priv->pdev,
133 skb_tail_pointer(new_skb),
134 MAX_RX_SIZE, PCI_DMA_FROMDEVICE);
135
136 if (pci_dma_mapping_error(priv->pdev, mapping)) {
137 kfree_skb(new_skb);
138 dev_err(&priv->pdev->dev, "RX DMA map error\n");
139
140 goto done;
141 }
142
131 pci_unmap_single(priv->pdev, 143 pci_unmap_single(priv->pdev,
132 *((dma_addr_t *)skb->cb), 144 *((dma_addr_t *)skb->cb),
133 MAX_RX_SIZE, PCI_DMA_FROMDEVICE); 145 MAX_RX_SIZE, PCI_DMA_FROMDEVICE);
@@ -158,9 +170,7 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev)
158 170
159 skb = new_skb; 171 skb = new_skb;
160 priv->rx_buf[priv->rx_idx] = skb; 172 priv->rx_buf[priv->rx_idx] = skb;
161 *((dma_addr_t *) skb->cb) = 173 *((dma_addr_t *) skb->cb) = mapping;
162 pci_map_single(priv->pdev, skb_tail_pointer(skb),
163 MAX_RX_SIZE, PCI_DMA_FROMDEVICE);
164 } 174 }
165 175
166 done: 176 done:
@@ -266,6 +276,13 @@ static void rtl8180_tx(struct ieee80211_hw *dev,
266 mapping = pci_map_single(priv->pdev, skb->data, 276 mapping = pci_map_single(priv->pdev, skb->data,
267 skb->len, PCI_DMA_TODEVICE); 277 skb->len, PCI_DMA_TODEVICE);
268 278
279 if (pci_dma_mapping_error(priv->pdev, mapping)) {
280 kfree_skb(skb);
281 dev_err(&priv->pdev->dev, "TX DMA mapping error\n");
282 return;
283
284 }
285
269 tx_flags = RTL818X_TX_DESC_FLAG_OWN | RTL818X_TX_DESC_FLAG_FS | 286 tx_flags = RTL818X_TX_DESC_FLAG_OWN | RTL818X_TX_DESC_FLAG_FS |
270 RTL818X_TX_DESC_FLAG_LS | 287 RTL818X_TX_DESC_FLAG_LS |
271 (ieee80211_get_tx_rate(dev, info)->hw_value << 24) | 288 (ieee80211_get_tx_rate(dev, info)->hw_value << 24) |
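[Editor's note — illustrative sketch, not taken from the patch. The rtl8180 hunks above add the standard DMA-mapping error check: map the buffer, verify the mapping with pci_dma_mapping_error() before handing it to hardware, and unwind on failure. Function and parameter names below are placeholders.]

    #include <linux/pci.h>
    #include <linux/skbuff.h>

    static int example_map_rx_skb(struct pci_dev *pdev, struct sk_buff *skb,
                                  size_t len, dma_addr_t *mapping)
    {
            *mapping = pci_map_single(pdev, skb_tail_pointer(skb),
                                      len, PCI_DMA_FROMDEVICE);
            if (pci_dma_mapping_error(pdev, *mapping)) {
                    /* Mapping failed: free the buffer, report, let the
                     * caller bail out instead of programming the ring. */
                    kfree_skb(skb);
                    dev_err(&pdev->dev, "RX DMA map error\n");
                    return -ENOMEM;
            }
            return 0;
    }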
diff --git a/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h b/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h
index 56aee067f324..a6ad79f61bf9 100644
--- a/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h
+++ b/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h
@@ -15,6 +15,8 @@
15#ifndef RTL8187_H 15#ifndef RTL8187_H
16#define RTL8187_H 16#define RTL8187_H
17 17
18#include <linux/cache.h>
19
18#include "rtl818x.h" 20#include "rtl818x.h"
19#include "leds.h" 21#include "leds.h"
20 22
@@ -139,7 +141,10 @@ struct rtl8187_priv {
139 u8 aifsn[4]; 141 u8 aifsn[4];
140 u8 rfkill_mask; 142 u8 rfkill_mask;
141 struct { 143 struct {
142 __le64 buf; 144 union {
145 __le64 buf;
146 u8 dummy1[L1_CACHE_BYTES];
147 } ____cacheline_aligned;
143 struct sk_buff_head queue; 148 struct sk_buff_head queue;
144 } b_tx_status; /* This queue is used by both -b and non-b devices */ 149 } b_tx_status; /* This queue is used by both -b and non-b devices */
145 struct mutex io_mutex; 150 struct mutex io_mutex;
@@ -147,7 +152,8 @@ struct rtl8187_priv {
147 u8 bits8; 152 u8 bits8;
148 __le16 bits16; 153 __le16 bits16;
149 __le32 bits32; 154 __le32 bits32;
150 } *io_dmabuf; 155 u8 dummy2[L1_CACHE_BYTES];
156 } *io_dmabuf ____cacheline_aligned;
151 bool rfkill_off; 157 bool rfkill_off;
152 u16 seqno; 158 u16 seqno;
153}; 159};
diff --git a/drivers/net/wireless/rtlwifi/ps.c b/drivers/net/wireless/rtlwifi/ps.c
index deedae3c5449..d1c0191a195b 100644
--- a/drivers/net/wireless/rtlwifi/ps.c
+++ b/drivers/net/wireless/rtlwifi/ps.c
@@ -48,7 +48,7 @@ bool rtl_ps_enable_nic(struct ieee80211_hw *hw)
48 48
49 /*<2> Enable Adapter */ 49 /*<2> Enable Adapter */
50 if (rtlpriv->cfg->ops->hw_init(hw)) 50 if (rtlpriv->cfg->ops->hw_init(hw))
51 return 1; 51 return false;
52 RT_CLEAR_PS_LEVEL(ppsc, RT_RF_OFF_LEVL_HALT_NIC); 52 RT_CLEAR_PS_LEVEL(ppsc, RT_RF_OFF_LEVL_HALT_NIC);
53 53
54 /*<3> Enable Interrupt */ 54 /*<3> Enable Interrupt */
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
index a82b30a1996c..2eb0b38384dd 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
@@ -937,14 +937,26 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
937 bool is92c; 937 bool is92c;
938 int err; 938 int err;
939 u8 tmp_u1b; 939 u8 tmp_u1b;
940 unsigned long flags;
940 941
941 rtlpci->being_init_adapter = true; 942 rtlpci->being_init_adapter = true;
943
944 /* Since this function can take a very long time (up to 350 ms)
945 * and can be called with irqs disabled, reenable the irqs
946 * to let the other devices continue being serviced.
947 *
948 * It is safe doing so since our own interrupts will only be enabled
949 * in a subsequent step.
950 */
951 local_save_flags(flags);
952 local_irq_enable();
953
942 rtlpriv->intf_ops->disable_aspm(hw); 954 rtlpriv->intf_ops->disable_aspm(hw);
943 rtstatus = _rtl92ce_init_mac(hw); 955 rtstatus = _rtl92ce_init_mac(hw);
944 if (!rtstatus) { 956 if (!rtstatus) {
945 RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Init MAC failed\n"); 957 RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Init MAC failed\n");
946 err = 1; 958 err = 1;
947 return err; 959 goto exit;
948 } 960 }
949 961
950 err = rtl92c_download_fw(hw); 962 err = rtl92c_download_fw(hw);
@@ -952,7 +964,7 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
952 RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, 964 RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
953 "Failed to download FW. Init HW without FW now..\n"); 965 "Failed to download FW. Init HW without FW now..\n");
954 err = 1; 966 err = 1;
955 return err; 967 goto exit;
956 } 968 }
957 969
958 rtlhal->last_hmeboxnum = 0; 970 rtlhal->last_hmeboxnum = 0;
@@ -1032,6 +1044,8 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
1032 RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, "under 1.5V\n"); 1044 RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, "under 1.5V\n");
1033 } 1045 }
1034 rtl92c_dm_init(hw); 1046 rtl92c_dm_init(hw);
1047exit:
1048 local_irq_restore(flags);
1035 rtlpci->being_init_adapter = false; 1049 rtlpci->being_init_adapter = false;
1036 return err; 1050 return err;
1037} 1051}
diff --git a/drivers/net/wireless/ti/wl1251/rx.c b/drivers/net/wireless/ti/wl1251/rx.c
index 123c4bb50e0a..cde0eaf99714 100644
--- a/drivers/net/wireless/ti/wl1251/rx.c
+++ b/drivers/net/wireless/ti/wl1251/rx.c
@@ -180,7 +180,7 @@ static void wl1251_rx_body(struct wl1251 *wl,
180 wl1251_mem_read(wl, rx_packet_ring_addr, rx_buffer, length); 180 wl1251_mem_read(wl, rx_packet_ring_addr, rx_buffer, length);
181 181
182 /* The actual length doesn't include the target's alignment */ 182 /* The actual length doesn't include the target's alignment */
183 skb->len = desc->length - PLCP_HEADER_LENGTH; 183 skb_trim(skb, desc->length - PLCP_HEADER_LENGTH);
184 184
185 fc = (u16 *)skb->data; 185 fc = (u16 *)skb->data;
186 186
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 92bd22ce6760..9cbc567698ce 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -504,7 +504,7 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
504 struct dasd_diag_req *dreq; 504 struct dasd_diag_req *dreq;
505 struct dasd_diag_bio *dbio; 505 struct dasd_diag_bio *dbio;
506 struct req_iterator iter; 506 struct req_iterator iter;
507 struct bio_vec *bv; 507 struct bio_vec bv;
508 char *dst; 508 char *dst;
509 unsigned int count, datasize; 509 unsigned int count, datasize;
510 sector_t recid, first_rec, last_rec; 510 sector_t recid, first_rec, last_rec;
@@ -525,10 +525,10 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
525 /* Check struct bio and count the number of blocks for the request. */ 525 /* Check struct bio and count the number of blocks for the request. */
526 count = 0; 526 count = 0;
527 rq_for_each_segment(bv, req, iter) { 527 rq_for_each_segment(bv, req, iter) {
528 if (bv->bv_len & (blksize - 1)) 528 if (bv.bv_len & (blksize - 1))
529 /* Fba can only do full blocks. */ 529 /* Fba can only do full blocks. */
530 return ERR_PTR(-EINVAL); 530 return ERR_PTR(-EINVAL);
531 count += bv->bv_len >> (block->s2b_shift + 9); 531 count += bv.bv_len >> (block->s2b_shift + 9);
532 } 532 }
533 /* Paranoia. */ 533 /* Paranoia. */
534 if (count != last_rec - first_rec + 1) 534 if (count != last_rec - first_rec + 1)
@@ -545,8 +545,8 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
545 dbio = dreq->bio; 545 dbio = dreq->bio;
546 recid = first_rec; 546 recid = first_rec;
547 rq_for_each_segment(bv, req, iter) { 547 rq_for_each_segment(bv, req, iter) {
548 dst = page_address(bv->bv_page) + bv->bv_offset; 548 dst = page_address(bv.bv_page) + bv.bv_offset;
549 for (off = 0; off < bv->bv_len; off += blksize) { 549 for (off = 0; off < bv.bv_len; off += blksize) {
550 memset(dbio, 0, sizeof (struct dasd_diag_bio)); 550 memset(dbio, 0, sizeof (struct dasd_diag_bio));
551 dbio->type = rw_cmd; 551 dbio->type = rw_cmd;
552 dbio->block_number = recid + 1; 552 dbio->block_number = recid + 1;
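[Editor's note — minimal sketch, not from the patch. This and the following s390/SCSI hunks convert rq_for_each_segment() users from a struct bio_vec pointer to a by-value struct bio_vec, so fields are accessed with '.' instead of '->'. The block-counting logic below is illustrative only.]

    #include <linux/blkdev.h>

    static int example_count_blocks(struct request *req, unsigned int blksize)
    {
            struct req_iterator iter;
            struct bio_vec bv;      /* by value, not a pointer */
            int count = 0;

            rq_for_each_segment(bv, req, iter) {
                    if (bv.bv_len & (blksize - 1))  /* fields via '.', not '->' */
                            return -EINVAL;
                    count += bv.bv_len / blksize;
            }
            return count;
    }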
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 95e45782692f..2e8e0755070b 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2551,7 +2551,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
2551 struct dasd_ccw_req *cqr; 2551 struct dasd_ccw_req *cqr;
2552 struct ccw1 *ccw; 2552 struct ccw1 *ccw;
2553 struct req_iterator iter; 2553 struct req_iterator iter;
2554 struct bio_vec *bv; 2554 struct bio_vec bv;
2555 char *dst; 2555 char *dst;
2556 unsigned int off; 2556 unsigned int off;
2557 int count, cidaw, cplength, datasize; 2557 int count, cidaw, cplength, datasize;
@@ -2573,13 +2573,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
2573 count = 0; 2573 count = 0;
2574 cidaw = 0; 2574 cidaw = 0;
2575 rq_for_each_segment(bv, req, iter) { 2575 rq_for_each_segment(bv, req, iter) {
2576 if (bv->bv_len & (blksize - 1)) 2576 if (bv.bv_len & (blksize - 1))
2577 /* Eckd can only do full blocks. */ 2577 /* Eckd can only do full blocks. */
2578 return ERR_PTR(-EINVAL); 2578 return ERR_PTR(-EINVAL);
2579 count += bv->bv_len >> (block->s2b_shift + 9); 2579 count += bv.bv_len >> (block->s2b_shift + 9);
2580#if defined(CONFIG_64BIT) 2580#if defined(CONFIG_64BIT)
2581 if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) 2581 if (idal_is_needed (page_address(bv.bv_page), bv.bv_len))
2582 cidaw += bv->bv_len >> (block->s2b_shift + 9); 2582 cidaw += bv.bv_len >> (block->s2b_shift + 9);
2583#endif 2583#endif
2584 } 2584 }
2585 /* Paranoia. */ 2585 /* Paranoia. */
@@ -2650,16 +2650,16 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
2650 last_rec - recid + 1, cmd, basedev, blksize); 2650 last_rec - recid + 1, cmd, basedev, blksize);
2651 } 2651 }
2652 rq_for_each_segment(bv, req, iter) { 2652 rq_for_each_segment(bv, req, iter) {
2653 dst = page_address(bv->bv_page) + bv->bv_offset; 2653 dst = page_address(bv.bv_page) + bv.bv_offset;
2654 if (dasd_page_cache) { 2654 if (dasd_page_cache) {
2655 char *copy = kmem_cache_alloc(dasd_page_cache, 2655 char *copy = kmem_cache_alloc(dasd_page_cache,
2656 GFP_DMA | __GFP_NOWARN); 2656 GFP_DMA | __GFP_NOWARN);
2657 if (copy && rq_data_dir(req) == WRITE) 2657 if (copy && rq_data_dir(req) == WRITE)
2658 memcpy(copy + bv->bv_offset, dst, bv->bv_len); 2658 memcpy(copy + bv.bv_offset, dst, bv.bv_len);
2659 if (copy) 2659 if (copy)
2660 dst = copy + bv->bv_offset; 2660 dst = copy + bv.bv_offset;
2661 } 2661 }
2662 for (off = 0; off < bv->bv_len; off += blksize) { 2662 for (off = 0; off < bv.bv_len; off += blksize) {
2663 sector_t trkid = recid; 2663 sector_t trkid = recid;
2664 unsigned int recoffs = sector_div(trkid, blk_per_trk); 2664 unsigned int recoffs = sector_div(trkid, blk_per_trk);
2665 rcmd = cmd; 2665 rcmd = cmd;
@@ -2735,7 +2735,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
2735 struct dasd_ccw_req *cqr; 2735 struct dasd_ccw_req *cqr;
2736 struct ccw1 *ccw; 2736 struct ccw1 *ccw;
2737 struct req_iterator iter; 2737 struct req_iterator iter;
2738 struct bio_vec *bv; 2738 struct bio_vec bv;
2739 char *dst, *idaw_dst; 2739 char *dst, *idaw_dst;
2740 unsigned int cidaw, cplength, datasize; 2740 unsigned int cidaw, cplength, datasize;
2741 unsigned int tlf; 2741 unsigned int tlf;
@@ -2813,8 +2813,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
2813 idaw_dst = NULL; 2813 idaw_dst = NULL;
2814 idaw_len = 0; 2814 idaw_len = 0;
2815 rq_for_each_segment(bv, req, iter) { 2815 rq_for_each_segment(bv, req, iter) {
2816 dst = page_address(bv->bv_page) + bv->bv_offset; 2816 dst = page_address(bv.bv_page) + bv.bv_offset;
2817 seg_len = bv->bv_len; 2817 seg_len = bv.bv_len;
2818 while (seg_len) { 2818 while (seg_len) {
2819 if (new_track) { 2819 if (new_track) {
2820 trkid = recid; 2820 trkid = recid;
@@ -3039,7 +3039,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
3039{ 3039{
3040 struct dasd_ccw_req *cqr; 3040 struct dasd_ccw_req *cqr;
3041 struct req_iterator iter; 3041 struct req_iterator iter;
3042 struct bio_vec *bv; 3042 struct bio_vec bv;
3043 char *dst; 3043 char *dst;
3044 unsigned int trkcount, ctidaw; 3044 unsigned int trkcount, ctidaw;
3045 unsigned char cmd; 3045 unsigned char cmd;
@@ -3125,8 +3125,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
3125 new_track = 1; 3125 new_track = 1;
3126 recid = first_rec; 3126 recid = first_rec;
3127 rq_for_each_segment(bv, req, iter) { 3127 rq_for_each_segment(bv, req, iter) {
3128 dst = page_address(bv->bv_page) + bv->bv_offset; 3128 dst = page_address(bv.bv_page) + bv.bv_offset;
3129 seg_len = bv->bv_len; 3129 seg_len = bv.bv_len;
3130 while (seg_len) { 3130 while (seg_len) {
3131 if (new_track) { 3131 if (new_track) {
3132 trkid = recid; 3132 trkid = recid;
@@ -3158,9 +3158,9 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
3158 } 3158 }
3159 } else { 3159 } else {
3160 rq_for_each_segment(bv, req, iter) { 3160 rq_for_each_segment(bv, req, iter) {
3161 dst = page_address(bv->bv_page) + bv->bv_offset; 3161 dst = page_address(bv.bv_page) + bv.bv_offset;
3162 last_tidaw = itcw_add_tidaw(itcw, 0x00, 3162 last_tidaw = itcw_add_tidaw(itcw, 0x00,
3163 dst, bv->bv_len); 3163 dst, bv.bv_len);
3164 if (IS_ERR(last_tidaw)) { 3164 if (IS_ERR(last_tidaw)) {
3165 ret = -EINVAL; 3165 ret = -EINVAL;
3166 goto out_error; 3166 goto out_error;
@@ -3278,7 +3278,7 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev,
3278 struct dasd_ccw_req *cqr; 3278 struct dasd_ccw_req *cqr;
3279 struct ccw1 *ccw; 3279 struct ccw1 *ccw;
3280 struct req_iterator iter; 3280 struct req_iterator iter;
3281 struct bio_vec *bv; 3281 struct bio_vec bv;
3282 char *dst; 3282 char *dst;
3283 unsigned char cmd; 3283 unsigned char cmd;
3284 unsigned int trkcount; 3284 unsigned int trkcount;
@@ -3378,8 +3378,8 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev,
3378 idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE); 3378 idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
3379 } 3379 }
3380 rq_for_each_segment(bv, req, iter) { 3380 rq_for_each_segment(bv, req, iter) {
3381 dst = page_address(bv->bv_page) + bv->bv_offset; 3381 dst = page_address(bv.bv_page) + bv.bv_offset;
3382 seg_len = bv->bv_len; 3382 seg_len = bv.bv_len;
3383 if (cmd == DASD_ECKD_CCW_READ_TRACK) 3383 if (cmd == DASD_ECKD_CCW_READ_TRACK)
3384 memset(dst, 0, seg_len); 3384 memset(dst, 0, seg_len);
3385 if (!len_to_track_end) { 3385 if (!len_to_track_end) {
@@ -3424,7 +3424,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
3424 struct dasd_eckd_private *private; 3424 struct dasd_eckd_private *private;
3425 struct ccw1 *ccw; 3425 struct ccw1 *ccw;
3426 struct req_iterator iter; 3426 struct req_iterator iter;
3427 struct bio_vec *bv; 3427 struct bio_vec bv;
3428 char *dst, *cda; 3428 char *dst, *cda;
3429 unsigned int blksize, blk_per_trk, off; 3429 unsigned int blksize, blk_per_trk, off;
3430 sector_t recid; 3430 sector_t recid;
@@ -3442,8 +3442,8 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
3442 if (private->uses_cdl == 0 || recid > 2*blk_per_trk) 3442 if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
3443 ccw++; 3443 ccw++;
3444 rq_for_each_segment(bv, req, iter) { 3444 rq_for_each_segment(bv, req, iter) {
3445 dst = page_address(bv->bv_page) + bv->bv_offset; 3445 dst = page_address(bv.bv_page) + bv.bv_offset;
3446 for (off = 0; off < bv->bv_len; off += blksize) { 3446 for (off = 0; off < bv.bv_len; off += blksize) {
3447 /* Skip locate record. */ 3447 /* Skip locate record. */
3448 if (private->uses_cdl && recid <= 2*blk_per_trk) 3448 if (private->uses_cdl && recid <= 2*blk_per_trk)
3449 ccw++; 3449 ccw++;
@@ -3454,7 +3454,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
3454 cda = (char *)((addr_t) ccw->cda); 3454 cda = (char *)((addr_t) ccw->cda);
3455 if (dst != cda) { 3455 if (dst != cda) {
3456 if (rq_data_dir(req) == READ) 3456 if (rq_data_dir(req) == READ)
3457 memcpy(dst, cda, bv->bv_len); 3457 memcpy(dst, cda, bv.bv_len);
3458 kmem_cache_free(dasd_page_cache, 3458 kmem_cache_free(dasd_page_cache,
3459 (void *)((addr_t)cda & PAGE_MASK)); 3459 (void *)((addr_t)cda & PAGE_MASK));
3460 } 3460 }
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 9cbc8c32ba59..2c8e68bf9a1c 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -260,7 +260,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
260 struct dasd_ccw_req *cqr; 260 struct dasd_ccw_req *cqr;
261 struct ccw1 *ccw; 261 struct ccw1 *ccw;
262 struct req_iterator iter; 262 struct req_iterator iter;
263 struct bio_vec *bv; 263 struct bio_vec bv;
264 char *dst; 264 char *dst;
265 int count, cidaw, cplength, datasize; 265 int count, cidaw, cplength, datasize;
266 sector_t recid, first_rec, last_rec; 266 sector_t recid, first_rec, last_rec;
@@ -283,13 +283,13 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
283 count = 0; 283 count = 0;
284 cidaw = 0; 284 cidaw = 0;
285 rq_for_each_segment(bv, req, iter) { 285 rq_for_each_segment(bv, req, iter) {
286 if (bv->bv_len & (blksize - 1)) 286 if (bv.bv_len & (blksize - 1))
287 /* Fba can only do full blocks. */ 287 /* Fba can only do full blocks. */
288 return ERR_PTR(-EINVAL); 288 return ERR_PTR(-EINVAL);
289 count += bv->bv_len >> (block->s2b_shift + 9); 289 count += bv.bv_len >> (block->s2b_shift + 9);
290#if defined(CONFIG_64BIT) 290#if defined(CONFIG_64BIT)
291 if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) 291 if (idal_is_needed (page_address(bv.bv_page), bv.bv_len))
292 cidaw += bv->bv_len / blksize; 292 cidaw += bv.bv_len / blksize;
293#endif 293#endif
294 } 294 }
295 /* Paranoia. */ 295 /* Paranoia. */
@@ -326,16 +326,16 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
326 } 326 }
327 recid = first_rec; 327 recid = first_rec;
328 rq_for_each_segment(bv, req, iter) { 328 rq_for_each_segment(bv, req, iter) {
329 dst = page_address(bv->bv_page) + bv->bv_offset; 329 dst = page_address(bv.bv_page) + bv.bv_offset;
330 if (dasd_page_cache) { 330 if (dasd_page_cache) {
331 char *copy = kmem_cache_alloc(dasd_page_cache, 331 char *copy = kmem_cache_alloc(dasd_page_cache,
332 GFP_DMA | __GFP_NOWARN); 332 GFP_DMA | __GFP_NOWARN);
333 if (copy && rq_data_dir(req) == WRITE) 333 if (copy && rq_data_dir(req) == WRITE)
334 memcpy(copy + bv->bv_offset, dst, bv->bv_len); 334 memcpy(copy + bv.bv_offset, dst, bv.bv_len);
335 if (copy) 335 if (copy)
336 dst = copy + bv->bv_offset; 336 dst = copy + bv.bv_offset;
337 } 337 }
338 for (off = 0; off < bv->bv_len; off += blksize) { 338 for (off = 0; off < bv.bv_len; off += blksize) {
339 /* Locate record for stupid devices. */ 339 /* Locate record for stupid devices. */
340 if (private->rdc_data.mode.bits.data_chain == 0) { 340 if (private->rdc_data.mode.bits.data_chain == 0) {
341 ccw[-1].flags |= CCW_FLAG_CC; 341 ccw[-1].flags |= CCW_FLAG_CC;
@@ -384,7 +384,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
384 struct dasd_fba_private *private; 384 struct dasd_fba_private *private;
385 struct ccw1 *ccw; 385 struct ccw1 *ccw;
386 struct req_iterator iter; 386 struct req_iterator iter;
387 struct bio_vec *bv; 387 struct bio_vec bv;
388 char *dst, *cda; 388 char *dst, *cda;
389 unsigned int blksize, off; 389 unsigned int blksize, off;
390 int status; 390 int status;
@@ -399,8 +399,8 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
399 if (private->rdc_data.mode.bits.data_chain != 0) 399 if (private->rdc_data.mode.bits.data_chain != 0)
400 ccw++; 400 ccw++;
401 rq_for_each_segment(bv, req, iter) { 401 rq_for_each_segment(bv, req, iter) {
402 dst = page_address(bv->bv_page) + bv->bv_offset; 402 dst = page_address(bv.bv_page) + bv.bv_offset;
403 for (off = 0; off < bv->bv_len; off += blksize) { 403 for (off = 0; off < bv.bv_len; off += blksize) {
404 /* Skip locate record. */ 404 /* Skip locate record. */
405 if (private->rdc_data.mode.bits.data_chain == 0) 405 if (private->rdc_data.mode.bits.data_chain == 0)
406 ccw++; 406 ccw++;
@@ -411,7 +411,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
411 cda = (char *)((addr_t) ccw->cda); 411 cda = (char *)((addr_t) ccw->cda);
412 if (dst != cda) { 412 if (dst != cda) {
413 if (rq_data_dir(req) == READ) 413 if (rq_data_dir(req) == READ)
414 memcpy(dst, cda, bv->bv_len); 414 memcpy(dst, cda, bv.bv_len);
415 kmem_cache_free(dasd_page_cache, 415 kmem_cache_free(dasd_page_cache,
416 (void *)((addr_t)cda & PAGE_MASK)); 416 (void *)((addr_t)cda & PAGE_MASK));
417 } 417 }
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6eca019bcf30..ebf41e228e55 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -808,18 +808,19 @@ static void
808dcssblk_make_request(struct request_queue *q, struct bio *bio) 808dcssblk_make_request(struct request_queue *q, struct bio *bio)
809{ 809{
810 struct dcssblk_dev_info *dev_info; 810 struct dcssblk_dev_info *dev_info;
811 struct bio_vec *bvec; 811 struct bio_vec bvec;
812 struct bvec_iter iter;
812 unsigned long index; 813 unsigned long index;
813 unsigned long page_addr; 814 unsigned long page_addr;
814 unsigned long source_addr; 815 unsigned long source_addr;
815 unsigned long bytes_done; 816 unsigned long bytes_done;
816 int i;
817 817
818 bytes_done = 0; 818 bytes_done = 0;
819 dev_info = bio->bi_bdev->bd_disk->private_data; 819 dev_info = bio->bi_bdev->bd_disk->private_data;
820 if (dev_info == NULL) 820 if (dev_info == NULL)
821 goto fail; 821 goto fail;
822 if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) 822 if ((bio->bi_iter.bi_sector & 7) != 0 ||
823 (bio->bi_iter.bi_size & 4095) != 0)
823 /* Request is not page-aligned. */ 824 /* Request is not page-aligned. */
824 goto fail; 825 goto fail;
825 if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) { 826 if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
@@ -842,22 +843,22 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
842 } 843 }
843 } 844 }
844 845
845 index = (bio->bi_sector >> 3); 846 index = (bio->bi_iter.bi_sector >> 3);
846 bio_for_each_segment(bvec, bio, i) { 847 bio_for_each_segment(bvec, bio, iter) {
847 page_addr = (unsigned long) 848 page_addr = (unsigned long)
848 page_address(bvec->bv_page) + bvec->bv_offset; 849 page_address(bvec.bv_page) + bvec.bv_offset;
849 source_addr = dev_info->start + (index<<12) + bytes_done; 850 source_addr = dev_info->start + (index<<12) + bytes_done;
850 if (unlikely((page_addr & 4095) != 0) || (bvec->bv_len & 4095) != 0) 851 if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0)
851 // More paranoia. 852 // More paranoia.
852 goto fail; 853 goto fail;
853 if (bio_data_dir(bio) == READ) { 854 if (bio_data_dir(bio) == READ) {
854 memcpy((void*)page_addr, (void*)source_addr, 855 memcpy((void*)page_addr, (void*)source_addr,
855 bvec->bv_len); 856 bvec.bv_len);
856 } else { 857 } else {
857 memcpy((void*)source_addr, (void*)page_addr, 858 memcpy((void*)source_addr, (void*)page_addr,
858 bvec->bv_len); 859 bvec.bv_len);
859 } 860 }
860 bytes_done += bvec->bv_len; 861 bytes_done += bvec.bv_len;
861 } 862 }
862 bio_endio(bio, 0); 863 bio_endio(bio, 0);
863 return; 864 return;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index d0ab5019d885..76bed1743db1 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -130,7 +130,7 @@ static void scm_request_prepare(struct scm_request *scmrq)
130 struct aidaw *aidaw = scmrq->aidaw; 130 struct aidaw *aidaw = scmrq->aidaw;
131 struct msb *msb = &scmrq->aob->msb[0]; 131 struct msb *msb = &scmrq->aob->msb[0];
132 struct req_iterator iter; 132 struct req_iterator iter;
133 struct bio_vec *bv; 133 struct bio_vec bv;
134 134
135 msb->bs = MSB_BS_4K; 135 msb->bs = MSB_BS_4K;
136 scmrq->aob->request.msb_count = 1; 136 scmrq->aob->request.msb_count = 1;
@@ -142,9 +142,9 @@ static void scm_request_prepare(struct scm_request *scmrq)
142 msb->data_addr = (u64) aidaw; 142 msb->data_addr = (u64) aidaw;
143 143
144 rq_for_each_segment(bv, scmrq->request, iter) { 144 rq_for_each_segment(bv, scmrq->request, iter) {
145 WARN_ON(bv->bv_offset); 145 WARN_ON(bv.bv_offset);
146 msb->blk_count += bv->bv_len >> 12; 146 msb->blk_count += bv.bv_len >> 12;
147 aidaw->data_addr = (u64) page_address(bv->bv_page); 147 aidaw->data_addr = (u64) page_address(bv.bv_page);
148 aidaw++; 148 aidaw++;
149 } 149 }
150} 150}
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 27f930cd657f..9aae909d47a5 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -122,7 +122,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
122 struct aidaw *aidaw = scmrq->aidaw; 122 struct aidaw *aidaw = scmrq->aidaw;
123 struct msb *msb = &scmrq->aob->msb[0]; 123 struct msb *msb = &scmrq->aob->msb[0];
124 struct req_iterator iter; 124 struct req_iterator iter;
125 struct bio_vec *bv; 125 struct bio_vec bv;
126 int i = 0; 126 int i = 0;
127 u64 addr; 127 u64 addr;
128 128
@@ -163,7 +163,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
163 i++; 163 i++;
164 } 164 }
165 rq_for_each_segment(bv, req, iter) { 165 rq_for_each_segment(bv, req, iter) {
166 aidaw->data_addr = (u64) page_address(bv->bv_page); 166 aidaw->data_addr = (u64) page_address(bv.bv_page);
167 aidaw++; 167 aidaw++;
168 i++; 168 i++;
169 } 169 }
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 58141f0651f2..6969d39f1e2e 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -184,25 +184,26 @@ static unsigned long xpram_highest_page_index(void)
184static void xpram_make_request(struct request_queue *q, struct bio *bio) 184static void xpram_make_request(struct request_queue *q, struct bio *bio)
185{ 185{
186 xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; 186 xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
187 struct bio_vec *bvec; 187 struct bio_vec bvec;
188 struct bvec_iter iter;
188 unsigned int index; 189 unsigned int index;
189 unsigned long page_addr; 190 unsigned long page_addr;
190 unsigned long bytes; 191 unsigned long bytes;
191 int i;
192 192
193 if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) 193 if ((bio->bi_iter.bi_sector & 7) != 0 ||
194 (bio->bi_iter.bi_size & 4095) != 0)
194 /* Request is not page-aligned. */ 195 /* Request is not page-aligned. */
195 goto fail; 196 goto fail;
196 if ((bio->bi_size >> 12) > xdev->size) 197 if ((bio->bi_iter.bi_size >> 12) > xdev->size)
197 /* Request size is no page-aligned. */ 198 /* Request size is no page-aligned. */
198 goto fail; 199 goto fail;
199 if ((bio->bi_sector >> 3) > 0xffffffffU - xdev->offset) 200 if ((bio->bi_iter.bi_sector >> 3) > 0xffffffffU - xdev->offset)
200 goto fail; 201 goto fail;
201 index = (bio->bi_sector >> 3) + xdev->offset; 202 index = (bio->bi_iter.bi_sector >> 3) + xdev->offset;
202 bio_for_each_segment(bvec, bio, i) { 203 bio_for_each_segment(bvec, bio, iter) {
203 page_addr = (unsigned long) 204 page_addr = (unsigned long)
204 kmap(bvec->bv_page) + bvec->bv_offset; 205 kmap(bvec.bv_page) + bvec.bv_offset;
205 bytes = bvec->bv_len; 206 bytes = bvec.bv_len;
206 if ((page_addr & 4095) != 0 || (bytes & 4095) != 0) 207 if ((page_addr & 4095) != 0 || (bytes & 4095) != 0)
207 /* More paranoia. */ 208 /* More paranoia. */
208 goto fail; 209 goto fail;
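[Editor's note — illustrative sketch only. The dcssblk/xpram hunks above (and the mpt2sas/mpt3sas hunks below) move bio_for_each_segment() to the bvec_iter form: the loop variable is a struct bio_vec by value and the iterator is an explicit struct bvec_iter rather than an integer index.]

    #include <linux/bio.h>

    static unsigned int example_bio_bytes(struct bio *bio)
    {
            struct bio_vec bvec;
            struct bvec_iter iter;
            unsigned int bytes = 0;

            bio_for_each_segment(bvec, bio, iter)
                    bytes += bvec.bv_len;   /* '.' access on the by-value bio_vec */

            return bytes;
    }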
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 446b85110a1f..0cac7d8fd0f7 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2163,10 +2163,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2163 } 2163 }
2164 2164
2165 /* do we need to support multiple segments? */ 2165 /* do we need to support multiple segments? */
2166 if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { 2166 if (bio_multiple_segments(req->bio) ||
2167 printk("%s: multiple segments req %u %u, rsp %u %u\n", 2167 bio_multiple_segments(rsp->bio)) {
2168 __func__, bio_segments(req->bio), blk_rq_bytes(req), 2168 printk("%s: multiple segments req %u, rsp %u\n",
2169 bio_segments(rsp->bio), blk_rq_bytes(rsp)); 2169 __func__, blk_rq_bytes(req), blk_rq_bytes(rsp));
2170 return -EINVAL; 2170 return -EINVAL;
2171 } 2171 }
2172 2172
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 9d26637308be..410f4a3e8888 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1901,7 +1901,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1901 struct MPT2SAS_ADAPTER *ioc = shost_priv(shost); 1901 struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
1902 Mpi2SmpPassthroughRequest_t *mpi_request; 1902 Mpi2SmpPassthroughRequest_t *mpi_request;
1903 Mpi2SmpPassthroughReply_t *mpi_reply; 1903 Mpi2SmpPassthroughReply_t *mpi_reply;
1904 int rc, i; 1904 int rc;
1905 u16 smid; 1905 u16 smid;
1906 u32 ioc_state; 1906 u32 ioc_state;
1907 unsigned long timeleft; 1907 unsigned long timeleft;
@@ -1916,7 +1916,8 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1916 void *pci_addr_out = NULL; 1916 void *pci_addr_out = NULL;
1917 u16 wait_state_count; 1917 u16 wait_state_count;
1918 struct request *rsp = req->next_rq; 1918 struct request *rsp = req->next_rq;
1919 struct bio_vec *bvec = NULL; 1919 struct bio_vec bvec;
1920 struct bvec_iter iter;
1920 1921
1921 if (!rsp) { 1922 if (!rsp) {
1922 printk(MPT2SAS_ERR_FMT "%s: the smp response space is " 1923 printk(MPT2SAS_ERR_FMT "%s: the smp response space is "
@@ -1942,7 +1943,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1942 ioc->transport_cmds.status = MPT2_CMD_PENDING; 1943 ioc->transport_cmds.status = MPT2_CMD_PENDING;
1943 1944
1944 /* Check if the request is split across multiple segments */ 1945 /* Check if the request is split across multiple segments */
1945 if (bio_segments(req->bio) > 1) { 1946 if (bio_multiple_segments(req->bio)) {
1946 u32 offset = 0; 1947 u32 offset = 0;
1947 1948
1948 /* Allocate memory and copy the request */ 1949 /* Allocate memory and copy the request */
@@ -1955,11 +1956,11 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1955 goto out; 1956 goto out;
1956 } 1957 }
1957 1958
1958 bio_for_each_segment(bvec, req->bio, i) { 1959 bio_for_each_segment(bvec, req->bio, iter) {
1959 memcpy(pci_addr_out + offset, 1960 memcpy(pci_addr_out + offset,
1960 page_address(bvec->bv_page) + bvec->bv_offset, 1961 page_address(bvec.bv_page) + bvec.bv_offset,
1961 bvec->bv_len); 1962 bvec.bv_len);
1962 offset += bvec->bv_len; 1963 offset += bvec.bv_len;
1963 } 1964 }
1964 } else { 1965 } else {
1965 dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio), 1966 dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
@@ -1974,7 +1975,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1974 1975
1975 /* Check if the response needs to be populated across 1976 /* Check if the response needs to be populated across
1976 * multiple segments */ 1977 * multiple segments */
1977 if (bio_segments(rsp->bio) > 1) { 1978 if (bio_multiple_segments(rsp->bio)) {
1978 pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), 1979 pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
1979 &pci_dma_in); 1980 &pci_dma_in);
1980 if (!pci_addr_in) { 1981 if (!pci_addr_in) {
@@ -2041,7 +2042,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2041 sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT | 2042 sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
2042 MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC); 2043 MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
2043 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; 2044 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
2044 if (bio_segments(req->bio) > 1) { 2045 if (bio_multiple_segments(req->bio)) {
2045 ioc->base_add_sg_single(psge, sgl_flags | 2046 ioc->base_add_sg_single(psge, sgl_flags |
2046 (blk_rq_bytes(req) - 4), pci_dma_out); 2047 (blk_rq_bytes(req) - 4), pci_dma_out);
2047 } else { 2048 } else {
@@ -2057,7 +2058,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2057 MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | 2058 MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
2058 MPI2_SGE_FLAGS_END_OF_LIST); 2059 MPI2_SGE_FLAGS_END_OF_LIST);
2059 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; 2060 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
2060 if (bio_segments(rsp->bio) > 1) { 2061 if (bio_multiple_segments(rsp->bio)) {
2061 ioc->base_add_sg_single(psge, sgl_flags | 2062 ioc->base_add_sg_single(psge, sgl_flags |
2062 (blk_rq_bytes(rsp) + 4), pci_dma_in); 2063 (blk_rq_bytes(rsp) + 4), pci_dma_in);
2063 } else { 2064 } else {
@@ -2102,23 +2103,23 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2102 le16_to_cpu(mpi_reply->ResponseDataLength); 2103 le16_to_cpu(mpi_reply->ResponseDataLength);
2103 /* check if the resp needs to be copied from the allocated 2104 /* check if the resp needs to be copied from the allocated
2104 * pci mem */ 2105 * pci mem */
2105 if (bio_segments(rsp->bio) > 1) { 2106 if (bio_multiple_segments(rsp->bio)) {
2106 u32 offset = 0; 2107 u32 offset = 0;
2107 u32 bytes_to_copy = 2108 u32 bytes_to_copy =
2108 le16_to_cpu(mpi_reply->ResponseDataLength); 2109 le16_to_cpu(mpi_reply->ResponseDataLength);
2109 bio_for_each_segment(bvec, rsp->bio, i) { 2110 bio_for_each_segment(bvec, rsp->bio, iter) {
2110 if (bytes_to_copy <= bvec->bv_len) { 2111 if (bytes_to_copy <= bvec.bv_len) {
2111 memcpy(page_address(bvec->bv_page) + 2112 memcpy(page_address(bvec.bv_page) +
2112 bvec->bv_offset, pci_addr_in + 2113 bvec.bv_offset, pci_addr_in +
2113 offset, bytes_to_copy); 2114 offset, bytes_to_copy);
2114 break; 2115 break;
2115 } else { 2116 } else {
2116 memcpy(page_address(bvec->bv_page) + 2117 memcpy(page_address(bvec.bv_page) +
2117 bvec->bv_offset, pci_addr_in + 2118 bvec.bv_offset, pci_addr_in +
2118 offset, bvec->bv_len); 2119 offset, bvec.bv_len);
2119 bytes_to_copy -= bvec->bv_len; 2120 bytes_to_copy -= bvec.bv_len;
2120 } 2121 }
2121 offset += bvec->bv_len; 2122 offset += bvec.bv_len;
2122 } 2123 }
2123 } 2124 }
2124 } else { 2125 } else {
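The mpt2sas conversion above is the series' core mechanical change in miniature: bio_for_each_segment() now fills a struct bio_vec by value and keeps its position in a caller-supplied struct bvec_iter, and the old bio_segments()/bi_vcnt checks become bio_multiple_segments(). A minimal sketch of the resulting copy pattern, assuming only the iterator API shown here (the helper name is hypothetical, and every segment is assumed to be addressable through page_address(), as the SMP passthrough path already assumes):

#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/string.h>

/* Sketch only: flatten a bio's data into a pre-allocated bounce buffer. */
static void sketch_copy_bio_to_bounce(struct bio *bio, void *buf)
{
	struct bio_vec bvec;	/* filled in by value on each iteration */
	struct bvec_iter iter;	/* replaces the old integer segment index */
	u32 offset = 0;

	bio_for_each_segment(bvec, bio, iter) {
		memcpy(buf + offset,
		       page_address(bvec.bv_page) + bvec.bv_offset,
		       bvec.bv_len);
		offset += bvec.bv_len;
	}
}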
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index e771a88c6a74..65170cb1a00f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -1884,7 +1884,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1884 struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); 1884 struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
1885 Mpi2SmpPassthroughRequest_t *mpi_request; 1885 Mpi2SmpPassthroughRequest_t *mpi_request;
1886 Mpi2SmpPassthroughReply_t *mpi_reply; 1886 Mpi2SmpPassthroughReply_t *mpi_reply;
1887 int rc, i; 1887 int rc;
1888 u16 smid; 1888 u16 smid;
1889 u32 ioc_state; 1889 u32 ioc_state;
1890 unsigned long timeleft; 1890 unsigned long timeleft;
@@ -1898,7 +1898,8 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1898 void *pci_addr_out = NULL; 1898 void *pci_addr_out = NULL;
1899 u16 wait_state_count; 1899 u16 wait_state_count;
1900 struct request *rsp = req->next_rq; 1900 struct request *rsp = req->next_rq;
1901 struct bio_vec *bvec = NULL; 1901 struct bio_vec bvec;
1902 struct bvec_iter iter;
1902 1903
1903 if (!rsp) { 1904 if (!rsp) {
1904 pr_err(MPT3SAS_FMT "%s: the smp response space is missing\n", 1905 pr_err(MPT3SAS_FMT "%s: the smp response space is missing\n",
@@ -1925,7 +1926,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1925 ioc->transport_cmds.status = MPT3_CMD_PENDING; 1926 ioc->transport_cmds.status = MPT3_CMD_PENDING;
1926 1927
1927 /* Check if the request is split across multiple segments */ 1928 /* Check if the request is split across multiple segments */
1928 if (req->bio->bi_vcnt > 1) { 1929 if (bio_multiple_segments(req->bio)) {
1929 u32 offset = 0; 1930 u32 offset = 0;
1930 1931
1931 /* Allocate memory and copy the request */ 1932 /* Allocate memory and copy the request */
@@ -1938,11 +1939,11 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1938 goto out; 1939 goto out;
1939 } 1940 }
1940 1941
1941 bio_for_each_segment(bvec, req->bio, i) { 1942 bio_for_each_segment(bvec, req->bio, iter) {
1942 memcpy(pci_addr_out + offset, 1943 memcpy(pci_addr_out + offset,
1943 page_address(bvec->bv_page) + bvec->bv_offset, 1944 page_address(bvec.bv_page) + bvec.bv_offset,
1944 bvec->bv_len); 1945 bvec.bv_len);
1945 offset += bvec->bv_len; 1946 offset += bvec.bv_len;
1946 } 1947 }
1947 } else { 1948 } else {
1948 dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio), 1949 dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
@@ -1957,7 +1958,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1957 1958
1958 /* Check if the response needs to be populated across 1959 /* Check if the response needs to be populated across
1959 * multiple segments */ 1960 * multiple segments */
1960 if (rsp->bio->bi_vcnt > 1) { 1961 if (bio_multiple_segments(rsp->bio)) {
1961 pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), 1962 pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
1962 &pci_dma_in); 1963 &pci_dma_in);
1963 if (!pci_addr_in) { 1964 if (!pci_addr_in) {
@@ -2018,7 +2019,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2018 mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4); 2019 mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4);
2019 psge = &mpi_request->SGL; 2020 psge = &mpi_request->SGL;
2020 2021
2021 if (req->bio->bi_vcnt > 1) 2022 if (bio_multiple_segments(req->bio))
2022 ioc->build_sg(ioc, psge, pci_dma_out, (blk_rq_bytes(req) - 4), 2023 ioc->build_sg(ioc, psge, pci_dma_out, (blk_rq_bytes(req) - 4),
2023 pci_dma_in, (blk_rq_bytes(rsp) + 4)); 2024 pci_dma_in, (blk_rq_bytes(rsp) + 4));
2024 else 2025 else
@@ -2063,23 +2064,23 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2063 2064
2064 /* check if the resp needs to be copied from the allocated 2065 /* check if the resp needs to be copied from the allocated
2065 * pci mem */ 2066 * pci mem */
2066 if (rsp->bio->bi_vcnt > 1) { 2067 if (bio_multiple_segments(rsp->bio)) {
2067 u32 offset = 0; 2068 u32 offset = 0;
2068 u32 bytes_to_copy = 2069 u32 bytes_to_copy =
2069 le16_to_cpu(mpi_reply->ResponseDataLength); 2070 le16_to_cpu(mpi_reply->ResponseDataLength);
2070 bio_for_each_segment(bvec, rsp->bio, i) { 2071 bio_for_each_segment(bvec, rsp->bio, iter) {
2071 if (bytes_to_copy <= bvec->bv_len) { 2072 if (bytes_to_copy <= bvec.bv_len) {
2072 memcpy(page_address(bvec->bv_page) + 2073 memcpy(page_address(bvec.bv_page) +
2073 bvec->bv_offset, pci_addr_in + 2074 bvec.bv_offset, pci_addr_in +
2074 offset, bytes_to_copy); 2075 offset, bytes_to_copy);
2075 break; 2076 break;
2076 } else { 2077 } else {
2077 memcpy(page_address(bvec->bv_page) + 2078 memcpy(page_address(bvec.bv_page) +
2078 bvec->bv_offset, pci_addr_in + 2079 bvec.bv_offset, pci_addr_in +
2079 offset, bvec->bv_len); 2080 offset, bvec.bv_len);
2080 bytes_to_copy -= bvec->bv_len; 2081 bytes_to_copy -= bvec.bv_len;
2081 } 2082 }
2082 offset += bvec->bv_len; 2083 offset += bvec.bv_len;
2083 } 2084 }
2084 } 2085 }
2085 } else { 2086 } else {
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index aa66361ed44b..bac04c2335aa 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -731,7 +731,7 @@ static int _osd_req_list_objects(struct osd_request *or,
731 731
732 bio->bi_rw &= ~REQ_WRITE; 732 bio->bi_rw &= ~REQ_WRITE;
733 or->in.bio = bio; 733 or->in.bio = bio;
734 or->in.total_bytes = bio->bi_size; 734 or->in.total_bytes = bio->bi_iter.bi_size;
735 return 0; 735 return 0;
736} 736}
737 737
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9846c6ab2aaa..470954aba728 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -801,7 +801,7 @@ static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq)
801 if (sdkp->device->no_write_same) 801 if (sdkp->device->no_write_same)
802 return BLKPREP_KILL; 802 return BLKPREP_KILL;
803 803
804 BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size); 804 BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
805 805
806 sector >>= ilog2(sdp->sector_size) - 9; 806 sector >>= ilog2(sdp->sector_size) - 9;
807 nr_sectors >>= ilog2(sdp->sector_size) - 9; 807 nr_sectors >>= ilog2(sdp->sector_size) - 9;
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 6174ca4ea275..a7a691d0af7d 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -365,7 +365,6 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
365 struct bio *bio; 365 struct bio *bio;
366 struct scsi_disk *sdkp; 366 struct scsi_disk *sdkp;
367 struct sd_dif_tuple *sdt; 367 struct sd_dif_tuple *sdt;
368 unsigned int i, j;
369 u32 phys, virt; 368 u32 phys, virt;
370 369
371 sdkp = rq->bio->bi_bdev->bd_disk->private_data; 370 sdkp = rq->bio->bi_bdev->bd_disk->private_data;
@@ -376,19 +375,21 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
376 phys = hw_sector & 0xffffffff; 375 phys = hw_sector & 0xffffffff;
377 376
378 __rq_for_each_bio(bio, rq) { 377 __rq_for_each_bio(bio, rq) {
379 struct bio_vec *iv; 378 struct bio_vec iv;
379 struct bvec_iter iter;
380 unsigned int j;
380 381
381 /* Already remapped? */ 382 /* Already remapped? */
382 if (bio_flagged(bio, BIO_MAPPED_INTEGRITY)) 383 if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
383 break; 384 break;
384 385
385 virt = bio->bi_integrity->bip_sector & 0xffffffff; 386 virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
386 387
387 bip_for_each_vec(iv, bio->bi_integrity, i) { 388 bip_for_each_vec(iv, bio->bi_integrity, iter) {
388 sdt = kmap_atomic(iv->bv_page) 389 sdt = kmap_atomic(iv.bv_page)
389 + iv->bv_offset; 390 + iv.bv_offset;
390 391
391 for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) { 392 for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) {
392 393
393 if (be32_to_cpu(sdt->ref_tag) == virt) 394 if (be32_to_cpu(sdt->ref_tag) == virt)
394 sdt->ref_tag = cpu_to_be32(phys); 395 sdt->ref_tag = cpu_to_be32(phys);
@@ -414,7 +415,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
414 struct scsi_disk *sdkp; 415 struct scsi_disk *sdkp;
415 struct bio *bio; 416 struct bio *bio;
416 struct sd_dif_tuple *sdt; 417 struct sd_dif_tuple *sdt;
417 unsigned int i, j, sectors, sector_sz; 418 unsigned int j, sectors, sector_sz;
418 u32 phys, virt; 419 u32 phys, virt;
419 420
420 sdkp = scsi_disk(scmd->request->rq_disk); 421 sdkp = scsi_disk(scmd->request->rq_disk);
@@ -430,15 +431,16 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
430 phys >>= 3; 431 phys >>= 3;
431 432
432 __rq_for_each_bio(bio, scmd->request) { 433 __rq_for_each_bio(bio, scmd->request) {
433 struct bio_vec *iv; 434 struct bio_vec iv;
435 struct bvec_iter iter;
434 436
435 virt = bio->bi_integrity->bip_sector & 0xffffffff; 437 virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
436 438
437 bip_for_each_vec(iv, bio->bi_integrity, i) { 439 bip_for_each_vec(iv, bio->bi_integrity, iter) {
438 sdt = kmap_atomic(iv->bv_page) 440 sdt = kmap_atomic(iv.bv_page)
439 + iv->bv_offset; 441 + iv.bv_offset;
440 442
441 for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) { 443 for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) {
442 444
443 if (sectors == 0) { 445 if (sectors == 0) {
444 kunmap_atomic(sdt); 446 kunmap_atomic(sdt);
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index 5338e8d4c50f..0718905adeb2 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -194,10 +194,10 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
194 struct cl_object *obj = ll_i2info(inode)->lli_clob; 194 struct cl_object *obj = ll_i2info(inode)->lli_clob;
195 pgoff_t offset; 195 pgoff_t offset;
196 int ret; 196 int ret;
197 int i;
198 int rw; 197 int rw;
199 obd_count page_count = 0; 198 obd_count page_count = 0;
200 struct bio_vec *bvec; 199 struct bio_vec bvec;
200 struct bvec_iter iter;
201 struct bio *bio; 201 struct bio *bio;
202 ssize_t bytes; 202 ssize_t bytes;
203 203
@@ -220,15 +220,15 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
220 for (bio = head; bio != NULL; bio = bio->bi_next) { 220 for (bio = head; bio != NULL; bio = bio->bi_next) {
221 LASSERT(rw == bio->bi_rw); 221 LASSERT(rw == bio->bi_rw);
222 222
223 offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset; 223 offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset;
224 bio_for_each_segment(bvec, bio, i) { 224 bio_for_each_segment(bvec, bio, iter) {
225 BUG_ON(bvec->bv_offset != 0); 225 BUG_ON(bvec.bv_offset != 0);
226 BUG_ON(bvec->bv_len != PAGE_CACHE_SIZE); 226 BUG_ON(bvec.bv_len != PAGE_CACHE_SIZE);
227 227
228 pages[page_count] = bvec->bv_page; 228 pages[page_count] = bvec.bv_page;
229 offsets[page_count] = offset; 229 offsets[page_count] = offset;
230 page_count++; 230 page_count++;
231 offset += bvec->bv_len; 231 offset += bvec.bv_len;
232 } 232 }
233 LASSERT(page_count <= LLOOP_MAX_SEGMENTS); 233 LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
234 } 234 }
@@ -313,7 +313,8 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
313 bio = &lo->lo_bio; 313 bio = &lo->lo_bio;
314 while (*bio && (*bio)->bi_rw == rw) { 314 while (*bio && (*bio)->bi_rw == rw) {
315 CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n", 315 CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n",
316 (unsigned long long)(*bio)->bi_sector, (*bio)->bi_size, 316 (unsigned long long)(*bio)->bi_iter.bi_sector,
317 (*bio)->bi_iter.bi_size,
317 page_count, (*bio)->bi_vcnt); 318 page_count, (*bio)->bi_vcnt);
318 if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS) 319 if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS)
319 break; 320 break;
@@ -347,7 +348,8 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
347 goto err; 348 goto err;
348 349
349 CDEBUG(D_INFO, "submit bio sector %llu size %u\n", 350 CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
350 (unsigned long long)old_bio->bi_sector, old_bio->bi_size); 351 (unsigned long long)old_bio->bi_iter.bi_sector,
352 old_bio->bi_iter.bi_size);
351 353
352 spin_lock_irq(&lo->lo_lock); 354 spin_lock_irq(&lo->lo_lock);
353 inactive = (lo->lo_state != LLOOP_BOUND); 355 inactive = (lo->lo_state != LLOOP_BOUND);
@@ -367,7 +369,7 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
367 loop_add_bio(lo, old_bio); 369 loop_add_bio(lo, old_bio);
368 return; 370 return;
369err: 371err:
370 cfs_bio_io_error(old_bio, old_bio->bi_size); 372 cfs_bio_io_error(old_bio, old_bio->bi_iter.bi_size);
371} 373}
372 374
373 375
@@ -378,7 +380,7 @@ static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
378 while (bio) { 380 while (bio) {
379 struct bio *tmp = bio->bi_next; 381 struct bio *tmp = bio->bi_next;
380 bio->bi_next = NULL; 382 bio->bi_next = NULL;
381 cfs_bio_endio(bio, bio->bi_size, ret); 383 cfs_bio_endio(bio, bio->bi_iter.bi_size, ret);
382 bio = tmp; 384 bio = tmp;
383 } 385 }
384} 386}
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 3277d9838f4e..108f2733106d 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -171,13 +171,14 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
171 u64 start, end, bound; 171 u64 start, end, bound;
172 172
173 /* unaligned request */ 173 /* unaligned request */
174 if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 174 if (unlikely(bio->bi_iter.bi_sector &
175 (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
175 return 0; 176 return 0;
176 if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 177 if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
177 return 0; 178 return 0;
178 179
179 start = bio->bi_sector; 180 start = bio->bi_iter.bi_sector;
180 end = start + (bio->bi_size >> SECTOR_SHIFT); 181 end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
181 bound = zram->disksize >> SECTOR_SHIFT; 182 bound = zram->disksize >> SECTOR_SHIFT;
182 /* out of range range */ 183 /* out of range range */
183 if (unlikely(start >= bound || end > bound || start > end)) 184 if (unlikely(start >= bound || end > bound || start > end))
@@ -680,9 +681,10 @@ out:
680 681
681static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) 682static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
682{ 683{
683 int i, offset; 684 int offset;
684 u32 index; 685 u32 index;
685 struct bio_vec *bvec; 686 struct bio_vec bvec;
687 struct bvec_iter iter;
686 688
687 switch (rw) { 689 switch (rw) {
688 case READ: 690 case READ:
@@ -693,36 +695,37 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
693 break; 695 break;
694 } 696 }
695 697
696 index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; 698 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
697 offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 699 offset = (bio->bi_iter.bi_sector &
700 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
698 701
699 bio_for_each_segment(bvec, bio, i) { 702 bio_for_each_segment(bvec, bio, iter) {
700 int max_transfer_size = PAGE_SIZE - offset; 703 int max_transfer_size = PAGE_SIZE - offset;
701 704
702 if (bvec->bv_len > max_transfer_size) { 705 if (bvec.bv_len > max_transfer_size) {
703 /* 706 /*
704 * zram_bvec_rw() can only make operation on a single 707 * zram_bvec_rw() can only make operation on a single
705 * zram page. Split the bio vector. 708 * zram page. Split the bio vector.
706 */ 709 */
707 struct bio_vec bv; 710 struct bio_vec bv;
708 711
709 bv.bv_page = bvec->bv_page; 712 bv.bv_page = bvec.bv_page;
710 bv.bv_len = max_transfer_size; 713 bv.bv_len = max_transfer_size;
711 bv.bv_offset = bvec->bv_offset; 714 bv.bv_offset = bvec.bv_offset;
712 715
713 if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) 716 if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
714 goto out; 717 goto out;
715 718
716 bv.bv_len = bvec->bv_len - max_transfer_size; 719 bv.bv_len = bvec.bv_len - max_transfer_size;
717 bv.bv_offset += max_transfer_size; 720 bv.bv_offset += max_transfer_size;
718 if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) 721 if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
719 goto out; 722 goto out;
720 } else 723 } else
721 if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) 724 if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
722 < 0) 725 < 0)
723 goto out; 726 goto out;
724 727
725 update_position(&index, &offset, bvec); 728 update_position(&index, &offset, &bvec);
726 } 729 }
727 730
728 set_bit(BIO_UPTODATE, &bio->bi_flags); 731 set_bit(BIO_UPTODATE, &bio->bi_flags);
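__zram_make_request() now derives the starting zram page and the offset within it from bi_iter.bi_sector instead of the removed bi_sector field. A standalone worked example of that arithmetic, assuming zram's usual 4 KiB pages and 512-byte sectors (SECTORS_PER_PAGE_SHIFT == 3, SECTOR_SHIFT == 9); the variable names are ours:

#include <stdio.h>

int main(void)
{
	unsigned long long bi_sector = 11;        /* bio->bi_iter.bi_sector          */
	unsigned index  = bi_sector >> 3;         /* zram page the I/O starts in: 1  */
	unsigned offset = (bi_sector & 7) << 9;   /* byte offset within it: 1536     */

	printf("index=%u offset=%u\n", index, offset);
	return 0;
}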
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c87959f12760..2d29356d0c85 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -319,7 +319,7 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num)
319 bio->bi_bdev = ib_dev->ibd_bd; 319 bio->bi_bdev = ib_dev->ibd_bd;
320 bio->bi_private = cmd; 320 bio->bi_private = cmd;
321 bio->bi_end_io = &iblock_bio_done; 321 bio->bi_end_io = &iblock_bio_done;
322 bio->bi_sector = lba; 322 bio->bi_iter.bi_sector = lba;
323 323
324 return bio; 324 return bio;
325} 325}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index fc60b31453ee..0bad24ddc2e7 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -134,8 +134,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
134 return 0; 134 return 0;
135 } 135 }
136 136
137 iv = bip_vec_idx(bip, bip->bip_vcnt); 137 iv = bip->bip_vec + bip->bip_vcnt;
138 BUG_ON(iv == NULL);
139 138
140 iv->bv_page = page; 139 iv->bv_page = page;
141 iv->bv_len = len; 140 iv->bv_len = len;
@@ -203,6 +202,12 @@ static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
203 return sectors; 202 return sectors;
204} 203}
205 204
205static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
206 unsigned int sectors)
207{
208 return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size;
209}
210
206/** 211/**
207 * bio_integrity_tag_size - Retrieve integrity tag space 212 * bio_integrity_tag_size - Retrieve integrity tag space
208 * @bio: bio to inspect 213 * @bio: bio to inspect
@@ -215,9 +220,9 @@ unsigned int bio_integrity_tag_size(struct bio *bio)
215{ 220{
216 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 221 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
217 222
218 BUG_ON(bio->bi_size == 0); 223 BUG_ON(bio->bi_iter.bi_size == 0);
219 224
220 return bi->tag_size * (bio->bi_size / bi->sector_size); 225 return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
221} 226}
222EXPORT_SYMBOL(bio_integrity_tag_size); 227EXPORT_SYMBOL(bio_integrity_tag_size);
223 228
@@ -235,9 +240,9 @@ int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
235 nr_sectors = bio_integrity_hw_sectors(bi, 240 nr_sectors = bio_integrity_hw_sectors(bi,
236 DIV_ROUND_UP(len, bi->tag_size)); 241 DIV_ROUND_UP(len, bi->tag_size));
237 242
238 if (nr_sectors * bi->tuple_size > bip->bip_size) { 243 if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
239 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", 244 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
240 __func__, nr_sectors * bi->tuple_size, bip->bip_size); 245 nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
241 return -1; 246 return -1;
242 } 247 }
243 248
@@ -299,29 +304,30 @@ static void bio_integrity_generate(struct bio *bio)
299{ 304{
300 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 305 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
301 struct blk_integrity_exchg bix; 306 struct blk_integrity_exchg bix;
302 struct bio_vec *bv; 307 struct bio_vec bv;
303 sector_t sector = bio->bi_sector; 308 struct bvec_iter iter;
304 unsigned int i, sectors, total; 309 sector_t sector = bio->bi_iter.bi_sector;
310 unsigned int sectors, total;
305 void *prot_buf = bio->bi_integrity->bip_buf; 311 void *prot_buf = bio->bi_integrity->bip_buf;
306 312
307 total = 0; 313 total = 0;
308 bix.disk_name = bio->bi_bdev->bd_disk->disk_name; 314 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
309 bix.sector_size = bi->sector_size; 315 bix.sector_size = bi->sector_size;
310 316
311 bio_for_each_segment(bv, bio, i) { 317 bio_for_each_segment(bv, bio, iter) {
312 void *kaddr = kmap_atomic(bv->bv_page); 318 void *kaddr = kmap_atomic(bv.bv_page);
313 bix.data_buf = kaddr + bv->bv_offset; 319 bix.data_buf = kaddr + bv.bv_offset;
314 bix.data_size = bv->bv_len; 320 bix.data_size = bv.bv_len;
315 bix.prot_buf = prot_buf; 321 bix.prot_buf = prot_buf;
316 bix.sector = sector; 322 bix.sector = sector;
317 323
318 bi->generate_fn(&bix); 324 bi->generate_fn(&bix);
319 325
320 sectors = bv->bv_len / bi->sector_size; 326 sectors = bv.bv_len / bi->sector_size;
321 sector += sectors; 327 sector += sectors;
322 prot_buf += sectors * bi->tuple_size; 328 prot_buf += sectors * bi->tuple_size;
323 total += sectors * bi->tuple_size; 329 total += sectors * bi->tuple_size;
324 BUG_ON(total > bio->bi_integrity->bip_size); 330 BUG_ON(total > bio->bi_integrity->bip_iter.bi_size);
325 331
326 kunmap_atomic(kaddr); 332 kunmap_atomic(kaddr);
327 } 333 }
@@ -386,8 +392,8 @@ int bio_integrity_prep(struct bio *bio)
386 392
387 bip->bip_owns_buf = 1; 393 bip->bip_owns_buf = 1;
388 bip->bip_buf = buf; 394 bip->bip_buf = buf;
389 bip->bip_size = len; 395 bip->bip_iter.bi_size = len;
390 bip->bip_sector = bio->bi_sector; 396 bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
391 397
392 /* Map it */ 398 /* Map it */
393 offset = offset_in_page(buf); 399 offset = offset_in_page(buf);
@@ -442,16 +448,18 @@ static int bio_integrity_verify(struct bio *bio)
442 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 448 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
443 struct blk_integrity_exchg bix; 449 struct blk_integrity_exchg bix;
444 struct bio_vec *bv; 450 struct bio_vec *bv;
445 sector_t sector = bio->bi_integrity->bip_sector; 451 sector_t sector = bio->bi_integrity->bip_iter.bi_sector;
446 unsigned int i, sectors, total, ret; 452 unsigned int sectors, total, ret;
447 void *prot_buf = bio->bi_integrity->bip_buf; 453 void *prot_buf = bio->bi_integrity->bip_buf;
454 int i;
448 455
449 ret = total = 0; 456 ret = total = 0;
450 bix.disk_name = bio->bi_bdev->bd_disk->disk_name; 457 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
451 bix.sector_size = bi->sector_size; 458 bix.sector_size = bi->sector_size;
452 459
453 bio_for_each_segment(bv, bio, i) { 460 bio_for_each_segment_all(bv, bio, i) {
454 void *kaddr = kmap_atomic(bv->bv_page); 461 void *kaddr = kmap_atomic(bv->bv_page);
462
455 bix.data_buf = kaddr + bv->bv_offset; 463 bix.data_buf = kaddr + bv->bv_offset;
456 bix.data_size = bv->bv_len; 464 bix.data_size = bv->bv_len;
457 bix.prot_buf = prot_buf; 465 bix.prot_buf = prot_buf;
@@ -468,7 +476,7 @@ static int bio_integrity_verify(struct bio *bio)
468 sector += sectors; 476 sector += sectors;
469 prot_buf += sectors * bi->tuple_size; 477 prot_buf += sectors * bi->tuple_size;
470 total += sectors * bi->tuple_size; 478 total += sectors * bi->tuple_size;
471 BUG_ON(total > bio->bi_integrity->bip_size); 479 BUG_ON(total > bio->bi_integrity->bip_iter.bi_size);
472 480
473 kunmap_atomic(kaddr); 481 kunmap_atomic(kaddr);
474 } 482 }
@@ -495,7 +503,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
495 503
496 /* Restore original bio completion handler */ 504 /* Restore original bio completion handler */
497 bio->bi_end_io = bip->bip_end_io; 505 bio->bi_end_io = bip->bip_end_io;
498 bio_endio(bio, error); 506 bio_endio_nodec(bio, error);
499} 507}
500 508
501/** 509/**
@@ -533,56 +541,6 @@ void bio_integrity_endio(struct bio *bio, int error)
533EXPORT_SYMBOL(bio_integrity_endio); 541EXPORT_SYMBOL(bio_integrity_endio);
534 542
535/** 543/**
536 * bio_integrity_mark_head - Advance bip_vec skip bytes
537 * @bip: Integrity vector to advance
538 * @skip: Number of bytes to advance it
539 */
540void bio_integrity_mark_head(struct bio_integrity_payload *bip,
541 unsigned int skip)
542{
543 struct bio_vec *iv;
544 unsigned int i;
545
546 bip_for_each_vec(iv, bip, i) {
547 if (skip == 0) {
548 bip->bip_idx = i;
549 return;
550 } else if (skip >= iv->bv_len) {
551 skip -= iv->bv_len;
552 } else { /* skip < iv->bv_len) */
553 iv->bv_offset += skip;
554 iv->bv_len -= skip;
555 bip->bip_idx = i;
556 return;
557 }
558 }
559}
560
561/**
562 * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
563 * @bip: Integrity vector to truncate
564 * @len: New length of integrity vector
565 */
566void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
567 unsigned int len)
568{
569 struct bio_vec *iv;
570 unsigned int i;
571
572 bip_for_each_vec(iv, bip, i) {
573 if (len == 0) {
574 bip->bip_vcnt = i;
575 return;
576 } else if (len >= iv->bv_len) {
577 len -= iv->bv_len;
578 } else { /* len < iv->bv_len) */
579 iv->bv_len = len;
580 len = 0;
581 }
582 }
583}
584
585/**
586 * bio_integrity_advance - Advance integrity vector 544 * bio_integrity_advance - Advance integrity vector
587 * @bio: bio whose integrity vector to update 545 * @bio: bio whose integrity vector to update
588 * @bytes_done: number of data bytes that have been completed 546 * @bytes_done: number of data bytes that have been completed
@@ -595,13 +553,9 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
595{ 553{
596 struct bio_integrity_payload *bip = bio->bi_integrity; 554 struct bio_integrity_payload *bip = bio->bi_integrity;
597 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 555 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
598 unsigned int nr_sectors; 556 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
599 557
600 BUG_ON(bip == NULL); 558 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
601 BUG_ON(bi == NULL);
602
603 nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
604 bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
605} 559}
606EXPORT_SYMBOL(bio_integrity_advance); 560EXPORT_SYMBOL(bio_integrity_advance);
607 561
@@ -621,64 +575,13 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
621{ 575{
622 struct bio_integrity_payload *bip = bio->bi_integrity; 576 struct bio_integrity_payload *bip = bio->bi_integrity;
623 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 577 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
624 unsigned int nr_sectors;
625
626 BUG_ON(bip == NULL);
627 BUG_ON(bi == NULL);
628 BUG_ON(!bio_flagged(bio, BIO_CLONED));
629 578
630 nr_sectors = bio_integrity_hw_sectors(bi, sectors); 579 bio_integrity_advance(bio, offset << 9);
631 bip->bip_sector = bip->bip_sector + offset; 580 bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
632 bio_integrity_mark_head(bip, offset * bi->tuple_size);
633 bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
634} 581}
635EXPORT_SYMBOL(bio_integrity_trim); 582EXPORT_SYMBOL(bio_integrity_trim);
636 583
637/** 584/**
638 * bio_integrity_split - Split integrity metadata
639 * @bio: Protected bio
640 * @bp: Resulting bio_pair
641 * @sectors: Offset
642 *
643 * Description: Splits an integrity page into a bio_pair.
644 */
645void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
646{
647 struct blk_integrity *bi;
648 struct bio_integrity_payload *bip = bio->bi_integrity;
649 unsigned int nr_sectors;
650
651 if (bio_integrity(bio) == 0)
652 return;
653
654 bi = bdev_get_integrity(bio->bi_bdev);
655 BUG_ON(bi == NULL);
656 BUG_ON(bip->bip_vcnt != 1);
657
658 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
659
660 bp->bio1.bi_integrity = &bp->bip1;
661 bp->bio2.bi_integrity = &bp->bip2;
662
663 bp->iv1 = bip->bip_vec[bip->bip_idx];
664 bp->iv2 = bip->bip_vec[bip->bip_idx];
665
666 bp->bip1.bip_vec = &bp->iv1;
667 bp->bip2.bip_vec = &bp->iv2;
668
669 bp->iv1.bv_len = sectors * bi->tuple_size;
670 bp->iv2.bv_offset += sectors * bi->tuple_size;
671 bp->iv2.bv_len -= sectors * bi->tuple_size;
672
673 bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
674 bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
675
676 bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
677 bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
678}
679EXPORT_SYMBOL(bio_integrity_split);
680
681/**
682 * bio_integrity_clone - Callback for cloning bios with integrity metadata 585 * bio_integrity_clone - Callback for cloning bios with integrity metadata
683 * @bio: New bio 586 * @bio: New bio
684 * @bio_src: Original bio 587 * @bio_src: Original bio
@@ -702,9 +605,8 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
702 memcpy(bip->bip_vec, bip_src->bip_vec, 605 memcpy(bip->bip_vec, bip_src->bip_vec,
703 bip_src->bip_vcnt * sizeof(struct bio_vec)); 606 bip_src->bip_vcnt * sizeof(struct bio_vec));
704 607
705 bip->bip_sector = bip_src->bip_sector;
706 bip->bip_vcnt = bip_src->bip_vcnt; 608 bip->bip_vcnt = bip_src->bip_vcnt;
707 bip->bip_idx = bip_src->bip_idx; 609 bip->bip_iter = bip_src->bip_iter;
708 610
709 return 0; 611 return 0;
710} 612}
diff --git a/fs/bio.c b/fs/bio.c
index 33d79a4eb92d..75c49a382239 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -38,8 +38,6 @@
38 */ 38 */
39#define BIO_INLINE_VECS 4 39#define BIO_INLINE_VECS 4
40 40
41static mempool_t *bio_split_pool __read_mostly;
42
43/* 41/*
44 * if you change this list, also change bvec_alloc or things will 42 * if you change this list, also change bvec_alloc or things will
45 * break badly! cannot be bigger than what you can fit into an 43 * break badly! cannot be bigger than what you can fit into an
@@ -273,6 +271,7 @@ void bio_init(struct bio *bio)
273{ 271{
274 memset(bio, 0, sizeof(*bio)); 272 memset(bio, 0, sizeof(*bio));
275 bio->bi_flags = 1 << BIO_UPTODATE; 273 bio->bi_flags = 1 << BIO_UPTODATE;
274 atomic_set(&bio->bi_remaining, 1);
276 atomic_set(&bio->bi_cnt, 1); 275 atomic_set(&bio->bi_cnt, 1);
277} 276}
278EXPORT_SYMBOL(bio_init); 277EXPORT_SYMBOL(bio_init);
@@ -295,9 +294,35 @@ void bio_reset(struct bio *bio)
295 294
296 memset(bio, 0, BIO_RESET_BYTES); 295 memset(bio, 0, BIO_RESET_BYTES);
297 bio->bi_flags = flags|(1 << BIO_UPTODATE); 296 bio->bi_flags = flags|(1 << BIO_UPTODATE);
297 atomic_set(&bio->bi_remaining, 1);
298} 298}
299EXPORT_SYMBOL(bio_reset); 299EXPORT_SYMBOL(bio_reset);
300 300
301static void bio_chain_endio(struct bio *bio, int error)
302{
303 bio_endio(bio->bi_private, error);
304 bio_put(bio);
305}
306
307/**
308 * bio_chain - chain bio completions
309 *
310 * The caller won't have a bi_end_io called when @bio completes - instead,
311 * @parent's bi_end_io won't be called until both @parent and @bio have
312 * completed; the chained bio will also be freed when it completes.
313 *
314 * The caller must not set bi_private or bi_end_io in @bio.
315 */
316void bio_chain(struct bio *bio, struct bio *parent)
317{
318 BUG_ON(bio->bi_private || bio->bi_end_io);
319
320 bio->bi_private = parent;
321 bio->bi_end_io = bio_chain_endio;
322 atomic_inc(&parent->bi_remaining);
323}
324EXPORT_SYMBOL(bio_chain);
325
301static void bio_alloc_rescue(struct work_struct *work) 326static void bio_alloc_rescue(struct work_struct *work)
302{ 327{
303 struct bio_set *bs = container_of(work, struct bio_set, rescue_work); 328 struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
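bio_chain() gives drivers a generic way to tie a helper bio's completion to its parent's through bi_remaining, instead of open-coding private completion counts. A hypothetical usage fragment (function and variable names are ours), assuming only the semantics documented above and the existing generic_make_request() submission path:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Sketch only: submit 'extra' as part of servicing 'parent'. */
static void sketch_submit_dependent_bio(struct bio *parent, struct bio *extra)
{
	/* 'extra' must not have bi_end_io or bi_private set; bio_chain() uses both */
	bio_chain(extra, parent);
	generic_make_request(extra);

	/*
	 * parent's bi_end_io now runs only once both parent and extra have
	 * completed; extra is put by bio_chain_endio() when it finishes.
	 */
}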
@@ -473,13 +498,13 @@ EXPORT_SYMBOL(bio_alloc_bioset);
473void zero_fill_bio(struct bio *bio) 498void zero_fill_bio(struct bio *bio)
474{ 499{
475 unsigned long flags; 500 unsigned long flags;
476 struct bio_vec *bv; 501 struct bio_vec bv;
477 int i; 502 struct bvec_iter iter;
478 503
479 bio_for_each_segment(bv, bio, i) { 504 bio_for_each_segment(bv, bio, iter) {
480 char *data = bvec_kmap_irq(bv, &flags); 505 char *data = bvec_kmap_irq(&bv, &flags);
481 memset(data, 0, bv->bv_len); 506 memset(data, 0, bv.bv_len);
482 flush_dcache_page(bv->bv_page); 507 flush_dcache_page(bv.bv_page);
483 bvec_kunmap_irq(data, &flags); 508 bvec_kunmap_irq(data, &flags);
484 } 509 }
485} 510}
@@ -515,51 +540,49 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
515EXPORT_SYMBOL(bio_phys_segments); 540EXPORT_SYMBOL(bio_phys_segments);
516 541
517/** 542/**
518 * __bio_clone - clone a bio 543 * __bio_clone_fast - clone a bio that shares the original bio's biovec
519 * @bio: destination bio 544 * @bio: destination bio
520 * @bio_src: bio to clone 545 * @bio_src: bio to clone
521 * 546 *
522 * Clone a &bio. Caller will own the returned bio, but not 547 * Clone a &bio. Caller will own the returned bio, but not
523 * the actual data it points to. Reference count of returned 548 * the actual data it points to. Reference count of returned
524 * bio will be one. 549 * bio will be one.
550 *
551 * Caller must ensure that @bio_src is not freed before @bio.
525 */ 552 */
526void __bio_clone(struct bio *bio, struct bio *bio_src) 553void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
527{ 554{
528 memcpy(bio->bi_io_vec, bio_src->bi_io_vec, 555 BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
529 bio_src->bi_max_vecs * sizeof(struct bio_vec));
530 556
531 /* 557 /*
532 * most users will be overriding ->bi_bdev with a new target, 558 * most users will be overriding ->bi_bdev with a new target,
533 * so we don't set nor calculate new physical/hw segment counts here 559 * so we don't set nor calculate new physical/hw segment counts here
534 */ 560 */
535 bio->bi_sector = bio_src->bi_sector;
536 bio->bi_bdev = bio_src->bi_bdev; 561 bio->bi_bdev = bio_src->bi_bdev;
537 bio->bi_flags |= 1 << BIO_CLONED; 562 bio->bi_flags |= 1 << BIO_CLONED;
538 bio->bi_rw = bio_src->bi_rw; 563 bio->bi_rw = bio_src->bi_rw;
539 bio->bi_vcnt = bio_src->bi_vcnt; 564 bio->bi_iter = bio_src->bi_iter;
540 bio->bi_size = bio_src->bi_size; 565 bio->bi_io_vec = bio_src->bi_io_vec;
541 bio->bi_idx = bio_src->bi_idx;
542} 566}
543EXPORT_SYMBOL(__bio_clone); 567EXPORT_SYMBOL(__bio_clone_fast);
544 568
545/** 569/**
546 * bio_clone_bioset - clone a bio 570 * bio_clone_fast - clone a bio that shares the original bio's biovec
547 * @bio: bio to clone 571 * @bio: bio to clone
548 * @gfp_mask: allocation priority 572 * @gfp_mask: allocation priority
549 * @bs: bio_set to allocate from 573 * @bs: bio_set to allocate from
550 * 574 *
551 * Like __bio_clone, only also allocates the returned bio 575 * Like __bio_clone_fast, only also allocates the returned bio
552 */ 576 */
553struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask, 577struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
554 struct bio_set *bs)
555{ 578{
556 struct bio *b; 579 struct bio *b;
557 580
558 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs); 581 b = bio_alloc_bioset(gfp_mask, 0, bs);
559 if (!b) 582 if (!b)
560 return NULL; 583 return NULL;
561 584
562 __bio_clone(b, bio); 585 __bio_clone_fast(b, bio);
563 586
564 if (bio_integrity(bio)) { 587 if (bio_integrity(bio)) {
565 int ret; 588 int ret;
@@ -574,6 +597,74 @@ struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
574 597
575 return b; 598 return b;
576} 599}
600EXPORT_SYMBOL(bio_clone_fast);
601
602/**
603 * bio_clone_bioset - clone a bio
604 * @bio_src: bio to clone
605 * @gfp_mask: allocation priority
606 * @bs: bio_set to allocate from
607 *
608 * Clone bio. Caller will own the returned bio, but not the actual data it
609 * points to. Reference count of returned bio will be one.
610 */
611struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
612 struct bio_set *bs)
613{
614 unsigned nr_iovecs = 0;
615 struct bvec_iter iter;
616 struct bio_vec bv;
617 struct bio *bio;
618
619 /*
620 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
621 * bio_src->bi_io_vec to bio->bi_io_vec.
622 *
623 * We can't do that anymore, because:
624 *
625 * - The point of cloning the biovec is to produce a bio with a biovec
626 * the caller can modify: bi_idx and bi_bvec_done should be 0.
627 *
628 * - The original bio could've had more than BIO_MAX_PAGES biovecs; if
629 * we tried to clone the whole thing bio_alloc_bioset() would fail.
630 * But the clone should succeed as long as the number of biovecs we
631 * actually need to allocate is fewer than BIO_MAX_PAGES.
632 *
633 * - Lastly, bi_vcnt should not be looked at or relied upon by code
634 * that does not own the bio - reason being drivers don't use it for
635 * iterating over the biovec anymore, so expecting it to be kept up
636 * to date (i.e. for clones that share the parent biovec) is just
637 * asking for trouble and would force extra work on
638 * __bio_clone_fast() anyways.
639 */
640
641 bio_for_each_segment(bv, bio_src, iter)
642 nr_iovecs++;
643
644 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs);
645 if (!bio)
646 return NULL;
647
648 bio->bi_bdev = bio_src->bi_bdev;
649 bio->bi_rw = bio_src->bi_rw;
650 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
651 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
652
653 bio_for_each_segment(bv, bio_src, iter)
654 bio->bi_io_vec[bio->bi_vcnt++] = bv;
655
656 if (bio_integrity(bio_src)) {
657 int ret;
658
659 ret = bio_integrity_clone(bio, bio_src, gfp_mask);
660 if (ret < 0) {
661 bio_put(bio);
662 return NULL;
663 }
664 }
665
666 return bio;
667}
577EXPORT_SYMBOL(bio_clone_bioset); 668EXPORT_SYMBOL(bio_clone_bioset);
578 669
579/** 670/**
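The split into bio_clone_fast() and the rewritten bio_clone_bioset() follows the comment above: the fast clone shares the parent's biovec, so it is cheap but the parent must outlive it and its biovec must not be edited, while bio_clone_bioset() builds a private copy of exactly the segments the iterator still covers. A hedged sketch of choosing between them (names are ours; bs is a caller-owned bio_set):

#include <linux/bio.h>

/* Sketch only: clone for plain redirection vs. a clone whose biovec will be edited. */
static struct bio *sketch_clone(struct bio *bio, struct bio_set *bs,
				bool will_edit_biovec)
{
	struct bio *clone;

	if (will_edit_biovec)
		clone = bio_clone_bioset(bio, GFP_NOIO, bs);  /* private segment copy  */
	else
		clone = bio_clone_fast(bio, GFP_NOIO, bs);    /* shares bio->bi_io_vec */

	if (clone)
		clone->bi_bdev = bio->bi_bdev;	/* callers typically retarget this */
	return clone;
}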
@@ -612,7 +703,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
612 if (unlikely(bio_flagged(bio, BIO_CLONED))) 703 if (unlikely(bio_flagged(bio, BIO_CLONED)))
613 return 0; 704 return 0;
614 705
615 if (((bio->bi_size + len) >> 9) > max_sectors) 706 if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
616 return 0; 707 return 0;
617 708
618 /* 709 /*
@@ -635,8 +726,9 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
635 simulate merging updated prev_bvec 726 simulate merging updated prev_bvec
636 as new bvec. */ 727 as new bvec. */
637 .bi_bdev = bio->bi_bdev, 728 .bi_bdev = bio->bi_bdev,
638 .bi_sector = bio->bi_sector, 729 .bi_sector = bio->bi_iter.bi_sector,
639 .bi_size = bio->bi_size - prev_bv_len, 730 .bi_size = bio->bi_iter.bi_size -
731 prev_bv_len,
640 .bi_rw = bio->bi_rw, 732 .bi_rw = bio->bi_rw,
641 }; 733 };
642 734
@@ -684,8 +776,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
684 if (q->merge_bvec_fn) { 776 if (q->merge_bvec_fn) {
685 struct bvec_merge_data bvm = { 777 struct bvec_merge_data bvm = {
686 .bi_bdev = bio->bi_bdev, 778 .bi_bdev = bio->bi_bdev,
687 .bi_sector = bio->bi_sector, 779 .bi_sector = bio->bi_iter.bi_sector,
688 .bi_size = bio->bi_size, 780 .bi_size = bio->bi_iter.bi_size,
689 .bi_rw = bio->bi_rw, 781 .bi_rw = bio->bi_rw,
690 }; 782 };
691 783
@@ -708,7 +800,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
708 bio->bi_vcnt++; 800 bio->bi_vcnt++;
709 bio->bi_phys_segments++; 801 bio->bi_phys_segments++;
710 done: 802 done:
711 bio->bi_size += len; 803 bio->bi_iter.bi_size += len;
712 return len; 804 return len;
713} 805}
714 806
@@ -807,28 +899,7 @@ void bio_advance(struct bio *bio, unsigned bytes)
807 if (bio_integrity(bio)) 899 if (bio_integrity(bio))
808 bio_integrity_advance(bio, bytes); 900 bio_integrity_advance(bio, bytes);
809 901
810 bio->bi_sector += bytes >> 9; 902 bio_advance_iter(bio, &bio->bi_iter, bytes);
811 bio->bi_size -= bytes;
812
813 if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
814 return;
815
816 while (bytes) {
817 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
818 WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
819 bio->bi_idx, bio->bi_vcnt);
820 break;
821 }
822
823 if (bytes >= bio_iovec(bio)->bv_len) {
824 bytes -= bio_iovec(bio)->bv_len;
825 bio->bi_idx++;
826 } else {
827 bio_iovec(bio)->bv_len -= bytes;
828 bio_iovec(bio)->bv_offset += bytes;
829 bytes = 0;
830 }
831 }
832} 903}
833EXPORT_SYMBOL(bio_advance); 904EXPORT_SYMBOL(bio_advance);
834 905
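bio_advance() no longer edits bv_len, bv_offset, or bi_idx in place; all position state lives in the bvec_iter and is moved by bio_advance_iter(). The standalone model below (plain userspace C with simplified types and names of our own) mimics that bookkeeping to show how an advance crosses segment boundaries without touching the biovec itself:

#include <stdio.h>

struct bvec { unsigned len; };               /* stand-in for struct bio_vec   */
struct iter { unsigned idx, done, size; };   /* stand-in for struct bvec_iter */

static void iter_advance(const struct bvec *bv, struct iter *it, unsigned bytes)
{
	it->size -= bytes;                   /* bytes left in the "bio" */
	while (bytes) {
		unsigned cur = bv[it->idx].len - it->done;
		unsigned step = bytes < cur ? bytes : cur;

		it->done += step;
		bytes -= step;
		if (it->done == bv[it->idx].len) {   /* segment fully consumed */
			it->idx++;
			it->done = 0;
		}
	}
}

int main(void)
{
	struct bvec vec[] = { { 512 }, { 1024 }, { 512 } };
	struct iter it = { 0, 0, 2048 };

	iter_advance(vec, &it, 768);         /* crosses the first segment boundary */
	printf("idx=%u done=%u size=%u\n", it.idx, it.done, it.size);
	/* prints: idx=1 done=256 size=1280 */
	return 0;
}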
@@ -874,117 +945,80 @@ EXPORT_SYMBOL(bio_alloc_pages);
874 */ 945 */
875void bio_copy_data(struct bio *dst, struct bio *src) 946void bio_copy_data(struct bio *dst, struct bio *src)
876{ 947{
877 struct bio_vec *src_bv, *dst_bv; 948 struct bvec_iter src_iter, dst_iter;
878 unsigned src_offset, dst_offset, bytes; 949 struct bio_vec src_bv, dst_bv;
879 void *src_p, *dst_p; 950 void *src_p, *dst_p;
951 unsigned bytes;
880 952
881 src_bv = bio_iovec(src); 953 src_iter = src->bi_iter;
882 dst_bv = bio_iovec(dst); 954 dst_iter = dst->bi_iter;
883
884 src_offset = src_bv->bv_offset;
885 dst_offset = dst_bv->bv_offset;
886 955
887 while (1) { 956 while (1) {
888 if (src_offset == src_bv->bv_offset + src_bv->bv_len) { 957 if (!src_iter.bi_size) {
889 src_bv++; 958 src = src->bi_next;
890 if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) { 959 if (!src)
891 src = src->bi_next; 960 break;
892 if (!src)
893 break;
894
895 src_bv = bio_iovec(src);
896 }
897 961
898 src_offset = src_bv->bv_offset; 962 src_iter = src->bi_iter;
899 } 963 }
900 964
901 if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) { 965 if (!dst_iter.bi_size) {
902 dst_bv++; 966 dst = dst->bi_next;
903 if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) { 967 if (!dst)
904 dst = dst->bi_next; 968 break;
905 if (!dst)
906 break;
907
908 dst_bv = bio_iovec(dst);
909 }
910 969
911 dst_offset = dst_bv->bv_offset; 970 dst_iter = dst->bi_iter;
912 } 971 }
913 972
914 bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset, 973 src_bv = bio_iter_iovec(src, src_iter);
915 src_bv->bv_offset + src_bv->bv_len - src_offset); 974 dst_bv = bio_iter_iovec(dst, dst_iter);
975
976 bytes = min(src_bv.bv_len, dst_bv.bv_len);
916 977
917 src_p = kmap_atomic(src_bv->bv_page); 978 src_p = kmap_atomic(src_bv.bv_page);
918 dst_p = kmap_atomic(dst_bv->bv_page); 979 dst_p = kmap_atomic(dst_bv.bv_page);
919 980
920 memcpy(dst_p + dst_offset, 981 memcpy(dst_p + dst_bv.bv_offset,
921 src_p + src_offset, 982 src_p + src_bv.bv_offset,
922 bytes); 983 bytes);
923 984
924 kunmap_atomic(dst_p); 985 kunmap_atomic(dst_p);
925 kunmap_atomic(src_p); 986 kunmap_atomic(src_p);
926 987
927 src_offset += bytes; 988 bio_advance_iter(src, &src_iter, bytes);
928 dst_offset += bytes; 989 bio_advance_iter(dst, &dst_iter, bytes);
929 } 990 }
930} 991}
931EXPORT_SYMBOL(bio_copy_data); 992EXPORT_SYMBOL(bio_copy_data);
932 993
933struct bio_map_data { 994struct bio_map_data {
934 struct bio_vec *iovecs;
935 struct sg_iovec *sgvecs;
936 int nr_sgvecs; 995 int nr_sgvecs;
937 int is_our_pages; 996 int is_our_pages;
997 struct sg_iovec sgvecs[];
938}; 998};
939 999
940static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, 1000static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
941 struct sg_iovec *iov, int iov_count, 1001 struct sg_iovec *iov, int iov_count,
942 int is_our_pages) 1002 int is_our_pages)
943{ 1003{
944 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
945 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); 1004 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
946 bmd->nr_sgvecs = iov_count; 1005 bmd->nr_sgvecs = iov_count;
947 bmd->is_our_pages = is_our_pages; 1006 bmd->is_our_pages = is_our_pages;
948 bio->bi_private = bmd; 1007 bio->bi_private = bmd;
949} 1008}
950 1009
951static void bio_free_map_data(struct bio_map_data *bmd)
952{
953 kfree(bmd->iovecs);
954 kfree(bmd->sgvecs);
955 kfree(bmd);
956}
957
958static struct bio_map_data *bio_alloc_map_data(int nr_segs, 1010static struct bio_map_data *bio_alloc_map_data(int nr_segs,
959 unsigned int iov_count, 1011 unsigned int iov_count,
960 gfp_t gfp_mask) 1012 gfp_t gfp_mask)
961{ 1013{
962 struct bio_map_data *bmd;
963
964 if (iov_count > UIO_MAXIOV) 1014 if (iov_count > UIO_MAXIOV)
965 return NULL; 1015 return NULL;
966 1016
967 bmd = kmalloc(sizeof(*bmd), gfp_mask); 1017 return kmalloc(sizeof(struct bio_map_data) +
968 if (!bmd) 1018 sizeof(struct sg_iovec) * iov_count, gfp_mask);
969 return NULL;
970
971 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
972 if (!bmd->iovecs) {
973 kfree(bmd);
974 return NULL;
975 }
976
977 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
978 if (bmd->sgvecs)
979 return bmd;
980
981 kfree(bmd->iovecs);
982 kfree(bmd);
983 return NULL;
984} 1019}
985 1020
986static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, 1021static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
987 struct sg_iovec *iov, int iov_count,
988 int to_user, int from_user, int do_free_page) 1022 int to_user, int from_user, int do_free_page)
989{ 1023{
990 int ret = 0, i; 1024 int ret = 0, i;
@@ -994,7 +1028,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
994 1028
995 bio_for_each_segment_all(bvec, bio, i) { 1029 bio_for_each_segment_all(bvec, bio, i) {
996 char *bv_addr = page_address(bvec->bv_page); 1030 char *bv_addr = page_address(bvec->bv_page);
997 unsigned int bv_len = iovecs[i].bv_len; 1031 unsigned int bv_len = bvec->bv_len;
998 1032
999 while (bv_len && iov_idx < iov_count) { 1033 while (bv_len && iov_idx < iov_count) {
1000 unsigned int bytes; 1034 unsigned int bytes;
@@ -1054,14 +1088,14 @@ int bio_uncopy_user(struct bio *bio)
1054 * don't copy into a random user address space, just free. 1088 * don't copy into a random user address space, just free.
1055 */ 1089 */
1056 if (current->mm) 1090 if (current->mm)
1057 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, 1091 ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
1058 bmd->nr_sgvecs, bio_data_dir(bio) == READ, 1092 bio_data_dir(bio) == READ,
1059 0, bmd->is_our_pages); 1093 0, bmd->is_our_pages);
1060 else if (bmd->is_our_pages) 1094 else if (bmd->is_our_pages)
1061 bio_for_each_segment_all(bvec, bio, i) 1095 bio_for_each_segment_all(bvec, bio, i)
1062 __free_page(bvec->bv_page); 1096 __free_page(bvec->bv_page);
1063 } 1097 }
1064 bio_free_map_data(bmd); 1098 kfree(bmd);
1065 bio_put(bio); 1099 bio_put(bio);
1066 return ret; 1100 return ret;
1067} 1101}
@@ -1175,7 +1209,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1175 */ 1209 */
1176 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || 1210 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
1177 (map_data && map_data->from_user)) { 1211 (map_data && map_data->from_user)) {
1178 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0); 1212 ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
1179 if (ret) 1213 if (ret)
1180 goto cleanup; 1214 goto cleanup;
1181 } 1215 }
@@ -1189,7 +1223,7 @@ cleanup:
1189 1223
1190 bio_put(bio); 1224 bio_put(bio);
1191out_bmd: 1225out_bmd:
1192 bio_free_map_data(bmd); 1226 kfree(bmd);
1193 return ERR_PTR(ret); 1227 return ERR_PTR(ret);
1194} 1228}
1195 1229
@@ -1485,7 +1519,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1485 if (IS_ERR(bio)) 1519 if (IS_ERR(bio))
1486 return bio; 1520 return bio;
1487 1521
1488 if (bio->bi_size == len) 1522 if (bio->bi_iter.bi_size == len)
1489 return bio; 1523 return bio;
1490 1524
1491 /* 1525 /*
@@ -1506,16 +1540,15 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
1506 1540
1507 bio_for_each_segment_all(bvec, bio, i) { 1541 bio_for_each_segment_all(bvec, bio, i) {
1508 char *addr = page_address(bvec->bv_page); 1542 char *addr = page_address(bvec->bv_page);
1509 int len = bmd->iovecs[i].bv_len;
1510 1543
1511 if (read) 1544 if (read)
1512 memcpy(p, addr, len); 1545 memcpy(p, addr, bvec->bv_len);
1513 1546
1514 __free_page(bvec->bv_page); 1547 __free_page(bvec->bv_page);
1515 p += len; 1548 p += bvec->bv_len;
1516 } 1549 }
1517 1550
1518 bio_free_map_data(bmd); 1551 kfree(bmd);
1519 bio_put(bio); 1552 bio_put(bio);
1520} 1553}
1521 1554
@@ -1686,11 +1719,11 @@ void bio_check_pages_dirty(struct bio *bio)
1686#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1719#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1687void bio_flush_dcache_pages(struct bio *bi) 1720void bio_flush_dcache_pages(struct bio *bi)
1688{ 1721{
1689 int i; 1722 struct bio_vec bvec;
1690 struct bio_vec *bvec; 1723 struct bvec_iter iter;
1691 1724
1692 bio_for_each_segment(bvec, bi, i) 1725 bio_for_each_segment(bvec, bi, iter)
1693 flush_dcache_page(bvec->bv_page); 1726 flush_dcache_page(bvec.bv_page);
1694} 1727}
1695EXPORT_SYMBOL(bio_flush_dcache_pages); 1728EXPORT_SYMBOL(bio_flush_dcache_pages);
1696#endif 1729#endif
@@ -1711,96 +1744,86 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
1711 **/ 1744 **/
1712void bio_endio(struct bio *bio, int error) 1745void bio_endio(struct bio *bio, int error)
1713{ 1746{
1714 if (error) 1747 while (bio) {
1715 clear_bit(BIO_UPTODATE, &bio->bi_flags); 1748 BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
1716 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1717 error = -EIO;
1718 1749
1719 if (bio->bi_end_io) 1750 if (error)
1720 bio->bi_end_io(bio, error); 1751 clear_bit(BIO_UPTODATE, &bio->bi_flags);
1721} 1752 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1722EXPORT_SYMBOL(bio_endio); 1753 error = -EIO;
1723 1754
1724void bio_pair_release(struct bio_pair *bp) 1755 if (!atomic_dec_and_test(&bio->bi_remaining))
1725{ 1756 return;
1726 if (atomic_dec_and_test(&bp->cnt)) {
1727 struct bio *master = bp->bio1.bi_private;
1728 1757
1729 bio_endio(master, bp->error); 1758 /*
1730 mempool_free(bp, bp->bio2.bi_private); 1759 * Need to have a real endio function for chained bios,
1760 * otherwise various corner cases will break (like stacking
1761 * block devices that save/restore bi_end_io) - however, we want
1762 * to avoid unbounded recursion and blowing the stack. Tail call
1763 * optimization would handle this, but compiling with frame
1764 * pointers also disables gcc's sibling call optimization.
1765 */
1766 if (bio->bi_end_io == bio_chain_endio) {
1767 struct bio *parent = bio->bi_private;
1768 bio_put(bio);
1769 bio = parent;
1770 } else {
1771 if (bio->bi_end_io)
1772 bio->bi_end_io(bio, error);
1773 bio = NULL;
1774 }
1731 } 1775 }
1732} 1776}
1733EXPORT_SYMBOL(bio_pair_release); 1777EXPORT_SYMBOL(bio_endio);
1734 1778
1735static void bio_pair_end_1(struct bio *bi, int err) 1779/**
1780 * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
1781 * @bio: bio
1782 * @error: error, if any
1783 *
 1784 * For code that has saved and restored bi_end_io; think hard before using this
1785 * function, probably you should've cloned the entire bio.
1786 **/
1787void bio_endio_nodec(struct bio *bio, int error)
1736{ 1788{
1737 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1); 1789 atomic_inc(&bio->bi_remaining);
1738 1790 bio_endio(bio, error);
1739 if (err)
1740 bp->error = err;
1741
1742 bio_pair_release(bp);
1743} 1791}
1792EXPORT_SYMBOL(bio_endio_nodec);
1744 1793
1745static void bio_pair_end_2(struct bio *bi, int err) 1794/**
1746{ 1795 * bio_split - split a bio
1747 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2); 1796 * @bio: bio to split
1748 1797 * @sectors: number of sectors to split from the front of @bio
1749 if (err) 1798 * @gfp: gfp mask
1750 bp->error = err; 1799 * @bs: bio set to allocate from
1751 1800 *
1752 bio_pair_release(bp); 1801 * Allocates and returns a new bio which represents @sectors from the start of
1753} 1802 * @bio, and updates @bio to represent the remaining sectors.
1754 1803 *
1755/* 1804 * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
1756 * split a bio - only worry about a bio with a single page in its iovec 1805 * responsibility to ensure that @bio is not freed before the split.
1757 */ 1806 */
1758struct bio_pair *bio_split(struct bio *bi, int first_sectors) 1807struct bio *bio_split(struct bio *bio, int sectors,
1808 gfp_t gfp, struct bio_set *bs)
1759{ 1809{
1760 struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); 1810 struct bio *split = NULL;
1761
1762 if (!bp)
1763 return bp;
1764
1765 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1766 bi->bi_sector + first_sectors);
1767
1768 BUG_ON(bio_segments(bi) > 1);
1769 atomic_set(&bp->cnt, 3);
1770 bp->error = 0;
1771 bp->bio1 = *bi;
1772 bp->bio2 = *bi;
1773 bp->bio2.bi_sector += first_sectors;
1774 bp->bio2.bi_size -= first_sectors << 9;
1775 bp->bio1.bi_size = first_sectors << 9;
1776
1777 if (bi->bi_vcnt != 0) {
1778 bp->bv1 = *bio_iovec(bi);
1779 bp->bv2 = *bio_iovec(bi);
1780
1781 if (bio_is_rw(bi)) {
1782 bp->bv2.bv_offset += first_sectors << 9;
1783 bp->bv2.bv_len -= first_sectors << 9;
1784 bp->bv1.bv_len = first_sectors << 9;
1785 }
1786 1811
1787 bp->bio1.bi_io_vec = &bp->bv1; 1812 BUG_ON(sectors <= 0);
1788 bp->bio2.bi_io_vec = &bp->bv2; 1813 BUG_ON(sectors >= bio_sectors(bio));
1789 1814
1790 bp->bio1.bi_max_vecs = 1; 1815 split = bio_clone_fast(bio, gfp, bs);
1791 bp->bio2.bi_max_vecs = 1; 1816 if (!split)
1792 } 1817 return NULL;
1793 1818
1794 bp->bio1.bi_end_io = bio_pair_end_1; 1819 split->bi_iter.bi_size = sectors << 9;
1795 bp->bio2.bi_end_io = bio_pair_end_2;
1796 1820
1797 bp->bio1.bi_private = bi; 1821 if (bio_integrity(split))
1798 bp->bio2.bi_private = bio_split_pool; 1822 bio_integrity_trim(split, 0, sectors);
1799 1823
1800 if (bio_integrity(bi)) 1824 bio_advance(bio, split->bi_iter.bi_size);
1801 bio_integrity_split(bi, bp, first_sectors);
1802 1825
1803 return bp; 1826 return split;
1804} 1827}
1805EXPORT_SYMBOL(bio_split); 1828EXPORT_SYMBOL(bio_split);
1806 1829
@@ -1814,80 +1837,20 @@ void bio_trim(struct bio *bio, int offset, int size)
1814{ 1837{
1815 /* 'bio' is a cloned bio which we need to trim to match 1838 /* 'bio' is a cloned bio which we need to trim to match
1816 * the given offset and size. 1839 * the given offset and size.
1817 * This requires adjusting bi_sector, bi_size, and bi_io_vec
1818 */ 1840 */
1819 int i;
1820 struct bio_vec *bvec;
1821 int sofar = 0;
1822 1841
1823 size <<= 9; 1842 size <<= 9;
1824 if (offset == 0 && size == bio->bi_size) 1843 if (offset == 0 && size == bio->bi_iter.bi_size)
1825 return; 1844 return;
1826 1845
1827 clear_bit(BIO_SEG_VALID, &bio->bi_flags); 1846 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
1828 1847
1829 bio_advance(bio, offset << 9); 1848 bio_advance(bio, offset << 9);
1830 1849
1831 bio->bi_size = size; 1850 bio->bi_iter.bi_size = size;
1832
1833 /* avoid any complications with bi_idx being non-zero*/
1834 if (bio->bi_idx) {
1835 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
1836 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
1837 bio->bi_vcnt -= bio->bi_idx;
1838 bio->bi_idx = 0;
1839 }
1840 /* Make sure vcnt and last bv are not too big */
1841 bio_for_each_segment(bvec, bio, i) {
1842 if (sofar + bvec->bv_len > size)
1843 bvec->bv_len = size - sofar;
1844 if (bvec->bv_len == 0) {
1845 bio->bi_vcnt = i;
1846 break;
1847 }
1848 sofar += bvec->bv_len;
1849 }
1850} 1851}
1851EXPORT_SYMBOL_GPL(bio_trim); 1852EXPORT_SYMBOL_GPL(bio_trim);
1852 1853
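A small hypothetical illustration of the simplified bio_trim(): restricting a clone to a sub-range of its parent. clone_subrange() and its parameters are assumptions; only bio_clone_fast() and bio_trim() come from the patch.

/* Sketch only: limit a clone to @nr_sects sectors starting @off sectors
 * into the parent.  With immutable biovecs this touches only the iterator. */
static struct bio *clone_subrange(struct bio *parent, int off, int nr_sects,
				  struct bio_set *bs)
{
	struct bio *clone = bio_clone_fast(parent, GFP_NOIO, bs);

	if (!clone)
		return NULL;

	/* bio_trim() advances bi_iter by @off sectors and caps bi_size;
	 * the bi_io_vec itself is left untouched. */
	bio_trim(clone, off, nr_sects);
	return clone;
}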
1853/**
1854 * bio_sector_offset - Find hardware sector offset in bio
1855 * @bio: bio to inspect
1856 * @index: bio_vec index
1857 * @offset: offset in bv_page
1858 *
1859 * Return the number of hardware sectors between beginning of bio
1860 * and an end point indicated by a bio_vec index and an offset
1861 * within that vector's page.
1862 */
1863sector_t bio_sector_offset(struct bio *bio, unsigned short index,
1864 unsigned int offset)
1865{
1866 unsigned int sector_sz;
1867 struct bio_vec *bv;
1868 sector_t sectors;
1869 int i;
1870
1871 sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
1872 sectors = 0;
1873
1874 if (index >= bio->bi_idx)
1875 index = bio->bi_vcnt - 1;
1876
1877 bio_for_each_segment_all(bv, bio, i) {
1878 if (i == index) {
1879 if (offset > bv->bv_offset)
1880 sectors += (offset - bv->bv_offset) / sector_sz;
1881 break;
1882 }
1883
1884 sectors += bv->bv_len / sector_sz;
1885 }
1886
1887 return sectors;
1888}
1889EXPORT_SYMBOL(bio_sector_offset);
1890
1891/* 1854/*
1892 * create memory pools for biovec's in a bio_set. 1855 * create memory pools for biovec's in a bio_set.
1893 * use the global biovec slabs created for general use. 1856 * use the global biovec slabs created for general use.
@@ -2065,11 +2028,6 @@ static int __init init_bio(void)
2065 if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE)) 2028 if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
2066 panic("bio: can't create integrity pool\n"); 2029 panic("bio: can't create integrity pool\n");
2067 2030
2068 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
2069 sizeof(struct bio_pair));
2070 if (!bio_split_pool)
2071 panic("bio: can't create split pool\n");
2072
2073 return 0; 2031 return 0;
2074} 2032}
2075subsys_initcall(init_bio); 2033subsys_initcall(init_bio);
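With bio_split_pool and struct bio_pair removed above, code that splits bios is expected to allocate from a bio_set it owns. A hypothetical driver-init sketch follows; my_split_bs and the init/exit names are assumptions.

/* Sketch only: a driver keeps its own bio_set for splitting instead of
 * relying on the removed global bio_split_pool. */
static struct bio_set *my_split_bs;

static int __init my_driver_init(void)
{
	/* second argument is front padding for per-bio driver data */
	my_split_bs = bioset_create(BIO_POOL_SIZE, 0);
	if (!my_split_bs)
		return -ENOMEM;
	return 0;
}

static void __exit my_driver_exit(void)
{
	bioset_free(my_split_bs);
}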
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 131d82800b3a..cb05e1c842c5 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1695,7 +1695,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
1695 return -1; 1695 return -1;
1696 } 1696 }
1697 bio->bi_bdev = block_ctx->dev->bdev; 1697 bio->bi_bdev = block_ctx->dev->bdev;
1698 bio->bi_sector = dev_bytenr >> 9; 1698 bio->bi_iter.bi_sector = dev_bytenr >> 9;
1699 1699
1700 for (j = i; j < num_pages; j++) { 1700 for (j = i; j < num_pages; j++) {
1701 ret = bio_add_page(bio, block_ctx->pagev[j], 1701 ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -3013,7 +3013,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
3013 int bio_is_patched; 3013 int bio_is_patched;
3014 char **mapped_datav; 3014 char **mapped_datav;
3015 3015
3016 dev_bytenr = 512 * bio->bi_sector; 3016 dev_bytenr = 512 * bio->bi_iter.bi_sector;
3017 bio_is_patched = 0; 3017 bio_is_patched = 0;
3018 if (dev_state->state->print_mask & 3018 if (dev_state->state->print_mask &
3019 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3019 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
@@ -3021,8 +3021,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
3021 "submit_bio(rw=0x%x, bi_vcnt=%u," 3021 "submit_bio(rw=0x%x, bi_vcnt=%u,"
3022 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", 3022 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
3023 rw, bio->bi_vcnt, 3023 rw, bio->bi_vcnt,
3024 (unsigned long long)bio->bi_sector, dev_bytenr, 3024 (unsigned long long)bio->bi_iter.bi_sector,
3025 bio->bi_bdev); 3025 dev_bytenr, bio->bi_bdev);
3026 3026
3027 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3027 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3028 GFP_NOFS); 3028 GFP_NOFS);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1499b27b4186..f5cdeb4b5538 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -172,7 +172,8 @@ static void end_compressed_bio_read(struct bio *bio, int err)
172 goto out; 172 goto out;
173 173
174 inode = cb->inode; 174 inode = cb->inode;
175 ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9); 175 ret = check_compressed_csum(inode, cb,
176 (u64)bio->bi_iter.bi_sector << 9);
176 if (ret) 177 if (ret)
177 goto csum_failed; 178 goto csum_failed;
178 179
@@ -201,18 +202,16 @@ csum_failed:
201 if (cb->errors) { 202 if (cb->errors) {
202 bio_io_error(cb->orig_bio); 203 bio_io_error(cb->orig_bio);
203 } else { 204 } else {
204 int bio_index = 0; 205 int i;
205 struct bio_vec *bvec = cb->orig_bio->bi_io_vec; 206 struct bio_vec *bvec;
206 207
207 /* 208 /*
208 * we have verified the checksum already, set page 209 * we have verified the checksum already, set page
209 * checked so the end_io handlers know about it 210 * checked so the end_io handlers know about it
210 */ 211 */
211 while (bio_index < cb->orig_bio->bi_vcnt) { 212 bio_for_each_segment_all(bvec, cb->orig_bio, i)
212 SetPageChecked(bvec->bv_page); 213 SetPageChecked(bvec->bv_page);
213 bvec++; 214
214 bio_index++;
215 }
216 bio_endio(cb->orig_bio, 0); 215 bio_endio(cb->orig_bio, 0);
217 } 216 }
218 217
@@ -372,7 +371,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
372 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { 371 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
373 page = compressed_pages[pg_index]; 372 page = compressed_pages[pg_index];
374 page->mapping = inode->i_mapping; 373 page->mapping = inode->i_mapping;
375 if (bio->bi_size) 374 if (bio->bi_iter.bi_size)
376 ret = io_tree->ops->merge_bio_hook(WRITE, page, 0, 375 ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
377 PAGE_CACHE_SIZE, 376 PAGE_CACHE_SIZE,
378 bio, 0); 377 bio, 0);
@@ -506,7 +505,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
506 505
507 if (!em || last_offset < em->start || 506 if (!em || last_offset < em->start ||
508 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || 507 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
509 (em->block_start >> 9) != cb->orig_bio->bi_sector) { 508 (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
510 free_extent_map(em); 509 free_extent_map(em);
511 unlock_extent(tree, last_offset, end); 510 unlock_extent(tree, last_offset, end);
512 unlock_page(page); 511 unlock_page(page);
@@ -552,7 +551,7 @@ next:
552 * in it. We don't actually do IO on those pages but allocate new ones 551 * in it. We don't actually do IO on those pages but allocate new ones
553 * to hold the compressed pages on disk. 552 * to hold the compressed pages on disk.
554 * 553 *
555 * bio->bi_sector points to the compressed extent on disk 554 * bio->bi_iter.bi_sector points to the compressed extent on disk
556 * bio->bi_io_vec points to all of the inode pages 555 * bio->bi_io_vec points to all of the inode pages
557 * bio->bi_vcnt is a count of pages 556 * bio->bi_vcnt is a count of pages
558 * 557 *
@@ -573,7 +572,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
573 struct page *page; 572 struct page *page;
574 struct block_device *bdev; 573 struct block_device *bdev;
575 struct bio *comp_bio; 574 struct bio *comp_bio;
576 u64 cur_disk_byte = (u64)bio->bi_sector << 9; 575 u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
577 u64 em_len; 576 u64 em_len;
578 u64 em_start; 577 u64 em_start;
579 struct extent_map *em; 578 struct extent_map *em;
@@ -659,7 +658,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
659 page->mapping = inode->i_mapping; 658 page->mapping = inode->i_mapping;
660 page->index = em_start >> PAGE_CACHE_SHIFT; 659 page->index = em_start >> PAGE_CACHE_SHIFT;
661 660
662 if (comp_bio->bi_size) 661 if (comp_bio->bi_iter.bi_size)
663 ret = tree->ops->merge_bio_hook(READ, page, 0, 662 ret = tree->ops->merge_bio_hook(READ, page, 0,
664 PAGE_CACHE_SIZE, 663 PAGE_CACHE_SIZE,
665 comp_bio, 0); 664 comp_bio, 0);
@@ -687,8 +686,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
687 comp_bio, sums); 686 comp_bio, sums);
688 BUG_ON(ret); /* -ENOMEM */ 687 BUG_ON(ret); /* -ENOMEM */
689 } 688 }
690 sums += (comp_bio->bi_size + root->sectorsize - 1) / 689 sums += (comp_bio->bi_iter.bi_size +
691 root->sectorsize; 690 root->sectorsize - 1) / root->sectorsize;
692 691
693 ret = btrfs_map_bio(root, READ, comp_bio, 692 ret = btrfs_map_bio(root, READ, comp_bio,
694 mirror_num, 0); 693 mirror_num, 0);
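The completion handlers converted above all move from open-coded bvec walks to the same iteration idiom; a condensed, hypothetical sketch of it follows (my_end_io() is an illustrative name, the page helpers are ordinary page-cache calls).

/* Sketch only: post-conversion completion handler walking every segment
 * with bio_for_each_segment_all(), which visits the whole bi_io_vec and
 * is reserved for the bio's owner. */
static void my_end_io(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (err)
			SetPageError(page);
		else
			SetPageUptodate(page);
		unlock_page(page);
	}

	bio_put(bio);
}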
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8072cfa8a3b1..e71039ea66cf 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -842,20 +842,17 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
842 842
843static int btree_csum_one_bio(struct bio *bio) 843static int btree_csum_one_bio(struct bio *bio)
844{ 844{
845 struct bio_vec *bvec = bio->bi_io_vec; 845 struct bio_vec *bvec;
846 int bio_index = 0;
847 struct btrfs_root *root; 846 struct btrfs_root *root;
848 int ret = 0; 847 int i, ret = 0;
849 848
850 WARN_ON(bio->bi_vcnt <= 0); 849 bio_for_each_segment_all(bvec, bio, i) {
851 while (bio_index < bio->bi_vcnt) {
852 root = BTRFS_I(bvec->bv_page->mapping->host)->root; 850 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
853 ret = csum_dirty_buffer(root, bvec->bv_page); 851 ret = csum_dirty_buffer(root, bvec->bv_page);
854 if (ret) 852 if (ret)
855 break; 853 break;
856 bio_index++;
857 bvec++;
858 } 854 }
855
859 return ret; 856 return ret;
860} 857}
861 858
@@ -1695,7 +1692,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
1695 bio->bi_private = end_io_wq->private; 1692 bio->bi_private = end_io_wq->private;
1696 bio->bi_end_io = end_io_wq->end_io; 1693 bio->bi_end_io = end_io_wq->end_io;
1697 kfree(end_io_wq); 1694 kfree(end_io_wq);
1698 bio_endio(bio, error); 1695 bio_endio_nodec(bio, error);
1699} 1696}
1700 1697
1701static int cleaner_kthread(void *arg) 1698static int cleaner_kthread(void *arg)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ff43802a7c88..bcb6f1b780d6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1984,7 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
1984 bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 1984 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1985 if (!bio) 1985 if (!bio)
1986 return -EIO; 1986 return -EIO;
1987 bio->bi_size = 0; 1987 bio->bi_iter.bi_size = 0;
1988 map_length = length; 1988 map_length = length;
1989 1989
1990 ret = btrfs_map_block(fs_info, WRITE, logical, 1990 ret = btrfs_map_block(fs_info, WRITE, logical,
@@ -1995,7 +1995,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
1995 } 1995 }
1996 BUG_ON(mirror_num != bbio->mirror_num); 1996 BUG_ON(mirror_num != bbio->mirror_num);
1997 sector = bbio->stripes[mirror_num-1].physical >> 9; 1997 sector = bbio->stripes[mirror_num-1].physical >> 9;
1998 bio->bi_sector = sector; 1998 bio->bi_iter.bi_sector = sector;
1999 dev = bbio->stripes[mirror_num-1].dev; 1999 dev = bbio->stripes[mirror_num-1].dev;
2000 kfree(bbio); 2000 kfree(bbio);
2001 if (!dev || !dev->bdev || !dev->writeable) { 2001 if (!dev || !dev->bdev || !dev->writeable) {
@@ -2268,9 +2268,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2268 return -EIO; 2268 return -EIO;
2269 } 2269 }
2270 bio->bi_end_io = failed_bio->bi_end_io; 2270 bio->bi_end_io = failed_bio->bi_end_io;
2271 bio->bi_sector = failrec->logical >> 9; 2271 bio->bi_iter.bi_sector = failrec->logical >> 9;
2272 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2272 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2273 bio->bi_size = 0; 2273 bio->bi_iter.bi_size = 0;
2274 2274
2275 btrfs_failed_bio = btrfs_io_bio(failed_bio); 2275 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2276 if (btrfs_failed_bio->csum) { 2276 if (btrfs_failed_bio->csum) {
@@ -2332,12 +2332,13 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2332 */ 2332 */
2333static void end_bio_extent_writepage(struct bio *bio, int err) 2333static void end_bio_extent_writepage(struct bio *bio, int err)
2334{ 2334{
2335 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2335 struct bio_vec *bvec;
2336 struct extent_io_tree *tree; 2336 struct extent_io_tree *tree;
2337 u64 start; 2337 u64 start;
2338 u64 end; 2338 u64 end;
2339 int i;
2339 2340
2340 do { 2341 bio_for_each_segment_all(bvec, bio, i) {
2341 struct page *page = bvec->bv_page; 2342 struct page *page = bvec->bv_page;
2342 tree = &BTRFS_I(page->mapping->host)->io_tree; 2343 tree = &BTRFS_I(page->mapping->host)->io_tree;
2343 2344
@@ -2355,14 +2356,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2355 start = page_offset(page); 2356 start = page_offset(page);
2356 end = start + bvec->bv_offset + bvec->bv_len - 1; 2357 end = start + bvec->bv_offset + bvec->bv_len - 1;
2357 2358
2358 if (--bvec >= bio->bi_io_vec)
2359 prefetchw(&bvec->bv_page->flags);
2360
2361 if (end_extent_writepage(page, err, start, end)) 2359 if (end_extent_writepage(page, err, start, end))
2362 continue; 2360 continue;
2363 2361
2364 end_page_writeback(page); 2362 end_page_writeback(page);
2365 } while (bvec >= bio->bi_io_vec); 2363 }
2366 2364
2367 bio_put(bio); 2365 bio_put(bio);
2368} 2366}
@@ -2392,9 +2390,8 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2392 */ 2390 */
2393static void end_bio_extent_readpage(struct bio *bio, int err) 2391static void end_bio_extent_readpage(struct bio *bio, int err)
2394{ 2392{
2393 struct bio_vec *bvec;
2395 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2394 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2396 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2397 struct bio_vec *bvec = bio->bi_io_vec;
2398 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 2395 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2399 struct extent_io_tree *tree; 2396 struct extent_io_tree *tree;
2400 u64 offset = 0; 2397 u64 offset = 0;
@@ -2405,16 +2402,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2405 u64 extent_len = 0; 2402 u64 extent_len = 0;
2406 int mirror; 2403 int mirror;
2407 int ret; 2404 int ret;
2405 int i;
2408 2406
2409 if (err) 2407 if (err)
2410 uptodate = 0; 2408 uptodate = 0;
2411 2409
2412 do { 2410 bio_for_each_segment_all(bvec, bio, i) {
2413 struct page *page = bvec->bv_page; 2411 struct page *page = bvec->bv_page;
2414 struct inode *inode = page->mapping->host; 2412 struct inode *inode = page->mapping->host;
2415 2413
2416 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2414 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2417 "mirror=%lu\n", (u64)bio->bi_sector, err, 2415 "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
2418 io_bio->mirror_num); 2416 io_bio->mirror_num);
2419 tree = &BTRFS_I(inode)->io_tree; 2417 tree = &BTRFS_I(inode)->io_tree;
2420 2418
@@ -2433,9 +2431,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2433 end = start + bvec->bv_offset + bvec->bv_len - 1; 2431 end = start + bvec->bv_offset + bvec->bv_len - 1;
2434 len = bvec->bv_len; 2432 len = bvec->bv_len;
2435 2433
2436 if (++bvec <= bvec_end)
2437 prefetchw(&bvec->bv_page->flags);
2438
2439 mirror = io_bio->mirror_num; 2434 mirror = io_bio->mirror_num;
2440 if (likely(uptodate && tree->ops && 2435 if (likely(uptodate && tree->ops &&
2441 tree->ops->readpage_end_io_hook)) { 2436 tree->ops->readpage_end_io_hook)) {
@@ -2516,7 +2511,7 @@ readpage_ok:
2516 extent_start = start; 2511 extent_start = start;
2517 extent_len = end + 1 - start; 2512 extent_len = end + 1 - start;
2518 } 2513 }
2519 } while (bvec <= bvec_end); 2514 }
2520 2515
2521 if (extent_len) 2516 if (extent_len)
2522 endio_readpage_release_extent(tree, extent_start, extent_len, 2517 endio_readpage_release_extent(tree, extent_start, extent_len,
@@ -2547,9 +2542,8 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2547 } 2542 }
2548 2543
2549 if (bio) { 2544 if (bio) {
2550 bio->bi_size = 0;
2551 bio->bi_bdev = bdev; 2545 bio->bi_bdev = bdev;
2552 bio->bi_sector = first_sector; 2546 bio->bi_iter.bi_sector = first_sector;
2553 btrfs_bio = btrfs_io_bio(bio); 2547 btrfs_bio = btrfs_io_bio(bio);
2554 btrfs_bio->csum = NULL; 2548 btrfs_bio->csum = NULL;
2555 btrfs_bio->csum_allocated = NULL; 2549 btrfs_bio->csum_allocated = NULL;
@@ -2643,7 +2637,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2643 if (bio_ret && *bio_ret) { 2637 if (bio_ret && *bio_ret) {
2644 bio = *bio_ret; 2638 bio = *bio_ret;
2645 if (old_compressed) 2639 if (old_compressed)
2646 contig = bio->bi_sector == sector; 2640 contig = bio->bi_iter.bi_sector == sector;
2647 else 2641 else
2648 contig = bio_end_sector(bio) == sector; 2642 contig = bio_end_sector(bio) == sector;
2649 2643
@@ -3410,20 +3404,18 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
3410 3404
3411static void end_bio_extent_buffer_writepage(struct bio *bio, int err) 3405static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3412{ 3406{
3413 int uptodate = err == 0; 3407 struct bio_vec *bvec;
3414 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
3415 struct extent_buffer *eb; 3408 struct extent_buffer *eb;
3416 int done; 3409 int i, done;
3417 3410
3418 do { 3411 bio_for_each_segment_all(bvec, bio, i) {
3419 struct page *page = bvec->bv_page; 3412 struct page *page = bvec->bv_page;
3420 3413
3421 bvec--;
3422 eb = (struct extent_buffer *)page->private; 3414 eb = (struct extent_buffer *)page->private;
3423 BUG_ON(!eb); 3415 BUG_ON(!eb);
3424 done = atomic_dec_and_test(&eb->io_pages); 3416 done = atomic_dec_and_test(&eb->io_pages);
3425 3417
3426 if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { 3418 if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
3427 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); 3419 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3428 ClearPageUptodate(page); 3420 ClearPageUptodate(page);
3429 SetPageError(page); 3421 SetPageError(page);
@@ -3435,10 +3427,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3435 continue; 3427 continue;
3436 3428
3437 end_extent_buffer_writeback(eb); 3429 end_extent_buffer_writeback(eb);
3438 } while (bvec >= bio->bi_io_vec); 3430 }
3439 3431
3440 bio_put(bio); 3432 bio_put(bio);
3441
3442} 3433}
3443 3434
3444static int write_one_eb(struct extent_buffer *eb, 3435static int write_one_eb(struct extent_buffer *eb,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 6f3848860283..84a46a42d262 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
182 if (!path) 182 if (!path)
183 return -ENOMEM; 183 return -ENOMEM;
184 184
185 nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits; 185 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) { 186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, 188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
@@ -201,7 +201,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
201 csum = (u8 *)dst; 201 csum = (u8 *)dst;
202 } 202 }
203 203
204 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 204 if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
205 path->reada = 2; 205 path->reada = 2;
206 206
207 WARN_ON(bio->bi_vcnt <= 0); 207 WARN_ON(bio->bi_vcnt <= 0);
@@ -217,7 +217,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
217 path->skip_locking = 1; 217 path->skip_locking = 1;
218 } 218 }
219 219
220 disk_bytenr = (u64)bio->bi_sector << 9; 220 disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
221 if (dio) 221 if (dio)
222 offset = logical_offset; 222 offset = logical_offset;
223 while (bio_index < bio->bi_vcnt) { 223 while (bio_index < bio->bi_vcnt) {
@@ -302,7 +302,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
302 struct btrfs_dio_private *dip, struct bio *bio, 302 struct btrfs_dio_private *dip, struct bio *bio,
303 u64 offset) 303 u64 offset)
304{ 304{
305 int len = (bio->bi_sector << 9) - dip->disk_bytenr; 305 int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
306 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 306 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
307 int ret; 307 int ret;
308 308
@@ -447,11 +447,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
447 u64 offset; 447 u64 offset;
448 448
449 WARN_ON(bio->bi_vcnt <= 0); 449 WARN_ON(bio->bi_vcnt <= 0);
450 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); 450 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
451 GFP_NOFS);
451 if (!sums) 452 if (!sums)
452 return -ENOMEM; 453 return -ENOMEM;
453 454
454 sums->len = bio->bi_size; 455 sums->len = bio->bi_iter.bi_size;
455 INIT_LIST_HEAD(&sums->list); 456 INIT_LIST_HEAD(&sums->list);
456 457
457 if (contig) 458 if (contig)
@@ -461,7 +462,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
461 462
462 ordered = btrfs_lookup_ordered_extent(inode, offset); 463 ordered = btrfs_lookup_ordered_extent(inode, offset);
463 BUG_ON(!ordered); /* Logic error */ 464 BUG_ON(!ordered); /* Logic error */
464 sums->bytenr = (u64)bio->bi_sector << 9; 465 sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
465 index = 0; 466 index = 0;
466 467
467 while (bio_index < bio->bi_vcnt) { 468 while (bio_index < bio->bi_vcnt) {
@@ -476,7 +477,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
476 btrfs_add_ordered_sum(inode, ordered, sums); 477 btrfs_add_ordered_sum(inode, ordered, sums);
477 btrfs_put_ordered_extent(ordered); 478 btrfs_put_ordered_extent(ordered);
478 479
479 bytes_left = bio->bi_size - total_bytes; 480 bytes_left = bio->bi_iter.bi_size - total_bytes;
480 481
481 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), 482 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
482 GFP_NOFS); 483 GFP_NOFS);
@@ -484,7 +485,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
484 sums->len = bytes_left; 485 sums->len = bytes_left;
485 ordered = btrfs_lookup_ordered_extent(inode, offset); 486 ordered = btrfs_lookup_ordered_extent(inode, offset);
486 BUG_ON(!ordered); /* Logic error */ 487 BUG_ON(!ordered); /* Logic error */
487 sums->bytenr = ((u64)bio->bi_sector << 9) + 488 sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
488 total_bytes; 489 total_bytes;
489 index = 0; 490 index = 0;
490 } 491 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 514b291b1354..d546d8c3038b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1577,7 +1577,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1577 unsigned long bio_flags) 1577 unsigned long bio_flags)
1578{ 1578{
1579 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 1579 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1580 u64 logical = (u64)bio->bi_sector << 9; 1580 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1581 u64 length = 0; 1581 u64 length = 0;
1582 u64 map_length; 1582 u64 map_length;
1583 int ret; 1583 int ret;
@@ -1585,7 +1585,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1585 if (bio_flags & EXTENT_BIO_COMPRESSED) 1585 if (bio_flags & EXTENT_BIO_COMPRESSED)
1586 return 0; 1586 return 0;
1587 1587
1588 length = bio->bi_size; 1588 length = bio->bi_iter.bi_size;
1589 map_length = length; 1589 map_length = length;
1590 ret = btrfs_map_block(root->fs_info, rw, logical, 1590 ret = btrfs_map_block(root->fs_info, rw, logical,
1591 &map_length, NULL, 0); 1591 &map_length, NULL, 0);
@@ -6783,17 +6783,16 @@ unlock_err:
6783static void btrfs_endio_direct_read(struct bio *bio, int err) 6783static void btrfs_endio_direct_read(struct bio *bio, int err)
6784{ 6784{
6785 struct btrfs_dio_private *dip = bio->bi_private; 6785 struct btrfs_dio_private *dip = bio->bi_private;
6786 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 6786 struct bio_vec *bvec;
6787 struct bio_vec *bvec = bio->bi_io_vec;
6788 struct inode *inode = dip->inode; 6787 struct inode *inode = dip->inode;
6789 struct btrfs_root *root = BTRFS_I(inode)->root; 6788 struct btrfs_root *root = BTRFS_I(inode)->root;
6790 struct bio *dio_bio; 6789 struct bio *dio_bio;
6791 u32 *csums = (u32 *)dip->csum; 6790 u32 *csums = (u32 *)dip->csum;
6792 int index = 0;
6793 u64 start; 6791 u64 start;
6792 int i;
6794 6793
6795 start = dip->logical_offset; 6794 start = dip->logical_offset;
6796 do { 6795 bio_for_each_segment_all(bvec, bio, i) {
6797 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { 6796 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
6798 struct page *page = bvec->bv_page; 6797 struct page *page = bvec->bv_page;
6799 char *kaddr; 6798 char *kaddr;
@@ -6809,18 +6808,16 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6809 local_irq_restore(flags); 6808 local_irq_restore(flags);
6810 6809
6811 flush_dcache_page(bvec->bv_page); 6810 flush_dcache_page(bvec->bv_page);
6812 if (csum != csums[index]) { 6811 if (csum != csums[i]) {
6813 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", 6812 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
6814 btrfs_ino(inode), start, csum, 6813 btrfs_ino(inode), start, csum,
6815 csums[index]); 6814 csums[i]);
6816 err = -EIO; 6815 err = -EIO;
6817 } 6816 }
6818 } 6817 }
6819 6818
6820 start += bvec->bv_len; 6819 start += bvec->bv_len;
6821 bvec++; 6820 }
6822 index++;
6823 } while (bvec <= bvec_end);
6824 6821
6825 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6822 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
6826 dip->logical_offset + dip->bytes - 1); 6823 dip->logical_offset + dip->bytes - 1);
@@ -6901,7 +6898,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
6901 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " 6898 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
6902 "sector %#Lx len %u err no %d\n", 6899 "sector %#Lx len %u err no %d\n",
6903 btrfs_ino(dip->inode), bio->bi_rw, 6900 btrfs_ino(dip->inode), bio->bi_rw,
6904 (unsigned long long)bio->bi_sector, bio->bi_size, err); 6901 (unsigned long long)bio->bi_iter.bi_sector,
6902 bio->bi_iter.bi_size, err);
6905 dip->errors = 1; 6903 dip->errors = 1;
6906 6904
6907 /* 6905 /*
@@ -6992,7 +6990,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6992 struct bio *bio; 6990 struct bio *bio;
6993 struct bio *orig_bio = dip->orig_bio; 6991 struct bio *orig_bio = dip->orig_bio;
6994 struct bio_vec *bvec = orig_bio->bi_io_vec; 6992 struct bio_vec *bvec = orig_bio->bi_io_vec;
6995 u64 start_sector = orig_bio->bi_sector; 6993 u64 start_sector = orig_bio->bi_iter.bi_sector;
6996 u64 file_offset = dip->logical_offset; 6994 u64 file_offset = dip->logical_offset;
6997 u64 submit_len = 0; 6995 u64 submit_len = 0;
6998 u64 map_length; 6996 u64 map_length;
@@ -7000,7 +6998,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7000 int ret = 0; 6998 int ret = 0;
7001 int async_submit = 0; 6999 int async_submit = 0;
7002 7000
7003 map_length = orig_bio->bi_size; 7001 map_length = orig_bio->bi_iter.bi_size;
7004 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, 7002 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
7005 &map_length, NULL, 0); 7003 &map_length, NULL, 0);
7006 if (ret) { 7004 if (ret) {
@@ -7008,7 +7006,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7008 return -EIO; 7006 return -EIO;
7009 } 7007 }
7010 7008
7011 if (map_length >= orig_bio->bi_size) { 7009 if (map_length >= orig_bio->bi_iter.bi_size) {
7012 bio = orig_bio; 7010 bio = orig_bio;
7013 goto submit; 7011 goto submit;
7014 } 7012 }
@@ -7060,7 +7058,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7060 bio->bi_private = dip; 7058 bio->bi_private = dip;
7061 bio->bi_end_io = btrfs_end_dio_bio; 7059 bio->bi_end_io = btrfs_end_dio_bio;
7062 7060
7063 map_length = orig_bio->bi_size; 7061 map_length = orig_bio->bi_iter.bi_size;
7064 ret = btrfs_map_block(root->fs_info, rw, 7062 ret = btrfs_map_block(root->fs_info, rw,
7065 start_sector << 9, 7063 start_sector << 9,
7066 &map_length, NULL, 0); 7064 &map_length, NULL, 0);
@@ -7118,7 +7116,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7118 7116
7119 if (!skip_sum && !write) { 7117 if (!skip_sum && !write) {
7120 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 7118 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7121 sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; 7119 sum_len = dio_bio->bi_iter.bi_size >>
7120 inode->i_sb->s_blocksize_bits;
7122 sum_len *= csum_size; 7121 sum_len *= csum_size;
7123 } else { 7122 } else {
7124 sum_len = 0; 7123 sum_len = 0;
@@ -7133,8 +7132,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7133 dip->private = dio_bio->bi_private; 7132 dip->private = dio_bio->bi_private;
7134 dip->inode = inode; 7133 dip->inode = inode;
7135 dip->logical_offset = file_offset; 7134 dip->logical_offset = file_offset;
7136 dip->bytes = dio_bio->bi_size; 7135 dip->bytes = dio_bio->bi_iter.bi_size;
7137 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; 7136 dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
7138 io_bio->bi_private = dip; 7137 io_bio->bi_private = dip;
7139 dip->errors = 0; 7138 dip->errors = 0;
7140 dip->orig_bio = io_bio; 7139 dip->orig_bio = io_bio;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 24ac21840a9a..9af0b25d991a 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1032,8 +1032,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1032 1032
1033 /* see if we can add this page onto our existing bio */ 1033 /* see if we can add this page onto our existing bio */
1034 if (last) { 1034 if (last) {
1035 last_end = (u64)last->bi_sector << 9; 1035 last_end = (u64)last->bi_iter.bi_sector << 9;
1036 last_end += last->bi_size; 1036 last_end += last->bi_iter.bi_size;
1037 1037
1038 /* 1038 /*
1039 * we can't merge these if they are from different 1039 * we can't merge these if they are from different
@@ -1053,9 +1053,9 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1053 if (!bio) 1053 if (!bio)
1054 return -ENOMEM; 1054 return -ENOMEM;
1055 1055
1056 bio->bi_size = 0; 1056 bio->bi_iter.bi_size = 0;
1057 bio->bi_bdev = stripe->dev->bdev; 1057 bio->bi_bdev = stripe->dev->bdev;
1058 bio->bi_sector = disk_start >> 9; 1058 bio->bi_iter.bi_sector = disk_start >> 9;
1059 set_bit(BIO_UPTODATE, &bio->bi_flags); 1059 set_bit(BIO_UPTODATE, &bio->bi_flags);
1060 1060
1061 bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); 1061 bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -1111,7 +1111,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1111 1111
1112 spin_lock_irq(&rbio->bio_list_lock); 1112 spin_lock_irq(&rbio->bio_list_lock);
1113 bio_list_for_each(bio, &rbio->bio_list) { 1113 bio_list_for_each(bio, &rbio->bio_list) {
1114 start = (u64)bio->bi_sector << 9; 1114 start = (u64)bio->bi_iter.bi_sector << 9;
1115 stripe_offset = start - rbio->raid_map[0]; 1115 stripe_offset = start - rbio->raid_map[0];
1116 page_index = stripe_offset >> PAGE_CACHE_SHIFT; 1116 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1117 1117
@@ -1272,7 +1272,7 @@ cleanup:
1272static int find_bio_stripe(struct btrfs_raid_bio *rbio, 1272static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1273 struct bio *bio) 1273 struct bio *bio)
1274{ 1274{
1275 u64 physical = bio->bi_sector; 1275 u64 physical = bio->bi_iter.bi_sector;
1276 u64 stripe_start; 1276 u64 stripe_start;
1277 int i; 1277 int i;
1278 struct btrfs_bio_stripe *stripe; 1278 struct btrfs_bio_stripe *stripe;
@@ -1298,7 +1298,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1298static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, 1298static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1299 struct bio *bio) 1299 struct bio *bio)
1300{ 1300{
1301 u64 logical = bio->bi_sector; 1301 u64 logical = bio->bi_iter.bi_sector;
1302 u64 stripe_start; 1302 u64 stripe_start;
1303 int i; 1303 int i;
1304 1304
@@ -1602,8 +1602,8 @@ static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
1602 plug_list); 1602 plug_list);
1603 struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, 1603 struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1604 plug_list); 1604 plug_list);
1605 u64 a_sector = ra->bio_list.head->bi_sector; 1605 u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
1606 u64 b_sector = rb->bio_list.head->bi_sector; 1606 u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
1607 1607
1608 if (a_sector < b_sector) 1608 if (a_sector < b_sector)
1609 return -1; 1609 return -1;
@@ -1691,7 +1691,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1691 if (IS_ERR(rbio)) 1691 if (IS_ERR(rbio))
1692 return PTR_ERR(rbio); 1692 return PTR_ERR(rbio);
1693 bio_list_add(&rbio->bio_list, bio); 1693 bio_list_add(&rbio->bio_list, bio);
1694 rbio->bio_list_bytes = bio->bi_size; 1694 rbio->bio_list_bytes = bio->bi_iter.bi_size;
1695 1695
1696 /* 1696 /*
1697 * don't plug on full rbios, just get them out the door 1697 * don't plug on full rbios, just get them out the door
@@ -2044,7 +2044,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2044 2044
2045 rbio->read_rebuild = 1; 2045 rbio->read_rebuild = 1;
2046 bio_list_add(&rbio->bio_list, bio); 2046 bio_list_add(&rbio->bio_list, bio);
2047 rbio->bio_list_bytes = bio->bi_size; 2047 rbio->bio_list_bytes = bio->bi_iter.bi_size;
2048 2048
2049 rbio->faila = find_logical_bio_stripe(rbio, bio); 2049 rbio->faila = find_logical_bio_stripe(rbio, bio);
2050 if (rbio->faila == -1) { 2050 if (rbio->faila == -1) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1fd3f33c330a..bb9a928fa3a8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1308,7 +1308,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1308 continue; 1308 continue;
1309 } 1309 }
1310 bio->bi_bdev = page->dev->bdev; 1310 bio->bi_bdev = page->dev->bdev;
1311 bio->bi_sector = page->physical >> 9; 1311 bio->bi_iter.bi_sector = page->physical >> 9;
1312 1312
1313 bio_add_page(bio, page->page, PAGE_SIZE, 0); 1313 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1314 if (btrfsic_submit_bio_wait(READ, bio)) 1314 if (btrfsic_submit_bio_wait(READ, bio))
@@ -1427,7 +1427,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1427 if (!bio) 1427 if (!bio)
1428 return -EIO; 1428 return -EIO;
1429 bio->bi_bdev = page_bad->dev->bdev; 1429 bio->bi_bdev = page_bad->dev->bdev;
1430 bio->bi_sector = page_bad->physical >> 9; 1430 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1431 1431
1432 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); 1432 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1433 if (PAGE_SIZE != ret) { 1433 if (PAGE_SIZE != ret) {
@@ -1520,7 +1520,7 @@ again:
1520 bio->bi_private = sbio; 1520 bio->bi_private = sbio;
1521 bio->bi_end_io = scrub_wr_bio_end_io; 1521 bio->bi_end_io = scrub_wr_bio_end_io;
1522 bio->bi_bdev = sbio->dev->bdev; 1522 bio->bi_bdev = sbio->dev->bdev;
1523 bio->bi_sector = sbio->physical >> 9; 1523 bio->bi_iter.bi_sector = sbio->physical >> 9;
1524 sbio->err = 0; 1524 sbio->err = 0;
1525 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1525 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1526 spage->physical_for_dev_replace || 1526 spage->physical_for_dev_replace ||
@@ -1926,7 +1926,7 @@ again:
1926 bio->bi_private = sbio; 1926 bio->bi_private = sbio;
1927 bio->bi_end_io = scrub_bio_end_io; 1927 bio->bi_end_io = scrub_bio_end_io;
1928 bio->bi_bdev = sbio->dev->bdev; 1928 bio->bi_bdev = sbio->dev->bdev;
1929 bio->bi_sector = sbio->physical >> 9; 1929 bio->bi_iter.bi_sector = sbio->physical >> 9;
1930 sbio->err = 0; 1930 sbio->err = 0;
1931 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1931 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1932 spage->physical || 1932 spage->physical ||
@@ -3371,8 +3371,8 @@ static int write_page_nocow(struct scrub_ctx *sctx,
3371 spin_unlock(&sctx->stat_lock); 3371 spin_unlock(&sctx->stat_lock);
3372 return -ENOMEM; 3372 return -ENOMEM;
3373 } 3373 }
3374 bio->bi_size = 0; 3374 bio->bi_iter.bi_size = 0;
3375 bio->bi_sector = physical_for_dev_replace >> 9; 3375 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
3376 bio->bi_bdev = dev->bdev; 3376 bio->bi_bdev = dev->bdev;
3377 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); 3377 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3378 if (ret != PAGE_CACHE_SIZE) { 3378 if (ret != PAGE_CACHE_SIZE) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 92303f42baaa..54d2685a3071 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5298,6 +5298,13 @@ static void btrfs_end_bio(struct bio *bio, int err)
5298 bio_put(bio); 5298 bio_put(bio);
5299 bio = bbio->orig_bio; 5299 bio = bbio->orig_bio;
5300 } 5300 }
5301
5302 /*
5303 * We have original bio now. So increment bi_remaining to
5304 * account for it in endio
5305 */
5306 atomic_inc(&bio->bi_remaining);
5307
5301 bio->bi_private = bbio->private; 5308 bio->bi_private = bbio->private;
5302 bio->bi_end_io = bbio->end_io; 5309 bio->bi_end_io = bbio->end_io;
5303 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5310 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
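The hunk above and the earlier bio_endio_nodec() change in fs/btrfs/disk-io.c are two spellings of the same adjustment for the new bi_remaining accounting; a condensed sketch follows (recomplete_a/recomplete_b are illustrative names, not kernel functions).

/* Sketch only: re-completing a bio whose bi_remaining count was already
 * consumed by an earlier bio_endio() call. */
static void recomplete_a(struct bio *bio, int err)
{
	atomic_inc(&bio->bi_remaining);	/* pay for the decrement bio_endio() does */
	bio_endio(bio, err);
}

static void recomplete_b(struct bio *bio, int err)
{
	bio_endio_nodec(bio, err);	/* end the I/O without the decrement */
}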
@@ -5411,7 +5418,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
5411 if (!q->merge_bvec_fn) 5418 if (!q->merge_bvec_fn)
5412 return 1; 5419 return 1;
5413 5420
5414 bvm.bi_size = bio->bi_size - prev->bv_len; 5421 bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
5415 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) 5422 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
5416 return 0; 5423 return 0;
5417 return 1; 5424 return 1;
@@ -5426,7 +5433,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
5426 bio->bi_private = bbio; 5433 bio->bi_private = bbio;
5427 btrfs_io_bio(bio)->stripe_index = dev_nr; 5434 btrfs_io_bio(bio)->stripe_index = dev_nr;
5428 bio->bi_end_io = btrfs_end_bio; 5435 bio->bi_end_io = btrfs_end_bio;
5429 bio->bi_sector = physical >> 9; 5436 bio->bi_iter.bi_sector = physical >> 9;
5430#ifdef DEBUG 5437#ifdef DEBUG
5431 { 5438 {
5432 struct rcu_string *name; 5439 struct rcu_string *name;
@@ -5464,7 +5471,7 @@ again:
5464 while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { 5471 while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
5465 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, 5472 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
5466 bvec->bv_offset) < bvec->bv_len) { 5473 bvec->bv_offset) < bvec->bv_len) {
5467 u64 len = bio->bi_size; 5474 u64 len = bio->bi_iter.bi_size;
5468 5475
5469 atomic_inc(&bbio->stripes_pending); 5476 atomic_inc(&bbio->stripes_pending);
5470 submit_stripe_bio(root, bbio, bio, physical, dev_nr, 5477 submit_stripe_bio(root, bbio, bio, physical, dev_nr,
@@ -5486,7 +5493,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
5486 bio->bi_private = bbio->private; 5493 bio->bi_private = bbio->private;
5487 bio->bi_end_io = bbio->end_io; 5494 bio->bi_end_io = bbio->end_io;
5488 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5495 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5489 bio->bi_sector = logical >> 9; 5496 bio->bi_iter.bi_sector = logical >> 9;
5490 kfree(bbio); 5497 kfree(bbio);
5491 bio_endio(bio, -EIO); 5498 bio_endio(bio, -EIO);
5492 } 5499 }
@@ -5497,7 +5504,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5497{ 5504{
5498 struct btrfs_device *dev; 5505 struct btrfs_device *dev;
5499 struct bio *first_bio = bio; 5506 struct bio *first_bio = bio;
5500 u64 logical = (u64)bio->bi_sector << 9; 5507 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
5501 u64 length = 0; 5508 u64 length = 0;
5502 u64 map_length; 5509 u64 map_length;
5503 u64 *raid_map = NULL; 5510 u64 *raid_map = NULL;
@@ -5506,7 +5513,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5506 int total_devs = 1; 5513 int total_devs = 1;
5507 struct btrfs_bio *bbio = NULL; 5514 struct btrfs_bio *bbio = NULL;
5508 5515
5509 length = bio->bi_size; 5516 length = bio->bi_iter.bi_size;
5510 map_length = length; 5517 map_length = length;
5511 5518
5512 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, 5519 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
diff --git a/fs/buffer.c b/fs/buffer.c
index 6024877335ca..651dba10b9c2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1312,7 +1312,7 @@ static void bh_lru_install(struct buffer_head *bh)
1312 } 1312 }
1313 while (out < BH_LRU_SIZE) 1313 while (out < BH_LRU_SIZE)
1314 bhs[out++] = NULL; 1314 bhs[out++] = NULL;
1315 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs)); 1315 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1316 } 1316 }
1317 bh_lru_unlock(); 1317 bh_lru_unlock();
1318 1318
@@ -2982,11 +2982,11 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2982 * let it through, and the IO layer will turn it into 2982 * let it through, and the IO layer will turn it into
2983 * an EIO. 2983 * an EIO.
2984 */ 2984 */
2985 if (unlikely(bio->bi_sector >= maxsector)) 2985 if (unlikely(bio->bi_iter.bi_sector >= maxsector))
2986 return; 2986 return;
2987 2987
2988 maxsector -= bio->bi_sector; 2988 maxsector -= bio->bi_iter.bi_sector;
2989 bytes = bio->bi_size; 2989 bytes = bio->bi_iter.bi_size;
2990 if (likely((bytes >> 9) <= maxsector)) 2990 if (likely((bytes >> 9) <= maxsector))
2991 return; 2991 return;
2992 2992
@@ -2994,7 +2994,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2994 bytes = maxsector << 9; 2994 bytes = maxsector << 9;
2995 2995
2996 /* Truncate the bio.. */ 2996 /* Truncate the bio.. */
2997 bio->bi_size = bytes; 2997 bio->bi_iter.bi_size = bytes;
2998 bio->bi_io_vec[0].bv_len = bytes; 2998 bio->bi_io_vec[0].bv_len = bytes;
2999 2999
3000 /* ..and clear the end of the buffer for reads */ 3000 /* ..and clear the end of the buffer for reads */
@@ -3029,14 +3029,14 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
3029 */ 3029 */
3030 bio = bio_alloc(GFP_NOIO, 1); 3030 bio = bio_alloc(GFP_NOIO, 1);
3031 3031
3032 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 3032 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3033 bio->bi_bdev = bh->b_bdev; 3033 bio->bi_bdev = bh->b_bdev;
3034 bio->bi_io_vec[0].bv_page = bh->b_page; 3034 bio->bi_io_vec[0].bv_page = bh->b_page;
3035 bio->bi_io_vec[0].bv_len = bh->b_size; 3035 bio->bi_io_vec[0].bv_len = bh->b_size;
3036 bio->bi_io_vec[0].bv_offset = bh_offset(bh); 3036 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
3037 3037
3038 bio->bi_vcnt = 1; 3038 bio->bi_vcnt = 1;
3039 bio->bi_size = bh->b_size; 3039 bio->bi_iter.bi_size = bh->b_size;
3040 3040
3041 bio->bi_end_io = end_bio_bh_io_sync; 3041 bio->bi_end_io = end_bio_bh_io_sync;
3042 bio->bi_private = bh; 3042 bio->bi_private = bh;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0e04142d5962..160a5489a939 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -375,7 +375,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
375 bio = bio_alloc(GFP_KERNEL, nr_vecs); 375 bio = bio_alloc(GFP_KERNEL, nr_vecs);
376 376
377 bio->bi_bdev = bdev; 377 bio->bi_bdev = bdev;
378 bio->bi_sector = first_sector; 378 bio->bi_iter.bi_sector = first_sector;
379 if (dio->is_async) 379 if (dio->is_async)
380 bio->bi_end_io = dio_bio_end_aio; 380 bio->bi_end_io = dio_bio_end_aio;
381 else 381 else
@@ -719,7 +719,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
719 if (sdio->bio) { 719 if (sdio->bio) {
720 loff_t cur_offset = sdio->cur_page_fs_offset; 720 loff_t cur_offset = sdio->cur_page_fs_offset;
721 loff_t bio_next_offset = sdio->logical_offset_in_bio + 721 loff_t bio_next_offset = sdio->logical_offset_in_bio +
722 sdio->bio->bi_size; 722 sdio->bio->bi_iter.bi_size;
723 723
724 /* 724 /*
725 * See whether this new request is contiguous with the old. 725 * See whether this new request is contiguous with the old.
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index d488f80ee32d..ab95508e3d40 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -65,9 +65,9 @@ static void ext4_finish_bio(struct bio *bio)
65{ 65{
66 int i; 66 int i;
67 int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); 67 int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
68 struct bio_vec *bvec;
68 69
69 for (i = 0; i < bio->bi_vcnt; i++) { 70 bio_for_each_segment_all(bvec, bio, i) {
70 struct bio_vec *bvec = &bio->bi_io_vec[i];
71 struct page *page = bvec->bv_page; 71 struct page *page = bvec->bv_page;
72 struct buffer_head *bh, *head; 72 struct buffer_head *bh, *head;
73 unsigned bio_start = bvec->bv_offset; 73 unsigned bio_start = bvec->bv_offset;
@@ -298,7 +298,7 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
298static void ext4_end_bio(struct bio *bio, int error) 298static void ext4_end_bio(struct bio *bio, int error)
299{ 299{
300 ext4_io_end_t *io_end = bio->bi_private; 300 ext4_io_end_t *io_end = bio->bi_private;
301 sector_t bi_sector = bio->bi_sector; 301 sector_t bi_sector = bio->bi_iter.bi_sector;
302 302
303 BUG_ON(!io_end); 303 BUG_ON(!io_end);
304 bio->bi_end_io = NULL; 304 bio->bi_end_io = NULL;
@@ -366,7 +366,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
366 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); 366 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
367 if (!bio) 367 if (!bio)
368 return -ENOMEM; 368 return -ENOMEM;
369 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 369 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
370 bio->bi_bdev = bh->b_bdev; 370 bio->bi_bdev = bh->b_bdev;
371 bio->bi_end_io = ext4_end_bio; 371 bio->bi_end_io = ext4_end_bio;
372 bio->bi_private = ext4_get_io_end(io->io_end); 372 bio->bi_private = ext4_get_io_end(io->io_end);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0ae558723506..2261ccdd0b5f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -26,40 +26,33 @@
26 26
27static void f2fs_read_end_io(struct bio *bio, int err) 27static void f2fs_read_end_io(struct bio *bio, int err)
28{ 28{
29 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 29 struct bio_vec *bvec;
30 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 30 int i;
31 31
32 do { 32 bio_for_each_segment_all(bvec, bio, i) {
33 struct page *page = bvec->bv_page; 33 struct page *page = bvec->bv_page;
34 34
35 if (--bvec >= bio->bi_io_vec) 35 if (!err) {
36 prefetchw(&bvec->bv_page->flags); 36 SetPageUptodate(page);
37 37 } else {
38 if (unlikely(!uptodate)) {
39 ClearPageUptodate(page); 38 ClearPageUptodate(page);
40 SetPageError(page); 39 SetPageError(page);
41 } else {
42 SetPageUptodate(page);
43 } 40 }
44 unlock_page(page); 41 unlock_page(page);
45 } while (bvec >= bio->bi_io_vec); 42 }
46
47 bio_put(bio); 43 bio_put(bio);
48} 44}
49 45
50static void f2fs_write_end_io(struct bio *bio, int err) 46static void f2fs_write_end_io(struct bio *bio, int err)
51{ 47{
52 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 48 struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb);
53 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 49 struct bio_vec *bvec;
54 struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb); 50 int i;
55 51
56 do { 52 bio_for_each_segment_all(bvec, bio, i) {
57 struct page *page = bvec->bv_page; 53 struct page *page = bvec->bv_page;
58 54
59 if (--bvec >= bio->bi_io_vec) 55 if (unlikely(err)) {
60 prefetchw(&bvec->bv_page->flags);
61
62 if (unlikely(!uptodate)) {
63 SetPageError(page); 56 SetPageError(page);
64 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
65 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 58 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
@@ -67,7 +60,7 @@ static void f2fs_write_end_io(struct bio *bio, int err)
67 } 60 }
68 end_page_writeback(page); 61 end_page_writeback(page);
69 dec_page_count(sbi, F2FS_WRITEBACK); 62 dec_page_count(sbi, F2FS_WRITEBACK);
70 } while (bvec >= bio->bi_io_vec); 63 }
71 64
72 if (bio->bi_private) 65 if (bio->bi_private)
73 complete(bio->bi_private); 66 complete(bio->bi_private);
@@ -91,7 +84,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
91 bio = bio_alloc(GFP_NOIO, npages); 84 bio = bio_alloc(GFP_NOIO, npages);
92 85
93 bio->bi_bdev = sbi->sb->s_bdev; 86 bio->bi_bdev = sbi->sb->s_bdev;
94 bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 87 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
95 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 88 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
96 89
97 return bio; 90 return bio;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 58f06400b7b8..76693793cedd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -273,7 +273,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
273 nrvecs = max(nrvecs/2, 1U); 273 nrvecs = max(nrvecs/2, 1U);
274 } 274 }
275 275
276 bio->bi_sector = blkno * (sb->s_blocksize >> 9); 276 bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
277 bio->bi_bdev = sb->s_bdev; 277 bio->bi_bdev = sb->s_bdev;
278 bio->bi_end_io = gfs2_end_log_write; 278 bio->bi_end_io = gfs2_end_log_write;
279 bio->bi_private = sdp; 279 bio->bi_private = sdp;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1e712b566d76..c6872d09561a 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -238,7 +238,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
238 lock_page(page); 238 lock_page(page);
239 239
240 bio = bio_alloc(GFP_NOFS, 1); 240 bio = bio_alloc(GFP_NOFS, 1);
241 bio->bi_sector = sector * (sb->s_blocksize >> 9); 241 bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
242 bio->bi_bdev = sb->s_bdev; 242 bio->bi_bdev = sb->s_bdev;
243 bio_add_page(bio, page, PAGE_SIZE, 0); 243 bio_add_page(bio, page, PAGE_SIZE, 0);
244 244
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index e9a97a0d4314..3f999649587f 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -63,7 +63,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
63 sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); 63 sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
64 64
65 bio = bio_alloc(GFP_NOIO, 1); 65 bio = bio_alloc(GFP_NOIO, 1);
66 bio->bi_sector = sector; 66 bio->bi_iter.bi_sector = sector;
67 bio->bi_bdev = sb->s_bdev; 67 bio->bi_bdev = sb->s_bdev;
68 68
69 if (!(rw & WRITE) && data) 69 if (!(rw & WRITE) && data)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 360d27c48887..8d811e02b4b9 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1998,20 +1998,20 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1998 1998
1999 bio = bio_alloc(GFP_NOFS, 1); 1999 bio = bio_alloc(GFP_NOFS, 1);
2000 2000
2001 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2001 bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2002 bio->bi_bdev = log->bdev; 2002 bio->bi_bdev = log->bdev;
2003 bio->bi_io_vec[0].bv_page = bp->l_page; 2003 bio->bi_io_vec[0].bv_page = bp->l_page;
2004 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2004 bio->bi_io_vec[0].bv_len = LOGPSIZE;
2005 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2005 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2006 2006
2007 bio->bi_vcnt = 1; 2007 bio->bi_vcnt = 1;
2008 bio->bi_size = LOGPSIZE; 2008 bio->bi_iter.bi_size = LOGPSIZE;
2009 2009
2010 bio->bi_end_io = lbmIODone; 2010 bio->bi_end_io = lbmIODone;
2011 bio->bi_private = bp; 2011 bio->bi_private = bp;
2012 /*check if journaling to disk has been disabled*/ 2012 /*check if journaling to disk has been disabled*/
2013 if (log->no_integrity) { 2013 if (log->no_integrity) {
2014 bio->bi_size = 0; 2014 bio->bi_iter.bi_size = 0;
2015 lbmIODone(bio, 0); 2015 lbmIODone(bio, 0);
2016 } else { 2016 } else {
2017 submit_bio(READ_SYNC, bio); 2017 submit_bio(READ_SYNC, bio);
@@ -2144,21 +2144,21 @@ static void lbmStartIO(struct lbuf * bp)
2144 jfs_info("lbmStartIO\n"); 2144 jfs_info("lbmStartIO\n");
2145 2145
2146 bio = bio_alloc(GFP_NOFS, 1); 2146 bio = bio_alloc(GFP_NOFS, 1);
2147 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2147 bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2148 bio->bi_bdev = log->bdev; 2148 bio->bi_bdev = log->bdev;
2149 bio->bi_io_vec[0].bv_page = bp->l_page; 2149 bio->bi_io_vec[0].bv_page = bp->l_page;
2150 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2150 bio->bi_io_vec[0].bv_len = LOGPSIZE;
2151 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2151 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2152 2152
2153 bio->bi_vcnt = 1; 2153 bio->bi_vcnt = 1;
2154 bio->bi_size = LOGPSIZE; 2154 bio->bi_iter.bi_size = LOGPSIZE;
2155 2155
2156 bio->bi_end_io = lbmIODone; 2156 bio->bi_end_io = lbmIODone;
2157 bio->bi_private = bp; 2157 bio->bi_private = bp;
2158 2158
2159 /* check if journaling to disk has been disabled */ 2159 /* check if journaling to disk has been disabled */
2160 if (log->no_integrity) { 2160 if (log->no_integrity) {
2161 bio->bi_size = 0; 2161 bio->bi_iter.bi_size = 0;
2162 lbmIODone(bio, 0); 2162 lbmIODone(bio, 0);
2163 } else { 2163 } else {
2164 submit_bio(WRITE_SYNC, bio); 2164 submit_bio(WRITE_SYNC, bio);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index d165cde0c68d..49ba7ff1bbb9 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -416,7 +416,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
416 * count from hitting zero before we're through 416 * count from hitting zero before we're through
417 */ 417 */
418 inc_io(page); 418 inc_io(page);
419 if (!bio->bi_size) 419 if (!bio->bi_iter.bi_size)
420 goto dump_bio; 420 goto dump_bio;
421 submit_bio(WRITE, bio); 421 submit_bio(WRITE, bio);
422 nr_underway++; 422 nr_underway++;
@@ -438,7 +438,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
438 438
439 bio = bio_alloc(GFP_NOFS, 1); 439 bio = bio_alloc(GFP_NOFS, 1);
440 bio->bi_bdev = inode->i_sb->s_bdev; 440 bio->bi_bdev = inode->i_sb->s_bdev;
441 bio->bi_sector = pblock << (inode->i_blkbits - 9); 441 bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
442 bio->bi_end_io = metapage_write_end_io; 442 bio->bi_end_io = metapage_write_end_io;
443 bio->bi_private = page; 443 bio->bi_private = page;
444 444
@@ -452,7 +452,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
452 if (bio) { 452 if (bio) {
453 if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) 453 if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
454 goto add_failed; 454 goto add_failed;
455 if (!bio->bi_size) 455 if (!bio->bi_iter.bi_size)
456 goto dump_bio; 456 goto dump_bio;
457 457
458 submit_bio(WRITE, bio); 458 submit_bio(WRITE, bio);
@@ -517,7 +517,8 @@ static int metapage_readpage(struct file *fp, struct page *page)
517 517
518 bio = bio_alloc(GFP_NOFS, 1); 518 bio = bio_alloc(GFP_NOFS, 1);
519 bio->bi_bdev = inode->i_sb->s_bdev; 519 bio->bi_bdev = inode->i_sb->s_bdev;
520 bio->bi_sector = pblock << (inode->i_blkbits - 9); 520 bio->bi_iter.bi_sector =
521 pblock << (inode->i_blkbits - 9);
521 bio->bi_end_io = metapage_read_end_io; 522 bio->bi_end_io = metapage_read_end_io;
522 bio->bi_private = page; 523 bio->bi_private = page;
523 len = xlen << inode->i_blkbits; 524 len = xlen << inode->i_blkbits;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 0f95f0d0b313..76279e11982d 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,9 +26,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
26 bio_vec.bv_len = PAGE_SIZE; 26 bio_vec.bv_len = PAGE_SIZE;
27 bio_vec.bv_offset = 0; 27 bio_vec.bv_offset = 0;
28 bio.bi_vcnt = 1; 28 bio.bi_vcnt = 1;
29 bio.bi_size = PAGE_SIZE;
30 bio.bi_bdev = bdev; 29 bio.bi_bdev = bdev;
31 bio.bi_sector = page->index * (PAGE_SIZE >> 9); 30 bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
31 bio.bi_iter.bi_size = PAGE_SIZE;
32 32
33 return submit_bio_wait(rw, &bio); 33 return submit_bio_wait(rw, &bio);
34} 34}
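The sync_request() conversion just above is also the general pattern for synchronous, single-page I/O with the new iterator fields; a hypothetical sketch (read_one_page() and its arguments are assumptions) is shown here.

/* Sketch only: synchronous one-page read using an on-stack bio. */
static int read_one_page(struct block_device *bdev, sector_t sector,
			 struct page *page)
{
	struct bio bio;
	struct bio_vec bvec;

	bio_init(&bio);
	bio.bi_max_vecs = 1;
	bio.bi_io_vec = &bvec;
	bvec.bv_page = page;
	bvec.bv_len = PAGE_SIZE;
	bvec.bv_offset = 0;
	bio.bi_vcnt = 1;
	bio.bi_bdev = bdev;
	bio.bi_iter.bi_sector = sector;
	bio.bi_iter.bi_size = PAGE_SIZE;

	/* submit_bio_wait() returns 0 on success or a negative error */
	return submit_bio_wait(READ, &bio);
}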
@@ -56,22 +56,18 @@ static DECLARE_WAIT_QUEUE_HEAD(wq);
56static void writeseg_end_io(struct bio *bio, int err) 56static void writeseg_end_io(struct bio *bio, int err)
57{ 57{
58 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 58 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
59 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 59 struct bio_vec *bvec;
60 int i;
60 struct super_block *sb = bio->bi_private; 61 struct super_block *sb = bio->bi_private;
61 struct logfs_super *super = logfs_super(sb); 62 struct logfs_super *super = logfs_super(sb);
62 struct page *page;
63 63
64 BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */ 64 BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
65 BUG_ON(err); 65 BUG_ON(err);
66 BUG_ON(bio->bi_vcnt == 0); 66
67 do { 67 bio_for_each_segment_all(bvec, bio, i) {
68 page = bvec->bv_page; 68 end_page_writeback(bvec->bv_page);
69 if (--bvec >= bio->bi_io_vec) 69 page_cache_release(bvec->bv_page);
70 prefetchw(&bvec->bv_page->flags); 70 }
71
72 end_page_writeback(page);
73 page_cache_release(page);
74 } while (bvec >= bio->bi_io_vec);
75 bio_put(bio); 71 bio_put(bio);
76 if (atomic_dec_and_test(&super->s_pending_writes)) 72 if (atomic_dec_and_test(&super->s_pending_writes))
77 wake_up(&wq); 73 wake_up(&wq);
@@ -96,9 +92,9 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
96 if (i >= max_pages) { 92 if (i >= max_pages) {
97 /* Block layer cannot split bios :( */ 93 /* Block layer cannot split bios :( */
98 bio->bi_vcnt = i; 94 bio->bi_vcnt = i;
99 bio->bi_size = i * PAGE_SIZE; 95 bio->bi_iter.bi_size = i * PAGE_SIZE;
100 bio->bi_bdev = super->s_bdev; 96 bio->bi_bdev = super->s_bdev;
101 bio->bi_sector = ofs >> 9; 97 bio->bi_iter.bi_sector = ofs >> 9;
102 bio->bi_private = sb; 98 bio->bi_private = sb;
103 bio->bi_end_io = writeseg_end_io; 99 bio->bi_end_io = writeseg_end_io;
104 atomic_inc(&super->s_pending_writes); 100 atomic_inc(&super->s_pending_writes);
@@ -123,9 +119,9 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
123 unlock_page(page); 119 unlock_page(page);
124 } 120 }
125 bio->bi_vcnt = nr_pages; 121 bio->bi_vcnt = nr_pages;
126 bio->bi_size = nr_pages * PAGE_SIZE; 122 bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
127 bio->bi_bdev = super->s_bdev; 123 bio->bi_bdev = super->s_bdev;
128 bio->bi_sector = ofs >> 9; 124 bio->bi_iter.bi_sector = ofs >> 9;
129 bio->bi_private = sb; 125 bio->bi_private = sb;
130 bio->bi_end_io = writeseg_end_io; 126 bio->bi_end_io = writeseg_end_io;
131 atomic_inc(&super->s_pending_writes); 127 atomic_inc(&super->s_pending_writes);
@@ -188,9 +184,9 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
188 if (i >= max_pages) { 184 if (i >= max_pages) {
189 /* Block layer cannot split bios :( */ 185 /* Block layer cannot split bios :( */
190 bio->bi_vcnt = i; 186 bio->bi_vcnt = i;
191 bio->bi_size = i * PAGE_SIZE; 187 bio->bi_iter.bi_size = i * PAGE_SIZE;
192 bio->bi_bdev = super->s_bdev; 188 bio->bi_bdev = super->s_bdev;
193 bio->bi_sector = ofs >> 9; 189 bio->bi_iter.bi_sector = ofs >> 9;
194 bio->bi_private = sb; 190 bio->bi_private = sb;
195 bio->bi_end_io = erase_end_io; 191 bio->bi_end_io = erase_end_io;
196 atomic_inc(&super->s_pending_writes); 192 atomic_inc(&super->s_pending_writes);
@@ -209,9 +205,9 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
209 bio->bi_io_vec[i].bv_offset = 0; 205 bio->bi_io_vec[i].bv_offset = 0;
210 } 206 }
211 bio->bi_vcnt = nr_pages; 207 bio->bi_vcnt = nr_pages;
212 bio->bi_size = nr_pages * PAGE_SIZE; 208 bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
213 bio->bi_bdev = super->s_bdev; 209 bio->bi_bdev = super->s_bdev;
214 bio->bi_sector = ofs >> 9; 210 bio->bi_iter.bi_sector = ofs >> 9;
215 bio->bi_private = sb; 211 bio->bi_private = sb;
216 bio->bi_end_io = erase_end_io; 212 bio->bi_end_io = erase_end_io;
217 atomic_inc(&super->s_pending_writes); 213 atomic_inc(&super->s_pending_writes);
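
The logfs writeseg_end_io() rewrite above, like the mpage and NFS blocklayout conversions that follow, replaces the old open-coded backwards walk over bi_io_vec with bio_for_each_segment_all(), which visits every segment regardless of how far the iterator has advanced, and keys success off the err argument instead of the BIO_UPTODATE flag. A sketch of the resulting completion-handler shape, not part of the patch (example_write_end_io() is a made-up name):

#include <linux/bio.h>
#include <linux/pagemap.h>

static void example_write_end_io(struct bio *bio, int err)
{
        struct bio_vec *bvec;
        int i;

        /* Visits all segments in submission order; no manual --bvec loop. */
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;

                if (err)                /* rely on err, not BIO_UPTODATE */
                        SetPageError(page);
                end_page_writeback(page);
                page_cache_release(page);
        }
        bio_put(bio);
}
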
diff --git a/fs/mpage.c b/fs/mpage.c
index 0face1c4d4c6..4979ffa60aaa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -43,16 +43,14 @@
43 */ 43 */
44static void mpage_end_io(struct bio *bio, int err) 44static void mpage_end_io(struct bio *bio, int err)
45{ 45{
46 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 46 struct bio_vec *bv;
47 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 47 int i;
48 48
49 do { 49 bio_for_each_segment_all(bv, bio, i) {
50 struct page *page = bvec->bv_page; 50 struct page *page = bv->bv_page;
51 51
52 if (--bvec >= bio->bi_io_vec)
53 prefetchw(&bvec->bv_page->flags);
54 if (bio_data_dir(bio) == READ) { 52 if (bio_data_dir(bio) == READ) {
55 if (uptodate) { 53 if (!err) {
56 SetPageUptodate(page); 54 SetPageUptodate(page);
57 } else { 55 } else {
58 ClearPageUptodate(page); 56 ClearPageUptodate(page);
@@ -60,14 +58,15 @@ static void mpage_end_io(struct bio *bio, int err)
60 } 58 }
61 unlock_page(page); 59 unlock_page(page);
62 } else { /* bio_data_dir(bio) == WRITE */ 60 } else { /* bio_data_dir(bio) == WRITE */
63 if (!uptodate) { 61 if (err) {
64 SetPageError(page); 62 SetPageError(page);
65 if (page->mapping) 63 if (page->mapping)
66 set_bit(AS_EIO, &page->mapping->flags); 64 set_bit(AS_EIO, &page->mapping->flags);
67 } 65 }
68 end_page_writeback(page); 66 end_page_writeback(page);
69 } 67 }
70 } while (bvec >= bio->bi_io_vec); 68 }
69
71 bio_put(bio); 70 bio_put(bio);
72} 71}
73 72
@@ -94,7 +93,7 @@ mpage_alloc(struct block_device *bdev,
94 93
95 if (bio) { 94 if (bio) {
96 bio->bi_bdev = bdev; 95 bio->bi_bdev = bdev;
97 bio->bi_sector = first_sector; 96 bio->bi_iter.bi_sector = first_sector;
98 } 97 }
99 return bio; 98 return bio;
100} 99}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index e242bbf72972..56ff823ca82e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -134,8 +134,8 @@ bl_submit_bio(int rw, struct bio *bio)
134 if (bio) { 134 if (bio) {
135 get_parallel(bio->bi_private); 135 get_parallel(bio->bi_private);
136 dprintk("%s submitting %s bio %u@%llu\n", __func__, 136 dprintk("%s submitting %s bio %u@%llu\n", __func__,
137 rw == READ ? "read" : "write", 137 rw == READ ? "read" : "write", bio->bi_iter.bi_size,
138 bio->bi_size, (unsigned long long)bio->bi_sector); 138 (unsigned long long)bio->bi_iter.bi_sector);
139 submit_bio(rw, bio); 139 submit_bio(rw, bio);
140 } 140 }
141 return NULL; 141 return NULL;
@@ -156,7 +156,8 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
156 } 156 }
157 157
158 if (bio) { 158 if (bio) {
159 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; 159 bio->bi_iter.bi_sector = isect - be->be_f_offset +
160 be->be_v_offset;
160 bio->bi_bdev = be->be_mdev; 161 bio->bi_bdev = be->be_mdev;
161 bio->bi_end_io = end_io; 162 bio->bi_end_io = end_io;
162 bio->bi_private = par; 163 bio->bi_private = par;
@@ -201,18 +202,14 @@ static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
201static void bl_end_io_read(struct bio *bio, int err) 202static void bl_end_io_read(struct bio *bio, int err)
202{ 203{
203 struct parallel_io *par = bio->bi_private; 204 struct parallel_io *par = bio->bi_private;
204 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 205 struct bio_vec *bvec;
205 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 206 int i;
206 207
207 do { 208 if (!err)
208 struct page *page = bvec->bv_page; 209 bio_for_each_segment_all(bvec, bio, i)
210 SetPageUptodate(bvec->bv_page);
209 211
210 if (--bvec >= bio->bi_io_vec) 212 if (err) {
211 prefetchw(&bvec->bv_page->flags);
212 if (uptodate)
213 SetPageUptodate(page);
214 } while (bvec >= bio->bi_io_vec);
215 if (!uptodate) {
216 struct nfs_read_data *rdata = par->data; 213 struct nfs_read_data *rdata = par->data;
217 struct nfs_pgio_header *header = rdata->header; 214 struct nfs_pgio_header *header = rdata->header;
218 215
@@ -383,20 +380,16 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
383static void bl_end_io_write_zero(struct bio *bio, int err) 380static void bl_end_io_write_zero(struct bio *bio, int err)
384{ 381{
385 struct parallel_io *par = bio->bi_private; 382 struct parallel_io *par = bio->bi_private;
386 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 383 struct bio_vec *bvec;
387 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 384 int i;
388
389 do {
390 struct page *page = bvec->bv_page;
391 385
392 if (--bvec >= bio->bi_io_vec) 386 bio_for_each_segment_all(bvec, bio, i) {
393 prefetchw(&bvec->bv_page->flags);
394 /* This is the zeroing page we added */ 387 /* This is the zeroing page we added */
395 end_page_writeback(page); 388 end_page_writeback(bvec->bv_page);
396 page_cache_release(page); 389 page_cache_release(bvec->bv_page);
397 } while (bvec >= bio->bi_io_vec); 390 }
398 391
399 if (unlikely(!uptodate)) { 392 if (unlikely(err)) {
400 struct nfs_write_data *data = par->data; 393 struct nfs_write_data *data = par->data;
401 struct nfs_pgio_header *header = data->header; 394 struct nfs_pgio_header *header = data->header;
402 395
@@ -519,7 +512,7 @@ bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
519 isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + 512 isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
520 (offset / SECTOR_SIZE); 513 (offset / SECTOR_SIZE);
521 514
522 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; 515 bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset;
523 bio->bi_bdev = be->be_mdev; 516 bio->bi_bdev = be->be_mdev;
524 bio->bi_end_io = bl_read_single_end_io; 517 bio->bi_end_io = bl_read_single_end_io;
525 518
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d2255d705421..aa9bc973f36a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -924,11 +924,11 @@ static const struct inode_operations nfs3_dir_inode_operations = {
924 .permission = nfs_permission, 924 .permission = nfs_permission,
925 .getattr = nfs_getattr, 925 .getattr = nfs_getattr,
926 .setattr = nfs_setattr, 926 .setattr = nfs_setattr,
927#ifdef CONFIG_NFS_V3_ACL
927 .listxattr = generic_listxattr, 928 .listxattr = generic_listxattr,
928 .getxattr = generic_getxattr, 929 .getxattr = generic_getxattr,
929 .setxattr = generic_setxattr, 930 .setxattr = generic_setxattr,
930 .removexattr = generic_removexattr, 931 .removexattr = generic_removexattr,
931#ifdef CONFIG_NFS_V3_ACL
932 .get_acl = nfs3_get_acl, 932 .get_acl = nfs3_get_acl,
933 .set_acl = nfs3_set_acl, 933 .set_acl = nfs3_set_acl,
934#endif 934#endif
@@ -938,11 +938,11 @@ static const struct inode_operations nfs3_file_inode_operations = {
938 .permission = nfs_permission, 938 .permission = nfs_permission,
939 .getattr = nfs_getattr, 939 .getattr = nfs_getattr,
940 .setattr = nfs_setattr, 940 .setattr = nfs_setattr,
941#ifdef CONFIG_NFS_V3_ACL
941 .listxattr = generic_listxattr, 942 .listxattr = generic_listxattr,
942 .getxattr = generic_getxattr, 943 .getxattr = generic_getxattr,
943 .setxattr = generic_setxattr, 944 .setxattr = generic_setxattr,
944 .removexattr = generic_removexattr, 945 .removexattr = generic_removexattr,
945#ifdef CONFIG_NFS_V3_ACL
946 .get_acl = nfs3_get_acl, 946 .get_acl = nfs3_get_acl,
947 .set_acl = nfs3_set_acl, 947 .set_acl = nfs3_set_acl,
948#endif 948#endif
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 8b68218e2c1c..a812fd1b92a4 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -45,7 +45,7 @@ struct svc_rqst;
45 45
46struct nfs4_acl *nfs4_acl_new(int); 46struct nfs4_acl *nfs4_acl_new(int);
47int nfs4_acl_get_whotype(char *, u32); 47int nfs4_acl_get_whotype(char *, u32);
48int nfs4_acl_write_who(int who, char *p); 48__be32 nfs4_acl_write_who(int who, __be32 **p, int *len);
49 49
50int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, 50int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
51 struct nfs4_acl **acl); 51 struct nfs4_acl **acl);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index d5c5b3e00266..b582f9ab6b2a 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -84,12 +84,4 @@ int nfsd_cache_lookup(struct svc_rqst *);
84void nfsd_cache_update(struct svc_rqst *, int, __be32 *); 84void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
85int nfsd_reply_cache_stats_open(struct inode *, struct file *); 85int nfsd_reply_cache_stats_open(struct inode *, struct file *);
86 86
87#ifdef CONFIG_NFSD_V4
88void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
89#else /* CONFIG_NFSD_V4 */
90static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
91{
92}
93#endif /* CONFIG_NFSD_V4 */
94
95#endif /* NFSCACHE_H */ 87#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index bf95f6b817a4..66e58db01936 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net)
56 56
57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); 57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); 58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
59int nfsd_map_uid_to_name(struct svc_rqst *, kuid_t, char *); 59__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *);
60int nfsd_map_gid_to_name(struct svc_rqst *, kgid_t, char *); 60__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *);
61 61
62#endif /* LINUX_NFSD_IDMAP_H */ 62#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 849a7c3ced22..d32b3aa6600d 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -95,6 +95,7 @@ struct nfsd_net {
95 time_t nfsd4_grace; 95 time_t nfsd4_grace;
96 96
97 bool nfsd_net_up; 97 bool nfsd_net_up;
98 bool lockd_up;
98 99
99 /* 100 /*
100 * Time of server startup 101 * Time of server startup
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 14d9ecb96cff..de6e39e12cb3 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -168,7 +168,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
168 struct kstat *stat) 168 struct kstat *stat)
169{ 169{
170 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); 170 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
171 *p++ = htonl((u32) stat->mode); 171 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
172 *p++ = htonl((u32) stat->nlink); 172 *p++ = htonl((u32) stat->nlink);
173 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); 173 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
174 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); 174 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
@@ -842,21 +842,21 @@ out:
842 842
843static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 843static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
844{ 844{
845 struct svc_fh fh; 845 struct svc_fh *fh = &cd->scratch;
846 __be32 err; 846 __be32 err;
847 847
848 fh_init(&fh, NFS3_FHSIZE); 848 fh_init(fh, NFS3_FHSIZE);
849 err = compose_entry_fh(cd, &fh, name, namlen); 849 err = compose_entry_fh(cd, fh, name, namlen);
850 if (err) { 850 if (err) {
851 *p++ = 0; 851 *p++ = 0;
852 *p++ = 0; 852 *p++ = 0;
853 goto out; 853 goto out;
854 } 854 }
855 p = encode_post_op_attr(cd->rqstp, p, &fh); 855 p = encode_post_op_attr(cd->rqstp, p, fh);
856 *p++ = xdr_one; /* yes, a file handle follows */ 856 *p++ = xdr_one; /* yes, a file handle follows */
857 p = encode_fh(p, &fh); 857 p = encode_fh(p, fh);
858out: 858out:
859 fh_put(&fh); 859 fh_put(fh);
860 return p; 860 return p;
861} 861}
862 862
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 649ad7cf2204..d3a587144222 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -38,6 +38,7 @@
38#include <linux/nfs_fs.h> 38#include <linux/nfs_fs.h>
39#include <linux/export.h> 39#include <linux/export.h>
40#include "nfsfh.h" 40#include "nfsfh.h"
41#include "nfsd.h"
41#include "acl.h" 42#include "acl.h"
42#include "vfs.h" 43#include "vfs.h"
43 44
@@ -916,17 +917,22 @@ nfs4_acl_get_whotype(char *p, u32 len)
916 return NFS4_ACL_WHO_NAMED; 917 return NFS4_ACL_WHO_NAMED;
917} 918}
918 919
919int 920__be32 nfs4_acl_write_who(int who, __be32 **p, int *len)
920nfs4_acl_write_who(int who, char *p)
921{ 921{
922 int i; 922 int i;
923 int bytes;
923 924
924 for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { 925 for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
925 if (s2t_map[i].type == who) { 926 if (s2t_map[i].type != who)
926 memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); 927 continue;
927 return s2t_map[i].stringlen; 928 bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2);
928 } 929 if (bytes > *len)
930 return nfserr_resource;
931 *p = xdr_encode_opaque(*p, s2t_map[i].string,
932 s2t_map[i].stringlen);
933 *len -= bytes;
934 return 0;
929 } 935 }
930 BUG(); 936 WARN_ON_ONCE(1);
931 return -1; 937 return -1;
932} 938}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4832fd819f88..c0dfde68742e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -551,27 +551,46 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
551 return 0; 551 return 0;
552} 552}
553 553
554static int 554static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen)
555idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name) 555{
556 char buf[11];
557 int len;
558 int bytes;
559
560 len = sprintf(buf, "%u", id);
561 bytes = 4 + (XDR_QUADLEN(len) << 2);
562 if (bytes > *buflen)
563 return nfserr_resource;
564 *p = xdr_encode_opaque(*p, buf, len);
565 *buflen -= bytes;
566 return 0;
567}
568
569static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
556{ 570{
557 struct ent *item, key = { 571 struct ent *item, key = {
558 .id = id, 572 .id = id,
559 .type = type, 573 .type = type,
560 }; 574 };
561 int ret; 575 int ret;
576 int bytes;
562 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 577 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
563 578
564 strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); 579 strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
565 ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); 580 ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
566 if (ret == -ENOENT) 581 if (ret == -ENOENT)
567 return sprintf(name, "%u", id); 582 return encode_ascii_id(id, p, buflen);
568 if (ret) 583 if (ret)
569 return ret; 584 return nfserrno(ret);
570 ret = strlen(item->name); 585 ret = strlen(item->name);
571 BUG_ON(ret > IDMAP_NAMESZ); 586 WARN_ON_ONCE(ret > IDMAP_NAMESZ);
572 memcpy(name, item->name, ret); 587 bytes = 4 + (XDR_QUADLEN(ret) << 2);
588 if (bytes > *buflen)
589 return nfserr_resource;
590 *p = xdr_encode_opaque(*p, item->name, ret);
591 *buflen -= bytes;
573 cache_put(&item->h, nn->idtoname_cache); 592 cache_put(&item->h, nn->idtoname_cache);
574 return ret; 593 return 0;
575} 594}
576 595
577static bool 596static bool
@@ -603,12 +622,11 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
603 return idmap_name_to_id(rqstp, type, name, namelen, id); 622 return idmap_name_to_id(rqstp, type, name, namelen, id);
604} 623}
605 624
606static int 625static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
607do_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
608{ 626{
609 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 627 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
610 return sprintf(name, "%u", id); 628 return encode_ascii_id(id, p, buflen);
611 return idmap_id_to_name(rqstp, type, id, name); 629 return idmap_id_to_name(rqstp, type, id, p, buflen);
612} 630}
613 631
614__be32 632__be32
@@ -637,16 +655,14 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
637 return status; 655 return status;
638} 656}
639 657
640int 658__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid, __be32 **p, int *buflen)
641nfsd_map_uid_to_name(struct svc_rqst *rqstp, kuid_t uid, char *name)
642{ 659{
643 u32 id = from_kuid(&init_user_ns, uid); 660 u32 id = from_kuid(&init_user_ns, uid);
644 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); 661 return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen);
645} 662}
646 663
647int 664__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen)
648nfsd_map_gid_to_name(struct svc_rqst *rqstp, kgid_t gid, char *name)
649{ 665{
650 u32 id = from_kgid(&init_user_ns, gid); 666 u32 id = from_kgid(&init_user_ns, gid);
651 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); 667 return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen);
652} 668}
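
The nfs4idmap.c change above switches the id-to-name path from "fill a caller-supplied char buffer and return its length" to "encode straight into the XDR reply stream and return an nfs status". Every encoder now follows the same accounting rule: an XDR opaque costs 4 bytes for the length word plus the data rounded up to a 4-byte boundary, and the helper refuses with nfserr_resource if that would overflow the space left. A generic sketch of that rule, not part of the patch (encode_opaque_checked() is a made-up helper):

#include <linux/sunrpc/xdr.h>
#include "nfsd.h"               /* nfserr_resource */

static __be32 encode_opaque_checked(__be32 **p, int *buflen,
                                    const void *data, int len)
{
        /* 4-byte length word + data padded to the next 4-byte boundary */
        int bytes = 4 + (XDR_QUADLEN(len) << 2);

        if (bytes > *buflen)
                return nfserr_resource;
        *p = xdr_encode_opaque(*p, data, len);
        *buflen -= bytes;
        return 0;
}
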
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 825b8a99b99b..82189b208af3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -231,17 +231,16 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
231} 231}
232 232
233static __be32 233static __be32
234do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) 234do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
235{ 235{
236 struct svc_fh *current_fh = &cstate->current_fh; 236 struct svc_fh *current_fh = &cstate->current_fh;
237 struct svc_fh *resfh;
238 int accmode; 237 int accmode;
239 __be32 status; 238 __be32 status;
240 239
241 resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); 240 *resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
242 if (!resfh) 241 if (!*resfh)
243 return nfserr_jukebox; 242 return nfserr_jukebox;
244 fh_init(resfh, NFS4_FHSIZE); 243 fh_init(*resfh, NFS4_FHSIZE);
245 open->op_truncate = 0; 244 open->op_truncate = 0;
246 245
247 if (open->op_create) { 246 if (open->op_create) {
@@ -266,12 +265,12 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
266 */ 265 */
267 status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, 266 status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
268 open->op_fname.len, &open->op_iattr, 267 open->op_fname.len, &open->op_iattr,
269 resfh, open->op_createmode, 268 *resfh, open->op_createmode,
270 (u32 *)open->op_verf.data, 269 (u32 *)open->op_verf.data,
271 &open->op_truncate, &open->op_created); 270 &open->op_truncate, &open->op_created);
272 271
273 if (!status && open->op_label.len) 272 if (!status && open->op_label.len)
274 nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); 273 nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
275 274
276 /* 275 /*
277 * Following rfc 3530 14.2.16, use the returned bitmask 276 * Following rfc 3530 14.2.16, use the returned bitmask
@@ -281,31 +280,32 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
281 if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) 280 if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
282 open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | 281 open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
283 FATTR4_WORD1_TIME_MODIFY); 282 FATTR4_WORD1_TIME_MODIFY);
284 } else { 283 } else
284 /*
285 * Note this may exit with the parent still locked.
286 * We will hold the lock until nfsd4_open's final
287 * lookup, to prevent renames or unlinks until we've had
288 * a chance to an acquire a delegation if appropriate.
289 */
285 status = nfsd_lookup(rqstp, current_fh, 290 status = nfsd_lookup(rqstp, current_fh,
286 open->op_fname.data, open->op_fname.len, resfh); 291 open->op_fname.data, open->op_fname.len, *resfh);
287 fh_unlock(current_fh);
288 }
289 if (status) 292 if (status)
290 goto out; 293 goto out;
291 status = nfsd_check_obj_isreg(resfh); 294 status = nfsd_check_obj_isreg(*resfh);
292 if (status) 295 if (status)
293 goto out; 296 goto out;
294 297
295 if (is_create_with_attrs(open) && open->op_acl != NULL) 298 if (is_create_with_attrs(open) && open->op_acl != NULL)
296 do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); 299 do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval);
297 300
298 nfsd4_set_open_owner_reply_cache(cstate, open, resfh); 301 nfsd4_set_open_owner_reply_cache(cstate, open, *resfh);
299 accmode = NFSD_MAY_NOP; 302 accmode = NFSD_MAY_NOP;
300 if (open->op_created || 303 if (open->op_created ||
301 open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) 304 open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
302 accmode |= NFSD_MAY_OWNER_OVERRIDE; 305 accmode |= NFSD_MAY_OWNER_OVERRIDE;
303 status = do_open_permission(rqstp, resfh, open, accmode); 306 status = do_open_permission(rqstp, *resfh, open, accmode);
304 set_change_info(&open->op_cinfo, current_fh); 307 set_change_info(&open->op_cinfo, current_fh);
305 fh_dup2(current_fh, resfh);
306out: 308out:
307 fh_put(resfh);
308 kfree(resfh);
309 return status; 309 return status;
310} 310}
311 311
@@ -358,6 +358,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
358 struct nfsd4_open *open) 358 struct nfsd4_open *open)
359{ 359{
360 __be32 status; 360 __be32 status;
361 struct svc_fh *resfh = NULL;
361 struct nfsd4_compoundres *resp; 362 struct nfsd4_compoundres *resp;
362 struct net *net = SVC_NET(rqstp); 363 struct net *net = SVC_NET(rqstp);
363 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 364 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -424,7 +425,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
424 switch (open->op_claim_type) { 425 switch (open->op_claim_type) {
425 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 426 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
426 case NFS4_OPEN_CLAIM_NULL: 427 case NFS4_OPEN_CLAIM_NULL:
427 status = do_open_lookup(rqstp, cstate, open); 428 status = do_open_lookup(rqstp, cstate, open, &resfh);
428 if (status) 429 if (status)
429 goto out; 430 goto out;
430 break; 431 break;
@@ -440,6 +441,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
440 status = do_open_fhandle(rqstp, cstate, open); 441 status = do_open_fhandle(rqstp, cstate, open);
441 if (status) 442 if (status)
442 goto out; 443 goto out;
444 resfh = &cstate->current_fh;
443 break; 445 break;
444 case NFS4_OPEN_CLAIM_DELEG_PREV_FH: 446 case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
445 case NFS4_OPEN_CLAIM_DELEGATE_PREV: 447 case NFS4_OPEN_CLAIM_DELEGATE_PREV:
@@ -459,9 +461,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
459 * successful, it (1) truncates the file if open->op_truncate was 461 * successful, it (1) truncates the file if open->op_truncate was
460 * set, (2) sets open->op_stateid, (3) sets open->op_delegation. 462 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
461 */ 463 */
462 status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); 464 status = nfsd4_process_open2(rqstp, resfh, open);
463 WARN_ON(status && open->op_created); 465 WARN_ON(status && open->op_created);
464out: 466out:
467 if (resfh && resfh != &cstate->current_fh) {
468 fh_dup2(&cstate->current_fh, resfh);
469 fh_put(resfh);
470 kfree(resfh);
471 }
465 nfsd4_cleanup_open_state(open, status); 472 nfsd4_cleanup_open_state(open, status);
466 if (open->op_openowner && !nfsd4_has_session(cstate)) 473 if (open->op_openowner && !nfsd4_has_session(cstate))
467 cstate->replay_owner = &open->op_openowner->oo_owner; 474 cstate->replay_owner = &open->op_openowner->oo_owner;
@@ -1070,8 +1077,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1070 cstate->current_fh.fh_dentry, &p, 1077 cstate->current_fh.fh_dentry, &p,
1071 count, verify->ve_bmval, 1078 count, verify->ve_bmval,
1072 rqstp, 0); 1079 rqstp, 0);
1073 1080 /*
1074 /* this means that nfsd4_encode_fattr() ran out of space */ 1081 * If nfsd4_encode_fattr() ran out of space, assume that's because
1082 * the attributes are longer (hence different) than those given:
1083 */
1075 if (status == nfserr_resource) 1084 if (status == nfserr_resource)
1076 status = nfserr_not_same; 1085 status = nfserr_not_same;
1077 if (status) 1086 if (status)
@@ -1525,7 +1534,8 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1525static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1534static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1526{ 1535{
1527 return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ 1536 return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
1528 1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\ 1537 1 + 1 + /* eir_flags, spr_how */\
1538 4 + /* spo_must_enforce & _allow with bitmap */\
1529 2 + /*eir_server_owner.so_minor_id */\ 1539 2 + /*eir_server_owner.so_minor_id */\
1530 /* eir_server_owner.so_major_id<> */\ 1540 /* eir_server_owner.so_major_id<> */\
1531 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ 1541 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
@@ -1882,6 +1892,7 @@ struct svc_version nfsd_version4 = {
1882 .vs_proc = nfsd_procedures4, 1892 .vs_proc = nfsd_procedures4,
1883 .vs_dispatch = nfsd_dispatch, 1893 .vs_dispatch = nfsd_dispatch,
1884 .vs_xdrsize = NFS4_SVC_XDRSIZE, 1894 .vs_xdrsize = NFS4_SVC_XDRSIZE,
1895 .vs_rpcb_optnl = 1,
1885}; 1896};
1886 1897
1887/* 1898/*
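
In the nfs4proc.c hunks above, do_open_lookup() no longer frees the result filehandle itself: it hands resfh back to nfsd4_open(), which keeps the parent directory locked across the lookup (see the fh_lock_nested() change in vfs.c at the end of this diff) and only swaps resfh into the compound state and releases it after delegation handling. A compressed sketch of that ownership hand-off, not part of the patch (the example_ names are made up):

#include <linux/slab.h>
#include "nfsd.h"

static __be32 example_open_lookup(struct svc_fh *current_fh,
                                  struct svc_fh **resfh)
{
        *resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
        if (!*resfh)
                return nfserr_jukebox;
        fh_init(*resfh, NFS4_FHSIZE);
        /* ... lookup/create into *resfh; parent dir may stay locked ... */
        return 0;
}

static void example_open_finish(struct svc_fh *current_fh,
                                struct svc_fh *resfh)
{
        /* The caller, not the lookup helper, owns resfh now. */
        if (resfh && resfh != current_fh) {
                fh_dup2(current_fh, resfh);     /* result becomes the current fh */
                fh_put(resfh);
                kfree(resfh);
        }
}
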
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 105d6fa7c514..d5d070fbeb35 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -832,10 +832,11 @@ static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
832 spin_unlock(&nfsd_drc_lock); 832 spin_unlock(&nfsd_drc_lock);
833} 833}
834 834
835static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs) 835static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
836 struct nfsd4_channel_attrs *battrs)
836{ 837{
837 int numslots = attrs->maxreqs; 838 int numslots = fattrs->maxreqs;
838 int slotsize = slot_bytes(attrs); 839 int slotsize = slot_bytes(fattrs);
839 struct nfsd4_session *new; 840 struct nfsd4_session *new;
840 int mem, i; 841 int mem, i;
841 842
@@ -852,6 +853,10 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs)
852 if (!new->se_slots[i]) 853 if (!new->se_slots[i])
853 goto out_free; 854 goto out_free;
854 } 855 }
856
857 memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
858 memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
859
855 return new; 860 return new;
856out_free: 861out_free:
857 while (i--) 862 while (i--)
@@ -997,8 +1002,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
997 list_add(&new->se_perclnt, &clp->cl_sessions); 1002 list_add(&new->se_perclnt, &clp->cl_sessions);
998 spin_unlock(&clp->cl_lock); 1003 spin_unlock(&clp->cl_lock);
999 spin_unlock(&nn->client_lock); 1004 spin_unlock(&nn->client_lock);
1000 memcpy(&new->se_fchannel, &cses->fore_channel, 1005
1001 sizeof(struct nfsd4_channel_attrs));
1002 if (cses->flags & SESSION4_BACK_CHAN) { 1006 if (cses->flags & SESSION4_BACK_CHAN) {
1003 struct sockaddr *sa = svc_addr(rqstp); 1007 struct sockaddr *sa = svc_addr(rqstp);
1004 /* 1008 /*
@@ -1851,6 +1855,11 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
1851 return nfs_ok; 1855 return nfs_ok;
1852} 1856}
1853 1857
1858#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \
1859 RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
1860#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \
1861 RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
1862
1854static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) 1863static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
1855{ 1864{
1856 ca->headerpadsz = 0; 1865 ca->headerpadsz = 0;
@@ -1861,9 +1870,9 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
1861 * less than 1k. Tighten up this estimate in the unlikely event 1870 * less than 1k. Tighten up this estimate in the unlikely event
1862 * it turns out to be a problem for some client: 1871 * it turns out to be a problem for some client:
1863 */ 1872 */
1864 if (ca->maxreq_sz < NFS4_enc_cb_recall_sz + RPC_MAX_HEADER_WITH_AUTH) 1873 if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
1865 return nfserr_toosmall; 1874 return nfserr_toosmall;
1866 if (ca->maxresp_sz < NFS4_dec_cb_recall_sz + RPC_MAX_REPHEADER_WITH_AUTH) 1875 if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
1867 return nfserr_toosmall; 1876 return nfserr_toosmall;
1868 ca->maxresp_cached = 0; 1877 ca->maxresp_cached = 0;
1869 if (ca->maxops < 2) 1878 if (ca->maxops < 2)
@@ -1913,9 +1922,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1913 return status; 1922 return status;
1914 status = check_backchannel_attrs(&cr_ses->back_channel); 1923 status = check_backchannel_attrs(&cr_ses->back_channel);
1915 if (status) 1924 if (status)
1916 return status; 1925 goto out_release_drc_mem;
1917 status = nfserr_jukebox; 1926 status = nfserr_jukebox;
1918 new = alloc_session(&cr_ses->fore_channel); 1927 new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
1919 if (!new) 1928 if (!new)
1920 goto out_release_drc_mem; 1929 goto out_release_drc_mem;
1921 conn = alloc_conn_from_crses(rqstp, cr_ses); 1930 conn = alloc_conn_from_crses(rqstp, cr_ses);
@@ -3034,18 +3043,18 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
3034 if (!fl) 3043 if (!fl)
3035 return -ENOMEM; 3044 return -ENOMEM;
3036 fl->fl_file = find_readable_file(fp); 3045 fl->fl_file = find_readable_file(fp);
3037 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
3038 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); 3046 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
3039 if (status) { 3047 if (status)
3040 list_del_init(&dp->dl_perclnt); 3048 goto out_free;
3041 locks_free_lock(fl); 3049 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
3042 return status;
3043 }
3044 fp->fi_lease = fl; 3050 fp->fi_lease = fl;
3045 fp->fi_deleg_file = get_file(fl->fl_file); 3051 fp->fi_deleg_file = get_file(fl->fl_file);
3046 atomic_set(&fp->fi_delegees, 1); 3052 atomic_set(&fp->fi_delegees, 1);
3047 list_add(&dp->dl_perfile, &fp->fi_delegations); 3053 list_add(&dp->dl_perfile, &fp->fi_delegations);
3048 return 0; 3054 return 0;
3055out_free:
3056 locks_free_lock(fl);
3057 return status;
3049} 3058}
3050 3059
3051static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) 3060static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
@@ -3125,6 +3134,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
3125 goto out_no_deleg; 3134 goto out_no_deleg;
3126 break; 3135 break;
3127 case NFS4_OPEN_CLAIM_NULL: 3136 case NFS4_OPEN_CLAIM_NULL:
3137 case NFS4_OPEN_CLAIM_FH:
3128 /* 3138 /*
3129 * Let's not give out any delegations till everyone's 3139 * Let's not give out any delegations till everyone's
3130 * had the chance to reclaim theirs.... 3140 * had the chance to reclaim theirs....
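
Two of the nfs4state.c fixes above are easy to miss. First, alloc_session() now copies both the fore- and back-channel attributes while it has the negotiated values, instead of init_session() copying only the fore channel later. Second, check_backchannel_attrs() was comparing the session's byte limits against cb_recall sizes counted in 4-byte XDR words; the new NFSD_CB_MAX_REQ_SZ/NFSD_CB_MAX_RESP_SZ macros scale by sizeof(__be32) so both sides of the comparison are in bytes. A sketch of that unit fix, not part of the patch (the example_ names are made up and assume the cb_recall size constants are visible):

/* NFS4_enc_cb_recall_sz and the RPC header sizes are in 4-byte XDR words. */
#define EXAMPLE_CB_MAX_REQ_SZ \
        ((NFS4_enc_cb_recall_sz + RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))

static bool example_backchannel_req_ok(u32 maxreq_sz)
{
        return maxreq_sz >= EXAMPLE_CB_MAX_REQ_SZ;      /* bytes vs. bytes */
}
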
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ee7237f99f54..63f2395c57ed 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -103,11 +103,6 @@ xdr_error: \
103 (x) = (u64)ntohl(*p++) << 32; \ 103 (x) = (u64)ntohl(*p++) << 32; \
104 (x) |= ntohl(*p++); \ 104 (x) |= ntohl(*p++); \
105} while (0) 105} while (0)
106#define READTIME(x) do { \
107 p++; \
108 (x) = ntohl(*p++); \
109 p++; \
110} while (0)
111#define READMEM(x,nbytes) do { \ 106#define READMEM(x,nbytes) do { \
112 x = (char *)p; \ 107 x = (char *)p; \
113 p += XDR_QUADLEN(nbytes); \ 108 p += XDR_QUADLEN(nbytes); \
@@ -190,6 +185,15 @@ static int zero_clientid(clientid_t *clid)
190 return (clid->cl_boot == 0) && (clid->cl_id == 0); 185 return (clid->cl_boot == 0) && (clid->cl_id == 0);
191} 186}
192 187
188/**
189 * defer_free - mark an allocation as deferred freed
190 * @argp: NFSv4 compound argument structure to be freed with
191 * @release: release callback to free @p, typically kfree()
192 * @p: pointer to be freed
193 *
194 * Marks @p to be freed when processing the compound operation
195 * described in @argp finishes.
196 */
193static int 197static int
194defer_free(struct nfsd4_compoundargs *argp, 198defer_free(struct nfsd4_compoundargs *argp,
195 void (*release)(const void *), void *p) 199 void (*release)(const void *), void *p)
@@ -206,6 +210,16 @@ defer_free(struct nfsd4_compoundargs *argp,
206 return 0; 210 return 0;
207} 211}
208 212
213/**
214 * savemem - duplicate a chunk of memory for later processing
215 * @argp: NFSv4 compound argument structure to be freed with
216 * @p: pointer to be duplicated
217 * @nbytes: length to be duplicated
218 *
219 * Returns a pointer to a copy of @nbytes bytes of memory at @p
220 * that are preserved until processing of the NFSv4 compound
221 * operation described by @argp finishes.
222 */
209static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) 223static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
210{ 224{
211 if (p == argp->tmp) { 225 if (p == argp->tmp) {
@@ -257,7 +271,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
257 int expected_len, len = 0; 271 int expected_len, len = 0;
258 u32 dummy32; 272 u32 dummy32;
259 char *buf; 273 char *buf;
260 int host_err;
261 274
262 DECODE_HEAD; 275 DECODE_HEAD;
263 iattr->ia_valid = 0; 276 iattr->ia_valid = 0;
@@ -284,10 +297,9 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
284 return nfserr_resource; 297 return nfserr_resource;
285 298
286 *acl = nfs4_acl_new(nace); 299 *acl = nfs4_acl_new(nace);
287 if (*acl == NULL) { 300 if (*acl == NULL)
288 host_err = -ENOMEM; 301 return nfserr_jukebox;
289 goto out_nfserr; 302
290 }
291 defer_free(argp, kfree, *acl); 303 defer_free(argp, kfree, *acl);
292 304
293 (*acl)->naces = nace; 305 (*acl)->naces = nace;
@@ -425,10 +437,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
425 goto xdr_error; 437 goto xdr_error;
426 438
427 DECODE_TAIL; 439 DECODE_TAIL;
428
429out_nfserr:
430 status = nfserrno(host_err);
431 goto out;
432} 440}
433 441
434static __be32 442static __be32
@@ -1957,56 +1965,16 @@ static u32 nfs4_file_type(umode_t mode)
1957 }; 1965 };
1958} 1966}
1959 1967
1960static __be32
1961nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid,
1962 __be32 **p, int *buflen)
1963{
1964 int status;
1965
1966 if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4)
1967 return nfserr_resource;
1968 if (whotype != NFS4_ACL_WHO_NAMED)
1969 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
1970 else if (gid_valid(gid))
1971 status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1));
1972 else
1973 status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1));
1974 if (status < 0)
1975 return nfserrno(status);
1976 *p = xdr_encode_opaque(*p, NULL, status);
1977 *buflen -= (XDR_QUADLEN(status) << 2) + 4;
1978 BUG_ON(*buflen < 0);
1979 return 0;
1980}
1981
1982static inline __be32
1983nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen)
1984{
1985 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID,
1986 p, buflen);
1987}
1988
1989static inline __be32
1990nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen)
1991{
1992 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group,
1993 p, buflen);
1994}
1995
1996static inline __be32 1968static inline __be32
1997nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, 1969nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
1998 __be32 **p, int *buflen) 1970 __be32 **p, int *buflen)
1999{ 1971{
2000 kuid_t uid = INVALID_UID; 1972 if (ace->whotype != NFS4_ACL_WHO_NAMED)
2001 kgid_t gid = INVALID_GID; 1973 return nfs4_acl_write_who(ace->whotype, p, buflen);
2002 1974 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
2003 if (ace->whotype == NFS4_ACL_WHO_NAMED) { 1975 return nfsd4_encode_group(rqstp, ace->who_gid, p, buflen);
2004 if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) 1976 else
2005 gid = ace->who_gid; 1977 return nfsd4_encode_user(rqstp, ace->who_uid, p, buflen);
2006 else
2007 uid = ace->who_uid;
2008 }
2009 return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen);
2010} 1978}
2011 1979
2012#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ 1980#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -2090,7 +2058,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2090 u32 bmval1 = bmval[1]; 2058 u32 bmval1 = bmval[1];
2091 u32 bmval2 = bmval[2]; 2059 u32 bmval2 = bmval[2];
2092 struct kstat stat; 2060 struct kstat stat;
2093 struct svc_fh tempfh; 2061 struct svc_fh *tempfh = NULL;
2094 struct kstatfs statfs; 2062 struct kstatfs statfs;
2095 int buflen = count << 2; 2063 int buflen = count << 2;
2096 __be32 *attrlenp; 2064 __be32 *attrlenp;
@@ -2137,11 +2105,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2137 goto out_nfserr; 2105 goto out_nfserr;
2138 } 2106 }
2139 if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { 2107 if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
2140 fh_init(&tempfh, NFS4_FHSIZE); 2108 tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
2141 status = fh_compose(&tempfh, exp, dentry, NULL); 2109 status = nfserr_jukebox;
2110 if (!tempfh)
2111 goto out;
2112 fh_init(tempfh, NFS4_FHSIZE);
2113 status = fh_compose(tempfh, exp, dentry, NULL);
2142 if (status) 2114 if (status)
2143 goto out; 2115 goto out;
2144 fhp = &tempfh; 2116 fhp = tempfh;
2145 } 2117 }
2146 if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT 2118 if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
2147 | FATTR4_WORD0_SUPPORTED_ATTRS)) { 2119 | FATTR4_WORD0_SUPPORTED_ATTRS)) {
@@ -2222,8 +2194,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2222 if ((buflen -= 4) < 0) 2194 if ((buflen -= 4) < 0)
2223 goto out_resource; 2195 goto out_resource;
2224 dummy = nfs4_file_type(stat.mode); 2196 dummy = nfs4_file_type(stat.mode);
2225 if (dummy == NF4BAD) 2197 if (dummy == NF4BAD) {
2226 goto out_serverfault; 2198 status = nfserr_serverfault;
2199 goto out;
2200 }
2227 WRITE32(dummy); 2201 WRITE32(dummy);
2228 } 2202 }
2229 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { 2203 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
@@ -2317,8 +2291,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2317 WRITE32(ace->flag); 2291 WRITE32(ace->flag);
2318 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); 2292 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
2319 status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen); 2293 status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
2320 if (status == nfserr_resource)
2321 goto out_resource;
2322 if (status) 2294 if (status)
2323 goto out; 2295 goto out;
2324 } 2296 }
@@ -2379,8 +2351,6 @@ out_acl:
2379 } 2351 }
2380 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { 2352 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
2381 status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); 2353 status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
2382 if (status == nfserr_resource)
2383 goto out_resource;
2384 if (status) 2354 if (status)
2385 goto out; 2355 goto out;
2386 } 2356 }
@@ -2431,15 +2401,11 @@ out_acl:
2431 } 2401 }
2432 if (bmval1 & FATTR4_WORD1_OWNER) { 2402 if (bmval1 & FATTR4_WORD1_OWNER) {
2433 status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); 2403 status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
2434 if (status == nfserr_resource)
2435 goto out_resource;
2436 if (status) 2404 if (status)
2437 goto out; 2405 goto out;
2438 } 2406 }
2439 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { 2407 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
2440 status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); 2408 status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
2441 if (status == nfserr_resource)
2442 goto out_resource;
2443 if (status) 2409 if (status)
2444 goto out; 2410 goto out;
2445 } 2411 }
@@ -2533,8 +2499,8 @@ out:
2533 security_release_secctx(context, contextlen); 2499 security_release_secctx(context, contextlen);
2534#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ 2500#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
2535 kfree(acl); 2501 kfree(acl);
2536 if (fhp == &tempfh) 2502 if (tempfh)
2537 fh_put(&tempfh); 2503 fh_put(tempfh);
2538 return status; 2504 return status;
2539out_nfserr: 2505out_nfserr:
2540 status = nfserrno(err); 2506 status = nfserrno(err);
@@ -2542,9 +2508,6 @@ out_nfserr:
2542out_resource: 2508out_resource:
2543 status = nfserr_resource; 2509 status = nfserr_resource;
2544 goto out; 2510 goto out;
2545out_serverfault:
2546 status = nfserr_serverfault;
2547 goto out;
2548} 2511}
2549 2512
2550static inline int attributes_need_mount(u32 *bmval) 2513static inline int attributes_need_mount(u32 *bmval)
@@ -2621,17 +2584,14 @@ out_put:
2621static __be32 * 2584static __be32 *
2622nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) 2585nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
2623{ 2586{
2624 __be32 *attrlenp;
2625
2626 if (buflen < 6) 2587 if (buflen < 6)
2627 return NULL; 2588 return NULL;
2628 *p++ = htonl(2); 2589 *p++ = htonl(2);
2629 *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ 2590 *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
2630 *p++ = htonl(0); /* bmval1 */ 2591 *p++ = htonl(0); /* bmval1 */
2631 2592
2632 attrlenp = p++; 2593 *p++ = htonl(4); /* attribute length */
2633 *p++ = nfserr; /* no htonl */ 2594 *p++ = nfserr; /* no htonl */
2634 *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
2635 return p; 2595 return p;
2636} 2596}
2637 2597
@@ -3244,7 +3204,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
3244 3204
3245 if (rpcauth_get_gssinfo(pf, &info) == 0) { 3205 if (rpcauth_get_gssinfo(pf, &info) == 0) {
3246 supported++; 3206 supported++;
3247 RESERVE_SPACE(4 + 4 + info.oid.len + 4 + 4); 3207 RESERVE_SPACE(4 + 4 + XDR_LEN(info.oid.len) + 4 + 4);
3248 WRITE32(RPC_AUTH_GSS); 3208 WRITE32(RPC_AUTH_GSS);
3249 WRITE32(info.oid.len); 3209 WRITE32(info.oid.len);
3250 WRITEMEM(info.oid.data, info.oid.len); 3210 WRITEMEM(info.oid.data, info.oid.len);
@@ -3379,35 +3339,43 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
3379 8 /* eir_clientid */ + 3339 8 /* eir_clientid */ +
3380 4 /* eir_sequenceid */ + 3340 4 /* eir_sequenceid */ +
3381 4 /* eir_flags */ + 3341 4 /* eir_flags */ +
3382 4 /* spr_how */ + 3342 4 /* spr_how */);
3383 8 /* spo_must_enforce, spo_must_allow */ +
3384 8 /* so_minor_id */ +
3385 4 /* so_major_id.len */ +
3386 (XDR_QUADLEN(major_id_sz) * 4) +
3387 4 /* eir_server_scope.len */ +
3388 (XDR_QUADLEN(server_scope_sz) * 4) +
3389 4 /* eir_server_impl_id.count (0) */);
3390 3343
3391 WRITEMEM(&exid->clientid, 8); 3344 WRITEMEM(&exid->clientid, 8);
3392 WRITE32(exid->seqid); 3345 WRITE32(exid->seqid);
3393 WRITE32(exid->flags); 3346 WRITE32(exid->flags);
3394 3347
3395 WRITE32(exid->spa_how); 3348 WRITE32(exid->spa_how);
3349 ADJUST_ARGS();
3350
3396 switch (exid->spa_how) { 3351 switch (exid->spa_how) {
3397 case SP4_NONE: 3352 case SP4_NONE:
3398 break; 3353 break;
3399 case SP4_MACH_CRED: 3354 case SP4_MACH_CRED:
3355 /* spo_must_enforce, spo_must_allow */
3356 RESERVE_SPACE(16);
3357
3400 /* spo_must_enforce bitmap: */ 3358 /* spo_must_enforce bitmap: */
3401 WRITE32(2); 3359 WRITE32(2);
3402 WRITE32(nfs4_minimal_spo_must_enforce[0]); 3360 WRITE32(nfs4_minimal_spo_must_enforce[0]);
3403 WRITE32(nfs4_minimal_spo_must_enforce[1]); 3361 WRITE32(nfs4_minimal_spo_must_enforce[1]);
3404 /* empty spo_must_allow bitmap: */ 3362 /* empty spo_must_allow bitmap: */
3405 WRITE32(0); 3363 WRITE32(0);
3364
3365 ADJUST_ARGS();
3406 break; 3366 break;
3407 default: 3367 default:
3408 WARN_ON_ONCE(1); 3368 WARN_ON_ONCE(1);
3409 } 3369 }
3410 3370
3371 RESERVE_SPACE(
3372 8 /* so_minor_id */ +
3373 4 /* so_major_id.len */ +
3374 (XDR_QUADLEN(major_id_sz) * 4) +
3375 4 /* eir_server_scope.len */ +
3376 (XDR_QUADLEN(server_scope_sz) * 4) +
3377 4 /* eir_server_impl_id.count (0) */);
3378
3411 /* The server_owner struct */ 3379 /* The server_owner struct */
3412 WRITE64(minor_id); /* Minor id */ 3380 WRITE64(minor_id); /* Minor id */
3413 /* major id */ 3381 /* major id */
@@ -3474,28 +3442,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
3474} 3442}
3475 3443
3476static __be32 3444static __be32
3477nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, __be32 nfserr,
3478 struct nfsd4_destroy_session *destroy_session)
3479{
3480 return nfserr;
3481}
3482
3483static __be32
3484nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3485 struct nfsd4_free_stateid *free_stateid)
3486{
3487 __be32 *p;
3488
3489 if (nfserr)
3490 return nfserr;
3491
3492 RESERVE_SPACE(4);
3493 *p++ = nfserr;
3494 ADJUST_ARGS();
3495 return nfserr;
3496}
3497
3498static __be32
3499nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, 3445nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
3500 struct nfsd4_sequence *seq) 3446 struct nfsd4_sequence *seq)
3501{ 3447{
@@ -3593,8 +3539,8 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3593 [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, 3539 [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
3594 [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, 3540 [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
3595 [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, 3541 [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
3596 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, 3542 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
3597 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid, 3543 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
3598 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 3544 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
3599 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, 3545 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
3600 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, 3546 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
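
One small nfs4xdr.c cleanup above is worth spelling out: nfsd4_encode_rdattr_error() used to remember where the attribute length word went (attrlenp) and back-patch it after emitting the body. Since the body here is always a single 4-byte error code, its length is known up front and can be written directly. Sketched in isolation, not part of the patch (example_encode_rdattr_error() is a made-up name; the FATTR4 constant comes from <linux/nfs4.h>):

static __be32 *example_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
{
        if (buflen < 6)                          /* conservative space check   */
                return NULL;
        *p++ = htonl(2);                         /* bitmap length: two words    */
        *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0                      */
        *p++ = htonl(0);                         /* bmval1                      */
        *p++ = htonl(4);                         /* attr body length, known now */
        *p++ = nfserr;                           /* already in network order    */
        return p;
}
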
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index b6af150c96b8..f8f060ffbf4f 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -132,13 +132,6 @@ nfsd_reply_cache_alloc(void)
132} 132}
133 133
134static void 134static void
135nfsd_reply_cache_unhash(struct svc_cacherep *rp)
136{
137 hlist_del_init(&rp->c_hash);
138 list_del_init(&rp->c_lru);
139}
140
141static void
142nfsd_reply_cache_free_locked(struct svc_cacherep *rp) 135nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
143{ 136{
144 if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { 137 if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
@@ -416,22 +409,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
416 409
417 /* 410 /*
418 * Since the common case is a cache miss followed by an insert, 411 * Since the common case is a cache miss followed by an insert,
419 * preallocate an entry. First, try to reuse the first entry on the LRU 412 * preallocate an entry.
420 * if it works, then go ahead and prune the LRU list.
421 */ 413 */
422 spin_lock(&cache_lock);
423 if (!list_empty(&lru_head)) {
424 rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
425 if (nfsd_cache_entry_expired(rp) ||
426 num_drc_entries >= max_drc_entries) {
427 nfsd_reply_cache_unhash(rp);
428 prune_cache_entries();
429 goto search_cache;
430 }
431 }
432
433 /* No expired ones available, allocate a new one. */
434 spin_unlock(&cache_lock);
435 rp = nfsd_reply_cache_alloc(); 414 rp = nfsd_reply_cache_alloc();
436 spin_lock(&cache_lock); 415 spin_lock(&cache_lock);
437 if (likely(rp)) { 416 if (likely(rp)) {
@@ -439,7 +418,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
439 drc_mem_usage += sizeof(*rp); 418 drc_mem_usage += sizeof(*rp);
440 } 419 }
441 420
442search_cache: 421 /* go ahead and prune the cache */
422 prune_cache_entries();
423
443 found = nfsd_cache_search(rqstp, csum); 424 found = nfsd_cache_search(rqstp, csum);
444 if (found) { 425 if (found) {
445 if (likely(rp)) 426 if (likely(rp))
@@ -453,15 +434,6 @@ search_cache:
453 goto out; 434 goto out;
454 } 435 }
455 436
456 /*
457 * We're keeping the one we just allocated. Are we now over the
458 * limit? Prune one off the tip of the LRU in trade for the one we
459 * just allocated if so.
460 */
461 if (num_drc_entries >= max_drc_entries)
462 nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
463 struct svc_cacherep, c_lru));
464
465 nfsdstats.rcmisses++; 437 nfsdstats.rcmisses++;
466 rqstp->rq_cacherep = rp; 438 rqstp->rq_cacherep = rp;
467 rp->c_state = RC_INPROG; 439 rp->c_state = RC_INPROG;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 760c85a6f534..9a4a5f9e7468 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -241,6 +241,15 @@ static void nfsd_shutdown_generic(void)
241 nfsd_racache_shutdown(); 241 nfsd_racache_shutdown();
242} 242}
243 243
244static bool nfsd_needs_lockd(void)
245{
246#if defined(CONFIG_NFSD_V3)
247 return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL);
248#else
249 return (nfsd_versions[2] != NULL);
250#endif
251}
252
244static int nfsd_startup_net(int nrservs, struct net *net) 253static int nfsd_startup_net(int nrservs, struct net *net)
245{ 254{
246 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 255 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -255,9 +264,14 @@ static int nfsd_startup_net(int nrservs, struct net *net)
255 ret = nfsd_init_socks(net); 264 ret = nfsd_init_socks(net);
256 if (ret) 265 if (ret)
257 goto out_socks; 266 goto out_socks;
258 ret = lockd_up(net); 267
259 if (ret) 268 if (nfsd_needs_lockd() && !nn->lockd_up) {
260 goto out_socks; 269 ret = lockd_up(net);
270 if (ret)
271 goto out_socks;
272 nn->lockd_up = 1;
273 }
274
261 ret = nfs4_state_start_net(net); 275 ret = nfs4_state_start_net(net);
262 if (ret) 276 if (ret)
263 goto out_lockd; 277 goto out_lockd;
@@ -266,7 +280,10 @@ static int nfsd_startup_net(int nrservs, struct net *net)
266 return 0; 280 return 0;
267 281
268out_lockd: 282out_lockd:
269 lockd_down(net); 283 if (nn->lockd_up) {
284 lockd_down(net);
285 nn->lockd_up = 0;
286 }
270out_socks: 287out_socks:
271 nfsd_shutdown_generic(); 288 nfsd_shutdown_generic();
272 return ret; 289 return ret;
@@ -277,7 +294,10 @@ static void nfsd_shutdown_net(struct net *net)
277 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 294 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
278 295
279 nfs4_state_shutdown_net(net); 296 nfs4_state_shutdown_net(net);
280 lockd_down(net); 297 if (nn->lockd_up) {
298 lockd_down(net);
299 nn->lockd_up = 0;
300 }
281 nn->nfsd_net_up = false; 301 nn->nfsd_net_up = false;
282 nfsd_shutdown_generic(); 302 nfsd_shutdown_generic();
283} 303}
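
The nfssvc.c change above makes lockd start-up conditional and per network namespace: it is only brought up when NFSv2 or NFSv3 is being served, and the new nn->lockd_up flag (added to struct nfsd_net earlier in this diff) ensures every lockd_up() is matched by exactly one lockd_down() on shutdown or on the error path. A sketch of the balanced pattern, not part of the patch (the example_ names are made up):

static int example_start_lockd(struct net *net, struct nfsd_net *nn)
{
        int ret = 0;

        if (nfsd_needs_lockd() && !nn->lockd_up) {
                ret = lockd_up(net);
                if (!ret)
                        nn->lockd_up = 1;       /* remember we hold a reference */
        }
        return ret;
}

static void example_stop_lockd(struct net *net, struct nfsd_net *nn)
{
        if (nn->lockd_up) {                     /* only drop what we took */
                lockd_down(net);
                nn->lockd_up = 0;
        }
}
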
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 9c769a47ac5a..b17d93214d01 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,7 +152,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
152 type = (stat->mode & S_IFMT); 152 type = (stat->mode & S_IFMT);
153 153
154 *p++ = htonl(nfs_ftypes[type >> 12]); 154 *p++ = htonl(nfs_ftypes[type >> 12]);
155 *p++ = htonl((u32) stat->mode); 155 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
156 *p++ = htonl((u32) stat->nlink); 156 *p++ = htonl((u32) stat->nlink);
157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); 157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); 158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
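
The one-liner in nfsxdr.c, like the matching change in nfs3xdr.c earlier in this diff, masks the mode with S_IALLUGO before putting it on the wire, so only the permission, setuid/setgid and sticky bits are encoded; the file type already travels in the separate ftype field and should not leak through the mode word. A tiny sketch of what the mask keeps, not part of the patch (example_wire_mode() is made up):

#include <linux/stat.h>

/* S_IALLUGO == S_ISUID | S_ISGID | S_ISVTX | S_IRWXUGO: permission bits only. */
static u32 example_wire_mode(umode_t mode)
{
        return (u32)(mode & S_IALLUGO);
}
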
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1426eb66c8c6..017d3cb5e99b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -207,7 +207,12 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
207 goto out_nfserr; 207 goto out_nfserr;
208 } 208 }
209 } else { 209 } else {
210 fh_lock(fhp); 210 /*
211 * In the nfsd4_open() case, this may be held across
212 * subsequent open and delegation acquisition which may
213 * need to take the child's i_mutex:
214 */
215 fh_lock_nested(fhp, I_MUTEX_PARENT);
211 dentry = lookup_one_len(name, dparent, len); 216 dentry = lookup_one_len(name, dparent, len);
212 host_err = PTR_ERR(dentry); 217 host_err = PTR_ERR(dentry);
213 if (IS_ERR(dentry)) 218 if (IS_ERR(dentry))
@@ -273,13 +278,6 @@ out:
273 return err; 278 return err;
274} 279}
275 280
276static int nfsd_break_lease(struct inode *inode)
277{
278 if (!S_ISREG(inode->i_mode))
279 return 0;
280 return break_lease(inode, O_WRONLY | O_NONBLOCK);
281}
282
283/* 281/*
284 * Commit metadata changes to stable storage. 282 * Commit metadata changes to stable storage.
285 */ 283 */
@@ -348,8 +346,7 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
348 346
349 /* Revoke setuid/setgid on chown */ 347 /* Revoke setuid/setgid on chown */
350 if (!S_ISDIR(inode->i_mode) && 348 if (!S_ISDIR(inode->i_mode) &&
351 (((iap->ia_valid & ATTR_UID) && !uid_eq(iap->ia_uid, inode->i_uid)) || 349 ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) {
352 ((iap->ia_valid & ATTR_GID) && !gid_eq(iap->ia_gid, inode->i_gid)))) {
353 iap->ia_valid |= ATTR_KILL_PRIV; 350 iap->ia_valid |= ATTR_KILL_PRIV;
354 if (iap->ia_valid & ATTR_MODE) { 351 if (iap->ia_valid & ATTR_MODE) {
355 /* we're setting mode too, just clear the s*id bits */ 352 /* we're setting mode too, just clear the s*id bits */
@@ -449,16 +446,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
449 goto out_put_write_access; 446 goto out_put_write_access;
450 } 447 }
451 448
452 host_err = nfsd_break_lease(inode);
453 if (host_err)
454 goto out_put_write_access_nfserror;
455
456 fh_lock(fhp); 449 fh_lock(fhp);
457 host_err = notify_change(dentry, iap, NULL); 450 host_err = notify_change(dentry, iap, NULL);
458 fh_unlock(fhp); 451 fh_unlock(fhp);
459 452
460out_put_write_access_nfserror:
461 err = nfserrno(host_err);
462out_put_write_access: 453out_put_write_access:
463 if (size_change) 454 if (size_change)
464 put_write_access(inode); 455 put_write_access(inode);
@@ -1609,11 +1600,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1609 err = nfserr_noent; 1600 err = nfserr_noent;
1610 if (!dold->d_inode) 1601 if (!dold->d_inode)
1611 goto out_dput; 1602 goto out_dput;
1612 host_err = nfsd_break_lease(dold->d_inode);
1613 if (host_err) {
1614 err = nfserrno(host_err);
1615 goto out_dput;
1616 }
1617 host_err = vfs_link(dold, dirp, dnew, NULL); 1603 host_err = vfs_link(dold, dirp, dnew, NULL);
1618 if (!host_err) { 1604 if (!host_err) {
1619 err = nfserrno(commit_metadata(ffhp)); 1605 err = nfserrno(commit_metadata(ffhp));
@@ -1707,14 +1693,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1707 if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) 1693 if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
1708 goto out_dput_new; 1694 goto out_dput_new;
1709 1695
1710 host_err = nfsd_break_lease(odentry->d_inode);
1711 if (host_err)
1712 goto out_dput_new;
1713 if (ndentry->d_inode) {
1714 host_err = nfsd_break_lease(ndentry->d_inode);
1715 if (host_err)
1716 goto out_dput_new;
1717 }
1718 host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); 1696 host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
1719 if (!host_err) { 1697 if (!host_err) {
1720 host_err = commit_metadata(tfhp); 1698 host_err = commit_metadata(tfhp);
@@ -1784,16 +1762,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1784 if (!type) 1762 if (!type)
1785 type = rdentry->d_inode->i_mode & S_IFMT; 1763 type = rdentry->d_inode->i_mode & S_IFMT;
1786 1764
1787 host_err = nfsd_break_lease(rdentry->d_inode);
1788 if (host_err)
1789 goto out_put;
1790 if (type != S_IFDIR) 1765 if (type != S_IFDIR)
1791 host_err = vfs_unlink(dirp, rdentry, NULL); 1766 host_err = vfs_unlink(dirp, rdentry, NULL);
1792 else 1767 else
1793 host_err = vfs_rmdir(dirp, rdentry); 1768 host_err = vfs_rmdir(dirp, rdentry);
1794 if (!host_err) 1769 if (!host_err)
1795 host_err = commit_metadata(fhp); 1770 host_err = commit_metadata(fhp);
1796out_put:
1797 dput(rdentry); 1771 dput(rdentry);
1798 1772
1799out_nfserr: 1773out_nfserr:
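
fh_lock_nested(fhp, I_MUTEX_PARENT) above exists because, in the nfsd4_open() path, the parent directory's i_mutex can still be held when the child's i_mutex is taken later for open and delegation work. A minimal sketch of the underlying lockdep annotation pattern, assuming mutex_lock_nested() and the I_MUTEX_* subclasses from <linux/fs.h>; names are illustrative:

/* Sketch: annotate parent vs. child i_mutex with distinct lockdep subclasses
 * so holding both at once is not reported as recursive locking. */
static void example_lock_parent_then_child(struct inode *dir, struct inode *child)
{
	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&child->i_mutex, I_MUTEX_CHILD);
	/* ... directory operation on dir/child ... */
	mutex_unlock(&child->i_mutex);
	mutex_unlock(&dir->i_mutex);
}
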
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 1bc1d440a1a5..fbe90bdb2214 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -86,8 +86,6 @@ __be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
86__be32 nfsd_rename(struct svc_rqst *, 86__be32 nfsd_rename(struct svc_rqst *,
87 struct svc_fh *, char *, int, 87 struct svc_fh *, char *, int,
88 struct svc_fh *, char *, int); 88 struct svc_fh *, char *, int);
89__be32 nfsd_remove(struct svc_rqst *,
90 struct svc_fh *, char *, int);
91__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, 89__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
92 char *name, int len); 90 char *name, int len);
93__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, 91__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index b6d5542a4ac8..335e04aaf7db 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -174,6 +174,9 @@ struct nfsd3_linkres {
174struct nfsd3_readdirres { 174struct nfsd3_readdirres {
175 __be32 status; 175 __be32 status;
176 struct svc_fh fh; 176 struct svc_fh fh;
177 /* Just to save kmalloc on every readdirplus entry (svc_fh is a
178 * little large for the stack): */
179 struct svc_fh scratch;
177 int count; 180 int count;
178 __be32 verf[2]; 181 __be32 verf[2];
179 182
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index b3ed6446ed8e..d278a0d03496 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -228,7 +228,7 @@ struct nfsd4_open {
228 u32 op_create; /* request */ 228 u32 op_create; /* request */
229 u32 op_createmode; /* request */ 229 u32 op_createmode; /* request */
230 u32 op_bmval[3]; /* request */ 230 u32 op_bmval[3]; /* request */
231 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ 231 struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
232 nfs4_verifier op_verf __attribute__((aligned(32))); 232 nfs4_verifier op_verf __attribute__((aligned(32)));
233 /* EXCLUSIVE4 */ 233 /* EXCLUSIVE4 */
234 clientid_t op_clientid; /* request */ 234 clientid_t op_clientid; /* request */
@@ -250,7 +250,6 @@ struct nfsd4_open {
250 struct nfs4_acl *op_acl; 250 struct nfs4_acl *op_acl;
251 struct xdr_netobj op_label; 251 struct xdr_netobj op_label;
252}; 252};
253#define op_iattr iattr
254 253
255struct nfsd4_open_confirm { 254struct nfsd4_open_confirm {
256 stateid_t oc_req_stateid /* request */; 255 stateid_t oc_req_stateid /* request */;
@@ -374,7 +373,6 @@ struct nfsd4_test_stateid {
374 373
375struct nfsd4_free_stateid { 374struct nfsd4_free_stateid {
376 stateid_t fr_stateid; /* request */ 375 stateid_t fr_stateid; /* request */
377 __be32 fr_status; /* response */
378}; 376};
379 377
380/* also used for NVERIFY */ 378/* also used for NVERIFY */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2d8be51f90dc..dc3a9efdaab8 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -416,7 +416,8 @@ static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
416 } 416 }
417 if (likely(bio)) { 417 if (likely(bio)) {
418 bio->bi_bdev = nilfs->ns_bdev; 418 bio->bi_bdev = nilfs->ns_bdev;
419 bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9); 419 bio->bi_iter.bi_sector =
420 start << (nilfs->ns_blocksize_bits - 9);
420 } 421 }
421 return bio; 422 return bio;
422} 423}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 73920ffda05b..bf482dfed14f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -413,7 +413,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
413 } 413 }
414 414
415 /* Must put everything in 512 byte sectors for the bio... */ 415 /* Must put everything in 512 byte sectors for the bio... */
416 bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9); 416 bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
417 bio->bi_bdev = reg->hr_bdev; 417 bio->bi_bdev = reg->hr_bdev;
418 bio->bi_private = wc; 418 bio->bi_private = wc;
419 bio->bi_end_io = o2hb_bio_end_io; 419 bio->bi_end_io = o2hb_bio_end_io;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a26739451b53..db2cfb067d0b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -407,7 +407,7 @@ xfs_alloc_ioend_bio(
407 struct bio *bio = bio_alloc(GFP_NOIO, nvecs); 407 struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
408 408
409 ASSERT(bio->bi_private == NULL); 409 ASSERT(bio->bi_private == NULL);
410 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 410 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
411 bio->bi_bdev = bh->b_bdev; 411 bio->bi_bdev = bh->b_bdev;
412 return bio; 412 return bio;
413} 413}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 51757113a822..9c061ef2b0d9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1240,7 +1240,7 @@ next_chunk:
1240 1240
1241 bio = bio_alloc(GFP_NOIO, nr_pages); 1241 bio = bio_alloc(GFP_NOIO, nr_pages);
1242 bio->bi_bdev = bp->b_target->bt_bdev; 1242 bio->bi_bdev = bp->b_target->bt_bdev;
1243 bio->bi_sector = sector; 1243 bio->bi_iter.bi_sector = sector;
1244 bio->bi_end_io = xfs_buf_bio_end_io; 1244 bio->bi_end_io = xfs_buf_bio_end_io;
1245 bio->bi_private = bp; 1245 bio->bi_private = bp;
1246 1246
@@ -1262,7 +1262,7 @@ next_chunk:
1262 total_nr_pages--; 1262 total_nr_pages--;
1263 } 1263 }
1264 1264
1265 if (likely(bio->bi_size)) { 1265 if (likely(bio->bi_iter.bi_size)) {
1266 if (xfs_buf_is_vmapped(bp)) { 1266 if (xfs_buf_is_vmapped(bp)) {
1267 flush_kernel_vmap_range(bp->b_addr, 1267 flush_kernel_vmap_range(bp->b_addr,
1268 xfs_buf_vmap_len(bp)); 1268 xfs_buf_vmap_len(bp));
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 060ff695085c..70654521dab6 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -61,25 +61,87 @@
61 * various member access, note that bio_data should of course not be used 61 * various member access, note that bio_data should of course not be used
62 * on highmem page vectors 62 * on highmem page vectors
63 */ 63 */
64#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) 64#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
65#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx) 65
66#define bio_page(bio) bio_iovec((bio))->bv_page 66#define bvec_iter_page(bvec, iter) \
67#define bio_offset(bio) bio_iovec((bio))->bv_offset 67 (__bvec_iter_bvec((bvec), (iter))->bv_page)
68#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) 68
69#define bio_sectors(bio) ((bio)->bi_size >> 9) 69#define bvec_iter_len(bvec, iter) \
70#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) 70 min((iter).bi_size, \
71 __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
72
73#define bvec_iter_offset(bvec, iter) \
74 (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
75
76#define bvec_iter_bvec(bvec, iter) \
77((struct bio_vec) { \
78 .bv_page = bvec_iter_page((bvec), (iter)), \
79 .bv_len = bvec_iter_len((bvec), (iter)), \
80 .bv_offset = bvec_iter_offset((bvec), (iter)), \
81})
82
83#define bio_iter_iovec(bio, iter) \
84 bvec_iter_bvec((bio)->bi_io_vec, (iter))
85
86#define bio_iter_page(bio, iter) \
87 bvec_iter_page((bio)->bi_io_vec, (iter))
88#define bio_iter_len(bio, iter) \
89 bvec_iter_len((bio)->bi_io_vec, (iter))
90#define bio_iter_offset(bio, iter) \
91 bvec_iter_offset((bio)->bi_io_vec, (iter))
92
93#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter)
94#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter)
95#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter)
96
97#define bio_multiple_segments(bio) \
98 ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
99#define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9)
100#define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio)))
101
102/*
103 * Check whether this bio carries any data or not. A NULL bio is allowed.
104 */
105static inline bool bio_has_data(struct bio *bio)
106{
107 if (bio &&
108 bio->bi_iter.bi_size &&
109 !(bio->bi_rw & REQ_DISCARD))
110 return true;
111
112 return false;
113}
114
115static inline bool bio_is_rw(struct bio *bio)
116{
117 if (!bio_has_data(bio))
118 return false;
119
120 if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
121 return false;
122
123 return true;
124}
125
126static inline bool bio_mergeable(struct bio *bio)
127{
128 if (bio->bi_rw & REQ_NOMERGE_FLAGS)
129 return false;
130
131 return true;
132}
71 133
72static inline unsigned int bio_cur_bytes(struct bio *bio) 134static inline unsigned int bio_cur_bytes(struct bio *bio)
73{ 135{
74 if (bio->bi_vcnt) 136 if (bio_has_data(bio))
75 return bio_iovec(bio)->bv_len; 137 return bio_iovec(bio).bv_len;
76 else /* dataless requests such as discard */ 138 else /* dataless requests such as discard */
77 return bio->bi_size; 139 return bio->bi_iter.bi_size;
78} 140}
79 141
80static inline void *bio_data(struct bio *bio) 142static inline void *bio_data(struct bio *bio)
81{ 143{
82 if (bio->bi_vcnt) 144 if (bio_has_data(bio))
83 return page_address(bio_page(bio)) + bio_offset(bio); 145 return page_address(bio_page(bio)) + bio_offset(bio);
84 146
85 return NULL; 147 return NULL;
@@ -97,19 +159,16 @@ static inline void *bio_data(struct bio *bio)
97 * permanent PIO fall back, user is probably better off disabling highmem 159 * permanent PIO fall back, user is probably better off disabling highmem
98 * I/O completely on that queue (see ide-dma for example) 160 * I/O completely on that queue (see ide-dma for example)
99 */ 161 */
100#define __bio_kmap_atomic(bio, idx) \ 162#define __bio_kmap_atomic(bio, iter) \
101 (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page) + \ 163 (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \
102 bio_iovec_idx((bio), (idx))->bv_offset) 164 bio_iter_iovec((bio), (iter)).bv_offset)
103 165
104#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) 166#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
105 167
106/* 168/*
107 * merge helpers etc 169 * merge helpers etc
108 */ 170 */
109 171
110#define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
111#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx)
112
113/* Default implementation of BIOVEC_PHYS_MERGEABLE */ 172/* Default implementation of BIOVEC_PHYS_MERGEABLE */
114#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ 173#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
115 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) 174 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
@@ -126,33 +185,76 @@ static inline void *bio_data(struct bio *bio)
126 (((addr1) | (mask)) == (((addr2) - 1) | (mask))) 185 (((addr1) | (mask)) == (((addr2) - 1) | (mask)))
127#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ 186#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
128 __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) 187 __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
129#define BIO_SEG_BOUNDARY(q, b1, b2) \
130 BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
131 188
132#define bio_io_error(bio) bio_endio((bio), -EIO) 189#define bio_io_error(bio) bio_endio((bio), -EIO)
133 190
134/* 191/*
135 * drivers should not use the __ version unless they _really_ know what
136 * they're doing
137 */
138#define __bio_for_each_segment(bvl, bio, i, start_idx) \
139 for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \
140 i < (bio)->bi_vcnt; \
141 bvl++, i++)
142
143/*
144 * drivers should _never_ use the all version - the bio may have been split 192 * drivers should _never_ use the all version - the bio may have been split
145 * before it got to the driver and the driver won't own all of it 193 * before it got to the driver and the driver won't own all of it
146 */ 194 */
147#define bio_for_each_segment_all(bvl, bio, i) \ 195#define bio_for_each_segment_all(bvl, bio, i) \
148 for (i = 0; \ 196 for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
149 bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ 197
150 i++) 198static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
199 unsigned bytes)
200{
201 WARN_ONCE(bytes > iter->bi_size,
202 "Attempted to advance past end of bvec iter\n");
203
204 while (bytes) {
205 unsigned len = min(bytes, bvec_iter_len(bv, *iter));
206
207 bytes -= len;
208 iter->bi_size -= len;
209 iter->bi_bvec_done += len;
210
211 if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
212 iter->bi_bvec_done = 0;
213 iter->bi_idx++;
214 }
215 }
216}
217
218#define for_each_bvec(bvl, bio_vec, iter, start) \
219 for ((iter) = start; \
220 (bvl) = bvec_iter_bvec((bio_vec), (iter)), \
221 (iter).bi_size; \
222 bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
223
224
225static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
226 unsigned bytes)
227{
228 iter->bi_sector += bytes >> 9;
229
230 if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
231 iter->bi_size -= bytes;
232 else
233 bvec_iter_advance(bio->bi_io_vec, iter, bytes);
234}
151 235
152#define bio_for_each_segment(bvl, bio, i) \ 236#define __bio_for_each_segment(bvl, bio, iter, start) \
153 for (i = (bio)->bi_idx; \ 237 for (iter = (start); \
154 bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ 238 (iter).bi_size && \
155 i++) 239 ((bvl = bio_iter_iovec((bio), (iter))), 1); \
240 bio_advance_iter((bio), &(iter), (bvl).bv_len))
241
242#define bio_for_each_segment(bvl, bio, iter) \
243 __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
244
245#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
246
247static inline unsigned bio_segments(struct bio *bio)
248{
249 unsigned segs = 0;
250 struct bio_vec bv;
251 struct bvec_iter iter;
252
253 bio_for_each_segment(bv, bio, iter)
254 segs++;
255
256 return segs;
257}
156 258
157/* 259/*
158 * get a reference to a bio, so it won't disappear. the intended use is 260 * get a reference to a bio, so it won't disappear. the intended use is
@@ -177,16 +279,15 @@ static inline void *bio_data(struct bio *bio)
177struct bio_integrity_payload { 279struct bio_integrity_payload {
178 struct bio *bip_bio; /* parent bio */ 280 struct bio *bip_bio; /* parent bio */
179 281
180 sector_t bip_sector; /* virtual start sector */ 282 struct bvec_iter bip_iter;
181 283
284 /* kill - should just use bip_vec */
182 void *bip_buf; /* generated integrity data */ 285 void *bip_buf; /* generated integrity data */
183 bio_end_io_t *bip_end_io; /* saved I/O completion fn */
184 286
185 unsigned int bip_size; 287 bio_end_io_t *bip_end_io; /* saved I/O completion fn */
186 288
187 unsigned short bip_slab; /* slab the bip came from */ 289 unsigned short bip_slab; /* slab the bip came from */
188 unsigned short bip_vcnt; /* # of integrity bio_vecs */ 290 unsigned short bip_vcnt; /* # of integrity bio_vecs */
189 unsigned short bip_idx; /* current bip_vec index */
190 unsigned bip_owns_buf:1; /* should free bip_buf */ 291 unsigned bip_owns_buf:1; /* should free bip_buf */
191 292
192 struct work_struct bip_work; /* I/O completion */ 293 struct work_struct bip_work; /* I/O completion */
@@ -196,29 +297,28 @@ struct bio_integrity_payload {
196}; 297};
197#endif /* CONFIG_BLK_DEV_INTEGRITY */ 298#endif /* CONFIG_BLK_DEV_INTEGRITY */
198 299
199/* 300extern void bio_trim(struct bio *bio, int offset, int size);
200 * A bio_pair is used when we need to split a bio. 301extern struct bio *bio_split(struct bio *bio, int sectors,
201 * This can only happen for a bio that refers to just one 302 gfp_t gfp, struct bio_set *bs);
202 * page of data, and in the unusual situation when the 303
203 * page crosses a chunk/device boundary 304/**
305 * bio_next_split - get next @sectors from a bio, splitting if necessary
306 * @bio: bio to split
307 * @sectors: number of sectors to split from the front of @bio
308 * @gfp: gfp mask
309 * @bs: bio set to allocate from
204 * 310 *
205 * The address of the master bio is stored in bio1.bi_private 311 * Returns a bio representing the next @sectors of @bio - if the bio is smaller
206 * The address of the pool the pair was allocated from is stored 312 * than @sectors, returns the original bio unchanged.
207 * in bio2.bi_private
208 */ 313 */
209struct bio_pair { 314static inline struct bio *bio_next_split(struct bio *bio, int sectors,
210 struct bio bio1, bio2; 315 gfp_t gfp, struct bio_set *bs)
211 struct bio_vec bv1, bv2; 316{
212#if defined(CONFIG_BLK_DEV_INTEGRITY) 317 if (sectors >= bio_sectors(bio))
213 struct bio_integrity_payload bip1, bip2; 318 return bio;
214 struct bio_vec iv1, iv2; 319
215#endif 320 return bio_split(bio, sectors, gfp, bs);
216 atomic_t cnt; 321}
217 int error;
218};
219extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
220extern void bio_pair_release(struct bio_pair *dbio);
221extern void bio_trim(struct bio *bio, int offset, int size);
222 322
223extern struct bio_set *bioset_create(unsigned int, unsigned int); 323extern struct bio_set *bioset_create(unsigned int, unsigned int);
224extern void bioset_free(struct bio_set *); 324extern void bioset_free(struct bio_set *);
@@ -227,7 +327,8 @@ extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
227extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); 327extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
228extern void bio_put(struct bio *); 328extern void bio_put(struct bio *);
229 329
230extern void __bio_clone(struct bio *, struct bio *); 330extern void __bio_clone_fast(struct bio *, struct bio *);
331extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
231extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); 332extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
232 333
233extern struct bio_set *fs_bio_set; 334extern struct bio_set *fs_bio_set;
@@ -254,6 +355,7 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
254} 355}
255 356
256extern void bio_endio(struct bio *, int); 357extern void bio_endio(struct bio *, int);
358extern void bio_endio_nodec(struct bio *, int);
257struct request_queue; 359struct request_queue;
258extern int bio_phys_segments(struct request_queue *, struct bio *); 360extern int bio_phys_segments(struct request_queue *, struct bio *);
259 361
@@ -262,12 +364,12 @@ extern void bio_advance(struct bio *, unsigned);
262 364
263extern void bio_init(struct bio *); 365extern void bio_init(struct bio *);
264extern void bio_reset(struct bio *); 366extern void bio_reset(struct bio *);
367void bio_chain(struct bio *, struct bio *);
265 368
266extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); 369extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
267extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, 370extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
268 unsigned int, unsigned int); 371 unsigned int, unsigned int);
269extern int bio_get_nr_vecs(struct block_device *); 372extern int bio_get_nr_vecs(struct block_device *);
270extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
271extern struct bio *bio_map_user(struct request_queue *, struct block_device *, 373extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
272 unsigned long, unsigned int, int, gfp_t); 374 unsigned long, unsigned int, int, gfp_t);
273struct sg_iovec; 375struct sg_iovec;
@@ -357,48 +459,18 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
357} 459}
358#endif 460#endif
359 461
360static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, 462static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
361 unsigned long *flags) 463 unsigned long *flags)
362{ 464{
363 return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags); 465 return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
364} 466}
365#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) 467#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)
366 468
367#define bio_kmap_irq(bio, flags) \ 469#define bio_kmap_irq(bio, flags) \
368 __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) 470 __bio_kmap_irq((bio), (bio)->bi_iter, (flags))
369#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) 471#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
370 472
371/* 473/*
372 * Check whether this bio carries any data or not. A NULL bio is allowed.
373 */
374static inline bool bio_has_data(struct bio *bio)
375{
376 if (bio && bio->bi_vcnt)
377 return true;
378
379 return false;
380}
381
382static inline bool bio_is_rw(struct bio *bio)
383{
384 if (!bio_has_data(bio))
385 return false;
386
387 if (bio->bi_rw & REQ_WRITE_SAME)
388 return false;
389
390 return true;
391}
392
393static inline bool bio_mergeable(struct bio *bio)
394{
395 if (bio->bi_rw & REQ_NOMERGE_FLAGS)
396 return false;
397
398 return true;
399}
400
401/*
402 * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. 474 * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
403 * 475 *
404 * A bio_list anchors a singly-linked list of bios chained through the bi_next 476 * A bio_list anchors a singly-linked list of bios chained through the bi_next
@@ -559,16 +631,12 @@ struct biovec_slab {
559 631
560#if defined(CONFIG_BLK_DEV_INTEGRITY) 632#if defined(CONFIG_BLK_DEV_INTEGRITY)
561 633
562#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
563#define bip_vec(bip) bip_vec_idx(bip, 0)
564 634
565#define __bip_for_each_vec(bvl, bip, i, start_idx) \
566 for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \
567 i < (bip)->bip_vcnt; \
568 bvl++, i++)
569 635
570#define bip_for_each_vec(bvl, bip, i) \ 636#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
571 __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) 637
638#define bip_for_each_vec(bvl, bip, iter) \
639 for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
572 640
573#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ 641#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \
574 for_each_bio(_bio) \ 642 for_each_bio(_bio) \
@@ -586,7 +654,6 @@ extern int bio_integrity_prep(struct bio *);
586extern void bio_integrity_endio(struct bio *, int); 654extern void bio_integrity_endio(struct bio *, int);
587extern void bio_integrity_advance(struct bio *, unsigned int); 655extern void bio_integrity_advance(struct bio *, unsigned int);
588extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); 656extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
589extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
590extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); 657extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
591extern int bioset_integrity_create(struct bio_set *, int); 658extern int bioset_integrity_create(struct bio_set *, int);
592extern void bioset_integrity_free(struct bio_set *); 659extern void bioset_integrity_free(struct bio_set *);
@@ -630,12 +697,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
630 return 0; 697 return 0;
631} 698}
632 699
633static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
634 int sectors)
635{
636 return;
637}
638
639static inline void bio_integrity_advance(struct bio *bio, 700static inline void bio_integrity_advance(struct bio *bio,
640 unsigned int bytes_done) 701 unsigned int bytes_done)
641{ 702{
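
A minimal sketch of how callers iterate a bio with the interface above: bi_sector, bi_size and bi_idx now live in bio->bi_iter, bio_for_each_segment() takes a struct bvec_iter cursor, and each segment is yielded as a struct bio_vec by value, so the bio itself is never modified while walking it. The function name is illustrative:

/* Sketch: count the data bytes in a bio using the new iterator. */
static unsigned int example_count_bio_bytes(struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	unsigned int bytes = 0;

	bio_for_each_segment(bvec, bio, iter)	/* starts from bio->bi_iter */
		bytes += bvec.bv_len;

	return bytes;		/* equals bio->bi_iter.bi_size for a data-carrying bio */
}
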
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ab0e9b2025b3..161b23105b1e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -113,7 +113,6 @@ enum {
113}; 113};
114 114
115struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); 115struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
116void blk_mq_free_queue(struct request_queue *);
117int blk_mq_register_disk(struct gendisk *); 116int blk_mq_register_disk(struct gendisk *);
118void blk_mq_unregister_disk(struct gendisk *); 117void blk_mq_unregister_disk(struct gendisk *);
119void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); 118void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
@@ -159,16 +158,16 @@ static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx,
159} 158}
160 159
161#define queue_for_each_hw_ctx(q, hctx, i) \ 160#define queue_for_each_hw_ctx(q, hctx, i) \
162 for ((i) = 0, hctx = (q)->queue_hw_ctx[0]; \ 161 for ((i) = 0; (i) < (q)->nr_hw_queues && \
163 (i) < (q)->nr_hw_queues; (i)++, hctx = (q)->queue_hw_ctx[i]) 162 ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
164 163
165#define queue_for_each_ctx(q, ctx, i) \ 164#define queue_for_each_ctx(q, ctx, i) \
166 for ((i) = 0, ctx = per_cpu_ptr((q)->queue_ctx, 0); \ 165 for ((i) = 0; (i) < (q)->nr_queues && \
167 (i) < (q)->nr_queues; (i)++, ctx = per_cpu_ptr(q->queue_ctx, (i))) 166 ({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); (i)++)
168 167
169#define hctx_for_each_ctx(hctx, ctx, i) \ 168#define hctx_for_each_ctx(hctx, ctx, i) \
170 for ((i) = 0, ctx = (hctx)->ctxs[0]; \ 169 for ((i) = 0; (i) < (hctx)->nr_ctx && \
171 (i) < (hctx)->nr_ctx; (i)++, ctx = (hctx)->ctxs[(i)]) 170 ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
172 171
173#define blk_ctx_sum(q, sum) \ 172#define blk_ctx_sum(q, sum) \
174({ \ 173({ \
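
The rewritten queue_for_each_hw_ctx()/queue_for_each_ctx()/hctx_for_each_ctx() macros above load the element only after the bounds check, so the final iteration no longer reads one entry past the end of the array (the old form evaluated hctx = (q)->queue_hw_ctx[i] in the increment clause before the condition was re-tested). A minimal sketch of the same pattern in isolation:

/* Sketch: the GCC statement expression performs the load inside the loop
 * condition and always evaluates to true, so it only runs while i < n. */
#define example_for_each_elem(p, arr, n, i)			\
	for ((i) = 0;						\
	     (i) < (n) && ({ (p) = (arr)[(i)]; 1; });		\
	     (i)++)
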
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 238ef0ed62f8..bbc3a6c88fce 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -28,13 +28,22 @@ struct bio_vec {
28 unsigned int bv_offset; 28 unsigned int bv_offset;
29}; 29};
30 30
31struct bvec_iter {
32 sector_t bi_sector; /* device address in 512 byte
33 sectors */
34 unsigned int bi_size; /* residual I/O count */
35
36 unsigned int bi_idx; /* current index into bvl_vec */
37
38 unsigned int bi_bvec_done; /* number of bytes completed in
39 current bvec */
40};
41
31/* 42/*
32 * main unit of I/O for the block layer and lower layers (ie drivers and 43 * main unit of I/O for the block layer and lower layers (ie drivers and
33 * stacking drivers) 44 * stacking drivers)
34 */ 45 */
35struct bio { 46struct bio {
36 sector_t bi_sector; /* device address in 512 byte
37 sectors */
38 struct bio *bi_next; /* request queue link */ 47 struct bio *bi_next; /* request queue link */
39 struct block_device *bi_bdev; 48 struct block_device *bi_bdev;
40 unsigned long bi_flags; /* status, command, etc */ 49 unsigned long bi_flags; /* status, command, etc */
@@ -42,16 +51,13 @@ struct bio {
42 * top bits priority 51 * top bits priority
43 */ 52 */
44 53
45 unsigned short bi_vcnt; /* how many bio_vec's */ 54 struct bvec_iter bi_iter;
46 unsigned short bi_idx; /* current index into bvl_vec */
47 55
48 /* Number of segments in this BIO after 56 /* Number of segments in this BIO after
49 * physical address coalescing is performed. 57 * physical address coalescing is performed.
50 */ 58 */
51 unsigned int bi_phys_segments; 59 unsigned int bi_phys_segments;
52 60
53 unsigned int bi_size; /* residual I/O count */
54
55 /* 61 /*
56 * To keep track of the max segment size, we account for the 62 * To keep track of the max segment size, we account for the
57 * sizes of the first and last mergeable segments in this bio. 63 * sizes of the first and last mergeable segments in this bio.
@@ -59,6 +65,8 @@ struct bio {
59 unsigned int bi_seg_front_size; 65 unsigned int bi_seg_front_size;
60 unsigned int bi_seg_back_size; 66 unsigned int bi_seg_back_size;
61 67
68 atomic_t bi_remaining;
69
62 bio_end_io_t *bi_end_io; 70 bio_end_io_t *bi_end_io;
63 71
64 void *bi_private; 72 void *bi_private;
@@ -74,11 +82,13 @@ struct bio {
74 struct bio_integrity_payload *bi_integrity; /* data integrity */ 82 struct bio_integrity_payload *bi_integrity; /* data integrity */
75#endif 83#endif
76 84
85 unsigned short bi_vcnt; /* how many bio_vec's */
86
77 /* 87 /*
78 * Everything starting with bi_max_vecs will be preserved by bio_reset() 88 * Everything starting with bi_max_vecs will be preserved by bio_reset()
79 */ 89 */
80 90
81 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ 91 unsigned short bi_max_vecs; /* max bvl_vecs we can hold */
82 92
83 atomic_t bi_cnt; /* pin count */ 93 atomic_t bi_cnt; /* pin count */
84 94
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1b135d49b279..0375654adb28 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -291,6 +291,7 @@ struct queue_limits {
291 unsigned char discard_misaligned; 291 unsigned char discard_misaligned;
292 unsigned char cluster; 292 unsigned char cluster;
293 unsigned char discard_zeroes_data; 293 unsigned char discard_zeroes_data;
294 unsigned char raid_partial_stripes_expensive;
294}; 295};
295 296
296struct request_queue { 297struct request_queue {
@@ -735,7 +736,7 @@ struct rq_map_data {
735}; 736};
736 737
737struct req_iterator { 738struct req_iterator {
738 int i; 739 struct bvec_iter iter;
739 struct bio *bio; 740 struct bio *bio;
740}; 741};
741 742
@@ -748,10 +749,11 @@ struct req_iterator {
748 749
749#define rq_for_each_segment(bvl, _rq, _iter) \ 750#define rq_for_each_segment(bvl, _rq, _iter) \
750 __rq_for_each_bio(_iter.bio, _rq) \ 751 __rq_for_each_bio(_iter.bio, _rq) \
751 bio_for_each_segment(bvl, _iter.bio, _iter.i) 752 bio_for_each_segment(bvl, _iter.bio, _iter.iter)
752 753
753#define rq_iter_last(rq, _iter) \ 754#define rq_iter_last(bvec, _iter) \
754 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) 755 (_iter.bio->bi_next == NULL && \
756 bio_iter_last(bvec, _iter.iter))
755 757
756#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 758#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
757# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" 759# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
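
With req_iterator now carrying a struct bvec_iter, request walkers follow the same pattern as bio_for_each_segment(): the segment is a struct bio_vec by value and rq_iter_last() takes that bvec rather than an index. A minimal sketch of a converted caller; the function name is illustrative:

/* Sketch: sum the data bytes of a request with the updated req_iterator. */
static unsigned int example_count_request_bytes(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec bvec;
	unsigned int bytes = 0;

	rq_for_each_segment(bvec, rq, iter)
		bytes += bvec.bv_len;

	return bytes;
}
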
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 20ee8b63a968..d21f2dba0731 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -1,6 +1,7 @@
1#ifndef __FS_CEPH_MESSENGER_H 1#ifndef __FS_CEPH_MESSENGER_H
2#define __FS_CEPH_MESSENGER_H 2#define __FS_CEPH_MESSENGER_H
3 3
4#include <linux/blk_types.h>
4#include <linux/kref.h> 5#include <linux/kref.h>
5#include <linux/mutex.h> 6#include <linux/mutex.h>
6#include <linux/net.h> 7#include <linux/net.h>
@@ -119,8 +120,7 @@ struct ceph_msg_data_cursor {
119#ifdef CONFIG_BLOCK 120#ifdef CONFIG_BLOCK
120 struct { /* bio */ 121 struct { /* bio */
121 struct bio *bio; /* bio from list */ 122 struct bio *bio; /* bio from list */
122 unsigned int vector_index; /* vector from bio */ 123 struct bvec_iter bvec_iter;
123 unsigned int vector_offset; /* bytes from vector */
124 }; 124 };
125#endif /* CONFIG_BLOCK */ 125#endif /* CONFIG_BLOCK */
126 struct { /* pages */ 126 struct { /* pages */
diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h
index a0f9280421ec..2e6dce6e5c2a 100644
--- a/include/linux/cmdline-parser.h
+++ b/include/linux/cmdline-parser.h
@@ -37,9 +37,9 @@ int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline);
37struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, 37struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
38 const char *bdev); 38 const char *bdev);
39 39
40void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, 40int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
41 int slot, 41 int slot,
42 int (*add_part)(int, struct cmdline_subpart *, void *), 42 int (*add_part)(int, struct cmdline_subpart *, void *),
43 void *param); 43 void *param);
44 44
45#endif /* CMDLINEPARSEH */ 45#endif /* CMDLINEPARSEH */
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index f4b0aa3126f5..a68cbe59e6ad 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -29,7 +29,7 @@ typedef void (*io_notify_fn)(unsigned long error, void *context);
29 29
30enum dm_io_mem_type { 30enum dm_io_mem_type {
31 DM_IO_PAGE_LIST,/* Page list */ 31 DM_IO_PAGE_LIST,/* Page list */
32 DM_IO_BVEC, /* Bio vector */ 32 DM_IO_BIO, /* Bio vector */
33 DM_IO_VMA, /* Virtual memory area */ 33 DM_IO_VMA, /* Virtual memory area */
34 DM_IO_KMEM, /* Kernel memory */ 34 DM_IO_KMEM, /* Kernel memory */
35}; 35};
@@ -41,7 +41,7 @@ struct dm_io_memory {
41 41
42 union { 42 union {
43 struct page_list *pl; 43 struct page_list *pl;
44 struct bio_vec *bvec; 44 struct bio *bio;
45 void *vma; 45 void *vma;
46 void *addr; 46 void *addr;
47 } ptr; 47 } ptr;
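
With DM_IO_BIO, a dm-io client now passes a whole bio (positioned by its own bi_iter) rather than a bare bio_vec array. A minimal sketch of a synchronous request using that memory type; the field names are assumed from include/linux/dm-io.h and the helper name is illustrative:

/* Sketch: read one region into the pages described by an existing bio. */
static int example_dm_io_read_bio(struct dm_io_client *client,
				  struct dm_io_region *where, struct bio *bio)
{
	unsigned long error_bits = 0;
	struct dm_io_request req = {
		.bi_rw		= READ,
		.mem.type	= DM_IO_BIO,
		.mem.ptr.bio	= bio,
		.notify.fn	= NULL,		/* NULL notify fn means synchronous */
		.client		= client,
	};

	return dm_io(&req, 1, where, &error_bits);
}
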
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6eecfc2e4f98..04e763221246 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -368,7 +368,7 @@ struct svc_program {
368 struct svc_program * pg_next; /* other programs (same xprt) */ 368 struct svc_program * pg_next; /* other programs (same xprt) */
369 u32 pg_prog; /* program number */ 369 u32 pg_prog; /* program number */
370 unsigned int pg_lovers; /* lowest version */ 370 unsigned int pg_lovers; /* lowest version */
371 unsigned int pg_hivers; /* lowest version */ 371 unsigned int pg_hivers; /* highest version */
372 unsigned int pg_nvers; /* number of versions */ 372 unsigned int pg_nvers; /* number of versions */
373 struct svc_version ** pg_vers; /* version array */ 373 struct svc_version ** pg_vers; /* version array */
374 char * pg_name; /* service name */ 374 char * pg_name; /* service name */
@@ -386,8 +386,10 @@ struct svc_version {
386 struct svc_procedure * vs_proc; /* per-procedure info */ 386 struct svc_procedure * vs_proc; /* per-procedure info */
387 u32 vs_xdrsize; /* xdrsize needed for this version */ 387 u32 vs_xdrsize; /* xdrsize needed for this version */
388 388
389 unsigned int vs_hidden : 1; /* Don't register with portmapper. 389 unsigned int vs_hidden : 1, /* Don't register with portmapper.
390 * Only used for nfsacl so far. */ 390 * Only used for nfsacl so far. */
391 vs_rpcb_optnl:1;/* Don't care the result of register.
392 * Only used for nfsv4. */
391 393
392 /* Override dispatch function (e.g. when caching replies). 394 /* Override dispatch function (e.g. when caching replies).
393 * A return value of 0 means drop the request. 395 * A return value of 0 means drop the request.
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index e2b9576d00e2..7110897c3dfa 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -24,10 +24,10 @@ DECLARE_EVENT_CLASS(bcache_request,
24 __entry->dev = bio->bi_bdev->bd_dev; 24 __entry->dev = bio->bi_bdev->bd_dev;
25 __entry->orig_major = d->disk->major; 25 __entry->orig_major = d->disk->major;
26 __entry->orig_minor = d->disk->first_minor; 26 __entry->orig_minor = d->disk->first_minor;
27 __entry->sector = bio->bi_sector; 27 __entry->sector = bio->bi_iter.bi_sector;
28 __entry->orig_sector = bio->bi_sector - 16; 28 __entry->orig_sector = bio->bi_iter.bi_sector - 16;
29 __entry->nr_sector = bio->bi_size >> 9; 29 __entry->nr_sector = bio->bi_iter.bi_size >> 9;
30 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 30 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
31 ), 31 ),
32 32
33 TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", 33 TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -99,9 +99,9 @@ DECLARE_EVENT_CLASS(bcache_bio,
99 99
100 TP_fast_assign( 100 TP_fast_assign(
101 __entry->dev = bio->bi_bdev->bd_dev; 101 __entry->dev = bio->bi_bdev->bd_dev;
102 __entry->sector = bio->bi_sector; 102 __entry->sector = bio->bi_iter.bi_sector;
103 __entry->nr_sector = bio->bi_size >> 9; 103 __entry->nr_sector = bio->bi_iter.bi_size >> 9;
104 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 104 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
105 ), 105 ),
106 106
107 TP_printk("%d,%d %s %llu + %u", 107 TP_printk("%d,%d %s %llu + %u",
@@ -134,9 +134,9 @@ TRACE_EVENT(bcache_read,
134 134
135 TP_fast_assign( 135 TP_fast_assign(
136 __entry->dev = bio->bi_bdev->bd_dev; 136 __entry->dev = bio->bi_bdev->bd_dev;
137 __entry->sector = bio->bi_sector; 137 __entry->sector = bio->bi_iter.bi_sector;
138 __entry->nr_sector = bio->bi_size >> 9; 138 __entry->nr_sector = bio->bi_iter.bi_size >> 9;
139 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 139 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
140 __entry->cache_hit = hit; 140 __entry->cache_hit = hit;
141 __entry->bypass = bypass; 141 __entry->bypass = bypass;
142 ), 142 ),
@@ -162,9 +162,9 @@ TRACE_EVENT(bcache_write,
162 162
163 TP_fast_assign( 163 TP_fast_assign(
164 __entry->dev = bio->bi_bdev->bd_dev; 164 __entry->dev = bio->bi_bdev->bd_dev;
165 __entry->sector = bio->bi_sector; 165 __entry->sector = bio->bi_iter.bi_sector;
166 __entry->nr_sector = bio->bi_size >> 9; 166 __entry->nr_sector = bio->bi_iter.bi_size >> 9;
167 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 167 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
168 __entry->writeback = writeback; 168 __entry->writeback = writeback;
169 __entry->bypass = bypass; 169 __entry->bypass = bypass;
170 ), 170 ),
@@ -247,7 +247,7 @@ TRACE_EVENT(bcache_btree_write,
247 TP_fast_assign( 247 TP_fast_assign(
248 __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); 248 __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
249 __entry->block = b->written; 249 __entry->block = b->written;
250 __entry->keys = b->sets[b->nsets].data->keys; 250 __entry->keys = b->keys.set[b->keys.nsets].data->keys;
251 ), 251 ),
252 252
253 TP_printk("bucket %zu", __entry->bucket) 253 TP_printk("bucket %zu", __entry->bucket)
@@ -411,7 +411,7 @@ TRACE_EVENT(bcache_alloc_invalidate,
411 ), 411 ),
412 412
413 TP_fast_assign( 413 TP_fast_assign(
414 __entry->free = fifo_used(&ca->free); 414 __entry->free = fifo_used(&ca->free[RESERVE_NONE]);
415 __entry->free_inc = fifo_used(&ca->free_inc); 415 __entry->free_inc = fifo_used(&ca->free_inc);
416 __entry->free_inc_size = ca->free_inc.size; 416 __entry->free_inc_size = ca->free_inc.size;
417 __entry->unused = fifo_used(&ca->unused); 417 __entry->unused = fifo_used(&ca->unused);
@@ -422,8 +422,8 @@ TRACE_EVENT(bcache_alloc_invalidate,
422); 422);
423 423
424TRACE_EVENT(bcache_alloc_fail, 424TRACE_EVENT(bcache_alloc_fail,
425 TP_PROTO(struct cache *ca), 425 TP_PROTO(struct cache *ca, unsigned reserve),
426 TP_ARGS(ca), 426 TP_ARGS(ca, reserve),
427 427
428 TP_STRUCT__entry( 428 TP_STRUCT__entry(
429 __field(unsigned, free ) 429 __field(unsigned, free )
@@ -433,7 +433,7 @@ TRACE_EVENT(bcache_alloc_fail,
433 ), 433 ),
434 434
435 TP_fast_assign( 435 TP_fast_assign(
436 __entry->free = fifo_used(&ca->free); 436 __entry->free = fifo_used(&ca->free[reserve]);
437 __entry->free_inc = fifo_used(&ca->free_inc); 437 __entry->free_inc = fifo_used(&ca->free_inc);
438 __entry->unused = fifo_used(&ca->unused); 438 __entry->unused = fifo_used(&ca->unused);
439 __entry->blocked = atomic_read(&ca->set->prio_blocked); 439 __entry->blocked = atomic_read(&ca->set->prio_blocked);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 4c2301d2ef1a..e76ae19a8d6f 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -243,9 +243,9 @@ TRACE_EVENT(block_bio_bounce,
243 TP_fast_assign( 243 TP_fast_assign(
244 __entry->dev = bio->bi_bdev ? 244 __entry->dev = bio->bi_bdev ?
245 bio->bi_bdev->bd_dev : 0; 245 bio->bi_bdev->bd_dev : 0;
246 __entry->sector = bio->bi_sector; 246 __entry->sector = bio->bi_iter.bi_sector;
247 __entry->nr_sector = bio_sectors(bio); 247 __entry->nr_sector = bio_sectors(bio);
248 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 248 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
249 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 249 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
250 ), 250 ),
251 251
@@ -280,10 +280,10 @@ TRACE_EVENT(block_bio_complete,
280 280
281 TP_fast_assign( 281 TP_fast_assign(
282 __entry->dev = bio->bi_bdev->bd_dev; 282 __entry->dev = bio->bi_bdev->bd_dev;
283 __entry->sector = bio->bi_sector; 283 __entry->sector = bio->bi_iter.bi_sector;
284 __entry->nr_sector = bio_sectors(bio); 284 __entry->nr_sector = bio_sectors(bio);
285 __entry->error = error; 285 __entry->error = error;
286 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 286 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
287 ), 287 ),
288 288
289 TP_printk("%d,%d %s %llu + %u [%d]", 289 TP_printk("%d,%d %s %llu + %u [%d]",
@@ -308,9 +308,9 @@ DECLARE_EVENT_CLASS(block_bio_merge,
308 308
309 TP_fast_assign( 309 TP_fast_assign(
310 __entry->dev = bio->bi_bdev->bd_dev; 310 __entry->dev = bio->bi_bdev->bd_dev;
311 __entry->sector = bio->bi_sector; 311 __entry->sector = bio->bi_iter.bi_sector;
312 __entry->nr_sector = bio_sectors(bio); 312 __entry->nr_sector = bio_sectors(bio);
313 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 313 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
314 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 314 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
315 ), 315 ),
316 316
@@ -375,9 +375,9 @@ TRACE_EVENT(block_bio_queue,
375 375
376 TP_fast_assign( 376 TP_fast_assign(
377 __entry->dev = bio->bi_bdev->bd_dev; 377 __entry->dev = bio->bi_bdev->bd_dev;
378 __entry->sector = bio->bi_sector; 378 __entry->sector = bio->bi_iter.bi_sector;
379 __entry->nr_sector = bio_sectors(bio); 379 __entry->nr_sector = bio_sectors(bio);
380 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 380 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
381 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 381 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
382 ), 382 ),
383 383
@@ -403,7 +403,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
403 403
404 TP_fast_assign( 404 TP_fast_assign(
405 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; 405 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
406 __entry->sector = bio ? bio->bi_sector : 0; 406 __entry->sector = bio ? bio->bi_iter.bi_sector : 0;
407 __entry->nr_sector = bio ? bio_sectors(bio) : 0; 407 __entry->nr_sector = bio ? bio_sectors(bio) : 0;
408 blk_fill_rwbs(__entry->rwbs, 408 blk_fill_rwbs(__entry->rwbs,
409 bio ? bio->bi_rw : 0, __entry->nr_sector); 409 bio ? bio->bi_rw : 0, __entry->nr_sector);
@@ -538,9 +538,9 @@ TRACE_EVENT(block_split,
538 538
539 TP_fast_assign( 539 TP_fast_assign(
540 __entry->dev = bio->bi_bdev->bd_dev; 540 __entry->dev = bio->bi_bdev->bd_dev;
541 __entry->sector = bio->bi_sector; 541 __entry->sector = bio->bi_iter.bi_sector;
542 __entry->new_sector = new_sector; 542 __entry->new_sector = new_sector;
543 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 543 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
544 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 544 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
545 ), 545 ),
546 546
@@ -579,11 +579,11 @@ TRACE_EVENT(block_bio_remap,
579 579
580 TP_fast_assign( 580 TP_fast_assign(
581 __entry->dev = bio->bi_bdev->bd_dev; 581 __entry->dev = bio->bi_bdev->bd_dev;
582 __entry->sector = bio->bi_sector; 582 __entry->sector = bio->bi_iter.bi_sector;
583 __entry->nr_sector = bio_sectors(bio); 583 __entry->nr_sector = bio_sectors(bio);
584 __entry->old_dev = dev; 584 __entry->old_dev = dev;
585 __entry->old_sector = from; 585 __entry->old_sector = from;
586 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 586 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
587 ), 587 ),
588 588
589 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", 589 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 3b9f28dfc849..67f38faac589 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -629,8 +629,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
629 __entry->dev = sb->s_dev; 629 __entry->dev = sb->s_dev;
630 __entry->rw = rw; 630 __entry->rw = rw;
631 __entry->type = type; 631 __entry->type = type;
632 __entry->sector = bio->bi_sector; 632 __entry->sector = bio->bi_iter.bi_sector;
633 __entry->size = bio->bi_size; 633 __entry->size = bio->bi_iter.bi_size;
634 ), 634 ),
635 635
636 TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u", 636 TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u",
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 164a7e263988..22b6ad31c706 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -39,6 +39,7 @@ static inline void SET_##name(struct bkey *k, unsigned i, __u64 v) \
39} 39}
40 40
41#define KEY_SIZE_BITS 16 41#define KEY_SIZE_BITS 16
42#define KEY_MAX_U64S 8
42 43
43KEY_FIELD(KEY_PTRS, high, 60, 3) 44KEY_FIELD(KEY_PTRS, high, 60, 3)
44KEY_FIELD(HEADER_SIZE, high, 58, 2) 45KEY_FIELD(HEADER_SIZE, high, 58, 2)
@@ -118,7 +119,7 @@ static inline struct bkey *bkey_next(const struct bkey *k)
118 return (struct bkey *) (d + bkey_u64s(k)); 119 return (struct bkey *) (d + bkey_u64s(k));
119} 120}
120 121
121static inline struct bkey *bkey_last(const struct bkey *k, unsigned nr_keys) 122static inline struct bkey *bkey_idx(const struct bkey *k, unsigned nr_keys)
122{ 123{
123 __u64 *d = (void *) k; 124 __u64 *d = (void *) k;
124 return (struct bkey *) (d + nr_keys); 125 return (struct bkey *) (d + nr_keys);
diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h
index f1f3dd5981b2..84c517cbce90 100644
--- a/include/uapi/linux/fd.h
+++ b/include/uapi/linux/fd.h
@@ -185,7 +185,8 @@ enum {
185 * to clear media change status */ 185 * to clear media change status */
186 FD_UNUSED_BIT, 186 FD_UNUSED_BIT,
187 FD_DISK_CHANGED_BIT, /* disk has been changed since last i/o */ 187 FD_DISK_CHANGED_BIT, /* disk has been changed since last i/o */
188 FD_DISK_WRITABLE_BIT /* disk is writable */ 188 FD_DISK_WRITABLE_BIT, /* disk is writable */
189 FD_OPEN_SHOULD_FAIL_BIT
189}; 190};
190 191
191#define FDSETDRVPRM _IOW(2, 0x90, struct floppy_drive_params) 192#define FDSETDRVPRM _IOW(2, 0x90, struct floppy_drive_params)
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index d09dd10c5a5e..9a58bc258810 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -32,7 +32,7 @@ static int submit(int rw, struct block_device *bdev, sector_t sector,
32 struct bio *bio; 32 struct bio *bio;
33 33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
35 bio->bi_sector = sector; 35 bio->bi_iter.bi_sector = sector;
36 bio->bi_bdev = bdev; 36 bio->bi_bdev = bdev;
37 bio->bi_end_io = end_swap_bio_read; 37 bio->bi_end_io = end_swap_bio_read;
38 38
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index f785aef65799..b418cb0d7242 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -781,8 +781,8 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
781 if (!error && !bio_flagged(bio, BIO_UPTODATE)) 781 if (!error && !bio_flagged(bio, BIO_UPTODATE))
782 error = EIO; 782 error = EIO;
783 783
784 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, 784 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
785 error, 0, NULL); 785 bio->bi_rw, what, error, 0, NULL);
786} 786}
787 787
788static void blk_add_trace_bio_bounce(void *ignore, 788static void blk_add_trace_bio_bounce(void *ignore,
@@ -885,8 +885,9 @@ static void blk_add_trace_split(void *ignore,
885 if (bt) { 885 if (bt) {
886 __be64 rpdu = cpu_to_be64(pdu); 886 __be64 rpdu = cpu_to_be64(pdu);
887 887
888 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, 888 __blk_add_trace(bt, bio->bi_iter.bi_sector,
889 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), 889 bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT,
890 !bio_flagged(bio, BIO_UPTODATE),
890 sizeof(rpdu), &rpdu); 891 sizeof(rpdu), &rpdu);
891 } 892 }
892} 893}
@@ -918,9 +919,9 @@ static void blk_add_trace_bio_remap(void *ignore,
918 r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); 919 r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
919 r.sector_from = cpu_to_be64(from); 920 r.sector_from = cpu_to_be64(from);
920 921
921 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, 922 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
922 BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), 923 bio->bi_rw, BLK_TA_REMAP,
923 sizeof(r), &r); 924 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
924} 925}
925 926
926/** 927/**
diff --git a/mm/bounce.c b/mm/bounce.c
index 5a7d58fb883b..523918b8c6dc 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -98,27 +98,24 @@ int init_emergency_isa_pool(void)
98static void copy_to_high_bio_irq(struct bio *to, struct bio *from) 98static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
99{ 99{
100 unsigned char *vfrom; 100 unsigned char *vfrom;
101 struct bio_vec *tovec, *fromvec; 101 struct bio_vec tovec, *fromvec = from->bi_io_vec;
102 int i; 102 struct bvec_iter iter;
103 103
104 bio_for_each_segment(tovec, to, i) { 104 bio_for_each_segment(tovec, to, iter) {
105 fromvec = from->bi_io_vec + i; 105 if (tovec.bv_page != fromvec->bv_page) {
106 106 /*
107 /* 107 * fromvec->bv_offset and fromvec->bv_len might have
108 * not bounced 108 * been modified by the block layer, so use the original
109 */ 109 * copy, bounce_copy_vec already uses tovec->bv_len
110 if (tovec->bv_page == fromvec->bv_page) 110 */
111 continue; 111 vfrom = page_address(fromvec->bv_page) +
112 112 tovec.bv_offset;
113 /* 113
114 * fromvec->bv_offset and fromvec->bv_len might have been 114 bounce_copy_vec(&tovec, vfrom);
115 * modified by the block layer, so use the original copy, 115 flush_dcache_page(tovec.bv_page);
116 * bounce_copy_vec already uses tovec->bv_len 116 }
117 */
118 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
119 117
120 bounce_copy_vec(tovec, vfrom); 118 fromvec++;
121 flush_dcache_page(tovec->bv_page);
122 } 119 }
123} 120}
124 121
@@ -201,13 +198,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
201{ 198{
202 struct bio *bio; 199 struct bio *bio;
203 int rw = bio_data_dir(*bio_orig); 200 int rw = bio_data_dir(*bio_orig);
204 struct bio_vec *to, *from; 201 struct bio_vec *to, from;
202 struct bvec_iter iter;
205 unsigned i; 203 unsigned i;
206 204
207 if (force) 205 if (force)
208 goto bounce; 206 goto bounce;
209 bio_for_each_segment(from, *bio_orig, i) 207 bio_for_each_segment(from, *bio_orig, iter)
210 if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) 208 if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
211 goto bounce; 209 goto bounce;
212 210
213 return; 211 return;
diff --git a/mm/page_io.c b/mm/page_io.c
index 7247be6114ac..7c59ef681381 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -31,13 +31,13 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
31 31
32 bio = bio_alloc(gfp_flags, 1); 32 bio = bio_alloc(gfp_flags, 1);
33 if (bio) { 33 if (bio) {
34 bio->bi_sector = map_swap_page(page, &bio->bi_bdev); 34 bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
35 bio->bi_sector <<= PAGE_SHIFT - 9; 35 bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
36 bio->bi_io_vec[0].bv_page = page; 36 bio->bi_io_vec[0].bv_page = page;
37 bio->bi_io_vec[0].bv_len = PAGE_SIZE; 37 bio->bi_io_vec[0].bv_len = PAGE_SIZE;
38 bio->bi_io_vec[0].bv_offset = 0; 38 bio->bi_io_vec[0].bv_offset = 0;
39 bio->bi_vcnt = 1; 39 bio->bi_vcnt = 1;
40 bio->bi_size = PAGE_SIZE; 40 bio->bi_iter.bi_size = PAGE_SIZE;
41 bio->bi_end_io = end_io; 41 bio->bi_end_io = end_io;
42 } 42 }
43 return bio; 43 return bio;
@@ -62,7 +62,7 @@ void end_swap_bio_write(struct bio *bio, int err)
62 printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n", 62 printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n",
63 imajor(bio->bi_bdev->bd_inode), 63 imajor(bio->bi_bdev->bd_inode),
64 iminor(bio->bi_bdev->bd_inode), 64 iminor(bio->bi_bdev->bd_inode),
65 (unsigned long long)bio->bi_sector); 65 (unsigned long long)bio->bi_iter.bi_sector);
66 ClearPageReclaim(page); 66 ClearPageReclaim(page);
67 } 67 }
68 end_page_writeback(page); 68 end_page_writeback(page);
@@ -80,7 +80,7 @@ void end_swap_bio_read(struct bio *bio, int err)
80 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", 80 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
81 imajor(bio->bi_bdev->bd_inode), 81 imajor(bio->bi_bdev->bd_inode),
82 iminor(bio->bi_bdev->bd_inode), 82 iminor(bio->bi_bdev->bd_inode),
83 (unsigned long long)bio->bi_sector); 83 (unsigned long long)bio->bi_iter.bi_sector);
84 goto out; 84 goto out;
85 } 85 }
86 86
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2ed1304d22a7..0e478a0f4204 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -778,13 +778,12 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
778 778
779 bio = data->bio; 779 bio = data->bio;
780 BUG_ON(!bio); 780 BUG_ON(!bio);
781 BUG_ON(!bio->bi_vcnt);
782 781
783 cursor->resid = min(length, data->bio_length); 782 cursor->resid = min(length, data->bio_length);
784 cursor->bio = bio; 783 cursor->bio = bio;
785 cursor->vector_index = 0; 784 cursor->bvec_iter = bio->bi_iter;
786 cursor->vector_offset = 0; 785 cursor->last_piece =
787 cursor->last_piece = length <= bio->bi_io_vec[0].bv_len; 786 cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
788} 787}
789 788
790static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, 789static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
@@ -793,71 +792,63 @@ static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
793{ 792{
794 struct ceph_msg_data *data = cursor->data; 793 struct ceph_msg_data *data = cursor->data;
795 struct bio *bio; 794 struct bio *bio;
796 struct bio_vec *bio_vec; 795 struct bio_vec bio_vec;
797 unsigned int index;
798 796
799 BUG_ON(data->type != CEPH_MSG_DATA_BIO); 797 BUG_ON(data->type != CEPH_MSG_DATA_BIO);
800 798
801 bio = cursor->bio; 799 bio = cursor->bio;
802 BUG_ON(!bio); 800 BUG_ON(!bio);
803 801
804 index = cursor->vector_index; 802 bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
805 BUG_ON(index >= (unsigned int) bio->bi_vcnt);
806 803
807 bio_vec = &bio->bi_io_vec[index]; 804 *page_offset = (size_t) bio_vec.bv_offset;
808 BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
809 *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
810 BUG_ON(*page_offset >= PAGE_SIZE); 805 BUG_ON(*page_offset >= PAGE_SIZE);
811 if (cursor->last_piece) /* pagelist offset is always 0 */ 806 if (cursor->last_piece) /* pagelist offset is always 0 */
812 *length = cursor->resid; 807 *length = cursor->resid;
813 else 808 else
814 *length = (size_t) (bio_vec->bv_len - cursor->vector_offset); 809 *length = (size_t) bio_vec.bv_len;
815 BUG_ON(*length > cursor->resid); 810 BUG_ON(*length > cursor->resid);
816 BUG_ON(*page_offset + *length > PAGE_SIZE); 811 BUG_ON(*page_offset + *length > PAGE_SIZE);
817 812
818 return bio_vec->bv_page; 813 return bio_vec.bv_page;
819} 814}
820 815
821static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, 816static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
822 size_t bytes) 817 size_t bytes)
823{ 818{
824 struct bio *bio; 819 struct bio *bio;
825 struct bio_vec *bio_vec; 820 struct bio_vec bio_vec;
826 unsigned int index;
827 821
828 BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); 822 BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
829 823
830 bio = cursor->bio; 824 bio = cursor->bio;
831 BUG_ON(!bio); 825 BUG_ON(!bio);
832 826
833 index = cursor->vector_index; 827 bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
834 BUG_ON(index >= (unsigned int) bio->bi_vcnt);
835 bio_vec = &bio->bi_io_vec[index];
836 828
837 /* Advance the cursor offset */ 829 /* Advance the cursor offset */
838 830
839 BUG_ON(cursor->resid < bytes); 831 BUG_ON(cursor->resid < bytes);
840 cursor->resid -= bytes; 832 cursor->resid -= bytes;
841 cursor->vector_offset += bytes; 833
842 if (cursor->vector_offset < bio_vec->bv_len) 834 bio_advance_iter(bio, &cursor->bvec_iter, bytes);
835
836 if (bytes < bio_vec.bv_len)
843 return false; /* more bytes to process in this segment */ 837 return false; /* more bytes to process in this segment */
844 BUG_ON(cursor->vector_offset != bio_vec->bv_len);
845 838
846 /* Move on to the next segment, and possibly the next bio */ 839 /* Move on to the next segment, and possibly the next bio */
847 840
848 if (++index == (unsigned int) bio->bi_vcnt) { 841 if (!cursor->bvec_iter.bi_size) {
849 bio = bio->bi_next; 842 bio = bio->bi_next;
850 index = 0; 843 cursor->bvec_iter = bio->bi_iter;
851 } 844 }
852 cursor->bio = bio; 845 cursor->bio = bio;
853 cursor->vector_index = index;
854 cursor->vector_offset = 0;
855 846
856 if (!cursor->last_piece) { 847 if (!cursor->last_piece) {
857 BUG_ON(!cursor->resid); 848 BUG_ON(!cursor->resid);
858 BUG_ON(!bio); 849 BUG_ON(!bio);
859 /* A short read is OK, so use <= rather than == */ 850 /* A short read is OK, so use <= rather than == */
860 if (cursor->resid <= bio->bi_io_vec[index].bv_len) 851 if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
861 cursor->last_piece = true; 852 cursor->last_piece = true;
862 } 853 }
863 854
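The ceph messenger conversion above replaces the cursor's (vector_index, vector_offset) pair with a struct bvec_iter: bio_iter_iovec() materialises the current segment and bio_advance_iter() moves the iterator, crossing segment boundaries automatically while leaving the bio itself untouched. A hedged sketch of that consume-in-chunks pattern (the function name and chunk parameter are illustrative, not from the patch):

	/* Sketch: walk a bio in caller-sized chunks through a private bvec_iter. */
	#include <linux/bio.h>

	static void consume_bio_in_chunks(struct bio *bio, unsigned int chunk)
	{
		struct bvec_iter iter = bio->bi_iter;	/* private copy of the cursor */

		while (iter.bi_size) {
			struct bio_vec bv = bio_iter_iovec(bio, iter);
			unsigned int len = min(chunk, bv.bv_len);

			/* bv.bv_page + bv.bv_offset for len bytes is the current piece */

			bio_advance_iter(bio, &iter, len);
		}
	}

When iter.bi_size reaches zero the bio is exhausted, which is exactly what the new "if (!cursor->bvec_iter.bi_size)" test uses to decide when to hop to bio->bi_next.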
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f9ae9b85d4c1..453e974287d1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1021,8 +1021,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
1021 IEEE80211_P2P_OPPPS_ENABLE_BIT; 1021 IEEE80211_P2P_OPPPS_ENABLE_BIT;
1022 1022
1023 err = ieee80211_assign_beacon(sdata, &params->beacon); 1023 err = ieee80211_assign_beacon(sdata, &params->beacon);
1024 if (err < 0) 1024 if (err < 0) {
1025 ieee80211_vif_release_channel(sdata);
1025 return err; 1026 return err;
1027 }
1026 changed |= err; 1028 changed |= err;
1027 1029
1028 err = drv_start_ap(sdata->local, sdata); 1030 err = drv_start_ap(sdata->local, sdata);
@@ -1032,6 +1034,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
1032 if (old) 1034 if (old)
1033 kfree_rcu(old, rcu_head); 1035 kfree_rcu(old, rcu_head);
1034 RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); 1036 RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
1037 ieee80211_vif_release_channel(sdata);
1035 return err; 1038 return err;
1036 } 1039 }
1037 1040
@@ -1090,8 +1093,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
1090 kfree(sdata->u.ap.next_beacon); 1093 kfree(sdata->u.ap.next_beacon);
1091 sdata->u.ap.next_beacon = NULL; 1094 sdata->u.ap.next_beacon = NULL;
1092 1095
1093 cancel_work_sync(&sdata->u.ap.request_smps_work);
1094
1095 /* turn off carrier for this interface and dependent VLANs */ 1096 /* turn off carrier for this interface and dependent VLANs */
1096 list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) 1097 list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
1097 netif_carrier_off(vlan->dev); 1098 netif_carrier_off(vlan->dev);
@@ -1103,6 +1104,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
1103 kfree_rcu(old_beacon, rcu_head); 1104 kfree_rcu(old_beacon, rcu_head);
1104 if (old_probe_resp) 1105 if (old_probe_resp)
1105 kfree_rcu(old_probe_resp, rcu_head); 1106 kfree_rcu(old_probe_resp, rcu_head);
1107 sdata->u.ap.driver_smps_mode = IEEE80211_SMPS_OFF;
1106 1108
1107 __sta_info_flush(sdata, true); 1109 __sta_info_flush(sdata, true);
1108 ieee80211_free_keys(sdata, true); 1110 ieee80211_free_keys(sdata, true);
@@ -2638,6 +2640,24 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2638 INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); 2640 INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
2639 INIT_LIST_HEAD(&roc->dependents); 2641 INIT_LIST_HEAD(&roc->dependents);
2640 2642
2643 /*
2644 * cookie is either the roc cookie (for normal roc)
2645 * or the SKB (for mgmt TX)
2646 */
2647 if (!txskb) {
2648 /* local->mtx protects this */
2649 local->roc_cookie_counter++;
2650 roc->cookie = local->roc_cookie_counter;
2651 /* wow, you wrapped 64 bits ... more likely a bug */
2652 if (WARN_ON(roc->cookie == 0)) {
2653 roc->cookie = 1;
2654 local->roc_cookie_counter++;
2655 }
2656 *cookie = roc->cookie;
2657 } else {
2658 *cookie = (unsigned long)txskb;
2659 }
2660
2641 /* if there's one pending or we're scanning, queue this one */ 2661 /* if there's one pending or we're scanning, queue this one */
2642 if (!list_empty(&local->roc_list) || 2662 if (!list_empty(&local->roc_list) ||
2643 local->scanning || local->radar_detect_enabled) 2663 local->scanning || local->radar_detect_enabled)
@@ -2772,24 +2792,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2772 if (!queued) 2792 if (!queued)
2773 list_add_tail(&roc->list, &local->roc_list); 2793 list_add_tail(&roc->list, &local->roc_list);
2774 2794
2775 /*
2776 * cookie is either the roc cookie (for normal roc)
2777 * or the SKB (for mgmt TX)
2778 */
2779 if (!txskb) {
2780 /* local->mtx protects this */
2781 local->roc_cookie_counter++;
2782 roc->cookie = local->roc_cookie_counter;
2783 /* wow, you wrapped 64 bits ... more likely a bug */
2784 if (WARN_ON(roc->cookie == 0)) {
2785 roc->cookie = 1;
2786 local->roc_cookie_counter++;
2787 }
2788 *cookie = roc->cookie;
2789 } else {
2790 *cookie = (unsigned long)txskb;
2791 }
2792
2793 return 0; 2795 return 0;
2794} 2796}
2795 2797
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index fab7b91923e0..70dd013de836 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -466,7 +466,9 @@ void ieee80211_request_smps_ap_work(struct work_struct *work)
466 u.ap.request_smps_work); 466 u.ap.request_smps_work);
467 467
468 sdata_lock(sdata); 468 sdata_lock(sdata);
469 __ieee80211_request_smps_ap(sdata, sdata->u.ap.driver_smps_mode); 469 if (sdata_dereference(sdata->u.ap.beacon, sdata))
470 __ieee80211_request_smps_ap(sdata,
471 sdata->u.ap.driver_smps_mode);
470 sdata_unlock(sdata); 472 sdata_unlock(sdata);
471} 473}
472 474
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 771080ec7212..2796a198728f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -695,12 +695,9 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
695 struct cfg80211_bss *cbss; 695 struct cfg80211_bss *cbss;
696 struct beacon_data *presp; 696 struct beacon_data *presp;
697 struct sta_info *sta; 697 struct sta_info *sta;
698 int active_ibss;
699 u16 capability; 698 u16 capability;
700 699
701 active_ibss = ieee80211_sta_active_ibss(sdata); 700 if (!is_zero_ether_addr(ifibss->bssid)) {
702
703 if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) {
704 capability = WLAN_CAPABILITY_IBSS; 701 capability = WLAN_CAPABILITY_IBSS;
705 702
706 if (ifibss->privacy) 703 if (ifibss->privacy)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3701930c6649..5e44e3179e02 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1692,14 +1692,8 @@ void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
1692void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue); 1692void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue);
1693void ieee80211_add_pending_skb(struct ieee80211_local *local, 1693void ieee80211_add_pending_skb(struct ieee80211_local *local,
1694 struct sk_buff *skb); 1694 struct sk_buff *skb);
1695void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, 1695void ieee80211_add_pending_skbs(struct ieee80211_local *local,
1696 struct sk_buff_head *skbs, 1696 struct sk_buff_head *skbs);
1697 void (*fn)(void *data), void *data);
1698static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
1699 struct sk_buff_head *skbs)
1700{
1701 ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
1702}
1703void ieee80211_flush_queues(struct ieee80211_local *local, 1697void ieee80211_flush_queues(struct ieee80211_local *local,
1704 struct ieee80211_sub_if_data *sdata); 1698 struct ieee80211_sub_if_data *sdata);
1705 1699
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3dfd20a453ab..d6d1f1df9119 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -418,20 +418,24 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
418 return ret; 418 return ret;
419 } 419 }
420 420
421 mutex_lock(&local->iflist_mtx);
422 rcu_assign_pointer(local->monitor_sdata, sdata);
423 mutex_unlock(&local->iflist_mtx);
424
421 mutex_lock(&local->mtx); 425 mutex_lock(&local->mtx);
422 ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, 426 ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef,
423 IEEE80211_CHANCTX_EXCLUSIVE); 427 IEEE80211_CHANCTX_EXCLUSIVE);
424 mutex_unlock(&local->mtx); 428 mutex_unlock(&local->mtx);
425 if (ret) { 429 if (ret) {
430 mutex_lock(&local->iflist_mtx);
431 rcu_assign_pointer(local->monitor_sdata, NULL);
432 mutex_unlock(&local->iflist_mtx);
433 synchronize_net();
426 drv_remove_interface(local, sdata); 434 drv_remove_interface(local, sdata);
427 kfree(sdata); 435 kfree(sdata);
428 return ret; 436 return ret;
429 } 437 }
430 438
431 mutex_lock(&local->iflist_mtx);
432 rcu_assign_pointer(local->monitor_sdata, sdata);
433 mutex_unlock(&local->iflist_mtx);
434
435 return 0; 439 return 0;
436} 440}
437 441
@@ -770,12 +774,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
770 774
771 ieee80211_roc_purge(local, sdata); 775 ieee80211_roc_purge(local, sdata);
772 776
773 if (sdata->vif.type == NL80211_IFTYPE_STATION) 777 switch (sdata->vif.type) {
778 case NL80211_IFTYPE_STATION:
774 ieee80211_mgd_stop(sdata); 779 ieee80211_mgd_stop(sdata);
775 780 break;
776 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) 781 case NL80211_IFTYPE_ADHOC:
777 ieee80211_ibss_stop(sdata); 782 ieee80211_ibss_stop(sdata);
778 783 break;
784 case NL80211_IFTYPE_AP:
785 cancel_work_sync(&sdata->u.ap.request_smps_work);
786 break;
787 default:
788 break;
789 }
779 790
780 /* 791 /*
781 * Remove all stations associated with this interface. 792 * Remove all stations associated with this interface.
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index fc1d82465b3c..245dce969b31 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -222,6 +222,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
222 switch (vht_oper->chan_width) { 222 switch (vht_oper->chan_width) {
223 case IEEE80211_VHT_CHANWIDTH_USE_HT: 223 case IEEE80211_VHT_CHANWIDTH_USE_HT:
224 vht_chandef.width = chandef->width; 224 vht_chandef.width = chandef->width;
225 vht_chandef.center_freq1 = chandef->center_freq1;
225 break; 226 break;
226 case IEEE80211_VHT_CHANWIDTH_80MHZ: 227 case IEEE80211_VHT_CHANWIDTH_80MHZ:
227 vht_chandef.width = NL80211_CHAN_WIDTH_80; 228 vht_chandef.width = NL80211_CHAN_WIDTH_80;
@@ -271,6 +272,28 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
271 ret = 0; 272 ret = 0;
272 273
273out: 274out:
275 /*
276 * When tracking the current AP, don't do any further checks if the
277 * new chandef is identical to the one we're currently using for the
278 * connection. This keeps us from playing ping-pong with regulatory,
279 * without it the following can happen (for example):
280 * - connect to an AP with 80 MHz, world regdom allows 80 MHz
281 * - AP advertises regdom US
282 * - CRDA loads regdom US with 80 MHz prohibited (old database)
283 * - the code below detects an unsupported channel, downgrades, and
284 * we disconnect from the AP in the caller
285 * - disconnect causes CRDA to reload world regdomain and the game
286 * starts anew.
287 * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
288 *
289 * It seems possible that there are still scenarios with CSA or real
 290	 * bandwidth changes where this could happen, but those cases are

291 * less common and wouldn't completely prevent using the AP.
292 */
293 if (tracking &&
294 cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef))
295 return ret;
296
274 /* don't print the message below for VHT mismatch if VHT is disabled */ 297 /* don't print the message below for VHT mismatch if VHT is disabled */
275 if (ret & IEEE80211_STA_DISABLE_VHT) 298 if (ret & IEEE80211_STA_DISABLE_VHT)
276 vht_chandef = *chandef; 299 vht_chandef = *chandef;
@@ -3753,6 +3776,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
3753 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); 3776 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
3754 if (WARN_ON(!chanctx_conf)) { 3777 if (WARN_ON(!chanctx_conf)) {
3755 rcu_read_unlock(); 3778 rcu_read_unlock();
3779 sta_info_free(local, new_sta);
3756 return -EINVAL; 3780 return -EINVAL;
3757 } 3781 }
3758 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); 3782 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c24ca0d0f469..3e57f96c9666 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1128,6 +1128,13 @@ static void sta_ps_end(struct sta_info *sta)
1128 sta->sta.addr, sta->sta.aid); 1128 sta->sta.addr, sta->sta.aid);
1129 1129
1130 if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { 1130 if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) {
1131 /*
1132 * Clear the flag only if the other one is still set
1133 * so that the TX path won't start TX'ing new frames
1134 * directly ... In the case that the driver flag isn't
1135 * set ieee80211_sta_ps_deliver_wakeup() will clear it.
1136 */
1137 clear_sta_flag(sta, WLAN_STA_PS_STA);
1131 ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n", 1138 ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n",
1132 sta->sta.addr, sta->sta.aid); 1139 sta->sta.addr, sta->sta.aid);
1133 return; 1140 return;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index decd30c1e290..a023b432143b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -91,7 +91,7 @@ static int sta_info_hash_del(struct ieee80211_local *local,
91 return -ENOENT; 91 return -ENOENT;
92} 92}
93 93
94static void cleanup_single_sta(struct sta_info *sta) 94static void __cleanup_single_sta(struct sta_info *sta)
95{ 95{
96 int ac, i; 96 int ac, i;
97 struct tid_ampdu_tx *tid_tx; 97 struct tid_ampdu_tx *tid_tx;
@@ -99,7 +99,8 @@ static void cleanup_single_sta(struct sta_info *sta)
99 struct ieee80211_local *local = sdata->local; 99 struct ieee80211_local *local = sdata->local;
100 struct ps_data *ps; 100 struct ps_data *ps;
101 101
102 if (test_sta_flag(sta, WLAN_STA_PS_STA)) { 102 if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
103 test_sta_flag(sta, WLAN_STA_PS_DRIVER)) {
103 if (sta->sdata->vif.type == NL80211_IFTYPE_AP || 104 if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
104 sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 105 sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
105 ps = &sdata->bss->ps; 106 ps = &sdata->bss->ps;
@@ -109,6 +110,7 @@ static void cleanup_single_sta(struct sta_info *sta)
109 return; 110 return;
110 111
111 clear_sta_flag(sta, WLAN_STA_PS_STA); 112 clear_sta_flag(sta, WLAN_STA_PS_STA);
113 clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
112 114
113 atomic_dec(&ps->num_sta_ps); 115 atomic_dec(&ps->num_sta_ps);
114 sta_info_recalc_tim(sta); 116 sta_info_recalc_tim(sta);
@@ -139,7 +141,14 @@ static void cleanup_single_sta(struct sta_info *sta)
139 ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending); 141 ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending);
140 kfree(tid_tx); 142 kfree(tid_tx);
141 } 143 }
144}
142 145
146static void cleanup_single_sta(struct sta_info *sta)
147{
148 struct ieee80211_sub_if_data *sdata = sta->sdata;
149 struct ieee80211_local *local = sdata->local;
150
151 __cleanup_single_sta(sta);
143 sta_info_free(local, sta); 152 sta_info_free(local, sta);
144} 153}
145 154
@@ -330,6 +339,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
330 rcu_read_unlock(); 339 rcu_read_unlock();
331 340
332 spin_lock_init(&sta->lock); 341 spin_lock_init(&sta->lock);
342 spin_lock_init(&sta->ps_lock);
333 INIT_WORK(&sta->drv_unblock_wk, sta_unblock); 343 INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
334 INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); 344 INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
335 mutex_init(&sta->ampdu_mlme.mtx); 345 mutex_init(&sta->ampdu_mlme.mtx);
@@ -487,21 +497,26 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
487 goto out_err; 497 goto out_err;
488 } 498 }
489 499
490 /* notify driver */
491 err = sta_info_insert_drv_state(local, sdata, sta);
492 if (err)
493 goto out_err;
494
495 local->num_sta++; 500 local->num_sta++;
496 local->sta_generation++; 501 local->sta_generation++;
497 smp_mb(); 502 smp_mb();
498 503
504 /* simplify things and don't accept BA sessions yet */
505 set_sta_flag(sta, WLAN_STA_BLOCK_BA);
506
499 /* make the station visible */ 507 /* make the station visible */
500 sta_info_hash_add(local, sta); 508 sta_info_hash_add(local, sta);
501 509
502 list_add_rcu(&sta->list, &local->sta_list); 510 list_add_rcu(&sta->list, &local->sta_list);
503 511
512 /* notify driver */
513 err = sta_info_insert_drv_state(local, sdata, sta);
514 if (err)
515 goto out_remove;
516
504 set_sta_flag(sta, WLAN_STA_INSERTED); 517 set_sta_flag(sta, WLAN_STA_INSERTED);
518 /* accept BA sessions now */
519 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
505 520
506 ieee80211_recalc_min_chandef(sdata); 521 ieee80211_recalc_min_chandef(sdata);
507 ieee80211_sta_debugfs_add(sta); 522 ieee80211_sta_debugfs_add(sta);
@@ -522,6 +537,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
522 mesh_accept_plinks_update(sdata); 537 mesh_accept_plinks_update(sdata);
523 538
524 return 0; 539 return 0;
540 out_remove:
541 sta_info_hash_del(local, sta);
542 list_del_rcu(&sta->list);
543 local->num_sta--;
544 synchronize_net();
545 __cleanup_single_sta(sta);
525 out_err: 546 out_err:
526 mutex_unlock(&local->sta_mtx); 547 mutex_unlock(&local->sta_mtx);
527 rcu_read_lock(); 548 rcu_read_lock();
@@ -1071,10 +1092,14 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
1071} 1092}
1072EXPORT_SYMBOL(ieee80211_find_sta); 1093EXPORT_SYMBOL(ieee80211_find_sta);
1073 1094
1074static void clear_sta_ps_flags(void *_sta) 1095/* powersave support code */
1096void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
1075{ 1097{
1076 struct sta_info *sta = _sta;
1077 struct ieee80211_sub_if_data *sdata = sta->sdata; 1098 struct ieee80211_sub_if_data *sdata = sta->sdata;
1099 struct ieee80211_local *local = sdata->local;
1100 struct sk_buff_head pending;
1101 int filtered = 0, buffered = 0, ac;
1102 unsigned long flags;
1078 struct ps_data *ps; 1103 struct ps_data *ps;
1079 1104
1080 if (sdata->vif.type == NL80211_IFTYPE_AP || 1105 if (sdata->vif.type == NL80211_IFTYPE_AP ||
@@ -1085,20 +1110,6 @@ static void clear_sta_ps_flags(void *_sta)
1085 else 1110 else
1086 return; 1111 return;
1087 1112
1088 clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
1089 if (test_and_clear_sta_flag(sta, WLAN_STA_PS_STA))
1090 atomic_dec(&ps->num_sta_ps);
1091}
1092
1093/* powersave support code */
1094void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
1095{
1096 struct ieee80211_sub_if_data *sdata = sta->sdata;
1097 struct ieee80211_local *local = sdata->local;
1098 struct sk_buff_head pending;
1099 int filtered = 0, buffered = 0, ac;
1100 unsigned long flags;
1101
1102 clear_sta_flag(sta, WLAN_STA_SP); 1113 clear_sta_flag(sta, WLAN_STA_SP);
1103 1114
1104 BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); 1115 BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1);
@@ -1109,6 +1120,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
1109 1120
1110 skb_queue_head_init(&pending); 1121 skb_queue_head_init(&pending);
1111 1122
1123 /* sync with ieee80211_tx_h_unicast_ps_buf */
1124 spin_lock(&sta->ps_lock);
1112 /* Send all buffered frames to the station */ 1125 /* Send all buffered frames to the station */
1113 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { 1126 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
1114 int count = skb_queue_len(&pending), tmp; 1127 int count = skb_queue_len(&pending), tmp;
@@ -1127,7 +1140,12 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
1127 buffered += tmp - count; 1140 buffered += tmp - count;
1128 } 1141 }
1129 1142
1130 ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); 1143 ieee80211_add_pending_skbs(local, &pending);
1144 clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
1145 clear_sta_flag(sta, WLAN_STA_PS_STA);
1146 spin_unlock(&sta->ps_lock);
1147
1148 atomic_dec(&ps->num_sta_ps);
1131 1149
1132 /* This station just woke up and isn't aware of our SMPS state */ 1150 /* This station just woke up and isn't aware of our SMPS state */
1133 if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, 1151 if (!ieee80211_smps_is_restrictive(sta->known_smps_mode,
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index d77ff7090630..d3a6d8208f2f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -267,6 +267,7 @@ struct ieee80211_tx_latency_stat {
267 * @drv_unblock_wk: used for driver PS unblocking 267 * @drv_unblock_wk: used for driver PS unblocking
268 * @listen_interval: listen interval of this station, when we're acting as AP 268 * @listen_interval: listen interval of this station, when we're acting as AP
269 * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly 269 * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly
270 * @ps_lock: used for powersave (when mac80211 is the AP) related locking
270 * @ps_tx_buf: buffers (per AC) of frames to transmit to this station 271 * @ps_tx_buf: buffers (per AC) of frames to transmit to this station
271 * when it leaves power saving state or polls 272 * when it leaves power saving state or polls
272 * @tx_filtered: buffers (per AC) of frames we already tried to 273 * @tx_filtered: buffers (per AC) of frames we already tried to
@@ -356,10 +357,8 @@ struct sta_info {
356 /* use the accessors defined below */ 357 /* use the accessors defined below */
357 unsigned long _flags; 358 unsigned long _flags;
358 359
359 /* 360 /* STA powersave lock and frame queues */
360 * STA powersave frame queues, no more than the internal 361 spinlock_t ps_lock;
361 * locking required.
362 */
363 struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; 362 struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS];
364 struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; 363 struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS];
365 unsigned long driver_buffered_tids; 364 unsigned long driver_buffered_tids;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 27c990bf2320..4080c615636f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -478,6 +478,20 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
478 sta->sta.addr, sta->sta.aid, ac); 478 sta->sta.addr, sta->sta.aid, ac);
479 if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) 479 if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
480 purge_old_ps_buffers(tx->local); 480 purge_old_ps_buffers(tx->local);
481
482 /* sync with ieee80211_sta_ps_deliver_wakeup */
483 spin_lock(&sta->ps_lock);
484 /*
 485	 * STA woke up in the meantime and all the frames on ps_tx_buf have
 486	 * been queued to the pending queue. No reordering can happen, go
487 * ahead and Tx the packet.
488 */
489 if (!test_sta_flag(sta, WLAN_STA_PS_STA) &&
490 !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) {
491 spin_unlock(&sta->ps_lock);
492 return TX_CONTINUE;
493 }
494
481 if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { 495 if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) {
482 struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); 496 struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]);
483 ps_dbg(tx->sdata, 497 ps_dbg(tx->sdata,
@@ -492,6 +506,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
492 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; 506 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
493 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; 507 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
494 skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); 508 skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb);
509 spin_unlock(&sta->ps_lock);
495 510
496 if (!timer_pending(&local->sta_cleanup)) 511 if (!timer_pending(&local->sta_cleanup))
497 mod_timer(&local->sta_cleanup, 512 mod_timer(&local->sta_cleanup,
@@ -878,7 +893,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
878 } 893 }
879 894
880 /* adjust first fragment's length */ 895 /* adjust first fragment's length */
881 skb->len = hdrlen + per_fragm; 896 skb_trim(skb, hdrlen + per_fragm);
882 return 0; 897 return 0;
883} 898}
884 899
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 676dc0967f37..b8700d417a9c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -435,9 +435,8 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
435 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 435 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
436} 436}
437 437
438void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, 438void ieee80211_add_pending_skbs(struct ieee80211_local *local,
439 struct sk_buff_head *skbs, 439 struct sk_buff_head *skbs)
440 void (*fn)(void *data), void *data)
441{ 440{
442 struct ieee80211_hw *hw = &local->hw; 441 struct ieee80211_hw *hw = &local->hw;
443 struct sk_buff *skb; 442 struct sk_buff *skb;
@@ -461,9 +460,6 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
461 __skb_queue_tail(&local->pending[queue], skb); 460 __skb_queue_tail(&local->pending[queue], skb);
462 } 461 }
463 462
464 if (fn)
465 fn(data);
466
467 for (i = 0; i < hw->queues; i++) 463 for (i = 0; i < hw->queues; i++)
468 __ieee80211_wake_queue(hw, i, 464 __ieee80211_wake_queue(hw, i,
469 IEEE80211_QUEUE_STOP_REASON_SKB_ADD); 465 IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
@@ -1741,6 +1737,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1741 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 1737 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
1742 1738
1743 /* 1739 /*
1740 * Reconfigure sched scan if it was interrupted by FW restart or
1741 * suspend.
1742 */
1743 mutex_lock(&local->mtx);
1744 sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
1745 lockdep_is_held(&local->mtx));
1746 if (sched_scan_sdata && local->sched_scan_req)
1747 /*
1748 * Sched scan stopped, but we don't want to report it. Instead,
1749 * we're trying to reschedule.
1750 */
1751 if (__ieee80211_request_sched_scan_start(sched_scan_sdata,
1752 local->sched_scan_req))
1753 sched_scan_stopped = true;
1754 mutex_unlock(&local->mtx);
1755
1756 if (sched_scan_stopped)
1757 cfg80211_sched_scan_stopped(local->hw.wiphy);
1758
1759 /*
1744 * If this is for hw restart things are still running. 1760 * If this is for hw restart things are still running.
1745 * We may want to change that later, however. 1761 * We may want to change that later, however.
1746 */ 1762 */
@@ -1768,26 +1784,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1768 WARN_ON(1); 1784 WARN_ON(1);
1769#endif 1785#endif
1770 1786
1771 /*
1772 * Reconfigure sched scan if it was interrupted by FW restart or
1773 * suspend.
1774 */
1775 mutex_lock(&local->mtx);
1776 sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
1777 lockdep_is_held(&local->mtx));
1778 if (sched_scan_sdata && local->sched_scan_req)
1779 /*
1780 * Sched scan stopped, but we don't want to report it. Instead,
1781 * we're trying to reschedule.
1782 */
1783 if (__ieee80211_request_sched_scan_start(sched_scan_sdata,
1784 local->sched_scan_req))
1785 sched_scan_stopped = true;
1786 mutex_unlock(&local->mtx);
1787
1788 if (sched_scan_stopped)
1789 cfg80211_sched_scan_stopped(local->hw.wiphy);
1790
1791 return 0; 1787 return 0;
1792} 1788}
1793 1789
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 21211c60ca98..d51422c778de 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -154,6 +154,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
154 return IEEE80211_AC_BE; 154 return IEEE80211_AC_BE;
155 } 155 }
156 156
157 if (skb->protocol == sdata->control_port_protocol) {
158 skb->priority = 7;
159 return ieee80211_downgrade_queue(sdata, skb);
160 }
161
157 /* use the data classifier to determine what 802.1d tag the 162 /* use the data classifier to determine what 802.1d tag the
158 * data frame has */ 163 * data frame has */
159 rcu_read_lock(); 164 rcu_read_lock();
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 46bda010bf11..56db888b1cd5 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -301,7 +301,7 @@ static int nci_open_device(struct nci_dev *ndev)
301 rc = __nci_request(ndev, nci_reset_req, 0, 301 rc = __nci_request(ndev, nci_reset_req, 0,
302 msecs_to_jiffies(NCI_RESET_TIMEOUT)); 302 msecs_to_jiffies(NCI_RESET_TIMEOUT));
303 303
304 if (ndev->ops->setup(ndev)) 304 if (ndev->ops->setup)
305 ndev->ops->setup(ndev); 305 ndev->ops->setup(ndev);
306 306
307 if (!rc) { 307 if (!rc) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 76e42e6be755..24589bd2a4b6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -59,6 +59,7 @@
59#include <linux/crypto.h> 59#include <linux/crypto.h>
60#include <linux/sunrpc/gss_krb5.h> 60#include <linux/sunrpc/gss_krb5.h>
61#include <linux/sunrpc/xdr.h> 61#include <linux/sunrpc/xdr.h>
62#include <linux/lcm.h>
62 63
63#ifdef RPC_DEBUG 64#ifdef RPC_DEBUG
64# define RPCDBG_FACILITY RPCDBG_AUTH 65# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -72,7 +73,7 @@
72static void krb5_nfold(u32 inbits, const u8 *in, 73static void krb5_nfold(u32 inbits, const u8 *in,
73 u32 outbits, u8 *out) 74 u32 outbits, u8 *out)
74{ 75{
75 int a, b, c, lcm; 76 unsigned long ulcm;
76 int byte, i, msbit; 77 int byte, i, msbit;
77 78
78 /* the code below is more readable if I make these bytes 79 /* the code below is more readable if I make these bytes
@@ -82,17 +83,7 @@ static void krb5_nfold(u32 inbits, const u8 *in,
82 outbits >>= 3; 83 outbits >>= 3;
83 84
84 /* first compute lcm(n,k) */ 85 /* first compute lcm(n,k) */
85 86 ulcm = lcm(inbits, outbits);
86 a = outbits;
87 b = inbits;
88
89 while (b != 0) {
90 c = b;
91 b = a%b;
92 a = c;
93 }
94
95 lcm = outbits*inbits/a;
96 87
97 /* now do the real work */ 88 /* now do the real work */
98 89
@@ -101,7 +92,7 @@ static void krb5_nfold(u32 inbits, const u8 *in,
101 92
102 /* this will end up cycling through k lcm(k,n)/k times, which 93 /* this will end up cycling through k lcm(k,n)/k times, which
103 is correct */ 94 is correct */
104 for (i = lcm-1; i >= 0; i--) { 95 for (i = ulcm-1; i >= 0; i--) {
105 /* compute the msbit in k which gets added into this byte */ 96 /* compute the msbit in k which gets added into this byte */
106 msbit = ( 97 msbit = (
107 /* first, start with the msbit in the first, 98 /* first, start with the msbit in the first,
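The krb5_nfold() change above replaces an open-coded Euclid loop with lcm() from <linux/lcm.h>. For reference, a self-contained userspace equivalent of what the removed code computed (plain C, not kernel code; the kernel helper additionally returns 0 when either argument is 0):

	#include <stdio.h>

	/* Greatest common divisor by Euclid's algorithm, as the removed loop did. */
	static unsigned long gcd(unsigned long a, unsigned long b)
	{
		while (b != 0) {
			unsigned long t = b;
			b = a % b;
			a = t;
		}
		return a;
	}

	/* lcm(a, b) = a / gcd(a, b) * b; dividing first limits overflow. */
	static unsigned long lcm(unsigned long a, unsigned long b)
	{
		return a / gcd(a, b) * b;
	}

	int main(void)
	{
		printf("%lu\n", lcm(16, 20));	/* prints 80 */
		return 0;
	}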
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 458f85e9b0ba..abbb7dcd1689 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -137,7 +137,6 @@ void init_gssp_clnt(struct sunrpc_net *sn)
137{ 137{
138 mutex_init(&sn->gssp_lock); 138 mutex_init(&sn->gssp_lock);
139 sn->gssp_clnt = NULL; 139 sn->gssp_clnt = NULL;
140 init_waitqueue_head(&sn->gssp_wq);
141} 140}
142 141
143int set_gssp_clnt(struct net *net) 142int set_gssp_clnt(struct net *net)
@@ -154,7 +153,6 @@ int set_gssp_clnt(struct net *net)
154 sn->gssp_clnt = clnt; 153 sn->gssp_clnt = clnt;
155 } 154 }
156 mutex_unlock(&sn->gssp_lock); 155 mutex_unlock(&sn->gssp_lock);
157 wake_up(&sn->gssp_wq);
158 return ret; 156 return ret;
159} 157}
160 158
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 008cdade5aae..0f73f4507746 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1263,65 +1263,34 @@ out:
1263 return ret; 1263 return ret;
1264} 1264}
1265 1265
1266DEFINE_SPINLOCK(use_gssp_lock); 1266/*
1267 1267 * Try to set the sn->use_gss_proxy variable to a new value. We only allow
1268static bool use_gss_proxy(struct net *net) 1268 * it to be changed if it's currently undefined (-1). If it's any other value
1269{ 1269 * then return -EBUSY unless the type wouldn't have changed anyway.
1270 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 1270 */
1271
1272 if (sn->use_gss_proxy != -1)
1273 return sn->use_gss_proxy;
1274 spin_lock(&use_gssp_lock);
1275 /*
1276 * If you wanted gss-proxy, you should have said so before
1277 * starting to accept requests:
1278 */
1279 sn->use_gss_proxy = 0;
1280 spin_unlock(&use_gssp_lock);
1281 return 0;
1282}
1283
1284#ifdef CONFIG_PROC_FS
1285
1286static int set_gss_proxy(struct net *net, int type) 1271static int set_gss_proxy(struct net *net, int type)
1287{ 1272{
1288 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 1273 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1289 int ret = 0; 1274 int ret;
1290 1275
1291 WARN_ON_ONCE(type != 0 && type != 1); 1276 WARN_ON_ONCE(type != 0 && type != 1);
1292 spin_lock(&use_gssp_lock); 1277 ret = cmpxchg(&sn->use_gss_proxy, -1, type);
1293 if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type) 1278 if (ret != -1 && ret != type)
1294 sn->use_gss_proxy = type; 1279 return -EBUSY;
1295 else 1280 return 0;
1296 ret = -EBUSY;
1297 spin_unlock(&use_gssp_lock);
1298 wake_up(&sn->gssp_wq);
1299 return ret;
1300}
1301
1302static inline bool gssp_ready(struct sunrpc_net *sn)
1303{
1304 switch (sn->use_gss_proxy) {
1305 case -1:
1306 return false;
1307 case 0:
1308 return true;
1309 case 1:
1310 return sn->gssp_clnt;
1311 }
1312 WARN_ON_ONCE(1);
1313 return false;
1314} 1281}
1315 1282
1316static int wait_for_gss_proxy(struct net *net, struct file *file) 1283static bool use_gss_proxy(struct net *net)
1317{ 1284{
1318 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 1285 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1319 1286
1320 if (file->f_flags & O_NONBLOCK && !gssp_ready(sn)) 1287 /* If use_gss_proxy is still undefined, then try to disable it */
1321 return -EAGAIN; 1288 if (sn->use_gss_proxy == -1)
1322 return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn)); 1289 set_gss_proxy(net, 0);
1290 return sn->use_gss_proxy;
1323} 1291}
1324 1292
1293#ifdef CONFIG_PROC_FS
1325 1294
1326static ssize_t write_gssp(struct file *file, const char __user *buf, 1295static ssize_t write_gssp(struct file *file, const char __user *buf,
1327 size_t count, loff_t *ppos) 1296 size_t count, loff_t *ppos)
@@ -1342,10 +1311,10 @@ static ssize_t write_gssp(struct file *file, const char __user *buf,
1342 return res; 1311 return res;
1343 if (i != 1) 1312 if (i != 1)
1344 return -EINVAL; 1313 return -EINVAL;
1345 res = set_gss_proxy(net, 1); 1314 res = set_gssp_clnt(net);
1346 if (res) 1315 if (res)
1347 return res; 1316 return res;
1348 res = set_gssp_clnt(net); 1317 res = set_gss_proxy(net, 1);
1349 if (res) 1318 if (res)
1350 return res; 1319 return res;
1351 return count; 1320 return count;
@@ -1355,16 +1324,12 @@ static ssize_t read_gssp(struct file *file, char __user *buf,
1355 size_t count, loff_t *ppos) 1324 size_t count, loff_t *ppos)
1356{ 1325{
1357 struct net *net = PDE_DATA(file_inode(file)); 1326 struct net *net = PDE_DATA(file_inode(file));
1327 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1358 unsigned long p = *ppos; 1328 unsigned long p = *ppos;
1359 char tbuf[10]; 1329 char tbuf[10];
1360 size_t len; 1330 size_t len;
1361 int ret;
1362 1331
1363 ret = wait_for_gss_proxy(net, file); 1332 snprintf(tbuf, sizeof(tbuf), "%d\n", sn->use_gss_proxy);
1364 if (ret)
1365 return ret;
1366
1367 snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net));
1368 len = strlen(tbuf); 1333 len = strlen(tbuf);
1369 if (p >= len) 1334 if (p >= len)
1370 return 0; 1335 return 0;
@@ -1626,8 +1591,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1626 BUG_ON(integ_len % 4); 1591 BUG_ON(integ_len % 4);
1627 *p++ = htonl(integ_len); 1592 *p++ = htonl(integ_len);
1628 *p++ = htonl(gc->gc_seq); 1593 *p++ = htonl(gc->gc_seq);
1629 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, 1594 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
1630 integ_len))
1631 BUG(); 1595 BUG();
1632 if (resbuf->tail[0].iov_base == NULL) { 1596 if (resbuf->tail[0].iov_base == NULL) {
1633 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) 1597 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
@@ -1635,10 +1599,8 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1635 resbuf->tail[0].iov_base = resbuf->head[0].iov_base 1599 resbuf->tail[0].iov_base = resbuf->head[0].iov_base
1636 + resbuf->head[0].iov_len; 1600 + resbuf->head[0].iov_len;
1637 resbuf->tail[0].iov_len = 0; 1601 resbuf->tail[0].iov_len = 0;
1638 resv = &resbuf->tail[0];
1639 } else {
1640 resv = &resbuf->tail[0];
1641 } 1602 }
1603 resv = &resbuf->tail[0];
1642 mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; 1604 mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
1643 if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) 1605 if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
1644 goto out_err; 1606 goto out_err;
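set_gss_proxy() above now latches sn->use_gss_proxy with a single cmpxchg() instead of a spinlock and wait queue: the value can move from -1 (undefined) to 0 or 1 exactly once, and a later call only succeeds if it requests the value already set. A hedged userspace model of that latch using C11 atomics (the kernel's cmpxchg() returns the old value; atomic_compare_exchange_strong() instead returns a flag and writes the old value back into "expected"):

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic int use_gss_proxy = -1;	/* -1 = undefined, 0 = off, 1 = on */

	/* Returns 0 on success, -1 (think -EBUSY) if latched to a different value. */
	static int set_gss_proxy(int type)
	{
		int expected = -1;

		if (atomic_compare_exchange_strong(&use_gss_proxy, &expected, type))
			return 0;			/* we set it first */
		return expected == type ? 0 : -1;	/* already latched */
	}

	int main(void)
	{
		printf("%d\n", set_gss_proxy(1));	/* 0: latched to 1 */
		printf("%d\n", set_gss_proxy(1));	/* 0: same value again */
		printf("%d\n", set_gss_proxy(0));	/* -1: busy */
		return 0;
	}

The reordering in write_gssp() (set_gssp_clnt() before set_gss_proxy(net, 1)) means the latch is only flipped to 1 once the upcall client exists.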
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e521d20e1970..ae333c1845bb 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1111,9 +1111,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen)
1111 *bp++ = 'x'; 1111 *bp++ = 'x';
1112 len -= 2; 1112 len -= 2;
1113 while (blen && len >= 2) { 1113 while (blen && len >= 2) {
1114 unsigned char c = *buf++; 1114 bp = hex_byte_pack(bp, *buf++);
1115 *bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
1116 *bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
1117 len -= 2; 1115 len -= 2;
1118 blen--; 1116 blen--;
1119 } 1117 }
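qword_addhex() above swaps the manual '0' + nibble arithmetic for hex_byte_pack(). A tiny userspace equivalent of what that helper does (table lookup, two lowercase digits, returns the advanced pointer), for illustration only:

	#include <stdio.h>

	static const char hex_asc[] = "0123456789abcdef";

	/* Mirrors the kernel's hex_byte_pack(): writes two hex digits, returns buf + 2. */
	static char *hex_byte_pack(char *buf, unsigned char byte)
	{
		*buf++ = hex_asc[byte >> 4];
		*buf++ = hex_asc[byte & 0x0f];
		return buf;
	}

	int main(void)
	{
		char out[3] = { 0 };

		hex_byte_pack(out, 0xa5);
		printf("%s\n", out);	/* prints "a5" */
		return 0;
	}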
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 94e506f9d72b..df5826876535 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -27,7 +27,6 @@ struct sunrpc_net {
27 unsigned int rpcb_is_af_local : 1; 27 unsigned int rpcb_is_af_local : 1;
28 28
29 struct mutex gssp_lock; 29 struct mutex gssp_lock;
30 wait_queue_head_t gssp_wq;
31 struct rpc_clnt *gssp_clnt; 30 struct rpc_clnt *gssp_clnt;
32 int use_gss_proxy; 31 int use_gss_proxy;
33 int pipe_version; 32 int pipe_version;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e7fbe368b4a3..5de6801cd924 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -916,9 +916,6 @@ static int __svc_register(struct net *net, const char *progname,
916#endif 916#endif
917 } 917 }
918 918
919 if (error < 0)
920 printk(KERN_WARNING "svc: failed to register %sv%u RPC "
921 "service (errno %d).\n", progname, version, -error);
922 return error; 919 return error;
923} 920}
924 921
@@ -937,6 +934,7 @@ int svc_register(const struct svc_serv *serv, struct net *net,
937 const unsigned short port) 934 const unsigned short port)
938{ 935{
939 struct svc_program *progp; 936 struct svc_program *progp;
937 struct svc_version *vers;
940 unsigned int i; 938 unsigned int i;
941 int error = 0; 939 int error = 0;
942 940
@@ -946,7 +944,8 @@ int svc_register(const struct svc_serv *serv, struct net *net,
946 944
947 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 945 for (progp = serv->sv_program; progp; progp = progp->pg_next) {
948 for (i = 0; i < progp->pg_nvers; i++) { 946 for (i = 0; i < progp->pg_nvers; i++) {
949 if (progp->pg_vers[i] == NULL) 947 vers = progp->pg_vers[i];
948 if (vers == NULL)
950 continue; 949 continue;
951 950
952 dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", 951 dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
@@ -955,16 +954,26 @@ int svc_register(const struct svc_serv *serv, struct net *net,
955 proto == IPPROTO_UDP? "udp" : "tcp", 954 proto == IPPROTO_UDP? "udp" : "tcp",
956 port, 955 port,
957 family, 956 family,
958 progp->pg_vers[i]->vs_hidden? 957 vers->vs_hidden ?
959 " (but not telling portmap)" : ""); 958 " (but not telling portmap)" : "");
960 959
961 if (progp->pg_vers[i]->vs_hidden) 960 if (vers->vs_hidden)
962 continue; 961 continue;
963 962
964 error = __svc_register(net, progp->pg_name, progp->pg_prog, 963 error = __svc_register(net, progp->pg_name, progp->pg_prog,
965 i, family, proto, port); 964 i, family, proto, port);
966 if (error < 0) 965
966 if (vers->vs_rpcb_optnl) {
967 error = 0;
968 continue;
969 }
970
971 if (error < 0) {
972 printk(KERN_WARNING "svc: failed to register "
973 "%sv%u RPC service (errno %d).\n",
974 progp->pg_name, i, -error);
967 break; 975 break;
976 }
968 } 977 }
969 } 978 }
970 979
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2a7ca8ffe83a..817a1e523969 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2964,10 +2964,9 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2964 2964
2965 /* 2965 /*
2966 * Once we've associated a backchannel xprt with a connection, 2966 * Once we've associated a backchannel xprt with a connection,
2967 * we want to keep it around as long as long as the connection 2967 * we want to keep it around as long as the connection lasts,
2968 * lasts, in case we need to start using it for a backchannel 2968 * in case we need to start using it for a backchannel again;
2969 * again; this reference won't be dropped until bc_xprt is 2969 * this reference won't be dropped until bc_xprt is destroyed.
2970 * destroyed.
2971 */ 2970 */
2972 xprt_get(xprt); 2971 xprt_get(xprt);
2973 args->bc_xprt->xpt_bc_xprt = xprt; 2972 args->bc_xprt->xpt_bc_xprt = xprt;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d89dee2259b5..010892b81a06 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -203,8 +203,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
203 203
204 rdev->opencount--; 204 rdev->opencount--;
205 205
206 WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && 206 if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
207 !rdev->scan_req->notified); 207 if (WARN_ON(!rdev->scan_req->notified))
208 rdev->scan_req->aborted = true;
209 ___cfg80211_scan_done(rdev, false);
210 }
208} 211}
209 212
210static int cfg80211_rfkill_set_block(void *data, bool blocked) 213static int cfg80211_rfkill_set_block(void *data, bool blocked)
@@ -440,9 +443,6 @@ int wiphy_register(struct wiphy *wiphy)
440 int i; 443 int i;
441 u16 ifmodes = wiphy->interface_modes; 444 u16 ifmodes = wiphy->interface_modes;
442 445
443 /* support for 5/10 MHz is broken due to nl80211 API mess - disable */
444 wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ;
445
446 /* 446 /*
447 * There are major locking problems in nl80211/mac80211 for CSA, 447 * There are major locking problems in nl80211/mac80211 for CSA,
448 * disable for all drivers until this has been reworked. 448 * disable for all drivers until this has been reworked.
@@ -859,8 +859,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
859 break; 859 break;
860 case NETDEV_DOWN: 860 case NETDEV_DOWN:
861 cfg80211_update_iface_num(rdev, wdev->iftype, -1); 861 cfg80211_update_iface_num(rdev, wdev->iftype, -1);
862 WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && 862 if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
863 !rdev->scan_req->notified); 863 if (WARN_ON(!rdev->scan_req->notified))
864 rdev->scan_req->aborted = true;
865 ___cfg80211_scan_done(rdev, false);
866 }
864 867
865 if (WARN_ON(rdev->sched_scan_req && 868 if (WARN_ON(rdev->sched_scan_req &&
866 rdev->sched_scan_req->dev == wdev->netdev)) { 869 rdev->sched_scan_req->dev == wdev->netdev)) {
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 37ec16d7bb1a..f1d193b557b6 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -62,6 +62,7 @@ struct cfg80211_registered_device {
62 struct rb_root bss_tree; 62 struct rb_root bss_tree;
63 u32 bss_generation; 63 u32 bss_generation;
64 struct cfg80211_scan_request *scan_req; /* protected by RTNL */ 64 struct cfg80211_scan_request *scan_req; /* protected by RTNL */
65 struct sk_buff *scan_msg;
65 struct cfg80211_sched_scan_request *sched_scan_req; 66 struct cfg80211_sched_scan_request *sched_scan_req;
66 unsigned long suspend_at; 67 unsigned long suspend_at;
67 struct work_struct scan_done_wk; 68 struct work_struct scan_done_wk;
@@ -361,7 +362,8 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
361 struct key_params *params, int key_idx, 362 struct key_params *params, int key_idx,
362 bool pairwise, const u8 *mac_addr); 363 bool pairwise, const u8 *mac_addr);
363void __cfg80211_scan_done(struct work_struct *wk); 364void __cfg80211_scan_done(struct work_struct *wk);
364void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev); 365void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
366 bool send_message);
365void __cfg80211_sched_scan_results(struct work_struct *wk); 367void __cfg80211_sched_scan_results(struct work_struct *wk);
366int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, 368int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
367 bool driver_initiated); 369 bool driver_initiated);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7a742594916e..4fe2e6e2bc76 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1719,9 +1719,10 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
1719 * We can then retry with the larger buffer. 1719 * We can then retry with the larger buffer.
1720 */ 1720 */
1721 if ((ret == -ENOBUFS || ret == -EMSGSIZE) && 1721 if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
1722 !skb->len && 1722 !skb->len && !state->split &&
1723 cb->min_dump_alloc < 4096) { 1723 cb->min_dump_alloc < 4096) {
1724 cb->min_dump_alloc = 4096; 1724 cb->min_dump_alloc = 4096;
1725 state->split_start = 0;
1725 rtnl_unlock(); 1726 rtnl_unlock();
1726 return 1; 1727 return 1;
1727 } 1728 }
@@ -5244,7 +5245,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
5244 if (!rdev->ops->scan) 5245 if (!rdev->ops->scan)
5245 return -EOPNOTSUPP; 5246 return -EOPNOTSUPP;
5246 5247
5247 if (rdev->scan_req) { 5248 if (rdev->scan_req || rdev->scan_msg) {
5248 err = -EBUSY; 5249 err = -EBUSY;
5249 goto unlock; 5250 goto unlock;
5250 } 5251 }
@@ -10011,40 +10012,31 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
10011 NL80211_MCGRP_SCAN, GFP_KERNEL); 10012 NL80211_MCGRP_SCAN, GFP_KERNEL);
10012} 10013}
10013 10014
10014void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, 10015struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
10015 struct wireless_dev *wdev) 10016 struct wireless_dev *wdev, bool aborted)
10016{ 10017{
10017 struct sk_buff *msg; 10018 struct sk_buff *msg;
10018 10019
10019 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 10020 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
10020 if (!msg) 10021 if (!msg)
10021 return; 10022 return NULL;
10022 10023
10023 if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, 10024 if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0,
10024 NL80211_CMD_NEW_SCAN_RESULTS) < 0) { 10025 aborted ? NL80211_CMD_SCAN_ABORTED :
10026 NL80211_CMD_NEW_SCAN_RESULTS) < 0) {
10025 nlmsg_free(msg); 10027 nlmsg_free(msg);
10026 return; 10028 return NULL;
10027 } 10029 }
10028 10030
10029 genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, 10031 return msg;
10030 NL80211_MCGRP_SCAN, GFP_KERNEL);
10031} 10032}
10032 10033
10033void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, 10034void nl80211_send_scan_result(struct cfg80211_registered_device *rdev,
10034 struct wireless_dev *wdev) 10035 struct sk_buff *msg)
10035{ 10036{
10036 struct sk_buff *msg;
10037
10038 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
10039 if (!msg) 10037 if (!msg)
10040 return; 10038 return;
10041 10039
10042 if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0,
10043 NL80211_CMD_SCAN_ABORTED) < 0) {
10044 nlmsg_free(msg);
10045 return;
10046 }
10047
10048 genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, 10040 genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
10049 NL80211_MCGRP_SCAN, GFP_KERNEL); 10041 NL80211_MCGRP_SCAN, GFP_KERNEL);
10050} 10042}
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b1b231324e10..75799746d845 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -8,10 +8,10 @@ void nl80211_exit(void);
8void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); 8void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
9void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, 9void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
10 struct wireless_dev *wdev); 10 struct wireless_dev *wdev);
11void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, 11struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
12 struct wireless_dev *wdev); 12 struct wireless_dev *wdev, bool aborted);
13void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, 13void nl80211_send_scan_result(struct cfg80211_registered_device *rdev,
14 struct wireless_dev *wdev); 14 struct sk_buff *msg);
15void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, 15void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
16 struct net_device *netdev, u32 cmd); 16 struct net_device *netdev, u32 cmd);
17void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, 17void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 9b897fca7487..f0541370e68e 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1700,7 +1700,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
1700 return; 1700 return;
1701 case NL80211_REGDOM_SET_BY_USER: 1701 case NL80211_REGDOM_SET_BY_USER:
1702 treatment = reg_process_hint_user(reg_request); 1702 treatment = reg_process_hint_user(reg_request);
1703 if (treatment == REG_REQ_OK || 1703 if (treatment == REG_REQ_IGNORE ||
1704 treatment == REG_REQ_ALREADY_SET) 1704 treatment == REG_REQ_ALREADY_SET)
1705 return; 1705 return;
1706 schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142)); 1706 schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142));
@@ -2373,6 +2373,7 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
2373int set_regdom(const struct ieee80211_regdomain *rd) 2373int set_regdom(const struct ieee80211_regdomain *rd)
2374{ 2374{
2375 struct regulatory_request *lr; 2375 struct regulatory_request *lr;
2376 bool user_reset = false;
2376 int r; 2377 int r;
2377 2378
2378 if (!reg_is_valid_request(rd->alpha2)) { 2379 if (!reg_is_valid_request(rd->alpha2)) {
@@ -2389,6 +2390,7 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2389 break; 2390 break;
2390 case NL80211_REGDOM_SET_BY_USER: 2391 case NL80211_REGDOM_SET_BY_USER:
2391 r = reg_set_rd_user(rd, lr); 2392 r = reg_set_rd_user(rd, lr);
2393 user_reset = true;
2392 break; 2394 break;
2393 case NL80211_REGDOM_SET_BY_DRIVER: 2395 case NL80211_REGDOM_SET_BY_DRIVER:
2394 r = reg_set_rd_driver(rd, lr); 2396 r = reg_set_rd_driver(rd, lr);
@@ -2402,8 +2404,14 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2402 } 2404 }
2403 2405
2404 if (r) { 2406 if (r) {
2405 if (r == -EALREADY) 2407 switch (r) {
2408 case -EALREADY:
2406 reg_set_request_processed(); 2409 reg_set_request_processed();
2410 break;
2411 default:
2412 /* Back to world regulatory in case of errors */
2413 restore_regulatory_settings(user_reset);
2414 }
2407 2415
2408 kfree(rd); 2416 kfree(rd);
2409 return r; 2417 return r;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index b528e31da2cf..d1ed4aebbbb7 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -161,18 +161,25 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev,
161 dev->bss_generation++; 161 dev->bss_generation++;
162} 162}
163 163
164void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) 164void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
165 bool send_message)
165{ 166{
166 struct cfg80211_scan_request *request; 167 struct cfg80211_scan_request *request;
167 struct wireless_dev *wdev; 168 struct wireless_dev *wdev;
169 struct sk_buff *msg;
168#ifdef CONFIG_CFG80211_WEXT 170#ifdef CONFIG_CFG80211_WEXT
169 union iwreq_data wrqu; 171 union iwreq_data wrqu;
170#endif 172#endif
171 173
172 ASSERT_RTNL(); 174 ASSERT_RTNL();
173 175
174 request = rdev->scan_req; 176 if (rdev->scan_msg) {
177 nl80211_send_scan_result(rdev, rdev->scan_msg);
178 rdev->scan_msg = NULL;
179 return;
180 }
175 181
182 request = rdev->scan_req;
176 if (!request) 183 if (!request)
177 return; 184 return;
178 185
@@ -186,18 +193,16 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev)
186 if (wdev->netdev) 193 if (wdev->netdev)
187 cfg80211_sme_scan_done(wdev->netdev); 194 cfg80211_sme_scan_done(wdev->netdev);
188 195
189 if (request->aborted) { 196 if (!request->aborted &&
190 nl80211_send_scan_aborted(rdev, wdev); 197 request->flags & NL80211_SCAN_FLAG_FLUSH) {
191 } else { 198 /* flush entries from previous scans */
192 if (request->flags & NL80211_SCAN_FLAG_FLUSH) { 199 spin_lock_bh(&rdev->bss_lock);
193 /* flush entries from previous scans */ 200 __cfg80211_bss_expire(rdev, request->scan_start);
194 spin_lock_bh(&rdev->bss_lock); 201 spin_unlock_bh(&rdev->bss_lock);
195 __cfg80211_bss_expire(rdev, request->scan_start);
196 spin_unlock_bh(&rdev->bss_lock);
197 }
198 nl80211_send_scan_done(rdev, wdev);
199 } 202 }
200 203
204 msg = nl80211_build_scan_msg(rdev, wdev, request->aborted);
205
201#ifdef CONFIG_CFG80211_WEXT 206#ifdef CONFIG_CFG80211_WEXT
202 if (wdev->netdev && !request->aborted) { 207 if (wdev->netdev && !request->aborted) {
203 memset(&wrqu, 0, sizeof(wrqu)); 208 memset(&wrqu, 0, sizeof(wrqu));
@@ -211,6 +216,11 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev)
211 216
212 rdev->scan_req = NULL; 217 rdev->scan_req = NULL;
213 kfree(request); 218 kfree(request);
219
220 if (!send_message)
221 rdev->scan_msg = msg;
222 else
223 nl80211_send_scan_result(rdev, msg);
214} 224}
215 225
216void __cfg80211_scan_done(struct work_struct *wk) 226void __cfg80211_scan_done(struct work_struct *wk)
@@ -221,7 +231,7 @@ void __cfg80211_scan_done(struct work_struct *wk)
221 scan_done_wk); 231 scan_done_wk);
222 232
223 rtnl_lock(); 233 rtnl_lock();
224 ___cfg80211_scan_done(rdev); 234 ___cfg80211_scan_done(rdev, true);
225 rtnl_unlock(); 235 rtnl_unlock();
226} 236}
227 237
@@ -1079,7 +1089,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
1079 if (IS_ERR(rdev)) 1089 if (IS_ERR(rdev))
1080 return PTR_ERR(rdev); 1090 return PTR_ERR(rdev);
1081 1091
1082 if (rdev->scan_req) { 1092 if (rdev->scan_req || rdev->scan_msg) {
1083 err = -EBUSY; 1093 err = -EBUSY;
1084 goto out; 1094 goto out;
1085 } 1095 }
@@ -1481,7 +1491,7 @@ int cfg80211_wext_giwscan(struct net_device *dev,
1481 if (IS_ERR(rdev)) 1491 if (IS_ERR(rdev))
1482 return PTR_ERR(rdev); 1492 return PTR_ERR(rdev);
1483 1493
1484 if (rdev->scan_req) 1494 if (rdev->scan_req || rdev->scan_msg)
1485 return -EAGAIN; 1495 return -EAGAIN;
1486 1496
1487 res = ieee80211_scan_results(rdev, info, extra, data->length); 1497 res = ieee80211_scan_results(rdev, info, extra, data->length);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a63509118508..f04d4c32e96e 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -67,7 +67,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
67 ASSERT_RDEV_LOCK(rdev); 67 ASSERT_RDEV_LOCK(rdev);
68 ASSERT_WDEV_LOCK(wdev); 68 ASSERT_WDEV_LOCK(wdev);
69 69
70 if (rdev->scan_req) 70 if (rdev->scan_req || rdev->scan_msg)
71 return -EBUSY; 71 return -EBUSY;
72 72
73 if (wdev->conn->params.channel) 73 if (wdev->conn->params.channel)