author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-22 20:07:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-22 20:07:18 -0400 |
commit | a2887097f25cd38cadfc11d10769e2b349fb5eca (patch) | |
tree | cd4adcb305365d6ba9acd2c02d4eb9d0125c6f8d | |
parent | 8abfc6e7a45eb74e51904bbae676fae008b11366 (diff) | |
parent | 005a1d15f5a6b2bb4ada80349513effbf22b4588 (diff) |
Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits)
xen-blkfront: disable barrier/flush write support
Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c
block: remove BLKDEV_IFL_WAIT
aic7xxx_old: removed unused 'req' variable
block: remove the BH_Eopnotsupp flag
block: remove the BLKDEV_IFL_BARRIER flag
block: remove the WRITE_BARRIER flag
swap: do not send discards as barriers
fat: do not send discards as barriers
ext4: do not send discards as barriers
jbd2: replace barriers with explicit flush / FUA usage
jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier
jbd: replace barriers with explicit flush / FUA usage
nilfs2: replace barriers with explicit flush / FUA usage
reiserfs: replace barriers with explicit flush / FUA usage
gfs2: replace barriers with explicit flush / FUA usage
btrfs: replace barriers with explicit flush / FUA usage
xfs: replace barriers with explicit flush / FUA usage
block: pass gfp_mask and flags to sb_issue_discard
dm: convey that all flushes are processed as empty
...
80 files changed, 845 insertions, 1921 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 6899f471fb15..6b4e07f28b69 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -257,7 +257,8 @@ X!Earch/x86/kernel/mca_32.c | |||
257 | !Iblock/blk-sysfs.c | 257 | !Iblock/blk-sysfs.c |
258 | !Eblock/blk-settings.c | 258 | !Eblock/blk-settings.c |
259 | !Eblock/blk-exec.c | 259 | !Eblock/blk-exec.c |
260 | !Eblock/blk-barrier.c | 260 | !Eblock/blk-flush.c |
261 | !Eblock/blk-lib.c | ||
261 | !Eblock/blk-tag.c | 262 | !Eblock/blk-tag.c |
262 | !Iblock/blk-tag.c | 263 | !Iblock/blk-tag.c |
263 | !Eblock/blk-integrity.c | 264 | !Eblock/blk-integrity.c |
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index a406286f6f3e..d111e3b23db0 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX | |||
@@ -1,7 +1,5 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - This file | 2 | - This file |
3 | barrier.txt | ||
4 | - I/O Barriers | ||
5 | biodoc.txt | 3 | biodoc.txt |
6 | - Notes on the Generic Block Layer Rewrite in Linux 2.5 | 4 | - Notes on the Generic Block Layer Rewrite in Linux 2.5 |
7 | capability.txt | 5 | capability.txt |
@@ -16,3 +14,5 @@ stat.txt | |||
16 | - Block layer statistics in /sys/block/<dev>/stat | 14 | - Block layer statistics in /sys/block/<dev>/stat |
17 | switching-sched.txt | 15 | switching-sched.txt |
18 | - Switching I/O schedulers at runtime | 16 | - Switching I/O schedulers at runtime |
17 | writeback_cache_control.txt | ||
18 | - Control of volatile write back caches | ||
diff --git a/Documentation/block/barrier.txt b/Documentation/block/barrier.txt deleted file mode 100644 index 2c2f24f634e4..000000000000 --- a/Documentation/block/barrier.txt +++ /dev/null | |||
@@ -1,261 +0,0 @@ | |||
1 | I/O Barriers | ||
2 | ============ | ||
3 | Tejun Heo <htejun@gmail.com>, July 22 2005 | ||
4 | |||
5 | I/O barrier requests are used to guarantee ordering around the barrier | ||
6 | requests. Unless you're crazy enough to use disk drives for | ||
7 | implementing synchronization constructs (wow, sounds interesting...), | ||
8 | the ordering is meaningful only for write requests for things like | ||
9 | journal checkpoints. All requests queued before a barrier request | ||
10 | must be finished (made it to the physical medium) before the barrier | ||
11 | request is started, and all requests queued after the barrier request | ||
12 | must be started only after the barrier request is finished (again, | ||
13 | made it to the physical medium). | ||
14 | |||
15 | In other words, I/O barrier requests have the following two properties. | ||
16 | |||
17 | 1. Request ordering | ||
18 | |||
19 | Requests cannot pass the barrier request. Preceding requests are | ||
20 | processed before the barrier and following requests after. | ||
21 | |||
22 | Depending on what features a drive supports, this can be done in one | ||
23 | of the following three ways. | ||
24 | |||
25 | i. For devices which have queue depth greater than 1 (TCQ devices) and | ||
26 | support ordered tags, block layer can just issue the barrier as an | ||
27 | ordered request and the lower level driver, controller and drive | ||
28 | itself are responsible for making sure that the ordering constraint is | ||
29 | met. Most modern SCSI controllers/drives should support this. | ||
30 | |||
31 | NOTE: SCSI ordered tag isn't currently used due to limitation in the | ||
32 | SCSI midlayer, see the following random notes section. | ||
33 | |||
34 | ii. For devices which have queue depth greater than 1 but don't | ||
35 | support ordered tags, the block layer ensures that the requests preceding ||
36 | a barrier request finish before issuing the barrier request. Also, ||
37 | it defers requests following the barrier until the barrier request is | ||
38 | finished. Older SCSI controllers/drives and SATA drives fall in this | ||
39 | category. | ||
40 | |||
41 | iii. Devices which have queue depth of 1. This is a degenerate case | ||
42 | of ii. Just keeping issue order suffices. Ancient SCSI | ||
43 | controllers/drives and IDE drives are in this category. | ||
44 | |||
45 | 2. Forced flushing to physical medium | ||
46 | |||
47 | Again, if you're not gonna do synchronization with disk drives (dang, | ||
48 | it sounds even more appealing now!), the reason you use I/O barriers | ||
49 | is mainly to protect filesystem integrity when power failure or some | ||
50 | other events abruptly stop the drive from operating and possibly make | ||
51 | the drive lose data in its cache. So, I/O barriers need to guarantee | ||
52 | that requests actually get written to non-volatile medium in order. | ||
53 | |||
54 | There are four cases, | ||
55 | |||
56 | i. No write-back cache. Keeping requests ordered is enough. | ||
57 | |||
58 | ii. Write-back cache but no flush operation. There's no way to | ||
59 | guarantee physical-medium commit order. This kind of device can't do ||
60 | I/O barriers. | ||
61 | |||
62 | iii. Write-back cache and flush operation but no FUA (forced unit | ||
63 | access). We need two cache flushes - before and after the barrier | ||
64 | request. | ||
65 | |||
66 | iv. Write-back cache, flush operation and FUA. We still need one | ||
67 | flush to make sure requests preceding a barrier are written to medium, | ||
68 | but post-barrier flush can be avoided by using FUA write on the | ||
69 | barrier itself. | ||
70 | |||
71 | |||
72 | How to support barrier requests in drivers | ||
73 | ------------------------------------------ | ||
74 | |||
75 | All barrier handling is done inside the block layer proper. All a low ||
76 | level driver has to do is implement its prepare_flush_fn and use the ||
77 | function below to indicate what barrier type it supports and how to ||
78 | prepare flush requests. Note that the term 'ordered' is ||
79 | used to indicate the whole sequence of performing barrier requests | ||
80 | including draining and flushing. | ||
81 | |||
82 | typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq); | ||
83 | |||
84 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, | ||
85 | prepare_flush_fn *prepare_flush_fn); | ||
86 | |||
87 | @q : the queue in question | ||
88 | @ordered : the ordered mode the driver/device supports | ||
89 | @prepare_flush_fn : this function should prepare @rq such that it | ||
90 | flushes cache to physical medium when executed | ||
91 | |||
92 | For example, SCSI disk driver's prepare_flush_fn looks like the | ||
93 | following. | ||
94 | |||
95 | static void sd_prepare_flush(struct request_queue *q, struct request *rq) | ||
96 | { | ||
97 | memset(rq->cmd, 0, sizeof(rq->cmd)); | ||
98 | rq->cmd_type = REQ_TYPE_BLOCK_PC; | ||
99 | rq->timeout = SD_TIMEOUT; | ||
100 | rq->cmd[0] = SYNCHRONIZE_CACHE; | ||
101 | rq->cmd_len = 10; | ||
102 | } | ||
103 | |||
104 | The following seven ordered modes are supported. The table below ||
105 | shows which mode should be used depending on what features a ||
106 | device/driver supports. In the leftmost column of the table, the ||
107 | QUEUE_ORDERED_ prefix is omitted from the mode names to save space. ||
108 | |||
109 | The table is followed by description of each mode. Note that in the | ||
110 | descriptions of QUEUE_ORDERED_DRAIN*, '=>' is used whereas '->' is | ||
111 | used for QUEUE_ORDERED_TAG* descriptions. '=>' indicates that the | ||
112 | preceding step must be complete before proceeding to the next step. | ||
113 | '->' indicates that the next step can start as soon as the previous | ||
114 | step is issued. | ||
115 | |||
116 | write-back cache ordered tag flush FUA | ||
117 | ----------------------------------------------------------------------- | ||
118 | NONE yes/no N/A no N/A | ||
119 | DRAIN no no N/A N/A | ||
120 | DRAIN_FLUSH yes no yes no | ||
121 | DRAIN_FUA yes no yes yes | ||
122 | TAG no yes N/A N/A | ||
123 | TAG_FLUSH yes yes yes no | ||
124 | TAG_FUA yes yes yes yes | ||
125 | |||
126 | |||
127 | QUEUE_ORDERED_NONE | ||
128 | I/O barriers are not needed and/or supported. | ||
129 | |||
130 | Sequence: N/A | ||
131 | |||
132 | QUEUE_ORDERED_DRAIN | ||
133 | Requests are ordered by draining the request queue and cache | ||
134 | flushing isn't needed. | ||
135 | |||
136 | Sequence: drain => barrier | ||
137 | |||
138 | QUEUE_ORDERED_DRAIN_FLUSH | ||
139 | Requests are ordered by draining the request queue and both | ||
140 | pre-barrier and post-barrier cache flushings are needed. | ||
141 | |||
142 | Sequence: drain => preflush => barrier => postflush | ||
143 | |||
144 | QUEUE_ORDERED_DRAIN_FUA | ||
145 | Requests are ordered by draining the request queue and | ||
146 | pre-barrier cache flushing is needed. By using FUA on barrier | ||
147 | request, post-barrier flushing can be skipped. | ||
148 | |||
149 | Sequence: drain => preflush => barrier | ||
150 | |||
151 | QUEUE_ORDERED_TAG | ||
152 | Requests are ordered by ordered tag and cache flushing isn't | ||
153 | needed. | ||
154 | |||
155 | Sequence: barrier | ||
156 | |||
157 | QUEUE_ORDERED_TAG_FLUSH | ||
158 | Requests are ordered by ordered tag and both pre-barrier and | ||
159 | post-barrier cache flushings are needed. | ||
160 | |||
161 | Sequence: preflush -> barrier -> postflush | ||
162 | |||
163 | QUEUE_ORDERED_TAG_FUA | ||
164 | Requests are ordered by ordered tag and pre-barrier cache | ||
165 | flushing is needed. By using FUA on barrier request, | ||
166 | post-barrier flushing can be skipped. | ||
167 | |||
168 | Sequence: preflush -> barrier | ||
169 | |||
170 | |||
171 | Random notes/caveats | ||
172 | -------------------- | ||
173 | |||
174 | * SCSI layer currently can't use TAG ordering even if the drive, | ||
175 | controller and driver support it. The problem is that the SCSI ||
176 | midlayer request dispatch function is not atomic. It releases the ||
177 | queue lock and switches to the SCSI host lock during issue, so it is ||
178 | possible (and likely, over time) that requests change their relative ||
179 | positions. Once this problem is solved, TAG ordering can be enabled. ||
180 | |||
181 | * Currently, no matter which ordered mode is used, there can be only | ||
182 | one barrier request in progress. All I/O barriers are held off by | ||
183 | the block layer until the previous I/O barrier is complete. This ||
184 | doesn't make any difference for DRAIN ordered devices, but, for TAG ||
185 | ordered devices with very high command latency, passing multiple I/O ||
186 | barriers to the low level *might* be helpful if they are very frequent. ||
187 | Well, this certainly is a non-issue. I'm writing this just to make ||
188 | clear that no two I/O barriers are ever passed to the low-level driver. ||
189 | |||
190 | * Completion order. Requests in an ordered sequence are issued in order ||
191 | but are not required to finish in order. The barrier implementation can ||
192 | handle out-of-order completion of an ordered sequence. IOW, the requests ||
193 | MUST be processed in order but the hardware/software completion paths ||
194 | are allowed to reorder completion notifications - e.g. the current SCSI ||
195 | midlayer doesn't preserve completion order during error handling. ||
196 | |||
197 | * Requeueing order. Low-level drivers are free to requeue any request | ||
198 | after they have removed it from the request queue with ||
199 | blkdev_dequeue_request(). As the barrier sequence should be kept in ||
200 | order when requeued, the generic elevator code takes care of putting ||
201 | requests in order around the barrier. See blk_ordered_req_seq() and ||
202 | ELEVATOR_INSERT_REQUEUE handling in __elv_add_request() for details. | ||
203 | |||
204 | Note that block drivers must not requeue preceding requests while | ||
205 | completing later requests in an ordered sequence. Currently, no ||
206 | error checking is done against this. | ||
207 | |||
208 | * Error handling. Currently, the block layer will report an error to ||
209 | the upper layer if any of the requests in an ordered sequence fails. ||
210 | Unfortunately, this doesn't seem to be enough. Look at the following ||
211 | request flow. QUEUE_ORDERED_TAG_FLUSH is in use. ||
212 | |||
213 | [0] [1] [2] [3] [pre] [barrier] [post] < [4] [5] [6] ... > | ||
214 | still in elevator | ||
215 | |||
216 | Let's say request [2], [3] are write requests to update file system | ||
217 | metadata (journal or whatever) and [barrier] is used to mark that | ||
218 | those updates are valid. Consider the following sequence. | ||
219 | |||
220 | i. Requests [0] ~ [post] leave the request queue and enter the ||
221 | low-level driver. ||
222 | ii. After a while, unfortunately, something goes wrong and the | ||
223 | drive fails [2]. Note that any of [0], [1] and [3] could have | ||
224 | completed by this time, but [pre] couldn't have been finished | ||
225 | as the drive must process it in order and it failed before | ||
226 | processing that command. | ||
227 | iii. Error handling kicks in and determines that the error is | ||
228 | unrecoverable, fails [2], and resumes operation. ||
229 | iv. [pre] [barrier] [post] gets processed. | ||
230 | v. *BOOM* power fails | ||
231 | |||
232 | The problem here is that the barrier request is *supposed* to indicate | ||
233 | that filesystem update requests [2] and [3] made it safely to the | ||
234 | physical medium and, if the machine crashes after the barrier is | ||
235 | written, filesystem recovery code can depend on that. Sadly, that | ||
236 | isn't true in this case anymore. IOW, the success of an I/O barrier ||
237 | should also depend on the success of some of the preceding requests, ||
238 | where only the upper layer (filesystem) knows what 'some' is. ||
239 | |||
240 | This can be solved by implementing a way to tell the block layer which | ||
241 | requests affect the success of the following barrier request and | ||
242 | making lower level drivers resume operation on error only after the ||
243 | block layer tells them to do so. ||
244 | |||
245 | As the probability of this happening is very low and the drive would ||
246 | have to be faulty anyway, implementing the fix is probably overkill. ||
247 | But, still, it's there. ||
248 | |||
249 | * In previous drafts of the barrier implementation, there was a ||
250 | fallback mechanism such that, if FUA or ordered TAG failed, a less ||
251 | fancy ordered mode could be selected and the failed barrier request ||
252 | retried automatically. The rationale for this feature was that as FUA ||
253 | is pretty new in the ATA world and ordered tag was never used widely, ||
254 | there could be devices which claim to support those features but choke ||
255 | when actually given such requests. ||
256 | |||
257 | This was removed for two reasons: 1. it's overkill, and 2. it's ||
258 | impossible to implement properly when TAG ordering is used, as low ||
259 | level drivers resume after an error automatically. If it's ever ||
260 | needed, adding it back and modifying low level drivers accordingly ||
261 | shouldn't be difficult. ||
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt new file mode 100644 index 000000000000..83407d36630a --- /dev/null +++ b/Documentation/block/writeback_cache_control.txt | |||
@@ -0,0 +1,86 @@ | |||
1 | |||
2 | Explicit volatile write back cache control | ||
3 | ========================================== ||
4 | |||
5 | Introduction | ||
6 | ------------ | ||
7 | |||
8 | Many storage devices, especially in the consumer market, come with volatile | ||
9 | write back caches. That means the devices signal I/O completion to the | ||
10 | operating system before data actually has hit the non-volatile storage. This | ||
11 | behavior obviously speeds up various workloads, but it means the operating | ||
12 | system needs to force data out to the non-volatile storage when it performs | ||
13 | a data integrity operation like fsync, sync or an unmount. | ||
14 | |||
15 | The Linux block layer provides two simple mechanisms that let filesystems | ||
16 | control the caching behavior of the storage device. These mechanisms are | ||
17 | a forced cache flush, and the Force Unit Access (FUA) flag for requests. | ||
18 | |||
19 | |||
20 | Explicit cache flushes | ||
21 | ---------------------- | ||
22 | |||
23 | The REQ_FLUSH flag can be ORed into the r/w flags of a bio submitted from ||
24 | the filesystem and will make sure the volatile cache of the storage device | ||
25 | has been flushed before the actual I/O operation is started. This explicitly | ||
26 | guarantees that previously completed write requests are on non-volatile | ||
27 | storage before the flagged bio starts. In addition the REQ_FLUSH flag can be | ||
28 | set on an otherwise empty bio structure, which causes only an explicit cache | ||
29 | flush without any dependent I/O. It is recommended to use ||
30 | the blkdev_issue_flush() helper for a pure cache flush. | ||
31 | |||
32 | |||
33 | Forced Unit Access | ||
34 | ----------------- | ||
35 | |||
36 | The REQ_FUA flag can be ORed into the r/w flags of a bio submitted from the ||
37 | filesystem and will make sure that I/O completion for this request is only | ||
38 | signaled after the data has been committed to non-volatile storage. | ||
39 | |||
40 | |||
41 | Implementation details for filesystems | ||
42 | -------------------------------------- | ||
43 | |||
44 | Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to | ||
45 | worry about whether the underlying devices need any explicit cache flushing ||
46 | or how Forced Unit Access is implemented. The REQ_FLUSH and REQ_FUA flags ||
47 | may both be set on a single bio. | ||
48 | |||
49 | |||
50 | Implementation details for make_request_fn based block drivers | ||
51 | -------------------------------------------------------------- | ||
52 | |||
53 | These drivers will always see the REQ_FLUSH and REQ_FUA bits as they sit | ||
54 | directly below the submit_bio interface. For remapping drivers the REQ_FUA | ||
55 | bit needs to be propagated to underlying devices, and a global flush needs ||
56 | to be implemented for bios with the REQ_FLUSH bit set. For real device | ||
57 | drivers that do not have a volatile cache the REQ_FLUSH and REQ_FUA bits | ||
58 | on non-empty bios can simply be ignored, and REQ_FLUSH requests without | ||
59 | data can be completed successfully without doing any work. Drivers for | ||
60 | devices with volatile caches need to implement the support for these | ||
61 | flags themselves without any help from the block layer. | ||
62 | |||
63 | |||
64 | Implementation details for request_fn based block drivers | ||
65 | -------------------------------------------------------------- | ||
66 | |||
67 | For devices that do not support volatile write caches there is no driver | ||
68 | support required; the block layer completes empty REQ_FLUSH requests before ||
69 | entering the driver and strips off the REQ_FLUSH and REQ_FUA bits from | ||
70 | requests that have a payload. For devices with volatile write caches the | ||
71 | driver needs to tell the block layer that it supports flushing caches by | ||
72 | doing: | ||
73 | |||
74 | blk_queue_flush(sdkp->disk->queue, REQ_FLUSH); | ||
75 | |||
76 | and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that | ||
77 | REQ_FLUSH requests with a payload are automatically turned into a sequence | ||
78 | of an empty REQ_FLUSH request followed by the actual write by the block | ||
79 | layer. For devices that also support the FUA bit the block layer needs | ||
80 | to be told to pass through the REQ_FUA bit using: | ||
81 | |||
82 | blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA); | ||
83 | |||
84 | and the driver must handle write requests that have the REQ_FUA bit set | ||
85 | in prep_fn/request_fn. If the FUA bit is not natively supported the block | ||
86 | layer turns it into an empty REQ_FLUSH request after the actual write. | ||
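Putting the two registration variants together, a request_fn based driver with a volatile cache and FUA support might look roughly like this sketch (the mydev_* names are illustrative, not from the tree):

/* queue setup */
blk_queue_flush(mydev->queue, REQ_FLUSH | REQ_FUA);

/* request_fn */
while ((rq = blk_fetch_request(q)) != NULL) {
        if (rq->cmd_flags & REQ_FLUSH)
                /* empty flush request generated by the block layer */
                mydev_issue_cache_flush(mydev, rq);
        else
                /* may carry REQ_FUA since it was advertised above */
                mydev_queue_rw(mydev, rq);
}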
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 8a6a8c6d4980..dc73bc54cc4e 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -1640,15 +1640,6 @@ static void blk_request(struct virtqueue *vq) | |||
1640 | off = out->sector * 512; | 1640 | off = out->sector * 512; |
1641 | 1641 | ||
1642 | /* | 1642 | /* |
1643 | * The block device implements "barriers", where the Guest indicates | ||
1644 | * that it wants all previous writes to occur before this write. We | ||
1645 | * don't have a way of asking our kernel to do a barrier, so we just | ||
1646 | * synchronize all the data in the file. Pretty poor, no? | ||
1647 | */ | ||
1648 | if (out->type & VIRTIO_BLK_T_BARRIER) | ||
1649 | fdatasync(vblk->fd); | ||
1650 | |||
1651 | /* | ||
1652 | * In general the virtio block driver is allowed to try SCSI commands. | 1643 | * In general the virtio block driver is allowed to try SCSI commands. |
1653 | * It'd be nice if we supported eject, for example, but we don't. | 1644 | * It'd be nice if we supported eject, for example, but we don't. |
1654 | */ | 1645 | */ |
@@ -1680,6 +1671,13 @@ static void blk_request(struct virtqueue *vq) | |||
1680 | /* Die, bad Guest, die. */ | 1671 | /* Die, bad Guest, die. */ |
1681 | errx(1, "Write past end %llu+%u", off, ret); | 1672 | errx(1, "Write past end %llu+%u", off, ret); |
1682 | } | 1673 | } |
1674 | |||
1675 | wlen = sizeof(*in); | ||
1676 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); | ||
1677 | } else if (out->type & VIRTIO_BLK_T_FLUSH) { | ||
1678 | /* Flush */ | ||
1679 | ret = fdatasync(vblk->fd); | ||
1680 | verbose("FLUSH fdatasync: %i\n", ret); | ||
1683 | wlen = sizeof(*in); | 1681 | wlen = sizeof(*in); |
1684 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); | 1682 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); |
1685 | } else { | 1683 | } else { |
@@ -1703,15 +1701,6 @@ static void blk_request(struct virtqueue *vq) | |||
1703 | } | 1701 | } |
1704 | } | 1702 | } |
1705 | 1703 | ||
1706 | /* | ||
1707 | * OK, so we noted that it was pretty poor to use an fdatasync as a | ||
1708 | * barrier. But Christoph Hellwig points out that we need a sync | ||
1709 | * *afterwards* as well: "Barriers specify no reordering to the front | ||
1710 | * or the back." And Jens Axboe confirmed it, so here we are: | ||
1711 | */ | ||
1712 | if (out->type & VIRTIO_BLK_T_BARRIER) | ||
1713 | fdatasync(vblk->fd); | ||
1714 | |||
1715 | /* Finished that request. */ | 1704 | /* Finished that request. */ |
1716 | add_used(vq, head, wlen); | 1705 | add_used(vq, head, wlen); |
1717 | } | 1706 | } |
@@ -1736,8 +1725,8 @@ static void setup_block_file(const char *filename) | |||
1736 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); | 1725 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); |
1737 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); | 1726 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); |
1738 | 1727 | ||
1739 | /* We support barriers. */ | 1728 | /* We support FLUSH. */ |
1740 | add_feature(dev, VIRTIO_BLK_F_BARRIER); | 1729 | add_feature(dev, VIRTIO_BLK_F_FLUSH); |
1741 | 1730 | ||
1742 | /* Tell Guest how many sectors this device has. */ | 1731 | /* Tell Guest how many sectors this device has. */ |
1743 | conf.capacity = cpu_to_le64(vblk->len / 512); | 1732 | conf.capacity = cpu_to_le64(vblk->len / 512); |
diff --git a/block/Makefile b/block/Makefile index c850d5ef80a2..0fec4b3fab51 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -3,7 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o | 8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o |
9 | 9 | ||
diff --git a/block/blk-barrier.c b/block/blk-barrier.c deleted file mode 100644 index f0faefca032f..000000000000 --- a/block/blk-barrier.c +++ /dev/null | |||
@@ -1,350 +0,0 @@ | |||
1 | /* | ||
2 | * Functions related to barrier IO handling | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/bio.h> | ||
7 | #include <linux/blkdev.h> | ||
8 | #include <linux/gfp.h> | ||
9 | |||
10 | #include "blk.h" | ||
11 | |||
12 | /** | ||
13 | * blk_queue_ordered - does this queue support ordered writes | ||
14 | * @q: the request queue | ||
15 | * @ordered: one of QUEUE_ORDERED_* | ||
16 | * | ||
17 | * Description: | ||
18 | * For journalled file systems, doing ordered writes on a commit | ||
19 | * block instead of explicitly doing wait_on_buffer (which is bad | ||
20 | * for performance) can be a big win. Block drivers supporting this | ||
21 | * feature should call this function and indicate so. | ||
22 | * | ||
23 | **/ | ||
24 | int blk_queue_ordered(struct request_queue *q, unsigned ordered) | ||
25 | { | ||
26 | if (ordered != QUEUE_ORDERED_NONE && | ||
27 | ordered != QUEUE_ORDERED_DRAIN && | ||
28 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | ||
29 | ordered != QUEUE_ORDERED_DRAIN_FUA && | ||
30 | ordered != QUEUE_ORDERED_TAG && | ||
31 | ordered != QUEUE_ORDERED_TAG_FLUSH && | ||
32 | ordered != QUEUE_ORDERED_TAG_FUA) { | ||
33 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | ||
34 | return -EINVAL; | ||
35 | } | ||
36 | |||
37 | q->ordered = ordered; | ||
38 | q->next_ordered = ordered; | ||
39 | |||
40 | return 0; | ||
41 | } | ||
42 | EXPORT_SYMBOL(blk_queue_ordered); | ||
43 | |||
44 | /* | ||
45 | * Cache flushing for ordered writes handling | ||
46 | */ | ||
47 | unsigned blk_ordered_cur_seq(struct request_queue *q) | ||
48 | { | ||
49 | if (!q->ordseq) | ||
50 | return 0; | ||
51 | return 1 << ffz(q->ordseq); | ||
52 | } | ||
53 | |||
54 | unsigned blk_ordered_req_seq(struct request *rq) | ||
55 | { | ||
56 | struct request_queue *q = rq->q; | ||
57 | |||
58 | BUG_ON(q->ordseq == 0); | ||
59 | |||
60 | if (rq == &q->pre_flush_rq) | ||
61 | return QUEUE_ORDSEQ_PREFLUSH; | ||
62 | if (rq == &q->bar_rq) | ||
63 | return QUEUE_ORDSEQ_BAR; | ||
64 | if (rq == &q->post_flush_rq) | ||
65 | return QUEUE_ORDSEQ_POSTFLUSH; | ||
66 | |||
67 | /* | ||
68 | * !fs requests don't need to follow barrier ordering. Always | ||
69 | * put them at the front. This fixes the following deadlock. | ||
70 | * | ||
71 | * http://thread.gmane.org/gmane.linux.kernel/537473 | ||
72 | */ | ||
73 | if (rq->cmd_type != REQ_TYPE_FS) | ||
74 | return QUEUE_ORDSEQ_DRAIN; | ||
75 | |||
76 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == | ||
77 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) | ||
78 | return QUEUE_ORDSEQ_DRAIN; | ||
79 | else | ||
80 | return QUEUE_ORDSEQ_DONE; | ||
81 | } | ||
82 | |||
83 | bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) | ||
84 | { | ||
85 | struct request *rq; | ||
86 | |||
87 | if (error && !q->orderr) | ||
88 | q->orderr = error; | ||
89 | |||
90 | BUG_ON(q->ordseq & seq); | ||
91 | q->ordseq |= seq; | ||
92 | |||
93 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) | ||
94 | return false; | ||
95 | |||
96 | /* | ||
97 | * Okay, sequence complete. | ||
98 | */ | ||
99 | q->ordseq = 0; | ||
100 | rq = q->orig_bar_rq; | ||
101 | __blk_end_request_all(rq, q->orderr); | ||
102 | return true; | ||
103 | } | ||
104 | |||
105 | static void pre_flush_end_io(struct request *rq, int error) | ||
106 | { | ||
107 | elv_completed_request(rq->q, rq); | ||
108 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | ||
109 | } | ||
110 | |||
111 | static void bar_end_io(struct request *rq, int error) | ||
112 | { | ||
113 | elv_completed_request(rq->q, rq); | ||
114 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | ||
115 | } | ||
116 | |||
117 | static void post_flush_end_io(struct request *rq, int error) | ||
118 | { | ||
119 | elv_completed_request(rq->q, rq); | ||
120 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); | ||
121 | } | ||
122 | |||
123 | static void queue_flush(struct request_queue *q, unsigned which) | ||
124 | { | ||
125 | struct request *rq; | ||
126 | rq_end_io_fn *end_io; | ||
127 | |||
128 | if (which == QUEUE_ORDERED_DO_PREFLUSH) { | ||
129 | rq = &q->pre_flush_rq; | ||
130 | end_io = pre_flush_end_io; | ||
131 | } else { | ||
132 | rq = &q->post_flush_rq; | ||
133 | end_io = post_flush_end_io; | ||
134 | } | ||
135 | |||
136 | blk_rq_init(q, rq); | ||
137 | rq->cmd_type = REQ_TYPE_FS; | ||
138 | rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; | ||
139 | rq->rq_disk = q->orig_bar_rq->rq_disk; | ||
140 | rq->end_io = end_io; | ||
141 | |||
142 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | ||
143 | } | ||
144 | |||
145 | static inline bool start_ordered(struct request_queue *q, struct request **rqp) | ||
146 | { | ||
147 | struct request *rq = *rqp; | ||
148 | unsigned skip = 0; | ||
149 | |||
150 | q->orderr = 0; | ||
151 | q->ordered = q->next_ordered; | ||
152 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | ||
153 | |||
154 | /* | ||
155 | * For an empty barrier, there's no actual BAR request, which | ||
156 | * in turn makes POSTFLUSH unnecessary. Mask them off. | ||
157 | */ | ||
158 | if (!blk_rq_sectors(rq)) { | ||
159 | q->ordered &= ~(QUEUE_ORDERED_DO_BAR | | ||
160 | QUEUE_ORDERED_DO_POSTFLUSH); | ||
161 | /* | ||
162 | * Empty barrier on a write-through device w/ ordered | ||
163 | * tag has no command to issue and without any command | ||
164 | * to issue, ordering by tag can't be used. Drain | ||
165 | * instead. | ||
166 | */ | ||
167 | if ((q->ordered & QUEUE_ORDERED_BY_TAG) && | ||
168 | !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { | ||
169 | q->ordered &= ~QUEUE_ORDERED_BY_TAG; | ||
170 | q->ordered |= QUEUE_ORDERED_BY_DRAIN; | ||
171 | } | ||
172 | } | ||
173 | |||
174 | /* stash away the original request */ | ||
175 | blk_dequeue_request(rq); | ||
176 | q->orig_bar_rq = rq; | ||
177 | rq = NULL; | ||
178 | |||
179 | /* | ||
180 | * Queue ordered sequence. As we stack them at the head, we | ||
181 | * need to queue in reverse order. Note that we rely on that | ||
182 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | ||
183 | * request gets inbetween ordered sequence. | ||
184 | */ | ||
185 | if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { | ||
186 | queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); | ||
187 | rq = &q->post_flush_rq; | ||
188 | } else | ||
189 | skip |= QUEUE_ORDSEQ_POSTFLUSH; | ||
190 | |||
191 | if (q->ordered & QUEUE_ORDERED_DO_BAR) { | ||
192 | rq = &q->bar_rq; | ||
193 | |||
194 | /* initialize proxy request and queue it */ | ||
195 | blk_rq_init(q, rq); | ||
196 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) | ||
197 | rq->cmd_flags |= REQ_WRITE; | ||
198 | if (q->ordered & QUEUE_ORDERED_DO_FUA) | ||
199 | rq->cmd_flags |= REQ_FUA; | ||
200 | init_request_from_bio(rq, q->orig_bar_rq->bio); | ||
201 | rq->end_io = bar_end_io; | ||
202 | |||
203 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | ||
204 | } else | ||
205 | skip |= QUEUE_ORDSEQ_BAR; | ||
206 | |||
207 | if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { | ||
208 | queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); | ||
209 | rq = &q->pre_flush_rq; | ||
210 | } else | ||
211 | skip |= QUEUE_ORDSEQ_PREFLUSH; | ||
212 | |||
213 | if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) | ||
214 | rq = NULL; | ||
215 | else | ||
216 | skip |= QUEUE_ORDSEQ_DRAIN; | ||
217 | |||
218 | *rqp = rq; | ||
219 | |||
220 | /* | ||
221 | * Complete skipped sequences. If whole sequence is complete, | ||
222 | * return false to tell elevator that this request is gone. | ||
223 | */ | ||
224 | return !blk_ordered_complete_seq(q, skip, 0); | ||
225 | } | ||
226 | |||
227 | bool blk_do_ordered(struct request_queue *q, struct request **rqp) | ||
228 | { | ||
229 | struct request *rq = *rqp; | ||
230 | const int is_barrier = rq->cmd_type == REQ_TYPE_FS && | ||
231 | (rq->cmd_flags & REQ_HARDBARRIER); | ||
232 | |||
233 | if (!q->ordseq) { | ||
234 | if (!is_barrier) | ||
235 | return true; | ||
236 | |||
237 | if (q->next_ordered != QUEUE_ORDERED_NONE) | ||
238 | return start_ordered(q, rqp); | ||
239 | else { | ||
240 | /* | ||
241 | * Queue ordering not supported. Terminate | ||
242 | * with prejudice. | ||
243 | */ | ||
244 | blk_dequeue_request(rq); | ||
245 | __blk_end_request_all(rq, -EOPNOTSUPP); | ||
246 | *rqp = NULL; | ||
247 | return false; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Ordered sequence in progress | ||
253 | */ | ||
254 | |||
255 | /* Special requests are not subject to ordering rules. */ | ||
256 | if (rq->cmd_type != REQ_TYPE_FS && | ||
257 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) | ||
258 | return true; | ||
259 | |||
260 | if (q->ordered & QUEUE_ORDERED_BY_TAG) { | ||
261 | /* Ordered by tag. Blocking the next barrier is enough. */ | ||
262 | if (is_barrier && rq != &q->bar_rq) | ||
263 | *rqp = NULL; | ||
264 | } else { | ||
265 | /* Ordered by draining. Wait for turn. */ | ||
266 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); | ||
267 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) | ||
268 | *rqp = NULL; | ||
269 | } | ||
270 | |||
271 | return true; | ||
272 | } | ||
273 | |||
274 | static void bio_end_empty_barrier(struct bio *bio, int err) | ||
275 | { | ||
276 | if (err) { | ||
277 | if (err == -EOPNOTSUPP) | ||
278 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
279 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
280 | } | ||
281 | if (bio->bi_private) | ||
282 | complete(bio->bi_private); | ||
283 | bio_put(bio); | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * blkdev_issue_flush - queue a flush | ||
288 | * @bdev: blockdev to issue flush for | ||
289 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
290 | * @error_sector: error sector | ||
291 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
292 | * | ||
293 | * Description: | ||
294 | * Issue a flush for the block device in question. Caller can supply | ||
295 | * room for storing the error offset in case of a flush error, if they | ||
296 | * wish to. If WAIT flag is not passed then caller may check only what | ||
297 | * request was pushed in some internal queue for later handling. | ||
298 | */ | ||
299 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, | ||
300 | sector_t *error_sector, unsigned long flags) | ||
301 | { | ||
302 | DECLARE_COMPLETION_ONSTACK(wait); | ||
303 | struct request_queue *q; | ||
304 | struct bio *bio; | ||
305 | int ret = 0; | ||
306 | |||
307 | if (bdev->bd_disk == NULL) | ||
308 | return -ENXIO; | ||
309 | |||
310 | q = bdev_get_queue(bdev); | ||
311 | if (!q) | ||
312 | return -ENXIO; | ||
313 | |||
314 | /* | ||
315 | * some block devices may not have their queue correctly set up here | ||
316 | * (e.g. loop device without a backing file) and so issuing a flush | ||
317 | * here will panic. Ensure there is a request function before issuing | ||
318 | * the barrier. | ||
319 | */ | ||
320 | if (!q->make_request_fn) | ||
321 | return -ENXIO; | ||
322 | |||
323 | bio = bio_alloc(gfp_mask, 0); | ||
324 | bio->bi_end_io = bio_end_empty_barrier; | ||
325 | bio->bi_bdev = bdev; | ||
326 | if (test_bit(BLKDEV_WAIT, &flags)) | ||
327 | bio->bi_private = &wait; | ||
328 | |||
329 | bio_get(bio); | ||
330 | submit_bio(WRITE_BARRIER, bio); | ||
331 | if (test_bit(BLKDEV_WAIT, &flags)) { | ||
332 | wait_for_completion(&wait); | ||
333 | /* | ||
334 | * The driver must store the error location in ->bi_sector, if | ||
335 | * it supports it. For non-stacked drivers, this should be | ||
336 | * copied from blk_rq_pos(rq). | ||
337 | */ | ||
338 | if (error_sector) | ||
339 | *error_sector = bio->bi_sector; | ||
340 | } | ||
341 | |||
342 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
343 | ret = -EOPNOTSUPP; | ||
344 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
345 | ret = -EIO; | ||
346 | |||
347 | bio_put(bio); | ||
348 | return ret; | ||
349 | } | ||
350 | EXPORT_SYMBOL(blkdev_issue_flush); | ||
diff --git a/block/blk-core.c b/block/blk-core.c index 500eb859886e..45141469e89e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -139,7 +139,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, | |||
139 | { | 139 | { |
140 | struct request_queue *q = rq->q; | 140 | struct request_queue *q = rq->q; |
141 | 141 | ||
142 | if (&q->bar_rq != rq) { | 142 | if (&q->flush_rq != rq) { |
143 | if (error) | 143 | if (error) |
144 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 144 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
145 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 145 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
@@ -163,13 +163,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, | |||
163 | if (bio->bi_size == 0) | 163 | if (bio->bi_size == 0) |
164 | bio_endio(bio, error); | 164 | bio_endio(bio, error); |
165 | } else { | 165 | } else { |
166 | |||
167 | /* | 166 | /* |
168 | * Okay, this is the barrier request in progress, just | 167 | * Okay, this is the sequenced flush request in |
169 | * record the error; | 168 | * progress, just record the error; |
170 | */ | 169 | */ |
171 | if (error && !q->orderr) | 170 | if (error && !q->flush_err) |
172 | q->orderr = error; | 171 | q->flush_err = error; |
173 | } | 172 | } |
174 | } | 173 | } |
175 | 174 | ||
@@ -531,6 +530,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
531 | init_timer(&q->unplug_timer); | 530 | init_timer(&q->unplug_timer); |
532 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 531 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
533 | INIT_LIST_HEAD(&q->timeout_list); | 532 | INIT_LIST_HEAD(&q->timeout_list); |
533 | INIT_LIST_HEAD(&q->pending_flushes); | ||
534 | INIT_WORK(&q->unplug_work, blk_unplug_work); | 534 | INIT_WORK(&q->unplug_work, blk_unplug_work); |
535 | 535 | ||
536 | kobject_init(&q->kobj, &blk_queue_ktype); | 536 | kobject_init(&q->kobj, &blk_queue_ktype); |
@@ -1053,22 +1053,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, | |||
1053 | } | 1053 | } |
1054 | EXPORT_SYMBOL(blk_insert_request); | 1054 | EXPORT_SYMBOL(blk_insert_request); |
1055 | 1055 | ||
1056 | /* | ||
1057 | * add-request adds a request to the linked list. | ||
1058 | * queue lock is held and interrupts disabled, as we muck with the | ||
1059 | * request queue list. | ||
1060 | */ | ||
1061 | static inline void add_request(struct request_queue *q, struct request *req) | ||
1062 | { | ||
1063 | drive_stat_acct(req, 1); | ||
1064 | |||
1065 | /* | ||
1066 | * elevator indicated where it wants this request to be | ||
1067 | * inserted at elevator_merge time | ||
1068 | */ | ||
1069 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | ||
1070 | } | ||
1071 | |||
1072 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 1056 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
1073 | unsigned long now) | 1057 | unsigned long now) |
1074 | { | 1058 | { |
@@ -1217,13 +1201,16 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1217 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1201 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1218 | const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); | 1202 | const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); |
1219 | const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1203 | const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; |
1204 | int where = ELEVATOR_INSERT_SORT; | ||
1220 | int rw_flags; | 1205 | int rw_flags; |
1221 | 1206 | ||
1222 | if ((bio->bi_rw & REQ_HARDBARRIER) && | 1207 | /* REQ_HARDBARRIER is no more */ |
1223 | (q->next_ordered == QUEUE_ORDERED_NONE)) { | 1208 | if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, |
1209 | "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { | ||
1224 | bio_endio(bio, -EOPNOTSUPP); | 1210 | bio_endio(bio, -EOPNOTSUPP); |
1225 | return 0; | 1211 | return 0; |
1226 | } | 1212 | } |
1213 | |||
1227 | /* | 1214 | /* |
1228 | * low level driver can indicate that it wants pages above a | 1215 | * low level driver can indicate that it wants pages above a |
1229 | * certain limit bounced to low memory (ie for highmem, or even | 1216 | * certain limit bounced to low memory (ie for highmem, or even |
@@ -1233,7 +1220,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1233 | 1220 | ||
1234 | spin_lock_irq(q->queue_lock); | 1221 | spin_lock_irq(q->queue_lock); |
1235 | 1222 | ||
1236 | if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) | 1223 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { |
1224 | where = ELEVATOR_INSERT_FRONT; | ||
1225 | goto get_rq; | ||
1226 | } | ||
1227 | |||
1228 | if (elv_queue_empty(q)) | ||
1237 | goto get_rq; | 1229 | goto get_rq; |
1238 | 1230 | ||
1239 | el_ret = elv_merge(q, &req, bio); | 1231 | el_ret = elv_merge(q, &req, bio); |
@@ -1330,7 +1322,10 @@ get_rq: | |||
1330 | req->cpu = blk_cpu_to_group(smp_processor_id()); | 1322 | req->cpu = blk_cpu_to_group(smp_processor_id()); |
1331 | if (queue_should_plug(q) && elv_queue_empty(q)) | 1323 | if (queue_should_plug(q) && elv_queue_empty(q)) |
1332 | blk_plug_device(q); | 1324 | blk_plug_device(q); |
1333 | add_request(q, req); | 1325 | |
1326 | /* insert the request into the elevator */ | ||
1327 | drive_stat_acct(req, 1); | ||
1328 | __elv_add_request(q, req, where, 0); | ||
1334 | out: | 1329 | out: |
1335 | if (unplug || !queue_should_plug(q)) | 1330 | if (unplug || !queue_should_plug(q)) |
1336 | __generic_unplug_device(q); | 1331 | __generic_unplug_device(q); |
@@ -1530,6 +1525,19 @@ static inline void __generic_make_request(struct bio *bio) | |||
1530 | if (bio_check_eod(bio, nr_sectors)) | 1525 | if (bio_check_eod(bio, nr_sectors)) |
1531 | goto end_io; | 1526 | goto end_io; |
1532 | 1527 | ||
1528 | /* | ||
1529 | * Filter flush bio's early so that make_request based | ||
1530 | * drivers without flush support don't have to worry | ||
1531 | * about them. | ||
1532 | */ | ||
1533 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { | ||
1534 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); | ||
1535 | if (!nr_sectors) { | ||
1536 | err = 0; | ||
1537 | goto end_io; | ||
1538 | } | ||
1539 | } | ||
1540 | |||
1533 | if ((bio->bi_rw & REQ_DISCARD) && | 1541 | if ((bio->bi_rw & REQ_DISCARD) && |
1534 | (!blk_queue_discard(q) || | 1542 | (!blk_queue_discard(q) || |
1535 | ((bio->bi_rw & REQ_SECURE) && | 1543 | ((bio->bi_rw & REQ_SECURE) && |
@@ -1794,11 +1802,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) | |||
1794 | static void blk_account_io_done(struct request *req) | 1802 | static void blk_account_io_done(struct request *req) |
1795 | { | 1803 | { |
1796 | /* | 1804 | /* |
1797 | * Account IO completion. bar_rq isn't accounted as a normal | 1805 | * Account IO completion. flush_rq isn't accounted as a |
1798 | * IO on queueing nor completion. Accounting the containing | 1806 | * normal IO on queueing nor completion. Accounting the |
1799 | * request is enough. | 1807 | * containing request is enough. |
1800 | */ | 1808 | */ |
1801 | if (blk_do_io_stat(req) && req != &req->q->bar_rq) { | 1809 | if (blk_do_io_stat(req) && req != &req->q->flush_rq) { |
1802 | unsigned long duration = jiffies - req->start_time; | 1810 | unsigned long duration = jiffies - req->start_time; |
1803 | const int rw = rq_data_dir(req); | 1811 | const int rw = rq_data_dir(req); |
1804 | struct hd_struct *part; | 1812 | struct hd_struct *part; |
@@ -2523,9 +2531,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); | |||
2523 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | 2531 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
2524 | { | 2532 | { |
2525 | dst->cpu = src->cpu; | 2533 | dst->cpu = src->cpu; |
2526 | dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); | 2534 | dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; |
2527 | if (src->cmd_flags & REQ_DISCARD) | ||
2528 | dst->cmd_flags |= REQ_DISCARD; | ||
2529 | dst->cmd_type = src->cmd_type; | 2535 | dst->cmd_type = src->cmd_type; |
2530 | dst->__sector = blk_rq_pos(src); | 2536 | dst->__sector = blk_rq_pos(src); |
2531 | dst->__data_len = blk_rq_bytes(src); | 2537 | dst->__data_len = blk_rq_bytes(src); |
diff --git a/block/blk-flush.c b/block/blk-flush.c new file mode 100644 index 000000000000..54b123d6563e --- /dev/null +++ b/block/blk-flush.c | |||
@@ -0,0 +1,262 @@ | |||
1 | /* | ||
2 | * Functions to sequence FLUSH and FUA writes. | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/bio.h> | ||
7 | #include <linux/blkdev.h> | ||
8 | #include <linux/gfp.h> | ||
9 | |||
10 | #include "blk.h" | ||
11 | |||
12 | /* FLUSH/FUA sequences */ | ||
13 | enum { | ||
14 | QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ | ||
15 | QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ | ||
16 | QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ | ||
17 | QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ | ||
18 | QUEUE_FSEQ_DONE = (1 << 4), | ||
19 | }; | ||
20 | |||
21 | static struct request *queue_next_fseq(struct request_queue *q); | ||
22 | |||
23 | unsigned blk_flush_cur_seq(struct request_queue *q) | ||
24 | { | ||
25 | if (!q->flush_seq) | ||
26 | return 0; | ||
27 | return 1 << ffz(q->flush_seq); | ||
28 | } | ||
29 | |||
30 | static struct request *blk_flush_complete_seq(struct request_queue *q, | ||
31 | unsigned seq, int error) | ||
32 | { | ||
33 | struct request *next_rq = NULL; | ||
34 | |||
35 | if (error && !q->flush_err) | ||
36 | q->flush_err = error; | ||
37 | |||
38 | BUG_ON(q->flush_seq & seq); | ||
39 | q->flush_seq |= seq; | ||
40 | |||
41 | if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { | ||
42 | /* not complete yet, queue the next flush sequence */ | ||
43 | next_rq = queue_next_fseq(q); | ||
44 | } else { | ||
45 | /* complete this flush request */ | ||
46 | __blk_end_request_all(q->orig_flush_rq, q->flush_err); | ||
47 | q->orig_flush_rq = NULL; | ||
48 | q->flush_seq = 0; | ||
49 | |||
50 | /* dispatch the next flush if there's one */ | ||
51 | if (!list_empty(&q->pending_flushes)) { | ||
52 | next_rq = list_entry_rq(q->pending_flushes.next); | ||
53 | list_move(&next_rq->queuelist, &q->queue_head); | ||
54 | } | ||
55 | } | ||
56 | return next_rq; | ||
57 | } | ||
58 | |||
59 | static void blk_flush_complete_seq_end_io(struct request_queue *q, | ||
60 | unsigned seq, int error) | ||
61 | { | ||
62 | bool was_empty = elv_queue_empty(q); | ||
63 | struct request *next_rq; | ||
64 | |||
65 | next_rq = blk_flush_complete_seq(q, seq, error); | ||
66 | |||
67 | /* | ||
68 | * Moving a request silently to empty queue_head may stall the | ||
69 | * queue. Kick the queue in those cases. | ||
70 | */ | ||
71 | if (was_empty && next_rq) | ||
72 | __blk_run_queue(q); | ||
73 | } | ||
74 | |||
75 | static void pre_flush_end_io(struct request *rq, int error) | ||
76 | { | ||
77 | elv_completed_request(rq->q, rq); | ||
78 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); | ||
79 | } | ||
80 | |||
81 | static void flush_data_end_io(struct request *rq, int error) | ||
82 | { | ||
83 | elv_completed_request(rq->q, rq); | ||
84 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); | ||
85 | } | ||
86 | |||
87 | static void post_flush_end_io(struct request *rq, int error) | ||
88 | { | ||
89 | elv_completed_request(rq->q, rq); | ||
90 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); | ||
91 | } | ||
92 | |||
93 | static void init_flush_request(struct request *rq, struct gendisk *disk) | ||
94 | { | ||
95 | rq->cmd_type = REQ_TYPE_FS; | ||
96 | rq->cmd_flags = WRITE_FLUSH; | ||
97 | rq->rq_disk = disk; | ||
98 | } | ||
99 | |||
100 | static struct request *queue_next_fseq(struct request_queue *q) | ||
101 | { | ||
102 | struct request *orig_rq = q->orig_flush_rq; | ||
103 | struct request *rq = &q->flush_rq; | ||
104 | |||
105 | blk_rq_init(q, rq); | ||
106 | |||
107 | switch (blk_flush_cur_seq(q)) { | ||
108 | case QUEUE_FSEQ_PREFLUSH: | ||
109 | init_flush_request(rq, orig_rq->rq_disk); | ||
110 | rq->end_io = pre_flush_end_io; | ||
111 | break; | ||
112 | case QUEUE_FSEQ_DATA: | ||
113 | init_request_from_bio(rq, orig_rq->bio); | ||
114 | /* | ||
115 | * orig_rq->rq_disk may be different from | ||
116 | * bio->bi_bdev->bd_disk if orig_rq got here through | ||
117 | * remapping drivers. Make sure rq->rq_disk points | ||
118 | * to the same one as orig_rq. | ||
119 | */ | ||
120 | rq->rq_disk = orig_rq->rq_disk; | ||
121 | rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); | ||
122 | rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); | ||
123 | rq->end_io = flush_data_end_io; | ||
124 | break; | ||
125 | case QUEUE_FSEQ_POSTFLUSH: | ||
126 | init_flush_request(rq, orig_rq->rq_disk); | ||
127 | rq->end_io = post_flush_end_io; | ||
128 | break; | ||
129 | default: | ||
130 | BUG(); | ||
131 | } | ||
132 | |||
133 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | ||
134 | return rq; | ||
135 | } | ||
136 | |||
137 | struct request *blk_do_flush(struct request_queue *q, struct request *rq) | ||
138 | { | ||
139 | unsigned int fflags = q->flush_flags; /* may change, cache it */ | ||
140 | bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; | ||
141 | bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); | ||
142 | bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); | ||
143 | unsigned skip = 0; | ||
144 | |||
145 | /* | ||
146 | * Special case. If there's data but flush is not necessary, | ||
147 | * the request can be issued directly. | ||
148 | * | ||
149 | * Flush w/o data should be able to be issued directly too but | ||
150 | * currently some drivers assume that rq->bio contains | ||
151 | * non-zero data if it isn't NULL and empty FLUSH requests | ||
152 | * getting here usually have bio's without data. | ||
153 | */ | ||
154 | if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { | ||
155 | rq->cmd_flags &= ~REQ_FLUSH; | ||
156 | if (!has_fua) | ||
157 | rq->cmd_flags &= ~REQ_FUA; | ||
158 | return rq; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * Sequenced flushes can't be processed in parallel. If | ||
163 | * another one is already in progress, queue for later | ||
164 | * processing. | ||
165 | */ | ||
166 | if (q->flush_seq) { | ||
167 | list_move_tail(&rq->queuelist, &q->pending_flushes); | ||
168 | return NULL; | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Start a new flush sequence | ||
173 | */ | ||
174 | q->flush_err = 0; | ||
175 | q->flush_seq |= QUEUE_FSEQ_STARTED; | ||
176 | |||
177 | /* adjust FLUSH/FUA of the original request and stash it away */ | ||
178 | rq->cmd_flags &= ~REQ_FLUSH; | ||
179 | if (!has_fua) | ||
180 | rq->cmd_flags &= ~REQ_FUA; | ||
181 | blk_dequeue_request(rq); | ||
182 | q->orig_flush_rq = rq; | ||
183 | |||
184 | /* skip unneeded sequences and return the first one */ ||
185 | if (!do_preflush) | ||
186 | skip |= QUEUE_FSEQ_PREFLUSH; | ||
187 | if (!blk_rq_sectors(rq)) | ||
188 | skip |= QUEUE_FSEQ_DATA; | ||
189 | if (!do_postflush) | ||
190 | skip |= QUEUE_FSEQ_POSTFLUSH; | ||
191 | return blk_flush_complete_seq(q, skip, 0); | ||
192 | } | ||
193 | |||
194 | static void bio_end_flush(struct bio *bio, int err) | ||
195 | { | ||
196 | if (err) | ||
197 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
198 | if (bio->bi_private) | ||
199 | complete(bio->bi_private); | ||
200 | bio_put(bio); | ||
201 | } | ||
202 | |||
203 | /** | ||
204 | * blkdev_issue_flush - queue a flush | ||
205 | * @bdev: blockdev to issue flush for | ||
206 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
207 | * @error_sector: error sector | ||
208 | * | ||
209 | * Description: | ||
210 | * Issue a flush for the block device in question. Caller can supply | ||
211 | * room for storing the error offset in case of a flush error, if they | ||
212 | * wish to. Note that this function always waits for the flush to ||
213 | * complete; the BLKDEV_IFL_* flags argument has been removed. ||
214 | */ | ||
215 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, | ||
216 | sector_t *error_sector) | ||
217 | { | ||
218 | DECLARE_COMPLETION_ONSTACK(wait); | ||
219 | struct request_queue *q; | ||
220 | struct bio *bio; | ||
221 | int ret = 0; | ||
222 | |||
223 | if (bdev->bd_disk == NULL) | ||
224 | return -ENXIO; | ||
225 | |||
226 | q = bdev_get_queue(bdev); | ||
227 | if (!q) | ||
228 | return -ENXIO; | ||
229 | |||
230 | /* | ||
231 | * some block devices may not have their queue correctly set up here | ||
232 | * (e.g. loop device without a backing file) and so issuing a flush | ||
233 | * here will panic. Ensure there is a request function before issuing | ||
234 | * the flush. | ||
235 | */ | ||
236 | if (!q->make_request_fn) | ||
237 | return -ENXIO; | ||
238 | |||
239 | bio = bio_alloc(gfp_mask, 0); | ||
240 | bio->bi_end_io = bio_end_flush; | ||
241 | bio->bi_bdev = bdev; | ||
242 | bio->bi_private = &wait; | ||
243 | |||
244 | bio_get(bio); | ||
245 | submit_bio(WRITE_FLUSH, bio); | ||
246 | wait_for_completion(&wait); | ||
247 | |||
248 | /* | ||
249 | * The driver must store the error location in ->bi_sector, if | ||
250 | * it supports it. For non-stacked drivers, this should be | ||
251 | * copied from blk_rq_pos(rq). | ||
252 | */ | ||
253 | if (error_sector) | ||
254 | *error_sector = bio->bi_sector; | ||
255 | |||
256 | if (!bio_flagged(bio, BIO_UPTODATE)) | ||
257 | ret = -EIO; | ||
258 | |||
259 | bio_put(bio); | ||
260 | return ret; | ||
261 | } | ||
262 | EXPORT_SYMBOL(blkdev_issue_flush); | ||
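With the BLKDEV_IFL_* flags gone (see "block: remove BLKDEV_IFL_WAIT" in the commit list above), every caller of this helper now gets synchronous behaviour. A hedged usage sketch, with an assumed bdev:

int err;

/* flush the volatile write cache of the device behind 'bdev';
 * the call returns only after the flush has completed */
err = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
if (err)
        printk(KERN_WARNING "cache flush failed: %d\n", err);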
diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029a104e..1a320d2406b0 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c | |||
@@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
39 | { | 39 | { |
40 | DECLARE_COMPLETION_ONSTACK(wait); | 40 | DECLARE_COMPLETION_ONSTACK(wait); |
41 | struct request_queue *q = bdev_get_queue(bdev); | 41 | struct request_queue *q = bdev_get_queue(bdev); |
42 | int type = flags & BLKDEV_IFL_BARRIER ? | 42 | int type = REQ_WRITE | REQ_DISCARD; |
43 | DISCARD_BARRIER : DISCARD_NOBARRIER; | ||
44 | unsigned int max_discard_sectors; | 43 | unsigned int max_discard_sectors; |
45 | struct bio *bio; | 44 | struct bio *bio; |
46 | int ret = 0; | 45 | int ret = 0; |
@@ -62,10 +61,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
62 | max_discard_sectors &= ~(disc_sects - 1); | 61 | max_discard_sectors &= ~(disc_sects - 1); |
63 | } | 62 | } |
64 | 63 | ||
65 | if (flags & BLKDEV_IFL_SECURE) { | 64 | if (flags & BLKDEV_DISCARD_SECURE) { |
66 | if (!blk_queue_secdiscard(q)) | 65 | if (!blk_queue_secdiscard(q)) |
67 | return -EOPNOTSUPP; | 66 | return -EOPNOTSUPP; |
68 | type |= DISCARD_SECURE; | 67 | type |= REQ_SECURE; |
69 | } | 68 | } |
70 | 69 | ||
71 | while (nr_sects && !ret) { | 70 | while (nr_sects && !ret) { |
@@ -78,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
78 | bio->bi_sector = sector; | 77 | bio->bi_sector = sector; |
79 | bio->bi_end_io = blkdev_discard_end_io; | 78 | bio->bi_end_io = blkdev_discard_end_io; |
80 | bio->bi_bdev = bdev; | 79 | bio->bi_bdev = bdev; |
81 | if (flags & BLKDEV_IFL_WAIT) | 80 | bio->bi_private = &wait; |
82 | bio->bi_private = &wait; | ||
83 | 81 | ||
84 | if (nr_sects > max_discard_sectors) { | 82 | if (nr_sects > max_discard_sectors) { |
85 | bio->bi_size = max_discard_sectors << 9; | 83 | bio->bi_size = max_discard_sectors << 9; |
@@ -93,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
93 | bio_get(bio); | 91 | bio_get(bio); |
94 | submit_bio(type, bio); | 92 | submit_bio(type, bio); |
95 | 93 | ||
96 | if (flags & BLKDEV_IFL_WAIT) | 94 | wait_for_completion(&wait); |
97 | wait_for_completion(&wait); | ||
98 | 95 | ||
99 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 96 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
100 | ret = -EOPNOTSUPP; | 97 | ret = -EOPNOTSUPP; |
@@ -140,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err) | |||
140 | * @sector: start sector | 137 | * @sector: start sector |
141 | * @nr_sects: number of sectors to write | 138 | * @nr_sects: number of sectors to write |
142 | * @gfp_mask: memory allocation flags (for bio_alloc) | 139 | * @gfp_mask: memory allocation flags (for bio_alloc) |
143 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
144 | * | 140 | * |
145 | * Description: | 141 | * Description: |
146 | * Generate and issue a number of bios with zero-filled pages. | 142 | * Generate and issue a number of bios with zero-filled pages. |
@@ -149,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err) | |||
149 | */ | 145 | */ |
150 | 146 | ||
151 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 147 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
152 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) | 148 | sector_t nr_sects, gfp_t gfp_mask) |
153 | { | 149 | { |
154 | int ret; | 150 | int ret; |
155 | struct bio *bio; | 151 | struct bio *bio; |
@@ -162,12 +158,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | |||
162 | bb.wait = &wait; | 158 | bb.wait = &wait; |
163 | bb.end_io = NULL; | 159 | bb.end_io = NULL; |
164 | 160 | ||
165 | if (flags & BLKDEV_IFL_BARRIER) { | ||
166 | /* issue async barrier before the data */ | ||
167 | ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); | ||
168 | if (ret) | ||
169 | return ret; | ||
170 | } | ||
171 | submit: | 161 | submit: |
172 | ret = 0; | 162 | ret = 0; |
173 | while (nr_sects != 0) { | 163 | while (nr_sects != 0) { |
@@ -181,8 +171,7 @@ submit: | |||
181 | bio->bi_sector = sector; | 171 | bio->bi_sector = sector; |
182 | bio->bi_bdev = bdev; | 172 | bio->bi_bdev = bdev; |
183 | bio->bi_end_io = bio_batch_end_io; | 173 | bio->bi_end_io = bio_batch_end_io; |
184 | if (flags & BLKDEV_IFL_WAIT) | 174 | bio->bi_private = &bb; |
185 | bio->bi_private = &bb; | ||
186 | 175 | ||
187 | while (nr_sects != 0) { | 176 | while (nr_sects != 0) { |
188 | sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); | 177 | sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); |
@@ -199,18 +188,10 @@ submit: | |||
199 | issued++; | 188 | issued++; |
200 | submit_bio(WRITE, bio); | 189 | submit_bio(WRITE, bio); |
201 | } | 190 | } |
202 | /* | ||
203 | * When all data bios are in flight, send the final barrier if requested. | ||
204 | */ | ||
205 | if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) | ||
206 | ret = blkdev_issue_flush(bdev, gfp_mask, NULL, | ||
207 | flags & BLKDEV_IFL_WAIT); | ||
208 | |||
209 | 191 | ||
210 | if (flags & BLKDEV_IFL_WAIT) | 192 | /* Wait for bios in-flight */ |
211 | /* Wait for bios in-flight */ | 193 | while (issued != atomic_read(&bb.done)) |
212 | while ( issued != atomic_read(&bb.done)) | 194 | wait_for_completion(&wait); |
213 | wait_for_completion(&wait); | ||
214 | 195 | ||
215 | if (!test_bit(BIO_UPTODATE, &bb.flags)) | 196 | if (!test_bit(BIO_UPTODATE, &bb.flags)) |
216 | /* One of the bios in the batch completed with an error. */ | 197 | /* One of the bios in the batch completed with an error. */ |
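A hedged sketch of driving the two reworked helpers above: blkdev_issue_discard() keeps only the BLKDEV_DISCARD_SECURE flag, blkdev_issue_zeroout() loses its flags argument, and both now always wait for completion. The wrapper name and fallback policy are illustrative.

#include <linux/blkdev.h>

static int example_trim_or_zero(struct block_device *bdev,
				sector_t sector, sector_t nr_sects)
{
	int ret;

	/* Ask for a secure discard of the range. */
	ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
				   BLKDEV_DISCARD_SECURE);
	if (ret == -EOPNOTSUPP)
		/* No (secure) discard support: write zeroes instead. */
		ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL);
	return ret;
}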
diff --git a/block/blk-settings.c b/block/blk-settings.c index 315b88c8cbbb..701859fb9647 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -792,6 +792,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) | |||
792 | } | 792 | } |
793 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); | 793 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); |
794 | 794 | ||
795 | /** | ||
796 | * blk_queue_flush - configure queue's cache flush capability | ||
797 | * @q: the request queue for the device | ||
798 | * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA | ||
799 | * | ||
800 | * Tell the block layer the cache flush capability of @q. If it supports | ||
801 | * flushing, REQ_FLUSH should be set. If it supports bypassing | ||
802 | * write cache for individual writes, REQ_FUA should be set. | ||
803 | */ | ||
804 | void blk_queue_flush(struct request_queue *q, unsigned int flush) | ||
805 | { | ||
806 | WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); | ||
807 | |||
808 | if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) | ||
809 | flush &= ~REQ_FUA; | ||
810 | |||
811 | q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); | ||
812 | } | ||
813 | EXPORT_SYMBOL_GPL(blk_queue_flush); | ||
814 | |||
795 | static int __init blk_settings_init(void) | 815 | static int __init blk_settings_init(void) |
796 | { | 816 | { |
797 | blk_max_low_pfn = max_low_pfn - 1; | 817 | blk_max_low_pfn = max_low_pfn - 1; |
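A short driver-side sketch of the new blk_queue_flush() hook (hypothetical helper, assuming the driver has already probed whether the device has a volatile write cache and whether it honours FUA):

#include <linux/blkdev.h>

static void example_configure_flush(struct request_queue *q,
				    bool has_wcache, bool has_fua)
{
	unsigned int flush = 0;

	if (has_wcache) {
		flush = REQ_FLUSH;
		/* REQ_FUA is only meaningful together with REQ_FLUSH. */
		if (has_fua)
			flush |= REQ_FUA;
	}
	/* 0 means writes never need an explicit cache flush. */
	blk_queue_flush(q, flush);
}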
diff --git a/block/blk.h b/block/blk.h index f864012ec300..1e675e5ade02 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) | |||
51 | */ | 51 | */ |
52 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) | 52 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) |
53 | 53 | ||
54 | struct request *blk_do_flush(struct request_queue *q, struct request *rq); | ||
55 | |||
54 | static inline struct request *__elv_next_request(struct request_queue *q) | 56 | static inline struct request *__elv_next_request(struct request_queue *q) |
55 | { | 57 | { |
56 | struct request *rq; | 58 | struct request *rq; |
@@ -58,7 +60,11 @@ static inline struct request *__elv_next_request(struct request_queue *q) | |||
58 | while (1) { | 60 | while (1) { |
59 | while (!list_empty(&q->queue_head)) { | 61 | while (!list_empty(&q->queue_head)) { |
60 | rq = list_entry_rq(q->queue_head.next); | 62 | rq = list_entry_rq(q->queue_head.next); |
61 | if (blk_do_ordered(q, &rq)) | 63 | if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || |
64 | rq == &q->flush_rq) | ||
65 | return rq; | ||
66 | rq = blk_do_flush(q, rq); | ||
67 | if (rq) | ||
62 | return rq; | 68 | return rq; |
63 | } | 69 | } |
64 | 70 | ||
diff --git a/block/elevator.c b/block/elevator.c index 4e11559aa2b0..282e8308f7e2 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) | |||
617 | 617 | ||
618 | void elv_insert(struct request_queue *q, struct request *rq, int where) | 618 | void elv_insert(struct request_queue *q, struct request *rq, int where) |
619 | { | 619 | { |
620 | struct list_head *pos; | ||
621 | unsigned ordseq; | ||
622 | int unplug_it = 1; | 620 | int unplug_it = 1; |
623 | 621 | ||
624 | trace_block_rq_insert(q, rq); | 622 | trace_block_rq_insert(q, rq); |
@@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
626 | rq->q = q; | 624 | rq->q = q; |
627 | 625 | ||
628 | switch (where) { | 626 | switch (where) { |
627 | case ELEVATOR_INSERT_REQUEUE: | ||
628 | /* | ||
629 | * Most requeues happen because of a busy condition, | ||
630 | * don't force unplug of the queue for that case. | ||
631 | * Clear unplug_it and fall through. | ||
632 | */ | ||
633 | unplug_it = 0; | ||
634 | |||
629 | case ELEVATOR_INSERT_FRONT: | 635 | case ELEVATOR_INSERT_FRONT: |
630 | rq->cmd_flags |= REQ_SOFTBARRIER; | 636 | rq->cmd_flags |= REQ_SOFTBARRIER; |
631 | |||
632 | list_add(&rq->queuelist, &q->queue_head); | 637 | list_add(&rq->queuelist, &q->queue_head); |
633 | break; | 638 | break; |
634 | 639 | ||
@@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
668 | q->elevator->ops->elevator_add_req_fn(q, rq); | 673 | q->elevator->ops->elevator_add_req_fn(q, rq); |
669 | break; | 674 | break; |
670 | 675 | ||
671 | case ELEVATOR_INSERT_REQUEUE: | ||
672 | /* | ||
673 | * If ordered flush isn't in progress, we do front | ||
674 | * insertion; otherwise, requests should be requeued | ||
675 | * in ordseq order. | ||
676 | */ | ||
677 | rq->cmd_flags |= REQ_SOFTBARRIER; | ||
678 | |||
679 | /* | ||
680 | * Most requeues happen because of a busy condition, | ||
681 | * don't force unplug of the queue for that case. | ||
682 | */ | ||
683 | unplug_it = 0; | ||
684 | |||
685 | if (q->ordseq == 0) { | ||
686 | list_add(&rq->queuelist, &q->queue_head); | ||
687 | break; | ||
688 | } | ||
689 | |||
690 | ordseq = blk_ordered_req_seq(rq); | ||
691 | |||
692 | list_for_each(pos, &q->queue_head) { | ||
693 | struct request *pos_rq = list_entry_rq(pos); | ||
694 | if (ordseq <= blk_ordered_req_seq(pos_rq)) | ||
695 | break; | ||
696 | } | ||
697 | |||
698 | list_add_tail(&rq->queuelist, pos); | ||
699 | break; | ||
700 | |||
701 | default: | 676 | default: |
702 | printk(KERN_ERR "%s: bad insertion point %d\n", | 677 | printk(KERN_ERR "%s: bad insertion point %d\n", |
703 | __func__, where); | 678 | __func__, where); |
@@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
716 | void __elv_add_request(struct request_queue *q, struct request *rq, int where, | 691 | void __elv_add_request(struct request_queue *q, struct request *rq, int where, |
717 | int plug) | 692 | int plug) |
718 | { | 693 | { |
719 | if (q->ordcolor) | ||
720 | rq->cmd_flags |= REQ_ORDERED_COLOR; | ||
721 | |||
722 | if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { | 694 | if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { |
723 | /* | 695 | /* barriers are a scheduling boundary, update end_sector */ |
724 | * toggle ordered color | ||
725 | */ | ||
726 | if (rq->cmd_flags & REQ_HARDBARRIER) | ||
727 | q->ordcolor ^= 1; | ||
728 | |||
729 | /* | ||
730 | * barriers implicitly indicate back insertion | ||
731 | */ | ||
732 | if (where == ELEVATOR_INSERT_SORT) | ||
733 | where = ELEVATOR_INSERT_BACK; | ||
734 | |||
735 | /* | ||
736 | * this request is scheduling boundary, update | ||
737 | * end_sector | ||
738 | */ | ||
739 | if (rq->cmd_type == REQ_TYPE_FS || | 696 | if (rq->cmd_type == REQ_TYPE_FS || |
740 | (rq->cmd_flags & REQ_DISCARD)) { | 697 | (rq->cmd_flags & REQ_DISCARD)) { |
741 | q->end_sector = rq_end_sector(rq); | 698 | q->end_sector = rq_end_sector(rq); |
@@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) | |||
855 | e->ops->elevator_completed_req_fn) | 812 | e->ops->elevator_completed_req_fn) |
856 | e->ops->elevator_completed_req_fn(q, rq); | 813 | e->ops->elevator_completed_req_fn(q, rq); |
857 | } | 814 | } |
858 | |||
859 | /* | ||
860 | * Check if the queue is waiting for fs requests to be | ||
861 | * drained for flush sequence. | ||
862 | */ | ||
863 | if (unlikely(q->ordseq)) { | ||
864 | struct request *next = NULL; | ||
865 | |||
866 | if (!list_empty(&q->queue_head)) | ||
867 | next = list_entry_rq(q->queue_head.next); | ||
868 | |||
869 | if (!queue_in_flight(q) && | ||
870 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && | ||
871 | (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { | ||
872 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); | ||
873 | __blk_run_queue(q); | ||
874 | } | ||
875 | } | ||
876 | } | 815 | } |
877 | 816 | ||
878 | #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) | 817 | #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) |
diff --git a/block/ioctl.c b/block/ioctl.c index 2c15fe0912c4..d724ceb1d465 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev) | |||
116 | static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | 116 | static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, |
117 | uint64_t len, int secure) | 117 | uint64_t len, int secure) |
118 | { | 118 | { |
119 | unsigned long flags = BLKDEV_IFL_WAIT; | 119 | unsigned long flags = 0; |
120 | 120 | ||
121 | if (start & 511) | 121 | if (start & 511) |
122 | return -EINVAL; | 122 | return -EINVAL; |
@@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | |||
128 | if (start + len > (bdev->bd_inode->i_size >> 9)) | 128 | if (start + len > (bdev->bd_inode->i_size >> 9)) |
129 | return -EINVAL; | 129 | return -EINVAL; |
130 | if (secure) | 130 | if (secure) |
131 | flags |= BLKDEV_IFL_SECURE; | 131 | flags |= BLKDEV_DISCARD_SECURE; |
132 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); | 132 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); |
133 | } | 133 | } |
134 | 134 | ||
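The secure variant above is reached from the BLKSECDISCARD ioctl. A small userspace sketch of exercising it (offsets and lengths are in bytes and must be 512-byte aligned); illustrative only, not part of the commit.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

/* Returns 0 on success, -1 with errno set otherwise. */
static int secure_discard_range(int fd, uint64_t start, uint64_t len)
{
	uint64_t range[2] = { start, len };

	return ioctl(fd, BLKSECDISCARD, &range);
}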
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 82bfd5bb4a97..b7f51e4594f8 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
@@ -483,7 +483,6 @@ static struct brd_device *brd_alloc(int i) | |||
483 | if (!brd->brd_queue) | 483 | if (!brd->brd_queue) |
484 | goto out_free_dev; | 484 | goto out_free_dev; |
485 | blk_queue_make_request(brd->brd_queue, brd_make_request); | 485 | blk_queue_make_request(brd->brd_queue, brd_make_request); |
486 | blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG); | ||
487 | blk_queue_max_hw_sectors(brd->brd_queue, 1024); | 486 | blk_queue_max_hw_sectors(brd->brd_queue, 1024); |
488 | blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); | 487 | blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); |
489 | 488 | ||
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c07c370c4c82..9bdcf4393c0a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -2409,8 +2409,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) | |||
2409 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) | 2409 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) |
2410 | return; | 2410 | return; |
2411 | 2411 | ||
2412 | r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL, | 2412 | r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); |
2413 | BLKDEV_IFL_WAIT); | ||
2414 | if (r) { | 2413 | if (r) { |
2415 | set_bit(MD_NO_BARRIER, &mdev->flags); | 2414 | set_bit(MD_NO_BARRIER, &mdev->flags); |
2416 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); | 2415 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 760ae0df9251..efd6169acf2f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -987,7 +987,7 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d | |||
987 | 987 | ||
988 | if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { | 988 | if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { |
989 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, | 989 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, |
990 | NULL, BLKDEV_IFL_WAIT); | 990 | NULL); |
991 | if (rv) { | 991 | if (rv) { |
992 | dev_err(DEV, "local disk flush failed with status %d\n", rv); | 992 | dev_err(DEV, "local disk flush failed with status %d\n", rv); |
993 | /* would rather check on EOPNOTSUPP, but that is not reliable. | 993 | /* would rather check on EOPNOTSUPP, but that is not reliable. |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index de3083b0a4f5..6c48b3545f84 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -479,17 +479,17 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | |||
479 | pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; | 479 | pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; |
480 | 480 | ||
481 | if (bio_rw(bio) == WRITE) { | 481 | if (bio_rw(bio) == WRITE) { |
482 | bool barrier = !!(bio->bi_rw & REQ_HARDBARRIER); | ||
483 | struct file *file = lo->lo_backing_file; | 482 | struct file *file = lo->lo_backing_file; |
484 | 483 | ||
485 | if (barrier) { | 484 | /* REQ_HARDBARRIER is deprecated */ |
486 | if (unlikely(!file->f_op->fsync)) { | 485 | if (bio->bi_rw & REQ_HARDBARRIER) { |
487 | ret = -EOPNOTSUPP; | 486 | ret = -EOPNOTSUPP; |
488 | goto out; | 487 | goto out; |
489 | } | 488 | } |
490 | 489 | ||
490 | if (bio->bi_rw & REQ_FLUSH) { | ||
491 | ret = vfs_fsync(file, 0); | 491 | ret = vfs_fsync(file, 0); |
492 | if (unlikely(ret)) { | 492 | if (unlikely(ret && ret != -EINVAL)) { |
493 | ret = -EIO; | 493 | ret = -EIO; |
494 | goto out; | 494 | goto out; |
495 | } | 495 | } |
@@ -497,9 +497,9 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | |||
497 | 497 | ||
498 | ret = lo_send(lo, bio, pos); | 498 | ret = lo_send(lo, bio, pos); |
499 | 499 | ||
500 | if (barrier && !ret) { | 500 | if ((bio->bi_rw & REQ_FUA) && !ret) { |
501 | ret = vfs_fsync(file, 0); | 501 | ret = vfs_fsync(file, 0); |
502 | if (unlikely(ret)) | 502 | if (unlikely(ret && ret != -EINVAL)) |
503 | ret = -EIO; | 503 | ret = -EIO; |
504 | } | 504 | } |
505 | } else | 505 | } else |
@@ -931,7 +931,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, | |||
931 | lo->lo_queue->unplug_fn = loop_unplug; | 931 | lo->lo_queue->unplug_fn = loop_unplug; |
932 | 932 | ||
933 | if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) | 933 | if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) |
934 | blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN); | 934 | blk_queue_flush(lo->lo_queue, REQ_FLUSH); |
935 | 935 | ||
936 | set_capacity(lo->lo_disk, size); | 936 | set_capacity(lo->lo_disk, size); |
937 | bd_set_size(bdev, size << 9); | 937 | bd_set_size(bdev, size << 9); |
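A condensed sketch of the pattern loop adopts above for a file-backed, bio-based driver: REQ_FLUSH means "sync the backing store before the data", REQ_FUA means "sync it again once the data has been written". do_write() stands in for lo_send(); the names are illustrative, not from the commit.

#include <linux/fs.h>
#include <linux/bio.h>

static int example_filebacked_write(struct file *file, struct bio *bio,
				    int (*do_write)(struct file *, struct bio *))
{
	int ret = 0;

	if (bio->bi_rw & REQ_FLUSH) {
		ret = vfs_fsync(file, 0);
		if (unlikely(ret && ret != -EINVAL))
			return -EIO;
	}

	ret = do_write(file, bio);

	if (!ret && (bio->bi_rw & REQ_FUA)) {
		ret = vfs_fsync(file, 0);
		if (unlikely(ret && ret != -EINVAL))
			ret = -EIO;
	}
	return ret;
}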
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 2284b4f05c62..87311ebac0db 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c | |||
@@ -310,8 +310,7 @@ static void osdblk_rq_fn(struct request_queue *q) | |||
310 | break; | 310 | break; |
311 | 311 | ||
312 | /* filter out block requests we don't understand */ | 312 | /* filter out block requests we don't understand */ |
313 | if (rq->cmd_type != REQ_TYPE_FS && | 313 | if (rq->cmd_type != REQ_TYPE_FS) { |
314 | !(rq->cmd_flags & REQ_HARDBARRIER)) { | ||
315 | blk_end_request_all(rq, 0); | 314 | blk_end_request_all(rq, 0); |
316 | continue; | 315 | continue; |
317 | } | 316 | } |
@@ -439,7 +438,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) | |||
439 | blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); | 438 | blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); |
440 | 439 | ||
441 | blk_queue_prep_rq(q, blk_queue_start_tag); | 440 | blk_queue_prep_rq(q, blk_queue_start_tag); |
442 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); | 441 | blk_queue_flush(q, REQ_FLUSH); |
443 | 442 | ||
444 | disk->queue = q; | 443 | disk->queue = q; |
445 | 444 | ||
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index ef58fccadad3..19b3568e9326 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c | |||
@@ -753,7 +753,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * | |||
753 | 753 | ||
754 | rq->timeout = 60*HZ; | 754 | rq->timeout = 60*HZ; |
755 | rq->cmd_type = REQ_TYPE_BLOCK_PC; | 755 | rq->cmd_type = REQ_TYPE_BLOCK_PC; |
756 | rq->cmd_flags |= REQ_HARDBARRIER; | ||
757 | if (cgc->quiet) | 756 | if (cgc->quiet) |
758 | rq->cmd_flags |= REQ_QUIET; | 757 | rq->cmd_flags |= REQ_QUIET; |
759 | 758 | ||
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 03688c2da319..8e1ce2e2916a 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c | |||
@@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) | |||
468 | blk_queue_dma_alignment(queue, dev->blk_size-1); | 468 | blk_queue_dma_alignment(queue, dev->blk_size-1); |
469 | blk_queue_logical_block_size(queue, dev->blk_size); | 469 | blk_queue_logical_block_size(queue, dev->blk_size); |
470 | 470 | ||
471 | blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); | 471 | blk_queue_flush(queue, REQ_FLUSH); |
472 | 472 | ||
473 | blk_queue_max_segments(queue, -1); | 473 | blk_queue_max_segments(queue, -1); |
474 | blk_queue_max_segment_size(queue, dev->bounce_size); | 474 | blk_queue_max_segment_size(queue, dev->bounce_size); |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 8320490226b7..6ecf89cdf006 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -127,9 +127,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
127 | } | 127 | } |
128 | } | 128 | } |
129 | 129 | ||
130 | if (vbr->req->cmd_flags & REQ_HARDBARRIER) | ||
131 | vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; | ||
132 | |||
133 | sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); | 130 | sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); |
134 | 131 | ||
135 | /* | 132 | /* |
@@ -379,31 +376,9 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
379 | vblk->disk->driverfs_dev = &vdev->dev; | 376 | vblk->disk->driverfs_dev = &vdev->dev; |
380 | index++; | 377 | index++; |
381 | 378 | ||
382 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { | 379 | /* configure queue flush support */ |
383 | /* | 380 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) |
384 | * If the FLUSH feature is supported we do have support for | 381 | blk_queue_flush(q, REQ_FLUSH); |
385 | * flushing a volatile write cache on the host. Use that | ||
386 | * to implement write barrier support. | ||
387 | */ | ||
388 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); | ||
389 | } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) { | ||
390 | /* | ||
391 | * If the BARRIER feature is supported the host expects us | ||
392 | * to order requests by tags. This implies there is no | ||
393 | * volatile write cache on the host, and that the host | ||
394 | * never re-orders outstanding I/O. This feature is not | ||
395 | * useful for real-life scenarios and is deprecated. | ||
396 | */ | ||
397 | blk_queue_ordered(q, QUEUE_ORDERED_TAG); | ||
398 | } else { | ||
399 | /* | ||
400 | * If the FLUSH feature is not supported we must assume that | ||
401 | * the host does not perform any kind of volatile write | ||
402 | * caching. We still need to drain the queue to provide | ||
403 | * proper barrier semantics. | ||
404 | */ | ||
405 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); | ||
406 | } | ||
407 | 382 | ||
408 | /* If disk is read-only in the host, the guest should obey */ | 383 | /* If disk is read-only in the host, the guest should obey */ |
409 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) | 384 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) |
@@ -522,9 +497,9 @@ static const struct virtio_device_id id_table[] = { | |||
522 | }; | 497 | }; |
523 | 498 | ||
524 | static unsigned int features[] = { | 499 | static unsigned int features[] = { |
525 | VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, | 500 | VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, |
526 | VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, | 501 | VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, |
527 | VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY | 502 | VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY |
528 | }; | 503 | }; |
529 | 504 | ||
530 | /* | 505 | /* |
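With the QUEUE_ORDERED_* modes gone, a request-based driver like the one above only advertises REQ_FLUSH and then recognizes the empty flush request in its request function. A rough sketch under that assumption; issue_cache_flush() and issue_rw() are hypothetical helpers, not virtio-blk code.

#include <linux/blkdev.h>

static void issue_cache_flush(struct request *rq);	/* hypothetical */
static void issue_rw(struct request *rq);		/* hypothetical */

static void example_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		if (rq->cmd_flags & REQ_FLUSH)
			issue_cache_flush(rq);	/* no data payload */
		else
			issue_rw(rq);
	}
}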
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3ff06f475eef..4b33a18c32e0 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -96,7 +96,7 @@ struct blkfront_info | |||
96 | struct gnttab_free_callback callback; | 96 | struct gnttab_free_callback callback; |
97 | struct blk_shadow shadow[BLK_RING_SIZE]; | 97 | struct blk_shadow shadow[BLK_RING_SIZE]; |
98 | unsigned long shadow_free; | 98 | unsigned long shadow_free; |
99 | int feature_barrier; | 99 | unsigned int feature_flush; |
100 | int is_ready; | 100 | int is_ready; |
101 | }; | 101 | }; |
102 | 102 | ||
@@ -419,26 +419,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | |||
419 | } | 419 | } |
420 | 420 | ||
421 | 421 | ||
422 | static int xlvbd_barrier(struct blkfront_info *info) | 422 | static void xlvbd_flush(struct blkfront_info *info) |
423 | { | 423 | { |
424 | int err; | 424 | blk_queue_flush(info->rq, info->feature_flush); |
425 | const char *barrier; | ||
426 | |||
427 | switch (info->feature_barrier) { | ||
428 | case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; | ||
429 | case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; | ||
430 | case QUEUE_ORDERED_NONE: barrier = "disabled"; break; | ||
431 | default: return -EINVAL; | ||
432 | } | ||
433 | |||
434 | err = blk_queue_ordered(info->rq, info->feature_barrier); | ||
435 | |||
436 | if (err) | ||
437 | return err; | ||
438 | |||
439 | printk(KERN_INFO "blkfront: %s: barriers %s\n", | 425 | printk(KERN_INFO "blkfront: %s: barriers %s\n", |
440 | info->gd->disk_name, barrier); | 426 | info->gd->disk_name, |
441 | return 0; | 427 | info->feature_flush ? "enabled" : "disabled"); |
442 | } | 428 | } |
443 | 429 | ||
444 | 430 | ||
@@ -517,7 +503,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, | |||
517 | info->rq = gd->queue; | 503 | info->rq = gd->queue; |
518 | info->gd = gd; | 504 | info->gd = gd; |
519 | 505 | ||
520 | xlvbd_barrier(info); | 506 | xlvbd_flush(info); |
521 | 507 | ||
522 | if (vdisk_info & VDISK_READONLY) | 508 | if (vdisk_info & VDISK_READONLY) |
523 | set_disk_ro(gd, 1); | 509 | set_disk_ro(gd, 1); |
@@ -663,8 +649,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
663 | printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", | 649 | printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", |
664 | info->gd->disk_name); | 650 | info->gd->disk_name); |
665 | error = -EOPNOTSUPP; | 651 | error = -EOPNOTSUPP; |
666 | info->feature_barrier = QUEUE_ORDERED_NONE; | 652 | info->feature_flush = 0; |
667 | xlvbd_barrier(info); | 653 | xlvbd_flush(info); |
668 | } | 654 | } |
669 | /* fall through */ | 655 | /* fall through */ |
670 | case BLKIF_OP_READ: | 656 | case BLKIF_OP_READ: |
@@ -1077,20 +1063,20 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1077 | /* | 1063 | /* |
1078 | * If there's no "feature-barrier" defined, then it means | 1064 | * If there's no "feature-barrier" defined, then it means |
1079 | * we're dealing with a very old backend which writes | 1065 | * we're dealing with a very old backend which writes |
1080 | * synchronously; draining will do what needs to get done. | 1066 | * synchronously; nothing to do. |
1081 | * | 1067 | * |
1082 | * If there are barriers, then we can do full queued writes | 1068 | * If there are barriers, then we use flush. |
1083 | * with tagged barriers. | ||
1084 | * | ||
1085 | * If barriers are not supported, then there's no much we can | ||
1086 | * do, so just set ordering to NONE. | ||
1087 | */ | 1069 | */ |
1088 | if (err) | 1070 | info->feature_flush = 0; |
1089 | info->feature_barrier = QUEUE_ORDERED_DRAIN; | 1071 | |
1090 | else if (barrier) | 1072 | /* |
1091 | info->feature_barrier = QUEUE_ORDERED_TAG; | 1073 | * The driver doesn't properly handle empty flushes, so |
1092 | else | 1074 | * let's disable barrier support for now. |
1093 | info->feature_barrier = QUEUE_ORDERED_NONE; | 1075 | */ |
1076 | #if 0 | ||
1077 | if (!err && barrier) | ||
1078 | info->feature_flush = REQ_FLUSH; | ||
1079 | #endif | ||
1094 | 1080 | ||
1095 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); | 1081 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); |
1096 | if (err) { | 1082 | if (err) { |
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7433e07de30e..7c5b01ce51d2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c | |||
@@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) | |||
516 | return ide_no_data_taskfile(drive, &cmd); | 516 | return ide_no_data_taskfile(drive, &cmd); |
517 | } | 517 | } |
518 | 518 | ||
519 | static void update_ordered(ide_drive_t *drive) | 519 | static void update_flush(ide_drive_t *drive) |
520 | { | 520 | { |
521 | u16 *id = drive->id; | 521 | u16 *id = drive->id; |
522 | unsigned ordered = QUEUE_ORDERED_NONE; | 522 | unsigned flush = 0; |
523 | 523 | ||
524 | if (drive->dev_flags & IDE_DFLAG_WCACHE) { | 524 | if (drive->dev_flags & IDE_DFLAG_WCACHE) { |
525 | unsigned long long capacity; | 525 | unsigned long long capacity; |
@@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive) | |||
543 | drive->name, barrier ? "" : "not "); | 543 | drive->name, barrier ? "" : "not "); |
544 | 544 | ||
545 | if (barrier) { | 545 | if (barrier) { |
546 | ordered = QUEUE_ORDERED_DRAIN_FLUSH; | 546 | flush = REQ_FLUSH; |
547 | blk_queue_prep_rq(drive->queue, idedisk_prep_fn); | 547 | blk_queue_prep_rq(drive->queue, idedisk_prep_fn); |
548 | } | 548 | } |
549 | } else | 549 | } |
550 | ordered = QUEUE_ORDERED_DRAIN; | ||
551 | 550 | ||
552 | blk_queue_ordered(drive->queue, ordered); | 551 | blk_queue_flush(drive->queue, flush); |
553 | } | 552 | } |
554 | 553 | ||
555 | ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); | 554 | ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); |
@@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg) | |||
572 | } | 571 | } |
573 | } | 572 | } |
574 | 573 | ||
575 | update_ordered(drive); | 574 | update_flush(drive); |
576 | 575 | ||
577 | return err; | 576 | return err; |
578 | } | 577 | } |
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a381be814070..999dac054bcc 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c | |||
@@ -441,19 +441,6 @@ void do_ide_request(struct request_queue *q) | |||
441 | struct request *rq = NULL; | 441 | struct request *rq = NULL; |
442 | ide_startstop_t startstop; | 442 | ide_startstop_t startstop; |
443 | 443 | ||
444 | /* | ||
445 | * drive is doing pre-flush, ordered write, post-flush sequence. even | ||
446 | * though that is 3 requests, it must be seen as a single transaction. | ||
447 | * we must not preempt this drive until that is complete | ||
448 | */ | ||
449 | if (blk_queue_flushing(q)) | ||
450 | /* | ||
451 | * small race where queue could get replugged during | ||
452 | * the 3-request flush cycle, just yank the plug since | ||
453 | * we want it to finish asap | ||
454 | */ | ||
455 | blk_remove_plug(q); | ||
456 | |||
457 | spin_unlock_irq(q->queue_lock); | 444 | spin_unlock_irq(q->queue_lock); |
458 | 445 | ||
459 | /* HLD do_request() callback might sleep, make sure it's okay */ | 446 | /* HLD do_request() callback might sleep, make sure it's okay */ |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 368e8e98f705..d5b0e4c0e702 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1278 | struct dm_crypt_io *io; | 1278 | struct dm_crypt_io *io; |
1279 | struct crypt_config *cc; | 1279 | struct crypt_config *cc; |
1280 | 1280 | ||
1281 | if (unlikely(bio_empty_barrier(bio))) { | 1281 | if (bio->bi_rw & REQ_FLUSH) { |
1282 | cc = ti->private; | 1282 | cc = ti->private; |
1283 | bio->bi_bdev = cc->dev->bdev; | 1283 | bio->bi_bdev = cc->dev->bdev; |
1284 | return DM_MAPIO_REMAPPED; | 1284 | return DM_MAPIO_REMAPPED; |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 0590c75b0ab6..136d4f71a116 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c | |||
@@ -31,7 +31,6 @@ struct dm_io_client { | |||
31 | */ | 31 | */ |
32 | struct io { | 32 | struct io { |
33 | unsigned long error_bits; | 33 | unsigned long error_bits; |
34 | unsigned long eopnotsupp_bits; | ||
35 | atomic_t count; | 34 | atomic_t count; |
36 | struct task_struct *sleeper; | 35 | struct task_struct *sleeper; |
37 | struct dm_io_client *client; | 36 | struct dm_io_client *client; |
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, | |||
130 | *---------------------------------------------------------------*/ | 129 | *---------------------------------------------------------------*/ |
131 | static void dec_count(struct io *io, unsigned int region, int error) | 130 | static void dec_count(struct io *io, unsigned int region, int error) |
132 | { | 131 | { |
133 | if (error) { | 132 | if (error) |
134 | set_bit(region, &io->error_bits); | 133 | set_bit(region, &io->error_bits); |
135 | if (error == -EOPNOTSUPP) | ||
136 | set_bit(region, &io->eopnotsupp_bits); | ||
137 | } | ||
138 | 134 | ||
139 | if (atomic_dec_and_test(&io->count)) { | 135 | if (atomic_dec_and_test(&io->count)) { |
140 | if (io->sleeper) | 136 | if (io->sleeper) |
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, | |||
310 | sector_t remaining = where->count; | 306 | sector_t remaining = where->count; |
311 | 307 | ||
312 | /* | 308 | /* |
313 | * where->count may be zero if rw holds a write barrier and we | 309 | * where->count may be zero if rw holds a flush and we need to |
314 | * need to send a zero-sized barrier. | 310 | * send a zero-sized flush. |
315 | */ | 311 | */ |
316 | do { | 312 | do { |
317 | /* | 313 | /* |
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions, | |||
364 | */ | 360 | */ |
365 | for (i = 0; i < num_regions; i++) { | 361 | for (i = 0; i < num_regions; i++) { |
366 | *dp = old_pages; | 362 | *dp = old_pages; |
367 | if (where[i].count || (rw & REQ_HARDBARRIER)) | 363 | if (where[i].count || (rw & REQ_FLUSH)) |
368 | do_region(rw, i, where + i, dp, io); | 364 | do_region(rw, i, where + i, dp, io); |
369 | } | 365 | } |
370 | 366 | ||
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, | |||
393 | return -EIO; | 389 | return -EIO; |
394 | } | 390 | } |
395 | 391 | ||
396 | retry: | ||
397 | io->error_bits = 0; | 392 | io->error_bits = 0; |
398 | io->eopnotsupp_bits = 0; | ||
399 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 393 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
400 | io->sleeper = current; | 394 | io->sleeper = current; |
401 | io->client = client; | 395 | io->client = client; |
@@ -412,11 +406,6 @@ retry: | |||
412 | } | 406 | } |
413 | set_current_state(TASK_RUNNING); | 407 | set_current_state(TASK_RUNNING); |
414 | 408 | ||
415 | if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) { | ||
416 | rw &= ~REQ_HARDBARRIER; | ||
417 | goto retry; | ||
418 | } | ||
419 | |||
420 | if (error_bits) | 409 | if (error_bits) |
421 | *error_bits = io->error_bits; | 410 | *error_bits = io->error_bits; |
422 | 411 | ||
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, | |||
437 | 426 | ||
438 | io = mempool_alloc(client->pool, GFP_NOIO); | 427 | io = mempool_alloc(client->pool, GFP_NOIO); |
439 | io->error_bits = 0; | 428 | io->error_bits = 0; |
440 | io->eopnotsupp_bits = 0; | ||
441 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 429 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
442 | io->sleeper = NULL; | 430 | io->sleeper = NULL; |
443 | io->client = client; | 431 | io->client = client; |
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 5a08be0222db..33420e68d153 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c | |||
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc) | |||
300 | .count = 0, | 300 | .count = 0, |
301 | }; | 301 | }; |
302 | 302 | ||
303 | lc->io_req.bi_rw = WRITE_BARRIER; | 303 | lc->io_req.bi_rw = WRITE_FLUSH; |
304 | 304 | ||
305 | return dm_io(&lc->io_req, 1, &null_location, NULL); | 305 | return dm_io(&lc->io_req, 1, &null_location, NULL); |
306 | } | 306 | } |
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 7c081bcbc3cf..19a59b041c27 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti) | |||
259 | struct dm_io_region io[ms->nr_mirrors]; | 259 | struct dm_io_region io[ms->nr_mirrors]; |
260 | struct mirror *m; | 260 | struct mirror *m; |
261 | struct dm_io_request io_req = { | 261 | struct dm_io_request io_req = { |
262 | .bi_rw = WRITE_BARRIER, | 262 | .bi_rw = WRITE_FLUSH, |
263 | .mem.type = DM_IO_KMEM, | 263 | .mem.type = DM_IO_KMEM, |
264 | .mem.ptr.bvec = NULL, | 264 | .mem.ptr.bvec = NULL, |
265 | .client = ms->io_client, | 265 | .client = ms->io_client, |
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) | |||
629 | struct dm_io_region io[ms->nr_mirrors], *dest = io; | 629 | struct dm_io_region io[ms->nr_mirrors], *dest = io; |
630 | struct mirror *m; | 630 | struct mirror *m; |
631 | struct dm_io_request io_req = { | 631 | struct dm_io_request io_req = { |
632 | .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER), | 632 | .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), |
633 | .mem.type = DM_IO_BVEC, | 633 | .mem.type = DM_IO_BVEC, |
634 | .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, | 634 | .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, |
635 | .notify.fn = write_callback, | 635 | .notify.fn = write_callback, |
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
670 | bio_list_init(&requeue); | 670 | bio_list_init(&requeue); |
671 | 671 | ||
672 | while ((bio = bio_list_pop(writes))) { | 672 | while ((bio = bio_list_pop(writes))) { |
673 | if (unlikely(bio_empty_barrier(bio))) { | 673 | if (bio->bi_rw & REQ_FLUSH) { |
674 | bio_list_add(&sync, bio); | 674 | bio_list_add(&sync, bio); |
675 | continue; | 675 | continue; |
676 | } | 676 | } |
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, | |||
1203 | * We need to dec pending if this was a write. | 1203 | * We need to dec pending if this was a write. |
1204 | */ | 1204 | */ |
1205 | if (rw == WRITE) { | 1205 | if (rw == WRITE) { |
1206 | if (likely(!bio_empty_barrier(bio))) | 1206 | if (!(bio->bi_rw & REQ_FLUSH)) |
1207 | dm_rh_dec(ms->rh, map_context->ll); | 1207 | dm_rh_dec(ms->rh, map_context->ll); |
1208 | return error; | 1208 | return error; |
1209 | } | 1209 | } |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index bd5c58b28868..dad011aed0c9 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c | |||
@@ -81,9 +81,9 @@ struct dm_region_hash { | |||
81 | struct list_head failed_recovered_regions; | 81 | struct list_head failed_recovered_regions; |
82 | 82 | ||
83 | /* | 83 | /* |
84 | * If there was a barrier failure no regions can be marked clean. | 84 | * If there was a flush failure no regions can be marked clean. |
85 | */ | 85 | */ |
86 | int barrier_failure; | 86 | int flush_failure; |
87 | 87 | ||
88 | void *context; | 88 | void *context; |
89 | sector_t target_begin; | 89 | sector_t target_begin; |
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create( | |||
217 | INIT_LIST_HEAD(&rh->quiesced_regions); | 217 | INIT_LIST_HEAD(&rh->quiesced_regions); |
218 | INIT_LIST_HEAD(&rh->recovered_regions); | 218 | INIT_LIST_HEAD(&rh->recovered_regions); |
219 | INIT_LIST_HEAD(&rh->failed_recovered_regions); | 219 | INIT_LIST_HEAD(&rh->failed_recovered_regions); |
220 | rh->barrier_failure = 0; | 220 | rh->flush_failure = 0; |
221 | 221 | ||
222 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, | 222 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, |
223 | sizeof(struct dm_region)); | 223 | sizeof(struct dm_region)); |
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) | |||
399 | region_t region = dm_rh_bio_to_region(rh, bio); | 399 | region_t region = dm_rh_bio_to_region(rh, bio); |
400 | int recovering = 0; | 400 | int recovering = 0; |
401 | 401 | ||
402 | if (bio_empty_barrier(bio)) { | 402 | if (bio->bi_rw & REQ_FLUSH) { |
403 | rh->barrier_failure = 1; | 403 | rh->flush_failure = 1; |
404 | return; | 404 | return; |
405 | } | 405 | } |
406 | 406 | ||
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) | |||
524 | struct bio *bio; | 524 | struct bio *bio; |
525 | 525 | ||
526 | for (bio = bios->head; bio; bio = bio->bi_next) { | 526 | for (bio = bios->head; bio; bio = bio->bi_next) { |
527 | if (bio_empty_barrier(bio)) | 527 | if (bio->bi_rw & REQ_FLUSH) |
528 | continue; | 528 | continue; |
529 | rh_inc(rh, dm_rh_bio_to_region(rh, bio)); | 529 | rh_inc(rh, dm_rh_bio_to_region(rh, bio)); |
530 | } | 530 | } |
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region) | |||
555 | */ | 555 | */ |
556 | 556 | ||
557 | /* do nothing for DM_RH_NOSYNC */ | 557 | /* do nothing for DM_RH_NOSYNC */ |
558 | if (unlikely(rh->barrier_failure)) { | 558 | if (unlikely(rh->flush_failure)) { |
559 | /* | 559 | /* |
560 | * If a write barrier failed some time ago, we | 560 | * If a write flush failed some time ago, we |
561 | * don't know whether or not this write made it | 561 | * don't know whether or not this write made it |
562 | * to the disk, so we must resync the device. | 562 | * to the disk, so we must resync the device. |
563 | */ | 563 | */ |
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index cc2bdb83f9ad..0b61792a2780 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c | |||
@@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, | |||
687 | /* | 687 | /* |
688 | * Commit exceptions to disk. | 688 | * Commit exceptions to disk. |
689 | */ | 689 | */ |
690 | if (ps->valid && area_io(ps, WRITE_BARRIER)) | 690 | if (ps->valid && area_io(ps, WRITE_FLUSH_FUA)) |
691 | ps->valid = 0; | 691 | ps->valid = 0; |
692 | 692 | ||
693 | /* | 693 | /* |
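WRITE_FLUSH_FUA above is the explicit replacement for WRITE_BARRIER throughout this series: roughly WRITE plus REQ_FLUSH and REQ_FUA, i.e. preflush the cache, then write the block through to stable media, with no queue draining implied. A minimal synchronous sketch of such a commit write (names are illustrative, not from the commit):

#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/completion.h>

static void example_commit_end_io(struct bio *bio, int err)
{
	complete(bio->bi_private);
}

static int example_commit_block(struct block_device *bdev, struct page *page,
				sector_t sector)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	int ret = 0;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = example_commit_end_io;
	bio->bi_private = &done;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	/* Flush the cache first, then force this block to stable media. */
	submit_bio(WRITE_FLUSH_FUA, bio);
	wait_for_completion(&done);

	if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;
	bio_put(bio);
	return ret;
}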
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f30f6e8d594e..53cf79d8bcbc 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -1585,7 +1585,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
1585 | chunk_t chunk; | 1585 | chunk_t chunk; |
1586 | struct dm_snap_pending_exception *pe = NULL; | 1586 | struct dm_snap_pending_exception *pe = NULL; |
1587 | 1587 | ||
1588 | if (unlikely(bio_empty_barrier(bio))) { | 1588 | if (bio->bi_rw & REQ_FLUSH) { |
1589 | bio->bi_bdev = s->cow->bdev; | 1589 | bio->bi_bdev = s->cow->bdev; |
1590 | return DM_MAPIO_REMAPPED; | 1590 | return DM_MAPIO_REMAPPED; |
1591 | } | 1591 | } |
@@ -1689,7 +1689,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, | |||
1689 | int r = DM_MAPIO_REMAPPED; | 1689 | int r = DM_MAPIO_REMAPPED; |
1690 | chunk_t chunk; | 1690 | chunk_t chunk; |
1691 | 1691 | ||
1692 | if (unlikely(bio_empty_barrier(bio))) { | 1692 | if (bio->bi_rw & REQ_FLUSH) { |
1693 | if (!map_context->target_request_nr) | 1693 | if (!map_context->target_request_nr) |
1694 | bio->bi_bdev = s->origin->bdev; | 1694 | bio->bi_bdev = s->origin->bdev; |
1695 | else | 1695 | else |
@@ -2133,7 +2133,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio, | |||
2133 | struct dm_dev *dev = ti->private; | 2133 | struct dm_dev *dev = ti->private; |
2134 | bio->bi_bdev = dev->bdev; | 2134 | bio->bi_bdev = dev->bdev; |
2135 | 2135 | ||
2136 | if (unlikely(bio_empty_barrier(bio))) | 2136 | if (bio->bi_rw & REQ_FLUSH) |
2137 | return DM_MAPIO_REMAPPED; | 2137 | return DM_MAPIO_REMAPPED; |
2138 | 2138 | ||
2139 | /* Only tell snapshots if this is a write */ | 2139 | /* Only tell snapshots if this is a write */ |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index c297f6da91ea..f0371b4c4fbf 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, | |||
271 | uint32_t stripe; | 271 | uint32_t stripe; |
272 | unsigned target_request_nr; | 272 | unsigned target_request_nr; |
273 | 273 | ||
274 | if (unlikely(bio_empty_barrier(bio))) { | 274 | if (bio->bi_rw & REQ_FLUSH) { |
275 | target_request_nr = map_context->target_request_nr; | 275 | target_request_nr = map_context->target_request_nr; |
276 | BUG_ON(target_request_nr >= sc->stripes); | 276 | BUG_ON(target_request_nr >= sc->stripes); |
277 | bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; | 277 | bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7967eca5a2d5..7cb1352f7e7a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
110 | #define DMF_FREEING 3 | 110 | #define DMF_FREEING 3 |
111 | #define DMF_DELETING 4 | 111 | #define DMF_DELETING 4 |
112 | #define DMF_NOFLUSH_SUSPENDING 5 | 112 | #define DMF_NOFLUSH_SUSPENDING 5 |
113 | #define DMF_QUEUE_IO_TO_THREAD 6 | ||
114 | 113 | ||
115 | /* | 114 | /* |
116 | * Work processed by per-device workqueue. | 115 | * Work processed by per-device workqueue. |
@@ -144,24 +143,9 @@ struct mapped_device { | |||
144 | spinlock_t deferred_lock; | 143 | spinlock_t deferred_lock; |
145 | 144 | ||
146 | /* | 145 | /* |
147 | * An error from the barrier request currently being processed. | 146 | * Processing queue (flush) |
148 | */ | ||
149 | int barrier_error; | ||
150 | |||
151 | /* | ||
152 | * Protect barrier_error from concurrent endio processing | ||
153 | * in request-based dm. | ||
154 | */ | ||
155 | spinlock_t barrier_error_lock; | ||
156 | |||
157 | /* | ||
158 | * Processing queue (flush/barriers) | ||
159 | */ | 147 | */ |
160 | struct workqueue_struct *wq; | 148 | struct workqueue_struct *wq; |
161 | struct work_struct barrier_work; | ||
162 | |||
163 | /* A pointer to the currently processing pre/post flush request */ | ||
164 | struct request *flush_request; | ||
165 | 149 | ||
166 | /* | 150 | /* |
167 | * The current mapping. | 151 | * The current mapping. |
@@ -200,8 +184,8 @@ struct mapped_device { | |||
200 | /* sysfs handle */ | 184 | /* sysfs handle */ |
201 | struct kobject kobj; | 185 | struct kobject kobj; |
202 | 186 | ||
203 | /* zero-length barrier that will be cloned and submitted to targets */ | 187 | /* zero-length flush that will be cloned and submitted to targets */ |
204 | struct bio barrier_bio; | 188 | struct bio flush_bio; |
205 | }; | 189 | }; |
206 | 190 | ||
207 | /* | 191 | /* |
@@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io) | |||
512 | 496 | ||
513 | /* | 497 | /* |
514 | * After this is decremented the bio must not be touched if it is | 498 | * After this is decremented the bio must not be touched if it is |
515 | * a barrier. | 499 | * a flush. |
516 | */ | 500 | */ |
517 | dm_disk(md)->part0.in_flight[rw] = pending = | 501 | dm_disk(md)->part0.in_flight[rw] = pending = |
518 | atomic_dec_return(&md->pending[rw]); | 502 | atomic_dec_return(&md->pending[rw]); |
@@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io) | |||
528 | */ | 512 | */ |
529 | static void queue_io(struct mapped_device *md, struct bio *bio) | 513 | static void queue_io(struct mapped_device *md, struct bio *bio) |
530 | { | 514 | { |
531 | down_write(&md->io_lock); | 515 | unsigned long flags; |
532 | 516 | ||
533 | spin_lock_irq(&md->deferred_lock); | 517 | spin_lock_irqsave(&md->deferred_lock, flags); |
534 | bio_list_add(&md->deferred, bio); | 518 | bio_list_add(&md->deferred, bio); |
535 | spin_unlock_irq(&md->deferred_lock); | 519 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
536 | 520 | queue_work(md->wq, &md->work); | |
537 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) | ||
538 | queue_work(md->wq, &md->work); | ||
539 | |||
540 | up_write(&md->io_lock); | ||
541 | } | 521 | } |
542 | 522 | ||
543 | /* | 523 | /* |
@@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error) | |||
625 | * Target requested pushing back the I/O. | 605 | * Target requested pushing back the I/O. |
626 | */ | 606 | */ |
627 | spin_lock_irqsave(&md->deferred_lock, flags); | 607 | spin_lock_irqsave(&md->deferred_lock, flags); |
628 | if (__noflush_suspending(md)) { | 608 | if (__noflush_suspending(md)) |
629 | if (!(io->bio->bi_rw & REQ_HARDBARRIER)) | 609 | bio_list_add_head(&md->deferred, io->bio); |
630 | bio_list_add_head(&md->deferred, | 610 | else |
631 | io->bio); | ||
632 | } else | ||
633 | /* noflush suspend was interrupted. */ | 611 | /* noflush suspend was interrupted. */ |
634 | io->error = -EIO; | 612 | io->error = -EIO; |
635 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 613 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
@@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error) | |||
637 | 615 | ||
638 | io_error = io->error; | 616 | io_error = io->error; |
639 | bio = io->bio; | 617 | bio = io->bio; |
618 | end_io_acct(io); | ||
619 | free_io(md, io); | ||
620 | |||
621 | if (io_error == DM_ENDIO_REQUEUE) | ||
622 | return; | ||
640 | 623 | ||
641 | if (bio->bi_rw & REQ_HARDBARRIER) { | 624 | if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { |
642 | /* | 625 | /* |
643 | * There can be just one barrier request so we use | 626 | * Preflush done for flush with data, reissue |
644 | * a per-device variable for error reporting. | 627 | * without REQ_FLUSH. |
645 | * Note that you can't touch the bio after end_io_acct | ||
646 | * | ||
647 | * We ignore -EOPNOTSUPP for empty flush reported by | ||
648 | * underlying devices. We assume that if the device | ||
649 | * doesn't support empty barriers, it doesn't need | ||
650 | * cache flushing commands. | ||
651 | */ | 628 | */ |
652 | if (!md->barrier_error && | 629 | bio->bi_rw &= ~REQ_FLUSH; |
653 | !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) | 630 | queue_io(md, bio); |
654 | md->barrier_error = io_error; | ||
655 | end_io_acct(io); | ||
656 | free_io(md, io); | ||
657 | } else { | 631 | } else { |
658 | end_io_acct(io); | 632 | /* done with normal IO or empty flush */ |
659 | free_io(md, io); | 633 | trace_block_bio_complete(md->queue, bio); |
660 | 634 | bio_endio(bio, io_error); | |
661 | if (io_error != DM_ENDIO_REQUEUE) { | ||
662 | trace_block_bio_complete(md->queue, bio); | ||
663 | |||
664 | bio_endio(bio, io_error); | ||
665 | } | ||
666 | } | 635 | } |
667 | } | 636 | } |
668 | } | 637 | } |
@@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error) | |||
755 | blk_update_request(tio->orig, 0, nr_bytes); | 724 | blk_update_request(tio->orig, 0, nr_bytes); |
756 | } | 725 | } |
757 | 726 | ||
758 | static void store_barrier_error(struct mapped_device *md, int error) | ||
759 | { | ||
760 | unsigned long flags; | ||
761 | |||
762 | spin_lock_irqsave(&md->barrier_error_lock, flags); | ||
763 | /* | ||
764 | * Basically, the first error is taken, but: | ||
765 | * -EOPNOTSUPP supersedes any I/O error. | ||
766 | * Requeue request supersedes any I/O error but -EOPNOTSUPP. | ||
767 | */ | ||
768 | if (!md->barrier_error || error == -EOPNOTSUPP || | ||
769 | (md->barrier_error != -EOPNOTSUPP && | ||
770 | error == DM_ENDIO_REQUEUE)) | ||
771 | md->barrier_error = error; | ||
772 | spin_unlock_irqrestore(&md->barrier_error_lock, flags); | ||
773 | } | ||
774 | |||
775 | /* | 727 | /* |
776 | * Don't touch any member of the md after calling this function because | 728 | * Don't touch any member of the md after calling this function because |
777 | * the md may be freed in dm_put() at the end of this function. | 729 | * the md may be freed in dm_put() at the end of this function. |
@@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone) | |||
809 | static void dm_end_request(struct request *clone, int error) | 761 | static void dm_end_request(struct request *clone, int error) |
810 | { | 762 | { |
811 | int rw = rq_data_dir(clone); | 763 | int rw = rq_data_dir(clone); |
812 | int run_queue = 1; | ||
813 | bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; | ||
814 | struct dm_rq_target_io *tio = clone->end_io_data; | 764 | struct dm_rq_target_io *tio = clone->end_io_data; |
815 | struct mapped_device *md = tio->md; | 765 | struct mapped_device *md = tio->md; |
816 | struct request *rq = tio->orig; | 766 | struct request *rq = tio->orig; |
817 | 767 | ||
818 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { | 768 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
819 | rq->errors = clone->errors; | 769 | rq->errors = clone->errors; |
820 | rq->resid_len = clone->resid_len; | 770 | rq->resid_len = clone->resid_len; |
821 | 771 | ||
@@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error) | |||
829 | } | 779 | } |
830 | 780 | ||
831 | free_rq_clone(clone); | 781 | free_rq_clone(clone); |
832 | 782 | blk_end_request_all(rq, error); | |
833 | if (unlikely(is_barrier)) { | 783 | rq_completed(md, rw, true); |
834 | if (unlikely(error)) | ||
835 | store_barrier_error(md, error); | ||
836 | run_queue = 0; | ||
837 | } else | ||
838 | blk_end_request_all(rq, error); | ||
839 | |||
840 | rq_completed(md, rw, run_queue); | ||
841 | } | 784 | } |
842 | 785 | ||
843 | static void dm_unprep_request(struct request *rq) | 786 | static void dm_unprep_request(struct request *rq) |
@@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone) | |||
862 | struct request_queue *q = rq->q; | 805 | struct request_queue *q = rq->q; |
863 | unsigned long flags; | 806 | unsigned long flags; |
864 | 807 | ||
865 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
866 | /* | ||
867 | * Barrier clones share an original request. | ||
868 | * Leave it to dm_end_request(), which handles this special | ||
869 | * case. | ||
870 | */ | ||
871 | dm_end_request(clone, DM_ENDIO_REQUEUE); | ||
872 | return; | ||
873 | } | ||
874 | |||
875 | dm_unprep_request(rq); | 808 | dm_unprep_request(rq); |
876 | 809 | ||
877 | spin_lock_irqsave(q->queue_lock, flags); | 810 | spin_lock_irqsave(q->queue_lock, flags); |
@@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error) | |||
961 | struct dm_rq_target_io *tio = clone->end_io_data; | 894 | struct dm_rq_target_io *tio = clone->end_io_data; |
962 | struct request *rq = tio->orig; | 895 | struct request *rq = tio->orig; |
963 | 896 | ||
964 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
965 | /* | ||
966 | * Barrier clones share an original request. So can't use | ||
967 | * softirq_done with the original. | ||
968 | * Pass the clone to dm_done() directly in this special case. | ||
969 | * It is safe (even if clone->q->queue_lock is held here) | ||
970 | * because there is no I/O dispatching during the completion | ||
971 | * of barrier clone. | ||
972 | */ | ||
973 | dm_done(clone, error, true); | ||
974 | return; | ||
975 | } | ||
976 | |||
977 | tio->error = error; | 897 | tio->error = error; |
978 | rq->completion_data = clone; | 898 | rq->completion_data = clone; |
979 | blk_complete_request(rq); | 899 | blk_complete_request(rq); |
@@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error) | |||
990 | struct dm_rq_target_io *tio = clone->end_io_data; | 910 | struct dm_rq_target_io *tio = clone->end_io_data; |
991 | struct request *rq = tio->orig; | 911 | struct request *rq = tio->orig; |
992 | 912 | ||
993 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
994 | /* | ||
995 | * Barrier clones share an original request. | ||
996 | * Leave it to dm_end_request(), which handles this special | ||
997 | * case. | ||
998 | */ | ||
999 | BUG_ON(error > 0); | ||
1000 | dm_end_request(clone, error); | ||
1001 | return; | ||
1002 | } | ||
1003 | |||
1004 | rq->cmd_flags |= REQ_FAILED; | 913 | rq->cmd_flags |= REQ_FAILED; |
1005 | dm_complete_request(clone, error); | 914 | dm_complete_request(clone, error); |
1006 | } | 915 | } |
@@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio) | |||
1119 | } | 1028 | } |
1120 | 1029 | ||
1121 | /* | 1030 | /* |
1122 | * Creates a little bio that is just does part of a bvec. | 1031 | * Creates a little bio that just does part of a bvec. |
1123 | */ | 1032 | */ |
1124 | static struct bio *split_bvec(struct bio *bio, sector_t sector, | 1033 | static struct bio *split_bvec(struct bio *bio, sector_t sector, |
1125 | unsigned short idx, unsigned int offset, | 1034 | unsigned short idx, unsigned int offset, |
@@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
1134 | 1043 | ||
1135 | clone->bi_sector = sector; | 1044 | clone->bi_sector = sector; |
1136 | clone->bi_bdev = bio->bi_bdev; | 1045 | clone->bi_bdev = bio->bi_bdev; |
1137 | clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; | 1046 | clone->bi_rw = bio->bi_rw; |
1138 | clone->bi_vcnt = 1; | 1047 | clone->bi_vcnt = 1; |
1139 | clone->bi_size = to_bytes(len); | 1048 | clone->bi_size = to_bytes(len); |
1140 | clone->bi_io_vec->bv_offset = offset; | 1049 | clone->bi_io_vec->bv_offset = offset; |
@@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
1161 | 1070 | ||
1162 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1071 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
1163 | __bio_clone(clone, bio); | 1072 | __bio_clone(clone, bio); |
1164 | clone->bi_rw &= ~REQ_HARDBARRIER; | ||
1165 | clone->bi_destructor = dm_bio_destructor; | 1073 | clone->bi_destructor = dm_bio_destructor; |
1166 | clone->bi_sector = sector; | 1074 | clone->bi_sector = sector; |
1167 | clone->bi_idx = idx; | 1075 | clone->bi_idx = idx; |
@@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, | |||
1225 | __issue_target_request(ci, ti, request_nr, len); | 1133 | __issue_target_request(ci, ti, request_nr, len); |
1226 | } | 1134 | } |
1227 | 1135 | ||
1228 | static int __clone_and_map_empty_barrier(struct clone_info *ci) | 1136 | static int __clone_and_map_empty_flush(struct clone_info *ci) |
1229 | { | 1137 | { |
1230 | unsigned target_nr = 0; | 1138 | unsigned target_nr = 0; |
1231 | struct dm_target *ti; | 1139 | struct dm_target *ti; |
1232 | 1140 | ||
1141 | BUG_ON(bio_has_data(ci->bio)); | ||
1233 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | 1142 | while ((ti = dm_table_get_target(ci->map, target_nr++))) |
1234 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); | 1143 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); |
1235 | 1144 | ||
1236 | ci->sector_count = 0; | ||
1237 | |||
1238 | return 0; | 1145 | return 0; |
1239 | } | 1146 | } |
1240 | 1147 | ||
@@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci) | |||
1289 | sector_t len = 0, max; | 1196 | sector_t len = 0, max; |
1290 | struct dm_target_io *tio; | 1197 | struct dm_target_io *tio; |
1291 | 1198 | ||
1292 | if (unlikely(bio_empty_barrier(bio))) | ||
1293 | return __clone_and_map_empty_barrier(ci); | ||
1294 | |||
1295 | if (unlikely(bio->bi_rw & REQ_DISCARD)) | 1199 | if (unlikely(bio->bi_rw & REQ_DISCARD)) |
1296 | return __clone_and_map_discard(ci); | 1200 | return __clone_and_map_discard(ci); |
1297 | 1201 | ||
@@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1383 | 1287 | ||
1384 | ci.map = dm_get_live_table(md); | 1288 | ci.map = dm_get_live_table(md); |
1385 | if (unlikely(!ci.map)) { | 1289 | if (unlikely(!ci.map)) { |
1386 | if (!(bio->bi_rw & REQ_HARDBARRIER)) | 1290 | bio_io_error(bio); |
1387 | bio_io_error(bio); | ||
1388 | else | ||
1389 | if (!md->barrier_error) | ||
1390 | md->barrier_error = -EIO; | ||
1391 | return; | 1291 | return; |
1392 | } | 1292 | } |
1393 | 1293 | ||
1394 | ci.md = md; | 1294 | ci.md = md; |
1395 | ci.bio = bio; | ||
1396 | ci.io = alloc_io(md); | 1295 | ci.io = alloc_io(md); |
1397 | ci.io->error = 0; | 1296 | ci.io->error = 0; |
1398 | atomic_set(&ci.io->io_count, 1); | 1297 | atomic_set(&ci.io->io_count, 1); |
@@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1400 | ci.io->md = md; | 1299 | ci.io->md = md; |
1401 | spin_lock_init(&ci.io->endio_lock); | 1300 | spin_lock_init(&ci.io->endio_lock); |
1402 | ci.sector = bio->bi_sector; | 1301 | ci.sector = bio->bi_sector; |
1403 | ci.sector_count = bio_sectors(bio); | ||
1404 | if (unlikely(bio_empty_barrier(bio))) | ||
1405 | ci.sector_count = 1; | ||
1406 | ci.idx = bio->bi_idx; | 1302 | ci.idx = bio->bi_idx; |
1407 | 1303 | ||
1408 | start_io_acct(ci.io); | 1304 | start_io_acct(ci.io); |
1409 | while (ci.sector_count && !error) | 1305 | if (bio->bi_rw & REQ_FLUSH) { |
1410 | error = __clone_and_map(&ci); | 1306 | ci.bio = &ci.md->flush_bio; |
1307 | ci.sector_count = 0; | ||
1308 | error = __clone_and_map_empty_flush(&ci); | ||
1309 | /* dec_pending submits any data associated with flush */ | ||
1310 | } else { | ||
1311 | ci.bio = bio; | ||
1312 | ci.sector_count = bio_sectors(bio); | ||
1313 | while (ci.sector_count && !error) | ||
1314 | error = __clone_and_map(&ci); | ||
1315 | } | ||
1411 | 1316 | ||
1412 | /* drop the extra reference count */ | 1317 | /* drop the extra reference count */ |
1413 | dec_pending(ci.io, error); | 1318 | dec_pending(ci.io, error); |
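Pulled out of the hunk markup, the new bio-path dispatch in __split_and_process_bio() reduces to roughly the following (a condensed sketch of the code above, with io accounting and error handling elided):

    if (bio->bi_rw & REQ_FLUSH) {
            /* flushes carry no data for the targets; clone the device-wide,
             * data-less md->flush_bio instead of the caller's bio */
            ci.bio = &ci.md->flush_bio;
            ci.sector_count = 0;
            error = __clone_and_map_empty_flush(&ci);
    } else {
            ci.bio = bio;
            ci.sector_count = bio_sectors(bio);
            while (ci.sector_count && !error)
                    error = __clone_and_map(&ci);
    }

__clone_and_map_empty_flush() in turn just walks the live table and issues ti->num_flush_requests empty clones per target; dec_pending() later submits any data that was attached to the flush.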
@@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio) | |||
1491 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | 1396 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); |
1492 | part_stat_unlock(); | 1397 | part_stat_unlock(); |
1493 | 1398 | ||
1494 | /* | 1399 | /* if we're suspended, we have to queue this io for later */ |
1495 | * If we're suspended or the thread is processing barriers | 1400 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
1496 | * we have to queue this io for later. | ||
1497 | */ | ||
1498 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || | ||
1499 | unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | ||
1500 | up_read(&md->io_lock); | 1401 | up_read(&md->io_lock); |
1501 | 1402 | ||
1502 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && | 1403 | if (bio_rw(bio) != READA) |
1503 | bio_rw(bio) == READA) { | 1404 | queue_io(md, bio); |
1405 | else | ||
1504 | bio_io_error(bio); | 1406 | bio_io_error(bio); |
1505 | return 0; | ||
1506 | } | ||
1507 | |||
1508 | queue_io(md, bio); | ||
1509 | |||
1510 | return 0; | 1407 | return 0; |
1511 | } | 1408 | } |
1512 | 1409 | ||
@@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
1537 | return _dm_request(q, bio); | 1434 | return _dm_request(q, bio); |
1538 | } | 1435 | } |
1539 | 1436 | ||
1540 | static bool dm_rq_is_flush_request(struct request *rq) | ||
1541 | { | ||
1542 | if (rq->cmd_flags & REQ_FLUSH) | ||
1543 | return true; | ||
1544 | else | ||
1545 | return false; | ||
1546 | } | ||
1547 | |||
1548 | void dm_dispatch_request(struct request *rq) | 1437 | void dm_dispatch_request(struct request *rq) |
1549 | { | 1438 | { |
1550 | int r; | 1439 | int r; |
@@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq, | |||
1592 | { | 1481 | { |
1593 | int r; | 1482 | int r; |
1594 | 1483 | ||
1595 | if (dm_rq_is_flush_request(rq)) { | 1484 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, |
1596 | blk_rq_init(NULL, clone); | 1485 | dm_rq_bio_constructor, tio); |
1597 | clone->cmd_type = REQ_TYPE_FS; | 1486 | if (r) |
1598 | clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); | 1487 | return r; |
1599 | } else { | ||
1600 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | ||
1601 | dm_rq_bio_constructor, tio); | ||
1602 | if (r) | ||
1603 | return r; | ||
1604 | |||
1605 | clone->cmd = rq->cmd; | ||
1606 | clone->cmd_len = rq->cmd_len; | ||
1607 | clone->sense = rq->sense; | ||
1608 | clone->buffer = rq->buffer; | ||
1609 | } | ||
1610 | 1488 | ||
1489 | clone->cmd = rq->cmd; | ||
1490 | clone->cmd_len = rq->cmd_len; | ||
1491 | clone->sense = rq->sense; | ||
1492 | clone->buffer = rq->buffer; | ||
1611 | clone->end_io = end_clone_request; | 1493 | clone->end_io = end_clone_request; |
1612 | clone->end_io_data = tio; | 1494 | clone->end_io_data = tio; |
1613 | 1495 | ||
@@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
1648 | struct mapped_device *md = q->queuedata; | 1530 | struct mapped_device *md = q->queuedata; |
1649 | struct request *clone; | 1531 | struct request *clone; |
1650 | 1532 | ||
1651 | if (unlikely(dm_rq_is_flush_request(rq))) | ||
1652 | return BLKPREP_OK; | ||
1653 | |||
1654 | if (unlikely(rq->special)) { | 1533 | if (unlikely(rq->special)) { |
1655 | DMWARN("Already has something in rq->special."); | 1534 | DMWARN("Already has something in rq->special."); |
1656 | return BLKPREP_KILL; | 1535 | return BLKPREP_KILL; |
@@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q) | |||
1727 | struct dm_table *map = dm_get_live_table(md); | 1606 | struct dm_table *map = dm_get_live_table(md); |
1728 | struct dm_target *ti; | 1607 | struct dm_target *ti; |
1729 | struct request *rq, *clone; | 1608 | struct request *rq, *clone; |
1609 | sector_t pos; | ||
1730 | 1610 | ||
1731 | /* | 1611 | /* |
1732 | * For suspend, check blk_queue_stopped() and increment | 1612 | * For suspend, check blk_queue_stopped() and increment |
@@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q) | |||
1739 | if (!rq) | 1619 | if (!rq) |
1740 | goto plug_and_out; | 1620 | goto plug_and_out; |
1741 | 1621 | ||
1742 | if (unlikely(dm_rq_is_flush_request(rq))) { | 1622 | /* always use block 0 to find the target for flushes for now */ |
1743 | BUG_ON(md->flush_request); | 1623 | pos = 0; |
1744 | md->flush_request = rq; | 1624 | if (!(rq->cmd_flags & REQ_FLUSH)) |
1745 | blk_start_request(rq); | 1625 | pos = blk_rq_pos(rq); |
1746 | queue_work(md->wq, &md->barrier_work); | 1626 | |
1747 | goto out; | 1627 | ti = dm_table_find_target(map, pos); |
1748 | } | 1628 | BUG_ON(!dm_target_is_valid(ti)); |
1749 | 1629 | ||
1750 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | ||
1751 | if (ti->type->busy && ti->type->busy(ti)) | 1630 | if (ti->type->busy && ti->type->busy(ti)) |
1752 | goto plug_and_out; | 1631 | goto plug_and_out; |
1753 | 1632 | ||
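On the request-based path the hand-rolled flush plumbing (dm_rq_is_flush_request(), md->flush_request, the barrier work item) is being removed throughout this file; the block layer now generates flush requests itself and dm_request_fn() only has to pick a target for them. Because a flush has no meaningful sector, the lookup is pinned to block 0 for now, condensed from the hunk above:

    pos = 0;
    if (!(rq->cmd_flags & REQ_FLUSH))
            pos = blk_rq_pos(rq);
    ti = dm_table_find_target(map, pos);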
@@ -1918,7 +1797,6 @@ out: | |||
1918 | static const struct block_device_operations dm_blk_dops; | 1797 | static const struct block_device_operations dm_blk_dops; |
1919 | 1798 | ||
1920 | static void dm_wq_work(struct work_struct *work); | 1799 | static void dm_wq_work(struct work_struct *work); |
1921 | static void dm_rq_barrier_work(struct work_struct *work); | ||
1922 | 1800 | ||
1923 | static void dm_init_md_queue(struct mapped_device *md) | 1801 | static void dm_init_md_queue(struct mapped_device *md) |
1924 | { | 1802 | { |
@@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
1940 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1818 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1941 | md->queue->unplug_fn = dm_unplug_all; | 1819 | md->queue->unplug_fn = dm_unplug_all; |
1942 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1820 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1821 | blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); | ||
1943 | } | 1822 | } |
1944 | 1823 | ||
1945 | /* | 1824 | /* |
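The single blk_queue_flush() call is what replaces the old QUEUE_ORDERED_* setup (removed from dm_init_request_based_queue() further down): the driver declares which cache-control bits its queue honours, and the block layer sequences or emulates the rest. For illustration only, the values other than the one dm uses above are hypothetical for this device:

    blk_queue_flush(q, REQ_FLUSH | REQ_FUA);  /* volatile cache, native FUA */
    blk_queue_flush(q, REQ_FLUSH);            /* volatile cache, FUA emulated via a post-flush */
    blk_queue_flush(q, 0);                    /* no volatile cache, flushes short-circuited */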
@@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor) | |||
1972 | mutex_init(&md->suspend_lock); | 1851 | mutex_init(&md->suspend_lock); |
1973 | mutex_init(&md->type_lock); | 1852 | mutex_init(&md->type_lock); |
1974 | spin_lock_init(&md->deferred_lock); | 1853 | spin_lock_init(&md->deferred_lock); |
1975 | spin_lock_init(&md->barrier_error_lock); | ||
1976 | rwlock_init(&md->map_lock); | 1854 | rwlock_init(&md->map_lock); |
1977 | atomic_set(&md->holders, 1); | 1855 | atomic_set(&md->holders, 1); |
1978 | atomic_set(&md->open_count, 0); | 1856 | atomic_set(&md->open_count, 0); |
@@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor) | |||
1995 | atomic_set(&md->pending[1], 0); | 1873 | atomic_set(&md->pending[1], 0); |
1996 | init_waitqueue_head(&md->wait); | 1874 | init_waitqueue_head(&md->wait); |
1997 | INIT_WORK(&md->work, dm_wq_work); | 1875 | INIT_WORK(&md->work, dm_wq_work); |
1998 | INIT_WORK(&md->barrier_work, dm_rq_barrier_work); | ||
1999 | init_waitqueue_head(&md->eventq); | 1876 | init_waitqueue_head(&md->eventq); |
2000 | 1877 | ||
2001 | md->disk->major = _major; | 1878 | md->disk->major = _major; |
@@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor) | |||
2015 | if (!md->bdev) | 1892 | if (!md->bdev) |
2016 | goto bad_bdev; | 1893 | goto bad_bdev; |
2017 | 1894 | ||
1895 | bio_init(&md->flush_bio); | ||
1896 | md->flush_bio.bi_bdev = md->bdev; | ||
1897 | md->flush_bio.bi_rw = WRITE_FLUSH; | ||
1898 | |||
2018 | /* Populate the mapping, nobody knows we exist yet */ | 1899 | /* Populate the mapping, nobody knows we exist yet */ |
2019 | spin_lock(&_minor_lock); | 1900 | spin_lock(&_minor_lock); |
2020 | old_md = idr_replace(&_minor_idr, md, minor); | 1901 | old_md = idr_replace(&_minor_idr, md, minor); |
@@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
2245 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2126 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
2246 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2127 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
2247 | blk_queue_lld_busy(md->queue, dm_lld_busy); | 2128 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
2248 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); | ||
2249 | 2129 | ||
2250 | elv_register_queue(md->queue); | 2130 | elv_register_queue(md->queue); |
2251 | 2131 | ||
@@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2406 | return r; | 2286 | return r; |
2407 | } | 2287 | } |
2408 | 2288 | ||
2409 | static void dm_flush(struct mapped_device *md) | ||
2410 | { | ||
2411 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2412 | |||
2413 | bio_init(&md->barrier_bio); | ||
2414 | md->barrier_bio.bi_bdev = md->bdev; | ||
2415 | md->barrier_bio.bi_rw = WRITE_BARRIER; | ||
2416 | __split_and_process_bio(md, &md->barrier_bio); | ||
2417 | |||
2418 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2419 | } | ||
2420 | |||
2421 | static void process_barrier(struct mapped_device *md, struct bio *bio) | ||
2422 | { | ||
2423 | md->barrier_error = 0; | ||
2424 | |||
2425 | dm_flush(md); | ||
2426 | |||
2427 | if (!bio_empty_barrier(bio)) { | ||
2428 | __split_and_process_bio(md, bio); | ||
2429 | /* | ||
2430 | * If the request isn't supported, don't waste time with | ||
2431 | * the second flush. | ||
2432 | */ | ||
2433 | if (md->barrier_error != -EOPNOTSUPP) | ||
2434 | dm_flush(md); | ||
2435 | } | ||
2436 | |||
2437 | if (md->barrier_error != DM_ENDIO_REQUEUE) | ||
2438 | bio_endio(bio, md->barrier_error); | ||
2439 | else { | ||
2440 | spin_lock_irq(&md->deferred_lock); | ||
2441 | bio_list_add_head(&md->deferred, bio); | ||
2442 | spin_unlock_irq(&md->deferred_lock); | ||
2443 | } | ||
2444 | } | ||
2445 | |||
2446 | /* | 2289 | /* |
2447 | * Process the deferred bios | 2290 | * Process the deferred bios |
2448 | */ | 2291 | */ |
@@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work) | |||
2452 | work); | 2295 | work); |
2453 | struct bio *c; | 2296 | struct bio *c; |
2454 | 2297 | ||
2455 | down_write(&md->io_lock); | 2298 | down_read(&md->io_lock); |
2456 | 2299 | ||
2457 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 2300 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
2458 | spin_lock_irq(&md->deferred_lock); | 2301 | spin_lock_irq(&md->deferred_lock); |
2459 | c = bio_list_pop(&md->deferred); | 2302 | c = bio_list_pop(&md->deferred); |
2460 | spin_unlock_irq(&md->deferred_lock); | 2303 | spin_unlock_irq(&md->deferred_lock); |
2461 | 2304 | ||
2462 | if (!c) { | 2305 | if (!c) |
2463 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2464 | break; | 2306 | break; |
2465 | } | ||
2466 | 2307 | ||
2467 | up_write(&md->io_lock); | 2308 | up_read(&md->io_lock); |
2468 | 2309 | ||
2469 | if (dm_request_based(md)) | 2310 | if (dm_request_based(md)) |
2470 | generic_make_request(c); | 2311 | generic_make_request(c); |
2471 | else { | 2312 | else |
2472 | if (c->bi_rw & REQ_HARDBARRIER) | 2313 | __split_and_process_bio(md, c); |
2473 | process_barrier(md, c); | ||
2474 | else | ||
2475 | __split_and_process_bio(md, c); | ||
2476 | } | ||
2477 | 2314 | ||
2478 | down_write(&md->io_lock); | 2315 | down_read(&md->io_lock); |
2479 | } | 2316 | } |
2480 | 2317 | ||
2481 | up_write(&md->io_lock); | 2318 | up_read(&md->io_lock); |
2482 | } | 2319 | } |
2483 | 2320 | ||
2484 | static void dm_queue_flush(struct mapped_device *md) | 2321 | static void dm_queue_flush(struct mapped_device *md) |
@@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md) | |||
2488 | queue_work(md->wq, &md->work); | 2325 | queue_work(md->wq, &md->work); |
2489 | } | 2326 | } |
2490 | 2327 | ||
2491 | static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) | ||
2492 | { | ||
2493 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
2494 | |||
2495 | tio->info.target_request_nr = request_nr; | ||
2496 | } | ||
2497 | |||
2498 | /* Issue barrier requests to targets and wait for their completion. */ | ||
2499 | static int dm_rq_barrier(struct mapped_device *md) | ||
2500 | { | ||
2501 | int i, j; | ||
2502 | struct dm_table *map = dm_get_live_table(md); | ||
2503 | unsigned num_targets = dm_table_get_num_targets(map); | ||
2504 | struct dm_target *ti; | ||
2505 | struct request *clone; | ||
2506 | |||
2507 | md->barrier_error = 0; | ||
2508 | |||
2509 | for (i = 0; i < num_targets; i++) { | ||
2510 | ti = dm_table_get_target(map, i); | ||
2511 | for (j = 0; j < ti->num_flush_requests; j++) { | ||
2512 | clone = clone_rq(md->flush_request, md, GFP_NOIO); | ||
2513 | dm_rq_set_target_request_nr(clone, j); | ||
2514 | atomic_inc(&md->pending[rq_data_dir(clone)]); | ||
2515 | map_request(ti, clone, md); | ||
2516 | } | ||
2517 | } | ||
2518 | |||
2519 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2520 | dm_table_put(map); | ||
2521 | |||
2522 | return md->barrier_error; | ||
2523 | } | ||
2524 | |||
2525 | static void dm_rq_barrier_work(struct work_struct *work) | ||
2526 | { | ||
2527 | int error; | ||
2528 | struct mapped_device *md = container_of(work, struct mapped_device, | ||
2529 | barrier_work); | ||
2530 | struct request_queue *q = md->queue; | ||
2531 | struct request *rq; | ||
2532 | unsigned long flags; | ||
2533 | |||
2534 | /* | ||
2535 | * Hold the md reference here and leave it at the last part so that | ||
2536 | * the md can't be deleted by device opener when the barrier request | ||
2537 | * completes. | ||
2538 | */ | ||
2539 | dm_get(md); | ||
2540 | |||
2541 | error = dm_rq_barrier(md); | ||
2542 | |||
2543 | rq = md->flush_request; | ||
2544 | md->flush_request = NULL; | ||
2545 | |||
2546 | if (error == DM_ENDIO_REQUEUE) { | ||
2547 | spin_lock_irqsave(q->queue_lock, flags); | ||
2548 | blk_requeue_request(q, rq); | ||
2549 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2550 | } else | ||
2551 | blk_end_request_all(rq, error); | ||
2552 | |||
2553 | blk_run_queue(q); | ||
2554 | |||
2555 | dm_put(md); | ||
2556 | } | ||
2557 | |||
2558 | /* | 2328 | /* |
2559 | * Swap in a new table, returning the old one for the caller to destroy. | 2329 | * Swap in a new table, returning the old one for the caller to destroy. |
2560 | */ | 2330 | */ |
@@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2677 | * | 2447 | * |
2678 | * To get all processes out of __split_and_process_bio in dm_request, | 2448 | * To get all processes out of __split_and_process_bio in dm_request, |
2679 | * we take the write lock. To prevent any process from reentering | 2449 | * we take the write lock. To prevent any process from reentering |
2680 | * __split_and_process_bio from dm_request, we set | 2450 | * __split_and_process_bio from dm_request and quiesce the thread |
2681 | * DMF_QUEUE_IO_TO_THREAD. | 2451 | * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call |
2682 | * | 2452 | * flush_workqueue(md->wq). |
2683 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND | ||
2684 | * and call flush_workqueue(md->wq). flush_workqueue will wait until | ||
2685 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any | ||
2686 | * further calls to __split_and_process_bio from dm_wq_work. | ||
2687 | */ | 2453 | */ |
2688 | down_write(&md->io_lock); | 2454 | down_write(&md->io_lock); |
2689 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2455 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2690 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2691 | up_write(&md->io_lock); | 2456 | up_write(&md->io_lock); |
2692 | 2457 | ||
2693 | /* | 2458 | /* |
2694 | * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which | 2459 | * Stop md->queue before flushing md->wq in case request-based |
2695 | * can be kicked until md->queue is stopped. So stop md->queue before | 2460 | * dm defers requests to md->wq from md->queue. |
2696 | * flushing md->wq. | ||
2697 | */ | 2461 | */ |
2698 | if (dm_request_based(md)) | 2462 | if (dm_request_based(md)) |
2699 | stop_queue(md->queue); | 2463 | stop_queue(md->queue); |
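With DMF_QUEUE_IO_TO_THREAD gone, suspend is reduced to one flag plus a queue stop. A condensed view of the sequence the comment above describes (assuming the surrounding dm_suspend() code is otherwise unchanged):

    down_write(&md->io_lock);
    set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);  /* new bios now go to md->deferred */
    up_write(&md->io_lock);

    if (dm_request_based(md))
            stop_queue(md->queue);                  /* stop before draining md->wq */

    flush_workqueue(md->wq);                        /* wait for dm_wq_work() to exit */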
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index ba19060bcf3f..8a2f767f26d8 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -294,8 +294,8 @@ static int linear_make_request (mddev_t *mddev, struct bio *bio) | |||
294 | dev_info_t *tmp_dev; | 294 | dev_info_t *tmp_dev; |
295 | sector_t start_sector; | 295 | sector_t start_sector; |
296 | 296 | ||
297 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 297 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
298 | md_barrier_request(mddev, bio); | 298 | md_flush_request(mddev, bio); |
299 | return 0; | 299 | return 0; |
300 | } | 300 | } |
301 | 301 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index dbf822df942a..225815197a3d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -227,12 +227,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) | |||
227 | return 0; | 227 | return 0; |
228 | } | 228 | } |
229 | rcu_read_lock(); | 229 | rcu_read_lock(); |
230 | if (mddev->suspended || mddev->barrier) { | 230 | if (mddev->suspended) { |
231 | DEFINE_WAIT(__wait); | 231 | DEFINE_WAIT(__wait); |
232 | for (;;) { | 232 | for (;;) { |
233 | prepare_to_wait(&mddev->sb_wait, &__wait, | 233 | prepare_to_wait(&mddev->sb_wait, &__wait, |
234 | TASK_UNINTERRUPTIBLE); | 234 | TASK_UNINTERRUPTIBLE); |
235 | if (!mddev->suspended && !mddev->barrier) | 235 | if (!mddev->suspended) |
236 | break; | 236 | break; |
237 | rcu_read_unlock(); | 237 | rcu_read_unlock(); |
238 | schedule(); | 238 | schedule(); |
@@ -283,40 +283,29 @@ EXPORT_SYMBOL_GPL(mddev_resume); | |||
283 | 283 | ||
284 | int mddev_congested(mddev_t *mddev, int bits) | 284 | int mddev_congested(mddev_t *mddev, int bits) |
285 | { | 285 | { |
286 | if (mddev->barrier) | ||
287 | return 1; | ||
288 | return mddev->suspended; | 286 | return mddev->suspended; |
289 | } | 287 | } |
290 | EXPORT_SYMBOL(mddev_congested); | 288 | EXPORT_SYMBOL(mddev_congested); |
291 | 289 | ||
292 | /* | 290 | /* |
293 | * Generic barrier handling for md | 291 | * Generic flush handling for md |
294 | */ | 292 | */ |
295 | 293 | ||
296 | #define POST_REQUEST_BARRIER ((void*)1) | 294 | static void md_end_flush(struct bio *bio, int err) |
297 | |||
298 | static void md_end_barrier(struct bio *bio, int err) | ||
299 | { | 295 | { |
300 | mdk_rdev_t *rdev = bio->bi_private; | 296 | mdk_rdev_t *rdev = bio->bi_private; |
301 | mddev_t *mddev = rdev->mddev; | 297 | mddev_t *mddev = rdev->mddev; |
302 | if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER) | ||
303 | set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); | ||
304 | 298 | ||
305 | rdev_dec_pending(rdev, mddev); | 299 | rdev_dec_pending(rdev, mddev); |
306 | 300 | ||
307 | if (atomic_dec_and_test(&mddev->flush_pending)) { | 301 | if (atomic_dec_and_test(&mddev->flush_pending)) { |
308 | if (mddev->barrier == POST_REQUEST_BARRIER) { | 302 | /* The pre-request flush has finished */ |
309 | /* This was a post-request barrier */ | 303 | schedule_work(&mddev->flush_work); |
310 | mddev->barrier = NULL; | ||
311 | wake_up(&mddev->sb_wait); | ||
312 | } else | ||
313 | /* The pre-request barrier has finished */ | ||
314 | schedule_work(&mddev->barrier_work); | ||
315 | } | 304 | } |
316 | bio_put(bio); | 305 | bio_put(bio); |
317 | } | 306 | } |
318 | 307 | ||
319 | static void submit_barriers(mddev_t *mddev) | 308 | static void submit_flushes(mddev_t *mddev) |
320 | { | 309 | { |
321 | mdk_rdev_t *rdev; | 310 | mdk_rdev_t *rdev; |
322 | 311 | ||
@@ -333,60 +322,56 @@ static void submit_barriers(mddev_t *mddev) | |||
333 | atomic_inc(&rdev->nr_pending); | 322 | atomic_inc(&rdev->nr_pending); |
334 | rcu_read_unlock(); | 323 | rcu_read_unlock(); |
335 | bi = bio_alloc(GFP_KERNEL, 0); | 324 | bi = bio_alloc(GFP_KERNEL, 0); |
336 | bi->bi_end_io = md_end_barrier; | 325 | bi->bi_end_io = md_end_flush; |
337 | bi->bi_private = rdev; | 326 | bi->bi_private = rdev; |
338 | bi->bi_bdev = rdev->bdev; | 327 | bi->bi_bdev = rdev->bdev; |
339 | atomic_inc(&mddev->flush_pending); | 328 | atomic_inc(&mddev->flush_pending); |
340 | submit_bio(WRITE_BARRIER, bi); | 329 | submit_bio(WRITE_FLUSH, bi); |
341 | rcu_read_lock(); | 330 | rcu_read_lock(); |
342 | rdev_dec_pending(rdev, mddev); | 331 | rdev_dec_pending(rdev, mddev); |
343 | } | 332 | } |
344 | rcu_read_unlock(); | 333 | rcu_read_unlock(); |
345 | } | 334 | } |
346 | 335 | ||
347 | static void md_submit_barrier(struct work_struct *ws) | 336 | static void md_submit_flush_data(struct work_struct *ws) |
348 | { | 337 | { |
349 | mddev_t *mddev = container_of(ws, mddev_t, barrier_work); | 338 | mddev_t *mddev = container_of(ws, mddev_t, flush_work); |
350 | struct bio *bio = mddev->barrier; | 339 | struct bio *bio = mddev->flush_bio; |
351 | 340 | ||
352 | atomic_set(&mddev->flush_pending, 1); | 341 | atomic_set(&mddev->flush_pending, 1); |
353 | 342 | ||
354 | if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) | 343 | if (bio->bi_size == 0) |
355 | bio_endio(bio, -EOPNOTSUPP); | ||
356 | else if (bio->bi_size == 0) | ||
357 | /* an empty barrier - all done */ | 344 | /* an empty barrier - all done */ |
358 | bio_endio(bio, 0); | 345 | bio_endio(bio, 0); |
359 | else { | 346 | else { |
360 | bio->bi_rw &= ~REQ_HARDBARRIER; | 347 | bio->bi_rw &= ~REQ_FLUSH; |
361 | if (mddev->pers->make_request(mddev, bio)) | 348 | if (mddev->pers->make_request(mddev, bio)) |
362 | generic_make_request(bio); | 349 | generic_make_request(bio); |
363 | mddev->barrier = POST_REQUEST_BARRIER; | ||
364 | submit_barriers(mddev); | ||
365 | } | 350 | } |
366 | if (atomic_dec_and_test(&mddev->flush_pending)) { | 351 | if (atomic_dec_and_test(&mddev->flush_pending)) { |
367 | mddev->barrier = NULL; | 352 | mddev->flush_bio = NULL; |
368 | wake_up(&mddev->sb_wait); | 353 | wake_up(&mddev->sb_wait); |
369 | } | 354 | } |
370 | } | 355 | } |
371 | 356 | ||
372 | void md_barrier_request(mddev_t *mddev, struct bio *bio) | 357 | void md_flush_request(mddev_t *mddev, struct bio *bio) |
373 | { | 358 | { |
374 | spin_lock_irq(&mddev->write_lock); | 359 | spin_lock_irq(&mddev->write_lock); |
375 | wait_event_lock_irq(mddev->sb_wait, | 360 | wait_event_lock_irq(mddev->sb_wait, |
376 | !mddev->barrier, | 361 | !mddev->flush_bio, |
377 | mddev->write_lock, /*nothing*/); | 362 | mddev->write_lock, /*nothing*/); |
378 | mddev->barrier = bio; | 363 | mddev->flush_bio = bio; |
379 | spin_unlock_irq(&mddev->write_lock); | 364 | spin_unlock_irq(&mddev->write_lock); |
380 | 365 | ||
381 | atomic_set(&mddev->flush_pending, 1); | 366 | atomic_set(&mddev->flush_pending, 1); |
382 | INIT_WORK(&mddev->barrier_work, md_submit_barrier); | 367 | INIT_WORK(&mddev->flush_work, md_submit_flush_data); |
383 | 368 | ||
384 | submit_barriers(mddev); | 369 | submit_flushes(mddev); |
385 | 370 | ||
386 | if (atomic_dec_and_test(&mddev->flush_pending)) | 371 | if (atomic_dec_and_test(&mddev->flush_pending)) |
387 | schedule_work(&mddev->barrier_work); | 372 | schedule_work(&mddev->flush_work); |
388 | } | 373 | } |
389 | EXPORT_SYMBOL(md_barrier_request); | 374 | EXPORT_SYMBOL(md_flush_request); |
390 | 375 | ||
391 | /* Support for plugging. | 376 | /* Support for plugging. |
392 | * This mirrors the plugging support in request_queue, but does not | 377 | * This mirrors the plugging support in request_queue, but does not |
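md_flush_request() becomes the one generic entry point for flush handling in md. The personalities converted in this series (linear, raid0, multipath, raid10, raid5) all gain the same check at the top of their make_request(); raid1 instead forwards the flags to each mirror (see its hunks below). The function name here is a placeholder, the body mirrors those call sites:

    static int example_make_request(mddev_t *mddev, struct bio *bio)
    {
            if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                    /* flush member devices first; md resubmits any data part
                     * from md_submit_flush_data() once they complete */
                    md_flush_request(mddev, bio);
                    return 0;
            }
            /* ... normal mapping of the bio onto member devices ... */
            return 0;
    }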
@@ -697,31 +682,6 @@ static void super_written(struct bio *bio, int error) | |||
697 | bio_put(bio); | 682 | bio_put(bio); |
698 | } | 683 | } |
699 | 684 | ||
700 | static void super_written_barrier(struct bio *bio, int error) | ||
701 | { | ||
702 | struct bio *bio2 = bio->bi_private; | ||
703 | mdk_rdev_t *rdev = bio2->bi_private; | ||
704 | mddev_t *mddev = rdev->mddev; | ||
705 | |||
706 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && | ||
707 | error == -EOPNOTSUPP) { | ||
708 | unsigned long flags; | ||
709 | /* barriers don't appear to be supported :-( */ | ||
710 | set_bit(BarriersNotsupp, &rdev->flags); | ||
711 | mddev->barriers_work = 0; | ||
712 | spin_lock_irqsave(&mddev->write_lock, flags); | ||
713 | bio2->bi_next = mddev->biolist; | ||
714 | mddev->biolist = bio2; | ||
715 | spin_unlock_irqrestore(&mddev->write_lock, flags); | ||
716 | wake_up(&mddev->sb_wait); | ||
717 | bio_put(bio); | ||
718 | } else { | ||
719 | bio_put(bio2); | ||
720 | bio->bi_private = rdev; | ||
721 | super_written(bio, error); | ||
722 | } | ||
723 | } | ||
724 | |||
725 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | 685 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, |
726 | sector_t sector, int size, struct page *page) | 686 | sector_t sector, int size, struct page *page) |
727 | { | 687 | { |
@@ -730,51 +690,28 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | |||
730 | * and decrement it on completion, waking up sb_wait | 690 | * and decrement it on completion, waking up sb_wait |
731 | * if zero is reached. | 691 | * if zero is reached. |
732 | * If an error occurred, call md_error | 692 | * If an error occurred, call md_error |
733 | * | ||
734 | * As we might need to resubmit the request if REQ_HARDBARRIER | ||
735 | * causes ENOTSUPP, we allocate a spare bio... | ||
736 | */ | 693 | */ |
737 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 694 | struct bio *bio = bio_alloc(GFP_NOIO, 1); |
738 | int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG; | ||
739 | 695 | ||
740 | bio->bi_bdev = rdev->bdev; | 696 | bio->bi_bdev = rdev->bdev; |
741 | bio->bi_sector = sector; | 697 | bio->bi_sector = sector; |
742 | bio_add_page(bio, page, size, 0); | 698 | bio_add_page(bio, page, size, 0); |
743 | bio->bi_private = rdev; | 699 | bio->bi_private = rdev; |
744 | bio->bi_end_io = super_written; | 700 | bio->bi_end_io = super_written; |
745 | bio->bi_rw = rw; | ||
746 | 701 | ||
747 | atomic_inc(&mddev->pending_writes); | 702 | atomic_inc(&mddev->pending_writes); |
748 | if (!test_bit(BarriersNotsupp, &rdev->flags)) { | 703 | submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA, |
749 | struct bio *rbio; | 704 | bio); |
750 | rw |= REQ_HARDBARRIER; | ||
751 | rbio = bio_clone(bio, GFP_NOIO); | ||
752 | rbio->bi_private = bio; | ||
753 | rbio->bi_end_io = super_written_barrier; | ||
754 | submit_bio(rw, rbio); | ||
755 | } else | ||
756 | submit_bio(rw, bio); | ||
757 | } | 705 | } |
758 | 706 | ||
759 | void md_super_wait(mddev_t *mddev) | 707 | void md_super_wait(mddev_t *mddev) |
760 | { | 708 | { |
761 | /* wait for all superblock writes that were scheduled to complete. | 709 | /* wait for all superblock writes that were scheduled to complete */ |
762 | * if any had to be retried (due to BARRIER problems), retry them | ||
763 | */ | ||
764 | DEFINE_WAIT(wq); | 710 | DEFINE_WAIT(wq); |
765 | for(;;) { | 711 | for(;;) { |
766 | prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); | 712 | prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); |
767 | if (atomic_read(&mddev->pending_writes)==0) | 713 | if (atomic_read(&mddev->pending_writes)==0) |
768 | break; | 714 | break; |
769 | while (mddev->biolist) { | ||
770 | struct bio *bio; | ||
771 | spin_lock_irq(&mddev->write_lock); | ||
772 | bio = mddev->biolist; | ||
773 | mddev->biolist = bio->bi_next ; | ||
774 | bio->bi_next = NULL; | ||
775 | spin_unlock_irq(&mddev->write_lock); | ||
776 | submit_bio(bio->bi_rw, bio); | ||
777 | } | ||
778 | schedule(); | 715 | schedule(); |
779 | } | 716 | } |
780 | finish_wait(&mddev->sb_wait, &wq); | 717 | finish_wait(&mddev->sb_wait, &wq); |
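The superblock path shows the main simplification this series buys: REQ_FLUSH/REQ_FUA requests do not fail with -EOPNOTSUPP the way REQ_HARDBARRIER could, because the block layer emulates whatever the device lacks. The spare-bio clone, super_written_barrier() and the biolist retry loop therefore disappear, and the write collapses to a single submission:

    /* flush volatile caches, then write the superblock page durably */
    submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA, bio);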
@@ -1071,7 +1008,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1071 | clear_bit(Faulty, &rdev->flags); | 1008 | clear_bit(Faulty, &rdev->flags); |
1072 | clear_bit(In_sync, &rdev->flags); | 1009 | clear_bit(In_sync, &rdev->flags); |
1073 | clear_bit(WriteMostly, &rdev->flags); | 1010 | clear_bit(WriteMostly, &rdev->flags); |
1074 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
1075 | 1011 | ||
1076 | if (mddev->raid_disks == 0) { | 1012 | if (mddev->raid_disks == 0) { |
1077 | mddev->major_version = 0; | 1013 | mddev->major_version = 0; |
@@ -1486,7 +1422,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1486 | clear_bit(Faulty, &rdev->flags); | 1422 | clear_bit(Faulty, &rdev->flags); |
1487 | clear_bit(In_sync, &rdev->flags); | 1423 | clear_bit(In_sync, &rdev->flags); |
1488 | clear_bit(WriteMostly, &rdev->flags); | 1424 | clear_bit(WriteMostly, &rdev->flags); |
1489 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
1490 | 1425 | ||
1491 | if (mddev->raid_disks == 0) { | 1426 | if (mddev->raid_disks == 0) { |
1492 | mddev->major_version = 1; | 1427 | mddev->major_version = 1; |
@@ -4505,7 +4440,6 @@ int md_run(mddev_t *mddev) | |||
4505 | /* may be over-ridden by personality */ | 4440 | /* may be over-ridden by personality */ |
4506 | mddev->resync_max_sectors = mddev->dev_sectors; | 4441 | mddev->resync_max_sectors = mddev->dev_sectors; |
4507 | 4442 | ||
4508 | mddev->barriers_work = 1; | ||
4509 | mddev->ok_start_degraded = start_dirty_degraded; | 4443 | mddev->ok_start_degraded = start_dirty_degraded; |
4510 | 4444 | ||
4511 | if (start_readonly && mddev->ro == 0) | 4445 | if (start_readonly && mddev->ro == 0) |
@@ -4684,7 +4618,6 @@ static void md_clean(mddev_t *mddev) | |||
4684 | mddev->recovery = 0; | 4618 | mddev->recovery = 0; |
4685 | mddev->in_sync = 0; | 4619 | mddev->in_sync = 0; |
4686 | mddev->degraded = 0; | 4620 | mddev->degraded = 0; |
4687 | mddev->barriers_work = 0; | ||
4688 | mddev->safemode = 0; | 4621 | mddev->safemode = 0; |
4689 | mddev->bitmap_info.offset = 0; | 4622 | mddev->bitmap_info.offset = 0; |
4690 | mddev->bitmap_info.default_offset = 0; | 4623 | mddev->bitmap_info.default_offset = 0; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 3931299788dc..112a2c32db0c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -87,7 +87,6 @@ struct mdk_rdev_s | |||
87 | #define Faulty 1 /* device is known to have a fault */ | 87 | #define Faulty 1 /* device is known to have a fault */ |
88 | #define In_sync 2 /* device is in_sync with rest of array */ | 88 | #define In_sync 2 /* device is in_sync with rest of array */ |
89 | #define WriteMostly 4 /* Avoid reading if at all possible */ | 89 | #define WriteMostly 4 /* Avoid reading if at all possible */ |
90 | #define BarriersNotsupp 5 /* REQ_HARDBARRIER is not supported */ | ||
91 | #define AllReserved 6 /* If whole device is reserved for | 90 | #define AllReserved 6 /* If whole device is reserved for |
92 | * one array */ | 91 | * one array */ |
93 | #define AutoDetected 7 /* added by auto-detect */ | 92 | #define AutoDetected 7 /* added by auto-detect */ |
@@ -273,13 +272,6 @@ struct mddev_s | |||
273 | int degraded; /* whether md should consider | 272 | int degraded; /* whether md should consider |
274 | * adding a spare | 273 | * adding a spare |
275 | */ | 274 | */ |
276 | int barriers_work; /* initialised to true, cleared as soon | ||
277 | * as a barrier request to slave | ||
278 | * fails. Only supported | ||
279 | */ | ||
280 | struct bio *biolist; /* bios that need to be retried | ||
281 | * because REQ_HARDBARRIER is not supported | ||
282 | */ | ||
283 | 275 | ||
284 | atomic_t recovery_active; /* blocks scheduled, but not written */ | 276 | atomic_t recovery_active; /* blocks scheduled, but not written */ |
285 | wait_queue_head_t recovery_wait; | 277 | wait_queue_head_t recovery_wait; |
@@ -339,16 +331,13 @@ struct mddev_s | |||
339 | struct attribute_group *to_remove; | 331 | struct attribute_group *to_remove; |
340 | struct plug_handle *plug; /* if used by personality */ | 332 | struct plug_handle *plug; /* if used by personality */ |
341 | 333 | ||
342 | /* Generic barrier handling. | 334 | /* Generic flush handling. |
343 | * If there is a pending barrier request, all other | 335 | * The last to finish preflush schedules a worker to submit |
344 | * writes are blocked while the devices are flushed. | 336 | * the rest of the request (without the REQ_FLUSH flag). |
345 | * The last to finish a flush schedules a worker to | ||
346 | * submit the barrier request (without the barrier flag), | ||
347 | * then submit more flush requests. | ||
348 | */ | 337 | */ |
349 | struct bio *barrier; | 338 | struct bio *flush_bio; |
350 | atomic_t flush_pending; | 339 | atomic_t flush_pending; |
351 | struct work_struct barrier_work; | 340 | struct work_struct flush_work; |
352 | struct work_struct event_work; /* used by dm to report failure event */ | 341 | struct work_struct event_work; /* used by dm to report failure event */ |
353 | }; | 342 | }; |
354 | 343 | ||
@@ -502,7 +491,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok); | |||
502 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); | 491 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); |
503 | 492 | ||
504 | extern int mddev_congested(mddev_t *mddev, int bits); | 493 | extern int mddev_congested(mddev_t *mddev, int bits); |
505 | extern void md_barrier_request(mddev_t *mddev, struct bio *bio); | 494 | extern void md_flush_request(mddev_t *mddev, struct bio *bio); |
506 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | 495 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, |
507 | sector_t sector, int size, struct page *page); | 496 | sector_t sector, int size, struct page *page); |
508 | extern void md_super_wait(mddev_t *mddev); | 497 | extern void md_super_wait(mddev_t *mddev); |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 0307d217e7a4..6d7ddf32ef2e 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -142,8 +142,8 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio) | |||
142 | struct multipath_bh * mp_bh; | 142 | struct multipath_bh * mp_bh; |
143 | struct multipath_info *multipath; | 143 | struct multipath_info *multipath; |
144 | 144 | ||
145 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 145 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
146 | md_barrier_request(mddev, bio); | 146 | md_flush_request(mddev, bio); |
147 | return 0; | 147 | return 0; |
148 | } | 148 | } |
149 | 149 | ||
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6f7af46d623c..a39f4c355e55 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -483,8 +483,8 @@ static int raid0_make_request(mddev_t *mddev, struct bio *bio) | |||
483 | struct strip_zone *zone; | 483 | struct strip_zone *zone; |
484 | mdk_rdev_t *tmp_dev; | 484 | mdk_rdev_t *tmp_dev; |
485 | 485 | ||
486 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 486 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
487 | md_barrier_request(mddev, bio); | 487 | md_flush_request(mddev, bio); |
488 | return 0; | 488 | return 0; |
489 | } | 489 | } |
490 | 490 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0b830bbe1d8b..378a25894c57 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
319 | if (r1_bio->bios[mirror] == bio) | 319 | if (r1_bio->bios[mirror] == bio) |
320 | break; | 320 | break; |
321 | 321 | ||
322 | if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { | 322 | /* |
323 | set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); | 323 | * 'one mirror IO has finished' event handler: |
324 | set_bit(R1BIO_BarrierRetry, &r1_bio->state); | 324 | */ |
325 | r1_bio->mddev->barriers_work = 0; | 325 | r1_bio->bios[mirror] = NULL; |
326 | /* Don't rdev_dec_pending in this branch - keep it for the retry */ | 326 | to_put = bio; |
327 | } else { | 327 | if (!uptodate) { |
328 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | ||
329 | /* an I/O failed, we can't clear the bitmap */ | ||
330 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
331 | } else | ||
328 | /* | 332 | /* |
329 | * this branch is our 'one mirror IO has finished' event handler: | 333 | * Set R1BIO_Uptodate in our master bio, so that we |
334 | * will return a good error code for to the higher | ||
335 | * levels even if IO on some other mirrored buffer | ||
336 | * fails. | ||
337 | * | ||
338 | * The 'master' represents the composite IO operation | ||
339 | * to user-side. So if something waits for IO, then it | ||
340 | * will wait for the 'master' bio. | ||
330 | */ | 341 | */ |
331 | r1_bio->bios[mirror] = NULL; | 342 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
332 | to_put = bio; | 343 | |
333 | if (!uptodate) { | 344 | update_head_pos(mirror, r1_bio); |
334 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 345 | |
335 | /* an I/O failed, we can't clear the bitmap */ | 346 | if (behind) { |
336 | set_bit(R1BIO_Degraded, &r1_bio->state); | 347 | if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) |
337 | } else | 348 | atomic_dec(&r1_bio->behind_remaining); |
338 | /* | 349 | |
339 | * Set R1BIO_Uptodate in our master bio, so that | 350 | /* |
340 | * we will return a good error code for to the higher | 351 | * In behind mode, we ACK the master bio once the I/O |
341 | * levels even if IO on some other mirrored buffer fails. | 352 | * has safely reached all non-writemostly |
342 | * | 353 | * disks. Setting the Returned bit ensures that this |
343 | * The 'master' represents the composite IO operation to | 354 | * gets done only once -- we don't ever want to return |
344 | * user-side. So if something waits for IO, then it will | 355 | * -EIO here, instead we'll wait |
345 | * wait for the 'master' bio. | 356 | */ |
346 | */ | 357 | if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && |
347 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 358 | test_bit(R1BIO_Uptodate, &r1_bio->state)) { |
348 | 359 | /* Maybe we can return now */ | |
349 | update_head_pos(mirror, r1_bio); | 360 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { |
350 | 361 | struct bio *mbio = r1_bio->master_bio; | |
351 | if (behind) { | 362 | PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", |
352 | if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) | 363 | (unsigned long long) mbio->bi_sector, |
353 | atomic_dec(&r1_bio->behind_remaining); | 364 | (unsigned long long) mbio->bi_sector + |
354 | 365 | (mbio->bi_size >> 9) - 1); | |
355 | /* In behind mode, we ACK the master bio once the I/O has safely | 366 | bio_endio(mbio, 0); |
356 | * reached all non-writemostly disks. Setting the Returned bit | ||
357 | * ensures that this gets done only once -- we don't ever want to | ||
358 | * return -EIO here, instead we'll wait */ | ||
359 | |||
360 | if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && | ||
361 | test_bit(R1BIO_Uptodate, &r1_bio->state)) { | ||
362 | /* Maybe we can return now */ | ||
363 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { | ||
364 | struct bio *mbio = r1_bio->master_bio; | ||
365 | PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", | ||
366 | (unsigned long long) mbio->bi_sector, | ||
367 | (unsigned long long) mbio->bi_sector + | ||
368 | (mbio->bi_size >> 9) - 1); | ||
369 | bio_endio(mbio, 0); | ||
370 | } | ||
371 | } | 367 | } |
372 | } | 368 | } |
373 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | ||
374 | } | 369 | } |
370 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | ||
371 | |||
375 | /* | 372 | /* |
376 | * | ||
377 | * Let's see if all mirrored write operations have finished | 373 | * Let's see if all mirrored write operations have finished |
378 | * already. | 374 | * already. |
379 | */ | 375 | */ |
380 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 376 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
381 | if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) | 377 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { |
382 | reschedule_retry(r1_bio); | 378 | /* free extra copy of the data pages */ |
383 | else { | 379 | int i = bio->bi_vcnt; |
384 | /* it really is the end of this request */ | 380 | while (i--) |
385 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | 381 | safe_put_page(bio->bi_io_vec[i].bv_page); |
386 | /* free extra copy of the data pages */ | ||
387 | int i = bio->bi_vcnt; | ||
388 | while (i--) | ||
389 | safe_put_page(bio->bi_io_vec[i].bv_page); | ||
390 | } | ||
391 | /* clear the bitmap if all writes complete successfully */ | ||
392 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
393 | r1_bio->sectors, | ||
394 | !test_bit(R1BIO_Degraded, &r1_bio->state), | ||
395 | behind); | ||
396 | md_write_end(r1_bio->mddev); | ||
397 | raid_end_bio_io(r1_bio); | ||
398 | } | 382 | } |
383 | /* clear the bitmap if all writes complete successfully */ | ||
384 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
385 | r1_bio->sectors, | ||
386 | !test_bit(R1BIO_Degraded, &r1_bio->state), | ||
387 | behind); | ||
388 | md_write_end(r1_bio->mddev); | ||
389 | raid_end_bio_io(r1_bio); | ||
399 | } | 390 | } |
400 | 391 | ||
401 | if (to_put) | 392 | if (to_put) |
@@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
788 | struct page **behind_pages = NULL; | 779 | struct page **behind_pages = NULL; |
789 | const int rw = bio_data_dir(bio); | 780 | const int rw = bio_data_dir(bio); |
790 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 781 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
791 | unsigned long do_barriers; | 782 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
792 | mdk_rdev_t *blocked_rdev; | 783 | mdk_rdev_t *blocked_rdev; |
793 | 784 | ||
794 | /* | 785 | /* |
795 | * Register the new request and wait if the reconstruction | 786 | * Register the new request and wait if the reconstruction |
796 | * thread has put up a bar for new requests. | 787 | * thread has put up a bar for new requests. |
797 | * Continue immediately if no resync is active currently. | 788 | * Continue immediately if no resync is active currently. |
798 | * We test barriers_work *after* md_write_start as md_write_start | ||
799 | * may cause the first superblock write, and that will check out | ||
800 | * if barriers work. | ||
801 | */ | 789 | */ |
802 | 790 | ||
803 | md_write_start(mddev, bio); /* wait on superblock update early */ | 791 | md_write_start(mddev, bio); /* wait on superblock update early */ |
@@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
821 | } | 809 | } |
822 | finish_wait(&conf->wait_barrier, &w); | 810 | finish_wait(&conf->wait_barrier, &w); |
823 | } | 811 | } |
824 | if (unlikely(!mddev->barriers_work && | ||
825 | (bio->bi_rw & REQ_HARDBARRIER))) { | ||
826 | if (rw == WRITE) | ||
827 | md_write_end(mddev); | ||
828 | bio_endio(bio, -EOPNOTSUPP); | ||
829 | return 0; | ||
830 | } | ||
831 | 812 | ||
832 | wait_barrier(conf); | 813 | wait_barrier(conf); |
833 | 814 | ||
@@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
959 | atomic_set(&r1_bio->remaining, 0); | 940 | atomic_set(&r1_bio->remaining, 0); |
960 | atomic_set(&r1_bio->behind_remaining, 0); | 941 | atomic_set(&r1_bio->behind_remaining, 0); |
961 | 942 | ||
962 | do_barriers = bio->bi_rw & REQ_HARDBARRIER; | ||
963 | if (do_barriers) | ||
964 | set_bit(R1BIO_Barrier, &r1_bio->state); | ||
965 | |||
966 | bio_list_init(&bl); | 943 | bio_list_init(&bl); |
967 | for (i = 0; i < disks; i++) { | 944 | for (i = 0; i < disks; i++) { |
968 | struct bio *mbio; | 945 | struct bio *mbio; |
@@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
975 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; | 952 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; |
976 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 953 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
977 | mbio->bi_end_io = raid1_end_write_request; | 954 | mbio->bi_end_io = raid1_end_write_request; |
978 | mbio->bi_rw = WRITE | do_barriers | do_sync; | 955 | mbio->bi_rw = WRITE | do_flush_fua | do_sync; |
979 | mbio->bi_private = r1_bio; | 956 | mbio->bi_private = r1_bio; |
980 | 957 | ||
981 | if (behind_pages) { | 958 | if (behind_pages) { |
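raid1 takes the pass-through route instead of md_flush_request(): it latches the caller's flush/FUA bits up front and stamps them onto every mirror write, leaving cache handling to the member devices. Condensed from the hunks above:

    const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
    /* ... for each mirror, clone the master bio, then ... */
    mbio->bi_rw = WRITE | do_flush_fua | do_sync;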
@@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev) | |||
1634 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { | 1611 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { |
1635 | sync_request_write(mddev, r1_bio); | 1612 | sync_request_write(mddev, r1_bio); |
1636 | unplug = 1; | 1613 | unplug = 1; |
1637 | } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { | ||
1638 | /* some requests in the r1bio were REQ_HARDBARRIER | ||
1639 | * requests which failed with -EOPNOTSUPP. Hohumm.. | ||
1640 | * Better resubmit without the barrier. | ||
1641 | * We know which devices to resubmit for, because | ||
1642 | * all others have had their bios[] entry cleared. | ||
1643 | * We already have a nr_pending reference on these rdevs. | ||
1644 | */ | ||
1645 | int i; | ||
1646 | const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); | ||
1647 | clear_bit(R1BIO_BarrierRetry, &r1_bio->state); | ||
1648 | clear_bit(R1BIO_Barrier, &r1_bio->state); | ||
1649 | for (i=0; i < conf->raid_disks; i++) | ||
1650 | if (r1_bio->bios[i]) | ||
1651 | atomic_inc(&r1_bio->remaining); | ||
1652 | for (i=0; i < conf->raid_disks; i++) | ||
1653 | if (r1_bio->bios[i]) { | ||
1654 | struct bio_vec *bvec; | ||
1655 | int j; | ||
1656 | |||
1657 | bio = bio_clone(r1_bio->master_bio, GFP_NOIO); | ||
1658 | /* copy pages from the failed bio, as | ||
1659 | * this might be a write-behind device */ | ||
1660 | __bio_for_each_segment(bvec, bio, j, 0) | ||
1661 | bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page; | ||
1662 | bio_put(r1_bio->bios[i]); | ||
1663 | bio->bi_sector = r1_bio->sector + | ||
1664 | conf->mirrors[i].rdev->data_offset; | ||
1665 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1666 | bio->bi_end_io = raid1_end_write_request; | ||
1667 | bio->bi_rw = WRITE | do_sync; | ||
1668 | bio->bi_private = r1_bio; | ||
1669 | r1_bio->bios[i] = bio; | ||
1670 | generic_make_request(bio); | ||
1671 | } | ||
1672 | } else { | 1614 | } else { |
1673 | int disk; | 1615 | int disk; |
1674 | 1616 | ||
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 5f2d443ae28a..adf8cfd73313 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -117,8 +117,6 @@ struct r1bio_s { | |||
117 | #define R1BIO_IsSync 1 | 117 | #define R1BIO_IsSync 1 |
118 | #define R1BIO_Degraded 2 | 118 | #define R1BIO_Degraded 2 |
119 | #define R1BIO_BehindIO 3 | 119 | #define R1BIO_BehindIO 3 |
120 | #define R1BIO_Barrier 4 | ||
121 | #define R1BIO_BarrierRetry 5 | ||
122 | /* For write-behind requests, we call bi_end_io when | 120 | /* For write-behind requests, we call bi_end_io when |
123 | * the last non-write-behind device completes, providing | 121 | * the last non-write-behind device completes, providing |
124 | * any write was successful. Otherwise we call when | 122 | * any write was successful. Otherwise we call when |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 84718383124d..f0d082f749be 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -800,12 +800,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
800 | int chunk_sects = conf->chunk_mask + 1; | 800 | int chunk_sects = conf->chunk_mask + 1; |
801 | const int rw = bio_data_dir(bio); | 801 | const int rw = bio_data_dir(bio); |
802 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 802 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
803 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | ||
803 | struct bio_list bl; | 804 | struct bio_list bl; |
804 | unsigned long flags; | 805 | unsigned long flags; |
805 | mdk_rdev_t *blocked_rdev; | 806 | mdk_rdev_t *blocked_rdev; |
806 | 807 | ||
807 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 808 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
808 | md_barrier_request(mddev, bio); | 809 | md_flush_request(mddev, bio); |
809 | return 0; | 810 | return 0; |
810 | } | 811 | } |
811 | 812 | ||
@@ -965,7 +966,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
965 | conf->mirrors[d].rdev->data_offset; | 966 | conf->mirrors[d].rdev->data_offset; |
966 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; | 967 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; |
967 | mbio->bi_end_io = raid10_end_write_request; | 968 | mbio->bi_end_io = raid10_end_write_request; |
968 | mbio->bi_rw = WRITE | do_sync; | 969 | mbio->bi_rw = WRITE | do_sync | do_fua; |
969 | mbio->bi_private = r10_bio; | 970 | mbio->bi_private = r10_bio; |
970 | 971 | ||
971 | atomic_inc(&r10_bio->remaining); | 972 | atomic_inc(&r10_bio->remaining); |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 69b0a169e43d..31140d1259dc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -506,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
506 | int rw; | 506 | int rw; |
507 | struct bio *bi; | 507 | struct bio *bi; |
508 | mdk_rdev_t *rdev; | 508 | mdk_rdev_t *rdev; |
509 | if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) | 509 | if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { |
510 | rw = WRITE; | 510 | if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) |
511 | else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | 511 | rw = WRITE_FUA; |
512 | else | ||
513 | rw = WRITE; | ||
514 | } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | ||
512 | rw = READ; | 515 | rw = READ; |
513 | else | 516 | else |
514 | continue; | 517 | continue; |
@@ -1031,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1031 | 1034 | ||
1032 | while (wbi && wbi->bi_sector < | 1035 | while (wbi && wbi->bi_sector < |
1033 | dev->sector + STRIPE_SECTORS) { | 1036 | dev->sector + STRIPE_SECTORS) { |
1037 | if (wbi->bi_rw & REQ_FUA) | ||
1038 | set_bit(R5_WantFUA, &dev->flags); | ||
1034 | tx = async_copy_data(1, wbi, dev->page, | 1039 | tx = async_copy_data(1, wbi, dev->page, |
1035 | dev->sector, tx); | 1040 | dev->sector, tx); |
1036 | wbi = r5_next_bio(wbi, dev->sector); | 1041 | wbi = r5_next_bio(wbi, dev->sector); |
@@ -1048,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
1048 | int pd_idx = sh->pd_idx; | 1053 | int pd_idx = sh->pd_idx; |
1049 | int qd_idx = sh->qd_idx; | 1054 | int qd_idx = sh->qd_idx; |
1050 | int i; | 1055 | int i; |
1056 | bool fua = false; | ||
1051 | 1057 | ||
1052 | pr_debug("%s: stripe %llu\n", __func__, | 1058 | pr_debug("%s: stripe %llu\n", __func__, |
1053 | (unsigned long long)sh->sector); | 1059 | (unsigned long long)sh->sector); |
1054 | 1060 | ||
1061 | for (i = disks; i--; ) | ||
1062 | fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); | ||
1063 | |||
1055 | for (i = disks; i--; ) { | 1064 | for (i = disks; i--; ) { |
1056 | struct r5dev *dev = &sh->dev[i]; | 1065 | struct r5dev *dev = &sh->dev[i]; |
1057 | 1066 | ||
1058 | if (dev->written || i == pd_idx || i == qd_idx) | 1067 | if (dev->written || i == pd_idx || i == qd_idx) { |
1059 | set_bit(R5_UPTODATE, &dev->flags); | 1068 | set_bit(R5_UPTODATE, &dev->flags); |
1069 | if (fua) | ||
1070 | set_bit(R5_WantFUA, &dev->flags); | ||
1071 | } | ||
1060 | } | 1072 | } |
1061 | 1073 | ||
1062 | if (sh->reconstruct_state == reconstruct_state_drain_run) | 1074 | if (sh->reconstruct_state == reconstruct_state_drain_run) |
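raid5 cannot forward FUA directly because writes are staged through the stripe cache, so the intent is remembered per stripe member and re-applied when the member is finally written out. Condensed from the three hunks above:

    /* ops_run_biodrain(): note the caller's FUA intent while copying data in */
    if (wbi->bi_rw & REQ_FUA)
            set_bit(R5_WantFUA, &dev->flags);

    /* ops_complete_reconstruct(): if any member wants FUA, the parity
     * (and everything written in this pass) gets it too */
    for (i = disks; i--; )
            fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);

    /* ops_run_io(): issue the member write as WRITE_FUA when flagged */
    if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
            rw = WRITE_FUA;
    else
            rw = WRITE;

REQ_FLUSH itself never reaches the stripe cache; raid5's make_request() hands it to md_flush_request() (see the hunk further down).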
@@ -3281,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
3281 | 3293 | ||
3282 | if (dec_preread_active) { | 3294 | if (dec_preread_active) { |
3283 | /* We delay this until after ops_run_io so that if make_request | 3295 | /* We delay this until after ops_run_io so that if make_request |
3284 | * is waiting on a barrier, it won't continue until the writes | 3296 | * is waiting on a flush, it won't continue until the writes |
3285 | * have actually been submitted. | 3297 | * have actually been submitted. |
3286 | */ | 3298 | */ |
3287 | atomic_dec(&conf->preread_active_stripes); | 3299 | atomic_dec(&conf->preread_active_stripes); |
@@ -3583,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh) | |||
3583 | 3595 | ||
3584 | if (dec_preread_active) { | 3596 | if (dec_preread_active) { |
3585 | /* We delay this until after ops_run_io so that if make_request | 3597 | /* We delay this until after ops_run_io so that if make_request |
3586 | * is waiting on a barrier, it won't continue until the writes | 3598 | * is waiting on a flush, it won't continue until the writes |
3587 | * have actually been submitted. | 3599 | * have actually been submitted. |
3588 | */ | 3600 | */ |
3589 | atomic_dec(&conf->preread_active_stripes); | 3601 | atomic_dec(&conf->preread_active_stripes); |
@@ -3978,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
3978 | const int rw = bio_data_dir(bi); | 3990 | const int rw = bio_data_dir(bi); |
3979 | int remaining; | 3991 | int remaining; |
3980 | 3992 | ||
3981 | if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) { | 3993 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
3982 | /* Drain all pending writes. We only really need | 3994 | md_flush_request(mddev, bi); |
3983 | * to ensure they have been submitted, but this is | ||
3984 | * easier. | ||
3985 | */ | ||
3986 | mddev->pers->quiesce(mddev, 1); | ||
3987 | mddev->pers->quiesce(mddev, 0); | ||
3988 | md_barrier_request(mddev, bi); | ||
3989 | return 0; | 3995 | return 0; |
3990 | } | 3996 | } |
3991 | 3997 | ||
@@ -4103,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4103 | finish_wait(&conf->wait_for_overlap, &w); | 4109 | finish_wait(&conf->wait_for_overlap, &w); |
4104 | set_bit(STRIPE_HANDLE, &sh->state); | 4110 | set_bit(STRIPE_HANDLE, &sh->state); |
4105 | clear_bit(STRIPE_DELAYED, &sh->state); | 4111 | clear_bit(STRIPE_DELAYED, &sh->state); |
4106 | if (mddev->barrier && | 4112 | if ((bi->bi_rw & REQ_SYNC) && |
4107 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4113 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
4108 | atomic_inc(&conf->preread_active_stripes); | 4114 | atomic_inc(&conf->preread_active_stripes); |
4109 | release_stripe(sh); | 4115 | release_stripe(sh); |
@@ -4126,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4126 | bio_endio(bi, 0); | 4132 | bio_endio(bi, 0); |
4127 | } | 4133 | } |
4128 | 4134 | ||
4129 | if (mddev->barrier) { | ||
4130 | /* We need to wait for the stripes to all be handled. | ||
4131 | * So: wait for preread_active_stripes to drop to 0. | ||
4132 | */ | ||
4133 | wait_event(mddev->thread->wqueue, | ||
4134 | atomic_read(&conf->preread_active_stripes) == 0); | ||
4135 | } | ||
4136 | return 0; | 4135 | return 0; |
4137 | } | 4136 | } |
4138 | 4137 | ||
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 36eaed5dfd6e..2ace0582b409 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -275,6 +275,7 @@ struct r6_state { | |||
275 | * filling | 275 | * filling |
276 | */ | 276 | */ |
277 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ | 277 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ |
278 | #define R5_WantFUA 14 /* Write should be FUA */ | ||
278 | /* | 279 | /* |
279 | * Write method | 280 | * Write method |
280 | */ | 281 | */ |
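The md hunks above move RAID10/RAID5 from barrier emulation to the flush/FUA model: a bio carrying REQ_FLUSH is handed to the md core's md_flush_request(), and REQ_FUA on incoming writes is remembered per stripe through the new R5_WantFUA bit and replayed as WRITE_FUA when the stripe is written to the member disks. A minimal sketch of the personality entry point, mirroring the hunks above (the normal I/O path is elided):

        static int make_request(mddev_t *mddev, struct bio *bio)
        {
                if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                        /* let the md core flush all member devices */
                        md_flush_request(mddev, bio);
                        return 0;
                }
                /* ... queue the bio for normal striped I/O ... */
                return 0;
        }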
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index e876678176be..9c0b42bfe089 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c | |||
@@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock | |||
128 | mq->req = NULL; | 128 | mq->req = NULL; |
129 | 129 | ||
130 | blk_queue_prep_rq(mq->queue, mmc_prep_request); | 130 | blk_queue_prep_rq(mq->queue, mmc_prep_request); |
131 | blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); | ||
132 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); | 131 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); |
133 | if (mmc_can_erase(card)) { | 132 | if (mmc_can_erase(card)) { |
134 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); | 133 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); |
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 38e6fa9a2012..aa95f1001761 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c | |||
@@ -2196,7 +2196,6 @@ static void dasd_setup_queue(struct dasd_block *block) | |||
2196 | */ | 2196 | */ |
2197 | blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); | 2197 | blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); |
2198 | blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); | 2198 | blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); |
2199 | blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); | ||
2200 | } | 2199 | } |
2201 | 2200 | ||
2202 | /* | 2201 | /* |
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 93984c9dfe14..aee73fafccc8 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c | |||
@@ -2850,12 +2850,6 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) | |||
2850 | aic_dev->r_total++; | 2850 | aic_dev->r_total++; |
2851 | ptr = aic_dev->r_bins; | 2851 | ptr = aic_dev->r_bins; |
2852 | } | 2852 | } |
2853 | if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) | ||
2854 | { | ||
2855 | aic_dev->barrier_total++; | ||
2856 | if(scb->tag_action == MSG_ORDERED_Q_TAG) | ||
2857 | aic_dev->ordered_total++; | ||
2858 | } | ||
2859 | x = scb->sg_length; | 2853 | x = scb->sg_length; |
2860 | x >>= 10; | 2854 | x >>= 10; |
2861 | for(i=0; i<6; i++) | 2855 | for(i=0; i<6; i++) |
@@ -10125,7 +10119,6 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, | |||
10125 | struct aic_dev_data *aic_dev = cmd->device->hostdata; | 10119 | struct aic_dev_data *aic_dev = cmd->device->hostdata; |
10126 | struct scsi_device *sdptr = cmd->device; | 10120 | struct scsi_device *sdptr = cmd->device; |
10127 | unsigned char tindex = TARGET_INDEX(cmd); | 10121 | unsigned char tindex = TARGET_INDEX(cmd); |
10128 | struct request *req = cmd->request; | ||
10129 | int use_sg; | 10122 | int use_sg; |
10130 | 10123 | ||
10131 | mask = (0x01 << tindex); | 10124 | mask = (0x01 << tindex); |
@@ -10144,19 +10137,8 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, | |||
10144 | /* We always force TEST_UNIT_READY to untagged */ | 10137 | /* We always force TEST_UNIT_READY to untagged */ |
10145 | if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) | 10138 | if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) |
10146 | { | 10139 | { |
10147 | if (req->cmd_flags & REQ_HARDBARRIER) | 10140 | hscb->control |= MSG_SIMPLE_Q_TAG; |
10148 | { | 10141 | scb->tag_action = MSG_SIMPLE_Q_TAG; |
10149 | if(sdptr->ordered_tags) | ||
10150 | { | ||
10151 | hscb->control |= MSG_ORDERED_Q_TAG; | ||
10152 | scb->tag_action = MSG_ORDERED_Q_TAG; | ||
10153 | } | ||
10154 | } | ||
10155 | else | ||
10156 | { | ||
10157 | hscb->control |= MSG_SIMPLE_Q_TAG; | ||
10158 | scb->tag_action = MSG_SIMPLE_Q_TAG; | ||
10159 | } | ||
10160 | } | 10142 | } |
10161 | } | 10143 | } |
10162 | if ( !(aic_dev->dtr_pending) && | 10144 | if ( !(aic_dev->dtr_pending) && |
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index f0cfba9a1fc8..535085cd27ec 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c | |||
@@ -130,17 +130,6 @@ static void sas_scsi_task_done(struct sas_task *task) | |||
130 | sc->scsi_done(sc); | 130 | sc->scsi_done(sc); |
131 | } | 131 | } |
132 | 132 | ||
133 | static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd) | ||
134 | { | ||
135 | enum task_attribute ta = TASK_ATTR_SIMPLE; | ||
136 | if (cmd->request && blk_rq_tagged(cmd->request)) { | ||
137 | if (cmd->device->ordered_tags && | ||
138 | (cmd->request->cmd_flags & REQ_HARDBARRIER)) | ||
139 | ta = TASK_ATTR_ORDERED; | ||
140 | } | ||
141 | return ta; | ||
142 | } | ||
143 | |||
144 | static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, | 133 | static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, |
145 | struct domain_device *dev, | 134 | struct domain_device *dev, |
146 | gfp_t gfp_flags) | 135 | gfp_t gfp_flags) |
@@ -160,7 +149,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, | |||
160 | task->ssp_task.retry_count = 1; | 149 | task->ssp_task.retry_count = 1; |
161 | int_to_scsilun(cmd->device->lun, &lun); | 150 | int_to_scsilun(cmd->device->lun, &lun); |
162 | memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); | 151 | memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); |
163 | task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); | 152 | task->ssp_task.task_attr = TASK_ATTR_SIMPLE; |
164 | memcpy(task->ssp_task.cdb, cmd->cmnd, 16); | 153 | memcpy(task->ssp_task.cdb, cmd->cmnd, 16); |
165 | 154 | ||
166 | task->scatter = scsi_sglist(cmd); | 155 | task->scatter = scsi_sglist(cmd); |
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ffa0689ee840..20514c47a5aa 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
@@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gendisk *disk) | |||
2109 | struct scsi_disk *sdkp = scsi_disk(disk); | 2109 | struct scsi_disk *sdkp = scsi_disk(disk); |
2110 | struct scsi_device *sdp = sdkp->device; | 2110 | struct scsi_device *sdp = sdkp->device; |
2111 | unsigned char *buffer; | 2111 | unsigned char *buffer; |
2112 | unsigned ordered; | 2112 | unsigned flush = 0; |
2113 | 2113 | ||
2114 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, | 2114 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, |
2115 | "sd_revalidate_disk\n")); | 2115 | "sd_revalidate_disk\n")); |
@@ -2151,17 +2151,15 @@ static int sd_revalidate_disk(struct gendisk *disk) | |||
2151 | 2151 | ||
2152 | /* | 2152 | /* |
2153 | * We now have all cache related info, determine how we deal | 2153 | * We now have all cache related info, determine how we deal |
2154 | * with ordered requests. Note that as the current SCSI | 2154 | * with flush requests. |
2155 | * dispatch function can alter request order, we cannot use | ||
2156 | * QUEUE_ORDERED_TAG_* even when ordered tag is supported. | ||
2157 | */ | 2155 | */ |
2158 | if (sdkp->WCE) | 2156 | if (sdkp->WCE) { |
2159 | ordered = sdkp->DPOFUA | 2157 | flush |= REQ_FLUSH; |
2160 | ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; | 2158 | if (sdkp->DPOFUA) |
2161 | else | 2159 | flush |= REQ_FUA; |
2162 | ordered = QUEUE_ORDERED_DRAIN; | 2160 | } |
2163 | 2161 | ||
2164 | blk_queue_ordered(sdkp->disk->queue, ordered); | 2162 | blk_queue_flush(sdkp->disk->queue, flush); |
2165 | 2163 | ||
2166 | set_capacity(disk, sdkp->capacity); | 2164 | set_capacity(disk, sdkp->capacity); |
2167 | kfree(buffer); | 2165 | kfree(buffer); |
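With the queue-ordering modes gone, sd describes the disk's cache to the block layer directly: REQ_FLUSH if a volatile write cache is enabled, plus REQ_FUA if the device accepts DPO/FUA writes. A sketch of the same idiom for any driver setting up its queue (the capability variables are illustrative stand-ins):

        unsigned int flush = 0;

        if (write_cache_enabled)        /* sdkp->WCE in the hunk above */
                flush |= REQ_FLUSH;
        if (supports_fua)               /* sdkp->DPOFUA in the hunk above */
                flush |= REQ_FUA;
        blk_queue_flush(q, flush);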
diff --git a/fs/block_dev.c b/fs/block_dev.c index 50e8c8582faa..b737451e2e9d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -370,7 +370,7 @@ int blkdev_fsync(struct file *filp, int datasync) | |||
370 | */ | 370 | */ |
371 | mutex_unlock(&bd_inode->i_mutex); | 371 | mutex_unlock(&bd_inode->i_mutex); |
372 | 372 | ||
373 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); | 373 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); |
374 | if (error == -EOPNOTSUPP) | 374 | if (error == -EOPNOTSUPP) |
375 | error = 0; | 375 | error = 0; |
376 | 376 | ||
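blkdev_issue_flush() loses its flags word and now always waits for completion, so a durability point in a filesystem reduces to one call. A minimal sketch, assuming bdev is an open struct block_device:

        int err;

        /* push everything sitting in the drive's volatile cache to media */
        err = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
        if (err == -EOPNOTSUPP)         /* nothing to flush; treat as success */
                err = 0;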
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 64f10082f048..5e789f4a3ed0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -2063,7 +2063,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
2063 | if (uptodate) { | 2063 | if (uptodate) { |
2064 | set_buffer_uptodate(bh); | 2064 | set_buffer_uptodate(bh); |
2065 | } else { | 2065 | } else { |
2066 | if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { | 2066 | if (printk_ratelimit()) { |
2067 | printk(KERN_WARNING "lost page write due to " | 2067 | printk(KERN_WARNING "lost page write due to " |
2068 | "I/O error on %s\n", | 2068 | "I/O error on %s\n", |
2069 | bdevname(bh->b_bdev, b)); | 2069 | bdevname(bh->b_bdev, b)); |
@@ -2200,21 +2200,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2200 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2200 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2201 | } | 2201 | } |
2202 | 2202 | ||
2203 | if (i == last_barrier && do_barriers && device->barriers) { | 2203 | if (i == last_barrier && do_barriers) |
2204 | ret = submit_bh(WRITE_BARRIER, bh); | 2204 | ret = submit_bh(WRITE_FLUSH_FUA, bh); |
2205 | if (ret == -EOPNOTSUPP) { | 2205 | else |
2206 | printk("btrfs: disabling barriers on dev %s\n", | ||
2207 | device->name); | ||
2208 | set_buffer_uptodate(bh); | ||
2209 | device->barriers = 0; | ||
2210 | /* one reference for submit_bh */ | ||
2211 | get_bh(bh); | ||
2212 | lock_buffer(bh); | ||
2213 | ret = submit_bh(WRITE_SYNC, bh); | ||
2214 | } | ||
2215 | } else { | ||
2216 | ret = submit_bh(WRITE_SYNC, bh); | 2206 | ret = submit_bh(WRITE_SYNC, bh); |
2217 | } | ||
2218 | 2207 | ||
2219 | if (ret) | 2208 | if (ret) |
2220 | errors++; | 2209 | errors++; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d094002a57..0b81ecdb101c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1695,8 +1695,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1695 | static void btrfs_issue_discard(struct block_device *bdev, | 1695 | static void btrfs_issue_discard(struct block_device *bdev, |
1696 | u64 start, u64 len) | 1696 | u64 start, u64 len) |
1697 | { | 1697 | { |
1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); |
1699 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
1700 | } | 1699 | } |
1701 | 1700 | ||
1702 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1701 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
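Discards follow the same simplification: blkdev_issue_discard() always waits, and its last argument is now a plain flags word, 0 for a normal discard or BLKDEV_DISCARD_SECURE for a secure erase (see the blkdev.h hunk at the end of this patch). Sketch, with start/len in bytes as in btrfs_issue_discard() above:

        int ret;

        ret = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
        if (ret)
                printk(KERN_WARNING "discard failed: %d\n", ret); /* illustrative */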
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd318ff280b2..e25e46a8b4e2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -398,7 +398,6 @@ static noinline int device_list_add(const char *path, | |||
398 | device->work.func = pending_bios_fn; | 398 | device->work.func = pending_bios_fn; |
399 | memcpy(device->uuid, disk_super->dev_item.uuid, | 399 | memcpy(device->uuid, disk_super->dev_item.uuid, |
400 | BTRFS_UUID_SIZE); | 400 | BTRFS_UUID_SIZE); |
401 | device->barriers = 1; | ||
402 | spin_lock_init(&device->io_lock); | 401 | spin_lock_init(&device->io_lock); |
403 | device->name = kstrdup(path, GFP_NOFS); | 402 | device->name = kstrdup(path, GFP_NOFS); |
404 | if (!device->name) { | 403 | if (!device->name) { |
@@ -462,7 +461,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
462 | device->devid = orig_dev->devid; | 461 | device->devid = orig_dev->devid; |
463 | device->work.func = pending_bios_fn; | 462 | device->work.func = pending_bios_fn; |
464 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); | 463 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); |
465 | device->barriers = 1; | ||
466 | spin_lock_init(&device->io_lock); | 464 | spin_lock_init(&device->io_lock); |
467 | INIT_LIST_HEAD(&device->dev_list); | 465 | INIT_LIST_HEAD(&device->dev_list); |
468 | INIT_LIST_HEAD(&device->dev_alloc_list); | 466 | INIT_LIST_HEAD(&device->dev_alloc_list); |
@@ -1489,7 +1487,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1489 | trans = btrfs_start_transaction(root, 0); | 1487 | trans = btrfs_start_transaction(root, 0); |
1490 | lock_chunks(root); | 1488 | lock_chunks(root); |
1491 | 1489 | ||
1492 | device->barriers = 1; | ||
1493 | device->writeable = 1; | 1490 | device->writeable = 1; |
1494 | device->work.func = pending_bios_fn; | 1491 | device->work.func = pending_bios_fn; |
1495 | generate_random_uuid(device->uuid); | 1492 | generate_random_uuid(device->uuid); |
@@ -3084,7 +3081,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
3084 | return NULL; | 3081 | return NULL; |
3085 | list_add(&device->dev_list, | 3082 | list_add(&device->dev_list, |
3086 | &fs_devices->devices); | 3083 | &fs_devices->devices); |
3087 | device->barriers = 1; | ||
3088 | device->dev_root = root->fs_info->dev_root; | 3084 | device->dev_root = root->fs_info->dev_root; |
3089 | device->devid = devid; | 3085 | device->devid = devid; |
3090 | device->work.func = pending_bios_fn; | 3086 | device->work.func = pending_bios_fn; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 31b0fabdd2ea..2b638b6e4eea 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -42,7 +42,6 @@ struct btrfs_device { | |||
42 | int running_pending; | 42 | int running_pending; |
43 | u64 generation; | 43 | u64 generation; |
44 | 44 | ||
45 | int barriers; | ||
46 | int writeable; | 45 | int writeable; |
47 | int in_fs_metadata; | 46 | int in_fs_metadata; |
48 | 47 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca279d1c..7f0b9b083f77 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
156 | if (uptodate) { | 156 | if (uptodate) { |
157 | set_buffer_uptodate(bh); | 157 | set_buffer_uptodate(bh); |
158 | } else { | 158 | } else { |
159 | if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { | 159 | if (!quiet_error(bh)) { |
160 | buffer_io_error(bh); | 160 | buffer_io_error(bh); |
161 | printk(KERN_WARNING "lost page write due to " | 161 | printk(KERN_WARNING "lost page write due to " |
162 | "I/O error on %s\n", | 162 | "I/O error on %s\n", |
@@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | |||
2891 | 2891 | ||
2892 | if (err == -EOPNOTSUPP) { | 2892 | if (err == -EOPNOTSUPP) { |
2893 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | 2893 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); |
2894 | set_bit(BH_Eopnotsupp, &bh->b_state); | ||
2895 | } | 2894 | } |
2896 | 2895 | ||
2897 | if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) | 2896 | if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) |
@@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) | |||
3031 | bh->b_end_io = end_buffer_write_sync; | 3030 | bh->b_end_io = end_buffer_write_sync; |
3032 | ret = submit_bh(rw, bh); | 3031 | ret = submit_bh(rw, bh); |
3033 | wait_on_buffer(bh); | 3032 | wait_on_buffer(bh); |
3034 | if (buffer_eopnotsupp(bh)) { | ||
3035 | clear_buffer_eopnotsupp(bh); | ||
3036 | ret = -EOPNOTSUPP; | ||
3037 | } | ||
3038 | if (!ret && !buffer_uptodate(bh)) | 3033 | if (!ret && !buffer_uptodate(bh)) |
3039 | ret = -EIO; | 3034 | ret = -EIO; |
3040 | } else { | 3035 | } else { |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d7e9f74dc3a6..09b13bb34c94 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -90,7 +90,6 @@ int ext3_sync_file(struct file *file, int datasync) | |||
90 | * storage | 90 | * storage |
91 | */ | 91 | */ |
92 | if (needs_barrier) | 92 | if (needs_barrier) |
93 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 93 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
94 | BLKDEV_IFL_WAIT); | ||
95 | return ret; | 94 | return ret; |
96 | } | 95 | } |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 592adf2e546e..3f3ff5ee8f9d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -128,10 +128,9 @@ int ext4_sync_file(struct file *file, int datasync) | |||
128 | (journal->j_fs_dev != journal->j_dev) && | 128 | (journal->j_fs_dev != journal->j_dev) && |
129 | (journal->j_flags & JBD2_BARRIER)) | 129 | (journal->j_flags & JBD2_BARRIER)) |
130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | 130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
131 | NULL, BLKDEV_IFL_WAIT); | 131 | NULL); |
132 | ret = jbd2_log_wait_commit(journal, commit_tid); | 132 | ret = jbd2_log_wait_commit(journal, commit_tid); |
133 | } else if (journal->j_flags & JBD2_BARRIER) | 133 | } else if (journal->j_flags & JBD2_BARRIER) |
134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
135 | BLKDEV_IFL_WAIT); | ||
136 | return ret; | 135 | return ret; |
137 | } | 136 | } |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..19aa0d44d822 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2566,7 +2566,7 @@ static inline void ext4_issue_discard(struct super_block *sb, | |||
2566 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2566 | discard_block = block + ext4_group_first_block_no(sb, block_group); |
2567 | trace_ext4_discard_blocks(sb, | 2567 | trace_ext4_discard_blocks(sb, |
2568 | (unsigned long long) discard_block, count); | 2568 | (unsigned long long) discard_block, count); |
2569 | ret = sb_issue_discard(sb, discard_block, count); | 2569 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2570 | if (ret == EOPNOTSUPP) { | 2570 | if (ret == EOPNOTSUPP) { |
2571 | ext4_warning(sb, "discard not supported, disabling"); | 2571 | ext4_warning(sb, "discard not supported, disabling"); |
2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | 2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); |
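The sb_issue_discard() convenience wrapper gains the same gfp_mask and flags arguments, so the filesystem picks the allocation context instead of the helper hard-coding it; the start block and count stay in filesystem-block units. A one-line sketch matching the ext4 and FAT call sites:

        int ret = sb_issue_discard(sb, first_block, nr_blocks, GFP_NOFS, 0);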
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 81184d3b75a3..b47d2c9f4fa1 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -577,7 +577,8 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
577 | 577 | ||
578 | sb_issue_discard(sb, | 578 | sb_issue_discard(sb, |
579 | fat_clus_to_blknr(sbi, first_cl), | 579 | fat_clus_to_blknr(sbi, first_cl), |
580 | nr_clus * sbi->sec_per_clus); | 580 | nr_clus * sbi->sec_per_clus, |
581 | GFP_NOFS, 0); | ||
581 | 582 | ||
582 | first_cl = cluster; | 583 | first_cl = cluster; |
583 | } | 584 | } |
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f2356388..970e682ea754 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) | |||
255 | 255 | ||
256 | for (i = 0; i < nr_bhs; i++) { | 256 | for (i = 0; i < nr_bhs; i++) { |
257 | wait_on_buffer(bhs[i]); | 257 | wait_on_buffer(bhs[i]); |
258 | if (buffer_eopnotsupp(bhs[i])) { | 258 | if (!err && !buffer_uptodate(bhs[i])) |
259 | clear_buffer_eopnotsupp(bhs[i]); | ||
260 | err = -EOPNOTSUPP; | ||
261 | } else if (!err && !buffer_uptodate(bhs[i])) | ||
262 | err = -EIO; | 259 | err = -EIO; |
263 | } | 260 | } |
264 | return err; | 261 | return err; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ac750bd31a6f..eb01f3575e10 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -592,22 +592,13 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
592 | lh->lh_hash = cpu_to_be32(hash); | 592 | lh->lh_hash = cpu_to_be32(hash); |
593 | 593 | ||
594 | bh->b_end_io = end_buffer_write_sync; | 594 | bh->b_end_io = end_buffer_write_sync; |
595 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | ||
596 | goto skip_barrier; | ||
597 | get_bh(bh); | 595 | get_bh(bh); |
598 | submit_bh(WRITE_BARRIER | REQ_META, bh); | 596 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
599 | wait_on_buffer(bh); | ||
600 | if (buffer_eopnotsupp(bh)) { | ||
601 | clear_buffer_eopnotsupp(bh); | ||
602 | set_buffer_uptodate(bh); | ||
603 | fs_info(sdp, "barrier sync failed - disabling barriers\n"); | ||
604 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
605 | lock_buffer(bh); | ||
606 | skip_barrier: | ||
607 | get_bh(bh); | ||
608 | submit_bh(WRITE_SYNC | REQ_META, bh); | 597 | submit_bh(WRITE_SYNC | REQ_META, bh); |
609 | wait_on_buffer(bh); | 598 | else |
610 | } | 599 | submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); |
600 | wait_on_buffer(bh); | ||
601 | |||
611 | if (!buffer_uptodate(bh)) | 602 | if (!buffer_uptodate(bh)) |
612 | gfs2_io_error_bh(sdp, bh); | 603 | gfs2_io_error_bh(sdp, bh); |
613 | brelse(bh); | 604 | brelse(bh); |
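A recurring theme in the filesystem hunks: the -EOPNOTSUPP retry-without-barrier dance disappears, because REQ_FLUSH/REQ_FUA bits the device cannot honour are dropped or emulated by the block layer rather than failing the request. Writing a log header durably therefore becomes a straight-line pattern (buffer locking and refcounting as in the surrounding code):

        /* flush prior writes, then write bh with FUA so it is on stable
         * media when the wait returns; degraded transparently on devices
         * without a volatile cache */
        submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
        wait_on_buffer(bh);
        if (!buffer_uptodate(bh))
                handle_io_error(bh);    /* illustrative error path */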
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fb67f593f408..bef3ab6cf5c1 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -866,8 +866,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
866 | if ((start + nr_sects) != blk) { | 866 | if ((start + nr_sects) != blk) { |
867 | rv = blkdev_issue_discard(bdev, start, | 867 | rv = blkdev_issue_discard(bdev, start, |
868 | nr_sects, GFP_NOFS, | 868 | nr_sects, GFP_NOFS, |
869 | BLKDEV_IFL_WAIT | | 869 | 0); |
870 | BLKDEV_IFL_BARRIER); | ||
871 | if (rv) | 870 | if (rv) |
872 | goto fail; | 871 | goto fail; |
873 | nr_sects = 0; | 872 | nr_sects = 0; |
@@ -881,8 +880,7 @@ start_new_extent: | |||
881 | } | 880 | } |
882 | } | 881 | } |
883 | if (nr_sects) { | 882 | if (nr_sects) { |
884 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, | 883 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); |
885 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
886 | if (rv) | 884 | if (rv) |
887 | goto fail; | 885 | goto fail; |
888 | } | 886 | } |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 3f030e9efea6..85a6883c0aca 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -137,34 +137,10 @@ static int journal_write_commit_record(journal_t *journal, | |||
137 | JBUFFER_TRACE(descriptor, "write commit block"); | 137 | JBUFFER_TRACE(descriptor, "write commit block"); |
138 | set_buffer_dirty(bh); | 138 | set_buffer_dirty(bh); |
139 | 139 | ||
140 | if (journal->j_flags & JFS_BARRIER) { | 140 | if (journal->j_flags & JFS_BARRIER) |
141 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); | 141 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA); |
142 | 142 | else | |
143 | /* | ||
144 | * Is it possible for another commit to fail at roughly | ||
145 | * the same time as this one? If so, we don't want to | ||
146 | * trust the barrier flag in the super, but instead want | ||
147 | * to remember if we sent a barrier request | ||
148 | */ | ||
149 | if (ret == -EOPNOTSUPP) { | ||
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | ||
153 | "JBD: barrier-based sync failed on %s - " | ||
154 | "disabling barriers\n", | ||
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | ||
157 | journal->j_flags &= ~JFS_BARRIER; | ||
158 | spin_unlock(&journal->j_state_lock); | ||
159 | |||
160 | /* And try again, without the barrier */ | ||
161 | set_buffer_uptodate(bh); | ||
162 | set_buffer_dirty(bh); | ||
163 | ret = sync_dirty_buffer(bh); | ||
164 | } | ||
165 | } else { | ||
166 | ret = sync_dirty_buffer(bh); | 143 | ret = sync_dirty_buffer(bh); |
167 | } | ||
168 | 144 | ||
169 | put_bh(bh); /* One for getblk() */ | 145 | put_bh(bh); /* One for getblk() */ |
170 | journal_put_journal_head(descriptor); | 146 | journal_put_journal_head(descriptor); |
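For code that writes buffer heads synchronously, __sync_dirty_buffer() now takes the rw flags directly, so "the commit block must reach stable media after everything before it" is one call instead of a barrier attempt plus fallback:

        /* preflush the cache, then write bh as a FUA write and wait for it */
        ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);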
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 5247e7ffdcb4..6571a056e55d 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -532,8 +532,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
532 | */ | 532 | */ |
533 | if ((journal->j_fs_dev != journal->j_dev) && | 533 | if ((journal->j_fs_dev != journal->j_dev) && |
534 | (journal->j_flags & JBD2_BARRIER)) | 534 | (journal->j_flags & JBD2_BARRIER)) |
535 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, | 535 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
536 | BLKDEV_IFL_WAIT); | ||
537 | if (!(journal->j_flags & JBD2_ABORT)) | 536 | if (!(journal->j_flags & JBD2_ABORT)) |
538 | jbd2_journal_update_superblock(journal, 1); | 537 | jbd2_journal_update_superblock(journal, 1); |
539 | return 0; | 538 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 80910f51d4b4..bc6be8bda1cc 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -134,25 +134,11 @@ static int journal_submit_commit_record(journal_t *journal, | |||
134 | 134 | ||
135 | if (journal->j_flags & JBD2_BARRIER && | 135 | if (journal->j_flags & JBD2_BARRIER && |
136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) |
138 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); | 138 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); |
139 | if (ret == -EOPNOTSUPP) { | 139 | else |
140 | printk(KERN_WARNING | ||
141 | "JBD2: Disabling barriers on %s, " | ||
142 | "not supported by device\n", journal->j_devname); | ||
143 | write_lock(&journal->j_state_lock); | ||
144 | journal->j_flags &= ~JBD2_BARRIER; | ||
145 | write_unlock(&journal->j_state_lock); | ||
146 | |||
147 | /* And try again, without the barrier */ | ||
148 | lock_buffer(bh); | ||
149 | set_buffer_uptodate(bh); | ||
150 | clear_buffer_dirty(bh); | ||
151 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
152 | } | ||
153 | } else { | ||
154 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 140 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
155 | } | 141 | |
156 | *cbh = bh; | 142 | *cbh = bh; |
157 | return ret; | 143 | return ret; |
158 | } | 144 | } |
@@ -166,29 +152,8 @@ static int journal_wait_on_commit_record(journal_t *journal, | |||
166 | { | 152 | { |
167 | int ret = 0; | 153 | int ret = 0; |
168 | 154 | ||
169 | retry: | ||
170 | clear_buffer_dirty(bh); | 155 | clear_buffer_dirty(bh); |
171 | wait_on_buffer(bh); | 156 | wait_on_buffer(bh); |
172 | if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { | ||
173 | printk(KERN_WARNING | ||
174 | "JBD2: %s: disabling barries on %s - not supported " | ||
175 | "by device\n", __func__, journal->j_devname); | ||
176 | write_lock(&journal->j_state_lock); | ||
177 | journal->j_flags &= ~JBD2_BARRIER; | ||
178 | write_unlock(&journal->j_state_lock); | ||
179 | |||
180 | lock_buffer(bh); | ||
181 | clear_buffer_dirty(bh); | ||
182 | set_buffer_uptodate(bh); | ||
183 | bh->b_end_io = journal_end_buffer_io_sync; | ||
184 | |||
185 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
186 | if (ret) { | ||
187 | unlock_buffer(bh); | ||
188 | return ret; | ||
189 | } | ||
190 | goto retry; | ||
191 | } | ||
192 | 157 | ||
193 | if (unlikely(!buffer_uptodate(bh))) | 158 | if (unlikely(!buffer_uptodate(bh))) |
194 | ret = -EIO; | 159 | ret = -EIO; |
@@ -701,6 +666,16 @@ start_journal_io: | |||
701 | } | 666 | } |
702 | } | 667 | } |
703 | 668 | ||
669 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
670 | if (err) { | ||
671 | printk(KERN_WARNING | ||
672 | "JBD2: Detected IO errors while flushing file data " | ||
673 | "on %s\n", journal->j_devname); | ||
674 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
675 | jbd2_journal_abort(journal, err); | ||
676 | err = 0; | ||
677 | } | ||
678 | |||
704 | /* | 679 | /* |
705 | * If the journal is not located on the file system device, | 680 | * If the journal is not located on the file system device, |
706 | * then we must flush the file system device before we issue | 681 | * then we must flush the file system device before we issue |
@@ -709,8 +684,7 @@ start_journal_io: | |||
709 | if (commit_transaction->t_flushed_data_blocks && | 684 | if (commit_transaction->t_flushed_data_blocks && |
710 | (journal->j_fs_dev != journal->j_dev) && | 685 | (journal->j_fs_dev != journal->j_dev) && |
711 | (journal->j_flags & JBD2_BARRIER)) | 686 | (journal->j_flags & JBD2_BARRIER)) |
712 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, | 687 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
713 | BLKDEV_IFL_WAIT); | ||
714 | 688 | ||
715 | /* Done it all: now write the commit record asynchronously. */ | 689 | /* Done it all: now write the commit record asynchronously. */ |
716 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 690 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -719,19 +693,6 @@ start_journal_io: | |||
719 | &cbh, crc32_sum); | 693 | &cbh, crc32_sum); |
720 | if (err) | 694 | if (err) |
721 | __jbd2_journal_abort_hard(journal); | 695 | __jbd2_journal_abort_hard(journal); |
722 | if (journal->j_flags & JBD2_BARRIER) | ||
723 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, | ||
724 | BLKDEV_IFL_WAIT); | ||
725 | } | ||
726 | |||
727 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
728 | if (err) { | ||
729 | printk(KERN_WARNING | ||
730 | "JBD2: Detected IO errors while flushing file data " | ||
731 | "on %s\n", journal->j_devname); | ||
732 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
733 | jbd2_journal_abort(journal, err); | ||
734 | err = 0; | ||
735 | } | 696 | } |
736 | 697 | ||
737 | /* Lo and behold: we have just managed to send a transaction to | 698 | /* Lo and behold: we have just managed to send a transaction to |
@@ -845,6 +806,11 @@ wait_for_iobuf: | |||
845 | } | 806 | } |
846 | if (!err && !is_journal_aborted(journal)) | 807 | if (!err && !is_journal_aborted(journal)) |
847 | err = journal_wait_on_commit_record(journal, cbh); | 808 | err = journal_wait_on_commit_record(journal, cbh); |
809 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
810 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && | ||
811 | journal->j_flags & JBD2_BARRIER) { | ||
812 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); | ||
813 | } | ||
848 | 814 | ||
849 | if (err) | 815 | if (err) |
850 | jbd2_journal_abort(journal, err); | 816 | jbd2_journal_abort(journal, err); |
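jbd2 keeps the async-commit optimisation but reorders it for the new model: inode data buffers are flushed before the commit record goes out, the commit record is submitted with WRITE_FLUSH_FUA when async commit is off, and with async commit the explicit cache flush is issued only after the commit record has been waited on, roughly:

        if (JBD2_HAS_INCOMPAT_FEATURE(journal,
                                      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
            (journal->j_flags & JBD2_BARRIER)) {
                /* checksummed commit block: ordering comes from the checksum,
                 * durability from this flush after the wait */
                blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
        }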
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 9f4913f78408..f3b75206e956 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -177,17 +177,9 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) | |||
177 | 177 | ||
178 | retry: | 178 | retry: |
179 | set_buffer_dirty(nilfs->ns_sbh[0]); | 179 | set_buffer_dirty(nilfs->ns_sbh[0]); |
180 | |||
181 | if (nilfs_test_opt(sbi, BARRIER)) { | 180 | if (nilfs_test_opt(sbi, BARRIER)) { |
182 | err = __sync_dirty_buffer(nilfs->ns_sbh[0], | 181 | err = __sync_dirty_buffer(nilfs->ns_sbh[0], |
183 | WRITE_SYNC | WRITE_BARRIER); | 182 | WRITE_SYNC | WRITE_FLUSH_FUA); |
184 | if (err == -EOPNOTSUPP) { | ||
185 | nilfs_warning(sbi->s_super, __func__, | ||
186 | "barrier-based sync failed. " | ||
187 | "disabling barriers\n"); | ||
188 | nilfs_clear_opt(sbi, BARRIER); | ||
189 | goto retry; | ||
190 | } | ||
191 | } else { | 183 | } else { |
192 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | 184 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); |
193 | } | 185 | } |
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index ba7c10c917fc..d27715103376 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -775,9 +775,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
775 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 775 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
776 | start * sects_per_block, | 776 | start * sects_per_block, |
777 | nblocks * sects_per_block, | 777 | nblocks * sects_per_block, |
778 | GFP_NOFS, | 778 | GFP_NOFS, 0); |
779 | BLKDEV_IFL_WAIT | | ||
780 | BLKDEV_IFL_BARRIER); | ||
781 | if (ret < 0) | 779 | if (ret < 0) |
782 | return ret; | 780 | return ret; |
783 | nblocks = 0; | 781 | nblocks = 0; |
@@ -787,8 +785,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
787 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 785 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
788 | start * sects_per_block, | 786 | start * sects_per_block, |
789 | nblocks * sects_per_block, | 787 | nblocks * sects_per_block, |
790 | GFP_NOFS, | 788 | GFP_NOFS, 0); |
791 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
792 | return ret; | 789 | return ret; |
793 | } | 790 | } |
794 | 791 | ||
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6846371498b6..91f080cc76c8 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -152,8 +152,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync) | |||
152 | barrier_done = reiserfs_commit_for_inode(inode); | 152 | barrier_done = reiserfs_commit_for_inode(inode); |
153 | reiserfs_write_unlock(inode->i_sb); | 153 | reiserfs_write_unlock(inode->i_sb); |
154 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) | 154 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) |
155 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 155 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
156 | BLKDEV_IFL_WAIT); | ||
157 | if (barrier_done < 0) | 156 | if (barrier_done < 0) |
158 | return barrier_done; | 157 | return barrier_done; |
159 | return (err < 0) ? -EIO : 0; | 158 | return (err < 0) ? -EIO : 0; |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 812e2c05aa29..076c8b194682 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -138,13 +138,6 @@ static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) | |||
138 | return 0; | 138 | return 0; |
139 | } | 139 | } |
140 | 140 | ||
141 | static void disable_barrier(struct super_block *s) | ||
142 | { | ||
143 | REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); | ||
144 | printk("reiserfs: disabling flush barriers on %s\n", | ||
145 | reiserfs_bdevname(s)); | ||
146 | } | ||
147 | |||
148 | static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block | 141 | static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block |
149 | *sb) | 142 | *sb) |
150 | { | 143 | { |
@@ -677,30 +670,6 @@ static void submit_ordered_buffer(struct buffer_head *bh) | |||
677 | submit_bh(WRITE, bh); | 670 | submit_bh(WRITE, bh); |
678 | } | 671 | } |
679 | 672 | ||
680 | static int submit_barrier_buffer(struct buffer_head *bh) | ||
681 | { | ||
682 | get_bh(bh); | ||
683 | bh->b_end_io = reiserfs_end_ordered_io; | ||
684 | clear_buffer_dirty(bh); | ||
685 | if (!buffer_uptodate(bh)) | ||
686 | BUG(); | ||
687 | return submit_bh(WRITE_BARRIER, bh); | ||
688 | } | ||
689 | |||
690 | static void check_barrier_completion(struct super_block *s, | ||
691 | struct buffer_head *bh) | ||
692 | { | ||
693 | if (buffer_eopnotsupp(bh)) { | ||
694 | clear_buffer_eopnotsupp(bh); | ||
695 | disable_barrier(s); | ||
696 | set_buffer_uptodate(bh); | ||
697 | set_buffer_dirty(bh); | ||
698 | reiserfs_write_unlock(s); | ||
699 | sync_dirty_buffer(bh); | ||
700 | reiserfs_write_lock(s); | ||
701 | } | ||
702 | } | ||
703 | |||
704 | #define CHUNK_SIZE 32 | 673 | #define CHUNK_SIZE 32 |
705 | struct buffer_chunk { | 674 | struct buffer_chunk { |
706 | struct buffer_head *bh[CHUNK_SIZE]; | 675 | struct buffer_head *bh[CHUNK_SIZE]; |
@@ -1009,7 +978,6 @@ static int flush_commit_list(struct super_block *s, | |||
1009 | struct buffer_head *tbh = NULL; | 978 | struct buffer_head *tbh = NULL; |
1010 | unsigned int trans_id = jl->j_trans_id; | 979 | unsigned int trans_id = jl->j_trans_id; |
1011 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 980 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
1012 | int barrier = 0; | ||
1013 | int retval = 0; | 981 | int retval = 0; |
1014 | int write_len; | 982 | int write_len; |
1015 | 983 | ||
@@ -1094,24 +1062,6 @@ static int flush_commit_list(struct super_block *s, | |||
1094 | } | 1062 | } |
1095 | atomic_dec(&journal->j_async_throttle); | 1063 | atomic_dec(&journal->j_async_throttle); |
1096 | 1064 | ||
1097 | /* We're skipping the commit if there's an error */ | ||
1098 | if (retval || reiserfs_is_journal_aborted(journal)) | ||
1099 | barrier = 0; | ||
1100 | |||
1101 | /* wait on everything written so far before writing the commit | ||
1102 | * if we are in barrier mode, send the commit down now | ||
1103 | */ | ||
1104 | barrier = reiserfs_barrier_flush(s); | ||
1105 | if (barrier) { | ||
1106 | int ret; | ||
1107 | lock_buffer(jl->j_commit_bh); | ||
1108 | ret = submit_barrier_buffer(jl->j_commit_bh); | ||
1109 | if (ret == -EOPNOTSUPP) { | ||
1110 | set_buffer_uptodate(jl->j_commit_bh); | ||
1111 | disable_barrier(s); | ||
1112 | barrier = 0; | ||
1113 | } | ||
1114 | } | ||
1115 | for (i = 0; i < (jl->j_len + 1); i++) { | 1065 | for (i = 0; i < (jl->j_len + 1); i++) { |
1116 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + | 1066 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + |
1117 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); | 1067 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); |
@@ -1143,27 +1093,22 @@ static int flush_commit_list(struct super_block *s, | |||
1143 | 1093 | ||
1144 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); | 1094 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); |
1145 | 1095 | ||
1146 | if (!barrier) { | 1096 | /* If there was a write error in the journal - we can't commit |
1147 | /* If there was a write error in the journal - we can't commit | 1097 | * this transaction - it will be invalid and, if successful, |
1148 | * this transaction - it will be invalid and, if successful, | 1098 | * will just end up propagating the write error out to |
1149 | * will just end up propagating the write error out to | 1099 | * the file system. */ |
1150 | * the file system. */ | 1100 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { |
1151 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { | 1101 | if (buffer_dirty(jl->j_commit_bh)) |
1152 | if (buffer_dirty(jl->j_commit_bh)) | 1102 | BUG(); |
1153 | BUG(); | 1103 | mark_buffer_dirty(jl->j_commit_bh) ; |
1154 | mark_buffer_dirty(jl->j_commit_bh) ; | ||
1155 | reiserfs_write_unlock(s); | ||
1156 | sync_dirty_buffer(jl->j_commit_bh) ; | ||
1157 | reiserfs_write_lock(s); | ||
1158 | } | ||
1159 | } else { | ||
1160 | reiserfs_write_unlock(s); | 1104 | reiserfs_write_unlock(s); |
1161 | wait_on_buffer(jl->j_commit_bh); | 1105 | if (reiserfs_barrier_flush(s)) |
1106 | __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); | ||
1107 | else | ||
1108 | sync_dirty_buffer(jl->j_commit_bh); | ||
1162 | reiserfs_write_lock(s); | 1109 | reiserfs_write_lock(s); |
1163 | } | 1110 | } |
1164 | 1111 | ||
1165 | check_barrier_completion(s, jl->j_commit_bh); | ||
1166 | |||
1167 | /* If there was a write error in the journal - we can't commit this | 1112 | /* If there was a write error in the journal - we can't commit this |
1168 | * transaction - it will be invalid and, if successful, will just end | 1113 | * transaction - it will be invalid and, if successful, will just end |
1169 | * up propagating the write error out to the filesystem. */ | 1114 | * up propagating the write error out to the filesystem. */ |
@@ -1319,26 +1264,15 @@ static int _update_journal_header_block(struct super_block *sb, | |||
1319 | jh->j_first_unflushed_offset = cpu_to_le32(offset); | 1264 | jh->j_first_unflushed_offset = cpu_to_le32(offset); |
1320 | jh->j_mount_id = cpu_to_le32(journal->j_mount_id); | 1265 | jh->j_mount_id = cpu_to_le32(journal->j_mount_id); |
1321 | 1266 | ||
1322 | if (reiserfs_barrier_flush(sb)) { | 1267 | set_buffer_dirty(journal->j_header_bh); |
1323 | int ret; | 1268 | reiserfs_write_unlock(sb); |
1324 | lock_buffer(journal->j_header_bh); | 1269 | |
1325 | ret = submit_barrier_buffer(journal->j_header_bh); | 1270 | if (reiserfs_barrier_flush(sb)) |
1326 | if (ret == -EOPNOTSUPP) { | 1271 | __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); |
1327 | set_buffer_uptodate(journal->j_header_bh); | 1272 | else |
1328 | disable_barrier(sb); | ||
1329 | goto sync; | ||
1330 | } | ||
1331 | reiserfs_write_unlock(sb); | ||
1332 | wait_on_buffer(journal->j_header_bh); | ||
1333 | reiserfs_write_lock(sb); | ||
1334 | check_barrier_completion(sb, journal->j_header_bh); | ||
1335 | } else { | ||
1336 | sync: | ||
1337 | set_buffer_dirty(journal->j_header_bh); | ||
1338 | reiserfs_write_unlock(sb); | ||
1339 | sync_dirty_buffer(journal->j_header_bh); | 1273 | sync_dirty_buffer(journal->j_header_bh); |
1340 | reiserfs_write_lock(sb); | 1274 | |
1341 | } | 1275 | reiserfs_write_lock(sb); |
1342 | if (!buffer_uptodate(journal->j_header_bh)) { | 1276 | if (!buffer_uptodate(journal->j_header_bh)) { |
1343 | reiserfs_warning(sb, "journal-837", | 1277 | reiserfs_warning(sb, "journal-837", |
1344 | "IO error during journal replay"); | 1278 | "IO error during journal replay"); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 286e36e21dae..1846a0dd7035 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -924,19 +924,7 @@ xfs_buf_iodone_work( | |||
924 | xfs_buf_t *bp = | 924 | xfs_buf_t *bp = |
925 | container_of(work, xfs_buf_t, b_iodone_work); | 925 | container_of(work, xfs_buf_t, b_iodone_work); |
926 | 926 | ||
927 | /* | 927 | if (bp->b_iodone) |
928 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | ||
929 | * ordered flag and reissue them. Because we can't tell the higher | ||
930 | * layers directly that they should not issue ordered I/O anymore, they | ||
931 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. | ||
932 | */ | ||
933 | if ((bp->b_error == EOPNOTSUPP) && | ||
934 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | ||
935 | trace_xfs_buf_ordered_retry(bp, _RET_IP_); | ||
936 | bp->b_flags &= ~XBF_ORDERED; | ||
937 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
938 | xfs_buf_iorequest(bp); | ||
939 | } else if (bp->b_iodone) | ||
940 | (*(bp->b_iodone))(bp); | 928 | (*(bp->b_iodone))(bp); |
941 | else if (bp->b_flags & XBF_ASYNC) | 929 | else if (bp->b_flags & XBF_ASYNC) |
942 | xfs_buf_relse(bp); | 930 | xfs_buf_relse(bp); |
@@ -1195,7 +1183,7 @@ _xfs_buf_ioapply( | |||
1195 | 1183 | ||
1196 | if (bp->b_flags & XBF_ORDERED) { | 1184 | if (bp->b_flags & XBF_ORDERED) { |
1197 | ASSERT(!(bp->b_flags & XBF_READ)); | 1185 | ASSERT(!(bp->b_flags & XBF_READ)); |
1198 | rw = WRITE_BARRIER; | 1186 | rw = WRITE_FLUSH_FUA; |
1199 | } else if (bp->b_flags & XBF_LOG_BUFFER) { | 1187 | } else if (bp->b_flags & XBF_LOG_BUFFER) { |
1200 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); | 1188 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); |
1201 | bp->b_flags &= ~_XBF_RUN_QUEUES; | 1189 | bp->b_flags &= ~_XBF_RUN_QUEUES; |
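XFS maps its existing XBF_ORDERED buffer flag straight onto the new write type and drops the _XFS_BARRIER_FAILED retry plumbing, since an unsupported flush can no longer bounce back as -EOPNOTSUPP:

        if (bp->b_flags & XBF_ORDERED) {
                ASSERT(!(bp->b_flags & XBF_READ));
                rw = WRITE_FLUSH_FUA;   /* was WRITE_BARRIER */
        }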
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 2a05614f0b92..9d021c73ea52 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -86,14 +86,6 @@ typedef enum { | |||
86 | */ | 86 | */ |
87 | #define _XBF_PAGE_LOCKED (1 << 22) | 87 | #define _XBF_PAGE_LOCKED (1 << 22) |
88 | 88 | ||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | #define _XFS_BARRIER_FAILED (1 << 23) | ||
96 | |||
97 | typedef unsigned int xfs_buf_flags_t; | 89 | typedef unsigned int xfs_buf_flags_t; |
98 | 90 | ||
99 | #define XFS_BUF_FLAGS \ | 91 | #define XFS_BUF_FLAGS \ |
@@ -114,8 +106,7 @@ typedef unsigned int xfs_buf_flags_t; | |||
114 | { _XBF_PAGES, "PAGES" }, \ | 106 | { _XBF_PAGES, "PAGES" }, \ |
115 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ | 107 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ |
116 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 108 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
117 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ | 109 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } |
118 | { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } | ||
119 | 110 | ||
120 | 111 | ||
121 | typedef enum { | 112 | typedef enum { |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a4e07974955b..08fd3102128c 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -693,8 +693,7 @@ void | |||
693 | xfs_blkdev_issue_flush( | 693 | xfs_blkdev_issue_flush( |
694 | xfs_buftarg_t *buftarg) | 694 | xfs_buftarg_t *buftarg) |
695 | { | 695 | { |
696 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, | 696 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); |
697 | BLKDEV_IFL_WAIT); | ||
698 | } | 697 | } |
699 | 698 | ||
700 | STATIC void | 699 | STATIC void |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index be5dffd282a1..8fe311a456e2 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -325,7 +325,6 @@ DEFINE_BUF_EVENT(xfs_buf_lock); | |||
325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); | 325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); |
326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); | 326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); |
327 | DEFINE_BUF_EVENT(xfs_buf_unlock); | 327 | DEFINE_BUF_EVENT(xfs_buf_unlock); |
328 | DEFINE_BUF_EVENT(xfs_buf_ordered_retry); | ||
329 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
330 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 33f718f92a48..ba8e36e0b4e7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -917,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp) | |||
917 | l = iclog->ic_log; | 917 | l = iclog->ic_log; |
918 | 918 | ||
919 | /* | 919 | /* |
920 | * If the _XFS_BARRIER_FAILED flag was set by a lower | ||
921 | * layer, it means the underlying device no longer supports | ||
922 | * barrier I/O. Warn loudly and turn off barriers. | ||
923 | */ | ||
924 | if (bp->b_flags & _XFS_BARRIER_FAILED) { | ||
925 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
926 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
927 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | ||
928 | "xlog_iodone: Barriers are no longer supported" | ||
929 | " by device. Disabling barriers\n"); | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * Race to shutdown the filesystem if we see an error. | 920 | * Race to shutdown the filesystem if we see an error. |
934 | */ | 921 | */ |
935 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, | 922 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d36629620a4f..0437ab6bb54c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -146,7 +146,6 @@ enum rq_flag_bits { | |||
146 | __REQ_FAILED, /* set if the request failed */ | 146 | __REQ_FAILED, /* set if the request failed */ |
147 | __REQ_QUIET, /* don't worry about errors */ | 147 | __REQ_QUIET, /* don't worry about errors */ |
148 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ | 148 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ |
149 | __REQ_ORDERED_COLOR, /* is before or after barrier */ | ||
150 | __REQ_ALLOCED, /* request came from our alloc pool */ | 149 | __REQ_ALLOCED, /* request came from our alloc pool */ |
151 | __REQ_COPY_USER, /* contains copies of user pages */ | 150 | __REQ_COPY_USER, /* contains copies of user pages */ |
152 | __REQ_FLUSH, /* request for cache flush */ | 151 | __REQ_FLUSH, /* request for cache flush */ |
@@ -170,7 +169,8 @@ enum rq_flag_bits { | |||
170 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) | 169 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) |
171 | #define REQ_COMMON_MASK \ | 170 | #define REQ_COMMON_MASK \ |
172 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ | 171 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ |
173 | REQ_META| REQ_DISCARD | REQ_NOIDLE) | 172 | REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) |
173 | #define REQ_CLONE_MASK REQ_COMMON_MASK | ||
174 | 174 | ||
175 | #define REQ_UNPLUG (1 << __REQ_UNPLUG) | 175 | #define REQ_UNPLUG (1 << __REQ_UNPLUG) |
176 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) | 176 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) |
@@ -187,7 +187,6 @@ enum rq_flag_bits { | |||
187 | #define REQ_FAILED (1 << __REQ_FAILED) | 187 | #define REQ_FAILED (1 << __REQ_FAILED) |
188 | #define REQ_QUIET (1 << __REQ_QUIET) | 188 | #define REQ_QUIET (1 << __REQ_QUIET) |
189 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) | 189 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) |
190 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) | ||
191 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) | 190 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) |
192 | #define REQ_COPY_USER (1 << __REQ_COPY_USER) | 191 | #define REQ_COPY_USER (1 << __REQ_COPY_USER) |
193 | #define REQ_FLUSH (1 << __REQ_FLUSH) | 192 | #define REQ_FLUSH (1 << __REQ_FLUSH) |
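REQ_FLUSH and REQ_FUA join REQ_COMMON_MASK (and the new REQ_CLONE_MASK), so the bits survive the bio-to-request transition and reach the driver, while REQ_ORDERED_COLOR dies with the old ordered-sequence state machine. A hedged sketch of what a request-based driver that called blk_queue_flush(q, REQ_FLUSH | REQ_FUA) might check (function and handling are illustrative):

        static void example_request_fn(struct request_queue *q)
        {
                struct request *rq;

                while ((rq = blk_fetch_request(q)) != NULL) {
                        if (rq->cmd_flags & REQ_FLUSH) {
                                /* write back the volatile cache before the data */
                        }
                        if (rq->cmd_flags & REQ_FUA) {
                                /* make this write reach stable media before
                                 * signalling completion */
                        }
                        /* ... transfer data and complete rq ... */
                }
        }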
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 16f7f1be1acf..009b80e49f53 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -360,12 +360,14 @@ struct request_queue | |||
360 | struct blk_trace *blk_trace; | 360 | struct blk_trace *blk_trace; |
361 | #endif | 361 | #endif |
362 | /* | 362 | /* |
363 | * reserved for flush operations | 363 | * for flush operations |
364 | */ | 364 | */ |
365 | unsigned int ordered, next_ordered, ordseq; | 365 | unsigned int flush_flags; |
366 | int orderr, ordcolor; | 366 | unsigned int flush_seq; |
367 | struct request pre_flush_rq, bar_rq, post_flush_rq; | 367 | int flush_err; |
368 | struct request *orig_bar_rq; | 368 | struct request flush_rq; |
369 | struct request *orig_flush_rq; | ||
370 | struct list_head pending_flushes; | ||
369 | 371 | ||
370 | struct mutex sysfs_lock; | 372 | struct mutex sysfs_lock; |
371 | 373 | ||
@@ -472,56 +474,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
472 | __clear_bit(flag, &q->queue_flags); | 474 | __clear_bit(flag, &q->queue_flags); |
473 | } | 475 | } |
474 | 476 | ||
475 | enum { | ||
476 | /* | ||
477 | * Hardbarrier is supported with one of the following methods. | ||
478 | * | ||
479 | * NONE : hardbarrier unsupported | ||
480 | * DRAIN : ordering by draining is enough | ||
481 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes | ||
482 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write | ||
483 | * TAG : ordering by tag is enough | ||
484 | * TAG_FLUSH : ordering by tag w/ pre and post flushes | ||
485 | * TAG_FUA : ordering by tag w/ pre flush and FUA write | ||
486 | */ | ||
487 | QUEUE_ORDERED_BY_DRAIN = 0x01, | ||
488 | QUEUE_ORDERED_BY_TAG = 0x02, | ||
489 | QUEUE_ORDERED_DO_PREFLUSH = 0x10, | ||
490 | QUEUE_ORDERED_DO_BAR = 0x20, | ||
491 | QUEUE_ORDERED_DO_POSTFLUSH = 0x40, | ||
492 | QUEUE_ORDERED_DO_FUA = 0x80, | ||
493 | |||
494 | QUEUE_ORDERED_NONE = 0x00, | ||
495 | |||
496 | QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | | ||
497 | QUEUE_ORDERED_DO_BAR, | ||
498 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | | ||
499 | QUEUE_ORDERED_DO_PREFLUSH | | ||
500 | QUEUE_ORDERED_DO_POSTFLUSH, | ||
501 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | | ||
502 | QUEUE_ORDERED_DO_PREFLUSH | | ||
503 | QUEUE_ORDERED_DO_FUA, | ||
504 | |||
505 | QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | | ||
506 | QUEUE_ORDERED_DO_BAR, | ||
507 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | | ||
508 | QUEUE_ORDERED_DO_PREFLUSH | | ||
509 | QUEUE_ORDERED_DO_POSTFLUSH, | ||
510 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | | ||
511 | QUEUE_ORDERED_DO_PREFLUSH | | ||
512 | QUEUE_ORDERED_DO_FUA, | ||
513 | |||
514 | /* | ||
515 | * Ordered operation sequence | ||
516 | */ | ||
517 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ | ||
518 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ | ||
519 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ | ||
520 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ | ||
521 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ | ||
522 | QUEUE_ORDSEQ_DONE = 0x20, | ||
523 | }; | ||
524 | |||
525 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 477 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
526 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 478 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
527 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 479 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
@@ -531,7 +483,6 @@ enum { | |||
531 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) | 483 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) |
532 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) | 484 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) |
533 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) | 485 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) |
534 | #define blk_queue_flushing(q) ((q)->ordseq) | ||
535 | #define blk_queue_stackable(q) \ | 486 | #define blk_queue_stackable(q) \ |
536 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) | 487 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) |
537 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) | 488 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) |
@@ -602,7 +553,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) | |||
602 | * it already be started by driver. | 553 | * it already be started by driver. |
603 | */ | 554 | */ |
604 | #define RQ_NOMERGE_FLAGS \ | 555 | #define RQ_NOMERGE_FLAGS \ |
605 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) | 556 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ |
557 | REQ_FLUSH | REQ_FUA) | ||
606 | #define rq_mergeable(rq) \ | 558 | #define rq_mergeable(rq) \ |
607 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 559 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ |
608 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 560 | (((rq)->cmd_flags & REQ_DISCARD) || \ |
@@ -891,12 +843,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); | |||
891 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 843 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
892 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); | 844 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); |
893 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); | 845 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); |
846 | extern void blk_queue_flush(struct request_queue *q, unsigned int flush); | ||
894 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 847 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
895 | extern int blk_queue_ordered(struct request_queue *, unsigned); | ||
896 | extern bool blk_do_ordered(struct request_queue *, struct request **); | ||
897 | extern unsigned blk_ordered_cur_seq(struct request_queue *); | ||
898 | extern unsigned blk_ordered_req_seq(struct request *); | ||
899 | extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); | ||
900 | 848 | ||
901 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 849 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
902 | extern void blk_dump_rq_flags(struct request *, char *); | 850 | extern void blk_dump_rq_flags(struct request *, char *); |
@@ -929,27 +877,20 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | |||
929 | return NULL; | 877 | return NULL; |
930 | return bqt->tag_index[tag]; | 878 | return bqt->tag_index[tag]; |
931 | } | 879 | } |
932 | enum{ | 880 | |
933 | BLKDEV_WAIT, /* wait for completion */ | 881 | #define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ |
934 | BLKDEV_BARRIER, /* issue request with barrier */ | 882 | |
935 | BLKDEV_SECURE, /* secure discard */ | 883 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); |
936 | }; | ||
937 | #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) | ||
938 | #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) | ||
939 | #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) | ||
940 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, | ||
941 | unsigned long); | ||
942 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | 884 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, |
943 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 885 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
944 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 886 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
945 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 887 | sector_t nr_sects, gfp_t gfp_mask); |
946 | static inline int sb_issue_discard(struct super_block *sb, | 888 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, |
947 | sector_t block, sector_t nr_blocks) | 889 | sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) |
948 | { | 890 | { |
949 | block <<= (sb->s_blocksize_bits - 9); | 891 | return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), |
950 | nr_blocks <<= (sb->s_blocksize_bits - 9); | 892 | nr_blocks << (sb->s_blocksize_bits - 9), |
951 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, | 893 | gfp_mask, flags); |
952 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
953 | } | 894 | } |
954 | 895 | ||
955 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); | 896 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); |
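[Editor's illustration] For callers, the BLKDEV_IFL_* flags disappear entirely: blkdev_issue_flush() now always waits for completion, and a discard carries at most BLKDEV_DISCARD_SECURE. A minimal sketch of an fsync-style path under the new signatures; the helper name and the error policy are illustrative, not taken from the patch.

#include <linux/blkdev.h>
#include <linux/fs.h>

/* Hypothetical helper: flush the device's volatile cache, then discard
 * a range of filesystem blocks.  Both calls block until completion. */
static int example_flush_and_discard(struct super_block *sb,
				     sector_t block, sector_t nr_blocks)
{
	int ret;

	ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
	if (ret)
		return ret;

	/* 0: an ordinary discard; pass BLKDEV_DISCARD_SECURE if needed */
	return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
}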
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..dd1b25b2641c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h | |||
@@ -32,7 +32,6 @@ enum bh_state_bits { | |||
32 | BH_Delay, /* Buffer is not yet allocated on disk */ | 32 | BH_Delay, /* Buffer is not yet allocated on disk */ |
33 | BH_Boundary, /* Block is followed by a discontiguity */ | 33 | BH_Boundary, /* Block is followed by a discontiguity */ |
34 | BH_Write_EIO, /* I/O error on write */ | 34 | BH_Write_EIO, /* I/O error on write */ |
35 | BH_Eopnotsupp, /* operation not supported (barrier) */ | ||
36 | BH_Unwritten, /* Buffer is allocated on disk but not written */ | 35 | BH_Unwritten, /* Buffer is allocated on disk but not written */ |
37 | BH_Quiet, /* Buffer Error Prinks to be quiet */ | 36 | BH_Quiet, /* Buffer Error Prinks to be quiet */ |
38 | 37 | ||
@@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) | |||
124 | BUFFER_FNS(Delay, delay) | 123 | BUFFER_FNS(Delay, delay) |
125 | BUFFER_FNS(Boundary, boundary) | 124 | BUFFER_FNS(Boundary, boundary) |
126 | BUFFER_FNS(Write_EIO, write_io_error) | 125 | BUFFER_FNS(Write_EIO, write_io_error) |
127 | BUFFER_FNS(Eopnotsupp, eopnotsupp) | ||
128 | BUFFER_FNS(Unwritten, unwritten) | 126 | BUFFER_FNS(Unwritten, unwritten) |
129 | 127 | ||
130 | #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) | 128 | #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) |
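[Editor's illustration] With BH_Eopnotsupp gone, a synchronous buffer write can no longer come back with a "barrier not supported" condition that has to be cleared and retried without the barrier flag; any failure is a plain I/O error. A sketch of the simplified error path follows; the function name is hypothetical, the buffer-head calls are the standard ones.

#include <linux/buffer_head.h>

/* Hypothetical metadata writer: no buffer_eopnotsupp() /
 * clear_buffer_eopnotsupp() retry branch is needed any more. */
static int example_write_block(struct buffer_head *bh)
{
	mark_buffer_dirty(bh);
	if (sync_dirty_buffer(bh))	/* waits for the write; -EIO on failure */
		return -EIO;
	return 0;
}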
diff --git a/include/linux/fs.h b/include/linux/fs.h index 0a81b87ea158..4f34ff6e5558 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -135,12 +135,12 @@ struct inodes_stat_t { | |||
135 | * immediately after submission. The write equivalent | 135 | * immediately after submission. The write equivalent |
136 | * of READ_SYNC. | 136 | * of READ_SYNC. |
137 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. | 137 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. |
138 | * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all | 138 | * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. |
139 | * previously submitted writes must be safely on storage | 139 | * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on |
140 | * before this one is started. Also guarantees that when | 140 | * non-volatile media on completion. |
141 | * this write is complete, it itself is also safely on | 141 | * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded |
142 | * storage. Prevents reordering of writes on both sides | 142 | * by a cache flush and data is guaranteed to be on |
143 | * of this IO. | 143 | * non-volatile media on completion. |
144 | * | 144 | * |
145 | */ | 145 | */ |
146 | #define RW_MASK REQ_WRITE | 146 | #define RW_MASK REQ_WRITE |
@@ -156,16 +156,12 @@ struct inodes_stat_t { | |||
156 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) | 156 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) |
157 | #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) | 157 | #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) |
158 | #define WRITE_META (WRITE | REQ_META) | 158 | #define WRITE_META (WRITE | REQ_META) |
159 | #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | 159 | #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
160 | REQ_HARDBARRIER) | 160 | REQ_FLUSH) |
161 | 161 | #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | |
162 | /* | 162 | REQ_FUA) |
163 | * These aren't really reads or writes, they pass down information about | 163 | #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
164 | * parts of device that are now unused by the file system. | 164 | REQ_FLUSH | REQ_FUA) |
165 | */ | ||
166 | #define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) | ||
167 | #define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) | ||
168 | #define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) | ||
169 | 165 | ||
170 | #define SEL_IN 1 | 166 | #define SEL_IN 1 |
171 | #define SEL_OUT 2 | 167 | #define SEL_OUT 2 |
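[Editor's illustration] A journaling filesystem that previously used WRITE_BARRIER for its commit block now states exactly what it needs: flush the cache first, and make the commit record itself durable with FUA. The sketch below follows the shape of the converted journalling code but is not lifted from it; the function name is hypothetical, and the reference counting follows the usual end_buffer_write_sync() convention.

#include <linux/fs.h>
#include <linux/buffer_head.h>

/* Hypothetical commit-record submission: flush everything written so
 * far, then write the commit block with FUA so it is stable on media. */
static void example_submit_commit(struct buffer_head *bh, bool cache_flush)
{
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);			/* end_buffer_write_sync() drops one ref */

	if (cache_flush)
		submit_bh(WRITE_FLUSH_FUA, bh);
	else
		submit_bh(WRITE_SYNC, bh);	/* no volatile cache: plain sync write */
	wait_on_buffer(bh);
}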
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 17231385cb37..d6e7994aa634 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h | |||
@@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) | |||
97 | static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) | 97 | static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) |
98 | { | 98 | { |
99 | struct request *req = cmd->request; | 99 | struct request *req = cmd->request; |
100 | struct scsi_device *sdev = cmd->device; | ||
101 | 100 | ||
102 | if (blk_rq_tagged(req)) { | 101 | if (blk_rq_tagged(req)) { |
103 | if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) | 102 | *msg++ = MSG_SIMPLE_TAG; |
104 | *msg++ = MSG_ORDERED_TAG; | ||
105 | else | ||
106 | *msg++ = MSG_SIMPLE_TAG; | ||
107 | *msg++ = req->tag; | 103 | *msg++ = req->tag; |
108 | return 2; | 104 | return 2; |
109 | } | 105 | } |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 7c703ff2f36f..9fc7bac7db0c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -139,7 +139,7 @@ static int discard_swap(struct swap_info_struct *si) | |||
139 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); | 139 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); |
140 | if (nr_blocks) { | 140 | if (nr_blocks) { |
141 | err = blkdev_issue_discard(si->bdev, start_block, | 141 | err = blkdev_issue_discard(si->bdev, start_block, |
142 | nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); | 142 | nr_blocks, GFP_KERNEL, 0); |
143 | if (err) | 143 | if (err) |
144 | return err; | 144 | return err; |
145 | cond_resched(); | 145 | cond_resched(); |
@@ -150,7 +150,7 @@ static int discard_swap(struct swap_info_struct *si) | |||
150 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); | 150 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); |
151 | 151 | ||
152 | err = blkdev_issue_discard(si->bdev, start_block, | 152 | err = blkdev_issue_discard(si->bdev, start_block, |
153 | nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); | 153 | nr_blocks, GFP_KERNEL, 0); |
154 | if (err) | 154 | if (err) |
155 | break; | 155 | break; |
156 | 156 | ||
@@ -189,7 +189,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, | |||
189 | start_block <<= PAGE_SHIFT - 9; | 189 | start_block <<= PAGE_SHIFT - 9; |
190 | nr_blocks <<= PAGE_SHIFT - 9; | 190 | nr_blocks <<= PAGE_SHIFT - 9; |
191 | if (blkdev_issue_discard(si->bdev, start_block, | 191 | if (blkdev_issue_discard(si->bdev, start_block, |
192 | nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT)) | 192 | nr_blocks, GFP_NOIO, 0)) |
193 | break; | 193 | break; |
194 | } | 194 | } |
195 | 195 | ||