diff options
author | Christoph Hellwig <hch@infradead.org> | 2011-10-17 06:57:20 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2011-10-17 06:57:20 -0400 |
commit | 456be1484ffc72a24bdb4200b5847c4fa90139d9 (patch) | |
tree | 570f0818bd6cfa245ab23d0121853b7b1e5a649b /drivers/block | |
parent | 8bc03e8f3a334e09e89a7dffb486ee97a5ce84ae (diff) |
loop: remove the incorrect write_begin/write_end shortcut
Currently the loop device tries to call directly into write_begin/write_end
instead of going through ->write if it can. This is a fairly nasty shortcut
as write_begin and write_end are only callbacks for the generic write code
and expect to be called with filesystem specific locks held.
This code currently causes various issues for clustered filesystems as it
doesn't take the required cluster locks, and it also causes issues for XFS
as it doesn't properly lock against the swapext ioctl as called by the
defragmentation tools. This in turn causes data corruption if
defragmentation hits a busy loop device in the wrong time window, as
reported by RH QA.
The reason why we have this shortcut is that it saves a data copy when
doing a transformation on the loop device, which is the technical term
for using cryptoloop (or an XOR transformation). Given that cryptoloop
has been deprecated in favour of dm-crypt my opinion is that we should
simply drop this shortcut instead of finding complicated ways to
introduce a formal interface for this shortcut.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/loop.c | 135 |
1 files changed, 23 insertions, 112 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4720c7ade0ae..46cdd6945557 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -203,74 +203,6 @@ lo_do_transfer(struct loop_device *lo, int cmd, | |||
203 | } | 203 | } |
204 | 204 | ||
205 | /** | 205 | /** |
206 | * do_lo_send_aops - helper for writing data to a loop device | ||
207 | * | ||
208 | * This is the fast version for backing filesystems which implement the address | ||
209 | * space operations write_begin and write_end. | ||
210 | */ | ||
211 | static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | ||
212 | loff_t pos, struct page *unused) | ||
213 | { | ||
214 | struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ | ||
215 | struct address_space *mapping = file->f_mapping; | ||
216 | pgoff_t index; | ||
217 | unsigned offset, bv_offs; | ||
218 | int len, ret; | ||
219 | |||
220 | mutex_lock(&mapping->host->i_mutex); | ||
221 | index = pos >> PAGE_CACHE_SHIFT; | ||
222 | offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); | ||
223 | bv_offs = bvec->bv_offset; | ||
224 | len = bvec->bv_len; | ||
225 | while (len > 0) { | ||
226 | sector_t IV; | ||
227 | unsigned size, copied; | ||
228 | int transfer_result; | ||
229 | struct page *page; | ||
230 | void *fsdata; | ||
231 | |||
232 | IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); | ||
233 | size = PAGE_CACHE_SIZE - offset; | ||
234 | if (size > len) | ||
235 | size = len; | ||
236 | |||
237 | ret = pagecache_write_begin(file, mapping, pos, size, 0, | ||
238 | &page, &fsdata); | ||
239 | if (ret) | ||
240 | goto fail; | ||
241 | |||
242 | file_update_time(file); | ||
243 | |||
244 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, | ||
245 | bvec->bv_page, bv_offs, size, IV); | ||
246 | copied = size; | ||
247 | if (unlikely(transfer_result)) | ||
248 | copied = 0; | ||
249 | |||
250 | ret = pagecache_write_end(file, mapping, pos, size, copied, | ||
251 | page, fsdata); | ||
252 | if (ret < 0 || ret != copied) | ||
253 | goto fail; | ||
254 | |||
255 | if (unlikely(transfer_result)) | ||
256 | goto fail; | ||
257 | |||
258 | bv_offs += copied; | ||
259 | len -= copied; | ||
260 | offset = 0; | ||
261 | index++; | ||
262 | pos += copied; | ||
263 | } | ||
264 | ret = 0; | ||
265 | out: | ||
266 | mutex_unlock(&mapping->host->i_mutex); | ||
267 | return ret; | ||
268 | fail: | ||
269 | ret = -1; | ||
270 | goto out; | ||
271 | } | ||
272 | |||
273 | /** | ||
274 | * __do_lo_send_write - helper for writing data to a loop device | 206 | * __do_lo_send_write - helper for writing data to a loop device |
275 | * | 207 | * |
276 | * This helper just factors out common code between do_lo_send_direct_write() | 208 | * This helper just factors out common code between do_lo_send_direct_write() |
@@ -297,10 +229,8 @@ static int __do_lo_send_write(struct file *file, | |||
297 | /** | 229 | /** |
298 | * do_lo_send_direct_write - helper for writing data to a loop device | 230 | * do_lo_send_direct_write - helper for writing data to a loop device |
299 | * | 231 | * |
300 | * This is the fast, non-transforming version for backing filesystems which do | 232 | * This is the fast, non-transforming version that does not need double |
301 | * not implement the address space operations write_begin and write_end. | 233 | * buffering. |
302 | * It uses the write file operation which should be present on all writeable | ||
303 | * filesystems. | ||
304 | */ | 234 | */ |
305 | static int do_lo_send_direct_write(struct loop_device *lo, | 235 | static int do_lo_send_direct_write(struct loop_device *lo, |
306 | struct bio_vec *bvec, loff_t pos, struct page *page) | 236 | struct bio_vec *bvec, loff_t pos, struct page *page) |
@@ -316,15 +246,9 @@ static int do_lo_send_direct_write(struct loop_device *lo, | |||
316 | /** | 246 | /** |
317 | * do_lo_send_write - helper for writing data to a loop device | 247 | * do_lo_send_write - helper for writing data to a loop device |
318 | * | 248 | * |
319 | * This is the slow, transforming version for filesystems which do not | 249 | * This is the slow, transforming version that needs to double buffer the |
320 | * implement the address space operations write_begin and write_end. It | 250 | * data as it cannot do the transformations in place without having direct |
321 | * uses the write file operation which should be present on all writeable | 251 | * access to the destination pages of the backing file. |
322 | * filesystems. | ||
323 | * | ||
324 | * Using fops->write is slower than using aops->{prepare,commit}_write in the | ||
325 | * transforming case because we need to double buffer the data as we cannot do | ||
326 | * the transformations in place as we do not have direct access to the | ||
327 | * destination pages of the backing file. | ||
328 | */ | 252 | */ |
329 | static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, | 253 | static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, |
330 | loff_t pos, struct page *page) | 254 | loff_t pos, struct page *page) |
@@ -350,17 +274,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) | |||
350 | struct page *page = NULL; | 274 | struct page *page = NULL; |
351 | int i, ret = 0; | 275 | int i, ret = 0; |
352 | 276 | ||
353 | do_lo_send = do_lo_send_aops; | 277 | if (lo->transfer != transfer_none) { |
354 | if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { | 278 | page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); |
279 | if (unlikely(!page)) | ||
280 | goto fail; | ||
281 | kmap(page); | ||
282 | do_lo_send = do_lo_send_write; | ||
283 | } else { | ||
355 | do_lo_send = do_lo_send_direct_write; | 284 | do_lo_send = do_lo_send_direct_write; |
356 | if (lo->transfer != transfer_none) { | ||
357 | page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); | ||
358 | if (unlikely(!page)) | ||
359 | goto fail; | ||
360 | kmap(page); | ||
361 | do_lo_send = do_lo_send_write; | ||
362 | } | ||
363 | } | 285 | } |
286 | |||
364 | bio_for_each_segment(bvec, bio, i) { | 287 | bio_for_each_segment(bvec, bio, i) { |
365 | ret = do_lo_send(lo, bvec, pos, page); | 288 | ret = do_lo_send(lo, bvec, pos, page); |
366 | if (ret < 0) | 289 | if (ret < 0) |
@@ -849,35 +772,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, | |||
849 | mapping = file->f_mapping; | 772 | mapping = file->f_mapping; |
850 | inode = mapping->host; | 773 | inode = mapping->host; |
851 | 774 | ||
852 | if (!(file->f_mode & FMODE_WRITE)) | ||
853 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
854 | |||
855 | error = -EINVAL; | 775 | error = -EINVAL; |
856 | if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 776 | if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) |
857 | const struct address_space_operations *aops = mapping->a_ops; | 777 | goto out_putf; |
858 | |||
859 | if (aops->write_begin) | ||
860 | lo_flags |= LO_FLAGS_USE_AOPS; | ||
861 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) | ||
862 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
863 | 778 | ||
864 | lo_blocksize = S_ISBLK(inode->i_mode) ? | 779 | if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || |
865 | inode->i_bdev->bd_block_size : PAGE_SIZE; | 780 | !file->f_op->write) |
781 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
866 | 782 | ||
867 | error = 0; | 783 | lo_blocksize = S_ISBLK(inode->i_mode) ? |
868 | } else { | 784 | inode->i_bdev->bd_block_size : PAGE_SIZE; |
869 | goto out_putf; | ||
870 | } | ||
871 | 785 | ||
786 | error = -EFBIG; | ||
872 | size = get_loop_size(lo, file); | 787 | size = get_loop_size(lo, file); |
873 | 788 | if ((loff_t)(sector_t)size != size) | |
874 | if ((loff_t)(sector_t)size != size) { | ||
875 | error = -EFBIG; | ||
876 | goto out_putf; | 789 | goto out_putf; |
877 | } | ||
878 | 790 | ||
879 | if (!(mode & FMODE_WRITE)) | 791 | error = 0; |
880 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
881 | 792 | ||
882 | set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); | 793 | set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); |
883 | 794 | ||