diff options
author | Nick Piggin <npiggin@suse.de> | 2007-10-16 04:25:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:55 -0400 |
commit | afddba49d18f346e5cc2938b6ed7c512db18ca68 (patch) | |
tree | 4726e3d3b0e9e8e5b5d3b2b0cccb36446bbdf3ca /drivers/block/loop.c | |
parent | 637aff46f94a754207c80c8c64bf1b74f24b967d (diff) |
fs: introduce write_begin, write_end, and perform_write aops

These new address space operations are intended to replace prepare_write and
commit_write with more flexible alternatives that are also able to avoid the
buffered write deadlock problems efficiently (which prepare_write is unable to do).
[mark.fasheh@oracle.com: API design contributions, code review and fixes]
[akpm@linux-foundation.org: various fixes]
[dmonakhov@sw.ru: new aop block_write_begin fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Dmitriy Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r-- | drivers/block/loop.c | 75 |
1 files changed, 29 insertions, 46 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c | |||
index b9233a06934c..a5f993ac28dd 100644 | |||
--- a/drivers/block/loop.c | |||
+++ b/drivers/block/loop.c | |||
@@ -204,14 +204,13 @@ lo_do_transfer(struct loop_device *lo, int cmd, | |||
204 | * do_lo_send_aops - helper for writing data to a loop device | 204 | * do_lo_send_aops - helper for writing data to a loop device |
205 | * | 205 | * |
206 | * This is the fast version for backing filesystems which implement the address | 206 | * This is the fast version for backing filesystems which implement the address |
207 | * space operations prepare_write and commit_write. | 207 | * space operations write_begin and write_end. |
208 | */ | 208 | */ |
209 | static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | 209 | static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, |
210 | int bsize, loff_t pos, struct page *page) | 210 | int bsize, loff_t pos, struct page *unused) |
211 | { | 211 | { |
212 | struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ | 212 | struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ |
213 | struct address_space *mapping = file->f_mapping; | 213 | struct address_space *mapping = file->f_mapping; |
214 | const struct address_space_operations *aops = mapping->a_ops; | ||
215 | pgoff_t index; | 214 | pgoff_t index; |
216 | unsigned offset, bv_offs; | 215 | unsigned offset, bv_offs; |
217 | int len, ret; | 216 | int len, ret; |
@@ -223,63 +222,47 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | |||
223 | len = bvec->bv_len; | 222 | len = bvec->bv_len; |
224 | while (len > 0) { | 223 | while (len > 0) { |
225 | sector_t IV; | 224 | sector_t IV; |
226 | unsigned size; | 225 | unsigned size, copied; |
227 | int transfer_result; | 226 | int transfer_result; |
227 | struct page *page; | ||
228 | void *fsdata; | ||
228 | 229 | ||
229 | IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); | 230 | IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); |
230 | size = PAGE_CACHE_SIZE - offset; | 231 | size = PAGE_CACHE_SIZE - offset; |
231 | if (size > len) | 232 | if (size > len) |
232 | size = len; | 233 | size = len; |
233 | page = grab_cache_page(mapping, index); | 234 | |
234 | if (unlikely(!page)) | 235 | ret = pagecache_write_begin(file, mapping, pos, size, 0, |
236 | &page, &fsdata); | ||
237 | if (ret) | ||
235 | goto fail; | 238 | goto fail; |
236 | ret = aops->prepare_write(file, page, offset, | 239 | |
237 | offset + size); | ||
238 | if (unlikely(ret)) { | ||
239 | if (ret == AOP_TRUNCATED_PAGE) { | ||
240 | page_cache_release(page); | ||
241 | continue; | ||
242 | } | ||
243 | goto unlock; | ||
244 | } | ||
245 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, | 240 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, |
246 | bvec->bv_page, bv_offs, size, IV); | 241 | bvec->bv_page, bv_offs, size, IV); |
247 | if (unlikely(transfer_result)) { | 242 | copied = size; |
248 | /* | ||
249 | * The transfer failed, but we still write the data to | ||
250 | * keep prepare/commit calls balanced. | ||
251 | */ | ||
252 | printk(KERN_ERR "loop: transfer error block %llu\n", | ||
253 | (unsigned long long)index); | ||
254 | zero_user_page(page, offset, size, KM_USER0); | ||
255 | } | ||
256 | flush_dcache_page(page); | ||
257 | ret = aops->commit_write(file, page, offset, | ||
258 | offset + size); | ||
259 | if (unlikely(ret)) { | ||
260 | if (ret == AOP_TRUNCATED_PAGE) { | ||
261 | page_cache_release(page); | ||
262 | continue; | ||
263 | } | ||
264 | goto unlock; | ||
265 | } | ||
266 | if (unlikely(transfer_result)) | 243 | if (unlikely(transfer_result)) |
267 | goto unlock; | 244 | copied = 0; |
268 | bv_offs += size; | 245 | |
269 | len -= size; | 246 | ret = pagecache_write_end(file, mapping, pos, size, copied, |
247 | page, fsdata); | ||
248 | if (ret < 0) | ||
249 | goto fail; | ||
250 | if (ret < copied) | ||
251 | copied = ret; | ||
252 | |||
253 | if (unlikely(transfer_result)) | ||
254 | goto fail; | ||
255 | |||
256 | bv_offs += copied; | ||
257 | len -= copied; | ||
270 | offset = 0; | 258 | offset = 0; |
271 | index++; | 259 | index++; |
272 | pos += size; | 260 | pos += copied; |
273 | unlock_page(page); | ||
274 | page_cache_release(page); | ||
275 | } | 261 | } |
276 | ret = 0; | 262 | ret = 0; |
277 | out: | 263 | out: |
278 | mutex_unlock(&mapping->host->i_mutex); | 264 | mutex_unlock(&mapping->host->i_mutex); |
279 | return ret; | 265 | return ret; |
280 | unlock: | ||
281 | unlock_page(page); | ||
282 | page_cache_release(page); | ||
283 | fail: | 266 | fail: |
284 | ret = -1; | 267 | ret = -1; |
285 | goto out; | 268 | goto out; |
@@ -313,7 +296,7 @@ static int __do_lo_send_write(struct file *file, | |||
313 | * do_lo_send_direct_write - helper for writing data to a loop device | 296 | * do_lo_send_direct_write - helper for writing data to a loop device |
314 | * | 297 | * |
315 | * This is the fast, non-transforming version for backing filesystems which do | 298 | * This is the fast, non-transforming version for backing filesystems which do |
316 | * not implement the address space operations prepare_write and commit_write. | 299 | * not implement the address space operations write_begin and write_end. |
317 | * It uses the write file operation which should be present on all writeable | 300 | * It uses the write file operation which should be present on all writeable |
318 | * filesystems. | 301 | * filesystems. |
319 | */ | 302 | */ |
@@ -332,7 +315,7 @@ static int do_lo_send_direct_write(struct loop_device *lo, | |||
332 | * do_lo_send_write - helper for writing data to a loop device | 315 | * do_lo_send_write - helper for writing data to a loop device |
333 | * | 316 | * |
334 | * This is the slow, transforming version for filesystems which do not | 317 | * This is the slow, transforming version for filesystems which do not |
335 | * implement the address space operations prepare_write and commit_write. It | 318 | * implement the address space operations write_begin and write_end. It |
336 | * uses the write file operation which should be present on all writeable | 319 | * uses the write file operation which should be present on all writeable |
337 | * filesystems. | 320 | * filesystems. |
338 | * | 321 | * |
@@ -780,7 +763,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, | |||
780 | */ | 763 | */ |
781 | if (!file->f_op->splice_read) | 764 | if (!file->f_op->splice_read) |
782 | goto out_putf; | 765 | goto out_putf; |
783 | if (aops->prepare_write && aops->commit_write) | 766 | if (aops->prepare_write || aops->write_begin) |
784 | lo_flags |= LO_FLAGS_USE_AOPS; | 767 | lo_flags |= LO_FLAGS_USE_AOPS; |
785 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) | 768 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) |
786 | lo_flags |= LO_FLAGS_READ_ONLY; | 769 | lo_flags |= LO_FLAGS_READ_ONLY; |