diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 170 |
1 file changed, 52 insertions, 118 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ccea3b665c12..dd51c68e2b86 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c | |||
@@ -39,11 +39,10 @@ | |||
39 | /* | 39 | /* |
40 | * FIXME: remove all knowledge of the buffer layer from the core VM | 40 | * FIXME: remove all knowledge of the buffer layer from the core VM |
41 | */ | 41 | */ |
42 | #include <linux/buffer_head.h> /* for generic_osync_inode */ | 42 | #include <linux/buffer_head.h> /* for try_to_free_buffers */ |
43 | 43 | ||
44 | #include <asm/mman.h> | 44 | #include <asm/mman.h> |
45 | 45 | ||
46 | |||
47 | /* | 46 | /* |
48 | * Shared mappings implemented 30.11.1994. It's not fully working yet, | 47 | * Shared mappings implemented 30.11.1994. It's not fully working yet, |
49 | * though. | 48 | * though. |
@@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping, | |||
307 | } | 306 | } |
308 | 307 | ||
309 | /** | 308 | /** |
310 | * sync_page_range - write and wait on all pages in the passed range | 309 | * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range |
311 | * @inode: target inode | 310 | * @mapping: address space structure to wait for |
312 | * @mapping: target address_space | 311 | * @start: offset in bytes where the range starts |
313 | * @pos: beginning offset in pages to write | 312 | * @end: offset in bytes where the range ends (inclusive) |
314 | * @count: number of bytes to write | ||
315 | * | ||
316 | * Write and wait upon all the pages in the passed range. This is a "data | ||
317 | * integrity" operation. It waits upon in-flight writeout before starting and | ||
318 | * waiting upon new writeout. If there was an IO error, return it. | ||
319 | * | 313 | * |
320 | * We need to re-take i_mutex during the generic_osync_inode list walk because | 314 | * Walk the list of under-writeback pages of the given address space |
321 | * it is otherwise livelockable. | 315 | * in the given range and wait for all of them. |
322 | */ | ||
323 | int sync_page_range(struct inode *inode, struct address_space *mapping, | ||
324 | loff_t pos, loff_t count) | ||
325 | { | ||
326 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | ||
327 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
328 | int ret; | ||
329 | |||
330 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
331 | return 0; | ||
332 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
333 | if (ret == 0) { | ||
334 | mutex_lock(&inode->i_mutex); | ||
335 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
336 | mutex_unlock(&inode->i_mutex); | ||
337 | } | ||
338 | if (ret == 0) | ||
339 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
340 | return ret; | ||
341 | } | ||
342 | EXPORT_SYMBOL(sync_page_range); | ||
343 | |||
344 | /** | ||
345 | * sync_page_range_nolock - write & wait on all pages in the passed range without locking | ||
346 | * @inode: target inode | ||
347 | * @mapping: target address_space | ||
348 | * @pos: beginning offset in pages to write | ||
349 | * @count: number of bytes to write | ||
350 | * | 316 | * |
351 | * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea | 317 | * This is just a simple wrapper so that callers don't have to convert offsets |
352 | * as it forces O_SYNC writers to different parts of the same file | 318 | * to page indexes themselves |
353 | * to be serialised right until io completion. | ||
354 | */ | 319 | */ |
355 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, | 320 | int filemap_fdatawait_range(struct address_space *mapping, loff_t start, |
356 | loff_t pos, loff_t count) | 321 | loff_t end) |
357 | { | 322 | { |
358 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | 323 | return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT, |
359 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 324 | end >> PAGE_CACHE_SHIFT); |
360 | int ret; | ||
361 | |||
362 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
363 | return 0; | ||
364 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
365 | if (ret == 0) | ||
366 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
367 | if (ret == 0) | ||
368 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
369 | return ret; | ||
370 | } | 325 | } |
371 | EXPORT_SYMBOL(sync_page_range_nolock); | 326 | EXPORT_SYMBOL(filemap_fdatawait_range); |
372 | 327 | ||
373 | /** | 328 | /** |
374 | * filemap_fdatawait - wait for all under-writeback pages to complete | 329 | * filemap_fdatawait - wait for all under-writeback pages to complete |
@@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2167 | } | 2122 | } |
2168 | *ppos = end; | 2123 | *ppos = end; |
2169 | } | 2124 | } |
2170 | |||
2171 | /* | ||
2172 | * Sync the fs metadata but not the minor inode changes and | ||
2173 | * of course not the data as we did direct DMA for the IO. | ||
2174 | * i_mutex is held, which protects generic_osync_inode() from | ||
2175 | * livelocking. AIO O_DIRECT ops attempt to sync metadata here. | ||
2176 | */ | ||
2177 | out: | 2125 | out: |
2178 | if ((written >= 0 || written == -EIOCBQUEUED) && | ||
2179 | ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2180 | int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
2181 | if (err < 0) | ||
2182 | written = err; | ||
2183 | } | ||
2184 | return written; | 2126 | return written; |
2185 | } | 2127 | } |
2186 | EXPORT_SYMBOL(generic_file_direct_write); | 2128 | EXPORT_SYMBOL(generic_file_direct_write); |
@@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2312 | { | 2254 | { |
2313 | struct file *file = iocb->ki_filp; | 2255 | struct file *file = iocb->ki_filp; |
2314 | struct address_space *mapping = file->f_mapping; | 2256 | struct address_space *mapping = file->f_mapping; |
2315 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
2316 | struct inode *inode = mapping->host; | ||
2317 | ssize_t status; | 2257 | ssize_t status; |
2318 | struct iov_iter i; | 2258 | struct iov_iter i; |
2319 | 2259 | ||
@@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2323 | if (likely(status >= 0)) { | 2263 | if (likely(status >= 0)) { |
2324 | written += status; | 2264 | written += status; |
2325 | *ppos = pos + status; | 2265 | *ppos = pos + status; |
2326 | |||
2327 | /* | ||
2328 | * For now, when the user asks for O_SYNC, we'll actually give | ||
2329 | * O_DSYNC | ||
2330 | */ | ||
2331 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2332 | if (!a_ops->writepage || !is_sync_kiocb(iocb)) | ||
2333 | status = generic_osync_inode(inode, mapping, | ||
2334 | OSYNC_METADATA|OSYNC_DATA); | ||
2335 | } | ||
2336 | } | 2266 | } |
2337 | 2267 | ||
2338 | /* | 2268 | /* |
@@ -2348,9 +2278,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2348 | } | 2278 | } |
2349 | EXPORT_SYMBOL(generic_file_buffered_write); | 2279 | EXPORT_SYMBOL(generic_file_buffered_write); |
2350 | 2280 | ||
2351 | static ssize_t | 2281 | /** |
2352 | __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | 2282 | * __generic_file_aio_write - write data to a file |
2353 | unsigned long nr_segs, loff_t *ppos) | 2283 | * @iocb: IO state structure (file, offset, etc.) |
2284 | * @iov: vector with data to write | ||
2285 | * @nr_segs: number of segments in the vector | ||
2286 | * @ppos: position where to write | ||
2287 | * | ||
2288 | * This function does all the work needed for actually writing data to a | ||
2289 | * file. It does all basic checks, removes SUID from the file, updates | ||
2290 | * modification times and calls proper subroutines depending on whether we | ||
2291 | * do direct IO or a standard buffered write. | ||
2292 | * | ||
2293 | * It expects i_mutex to be grabbed unless we work on a block device or similar | ||
2294 | * object which does not need locking at all. | ||
2295 | * | ||
2296 | * This function does *not* take care of syncing data in case of O_SYNC write. | ||
2297 | * A caller has to handle it. This is mainly due to the fact that we want to | ||
2298 | * avoid syncing under i_mutex. | ||
2299 | */ | ||
2300 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
2301 | unsigned long nr_segs, loff_t *ppos) | ||
2354 | { | 2302 | { |
2355 | struct file *file = iocb->ki_filp; | 2303 | struct file *file = iocb->ki_filp; |
2356 | struct address_space * mapping = file->f_mapping; | 2304 | struct address_space * mapping = file->f_mapping; |
@@ -2447,51 +2395,37 @@ out: | |||
2447 | current->backing_dev_info = NULL; | 2395 | current->backing_dev_info = NULL; |
2448 | return written ? written : err; | 2396 | return written ? written : err; |
2449 | } | 2397 | } |
2398 | EXPORT_SYMBOL(__generic_file_aio_write); | ||
2450 | 2399 | ||
2451 | ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, | 2400 | /** |
2452 | const struct iovec *iov, unsigned long nr_segs, loff_t pos) | 2401 | * generic_file_aio_write - write data to a file |
2453 | { | 2402 | * @iocb: IO state structure |
2454 | struct file *file = iocb->ki_filp; | 2403 | * @iov: vector with data to write |
2455 | struct address_space *mapping = file->f_mapping; | 2404 | * @nr_segs: number of segments in the vector |
2456 | struct inode *inode = mapping->host; | 2405 | * @pos: position in file where to write |
2457 | ssize_t ret; | 2406 | * |
2458 | 2407 | * This is a wrapper around __generic_file_aio_write() to be used by most | |
2459 | BUG_ON(iocb->ki_pos != pos); | 2408 | * filesystems. It takes care of syncing the file in case of O_SYNC file |
2460 | 2409 | * and acquires i_mutex as needed. | |
2461 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2410 | */ |
2462 | &iocb->ki_pos); | ||
2463 | |||
2464 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2465 | ssize_t err; | ||
2466 | |||
2467 | err = sync_page_range_nolock(inode, mapping, pos, ret); | ||
2468 | if (err < 0) | ||
2469 | ret = err; | ||
2470 | } | ||
2471 | return ret; | ||
2472 | } | ||
2473 | EXPORT_SYMBOL(generic_file_aio_write_nolock); | ||
2474 | |||
2475 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2411 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2476 | unsigned long nr_segs, loff_t pos) | 2412 | unsigned long nr_segs, loff_t pos) |
2477 | { | 2413 | { |
2478 | struct file *file = iocb->ki_filp; | 2414 | struct file *file = iocb->ki_filp; |
2479 | struct address_space *mapping = file->f_mapping; | 2415 | struct inode *inode = file->f_mapping->host; |
2480 | struct inode *inode = mapping->host; | ||
2481 | ssize_t ret; | 2416 | ssize_t ret; |
2482 | 2417 | ||
2483 | BUG_ON(iocb->ki_pos != pos); | 2418 | BUG_ON(iocb->ki_pos != pos); |
2484 | 2419 | ||
2485 | mutex_lock(&inode->i_mutex); | 2420 | mutex_lock(&inode->i_mutex); |
2486 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2421 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
2487 | &iocb->ki_pos); | ||
2488 | mutex_unlock(&inode->i_mutex); | 2422 | mutex_unlock(&inode->i_mutex); |
2489 | 2423 | ||
2490 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2424 | if (ret > 0 || ret == -EIOCBQUEUED) { |
2491 | ssize_t err; | 2425 | ssize_t err; |
2492 | 2426 | ||
2493 | err = sync_page_range(inode, mapping, pos, ret); | 2427 | err = generic_write_sync(file, pos, ret); |
2494 | if (err < 0) | 2428 | if (err < 0 && ret > 0) |
2495 | ret = err; | 2429 | ret = err; |
2496 | } | 2430 | } |
2497 | return ret; | 2431 | return ret; |