diff options
Diffstat (limited to 'mm/filemap.c')
| -rw-r--r-- | mm/filemap.c | 170 |
1 files changed, 52 insertions, 118 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index ccea3b665c12..dd51c68e2b86 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -39,11 +39,10 @@ | |||
| 39 | /* | 39 | /* |
| 40 | * FIXME: remove all knowledge of the buffer layer from the core VM | 40 | * FIXME: remove all knowledge of the buffer layer from the core VM |
| 41 | */ | 41 | */ |
| 42 | #include <linux/buffer_head.h> /* for generic_osync_inode */ | 42 | #include <linux/buffer_head.h> /* for try_to_free_buffers */ |
| 43 | 43 | ||
| 44 | #include <asm/mman.h> | 44 | #include <asm/mman.h> |
| 45 | 45 | ||
| 46 | |||
| 47 | /* | 46 | /* |
| 48 | * Shared mappings implemented 30.11.1994. It's not fully working yet, | 47 | * Shared mappings implemented 30.11.1994. It's not fully working yet, |
| 49 | * though. | 48 | * though. |
| @@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping, | |||
| 307 | } | 306 | } |
| 308 | 307 | ||
| 309 | /** | 308 | /** |
| 310 | * sync_page_range - write and wait on all pages in the passed range | 309 | * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range |
| 311 | * @inode: target inode | 310 | * @mapping: address space structure to wait for |
| 312 | * @mapping: target address_space | 311 | * @start: offset in bytes where the range starts |
| 313 | * @pos: beginning offset in pages to write | 312 | * @end: offset in bytes where the range ends (inclusive) |
| 314 | * @count: number of bytes to write | ||
| 315 | * | ||
| 316 | * Write and wait upon all the pages in the passed range. This is a "data | ||
| 317 | * integrity" operation. It waits upon in-flight writeout before starting and | ||
| 318 | * waiting upon new writeout. If there was an IO error, return it. | ||
| 319 | * | 313 | * |
| 320 | * We need to re-take i_mutex during the generic_osync_inode list walk because | 314 | * Walk the list of under-writeback pages of the given address space |
| 321 | * it is otherwise livelockable. | 315 | * in the given range and wait for all of them. |
| 322 | */ | ||
| 323 | int sync_page_range(struct inode *inode, struct address_space *mapping, | ||
| 324 | loff_t pos, loff_t count) | ||
| 325 | { | ||
| 326 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | ||
| 327 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
| 328 | int ret; | ||
| 329 | |||
| 330 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
| 331 | return 0; | ||
| 332 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
| 333 | if (ret == 0) { | ||
| 334 | mutex_lock(&inode->i_mutex); | ||
| 335 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
| 336 | mutex_unlock(&inode->i_mutex); | ||
| 337 | } | ||
| 338 | if (ret == 0) | ||
| 339 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
| 340 | return ret; | ||
| 341 | } | ||
| 342 | EXPORT_SYMBOL(sync_page_range); | ||
| 343 | |||
| 344 | /** | ||
| 345 | * sync_page_range_nolock - write & wait on all pages in the passed range without locking | ||
| 346 | * @inode: target inode | ||
| 347 | * @mapping: target address_space | ||
| 348 | * @pos: beginning offset in pages to write | ||
| 349 | * @count: number of bytes to write | ||
| 350 | * | 316 | * |
| 351 | * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea | 317 | * This is just a simple wrapper so that callers don't have to convert offsets |
| 352 | * as it forces O_SYNC writers to different parts of the same file | 318 | * to page indexes themselves. |
| 353 | * to be serialised right until io completion. | ||
| 354 | */ | 319 | */ |
| 355 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, | 320 | int filemap_fdatawait_range(struct address_space *mapping, loff_t start, |
| 356 | loff_t pos, loff_t count) | 321 | loff_t end) |
| 357 | { | 322 | { |
| 358 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | 323 | return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT, |
| 359 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 324 | end >> PAGE_CACHE_SHIFT); |
| 360 | int ret; | ||
| 361 | |||
| 362 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
| 363 | return 0; | ||
| 364 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
| 365 | if (ret == 0) | ||
| 366 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
| 367 | if (ret == 0) | ||
| 368 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
| 369 | return ret; | ||
| 370 | } | 325 | } |
| 371 | EXPORT_SYMBOL(sync_page_range_nolock); | 326 | EXPORT_SYMBOL(filemap_fdatawait_range); |
| 372 | 327 | ||
| 373 | /** | 328 | /** |
| 374 | * filemap_fdatawait - wait for all under-writeback pages to complete | 329 | * filemap_fdatawait - wait for all under-writeback pages to complete |
| @@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 2167 | } | 2122 | } |
| 2168 | *ppos = end; | 2123 | *ppos = end; |
| 2169 | } | 2124 | } |
| 2170 | |||
| 2171 | /* | ||
| 2172 | * Sync the fs metadata but not the minor inode changes and | ||
| 2173 | * of course not the data as we did direct DMA for the IO. | ||
| 2174 | * i_mutex is held, which protects generic_osync_inode() from | ||
| 2175 | * livelocking. AIO O_DIRECT ops attempt to sync metadata here. | ||
| 2176 | */ | ||
| 2177 | out: | 2125 | out: |
| 2178 | if ((written >= 0 || written == -EIOCBQUEUED) && | ||
| 2179 | ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
| 2180 | int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
| 2181 | if (err < 0) | ||
| 2182 | written = err; | ||
| 2183 | } | ||
| 2184 | return written; | 2126 | return written; |
| 2185 | } | 2127 | } |
| 2186 | EXPORT_SYMBOL(generic_file_direct_write); | 2128 | EXPORT_SYMBOL(generic_file_direct_write); |
| @@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 2312 | { | 2254 | { |
| 2313 | struct file *file = iocb->ki_filp; | 2255 | struct file *file = iocb->ki_filp; |
| 2314 | struct address_space *mapping = file->f_mapping; | 2256 | struct address_space *mapping = file->f_mapping; |
| 2315 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
| 2316 | struct inode *inode = mapping->host; | ||
| 2317 | ssize_t status; | 2257 | ssize_t status; |
| 2318 | struct iov_iter i; | 2258 | struct iov_iter i; |
| 2319 | 2259 | ||
| @@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 2323 | if (likely(status >= 0)) { | 2263 | if (likely(status >= 0)) { |
| 2324 | written += status; | 2264 | written += status; |
| 2325 | *ppos = pos + status; | 2265 | *ppos = pos + status; |
| 2326 | |||
| 2327 | /* | ||
| 2328 | * For now, when the user asks for O_SYNC, we'll actually give | ||
| 2329 | * O_DSYNC | ||
| 2330 | */ | ||
| 2331 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
| 2332 | if (!a_ops->writepage || !is_sync_kiocb(iocb)) | ||
| 2333 | status = generic_osync_inode(inode, mapping, | ||
| 2334 | OSYNC_METADATA|OSYNC_DATA); | ||
| 2335 | } | ||
| 2336 | } | 2266 | } |
| 2337 | 2267 | ||
| 2338 | /* | 2268 | /* |
| @@ -2348,9 +2278,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 2348 | } | 2278 | } |
| 2349 | EXPORT_SYMBOL(generic_file_buffered_write); | 2279 | EXPORT_SYMBOL(generic_file_buffered_write); |
| 2350 | 2280 | ||
| 2351 | static ssize_t | 2281 | /** |
| 2352 | __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | 2282 | * __generic_file_aio_write - write data to a file |
| 2353 | unsigned long nr_segs, loff_t *ppos) | 2283 | * @iocb: IO state structure (file, offset, etc.) |
| 2284 | * @iov: vector with data to write | ||
| 2285 | * @nr_segs: number of segments in the vector | ||
| 2286 | * @ppos: position where to write | ||
| 2287 | * | ||
| 2288 | * This function does all the work needed for actually writing data to a | ||
| 2289 | * file. It does all basic checks, removes SUID from the file, updates | ||
| 2290 | * modification times and calls proper subroutines depending on whether we | ||
| 2291 | * do direct IO or a standard buffered write. | ||
| 2292 | * | ||
| 2293 | * It expects i_mutex to be grabbed unless we work on a block device or similar | ||
| 2294 | * object which does not need locking at all. | ||
| 2295 | * | ||
| 2296 | * This function does *not* take care of syncing data in case of O_SYNC write. | ||
| 2297 | * A caller has to handle it. This is mainly due to the fact that we want to | ||
| 2298 | * avoid syncing under i_mutex. | ||
| 2299 | */ | ||
| 2300 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
| 2301 | unsigned long nr_segs, loff_t *ppos) | ||
| 2354 | { | 2302 | { |
| 2355 | struct file *file = iocb->ki_filp; | 2303 | struct file *file = iocb->ki_filp; |
| 2356 | struct address_space * mapping = file->f_mapping; | 2304 | struct address_space * mapping = file->f_mapping; |
| @@ -2447,51 +2395,37 @@ out: | |||
| 2447 | current->backing_dev_info = NULL; | 2395 | current->backing_dev_info = NULL; |
| 2448 | return written ? written : err; | 2396 | return written ? written : err; |
| 2449 | } | 2397 | } |
| 2398 | EXPORT_SYMBOL(__generic_file_aio_write); | ||
| 2450 | 2399 | ||
| 2451 | ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, | 2400 | /** |
| 2452 | const struct iovec *iov, unsigned long nr_segs, loff_t pos) | 2401 | * generic_file_aio_write - write data to a file |
| 2453 | { | 2402 | * @iocb: IO state structure |
| 2454 | struct file *file = iocb->ki_filp; | 2403 | * @iov: vector with data to write |
| 2455 | struct address_space *mapping = file->f_mapping; | 2404 | * @nr_segs: number of segments in the vector |
| 2456 | struct inode *inode = mapping->host; | 2405 | * @pos: position in file where to write |
| 2457 | ssize_t ret; | 2406 | * |
| 2458 | 2407 | * This is a wrapper around __generic_file_aio_write() to be used by most | |
| 2459 | BUG_ON(iocb->ki_pos != pos); | 2408 | * filesystems. It takes care of syncing the file in case of O_SYNC file |
| 2460 | 2409 | * and acquires i_mutex as needed. | |
| 2461 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2410 | */ |
| 2462 | &iocb->ki_pos); | ||
| 2463 | |||
| 2464 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
| 2465 | ssize_t err; | ||
| 2466 | |||
| 2467 | err = sync_page_range_nolock(inode, mapping, pos, ret); | ||
| 2468 | if (err < 0) | ||
| 2469 | ret = err; | ||
| 2470 | } | ||
| 2471 | return ret; | ||
| 2472 | } | ||
| 2473 | EXPORT_SYMBOL(generic_file_aio_write_nolock); | ||
| 2474 | |||
| 2475 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2411 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
| 2476 | unsigned long nr_segs, loff_t pos) | 2412 | unsigned long nr_segs, loff_t pos) |
| 2477 | { | 2413 | { |
| 2478 | struct file *file = iocb->ki_filp; | 2414 | struct file *file = iocb->ki_filp; |
| 2479 | struct address_space *mapping = file->f_mapping; | 2415 | struct inode *inode = file->f_mapping->host; |
| 2480 | struct inode *inode = mapping->host; | ||
| 2481 | ssize_t ret; | 2416 | ssize_t ret; |
| 2482 | 2417 | ||
| 2483 | BUG_ON(iocb->ki_pos != pos); | 2418 | BUG_ON(iocb->ki_pos != pos); |
| 2484 | 2419 | ||
| 2485 | mutex_lock(&inode->i_mutex); | 2420 | mutex_lock(&inode->i_mutex); |
| 2486 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2421 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
| 2487 | &iocb->ki_pos); | ||
| 2488 | mutex_unlock(&inode->i_mutex); | 2422 | mutex_unlock(&inode->i_mutex); |
| 2489 | 2423 | ||
| 2490 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2424 | if (ret > 0 || ret == -EIOCBQUEUED) { |
| 2491 | ssize_t err; | 2425 | ssize_t err; |
| 2492 | 2426 | ||
| 2493 | err = sync_page_range(inode, mapping, pos, ret); | 2427 | err = generic_write_sync(file, pos, ret); |
| 2494 | if (err < 0) | 2428 | if (err < 0 && ret > 0) |
| 2495 | ret = err; | 2429 | ret = err; |
| 2496 | } | 2430 | } |
| 2497 | return ret; | 2431 | return ret; |
