diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 182 |
1 files changed, 62 insertions, 120 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ccea3b665c12..ef169f37156d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -39,11 +39,10 @@ | |||
39 | /* | 39 | /* |
40 | * FIXME: remove all knowledge of the buffer layer from the core VM | 40 | * FIXME: remove all knowledge of the buffer layer from the core VM |
41 | */ | 41 | */ |
42 | #include <linux/buffer_head.h> /* for generic_osync_inode */ | 42 | #include <linux/buffer_head.h> /* for try_to_free_buffers */ |
43 | 43 | ||
44 | #include <asm/mman.h> | 44 | #include <asm/mman.h> |
45 | 45 | ||
46 | |||
47 | /* | 46 | /* |
48 | * Shared mappings implemented 30.11.1994. It's not fully working yet, | 47 | * Shared mappings implemented 30.11.1994. It's not fully working yet, |
49 | * though. | 48 | * though. |
@@ -59,7 +58,7 @@ | |||
59 | /* | 58 | /* |
60 | * Lock ordering: | 59 | * Lock ordering: |
61 | * | 60 | * |
62 | * ->i_mmap_lock (vmtruncate) | 61 | * ->i_mmap_lock (truncate_pagecache) |
63 | * ->private_lock (__free_pte->__set_page_dirty_buffers) | 62 | * ->private_lock (__free_pte->__set_page_dirty_buffers) |
64 | * ->swap_lock (exclusive_swap_page, others) | 63 | * ->swap_lock (exclusive_swap_page, others) |
65 | * ->mapping->tree_lock | 64 | * ->mapping->tree_lock |
@@ -105,6 +104,10 @@ | |||
105 | * | 104 | * |
106 | * ->task->proc_lock | 105 | * ->task->proc_lock |
107 | * ->dcache_lock (proc_pid_lookup) | 106 | * ->dcache_lock (proc_pid_lookup) |
107 | * | ||
108 | * (code doesn't rely on that order, so you could switch it around) | ||
109 | * ->tasklist_lock (memory_failure, collect_procs_ao) | ||
110 | * ->i_mmap_lock | ||
108 | */ | 111 | */ |
109 | 112 | ||
110 | /* | 113 | /* |
@@ -120,6 +123,8 @@ void __remove_from_page_cache(struct page *page) | |||
120 | page->mapping = NULL; | 123 | page->mapping = NULL; |
121 | mapping->nrpages--; | 124 | mapping->nrpages--; |
122 | __dec_zone_page_state(page, NR_FILE_PAGES); | 125 | __dec_zone_page_state(page, NR_FILE_PAGES); |
126 | if (PageSwapBacked(page)) | ||
127 | __dec_zone_page_state(page, NR_SHMEM); | ||
123 | BUG_ON(page_mapped(page)); | 128 | BUG_ON(page_mapped(page)); |
124 | 129 | ||
125 | /* | 130 | /* |
@@ -307,68 +312,24 @@ int wait_on_page_writeback_range(struct address_space *mapping, | |||
307 | } | 312 | } |
308 | 313 | ||
309 | /** | 314 | /** |
310 | * sync_page_range - write and wait on all pages in the passed range | 315 | * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range |
311 | * @inode: target inode | 316 | * @mapping: address space structure to wait for |
312 | * @mapping: target address_space | 317 | * @start: offset in bytes where the range starts |
313 | * @pos: beginning offset in pages to write | 318 | * @end: offset in bytes where the range ends (inclusive) |
314 | * @count: number of bytes to write | ||
315 | * | ||
316 | * Write and wait upon all the pages in the passed range. This is a "data | ||
317 | * integrity" operation. It waits upon in-flight writeout before starting and | ||
318 | * waiting upon new writeout. If there was an IO error, return it. | ||
319 | * | 319 | * |
320 | * We need to re-take i_mutex during the generic_osync_inode list walk because | 320 | * Walk the list of under-writeback pages of the given address space |
321 | * it is otherwise livelockable. | 321 | * in the given range and wait for all of them. |
322 | */ | ||
323 | int sync_page_range(struct inode *inode, struct address_space *mapping, | ||
324 | loff_t pos, loff_t count) | ||
325 | { | ||
326 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | ||
327 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
328 | int ret; | ||
329 | |||
330 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
331 | return 0; | ||
332 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
333 | if (ret == 0) { | ||
334 | mutex_lock(&inode->i_mutex); | ||
335 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
336 | mutex_unlock(&inode->i_mutex); | ||
337 | } | ||
338 | if (ret == 0) | ||
339 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
340 | return ret; | ||
341 | } | ||
342 | EXPORT_SYMBOL(sync_page_range); | ||
343 | |||
344 | /** | ||
345 | * sync_page_range_nolock - write & wait on all pages in the passed range without locking | ||
346 | * @inode: target inode | ||
347 | * @mapping: target address_space | ||
348 | * @pos: beginning offset in pages to write | ||
349 | * @count: number of bytes to write | ||
350 | * | 322 | * |
351 | * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea | 323 | * This is just a simple wrapper so that callers don't have to convert offsets |
352 | * as it forces O_SYNC writers to different parts of the same file | 324 | * to page indexes themselves |
353 | * to be serialised right until io completion. | ||
354 | */ | 325 | */ |
355 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, | 326 | int filemap_fdatawait_range(struct address_space *mapping, loff_t start, |
356 | loff_t pos, loff_t count) | 327 | loff_t end) |
357 | { | 328 | { |
358 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | 329 | return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT, |
359 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 330 | end >> PAGE_CACHE_SHIFT); |
360 | int ret; | ||
361 | |||
362 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
363 | return 0; | ||
364 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
365 | if (ret == 0) | ||
366 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
367 | if (ret == 0) | ||
368 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
369 | return ret; | ||
370 | } | 331 | } |
371 | EXPORT_SYMBOL(sync_page_range_nolock); | 332 | EXPORT_SYMBOL(filemap_fdatawait_range); |
372 | 333 | ||
373 | /** | 334 | /** |
374 | * filemap_fdatawait - wait for all under-writeback pages to complete | 335 | * filemap_fdatawait - wait for all under-writeback pages to complete |
@@ -476,6 +437,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, | |||
476 | if (likely(!error)) { | 437 | if (likely(!error)) { |
477 | mapping->nrpages++; | 438 | mapping->nrpages++; |
478 | __inc_zone_page_state(page, NR_FILE_PAGES); | 439 | __inc_zone_page_state(page, NR_FILE_PAGES); |
440 | if (PageSwapBacked(page)) | ||
441 | __inc_zone_page_state(page, NR_SHMEM); | ||
479 | spin_unlock_irq(&mapping->tree_lock); | 442 | spin_unlock_irq(&mapping->tree_lock); |
480 | } else { | 443 | } else { |
481 | page->mapping = NULL; | 444 | page->mapping = NULL; |
@@ -1648,7 +1611,7 @@ page_not_uptodate: | |||
1648 | } | 1611 | } |
1649 | EXPORT_SYMBOL(filemap_fault); | 1612 | EXPORT_SYMBOL(filemap_fault); |
1650 | 1613 | ||
1651 | struct vm_operations_struct generic_file_vm_ops = { | 1614 | const struct vm_operations_struct generic_file_vm_ops = { |
1652 | .fault = filemap_fault, | 1615 | .fault = filemap_fault, |
1653 | }; | 1616 | }; |
1654 | 1617 | ||
@@ -2167,20 +2130,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2167 | } | 2130 | } |
2168 | *ppos = end; | 2131 | *ppos = end; |
2169 | } | 2132 | } |
2170 | |||
2171 | /* | ||
2172 | * Sync the fs metadata but not the minor inode changes and | ||
2173 | * of course not the data as we did direct DMA for the IO. | ||
2174 | * i_mutex is held, which protects generic_osync_inode() from | ||
2175 | * livelocking. AIO O_DIRECT ops attempt to sync metadata here. | ||
2176 | */ | ||
2177 | out: | 2133 | out: |
2178 | if ((written >= 0 || written == -EIOCBQUEUED) && | ||
2179 | ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2180 | int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
2181 | if (err < 0) | ||
2182 | written = err; | ||
2183 | } | ||
2184 | return written; | 2134 | return written; |
2185 | } | 2135 | } |
2186 | EXPORT_SYMBOL(generic_file_direct_write); | 2136 | EXPORT_SYMBOL(generic_file_direct_write); |
@@ -2312,8 +2262,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2312 | { | 2262 | { |
2313 | struct file *file = iocb->ki_filp; | 2263 | struct file *file = iocb->ki_filp; |
2314 | struct address_space *mapping = file->f_mapping; | 2264 | struct address_space *mapping = file->f_mapping; |
2315 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
2316 | struct inode *inode = mapping->host; | ||
2317 | ssize_t status; | 2265 | ssize_t status; |
2318 | struct iov_iter i; | 2266 | struct iov_iter i; |
2319 | 2267 | ||
@@ -2323,16 +2271,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2323 | if (likely(status >= 0)) { | 2271 | if (likely(status >= 0)) { |
2324 | written += status; | 2272 | written += status; |
2325 | *ppos = pos + status; | 2273 | *ppos = pos + status; |
2326 | |||
2327 | /* | ||
2328 | * For now, when the user asks for O_SYNC, we'll actually give | ||
2329 | * O_DSYNC | ||
2330 | */ | ||
2331 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2332 | if (!a_ops->writepage || !is_sync_kiocb(iocb)) | ||
2333 | status = generic_osync_inode(inode, mapping, | ||
2334 | OSYNC_METADATA|OSYNC_DATA); | ||
2335 | } | ||
2336 | } | 2274 | } |
2337 | 2275 | ||
2338 | /* | 2276 | /* |
@@ -2348,9 +2286,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2348 | } | 2286 | } |
2349 | EXPORT_SYMBOL(generic_file_buffered_write); | 2287 | EXPORT_SYMBOL(generic_file_buffered_write); |
2350 | 2288 | ||
2351 | static ssize_t | 2289 | /** |
2352 | __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | 2290 | * __generic_file_aio_write - write data to a file |
2353 | unsigned long nr_segs, loff_t *ppos) | 2291 | * @iocb: IO state structure (file, offset, etc.) |
2292 | * @iov: vector with data to write | ||
2293 | * @nr_segs: number of segments in the vector | ||
2294 | * @ppos: position where to write | ||
2295 | * | ||
2296 | * This function does all the work needed for actually writing data to a | ||
2297 | * file. It does all basic checks, removes SUID from the file, updates | ||
2298 | * modification times and calls proper subroutines depending on whether we | ||
2299 | * do direct IO or a standard buffered write. | ||
2300 | * | ||
2301 | * It expects i_mutex to be grabbed unless we work on a block device or similar | ||
2302 | * object which does not need locking at all. | ||
2303 | * | ||
2304 | * This function does *not* take care of syncing data in case of O_SYNC write. | ||
2305 | * A caller has to handle it. This is mainly due to the fact that we want to | ||
2306 | * avoid syncing under i_mutex. | ||
2307 | */ | ||
2308 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
2309 | unsigned long nr_segs, loff_t *ppos) | ||
2354 | { | 2310 | { |
2355 | struct file *file = iocb->ki_filp; | 2311 | struct file *file = iocb->ki_filp; |
2356 | struct address_space * mapping = file->f_mapping; | 2312 | struct address_space * mapping = file->f_mapping; |
@@ -2447,51 +2403,37 @@ out: | |||
2447 | current->backing_dev_info = NULL; | 2403 | current->backing_dev_info = NULL; |
2448 | return written ? written : err; | 2404 | return written ? written : err; |
2449 | } | 2405 | } |
2406 | EXPORT_SYMBOL(__generic_file_aio_write); | ||
2450 | 2407 | ||
2451 | ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, | 2408 | /** |
2452 | const struct iovec *iov, unsigned long nr_segs, loff_t pos) | 2409 | * generic_file_aio_write - write data to a file |
2453 | { | 2410 | * @iocb: IO state structure |
2454 | struct file *file = iocb->ki_filp; | 2411 | * @iov: vector with data to write |
2455 | struct address_space *mapping = file->f_mapping; | 2412 | * @nr_segs: number of segments in the vector |
2456 | struct inode *inode = mapping->host; | 2413 | * @pos: position in file where to write |
2457 | ssize_t ret; | 2414 | * |
2458 | 2415 | * This is a wrapper around __generic_file_aio_write() to be used by most | |
2459 | BUG_ON(iocb->ki_pos != pos); | 2416 | * filesystems. It takes care of syncing the file in case of O_SYNC file |
2460 | 2417 | * and acquires i_mutex as needed. | |
2461 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2418 | */ |
2462 | &iocb->ki_pos); | ||
2463 | |||
2464 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2465 | ssize_t err; | ||
2466 | |||
2467 | err = sync_page_range_nolock(inode, mapping, pos, ret); | ||
2468 | if (err < 0) | ||
2469 | ret = err; | ||
2470 | } | ||
2471 | return ret; | ||
2472 | } | ||
2473 | EXPORT_SYMBOL(generic_file_aio_write_nolock); | ||
2474 | |||
2475 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2419 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2476 | unsigned long nr_segs, loff_t pos) | 2420 | unsigned long nr_segs, loff_t pos) |
2477 | { | 2421 | { |
2478 | struct file *file = iocb->ki_filp; | 2422 | struct file *file = iocb->ki_filp; |
2479 | struct address_space *mapping = file->f_mapping; | 2423 | struct inode *inode = file->f_mapping->host; |
2480 | struct inode *inode = mapping->host; | ||
2481 | ssize_t ret; | 2424 | ssize_t ret; |
2482 | 2425 | ||
2483 | BUG_ON(iocb->ki_pos != pos); | 2426 | BUG_ON(iocb->ki_pos != pos); |
2484 | 2427 | ||
2485 | mutex_lock(&inode->i_mutex); | 2428 | mutex_lock(&inode->i_mutex); |
2486 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2429 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
2487 | &iocb->ki_pos); | ||
2488 | mutex_unlock(&inode->i_mutex); | 2430 | mutex_unlock(&inode->i_mutex); |
2489 | 2431 | ||
2490 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2432 | if (ret > 0 || ret == -EIOCBQUEUED) { |
2491 | ssize_t err; | 2433 | ssize_t err; |
2492 | 2434 | ||
2493 | err = sync_page_range(inode, mapping, pos, ret); | 2435 | err = generic_write_sync(file, pos, ret); |
2494 | if (err < 0) | 2436 | if (err < 0 && ret > 0) |
2495 | ret = err; | 2437 | ret = err; |
2496 | } | 2438 | } |
2497 | return ret; | 2439 | return ret; |