about summary refs log tree commit diff stats
path: root/mm/filemap.c
diff options
context:
space:
mode:
author    Steve French <sfrench@us.ibm.com>  2006-01-12 17:47:08 -0500
committer Steve French <sfrench@us.ibm.com>  2006-01-12 17:47:08 -0500
commit    94bc2be31a01a3055ec94176e595dfe208e92d3b (patch)
tree      ebfbe81c6718a6390bfa1b99c6d228237d818576 /mm/filemap.c
parent    c32a0b689cb9cc160cfcd19735bbf50bb70c6ef4 (diff)
parent    58cba4650a7a414eabd2b40cc9d8e45fcdf192d9 (diff)
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c | 159
1 file changed, 101 insertions(+), 58 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 33a28bfde158..a965b6b35f26 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -15,6 +15,7 @@
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/aio.h> 17#include <linux/aio.h>
18#include <linux/capability.h>
18#include <linux/kernel_stat.h> 19#include <linux/kernel_stat.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <linux/swap.h> 21#include <linux/swap.h>
@@ -61,7 +62,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
61 * ->swap_lock (exclusive_swap_page, others) 62 * ->swap_lock (exclusive_swap_page, others)
62 * ->mapping->tree_lock 63 * ->mapping->tree_lock
63 * 64 *
64 * ->i_sem 65 * ->i_mutex
65 * ->i_mmap_lock (truncate->unmap_mapping_range) 66 * ->i_mmap_lock (truncate->unmap_mapping_range)
66 * 67 *
67 * ->mmap_sem 68 * ->mmap_sem
@@ -73,9 +74,9 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
73 * ->lock_page (access_process_vm) 74 * ->lock_page (access_process_vm)
74 * 75 *
75 * ->mmap_sem 76 * ->mmap_sem
76 * ->i_sem (msync) 77 * ->i_mutex (msync)
77 * 78 *
78 * ->i_sem 79 * ->i_mutex
79 * ->i_alloc_sem (various) 80 * ->i_alloc_sem (various)
80 * 81 *
81 * ->inode_lock 82 * ->inode_lock
@@ -276,11 +277,11 @@ static int wait_on_page_writeback_range(struct address_space *mapping,
276 * integrity" operation. It waits upon in-flight writeout before starting and 277 * integrity" operation. It waits upon in-flight writeout before starting and
277 * waiting upon new writeout. If there was an IO error, return it. 278 * waiting upon new writeout. If there was an IO error, return it.
278 * 279 *
279 * We need to re-take i_sem during the generic_osync_inode list walk because 280 * We need to re-take i_mutex during the generic_osync_inode list walk because
280 * it is otherwise livelockable. 281 * it is otherwise livelockable.
281 */ 282 */
282int sync_page_range(struct inode *inode, struct address_space *mapping, 283int sync_page_range(struct inode *inode, struct address_space *mapping,
283 loff_t pos, size_t count) 284 loff_t pos, loff_t count)
284{ 285{
285 pgoff_t start = pos >> PAGE_CACHE_SHIFT; 286 pgoff_t start = pos >> PAGE_CACHE_SHIFT;
286 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 287 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -290,9 +291,9 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
290 return 0; 291 return 0;
291 ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); 292 ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
292 if (ret == 0) { 293 if (ret == 0) {
293 down(&inode->i_sem); 294 mutex_lock(&inode->i_mutex);
294 ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); 295 ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
295 up(&inode->i_sem); 296 mutex_unlock(&inode->i_mutex);
296 } 297 }
297 if (ret == 0) 298 if (ret == 0)
298 ret = wait_on_page_writeback_range(mapping, start, end); 299 ret = wait_on_page_writeback_range(mapping, start, end);
@@ -301,13 +302,12 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
301EXPORT_SYMBOL(sync_page_range); 302EXPORT_SYMBOL(sync_page_range);
302 303
303/* 304/*
304 * Note: Holding i_sem across sync_page_range_nolock is not a good idea 305 * Note: Holding i_mutex across sync_page_range_nolock is not a good idea
305 * as it forces O_SYNC writers to different parts of the same file 306 * as it forces O_SYNC writers to different parts of the same file
306 * to be serialised right until io completion. 307 * to be serialised right until io completion.
307 */ 308 */
308static int sync_page_range_nolock(struct inode *inode, 309int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
309 struct address_space *mapping, 310 loff_t pos, loff_t count)
310 loff_t pos, size_t count)
311{ 311{
312 pgoff_t start = pos >> PAGE_CACHE_SHIFT; 312 pgoff_t start = pos >> PAGE_CACHE_SHIFT;
313 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 313 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -322,6 +322,7 @@ static int sync_page_range_nolock(struct inode *inode,
322 ret = wait_on_page_writeback_range(mapping, start, end); 322 ret = wait_on_page_writeback_range(mapping, start, end);
323 return ret; 323 return ret;
324} 324}
325EXPORT_SYMBOL(sync_page_range_nolock);
325 326
326/** 327/**
327 * filemap_fdatawait - walk the list of under-writeback pages of the given 328 * filemap_fdatawait - walk the list of under-writeback pages of the given
@@ -343,30 +344,44 @@ EXPORT_SYMBOL(filemap_fdatawait);
343 344
344int filemap_write_and_wait(struct address_space *mapping) 345int filemap_write_and_wait(struct address_space *mapping)
345{ 346{
346 int retval = 0; 347 int err = 0;
347 348
348 if (mapping->nrpages) { 349 if (mapping->nrpages) {
349 retval = filemap_fdatawrite(mapping); 350 err = filemap_fdatawrite(mapping);
350 if (retval == 0) 351 /*
351 retval = filemap_fdatawait(mapping); 352 * Even if the above returned error, the pages may be
353 * written partially (e.g. -ENOSPC), so we wait for it.
354 * But the -EIO is special case, it may indicate the worst
355 * thing (e.g. bug) happened, so we avoid waiting for it.
356 */
357 if (err != -EIO) {
358 int err2 = filemap_fdatawait(mapping);
359 if (!err)
360 err = err2;
361 }
352 } 362 }
353 return retval; 363 return err;
354} 364}
365EXPORT_SYMBOL(filemap_write_and_wait);
355 366
356int filemap_write_and_wait_range(struct address_space *mapping, 367int filemap_write_and_wait_range(struct address_space *mapping,
357 loff_t lstart, loff_t lend) 368 loff_t lstart, loff_t lend)
358{ 369{
359 int retval = 0; 370 int err = 0;
360 371
361 if (mapping->nrpages) { 372 if (mapping->nrpages) {
362 retval = __filemap_fdatawrite_range(mapping, lstart, lend, 373 err = __filemap_fdatawrite_range(mapping, lstart, lend,
363 WB_SYNC_ALL); 374 WB_SYNC_ALL);
364 if (retval == 0) 375 /* See comment of filemap_write_and_wait() */
365 retval = wait_on_page_writeback_range(mapping, 376 if (err != -EIO) {
366 lstart >> PAGE_CACHE_SHIFT, 377 int err2 = wait_on_page_writeback_range(mapping,
367 lend >> PAGE_CACHE_SHIFT); 378 lstart >> PAGE_CACHE_SHIFT,
379 lend >> PAGE_CACHE_SHIFT);
380 if (!err)
381 err = err2;
382 }
368 } 383 }
369 return retval; 384 return err;
370} 385}
371 386
372/* 387/*
@@ -555,11 +570,12 @@ repeat:
555 page_cache_get(page); 570 page_cache_get(page);
556 if (TestSetPageLocked(page)) { 571 if (TestSetPageLocked(page)) {
557 read_unlock_irq(&mapping->tree_lock); 572 read_unlock_irq(&mapping->tree_lock);
558 lock_page(page); 573 __lock_page(page);
559 read_lock_irq(&mapping->tree_lock); 574 read_lock_irq(&mapping->tree_lock);
560 575
561 /* Has the page been truncated while we slept? */ 576 /* Has the page been truncated while we slept? */
562 if (page->mapping != mapping || page->index != offset) { 577 if (unlikely(page->mapping != mapping ||
578 page->index != offset)) {
563 unlock_page(page); 579 unlock_page(page);
564 page_cache_release(page); 580 page_cache_release(page);
565 goto repeat; 581 goto repeat;
@@ -831,8 +847,13 @@ readpage:
831 /* Start the actual read. The read will unlock the page. */ 847 /* Start the actual read. The read will unlock the page. */
832 error = mapping->a_ops->readpage(filp, page); 848 error = mapping->a_ops->readpage(filp, page);
833 849
834 if (unlikely(error)) 850 if (unlikely(error)) {
851 if (error == AOP_TRUNCATED_PAGE) {
852 page_cache_release(page);
853 goto find_page;
854 }
835 goto readpage_error; 855 goto readpage_error;
856 }
836 857
837 if (!PageUptodate(page)) { 858 if (!PageUptodate(page)) {
838 lock_page(page); 859 lock_page(page);
@@ -1152,26 +1173,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1152{ 1173{
1153 struct address_space *mapping = file->f_mapping; 1174 struct address_space *mapping = file->f_mapping;
1154 struct page *page; 1175 struct page *page;
1155 int error; 1176 int ret;
1156 1177
1157 page = page_cache_alloc_cold(mapping); 1178 do {
1158 if (!page) 1179 page = page_cache_alloc_cold(mapping);
1159 return -ENOMEM; 1180 if (!page)
1181 return -ENOMEM;
1182
1183 ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
1184 if (ret == 0)
1185 ret = mapping->a_ops->readpage(file, page);
1186 else if (ret == -EEXIST)
1187 ret = 0; /* losing race to add is OK */
1160 1188
1161 error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
1162 if (!error) {
1163 error = mapping->a_ops->readpage(file, page);
1164 page_cache_release(page); 1189 page_cache_release(page);
1165 return error;
1166 }
1167 1190
1168 /* 1191 } while (ret == AOP_TRUNCATED_PAGE);
1169 * We arrive here in the unlikely event that someone 1192
1170 * raced with us and added our page to the cache first 1193 return ret;
1171 * or we are out of memory for radix-tree nodes.
1172 */
1173 page_cache_release(page);
1174 return error == -EEXIST ? 0 : error;
1175} 1194}
1176 1195
1177#define MMAP_LOTSAMISS (100) 1196#define MMAP_LOTSAMISS (100)
@@ -1331,10 +1350,14 @@ page_not_uptodate:
1331 goto success; 1350 goto success;
1332 } 1351 }
1333 1352
1334 if (!mapping->a_ops->readpage(file, page)) { 1353 error = mapping->a_ops->readpage(file, page);
1354 if (!error) {
1335 wait_on_page_locked(page); 1355 wait_on_page_locked(page);
1336 if (PageUptodate(page)) 1356 if (PageUptodate(page))
1337 goto success; 1357 goto success;
1358 } else if (error == AOP_TRUNCATED_PAGE) {
1359 page_cache_release(page);
1360 goto retry_find;
1338 } 1361 }
1339 1362
1340 /* 1363 /*
@@ -1358,10 +1381,14 @@ page_not_uptodate:
1358 goto success; 1381 goto success;
1359 } 1382 }
1360 ClearPageError(page); 1383 ClearPageError(page);
1361 if (!mapping->a_ops->readpage(file, page)) { 1384 error = mapping->a_ops->readpage(file, page);
1385 if (!error) {
1362 wait_on_page_locked(page); 1386 wait_on_page_locked(page);
1363 if (PageUptodate(page)) 1387 if (PageUptodate(page))
1364 goto success; 1388 goto success;
1389 } else if (error == AOP_TRUNCATED_PAGE) {
1390 page_cache_release(page);
1391 goto retry_find;
1365 } 1392 }
1366 1393
1367 /* 1394 /*
@@ -1444,10 +1471,14 @@ page_not_uptodate:
1444 goto success; 1471 goto success;
1445 } 1472 }
1446 1473
1447 if (!mapping->a_ops->readpage(file, page)) { 1474 error = mapping->a_ops->readpage(file, page);
1475 if (!error) {
1448 wait_on_page_locked(page); 1476 wait_on_page_locked(page);
1449 if (PageUptodate(page)) 1477 if (PageUptodate(page))
1450 goto success; 1478 goto success;
1479 } else if (error == AOP_TRUNCATED_PAGE) {
1480 page_cache_release(page);
1481 goto retry_find;
1451 } 1482 }
1452 1483
1453 /* 1484 /*
@@ -1470,10 +1501,14 @@ page_not_uptodate:
1470 } 1501 }
1471 1502
1472 ClearPageError(page); 1503 ClearPageError(page);
1473 if (!mapping->a_ops->readpage(file, page)) { 1504 error = mapping->a_ops->readpage(file, page);
1505 if (!error) {
1474 wait_on_page_locked(page); 1506 wait_on_page_locked(page);
1475 if (PageUptodate(page)) 1507 if (PageUptodate(page))
1476 goto success; 1508 goto success;
1509 } else if (error == AOP_TRUNCATED_PAGE) {
1510 page_cache_release(page);
1511 goto retry_find;
1477 } 1512 }
1478 1513
1479 /* 1514 /*
@@ -1858,7 +1893,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
1858 /* 1893 /*
1859 * Sync the fs metadata but not the minor inode changes and 1894 * Sync the fs metadata but not the minor inode changes and
1860 * of course not the data as we did direct DMA for the IO. 1895 * of course not the data as we did direct DMA for the IO.
1861 * i_sem is held, which protects generic_osync_inode() from 1896 * i_mutex is held, which protects generic_osync_inode() from
1862 * livelocking. 1897 * livelocking.
1863 */ 1898 */
1864 if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 1899 if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
@@ -1934,12 +1969,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
1934 status = a_ops->prepare_write(file, page, offset, offset+bytes); 1969 status = a_ops->prepare_write(file, page, offset, offset+bytes);
1935 if (unlikely(status)) { 1970 if (unlikely(status)) {
1936 loff_t isize = i_size_read(inode); 1971 loff_t isize = i_size_read(inode);
1972
1973 if (status != AOP_TRUNCATED_PAGE)
1974 unlock_page(page);
1975 page_cache_release(page);
1976 if (status == AOP_TRUNCATED_PAGE)
1977 continue;
1937 /* 1978 /*
1938 * prepare_write() may have instantiated a few blocks 1979 * prepare_write() may have instantiated a few blocks
1939 * outside i_size. Trim these off again. 1980 * outside i_size. Trim these off again.
1940 */ 1981 */
1941 unlock_page(page);
1942 page_cache_release(page);
1943 if (pos + bytes > isize) 1982 if (pos + bytes > isize)
1944 vmtruncate(inode, isize); 1983 vmtruncate(inode, isize);
1945 break; 1984 break;
@@ -1952,6 +1991,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
1952 cur_iov, iov_base, bytes); 1991 cur_iov, iov_base, bytes);
1953 flush_dcache_page(page); 1992 flush_dcache_page(page);
1954 status = a_ops->commit_write(file, page, offset, offset+bytes); 1993 status = a_ops->commit_write(file, page, offset, offset+bytes);
1994 if (status == AOP_TRUNCATED_PAGE) {
1995 page_cache_release(page);
1996 continue;
1997 }
1955 if (likely(copied > 0)) { 1998 if (likely(copied > 0)) {
1956 if (!status) 1999 if (!status)
1957 status = copied; 2000 status = copied;
@@ -2066,7 +2109,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
2066 if (err) 2109 if (err)
2067 goto out; 2110 goto out;
2068 2111
2069 inode_update_time(inode, 1); 2112 file_update_time(file);
2070 2113
2071 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 2114 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
2072 if (unlikely(file->f_flags & O_DIRECT)) { 2115 if (unlikely(file->f_flags & O_DIRECT)) {
@@ -2153,10 +2196,10 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
2153 2196
2154 BUG_ON(iocb->ki_pos != pos); 2197 BUG_ON(iocb->ki_pos != pos);
2155 2198
2156 down(&inode->i_sem); 2199 mutex_lock(&inode->i_mutex);
2157 ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, 2200 ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
2158 &iocb->ki_pos); 2201 &iocb->ki_pos);
2159 up(&inode->i_sem); 2202 mutex_unlock(&inode->i_mutex);
2160 2203
2161 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2204 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2162 ssize_t err; 2205 ssize_t err;
@@ -2178,9 +2221,9 @@ ssize_t generic_file_write(struct file *file, const char __user *buf,
2178 struct iovec local_iov = { .iov_base = (void __user *)buf, 2221 struct iovec local_iov = { .iov_base = (void __user *)buf,
2179 .iov_len = count }; 2222 .iov_len = count };
2180 2223
2181 down(&inode->i_sem); 2224 mutex_lock(&inode->i_mutex);
2182 ret = __generic_file_write_nolock(file, &local_iov, 1, ppos); 2225 ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
2183 up(&inode->i_sem); 2226 mutex_unlock(&inode->i_mutex);
2184 2227
2185 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2228 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2186 ssize_t err; 2229 ssize_t err;
@@ -2214,9 +2257,9 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
2214 struct inode *inode = mapping->host; 2257 struct inode *inode = mapping->host;
2215 ssize_t ret; 2258 ssize_t ret;
2216 2259
2217 down(&inode->i_sem); 2260 mutex_lock(&inode->i_mutex);
2218 ret = __generic_file_write_nolock(file, iov, nr_segs, ppos); 2261 ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
2219 up(&inode->i_sem); 2262 mutex_unlock(&inode->i_mutex);
2220 2263
2221 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2264 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2222 int err; 2265 int err;
@@ -2230,7 +2273,7 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
2230EXPORT_SYMBOL(generic_file_writev); 2273EXPORT_SYMBOL(generic_file_writev);
2231 2274
2232/* 2275/*
2233 * Called under i_sem for writes to S_ISREG files. Returns -EIO if something 2276 * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
2234 * went wrong during pagecache shootdown. 2277 * went wrong during pagecache shootdown.
2235 */ 2278 */
2236static ssize_t 2279static ssize_t