aboutsummaryrefslogtreecommitdiffstats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- mm/filemap.c 105
1 files changed, 77 insertions, 28 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 807a463fd5ed..3277f3b23524 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -9,7 +9,6 @@
9 * most "normal" filesystems (but you don't /have/ to use this: 9 * most "normal" filesystems (but you don't /have/ to use this:
10 * the NFS filesystem used to do this differently, for example) 10 * the NFS filesystem used to do this differently, for example)
11 */ 11 */
12#include <linux/config.h>
13#include <linux/module.h> 12#include <linux/module.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/compiler.h> 14#include <linux/compiler.h>
@@ -120,7 +119,7 @@ void __remove_from_page_cache(struct page *page)
120 radix_tree_delete(&mapping->page_tree, page->index); 119 radix_tree_delete(&mapping->page_tree, page->index);
121 page->mapping = NULL; 120 page->mapping = NULL;
122 mapping->nrpages--; 121 mapping->nrpages--;
123 pagecache_acct(-1); 122 __dec_zone_page_state(page, NR_FILE_PAGES);
124} 123}
125 124
126void remove_from_page_cache(struct page *page) 125void remove_from_page_cache(struct page *page)
@@ -449,7 +448,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
449 page->mapping = mapping; 448 page->mapping = mapping;
450 page->index = offset; 449 page->index = offset;
451 mapping->nrpages++; 450 mapping->nrpages++;
452 pagecache_acct(1); 451 __inc_zone_page_state(page, NR_FILE_PAGES);
453 } 452 }
454 write_unlock_irq(&mapping->tree_lock); 453 write_unlock_irq(&mapping->tree_lock);
455 radix_tree_preload_end(); 454 radix_tree_preload_end();
@@ -489,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x)
489EXPORT_SYMBOL(page_cache_alloc_cold); 488EXPORT_SYMBOL(page_cache_alloc_cold);
490#endif 489#endif
491 490
/*
 * Action routine that __lock_page_nosync() hands to __wait_on_bit_lock().
 * It calls io_schedule() and then returns 0. The @word argument is not
 * referenced in the body.
 */
491static int __sleep_on_page_lock(void *word)
492{
493 io_schedule();
494 return 0;
495}
496
492/* 497/*
493 * In order to wait for pages to become available there must be 498 * In order to wait for pages to become available there must be
494 * waitqueues associated with pages. By using a hash table of 499 * waitqueues associated with pages. By using a hash table of
@@ -578,13 +583,24 @@ void fastcall __lock_page(struct page *page)
578} 583}
579EXPORT_SYMBOL(__lock_page); 584EXPORT_SYMBOL(__lock_page);
580 585
586/*
587 * Variant of lock_page that does not require the caller to hold a reference
588 * on the page's mapping.
 *
 * Sets up a wait-bit entry for PG_locked in page->flags and calls
 * __wait_on_bit_lock() on the page's wait queue (page_waitqueue()), with
 * __sleep_on_page_lock() as the action routine and TASK_UNINTERRUPTIBLE as
 * the task state. The return value of __wait_on_bit_lock() is not used.
 *
 * @page: the page whose PG_locked bit is waited on.
589 */
590void fastcall __lock_page_nosync(struct page *page)
591{
592 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
593 __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
594 TASK_UNINTERRUPTIBLE);
595}
596
581/** 597/**
582 * find_get_page - find and get a page reference 598 * find_get_page - find and get a page reference
583 * @mapping: the address_space to search 599 * @mapping: the address_space to search
584 * @offset: the page index 600 * @offset: the page index
585 * 601 *
586 * A rather lightweight function, finding and getting a reference to a 602 * Is there a pagecache struct page at the given (mapping, offset) tuple?
587 * hashed page atomically. 603 * If yes, increment its refcount and return it; if no, return NULL.
588 */ 604 */
589struct page * find_get_page(struct address_space *mapping, unsigned long offset) 605struct page * find_get_page(struct address_space *mapping, unsigned long offset)
590{ 606{
@@ -828,6 +844,30 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
828} 844}
829EXPORT_SYMBOL(grab_cache_page_nowait); 845EXPORT_SYMBOL(grab_cache_page_nowait);
830 846
847/*
848 * CD/DVDs are error prone. When a medium error occurs, the driver may fail
849 * a _large_ part of the i/o request. Imagine the worst scenario:
850 *
851 * ---R__________________________________________B__________
852 * ^ reading here ^ bad block(assume 4k)
853 *
854 * read(R) => miss => readahead(R...B) => media error => frustrating retries
855 * => failing the whole request => read(R) => read(R+1) =>
856 * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
857 * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
858 * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
859 *
860 * It is going insane. Fix it by quickly scaling down the readahead size.
 *
 * @filp: accepted for the callers' convenience; not referenced in the body.
 * @ra:   readahead state whose ra_pages field is divided by four (integer
 *        division). If ra_pages is already zero the function returns
 *        without modifying it.
861 */
862static void shrink_readahead_size_eio(struct file *filp,
863 struct file_ra_state *ra)
864{
865 if (!ra->ra_pages) /* already zero: nothing left to scale down */
866 return;
867
868 ra->ra_pages /= 4; /* each call cuts the window to a quarter */
869}
870
831/** 871/**
832 * do_generic_mapping_read - generic file read routine 872 * do_generic_mapping_read - generic file read routine
833 * @mapping: address_space to be read 873 * @mapping: address_space to be read
@@ -947,7 +987,7 @@ page_not_up_to_date:
947 /* Get exclusive access to the page ... */ 987 /* Get exclusive access to the page ... */
948 lock_page(page); 988 lock_page(page);
949 989
950 /* Did it get unhashed before we got the lock? */ 990 /* Did it get truncated before we got the lock? */
951 if (!page->mapping) { 991 if (!page->mapping) {
952 unlock_page(page); 992 unlock_page(page);
953 page_cache_release(page); 993 page_cache_release(page);
@@ -985,6 +1025,7 @@ readpage:
985 } 1025 }
986 unlock_page(page); 1026 unlock_page(page);
987 error = -EIO; 1027 error = -EIO;
1028 shrink_readahead_size_eio(filp, &ra);
988 goto readpage_error; 1029 goto readpage_error;
989 } 1030 }
990 unlock_page(page); 1031 unlock_page(page);
@@ -1389,7 +1430,7 @@ retry_find:
1389 */ 1430 */
1390 if (!did_readaround) { 1431 if (!did_readaround) {
1391 majmin = VM_FAULT_MAJOR; 1432 majmin = VM_FAULT_MAJOR;
1392 inc_page_state(pgmajfault); 1433 count_vm_event(PGMAJFAULT);
1393 } 1434 }
1394 did_readaround = 1; 1435 did_readaround = 1;
1395 ra_pages = max_sane_readahead(file->f_ra.ra_pages); 1436 ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1430,7 +1471,7 @@ outside_data_content:
1430 * accessible.. 1471 * accessible..
1431 */ 1472 */
1432 if (area->vm_mm == current->mm) 1473 if (area->vm_mm == current->mm)
1433 return NULL; 1474 return NOPAGE_SIGBUS;
1434 /* Fall through to the non-read-ahead case */ 1475 /* Fall through to the non-read-ahead case */
1435no_cached_page: 1476no_cached_page:
1436 /* 1477 /*
@@ -1455,12 +1496,12 @@ no_cached_page:
1455 */ 1496 */
1456 if (error == -ENOMEM) 1497 if (error == -ENOMEM)
1457 return NOPAGE_OOM; 1498 return NOPAGE_OOM;
1458 return NULL; 1499 return NOPAGE_SIGBUS;
1459 1500
1460page_not_uptodate: 1501page_not_uptodate:
1461 if (!did_readaround) { 1502 if (!did_readaround) {
1462 majmin = VM_FAULT_MAJOR; 1503 majmin = VM_FAULT_MAJOR;
1463 inc_page_state(pgmajfault); 1504 count_vm_event(PGMAJFAULT);
1464 } 1505 }
1465 lock_page(page); 1506 lock_page(page);
1466 1507
@@ -1522,8 +1563,9 @@ page_not_uptodate:
1522 * Things didn't work out. Return zero to tell the 1563 * Things didn't work out. Return zero to tell the
1523 * mm layer so, possibly freeing the page cache page first. 1564 * mm layer so, possibly freeing the page cache page first.
1524 */ 1565 */
1566 shrink_readahead_size_eio(file, ra);
1525 page_cache_release(page); 1567 page_cache_release(page);
1526 return NULL; 1568 return NOPAGE_SIGBUS;
1527} 1569}
1528EXPORT_SYMBOL(filemap_nopage); 1570EXPORT_SYMBOL(filemap_nopage);
1529 1571
@@ -1585,7 +1627,7 @@ no_cached_page:
1585page_not_uptodate: 1627page_not_uptodate:
1586 lock_page(page); 1628 lock_page(page);
1587 1629
1588 /* Did it get unhashed while we waited for it? */ 1630 /* Did it get truncated while we waited for it? */
1589 if (!page->mapping) { 1631 if (!page->mapping) {
1590 unlock_page(page); 1632 unlock_page(page);
1591 goto err; 1633 goto err;
@@ -1892,7 +1934,7 @@ int remove_suid(struct dentry *dentry)
1892EXPORT_SYMBOL(remove_suid); 1934EXPORT_SYMBOL(remove_suid);
1893 1935
1894size_t 1936size_t
1895__filemap_copy_from_user_iovec(char *vaddr, 1937__filemap_copy_from_user_iovec_inatomic(char *vaddr,
1896 const struct iovec *iov, size_t base, size_t bytes) 1938 const struct iovec *iov, size_t base, size_t bytes)
1897{ 1939{
1898 size_t copied = 0, left = 0; 1940 size_t copied = 0, left = 0;
@@ -1908,12 +1950,8 @@ __filemap_copy_from_user_iovec(char *vaddr,
1908 vaddr += copy; 1950 vaddr += copy;
1909 iov++; 1951 iov++;
1910 1952
1911 if (unlikely(left)) { 1953 if (unlikely(left))
1912 /* zero the rest of the target like __copy_from_user */
1913 if (bytes)
1914 memset(vaddr, 0, bytes);
1915 break; 1954 break;
1916 }
1917 } 1955 }
1918 return copied - left; 1956 return copied - left;
1919} 1957}
@@ -2045,7 +2083,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2045{ 2083{
2046 struct file *file = iocb->ki_filp; 2084 struct file *file = iocb->ki_filp;
2047 struct address_space * mapping = file->f_mapping; 2085 struct address_space * mapping = file->f_mapping;
2048 struct address_space_operations *a_ops = mapping->a_ops; 2086 const struct address_space_operations *a_ops = mapping->a_ops;
2049 struct inode *inode = mapping->host; 2087 struct inode *inode = mapping->host;
2050 long status = 0; 2088 long status = 0;
2051 struct page *page; 2089 struct page *page;
@@ -2071,14 +2109,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2071 do { 2109 do {
2072 unsigned long index; 2110 unsigned long index;
2073 unsigned long offset; 2111 unsigned long offset;
2074 unsigned long maxlen;
2075 size_t copied; 2112 size_t copied;
2076 2113
2077 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 2114 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
2078 index = pos >> PAGE_CACHE_SHIFT; 2115 index = pos >> PAGE_CACHE_SHIFT;
2079 bytes = PAGE_CACHE_SIZE - offset; 2116 bytes = PAGE_CACHE_SIZE - offset;
2080 if (bytes > count) 2117
2081 bytes = count; 2118 /* Limit the size of the copy to the caller's write size */
2119 bytes = min(bytes, count);
2120
2121 /*
2122 * Limit the size of the copy to that of the current segment,
2123 * because fault_in_pages_readable() doesn't know how to walk
2124 * segments.
2125 */
2126 bytes = min(bytes, cur_iov->iov_len - iov_base);
2082 2127
2083 /* 2128 /*
2084 * Bring in the user page that we will copy from _first_. 2129 * Bring in the user page that we will copy from _first_.
@@ -2086,10 +2131,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2086 * same page as we're writing to, without it being marked 2131 * same page as we're writing to, without it being marked
2087 * up-to-date. 2132 * up-to-date.
2088 */ 2133 */
2089 maxlen = cur_iov->iov_len - iov_base; 2134 fault_in_pages_readable(buf, bytes);
2090 if (maxlen > bytes)
2091 maxlen = bytes;
2092 fault_in_pages_readable(buf, maxlen);
2093 2135
2094 page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); 2136 page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
2095 if (!page) { 2137 if (!page) {
@@ -2097,6 +2139,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2097 break; 2139 break;
2098 } 2140 }
2099 2141
2142 if (unlikely(bytes == 0)) {
2143 status = 0;
2144 copied = 0;
2145 goto zero_length_segment;
2146 }
2147
2100 status = a_ops->prepare_write(file, page, offset, offset+bytes); 2148 status = a_ops->prepare_write(file, page, offset, offset+bytes);
2101 if (unlikely(status)) { 2149 if (unlikely(status)) {
2102 loff_t isize = i_size_read(inode); 2150 loff_t isize = i_size_read(inode);
@@ -2126,7 +2174,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2126 page_cache_release(page); 2174 page_cache_release(page);
2127 continue; 2175 continue;
2128 } 2176 }
2129 if (likely(copied > 0)) { 2177zero_length_segment:
2178 if (likely(copied >= 0)) {
2130 if (!status) 2179 if (!status)
2131 status = copied; 2180 status = copied;
2132 2181
@@ -2191,7 +2240,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
2191 unsigned long nr_segs, loff_t *ppos) 2240 unsigned long nr_segs, loff_t *ppos)
2192{ 2241{
2193 struct file *file = iocb->ki_filp; 2242 struct file *file = iocb->ki_filp;
2194 struct address_space * mapping = file->f_mapping; 2243 const struct address_space * mapping = file->f_mapping;
2195 size_t ocount; /* original count */ 2244 size_t ocount; /* original count */
2196 size_t count; /* after file limit checks */ 2245 size_t count; /* after file limit checks */
2197 struct inode *inode = mapping->host; 2246 struct inode *inode = mapping->host;