diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 105 |
1 files changed, 77 insertions, 28 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 807a463fd5ed..3277f3b23524 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c | |||
@@ -9,7 +9,6 @@ | |||
9 | * most "normal" filesystems (but you don't /have/ to use this: | 9 | * most "normal" filesystems (but you don't /have/ to use this: |
10 | * the NFS filesystem used to do this differently, for example) | 10 | * the NFS filesystem used to do this differently, for example) |
11 | */ | 11 | */ |
12 | #include <linux/config.h> | ||
13 | #include <linux/module.h> | 12 | #include <linux/module.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
15 | #include <linux/compiler.h> | 14 | #include <linux/compiler.h> |
@@ -120,7 +119,7 @@ void __remove_from_page_cache(struct page *page) | |||
120 | radix_tree_delete(&mapping->page_tree, page->index); | 119 | radix_tree_delete(&mapping->page_tree, page->index); |
121 | page->mapping = NULL; | 120 | page->mapping = NULL; |
122 | mapping->nrpages--; | 121 | mapping->nrpages--; |
123 | pagecache_acct(-1); | 122 | __dec_zone_page_state(page, NR_FILE_PAGES); |
124 | } | 123 | } |
125 | 124 | ||
126 | void remove_from_page_cache(struct page *page) | 125 | void remove_from_page_cache(struct page *page) |
@@ -449,7 +448,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, | |||
449 | page->mapping = mapping; | 448 | page->mapping = mapping; |
450 | page->index = offset; | 449 | page->index = offset; |
451 | mapping->nrpages++; | 450 | mapping->nrpages++; |
452 | pagecache_acct(1); | 451 | __inc_zone_page_state(page, NR_FILE_PAGES); |
453 | } | 452 | } |
454 | write_unlock_irq(&mapping->tree_lock); | 453 | write_unlock_irq(&mapping->tree_lock); |
455 | radix_tree_preload_end(); | 454 | radix_tree_preload_end(); |
@@ -489,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x) | |||
489 | EXPORT_SYMBOL(page_cache_alloc_cold); | 488 | EXPORT_SYMBOL(page_cache_alloc_cold); |
490 | #endif | 489 | #endif |
491 | 490 | ||
491 | static int __sleep_on_page_lock(void *word) | ||
492 | { | ||
493 | io_schedule(); | ||
494 | return 0; | ||
495 | } | ||
496 | |||
492 | /* | 497 | /* |
493 | * In order to wait for pages to become available there must be | 498 | * In order to wait for pages to become available there must be |
494 | * waitqueues associated with pages. By using a hash table of | 499 | * waitqueues associated with pages. By using a hash table of |
@@ -578,13 +583,24 @@ void fastcall __lock_page(struct page *page) | |||
578 | } | 583 | } |
579 | EXPORT_SYMBOL(__lock_page); | 584 | EXPORT_SYMBOL(__lock_page); |
580 | 585 | ||
586 | /* | ||
587 | * Variant of lock_page that does not require the caller to hold a reference | ||
588 | * on the page's mapping. | ||
589 | */ | ||
590 | void fastcall __lock_page_nosync(struct page *page) | ||
591 | { | ||
592 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | ||
593 | __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, | ||
594 | TASK_UNINTERRUPTIBLE); | ||
595 | } | ||
596 | |||
581 | /** | 597 | /** |
582 | * find_get_page - find and get a page reference | 598 | * find_get_page - find and get a page reference |
583 | * @mapping: the address_space to search | 599 | * @mapping: the address_space to search |
584 | * @offset: the page index | 600 | * @offset: the page index |
585 | * | 601 | * |
586 | * A rather lightweight function, finding and getting a reference to a | 602 | * Is there a pagecache struct page at the given (mapping, offset) tuple? |
587 | * hashed page atomically. | 603 | * If yes, increment its refcount and return it; if no, return NULL. |
588 | */ | 604 | */ |
589 | struct page * find_get_page(struct address_space *mapping, unsigned long offset) | 605 | struct page * find_get_page(struct address_space *mapping, unsigned long offset) |
590 | { | 606 | { |
@@ -828,6 +844,30 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index) | |||
828 | } | 844 | } |
829 | EXPORT_SYMBOL(grab_cache_page_nowait); | 845 | EXPORT_SYMBOL(grab_cache_page_nowait); |
830 | 846 | ||
847 | /* | ||
848 | * CD/DVDs are error prone. When a medium error occurs, the driver may fail | ||
849 | * a _large_ part of the i/o request. Imagine the worst scenario: | ||
850 | * | ||
851 | * ---R__________________________________________B__________ | ||
852 | * ^ reading here ^ bad block(assume 4k) | ||
853 | * | ||
854 | * read(R) => miss => readahead(R...B) => media error => frustrating retries | ||
855 | * => failing the whole request => read(R) => read(R+1) => | ||
856 | * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) => | ||
857 | * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) => | ||
858 | * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ...... | ||
859 | * | ||
860 | * It is going insane. Fix it by quickly scaling down the readahead size. | ||
861 | */ | ||
862 | static void shrink_readahead_size_eio(struct file *filp, | ||
863 | struct file_ra_state *ra) | ||
864 | { | ||
865 | if (!ra->ra_pages) | ||
866 | return; | ||
867 | |||
868 | ra->ra_pages /= 4; | ||
869 | } | ||
870 | |||
831 | /** | 871 | /** |
832 | * do_generic_mapping_read - generic file read routine | 872 | * do_generic_mapping_read - generic file read routine |
833 | * @mapping: address_space to be read | 873 | * @mapping: address_space to be read |
@@ -947,7 +987,7 @@ page_not_up_to_date: | |||
947 | /* Get exclusive access to the page ... */ | 987 | /* Get exclusive access to the page ... */ |
948 | lock_page(page); | 988 | lock_page(page); |
949 | 989 | ||
950 | /* Did it get unhashed before we got the lock? */ | 990 | /* Did it get truncated before we got the lock? */ |
951 | if (!page->mapping) { | 991 | if (!page->mapping) { |
952 | unlock_page(page); | 992 | unlock_page(page); |
953 | page_cache_release(page); | 993 | page_cache_release(page); |
@@ -985,6 +1025,7 @@ readpage: | |||
985 | } | 1025 | } |
986 | unlock_page(page); | 1026 | unlock_page(page); |
987 | error = -EIO; | 1027 | error = -EIO; |
1028 | shrink_readahead_size_eio(filp, &ra); | ||
988 | goto readpage_error; | 1029 | goto readpage_error; |
989 | } | 1030 | } |
990 | unlock_page(page); | 1031 | unlock_page(page); |
@@ -1389,7 +1430,7 @@ retry_find: | |||
1389 | */ | 1430 | */ |
1390 | if (!did_readaround) { | 1431 | if (!did_readaround) { |
1391 | majmin = VM_FAULT_MAJOR; | 1432 | majmin = VM_FAULT_MAJOR; |
1392 | inc_page_state(pgmajfault); | 1433 | count_vm_event(PGMAJFAULT); |
1393 | } | 1434 | } |
1394 | did_readaround = 1; | 1435 | did_readaround = 1; |
1395 | ra_pages = max_sane_readahead(file->f_ra.ra_pages); | 1436 | ra_pages = max_sane_readahead(file->f_ra.ra_pages); |
@@ -1430,7 +1471,7 @@ outside_data_content: | |||
1430 | * accessible.. | 1471 | * accessible.. |
1431 | */ | 1472 | */ |
1432 | if (area->vm_mm == current->mm) | 1473 | if (area->vm_mm == current->mm) |
1433 | return NULL; | 1474 | return NOPAGE_SIGBUS; |
1434 | /* Fall through to the non-read-ahead case */ | 1475 | /* Fall through to the non-read-ahead case */ |
1435 | no_cached_page: | 1476 | no_cached_page: |
1436 | /* | 1477 | /* |
@@ -1455,12 +1496,12 @@ no_cached_page: | |||
1455 | */ | 1496 | */ |
1456 | if (error == -ENOMEM) | 1497 | if (error == -ENOMEM) |
1457 | return NOPAGE_OOM; | 1498 | return NOPAGE_OOM; |
1458 | return NULL; | 1499 | return NOPAGE_SIGBUS; |
1459 | 1500 | ||
1460 | page_not_uptodate: | 1501 | page_not_uptodate: |
1461 | if (!did_readaround) { | 1502 | if (!did_readaround) { |
1462 | majmin = VM_FAULT_MAJOR; | 1503 | majmin = VM_FAULT_MAJOR; |
1463 | inc_page_state(pgmajfault); | 1504 | count_vm_event(PGMAJFAULT); |
1464 | } | 1505 | } |
1465 | lock_page(page); | 1506 | lock_page(page); |
1466 | 1507 | ||
@@ -1522,8 +1563,9 @@ page_not_uptodate: | |||
1522 | * Things didn't work out. Return zero to tell the | 1563 | * Things didn't work out. Return zero to tell the |
1523 | * mm layer so, possibly freeing the page cache page first. | 1564 | * mm layer so, possibly freeing the page cache page first. |
1524 | */ | 1565 | */ |
1566 | shrink_readahead_size_eio(file, ra); | ||
1525 | page_cache_release(page); | 1567 | page_cache_release(page); |
1526 | return NULL; | 1568 | return NOPAGE_SIGBUS; |
1527 | } | 1569 | } |
1528 | EXPORT_SYMBOL(filemap_nopage); | 1570 | EXPORT_SYMBOL(filemap_nopage); |
1529 | 1571 | ||
@@ -1585,7 +1627,7 @@ no_cached_page: | |||
1585 | page_not_uptodate: | 1627 | page_not_uptodate: |
1586 | lock_page(page); | 1628 | lock_page(page); |
1587 | 1629 | ||
1588 | /* Did it get unhashed while we waited for it? */ | 1630 | /* Did it get truncated while we waited for it? */ |
1589 | if (!page->mapping) { | 1631 | if (!page->mapping) { |
1590 | unlock_page(page); | 1632 | unlock_page(page); |
1591 | goto err; | 1633 | goto err; |
@@ -1892,7 +1934,7 @@ int remove_suid(struct dentry *dentry) | |||
1892 | EXPORT_SYMBOL(remove_suid); | 1934 | EXPORT_SYMBOL(remove_suid); |
1893 | 1935 | ||
1894 | size_t | 1936 | size_t |
1895 | __filemap_copy_from_user_iovec(char *vaddr, | 1937 | __filemap_copy_from_user_iovec_inatomic(char *vaddr, |
1896 | const struct iovec *iov, size_t base, size_t bytes) | 1938 | const struct iovec *iov, size_t base, size_t bytes) |
1897 | { | 1939 | { |
1898 | size_t copied = 0, left = 0; | 1940 | size_t copied = 0, left = 0; |
@@ -1908,12 +1950,8 @@ __filemap_copy_from_user_iovec(char *vaddr, | |||
1908 | vaddr += copy; | 1950 | vaddr += copy; |
1909 | iov++; | 1951 | iov++; |
1910 | 1952 | ||
1911 | if (unlikely(left)) { | 1953 | if (unlikely(left)) |
1912 | /* zero the rest of the target like __copy_from_user */ | ||
1913 | if (bytes) | ||
1914 | memset(vaddr, 0, bytes); | ||
1915 | break; | 1954 | break; |
1916 | } | ||
1917 | } | 1955 | } |
1918 | return copied - left; | 1956 | return copied - left; |
1919 | } | 1957 | } |
@@ -2045,7 +2083,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2045 | { | 2083 | { |
2046 | struct file *file = iocb->ki_filp; | 2084 | struct file *file = iocb->ki_filp; |
2047 | struct address_space * mapping = file->f_mapping; | 2085 | struct address_space * mapping = file->f_mapping; |
2048 | struct address_space_operations *a_ops = mapping->a_ops; | 2086 | const struct address_space_operations *a_ops = mapping->a_ops; |
2049 | struct inode *inode = mapping->host; | 2087 | struct inode *inode = mapping->host; |
2050 | long status = 0; | 2088 | long status = 0; |
2051 | struct page *page; | 2089 | struct page *page; |
@@ -2071,14 +2109,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2071 | do { | 2109 | do { |
2072 | unsigned long index; | 2110 | unsigned long index; |
2073 | unsigned long offset; | 2111 | unsigned long offset; |
2074 | unsigned long maxlen; | ||
2075 | size_t copied; | 2112 | size_t copied; |
2076 | 2113 | ||
2077 | offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ | 2114 | offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ |
2078 | index = pos >> PAGE_CACHE_SHIFT; | 2115 | index = pos >> PAGE_CACHE_SHIFT; |
2079 | bytes = PAGE_CACHE_SIZE - offset; | 2116 | bytes = PAGE_CACHE_SIZE - offset; |
2080 | if (bytes > count) | 2117 | |
2081 | bytes = count; | 2118 | /* Limit the size of the copy to the caller's write size */ |
2119 | bytes = min(bytes, count); | ||
2120 | |||
2121 | /* | ||
2122 | * Limit the size of the copy to that of the current segment, | ||
2123 | * because fault_in_pages_readable() doesn't know how to walk | ||
2124 | * segments. | ||
2125 | */ | ||
2126 | bytes = min(bytes, cur_iov->iov_len - iov_base); | ||
2082 | 2127 | ||
2083 | /* | 2128 | /* |
2084 | * Bring in the user page that we will copy from _first_. | 2129 | * Bring in the user page that we will copy from _first_. |
@@ -2086,10 +2131,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2086 | * same page as we're writing to, without it being marked | 2131 | * same page as we're writing to, without it being marked |
2087 | * up-to-date. | 2132 | * up-to-date. |
2088 | */ | 2133 | */ |
2089 | maxlen = cur_iov->iov_len - iov_base; | 2134 | fault_in_pages_readable(buf, bytes); |
2090 | if (maxlen > bytes) | ||
2091 | maxlen = bytes; | ||
2092 | fault_in_pages_readable(buf, maxlen); | ||
2093 | 2135 | ||
2094 | page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); | 2136 | page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); |
2095 | if (!page) { | 2137 | if (!page) { |
@@ -2097,6 +2139,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2097 | break; | 2139 | break; |
2098 | } | 2140 | } |
2099 | 2141 | ||
2142 | if (unlikely(bytes == 0)) { | ||
2143 | status = 0; | ||
2144 | copied = 0; | ||
2145 | goto zero_length_segment; | ||
2146 | } | ||
2147 | |||
2100 | status = a_ops->prepare_write(file, page, offset, offset+bytes); | 2148 | status = a_ops->prepare_write(file, page, offset, offset+bytes); |
2101 | if (unlikely(status)) { | 2149 | if (unlikely(status)) { |
2102 | loff_t isize = i_size_read(inode); | 2150 | loff_t isize = i_size_read(inode); |
@@ -2126,7 +2174,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2126 | page_cache_release(page); | 2174 | page_cache_release(page); |
2127 | continue; | 2175 | continue; |
2128 | } | 2176 | } |
2129 | if (likely(copied > 0)) { | 2177 | zero_length_segment: |
2178 | if (likely(copied >= 0)) { | ||
2130 | if (!status) | 2179 | if (!status) |
2131 | status = copied; | 2180 | status = copied; |
2132 | 2181 | ||
@@ -2191,7 +2240,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
2191 | unsigned long nr_segs, loff_t *ppos) | 2240 | unsigned long nr_segs, loff_t *ppos) |
2192 | { | 2241 | { |
2193 | struct file *file = iocb->ki_filp; | 2242 | struct file *file = iocb->ki_filp; |
2194 | struct address_space * mapping = file->f_mapping; | 2243 | const struct address_space * mapping = file->f_mapping; |
2195 | size_t ocount; /* original count */ | 2244 | size_t ocount; /* original count */ |
2196 | size_t count; /* after file limit checks */ | 2245 | size_t count; /* after file limit checks */ |
2197 | struct inode *inode = mapping->host; | 2246 | struct inode *inode = mapping->host; |