author		Steven Whitehouse <swhiteho@redhat.com>	2006-07-03 10:25:08 -0400
committer	Steven Whitehouse <swhiteho@redhat.com>	2006-07-03 10:25:08 -0400
commit		0a1340c185734a57fbf4775927966ad4a1347b02 (patch)
tree		d9ed8f0dd809a7c542a3356601125ea5b5aaa804 /mm/filemap.c
parent		af18ddb8864b096e3ed4732e2d4b21c956dcfe3a (diff)
parent		29454dde27d8e340bb1987bad9aa504af7081eba (diff)
Merge rsync://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
include/linux/kernel.h
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	257
1 files changed, 179 insertions, 78 deletions

diff --git a/mm/filemap.c b/mm/filemap.c
index a02a0b2c986b..b9c91ab7f0f8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -9,11 +9,11 @@
  * most "normal" filesystems (but you don't /have/ to use this:
  * the NFS filesystem used to do this differently, for example)
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 #include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
@@ -38,7 +38,6 @@
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
 
-#include <asm/uaccess.h>
 #include <asm/mman.h>
 
 static ssize_t
@@ -120,7 +119,7 @@ void __remove_from_page_cache(struct page *page)
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
-	pagecache_acct(-1);
+	__dec_zone_page_state(page, NR_FILE_PAGES);
 }
 
 void remove_from_page_cache(struct page *page)
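The `pagecache_acct()` calls in this hunk (and the mirror increment in `add_to_page_cache()` further down) give way to the per-zone counters from the 2.6.18-era zoned VM statistics rework. A rough standalone model of the idea follows; the types are simplified stand-ins, not the kernel's actual implementation:

```c
/*
 * Toy model of zoned VM counters: NR_FILE_PAGES is kept per zone, and a
 * page is accounted against the zone it belongs to, instead of bumping
 * one global pagecache counter. Types here are stand-ins.
 */
enum zone_stat_item { NR_FILE_PAGES, NR_ZONE_STAT_ITEMS };

struct zone { long vm_stat[NR_ZONE_STAT_ITEMS]; };
struct page { struct zone *zone; };

/* like the kernel's __-prefixed variants, these assume the caller already
 * serializes updates (here, mapping->tree_lock is held at the call sites) */
static void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	page->zone->vm_stat[item]++;
}

static void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	page->zone->vm_stat[item]--;
}
```

The upshot for this file: pagecache size is no longer a single global figure but can be reported per zone.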
@@ -171,15 +170,17 @@ static int sync_page(void *word)
 }
 
 /**
- * filemap_fdatawrite_range - start writeback against all of a mapping's
- * dirty pages that lie within the byte offsets <start, end>
+ * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping: address space structure to write
  * @start: offset in bytes where the range starts
  * @end: offset in bytes where the range ends (inclusive)
 * @sync_mode: enable synchronous operation
 *
+ * Start writeback against all of a mapping's dirty pages that lie
+ * within the byte offsets <start, end> inclusive.
+ *
  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
- * opposed to a regular memory * cleansing writeback.  The difference between
+ * opposed to a regular memory cleansing writeback.  The difference between
  * these two operations is that if a dirty page/buffer is encountered, it must
  * be waited upon, and not just skipped over.
  */
@@ -190,8 +191,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 	struct writeback_control wbc = {
 		.sync_mode = sync_mode,
 		.nr_to_write = mapping->nrpages * 2,
-		.start = start,
-		.end = end,
+		.range_start = start,
+		.range_end = end,
 	};
 
 	if (!mapping_cap_writeback_dirty(mapping))
@@ -204,7 +205,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 static inline int __filemap_fdatawrite(struct address_space *mapping,
 	int sync_mode)
 {
-	return __filemap_fdatawrite_range(mapping, 0, 0, sync_mode);
+	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
 }
 
 int filemap_fdatawrite(struct address_space *mapping)
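Two related conventions change in these writeback hunks: `struct writeback_control` now carries `range_start`/`range_end` instead of `start`/`end`, and "write the whole file" becomes the explicit byte range 0..LLONG_MAX rather than the old 0/0 special case. A minimal sketch of the caller-side pattern, using a simplified stand-in struct rather than the kernel's full definition:

```c
#include <limits.h>

/* stand-in for struct writeback_control; field names follow the hunk
 * above, everything else is omitted */
struct writeback_control_sketch {
	int		sync_mode;	/* WB_SYNC_NONE or WB_SYNC_ALL */
	long		nr_to_write;
	long long	range_start;	/* byte offset, inclusive */
	long long	range_end;	/* byte offset, inclusive */
};

/* whole-file writeback after this merge: a maximal explicit byte range */
static void setup_whole_file(struct writeback_control_sketch *wbc)
{
	wbc->range_start = 0;
	wbc->range_end = LLONG_MAX;	/* replaces the old 0 == "everything" */
}
```

The explicit range removes the ambiguity between "range ending at byte 0" and "no range given".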
@@ -219,7 +220,10 @@ static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
 }
 
-/*
+/**
+ * filemap_flush - mostly a non-blocking flush
+ * @mapping: target address_space
+ *
  * This is a mostly non-blocking flush.  Not suitable for data-integrity
  * purposes - I/O may not be started against all dirty pages.
  */
@@ -229,7 +233,12 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
-/*
+/**
+ * wait_on_page_writeback_range - wait for writeback to complete
+ * @mapping: target address_space
+ * @start: beginning page index
+ * @end: ending page index
+ *
  * Wait for writeback to complete against pages indexed by start->end
 * inclusive
 */
@@ -276,7 +285,13 @@ int wait_on_page_writeback_range(struct address_space *mapping,
 	return ret;
 }
 
-/*
+/**
+ * sync_page_range - write and wait on all pages in the passed range
+ * @inode: target inode
+ * @mapping: target address_space
+ * @pos: beginning offset in pages to write
+ * @count: number of bytes to write
+ *
  * Write and wait upon all the pages in the passed range.  This is a "data
 * integrity" operation.  It waits upon in-flight writeout before starting and
 * waiting upon new writeout.  If there was an IO error, return it.
@@ -305,7 +320,13 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
 }
 EXPORT_SYMBOL(sync_page_range);
 
-/*
+/**
+ * sync_page_range_nolock
+ * @inode: target inode
+ * @mapping: target address_space
+ * @pos: beginning offset in pages to write
+ * @count: number of bytes to write
+ *
  * Note: Holding i_mutex across sync_page_range_nolock is not a good idea
 * as it forces O_SYNC writers to different parts of the same file
 * to be serialised right until io completion.
@@ -329,10 +350,11 @@ int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
 EXPORT_SYMBOL(sync_page_range_nolock);
 
 /**
- * filemap_fdatawait - walk the list of under-writeback pages of the given
- *     address space and wait for all of them.
- *
+ * filemap_fdatawait - wait for all under-writeback pages to complete
  * @mapping: address space structure to wait for
+ *
+ * Walk the list of under-writeback pages of the given address space
+ * and wait for all of them.
  */
 int filemap_fdatawait(struct address_space *mapping)
 {
@@ -368,7 +390,12 @@ int filemap_write_and_wait(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_write_and_wait);
 
-/*
+/**
+ * filemap_write_and_wait_range - write out & wait on a file range
+ * @mapping: the address_space for the pages
+ * @lstart: offset in bytes where the range starts
+ * @lend: offset in bytes where the range ends (inclusive)
+ *
  * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that `lend' is inclusive (describes the last byte to be written) so
@@ -394,8 +421,14 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	return err;
 }
 
-/*
- * This function is used to add newly allocated pagecache pages:
+/**
+ * add_to_page_cache - add newly allocated pagecache pages
+ * @page: page to add
+ * @mapping: the page's address_space
+ * @offset: page index
+ * @gfp_mask: page allocation mode
+ *
+ * This function is used to add newly allocated pagecache pages;
  * the page is new, so we can just run SetPageLocked() against it.
 * The other page state flags were set by rmqueue().
 *
@@ -415,14 +448,13 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			page->mapping = mapping;
 			page->index = offset;
 			mapping->nrpages++;
-			pagecache_acct(1);
+			__inc_zone_page_state(page, NR_FILE_PAGES);
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	}
 	return error;
 }
-
 EXPORT_SYMBOL(add_to_page_cache);
 
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
@@ -489,8 +521,7 @@ void fastcall wait_on_page_bit(struct page *page, int bit_nr)
 EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
- * unlock_page() - unlock a locked page
- *
+ * unlock_page - unlock a locked page
  * @page: the page
 *
 * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
@@ -513,8 +544,9 @@ void fastcall unlock_page(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page);
 
-/*
- * End writeback against a page.
+/**
+ * end_page_writeback - end writeback against a page
+ * @page: the page
  */
 void end_page_writeback(struct page *page)
 {
@@ -527,10 +559,11 @@ void end_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL(end_page_writeback);
 
-/*
- * Get a lock on the page, assuming we need to sleep to get it.
+/**
+ * __lock_page - get a lock on the page, assuming we need to sleep to get it
+ * @page: the page to lock
  *
- * Ugly: running sync_page() in state TASK_UNINTERRUPTIBLE is scary.  If some
+ * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary.  If some
  * random driver's requestfn sets TASK_RUNNING, we could busywait.  However
 * chances are that on the second loop, the block layer's plug list is empty,
 * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
@@ -544,8 +577,12 @@ void fastcall __lock_page(struct page *page)
 }
 EXPORT_SYMBOL(__lock_page);
 
-/*
- * a rather lightweight function, finding and getting a reference to a
+/**
+ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * A rather lightweight function, finding and getting a reference to a
  * hashed page atomically.
 */
 struct page * find_get_page(struct address_space *mapping, unsigned long offset)
@@ -559,11 +596,14 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
-
 EXPORT_SYMBOL(find_get_page);
 
-/*
- * Same as above, but trylock it instead of incrementing the count.
+/**
+ * find_trylock_page - find and lock a page
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Same as find_get_page(), but trylock it instead of incrementing the count.
  */
 struct page *find_trylock_page(struct address_space *mapping, unsigned long offset)
 {
@@ -576,12 +616,10 @@ struct page *find_trylock_page(struct address_space *mapping, unsigned long offs
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
-
 EXPORT_SYMBOL(find_trylock_page);
 
 /**
  * find_lock_page - locate, pin and lock a pagecache page
- *
  * @mapping: the address_space to search
 * @offset: the page index
 *
@@ -617,12 +655,10 @@ repeat:
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
-
 EXPORT_SYMBOL(find_lock_page);
 
 /**
  * find_or_create_page - locate or add a pagecache page
- *
  * @mapping: the page's address_space
 * @index: the page's index into the mapping
 * @gfp_mask: page allocation mode
@@ -663,7 +699,6 @@ repeat:
 		page_cache_release(cached_page);
 	return page;
 }
-
 EXPORT_SYMBOL(find_or_create_page);
 
 /**
@@ -729,9 +764,16 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 	return i;
 }
 
-/*
+/**
+ * find_get_pages_tag - find and return pages that match @tag
+ * @mapping: the address_space to search
+ * @index: the starting page index
+ * @tag: the tag index
+ * @nr_pages: the maximum number of pages
+ * @pages: where the resulting pages are placed
+ *
  * Like find_get_pages, except we only return pages which are tagged with
- * `tag'.   We update *index to index the next page for the traversal.
+ * @tag.   We update @index to index the next page for the traversal.
  */
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages)
@@ -750,7 +792,11 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 	return ret;
 }
 
-/*
+/**
+ * grab_cache_page_nowait - returns locked page at given index in given cache
+ * @mapping: target address_space
+ * @index: the page index
+ *
  * Same as grab_cache_page, but do not wait if the page is unavailable.
 * This is intended for speculative data generators, where the data can
 * be regenerated if the page couldn't be grabbed.  This routine should
@@ -779,19 +825,51 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 	}
 	return page;
 }
-
 EXPORT_SYMBOL(grab_cache_page_nowait);
 
 /*
+ * CD/DVDs are error prone. When a medium error occurs, the driver may fail
+ * a _large_ part of the i/o request. Imagine the worst scenario:
+ *
+ *      ---R__________________________________________B__________
+ *         ^ reading here                             ^ bad block(assume 4k)
+ *
+ * read(R) => miss => readahead(R...B) => media error => frustrating retries
+ * => failing the whole request => read(R) => read(R+1) =>
+ * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
+ * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
+ * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
+ *
+ * It is going insane. Fix it by quickly scaling down the readahead size.
+ */
+static void shrink_readahead_size_eio(struct file *filp,
+					struct file_ra_state *ra)
+{
+	if (!ra->ra_pages)
+		return;
+
+	ra->ra_pages /= 4;
+	printk(KERN_WARNING "Reducing readahead size to %luK\n",
+			ra->ra_pages << (PAGE_CACHE_SHIFT - 10));
+}
+
+/**
+ * do_generic_mapping_read - generic file read routine
+ * @mapping: address_space to be read
+ * @_ra: file's readahead state
+ * @filp: the file to read
+ * @ppos: current file position
+ * @desc: read_descriptor
+ * @actor: read method
+ *
  * This is a generic file read routine, and uses the
- * mapping->a_ops->readpage() function for the actual low-level
- * stuff.
+ * mapping->a_ops->readpage() function for the actual low-level stuff.
  *
  * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 *
- * Note the struct file* is only passed for the use of readpage.  It may be
- * NULL.
+ * Note the struct file* is only passed for the use of readpage.
+ * It may be NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
 			     struct file_ra_state *_ra,
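To make the decay concrete: with 4K pages and a typical 32-page (128K) window, successive media errors shrink the window 32 → 8 → 2 → 0 pages, and once it hits zero the early `!ra->ra_pages` return keeps it there. A standalone model of just that arithmetic (assumes 4K pages; prints the same message as the helper above):

```c
#include <stdio.h>

int main(void)
{
	unsigned long ra_pages = 32;	/* 32 pages * 4K = 128K window */

	while (ra_pages) {		/* mirrors the !ra->ra_pages check */
		ra_pages /= 4;
		printf("Reducing readahead size to %luK\n",
		       ra_pages << 2);	/* PAGE_CACHE_SHIFT - 10 == 2 for 4K */
	}
	return 0;
}
```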
@@ -932,6 +1010,7 @@ readpage:
 				}
 				unlock_page(page);
 				error = -EIO;
+				shrink_readahead_size_eio(filp, &ra);
 				goto readpage_error;
 			}
 			unlock_page(page);
@@ -1004,7 +1083,6 @@ out:
 	if (filp)
 		file_accessed(filp);
 }
-
 EXPORT_SYMBOL(do_generic_mapping_read);
 
 int file_read_actor(read_descriptor_t *desc, struct page *page,
@@ -1046,7 +1124,13 @@ success:
 }
 EXPORT_SYMBOL_GPL(file_read_actor);
 
-/*
+/**
+ * __generic_file_aio_read - generic filesystem read routine
+ * @iocb: kernel I/O control block
+ * @iov: io vector request
+ * @nr_segs: number of segments in the iovec
+ * @ppos: current file position
+ *
  * This is the "read()" routine for all filesystems
 * that can use the page cache directly.
 */
@@ -1125,7 +1209,6 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 out:
 	return retval;
 }
-
 EXPORT_SYMBOL(__generic_file_aio_read);
 
 ssize_t
@@ -1136,7 +1219,6 @@ generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 	BUG_ON(iocb->ki_pos != pos);
 	return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
 }
-
 EXPORT_SYMBOL(generic_file_aio_read);
 
 ssize_t
@@ -1152,7 +1234,6 @@ generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppo
 	ret = wait_on_sync_kiocb(&kiocb);
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_read);
 
 int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -1193,7 +1274,6 @@ ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
 		return desc.written;
 	return desc.error;
 }
-
 EXPORT_SYMBOL(generic_file_sendfile);
 
 static ssize_t
@@ -1229,11 +1309,15 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
 }
 
 #ifdef CONFIG_MMU
-/*
+static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
+/**
+ * page_cache_read - adds requested page to the page cache if not already there
+ * @file: file to read
+ * @offset: page index
+ *
  * This adds the requested page to the page cache if it isn't already there,
 * and schedules an I/O to read in its contents from disk.
 */
-static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
 static int fastcall page_cache_read(struct file * file, unsigned long offset)
 {
 	struct address_space *mapping = file->f_mapping;
@@ -1260,7 +1344,12 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 
 #define MMAP_LOTSAMISS  (100)
 
-/*
+/**
+ * filemap_nopage - read in file data for page fault handling
+ * @area: the applicable vm_area
+ * @address: target address to read in
+ * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
+ *
  * filemap_nopage() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
@@ -1327,7 +1416,7 @@ retry_find:
 		 */
 		if (!did_readaround) {
 			majmin = VM_FAULT_MAJOR;
-			inc_page_state(pgmajfault);
+			count_vm_event(PGMAJFAULT);
 		}
 		did_readaround = 1;
 		ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1398,7 +1487,7 @@ no_cached_page:
 page_not_uptodate:
 	if (!did_readaround) {
 		majmin = VM_FAULT_MAJOR;
-		inc_page_state(pgmajfault);
+		count_vm_event(PGMAJFAULT);
 	}
 	lock_page(page);
 
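`inc_page_state(pgmajfault)` becoming `count_vm_event(PGMAJFAULT)` is the event-counter half of the same statistics rework seen earlier with NR_FILE_PAGES: cumulative events are tallied separately from page counts. A toy model of that side, with stand-in types (the real kernel keeps one state per CPU):

```c
/* events are plain cumulative tallies, unlike the per-zone page counts */
enum vm_event_item { PGMAJFAULT, NR_VM_EVENT_ITEMS };

struct vm_event_state {
	unsigned long event[NR_VM_EVENT_ITEMS];
};

/* the kernel keeps one of these per CPU; one instance suffices here */
static struct vm_event_state vm_events;

static void count_vm_event(enum vm_event_item item)
{
	vm_events.event[item]++;
}
```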
@@ -1460,10 +1549,10 @@ page_not_uptodate:
 	 * Things didn't work out. Return zero to tell the
 	 * mm layer so, possibly freeing the page cache page first.
 	 */
+	shrink_readahead_size_eio(file, ra);
 	page_cache_release(page);
 	return NULL;
 }
-
 EXPORT_SYMBOL(filemap_nopage);
 
 static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
@@ -1717,7 +1806,13 @@ repeat:
 	return page;
 }
 
-/*
+/**
+ * read_cache_page - read into page cache, fill it if needed
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @filler: function to perform the read
+ * @data: destination for read data
+ *
  * Read into the page cache. If a page already exists,
 * and PageUptodate() is not set, try to fill the page.
 */
@@ -1755,7 +1850,6 @@ retry:
 out:
 	return page;
 }
-
 EXPORT_SYMBOL(read_cache_page);
 
 /*
@@ -1826,7 +1920,7 @@ int remove_suid(struct dentry *dentry)
 EXPORT_SYMBOL(remove_suid);
 
 size_t
-__filemap_copy_from_user_iovec(char *vaddr,
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
 	size_t copied = 0, left = 0;
@@ -1836,18 +1930,14 @@ __filemap_copy_from_user_iovec(char *vaddr,
 		int copy = min(bytes, iov->iov_len - base);
 
 		base = 0;
-		left = __copy_from_user_inatomic(vaddr, buf, copy);
+		left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
 		copied += copy;
 		bytes -= copy;
 		vaddr += copy;
 		iov++;
 
-		if (unlikely(left)) {
-			/* zero the rest of the target like __copy_from_user */
-			if (bytes)
-				memset(vaddr, 0, bytes);
+		if (unlikely(left))
 			break;
-		}
 	}
 	return copied - left;
 }
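Two behavioural changes hide in this small hunk: the copy switches to `__copy_from_user_inatomic_nocache()` (bypassing the CPU cache, since freshly written pagecache data is unlikely to be re-read through these addresses right away), and a short copy no longer zero-fills the tail of the destination — hence the `_inatomic` suffix in the new name, which pushes fix-up onto the caller. A userspace model of the new short-copy contract, with stand-in names:

```c
#include <stdio.h>
#include <string.h>

/* stand-in for the inatomic copy: returns the number of bytes NOT copied,
 * and on a partial copy leaves the destination tail untouched */
static size_t copy_inatomic(char *dst, const char *src, size_t n,
			    size_t fault_at)
{
	size_t done = n < fault_at ? n : fault_at;	/* simulated fault */

	memcpy(dst, src, done);
	return n - done;
}

int main(void)
{
	char dst[8] = "XXXXXXX";
	size_t left = copy_inatomic(dst, "abcdefg", 7, 5);

	/* the caller must treat this as a short write, exactly as the
	 * rewritten loop does by breaking out with "copied - left" */
	printf("copied %zu of 7, tail left as-is: %s\n", 7 - left, dst + 5);
	return 0;
}
```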
@@ -1855,7 +1945,7 @@ __filemap_copy_from_user_iovec(char *vaddr,
 /*
  * Performs necessary checks before doing a write
 *
- * Can adjust writing position aor amount of bytes to write.
+ * Can adjust writing position or amount of bytes to write.
  * Returns appropriate error code that caller should return or
 * zero in case that write should be allowed.
 */
@@ -1979,7 +2069,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode *inode = mapping->host;
 	long status = 0;
 	struct page *page;
@@ -2005,14 +2095,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	do {
 		unsigned long index;
 		unsigned long offset;
-		unsigned long maxlen;
 		size_t copied;
 
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+
+		/* Limit the size of the copy to the caller's write size */
+		bytes = min(bytes, count);
+
+		/*
+		 * Limit the size of the copy to that of the current segment,
+		 * because fault_in_pages_readable() doesn't know how to walk
+		 * segments.
+		 */
+		bytes = min(bytes, cur_iov->iov_len - iov_base);
 
 		/*
 		 * Bring in the user page that we will copy from _first_.
@@ -2020,10 +2117,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
 		 */
-		maxlen = cur_iov->iov_len - iov_base;
-		if (maxlen > bytes)
-			maxlen = bytes;
-		fault_in_pages_readable(buf, maxlen);
+		fault_in_pages_readable(buf, bytes);
 
 		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
 		if (!page) {
@@ -2031,6 +2125,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			break;
 		}
 
+		if (unlikely(bytes == 0)) {
+			status = 0;
+			copied = 0;
+			goto zero_length_segment;
+		}
+
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status)) {
 			loff_t isize = i_size_read(inode);
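The rewritten loop clamps each copy three ways — page boundary, remaining write size, and the current iovec segment — because `fault_in_pages_readable()` can only pre-fault within one segment; a zero-length segment then short-circuits to `zero_length_segment` (next hunk) instead of calling `prepare_write()` for zero bytes. The clamping logic in isolation, as a standalone sketch assuming a 4K page size:

```c
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096UL

static unsigned long clamp_bytes(unsigned long pos, unsigned long count,
				 unsigned long seg_remaining)
{
	unsigned long offset = pos & (PAGE_CACHE_SIZE - 1);	/* within page */
	unsigned long bytes = PAGE_CACHE_SIZE - offset;

	if (bytes > count)
		bytes = count;		/* caller's remaining write size */
	if (bytes > seg_remaining)
		bytes = seg_remaining;	/* current iovec segment only */
	return bytes;			/* may be 0 for an empty segment */
}

int main(void)
{
	/* 100 bytes wanted at file position 4090, 3 bytes left in segment:
	 * page clamp gives 6, count clamp keeps 6, segment clamp gives 3 */
	printf("%lu\n", clamp_bytes(4090, 100, 3));
	return 0;
}
```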
@@ -2060,7 +2160,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			page_cache_release(page);
 			continue;
 		}
-		if (likely(copied > 0)) {
+zero_length_segment:
+		if (likely(copied >= 0)) {
 			if (!status)
 				status = copied;
 
@@ -2125,7 +2226,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 	unsigned long nr_segs, loff_t *ppos)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space * mapping = file->f_mapping;
+	const struct address_space * mapping = file->f_mapping;
 	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	struct inode *inode = mapping->host;