author     Steven Whitehouse <swhiteho@redhat.com>	2006-07-03 10:25:08 -0400
committer  Steven Whitehouse <swhiteho@redhat.com>	2006-07-03 10:25:08 -0400
commit     0a1340c185734a57fbf4775927966ad4a1347b02 (patch)
tree       d9ed8f0dd809a7c542a3356601125ea5b5aaa804 /mm/filemap.c
parent     af18ddb8864b096e3ed4732e2d4b21c956dcfe3a (diff)
parent     29454dde27d8e340bb1987bad9aa504af7081eba (diff)

Merge rsync://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

Conflicts:
	include/linux/kernel.h
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	257
1 file changed, 179 insertions, 78 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a02a0b2c986b..b9c91ab7f0f8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -9,11 +9,11 @@
  * most "normal" filesystems (but you don't /have/ to use this:
  * the NFS filesystem used to do this differently, for example)
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 #include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
@@ -38,7 +38,6 @@
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
 
-#include <asm/uaccess.h>
 #include <asm/mman.h>
 
 static ssize_t
@@ -120,7 +119,7 @@ void __remove_from_page_cache(struct page *page)
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
-	pagecache_acct(-1);
+	__dec_zone_page_state(page, NR_FILE_PAGES);
 }
 
 void remove_from_page_cache(struct page *page)
@@ -171,15 +170,17 @@ static int sync_page(void *word)
 }
 
 /**
- * filemap_fdatawrite_range - start writeback against all of a mapping's
- * dirty pages that lie within the byte offsets <start, end>
+ * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping: address space structure to write
  * @start: offset in bytes where the range starts
  * @end: offset in bytes where the range ends (inclusive)
  * @sync_mode: enable synchronous operation
  *
+ * Start writeback against all of a mapping's dirty pages that lie
+ * within the byte offsets <start, end> inclusive.
+ *
  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
- * opposed to a regular memory * cleansing writeback. The difference between
+ * opposed to a regular memory cleansing writeback. The difference between
  * these two operations is that if a dirty page/buffer is encountered, it must
  * be waited upon, and not just skipped over.
  */
@@ -190,8 +191,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 	struct writeback_control wbc = {
 		.sync_mode = sync_mode,
 		.nr_to_write = mapping->nrpages * 2,
-		.start = start,
-		.end = end,
+		.range_start = start,
+		.range_end = end,
 	};
 
 	if (!mapping_cap_writeback_dirty(mapping))
@@ -204,7 +205,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 static inline int __filemap_fdatawrite(struct address_space *mapping,
 	int sync_mode)
 {
-	return __filemap_fdatawrite_range(mapping, 0, 0, sync_mode);
+	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
 }
 
 int filemap_fdatawrite(struct address_space *mapping)
@@ -219,7 +220,10 @@ static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
 }
 
-/*
+/**
+ * filemap_flush - mostly a non-blocking flush
+ * @mapping: target address_space
+ *
  * This is a mostly non-blocking flush. Not suitable for data-integrity
  * purposes - I/O may not be started against all dirty pages.
  */
@@ -229,7 +233,12 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
-/*
+/**
+ * wait_on_page_writeback_range - wait for writeback to complete
+ * @mapping: target address_space
+ * @start: beginning page index
+ * @end: ending page index
+ *
  * Wait for writeback to complete against pages indexed by start->end
  * inclusive
  */
@@ -276,7 +285,13 @@ int wait_on_page_writeback_range(struct address_space *mapping,
 	return ret;
 }
 
-/*
+/**
+ * sync_page_range - write and wait on all pages in the passed range
+ * @inode: target inode
+ * @mapping: target address_space
+ * @pos: beginning offset in pages to write
+ * @count: number of bytes to write
+ *
  * Write and wait upon all the pages in the passed range. This is a "data
  * integrity" operation. It waits upon in-flight writeout before starting and
  * waiting upon new writeout. If there was an IO error, return it.
@@ -305,7 +320,13 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
 }
 EXPORT_SYMBOL(sync_page_range);
 
-/*
+/**
+ * sync_page_range_nolock
+ * @inode: target inode
+ * @mapping: target address_space
+ * @pos: beginning offset in pages to write
+ * @count: number of bytes to write
+ *
  * Note: Holding i_mutex across sync_page_range_nolock is not a good idea
  * as it forces O_SYNC writers to different parts of the same file
  * to be serialised right until io completion.
@@ -329,10 +350,11 @@ int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
 EXPORT_SYMBOL(sync_page_range_nolock);
 
 /**
- * filemap_fdatawait - walk the list of under-writeback pages of the given
- * address space and wait for all of them.
- *
+ * filemap_fdatawait - wait for all under-writeback pages to complete
  * @mapping: address space structure to wait for
+ *
+ * Walk the list of under-writeback pages of the given address space
+ * and wait for all of them.
  */
 int filemap_fdatawait(struct address_space *mapping)
 {
@@ -368,7 +390,12 @@ int filemap_write_and_wait(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_write_and_wait);
 
-/*
+/**
+ * filemap_write_and_wait_range - write out & wait on a file range
+ * @mapping: the address_space for the pages
+ * @lstart: offset in bytes where the range starts
+ * @lend: offset in bytes where the range ends (inclusive)
+ *
  * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that `lend' is inclusive (describes the last byte to be written) so
@@ -394,8 +421,14 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	return err;
 }
 
-/*
- * This function is used to add newly allocated pagecache pages:
+/**
+ * add_to_page_cache - add newly allocated pagecache pages
+ * @page: page to add
+ * @mapping: the page's address_space
+ * @offset: page index
+ * @gfp_mask: page allocation mode
+ *
+ * This function is used to add newly allocated pagecache pages;
  * the page is new, so we can just run SetPageLocked() against it.
  * The other page state flags were set by rmqueue().
 *
@@ -415,14 +448,13 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			page->mapping = mapping;
 			page->index = offset;
 			mapping->nrpages++;
-			pagecache_acct(1);
+			__inc_zone_page_state(page, NR_FILE_PAGES);
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	}
 	return error;
 }
-
 EXPORT_SYMBOL(add_to_page_cache);
 
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
@@ -489,8 +521,7 @@ void fastcall wait_on_page_bit(struct page *page, int bit_nr)
 EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
- * unlock_page() - unlock a locked page
- *
+ * unlock_page - unlock a locked page
  * @page: the page
 *
 * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
@@ -513,8 +544,9 @@ void fastcall unlock_page(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page);
 
-/*
- * End writeback against a page.
+/**
+ * end_page_writeback - end writeback against a page
+ * @page: the page
  */
 void end_page_writeback(struct page *page)
 {
@@ -527,10 +559,11 @@ void end_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL(end_page_writeback);
 
-/*
- * Get a lock on the page, assuming we need to sleep to get it.
+/**
+ * __lock_page - get a lock on the page, assuming we need to sleep to get it
+ * @page: the page to lock
  *
- * Ugly: running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
+ * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
  * random driver's requestfn sets TASK_RUNNING, we could busywait. However
  * chances are that on the second loop, the block layer's plug list is empty,
  * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
@@ -544,8 +577,12 @@ void fastcall __lock_page(struct page *page)
 }
 EXPORT_SYMBOL(__lock_page);
 
-/*
- * a rather lightweight function, finding and getting a reference to a
+/**
+ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * A rather lightweight function, finding and getting a reference to a
  * hashed page atomically.
  */
 struct page * find_get_page(struct address_space *mapping, unsigned long offset)
@@ -559,11 +596,14 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
-
 EXPORT_SYMBOL(find_get_page);
 
-/*
- * Same as above, but trylock it instead of incrementing the count.
+/**
+ * find_trylock_page - find and lock a page
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Same as find_get_page(), but trylock it instead of incrementing the count.
  */
 struct page *find_trylock_page(struct address_space *mapping, unsigned long offset)
 {
@@ -576,12 +616,10 @@ struct page *find_trylock_page(struct address_space *mapping, unsigned long offs
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
-
 EXPORT_SYMBOL(find_trylock_page);
 
 /**
  * find_lock_page - locate, pin and lock a pagecache page
- *
  * @mapping: the address_space to search
  * @offset: the page index
 *
@@ -617,12 +655,10 @@ repeat:
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
-
 EXPORT_SYMBOL(find_lock_page);
 
 /**
  * find_or_create_page - locate or add a pagecache page
- *
  * @mapping: the page's address_space
  * @index: the page's index into the mapping
  * @gfp_mask: page allocation mode
@@ -663,7 +699,6 @@ repeat:
 		page_cache_release(cached_page);
 	return page;
 }
-
 EXPORT_SYMBOL(find_or_create_page);
 
 /**
@@ -729,9 +764,16 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 	return i;
 }
 
-/*
+/**
+ * find_get_pages_tag - find and return pages that match @tag
+ * @mapping: the address_space to search
+ * @index: the starting page index
+ * @tag: the tag index
+ * @nr_pages: the maximum number of pages
+ * @pages: where the resulting pages are placed
+ *
  * Like find_get_pages, except we only return pages which are tagged with
- * `tag'. We update *index to index the next page for the traversal.
+ * @tag. We update @index to index the next page for the traversal.
  */
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages)
@@ -750,7 +792,11 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 	return ret;
 }
 
-/*
+/**
+ * grab_cache_page_nowait - returns locked page at given index in given cache
+ * @mapping: target address_space
+ * @index: the page index
+ *
  * Same as grab_cache_page, but do not wait if the page is unavailable.
  * This is intended for speculative data generators, where the data can
  * be regenerated if the page couldn't be grabbed. This routine should
@@ -779,19 +825,51 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 	}
 	return page;
 }
-
 EXPORT_SYMBOL(grab_cache_page_nowait);
 
 /*
+ * CD/DVDs are error prone. When a medium error occurs, the driver may fail
+ * a _large_ part of the i/o request. Imagine the worst scenario:
+ *
+ * ---R__________________________________________B__________
+ *    ^ reading here                              ^ bad block(assume 4k)
+ *
+ * read(R) => miss => readahead(R...B) => media error => frustrating retries
+ * => failing the whole request => read(R) => read(R+1) =>
+ * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
+ * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
+ * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
+ *
+ * It is going insane. Fix it by quickly scaling down the readahead size.
+ */
+static void shrink_readahead_size_eio(struct file *filp,
+					struct file_ra_state *ra)
+{
+	if (!ra->ra_pages)
+		return;
+
+	ra->ra_pages /= 4;
+	printk(KERN_WARNING "Reducing readahead size to %luK\n",
+			ra->ra_pages << (PAGE_CACHE_SHIFT - 10));
+}
+
+/**
+ * do_generic_mapping_read - generic file read routine
+ * @mapping: address_space to be read
+ * @_ra: file's readahead state
+ * @filp: the file to read
+ * @ppos: current file position
+ * @desc: read_descriptor
+ * @actor: read method
+ *
  * This is a generic file read routine, and uses the
- * mapping->a_ops->readpage() function for the actual low-level
- * stuff.
+ * mapping->a_ops->readpage() function for the actual low-level stuff.
  *
  * This is really ugly. But the goto's actually try to clarify some
  * of the logic when it comes to error handling etc.
  *
- * Note the struct file* is only passed for the use of readpage. It may be
- * NULL.
+ * Note the struct file* is only passed for the use of readpage.
+ * It may be NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
 			     struct file_ra_state *_ra,
@@ -932,6 +1010,7 @@ readpage:
 			}
 			unlock_page(page);
 			error = -EIO;
+			shrink_readahead_size_eio(filp, &ra);
 			goto readpage_error;
 		}
 		unlock_page(page);
@@ -1004,7 +1083,6 @@ out:
 	if (filp)
 		file_accessed(filp);
 }
-
 EXPORT_SYMBOL(do_generic_mapping_read);
 
 int file_read_actor(read_descriptor_t *desc, struct page *page,
@@ -1046,7 +1124,13 @@ success:
 }
 EXPORT_SYMBOL_GPL(file_read_actor);
 
-/*
+/**
+ * __generic_file_aio_read - generic filesystem read routine
+ * @iocb: kernel I/O control block
+ * @iov: io vector request
+ * @nr_segs: number of segments in the iovec
+ * @ppos: current file position
+ *
  * This is the "read()" routine for all filesystems
  * that can use the page cache directly.
  */
@@ -1125,7 +1209,6 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 out:
 	return retval;
 }
-
 EXPORT_SYMBOL(__generic_file_aio_read);
 
 ssize_t
@@ -1136,7 +1219,6 @@ generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 	BUG_ON(iocb->ki_pos != pos);
 	return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
 }
-
 EXPORT_SYMBOL(generic_file_aio_read);
 
 ssize_t
@@ -1152,7 +1234,6 @@ generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppo
 		ret = wait_on_sync_kiocb(&kiocb);
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_read);
 
 int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -1193,7 +1274,6 @@ ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
 		return desc.written;
 	return desc.error;
 }
-
 EXPORT_SYMBOL(generic_file_sendfile);
 
 static ssize_t
@@ -1229,11 +1309,15 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
 }
 
 #ifdef CONFIG_MMU
-/*
+static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
+/**
+ * page_cache_read - adds requested page to the page cache if not already there
+ * @file: file to read
+ * @offset: page index
+ *
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
 static int fastcall page_cache_read(struct file * file, unsigned long offset)
 {
 	struct address_space *mapping = file->f_mapping;
@@ -1260,7 +1344,12 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 
 #define MMAP_LOTSAMISS (100)
 
-/*
+/**
+ * filemap_nopage - read in file data for page fault handling
+ * @area: the applicable vm_area
+ * @address: target address to read in
+ * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
+ *
  * filemap_nopage() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
@@ -1327,7 +1416,7 @@ retry_find:
 		 */
 		if (!did_readaround) {
 			majmin = VM_FAULT_MAJOR;
-			inc_page_state(pgmajfault);
+			count_vm_event(PGMAJFAULT);
 		}
 		did_readaround = 1;
 		ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1398,7 +1487,7 @@ no_cached_page:
 page_not_uptodate:
 	if (!did_readaround) {
 		majmin = VM_FAULT_MAJOR;
-		inc_page_state(pgmajfault);
+		count_vm_event(PGMAJFAULT);
 	}
 	lock_page(page);
 
@@ -1460,10 +1549,10 @@ page_not_uptodate:
 	 * Things didn't work out. Return zero to tell the
 	 * mm layer so, possibly freeing the page cache page first.
 	 */
+	shrink_readahead_size_eio(file, ra);
 	page_cache_release(page);
 	return NULL;
 }
-
 EXPORT_SYMBOL(filemap_nopage);
 
 static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
@@ -1717,7 +1806,13 @@ repeat:
 	return page;
 }
 
-/*
+/**
+ * read_cache_page - read into page cache, fill it if needed
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @filler: function to perform the read
+ * @data: destination for read data
+ *
  * Read into the page cache. If a page already exists,
  * and PageUptodate() is not set, try to fill the page.
  */
@@ -1755,7 +1850,6 @@ retry:
  out:
 	return page;
 }
-
 EXPORT_SYMBOL(read_cache_page);
 
 /*
@@ -1826,7 +1920,7 @@ int remove_suid(struct dentry *dentry)
 EXPORT_SYMBOL(remove_suid);
 
 size_t
-__filemap_copy_from_user_iovec(char *vaddr,
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
 	size_t copied = 0, left = 0;
@@ -1836,18 +1930,14 @@ __filemap_copy_from_user_iovec(char *vaddr,
 		int copy = min(bytes, iov->iov_len - base);
 
 		base = 0;
-		left = __copy_from_user_inatomic(vaddr, buf, copy);
+		left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
 		copied += copy;
 		bytes -= copy;
 		vaddr += copy;
 		iov++;
 
-		if (unlikely(left)) {
-			/* zero the rest of the target like __copy_from_user */
-			if (bytes)
-				memset(vaddr, 0, bytes);
+		if (unlikely(left))
 			break;
-		}
 	}
 	return copied - left;
 }
@@ -1855,7 +1945,7 @@ __filemap_copy_from_user_iovec(char *vaddr,
 /*
  * Performs necessary checks before doing a write
  *
- * Can adjust writing position aor amount of bytes to write.
+ * Can adjust writing position or amount of bytes to write.
  * Returns appropriate error code that caller should return or
  * zero in case that write should be allowed.
  */
@@ -1979,7 +2069,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode *inode = mapping->host;
 	long status = 0;
 	struct page *page;
@@ -2005,14 +2095,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	do {
 		unsigned long index;
 		unsigned long offset;
-		unsigned long maxlen;
 		size_t copied;
 
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+
+		/* Limit the size of the copy to the caller's write size */
+		bytes = min(bytes, count);
+
+		/*
+		 * Limit the size of the copy to that of the current segment,
+		 * because fault_in_pages_readable() doesn't know how to walk
+		 * segments.
+		 */
+		bytes = min(bytes, cur_iov->iov_len - iov_base);
 
 		/*
 		 * Bring in the user page that we will copy from _first_.
@@ -2020,10 +2117,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
 		 */
-		maxlen = cur_iov->iov_len - iov_base;
-		if (maxlen > bytes)
-			maxlen = bytes;
-		fault_in_pages_readable(buf, maxlen);
+		fault_in_pages_readable(buf, bytes);
 
 		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
 		if (!page) {
@@ -2031,6 +2125,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			break;
 		}
 
+		if (unlikely(bytes == 0)) {
+			status = 0;
+			copied = 0;
+			goto zero_length_segment;
+		}
+
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status)) {
 			loff_t isize = i_size_read(inode);
@@ -2060,7 +2160,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			page_cache_release(page);
 			continue;
 		}
-		if (likely(copied > 0)) {
+zero_length_segment:
+		if (likely(copied >= 0)) {
 			if (!status)
 				status = copied;
 
@@ -2125,7 +2226,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 	unsigned long nr_segs, loff_t *ppos)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space * mapping = file->f_mapping;
+	const struct address_space * mapping = file->f_mapping;
 	size_t ocount; /* original count */
 	size_t count; /* after file limit checks */
 	struct inode *inode = mapping->host;