diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-24 13:16:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-24 13:16:26 -0400 |
commit | 6c5103890057b1bb781b26b7aae38d33e4c517d8 (patch) | |
tree | e6e57961dcddcb5841acb34956e70b9dc696a880 /mm/filemap.c | |
parent | 3dab04e6978e358ad2307bca563fabd6c5d2c58b (diff) | |
parent | 9d2e157d970a73b3f270b631828e03eb452d525e (diff) |
Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block: (65 commits)
Documentation/iostats.txt: bit-size reference etc.
cfq-iosched: removing unnecessary think time checking
cfq-iosched: Don't clear queue stats when preempt.
blk-throttle: Reset group slice when limits are changed
blk-cgroup: Only give unaccounted_time under debug
cfq-iosched: Don't set active queue in preempt
block: fix non-atomic access to genhd inflight structures
block: attempt to merge with existing requests on plug flush
block: NULL dereference on error path in __blkdev_get()
cfq-iosched: Don't update group weights when on service tree
fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away
block: Require subsystems to explicitly allocate bio_set integrity mempool
jbd2: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
jbd: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
fs: make fsync_buffers_list() plug
mm: make generic_writepages() use plugging
blk-cgroup: Add unaccounted time to timeslice_used.
block: fixup plugging stubs for !CONFIG_BLOCK
block: remove obsolete comments for blkdev_issue_zeroout.
blktrace: Use rq->cmd_flags directly in blk_add_trace_rq.
...
Fix up conflicts in fs/{aio.c,super.c}
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 74 |
1 file changed, 13 insertions, 61 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index f807afda86f2..04d1992fd86b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -164,45 +164,15 @@ void delete_from_page_cache(struct page *page) | |||
164 | } | 164 | } |
165 | EXPORT_SYMBOL(delete_from_page_cache); | 165 | EXPORT_SYMBOL(delete_from_page_cache); |
166 | 166 | ||
167 | static int sync_page(void *word) | 167 | static int sleep_on_page(void *word) |
168 | { | 168 | { |
169 | struct address_space *mapping; | ||
170 | struct page *page; | ||
171 | |||
172 | page = container_of((unsigned long *)word, struct page, flags); | ||
173 | |||
174 | /* | ||
175 | * page_mapping() is being called without PG_locked held. | ||
176 | * Some knowledge of the state and use of the page is used to | ||
177 | * reduce the requirements down to a memory barrier. | ||
178 | * The danger here is of a stale page_mapping() return value | ||
179 | * indicating a struct address_space different from the one it's | ||
180 | * associated with when it is associated with one. | ||
181 | * After smp_mb(), it's either the correct page_mapping() for | ||
182 | * the page, or an old page_mapping() and the page's own | ||
183 | * page_mapping() has gone NULL. | ||
184 | * The ->sync_page() address_space operation must tolerate | ||
185 | * page_mapping() going NULL. By an amazing coincidence, | ||
186 | * this comes about because none of the users of the page | ||
187 | * in the ->sync_page() methods make essential use of the | ||
188 | * page_mapping(), merely passing the page down to the backing | ||
189 | * device's unplug functions when it's non-NULL, which in turn | ||
190 | * ignore it for all cases but swap, where only page_private(page) is | ||
191 | * of interest. When page_mapping() does go NULL, the entire | ||
192 | * call stack gracefully ignores the page and returns. | ||
193 | * -- wli | ||
194 | */ | ||
195 | smp_mb(); | ||
196 | mapping = page_mapping(page); | ||
197 | if (mapping && mapping->a_ops && mapping->a_ops->sync_page) | ||
198 | mapping->a_ops->sync_page(page); | ||
199 | io_schedule(); | 169 | io_schedule(); |
200 | return 0; | 170 | return 0; |
201 | } | 171 | } |
202 | 172 | ||
203 | static int sync_page_killable(void *word) | 173 | static int sleep_on_page_killable(void *word) |
204 | { | 174 | { |
205 | sync_page(word); | 175 | sleep_on_page(word); |
206 | return fatal_signal_pending(current) ? -EINTR : 0; | 176 | return fatal_signal_pending(current) ? -EINTR : 0; |
207 | } | 177 | } |
208 | 178 | ||
@@ -558,12 +528,6 @@ struct page *__page_cache_alloc(gfp_t gfp) | |||
558 | EXPORT_SYMBOL(__page_cache_alloc); | 528 | EXPORT_SYMBOL(__page_cache_alloc); |
559 | #endif | 529 | #endif |
560 | 530 | ||
561 | static int __sleep_on_page_lock(void *word) | ||
562 | { | ||
563 | io_schedule(); | ||
564 | return 0; | ||
565 | } | ||
566 | |||
567 | /* | 531 | /* |
568 | * In order to wait for pages to become available there must be | 532 | * In order to wait for pages to become available there must be |
569 | * waitqueues associated with pages. By using a hash table of | 533 | * waitqueues associated with pages. By using a hash table of |
@@ -591,7 +555,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) | |||
591 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); | 555 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); |
592 | 556 | ||
593 | if (test_bit(bit_nr, &page->flags)) | 557 | if (test_bit(bit_nr, &page->flags)) |
594 | __wait_on_bit(page_waitqueue(page), &wait, sync_page, | 558 | __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page, |
595 | TASK_UNINTERRUPTIBLE); | 559 | TASK_UNINTERRUPTIBLE); |
596 | } | 560 | } |
597 | EXPORT_SYMBOL(wait_on_page_bit); | 561 | EXPORT_SYMBOL(wait_on_page_bit); |
@@ -655,17 +619,12 @@ EXPORT_SYMBOL(end_page_writeback); | |||
655 | /** | 619 | /** |
656 | * __lock_page - get a lock on the page, assuming we need to sleep to get it | 620 | * __lock_page - get a lock on the page, assuming we need to sleep to get it |
657 | * @page: the page to lock | 621 | * @page: the page to lock |
658 | * | ||
659 | * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some | ||
660 | * random driver's requestfn sets TASK_RUNNING, we could busywait. However | ||
661 | * chances are that on the second loop, the block layer's plug list is empty, | ||
662 | * so sync_page() will then return in state TASK_UNINTERRUPTIBLE. | ||
663 | */ | 622 | */ |
664 | void __lock_page(struct page *page) | 623 | void __lock_page(struct page *page) |
665 | { | 624 | { |
666 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 625 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
667 | 626 | ||
668 | __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page, | 627 | __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, |
669 | TASK_UNINTERRUPTIBLE); | 628 | TASK_UNINTERRUPTIBLE); |
670 | } | 629 | } |
671 | EXPORT_SYMBOL(__lock_page); | 630 | EXPORT_SYMBOL(__lock_page); |
@@ -675,24 +634,10 @@ int __lock_page_killable(struct page *page) | |||
675 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 634 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
676 | 635 | ||
677 | return __wait_on_bit_lock(page_waitqueue(page), &wait, | 636 | return __wait_on_bit_lock(page_waitqueue(page), &wait, |
678 | sync_page_killable, TASK_KILLABLE); | 637 | sleep_on_page_killable, TASK_KILLABLE); |
679 | } | 638 | } |
680 | EXPORT_SYMBOL_GPL(__lock_page_killable); | 639 | EXPORT_SYMBOL_GPL(__lock_page_killable); |
681 | 640 | ||
682 | /** | ||
683 | * __lock_page_nosync - get a lock on the page, without calling sync_page() | ||
684 | * @page: the page to lock | ||
685 | * | ||
686 | * Variant of lock_page that does not require the caller to hold a reference | ||
687 | * on the page's mapping. | ||
688 | */ | ||
689 | void __lock_page_nosync(struct page *page) | ||
690 | { | ||
691 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | ||
692 | __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, | ||
693 | TASK_UNINTERRUPTIBLE); | ||
694 | } | ||
695 | |||
696 | int __lock_page_or_retry(struct page *page, struct mm_struct *mm, | 641 | int __lock_page_or_retry(struct page *page, struct mm_struct *mm, |
697 | unsigned int flags) | 642 | unsigned int flags) |
698 | { | 643 | { |
@@ -1407,12 +1352,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1407 | unsigned long seg = 0; | 1352 | unsigned long seg = 0; |
1408 | size_t count; | 1353 | size_t count; |
1409 | loff_t *ppos = &iocb->ki_pos; | 1354 | loff_t *ppos = &iocb->ki_pos; |
1355 | struct blk_plug plug; | ||
1410 | 1356 | ||
1411 | count = 0; | 1357 | count = 0; |
1412 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); | 1358 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); |
1413 | if (retval) | 1359 | if (retval) |
1414 | return retval; | 1360 | return retval; |
1415 | 1361 | ||
1362 | blk_start_plug(&plug); | ||
1363 | |||
1416 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 1364 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
1417 | if (filp->f_flags & O_DIRECT) { | 1365 | if (filp->f_flags & O_DIRECT) { |
1418 | loff_t size; | 1366 | loff_t size; |
@@ -1485,6 +1433,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1485 | break; | 1433 | break; |
1486 | } | 1434 | } |
1487 | out: | 1435 | out: |
1436 | blk_finish_plug(&plug); | ||
1488 | return retval; | 1437 | return retval; |
1489 | } | 1438 | } |
1490 | EXPORT_SYMBOL(generic_file_aio_read); | 1439 | EXPORT_SYMBOL(generic_file_aio_read); |
@@ -2596,11 +2545,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2596 | { | 2545 | { |
2597 | struct file *file = iocb->ki_filp; | 2546 | struct file *file = iocb->ki_filp; |
2598 | struct inode *inode = file->f_mapping->host; | 2547 | struct inode *inode = file->f_mapping->host; |
2548 | struct blk_plug plug; | ||
2599 | ssize_t ret; | 2549 | ssize_t ret; |
2600 | 2550 | ||
2601 | BUG_ON(iocb->ki_pos != pos); | 2551 | BUG_ON(iocb->ki_pos != pos); |
2602 | 2552 | ||
2603 | mutex_lock(&inode->i_mutex); | 2553 | mutex_lock(&inode->i_mutex); |
2554 | blk_start_plug(&plug); | ||
2604 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | 2555 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
2605 | mutex_unlock(&inode->i_mutex); | 2556 | mutex_unlock(&inode->i_mutex); |
2606 | 2557 | ||
@@ -2611,6 +2562,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2611 | if (err < 0 && ret > 0) | 2562 | if (err < 0 && ret > 0) |
2612 | ret = err; | 2563 | ret = err; |
2613 | } | 2564 | } |
2565 | blk_finish_plug(&plug); | ||
2614 | return ret; | 2566 | return ret; |
2615 | } | 2567 | } |
2616 | EXPORT_SYMBOL(generic_file_aio_write); | 2568 | EXPORT_SYMBOL(generic_file_aio_write); |