Diffstat (limited to 'mm/filemap.c')
-rw-r--r--   mm/filemap.c   84
1 file changed, 19 insertions, 65 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index f807afda86f2..c641edf553a9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -80,8 +80,8 @@
  * ->i_mutex
  * ->i_alloc_sem (various)
  *
- * ->inode_lock
- * ->sb_lock (fs/fs-writeback.c)
+ *  inode_wb_list_lock
+ *  sb_lock (fs/fs-writeback.c)
  * ->mapping->tree_lock (__sync_single_inode)
  *
  * ->i_mmap_lock
@@ -98,8 +98,10 @@
  * ->zone.lru_lock (check_pte_range->isolate_lru_page)
  * ->private_lock (page_remove_rmap->set_page_dirty)
  * ->tree_lock (page_remove_rmap->set_page_dirty)
- * ->inode_lock (page_remove_rmap->set_page_dirty)
- * ->inode_lock (zap_pte_range->set_page_dirty)
+ *  inode_wb_list_lock (page_remove_rmap->set_page_dirty)
+ * ->inode->i_lock (page_remove_rmap->set_page_dirty)
+ *  inode_wb_list_lock (zap_pte_range->set_page_dirty)
+ * ->inode->i_lock (zap_pte_range->set_page_dirty)
  * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
  *
  * (code doesn't rely on that order, so you could switch it around)
@@ -164,45 +166,15 @@ void delete_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(delete_from_page_cache);

-static int sync_page(void *word)
+static int sleep_on_page(void *word)
 {
-        struct address_space *mapping;
-        struct page *page;
-
-        page = container_of((unsigned long *)word, struct page, flags);
-
-        /*
-         * page_mapping() is being called without PG_locked held.
-         * Some knowledge of the state and use of the page is used to
-         * reduce the requirements down to a memory barrier.
-         * The danger here is of a stale page_mapping() return value
-         * indicating a struct address_space different from the one it's
-         * associated with when it is associated with one.
-         * After smp_mb(), it's either the correct page_mapping() for
-         * the page, or an old page_mapping() and the page's own
-         * page_mapping() has gone NULL.
-         * The ->sync_page() address_space operation must tolerate
-         * page_mapping() going NULL. By an amazing coincidence,
-         * this comes about because none of the users of the page
-         * in the ->sync_page() methods make essential use of the
-         * page_mapping(), merely passing the page down to the backing
-         * device's unplug functions when it's non-NULL, which in turn
-         * ignore it for all cases but swap, where only page_private(page) is
-         * of interest. When page_mapping() does go NULL, the entire
-         * call stack gracefully ignores the page and returns.
-         * -- wli
-         */
-        smp_mb();
-        mapping = page_mapping(page);
-        if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-                mapping->a_ops->sync_page(page);
         io_schedule();
         return 0;
 }

-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
 {
-        sync_page(word);
+        sleep_on_page(word);
         return fatal_signal_pending(current) ? -EINTR : 0;
 }

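For context (not part of this patch): the sleep_on_page*() helpers above are only reached from the slow paths. The fast-path wrappers live in include/linux/pagemap.h and, in this kernel era, look roughly like the paraphrased sketch below.

    /*
     * Paraphrased sketch of include/linux/pagemap.h from the same era --
     * not part of this diff.  trylock_page() is the lock-free fast path;
     * only on contention do callers fall into the __lock_page*() slow
     * paths, which now sleep via sleep_on_page()/sleep_on_page_killable().
     */
    static inline void lock_page(struct page *page)
    {
            might_sleep();
            if (!trylock_page(page))
                    __lock_page(page);
    }

    static inline int lock_page_killable(struct page *page)
    {
            might_sleep();
            if (!trylock_page(page))
                    return __lock_page_killable(page);  /* -EINTR on fatal signal */
            return 0;
    }

With ->sync_page() gone, a waiter no longer calls back into the backing device; it simply io_schedule()s until unlock_page() or end_page_writeback() wakes it.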
@@ -558,12 +530,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif

-static int __sleep_on_page_lock(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -591,7 +557,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
         DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);

         if (test_bit(bit_nr, &page->flags))
-                __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+                __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
                                                         TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -655,17 +621,12 @@ EXPORT_SYMBOL(end_page_writeback);
 /**
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait. However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
  */
 void __lock_page(struct page *page)
 {
         DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);

-        __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+        __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
                                                         TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -675,24 +636,10 @@ int __lock_page_killable(struct page *page)
         DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);

         return __wait_on_bit_lock(page_waitqueue(page), &wait,
-                                        sync_page_killable, TASK_KILLABLE);
+                                        sleep_on_page_killable, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);

-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
-        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-        __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-                                        TASK_UNINTERRUPTIBLE);
-}
-
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                                         unsigned int flags)
 {
@@ -1407,12 +1354,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
         unsigned long seg = 0;
         size_t count;
         loff_t *ppos = &iocb->ki_pos;
+        struct blk_plug plug;

         count = 0;
         retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
         if (retval)
                 return retval;

+        blk_start_plug(&plug);
+
         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
         if (filp->f_flags & O_DIRECT) {
                 loff_t size;
@@ -1485,6 +1435,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                 break;
         }
 out:
+        blk_finish_plug(&plug);
         return retval;
 }
 EXPORT_SYMBOL(generic_file_aio_read);
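The two hunks above add on-stack block plugging around the read path. A minimal sketch of the pattern, assuming only the blk_start_plug()/blk_finish_plug() API from <linux/blkdev.h> (the submit_readahead_batch() helper is hypothetical, standing in for the page cache read submissions done by the real function):

    #include <linux/blkdev.h>

    /* Minimal sketch of the on-stack plugging pattern used above. */
    static void plugged_read_example(void)
    {
            struct blk_plug plug;

            blk_start_plug(&plug);          /* bios queue up on the task's plug list */
            submit_readahead_batch();       /* hypothetical: submit a batch of reads */
            blk_finish_plug(&plug);         /* hand the whole batch to the block layer */
    }

If the task sleeps while the plug is active, the block layer flushes the plug on its behalf, so holding a plug across a blocking section does not stall the I/O it is waiting for.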
@@ -2596,11 +2547,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
+        struct blk_plug plug;
         ssize_t ret;

         BUG_ON(iocb->ki_pos != pos);

         mutex_lock(&inode->i_mutex);
+        blk_start_plug(&plug);
         ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
         mutex_unlock(&inode->i_mutex);

@@ -2611,6 +2564,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                 if (err < 0 && ret > 0)
                         ret = err;
         }
+        blk_finish_plug(&plug);
         return ret;
 }
 EXPORT_SYMBOL(generic_file_aio_write);
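Condensed, the write path now has roughly the following shape; this is only a paraphrase of the two generic_file_aio_write() hunks above, with the unchanged middle (sync/error handling) elided:

    /* Condensed paraphrase of the hunks above -- not a complete function. */
    ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                   unsigned long nr_segs, loff_t pos)
    {
            struct inode *inode = iocb->ki_filp->f_mapping->host;
            struct blk_plug plug;
            ssize_t ret;

            mutex_lock(&inode->i_mutex);
            blk_start_plug(&plug);          /* batch everything the write submits */
            ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
            mutex_unlock(&inode->i_mutex);

            /* ... unchanged sync/error handling elided ... */

            blk_finish_plug(&plug);         /* dispatch the batched bios */
            return ret;
    }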