about summary refs log tree commit diff stats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- mm/filemap.c 84
1 files changed, 19 insertions, 65 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index f807afda86f2..c641edf553a9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -80,8 +80,8 @@
80 * ->i_mutex 80 * ->i_mutex
81 * ->i_alloc_sem (various) 81 * ->i_alloc_sem (various)
82 * 82 *
83 * ->inode_lock 83 * inode_wb_list_lock
84 * ->sb_lock (fs/fs-writeback.c) 84 * sb_lock (fs/fs-writeback.c)
85 * ->mapping->tree_lock (__sync_single_inode) 85 * ->mapping->tree_lock (__sync_single_inode)
86 * 86 *
87 * ->i_mmap_lock 87 * ->i_mmap_lock
@@ -98,8 +98,10 @@
98 * ->zone.lru_lock (check_pte_range->isolate_lru_page) 98 * ->zone.lru_lock (check_pte_range->isolate_lru_page)
99 * ->private_lock (page_remove_rmap->set_page_dirty) 99 * ->private_lock (page_remove_rmap->set_page_dirty)
100 * ->tree_lock (page_remove_rmap->set_page_dirty) 100 * ->tree_lock (page_remove_rmap->set_page_dirty)
101 * ->inode_lock (page_remove_rmap->set_page_dirty) 101 * inode_wb_list_lock (page_remove_rmap->set_page_dirty)
102 * ->inode_lock (zap_pte_range->set_page_dirty) 102 * ->inode->i_lock (page_remove_rmap->set_page_dirty)
103 * inode_wb_list_lock (zap_pte_range->set_page_dirty)
104 * ->inode->i_lock (zap_pte_range->set_page_dirty)
103 * ->private_lock (zap_pte_range->__set_page_dirty_buffers) 105 * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
104 * 106 *
105 * (code doesn't rely on that order, so you could switch it around) 107 * (code doesn't rely on that order, so you could switch it around)
@@ -164,45 +166,15 @@ void delete_from_page_cache(struct page *page)
164} 166}
165EXPORT_SYMBOL(delete_from_page_cache); 167EXPORT_SYMBOL(delete_from_page_cache);
166 168
167static int sync_page(void *word) 169static int sleep_on_page(void *word)
168{ 170{
169 struct address_space *mapping;
170 struct page *page;
171
172 page = container_of((unsigned long *)word, struct page, flags);
173
174 /*
175 * page_mapping() is being called without PG_locked held.
176 * Some knowledge of the state and use of the page is used to
177 * reduce the requirements down to a memory barrier.
178 * The danger here is of a stale page_mapping() return value
179 * indicating a struct address_space different from the one it's
180 * associated with when it is associated with one.
181 * After smp_mb(), it's either the correct page_mapping() for
182 * the page, or an old page_mapping() and the page's own
183 * page_mapping() has gone NULL.
184 * The ->sync_page() address_space operation must tolerate
185 * page_mapping() going NULL. By an amazing coincidence,
186 * this comes about because none of the users of the page
187 * in the ->sync_page() methods make essential use of the
188 * page_mapping(), merely passing the page down to the backing
189 * device's unplug functions when it's non-NULL, which in turn
190 * ignore it for all cases but swap, where only page_private(page) is
191 * of interest. When page_mapping() does go NULL, the entire
192 * call stack gracefully ignores the page and returns.
193 * -- wli
194 */
195 smp_mb();
196 mapping = page_mapping(page);
197 if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
198 mapping->a_ops->sync_page(page);
199 io_schedule(); 171 io_schedule();
200 return 0; 172 return 0;
201} 173}
202 174
203static int sync_page_killable(void *word) 175static int sleep_on_page_killable(void *word)
204{ 176{
205 sync_page(word); 177 sleep_on_page(word);
206 return fatal_signal_pending(current) ? -EINTR : 0; 178 return fatal_signal_pending(current) ? -EINTR : 0;
207} 179}
208 180
@@ -558,12 +530,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
558EXPORT_SYMBOL(__page_cache_alloc); 530EXPORT_SYMBOL(__page_cache_alloc);
559#endif 531#endif
560 532
561static int __sleep_on_page_lock(void *word)
562{
563 io_schedule();
564 return 0;
565}
566
567/* 533/*
568 * In order to wait for pages to become available there must be 534 * In order to wait for pages to become available there must be
569 * waitqueues associated with pages. By using a hash table of 535 * waitqueues associated with pages. By using a hash table of
@@ -591,7 +557,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
591 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 557 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
592 558
593 if (test_bit(bit_nr, &page->flags)) 559 if (test_bit(bit_nr, &page->flags))
594 __wait_on_bit(page_waitqueue(page), &wait, sync_page, 560 __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
595 TASK_UNINTERRUPTIBLE); 561 TASK_UNINTERRUPTIBLE);
596} 562}
597EXPORT_SYMBOL(wait_on_page_bit); 563EXPORT_SYMBOL(wait_on_page_bit);
@@ -655,17 +621,12 @@ EXPORT_SYMBOL(end_page_writeback);
655/** 621/**
656 * __lock_page - get a lock on the page, assuming we need to sleep to get it 622 * __lock_page - get a lock on the page, assuming we need to sleep to get it
657 * @page: the page to lock 623 * @page: the page to lock
658 *
659 * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
660 * random driver's requestfn sets TASK_RUNNING, we could busywait. However
661 * chances are that on the second loop, the block layer's plug list is empty,
662 * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
663 */ 624 */
664void __lock_page(struct page *page) 625void __lock_page(struct page *page)
665{ 626{
666 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); 627 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
667 628
668 __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page, 629 __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
669 TASK_UNINTERRUPTIBLE); 630 TASK_UNINTERRUPTIBLE);
670} 631}
671EXPORT_SYMBOL(__lock_page); 632EXPORT_SYMBOL(__lock_page);
@@ -675,24 +636,10 @@ int __lock_page_killable(struct page *page)
675 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); 636 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
676 637
677 return __wait_on_bit_lock(page_waitqueue(page), &wait, 638 return __wait_on_bit_lock(page_waitqueue(page), &wait,
678 sync_page_killable, TASK_KILLABLE); 639 sleep_on_page_killable, TASK_KILLABLE);
679} 640}
680EXPORT_SYMBOL_GPL(__lock_page_killable); 641EXPORT_SYMBOL_GPL(__lock_page_killable);
681 642
682/**
683 * __lock_page_nosync - get a lock on the page, without calling sync_page()
684 * @page: the page to lock
685 *
686 * Variant of lock_page that does not require the caller to hold a reference
687 * on the page's mapping.
688 */
689void __lock_page_nosync(struct page *page)
690{
691 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
692 __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
693 TASK_UNINTERRUPTIBLE);
694}
695
696int __lock_page_or_retry(struct page *page, struct mm_struct *mm, 643int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
697 unsigned int flags) 644 unsigned int flags)
698{ 645{
@@ -1407,12 +1354,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1407 unsigned long seg = 0; 1354 unsigned long seg = 0;
1408 size_t count; 1355 size_t count;
1409 loff_t *ppos = &iocb->ki_pos; 1356 loff_t *ppos = &iocb->ki_pos;
1357 struct blk_plug plug;
1410 1358
1411 count = 0; 1359 count = 0;
1412 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1360 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1413 if (retval) 1361 if (retval)
1414 return retval; 1362 return retval;
1415 1363
1364 blk_start_plug(&plug);
1365
1416 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1366 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
1417 if (filp->f_flags & O_DIRECT) { 1367 if (filp->f_flags & O_DIRECT) {
1418 loff_t size; 1368 loff_t size;
@@ -1485,6 +1435,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1485 break; 1435 break;
1486 } 1436 }
1487out: 1437out:
1438 blk_finish_plug(&plug);
1488 return retval; 1439 return retval;
1489} 1440}
1490EXPORT_SYMBOL(generic_file_aio_read); 1441EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,11 +2547,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2596{ 2547{
2597 struct file *file = iocb->ki_filp; 2548 struct file *file = iocb->ki_filp;
2598 struct inode *inode = file->f_mapping->host; 2549 struct inode *inode = file->f_mapping->host;
2550 struct blk_plug plug;
2599 ssize_t ret; 2551 ssize_t ret;
2600 2552
2601 BUG_ON(iocb->ki_pos != pos); 2553 BUG_ON(iocb->ki_pos != pos);
2602 2554
2603 mutex_lock(&inode->i_mutex); 2555 mutex_lock(&inode->i_mutex);
2556 blk_start_plug(&plug);
2604 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 2557 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
2605 mutex_unlock(&inode->i_mutex); 2558 mutex_unlock(&inode->i_mutex);
2606 2559
@@ -2611,6 +2564,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2611 if (err < 0 && ret > 0) 2564 if (err < 0 && ret > 0)
2612 ret = err; 2565 ret = err;
2613 } 2566 }
2567 blk_finish_plug(&plug);
2614 return ret; 2568 return ret;
2615} 2569}
2616EXPORT_SYMBOL(generic_file_aio_write); 2570EXPORT_SYMBOL(generic_file_aio_write);