diff options
author | Jens Axboe <jaxboe@fusionio.com> | 2011-03-10 02:52:07 -0500 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2011-03-10 02:52:07 -0500 |
commit | 7eaceaccab5f40bbfda044629a6298616aeaed50 (patch) | |
tree | 33954d12f63e25a47eb6d86ef3d3d0a5e62bf752 /mm | |
parent | 73c101011926c5832e6e141682180c4debe2cf45 (diff) |
block: remove per-queue plugging
Code has been converted over to the new explicit on-stack plugging,
and delay users have been converted to use the new API for that.
So let's kill off the old plugging along with aops->sync_page().
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 6 | ||||
-rw-r--r-- | mm/filemap.c | 67 | ||||
-rw-r--r-- | mm/memory-failure.c | 8 | ||||
-rw-r--r-- | mm/nommu.c | 4 | ||||
-rw-r--r-- | mm/page-writeback.c | 2 | ||||
-rw-r--r-- | mm/readahead.c | 12 | ||||
-rw-r--r-- | mm/shmem.c | 1 | ||||
-rw-r--r-- | mm/swap_state.c | 5 | ||||
-rw-r--r-- | mm/swapfile.c | 37 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
10 files changed, 13 insertions, 131 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 027100d30227..c91e139a652e 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -14,17 +14,11 @@ | |||
14 | 14 | ||
15 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); | 15 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); |
16 | 16 | ||
17 | void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
18 | { | ||
19 | } | ||
20 | EXPORT_SYMBOL(default_unplug_io_fn); | ||
21 | |||
22 | struct backing_dev_info default_backing_dev_info = { | 17 | struct backing_dev_info default_backing_dev_info = { |
23 | .name = "default", | 18 | .name = "default", |
24 | .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, | 19 | .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, |
25 | .state = 0, | 20 | .state = 0, |
26 | .capabilities = BDI_CAP_MAP_COPY, | 21 | .capabilities = BDI_CAP_MAP_COPY, |
27 | .unplug_io_fn = default_unplug_io_fn, | ||
28 | }; | 22 | }; |
29 | EXPORT_SYMBOL_GPL(default_backing_dev_info); | 23 | EXPORT_SYMBOL_GPL(default_backing_dev_info); |
30 | 24 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 83a45d35468b..380776c2a9ac 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -155,45 +155,15 @@ void remove_from_page_cache(struct page *page) | |||
155 | } | 155 | } |
156 | EXPORT_SYMBOL(remove_from_page_cache); | 156 | EXPORT_SYMBOL(remove_from_page_cache); |
157 | 157 | ||
158 | static int sync_page(void *word) | 158 | static int sleep_on_page(void *word) |
159 | { | 159 | { |
160 | struct address_space *mapping; | ||
161 | struct page *page; | ||
162 | |||
163 | page = container_of((unsigned long *)word, struct page, flags); | ||
164 | |||
165 | /* | ||
166 | * page_mapping() is being called without PG_locked held. | ||
167 | * Some knowledge of the state and use of the page is used to | ||
168 | * reduce the requirements down to a memory barrier. | ||
169 | * The danger here is of a stale page_mapping() return value | ||
170 | * indicating a struct address_space different from the one it's | ||
171 | * associated with when it is associated with one. | ||
172 | * After smp_mb(), it's either the correct page_mapping() for | ||
173 | * the page, or an old page_mapping() and the page's own | ||
174 | * page_mapping() has gone NULL. | ||
175 | * The ->sync_page() address_space operation must tolerate | ||
176 | * page_mapping() going NULL. By an amazing coincidence, | ||
177 | * this comes about because none of the users of the page | ||
178 | * in the ->sync_page() methods make essential use of the | ||
179 | * page_mapping(), merely passing the page down to the backing | ||
180 | * device's unplug functions when it's non-NULL, which in turn | ||
181 | * ignore it for all cases but swap, where only page_private(page) is | ||
182 | * of interest. When page_mapping() does go NULL, the entire | ||
183 | * call stack gracefully ignores the page and returns. | ||
184 | * -- wli | ||
185 | */ | ||
186 | smp_mb(); | ||
187 | mapping = page_mapping(page); | ||
188 | if (mapping && mapping->a_ops && mapping->a_ops->sync_page) | ||
189 | mapping->a_ops->sync_page(page); | ||
190 | io_schedule(); | 160 | io_schedule(); |
191 | return 0; | 161 | return 0; |
192 | } | 162 | } |
193 | 163 | ||
194 | static int sync_page_killable(void *word) | 164 | static int sleep_on_page_killable(void *word) |
195 | { | 165 | { |
196 | sync_page(word); | 166 | sleep_on_page(word); |
197 | return fatal_signal_pending(current) ? -EINTR : 0; | 167 | return fatal_signal_pending(current) ? -EINTR : 0; |
198 | } | 168 | } |
199 | 169 | ||
@@ -479,12 +449,6 @@ struct page *__page_cache_alloc(gfp_t gfp) | |||
479 | EXPORT_SYMBOL(__page_cache_alloc); | 449 | EXPORT_SYMBOL(__page_cache_alloc); |
480 | #endif | 450 | #endif |
481 | 451 | ||
482 | static int __sleep_on_page_lock(void *word) | ||
483 | { | ||
484 | io_schedule(); | ||
485 | return 0; | ||
486 | } | ||
487 | |||
488 | /* | 452 | /* |
489 | * In order to wait for pages to become available there must be | 453 | * In order to wait for pages to become available there must be |
490 | * waitqueues associated with pages. By using a hash table of | 454 | * waitqueues associated with pages. By using a hash table of |
@@ -512,7 +476,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) | |||
512 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); | 476 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); |
513 | 477 | ||
514 | if (test_bit(bit_nr, &page->flags)) | 478 | if (test_bit(bit_nr, &page->flags)) |
515 | __wait_on_bit(page_waitqueue(page), &wait, sync_page, | 479 | __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page, |
516 | TASK_UNINTERRUPTIBLE); | 480 | TASK_UNINTERRUPTIBLE); |
517 | } | 481 | } |
518 | EXPORT_SYMBOL(wait_on_page_bit); | 482 | EXPORT_SYMBOL(wait_on_page_bit); |
@@ -576,17 +540,12 @@ EXPORT_SYMBOL(end_page_writeback); | |||
576 | /** | 540 | /** |
577 | * __lock_page - get a lock on the page, assuming we need to sleep to get it | 541 | * __lock_page - get a lock on the page, assuming we need to sleep to get it |
578 | * @page: the page to lock | 542 | * @page: the page to lock |
579 | * | ||
580 | * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some | ||
581 | * random driver's requestfn sets TASK_RUNNING, we could busywait. However | ||
582 | * chances are that on the second loop, the block layer's plug list is empty, | ||
583 | * so sync_page() will then return in state TASK_UNINTERRUPTIBLE. | ||
584 | */ | 543 | */ |
585 | void __lock_page(struct page *page) | 544 | void __lock_page(struct page *page) |
586 | { | 545 | { |
587 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 546 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
588 | 547 | ||
589 | __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page, | 548 | __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, |
590 | TASK_UNINTERRUPTIBLE); | 549 | TASK_UNINTERRUPTIBLE); |
591 | } | 550 | } |
592 | EXPORT_SYMBOL(__lock_page); | 551 | EXPORT_SYMBOL(__lock_page); |
@@ -596,24 +555,10 @@ int __lock_page_killable(struct page *page) | |||
596 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 555 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
597 | 556 | ||
598 | return __wait_on_bit_lock(page_waitqueue(page), &wait, | 557 | return __wait_on_bit_lock(page_waitqueue(page), &wait, |
599 | sync_page_killable, TASK_KILLABLE); | 558 | sleep_on_page_killable, TASK_KILLABLE); |
600 | } | 559 | } |
601 | EXPORT_SYMBOL_GPL(__lock_page_killable); | 560 | EXPORT_SYMBOL_GPL(__lock_page_killable); |
602 | 561 | ||
603 | /** | ||
604 | * __lock_page_nosync - get a lock on the page, without calling sync_page() | ||
605 | * @page: the page to lock | ||
606 | * | ||
607 | * Variant of lock_page that does not require the caller to hold a reference | ||
608 | * on the page's mapping. | ||
609 | */ | ||
610 | void __lock_page_nosync(struct page *page) | ||
611 | { | ||
612 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | ||
613 | __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, | ||
614 | TASK_UNINTERRUPTIBLE); | ||
615 | } | ||
616 | |||
617 | int __lock_page_or_retry(struct page *page, struct mm_struct *mm, | 562 | int __lock_page_or_retry(struct page *page, struct mm_struct *mm, |
618 | unsigned int flags) | 563 | unsigned int flags) |
619 | { | 564 | { |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0207c2f6f8bd..bfba796d374d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
945 | collect_procs(ppage, &tokill); | 945 | collect_procs(ppage, &tokill); |
946 | 946 | ||
947 | if (hpage != ppage) | 947 | if (hpage != ppage) |
948 | lock_page_nosync(ppage); | 948 | lock_page(ppage); |
949 | 949 | ||
950 | ret = try_to_unmap(ppage, ttu); | 950 | ret = try_to_unmap(ppage, ttu); |
951 | if (ret != SWAP_SUCCESS) | 951 | if (ret != SWAP_SUCCESS) |
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1038 | * Check "just unpoisoned", "filter hit", and | 1038 | * Check "just unpoisoned", "filter hit", and |
1039 | * "race with other subpage." | 1039 | * "race with other subpage." |
1040 | */ | 1040 | */ |
1041 | lock_page_nosync(hpage); | 1041 | lock_page(hpage); |
1042 | if (!PageHWPoison(hpage) | 1042 | if (!PageHWPoison(hpage) |
1043 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) | 1043 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) |
1044 | || (p != hpage && TestSetPageHWPoison(hpage))) { | 1044 | || (p != hpage && TestSetPageHWPoison(hpage))) { |
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1088 | * It's very difficult to mess with pages currently under IO | 1088 | * It's very difficult to mess with pages currently under IO |
1089 | * and in many cases impossible, so we just avoid it here. | 1089 | * and in many cases impossible, so we just avoid it here. |
1090 | */ | 1090 | */ |
1091 | lock_page_nosync(hpage); | 1091 | lock_page(hpage); |
1092 | 1092 | ||
1093 | /* | 1093 | /* |
1094 | * unpoison always clear PG_hwpoison inside page lock | 1094 | * unpoison always clear PG_hwpoison inside page lock |
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn) | |||
1231 | return 0; | 1231 | return 0; |
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | lock_page_nosync(page); | 1234 | lock_page(page); |
1235 | /* | 1235 | /* |
1236 | * This test is racy because PG_hwpoison is set outside of page lock. | 1236 | * This test is racy because PG_hwpoison is set outside of page lock. |
1237 | * That's acceptable because that won't trigger kernel panic. Instead, | 1237 | * That's acceptable because that won't trigger kernel panic. Instead, |
diff --git a/mm/nommu.c b/mm/nommu.c index f59e1424d3db..fb6cbd6abe16 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, | |||
1842 | } | 1842 | } |
1843 | EXPORT_SYMBOL(remap_vmalloc_range); | 1843 | EXPORT_SYMBOL(remap_vmalloc_range); |
1844 | 1844 | ||
1845 | void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1846 | { | ||
1847 | } | ||
1848 | |||
1849 | unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, | 1845 | unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, |
1850 | unsigned long len, unsigned long pgoff, unsigned long flags) | 1846 | unsigned long len, unsigned long pgoff, unsigned long flags) |
1851 | { | 1847 | { |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2cb01f6ec5d0..cc0ede169e41 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -1239,7 +1239,7 @@ int set_page_dirty_lock(struct page *page) | |||
1239 | { | 1239 | { |
1240 | int ret; | 1240 | int ret; |
1241 | 1241 | ||
1242 | lock_page_nosync(page); | 1242 | lock_page(page); |
1243 | ret = set_page_dirty(page); | 1243 | ret = set_page_dirty(page); |
1244 | unlock_page(page); | 1244 | unlock_page(page); |
1245 | return ret; | 1245 | return ret; |
diff --git a/mm/readahead.c b/mm/readahead.c index 77506a291a2d..cbddc3e17246 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -554,17 +554,5 @@ page_cache_async_readahead(struct address_space *mapping, | |||
554 | 554 | ||
555 | /* do read-ahead */ | 555 | /* do read-ahead */ |
556 | ondemand_readahead(mapping, ra, filp, true, offset, req_size); | 556 | ondemand_readahead(mapping, ra, filp, true, offset, req_size); |
557 | |||
558 | #ifdef CONFIG_BLOCK | ||
559 | /* | ||
560 | * Normally the current page is !uptodate and lock_page() will be | ||
561 | * immediately called to implicitly unplug the device. However this | ||
562 | * is not always true for RAID conifgurations, where data arrives | ||
563 | * not strictly in their submission order. In this case we need to | ||
564 | * explicitly kick off the IO. | ||
565 | */ | ||
566 | if (PageUptodate(page)) | ||
567 | blk_run_backing_dev(mapping->backing_dev_info, NULL); | ||
568 | #endif | ||
569 | } | 557 | } |
570 | EXPORT_SYMBOL_GPL(page_cache_async_readahead); | 558 | EXPORT_SYMBOL_GPL(page_cache_async_readahead); |
diff --git a/mm/shmem.c b/mm/shmem.c index 5ee67c990602..24d23f5bedf1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops; | |||
224 | static struct backing_dev_info shmem_backing_dev_info __read_mostly = { | 224 | static struct backing_dev_info shmem_backing_dev_info __read_mostly = { |
225 | .ra_pages = 0, /* No readahead */ | 225 | .ra_pages = 0, /* No readahead */ |
226 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, | 226 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, |
227 | .unplug_io_fn = default_unplug_io_fn, | ||
228 | }; | 227 | }; |
229 | 228 | ||
230 | static LIST_HEAD(shmem_swaplist); | 229 | static LIST_HEAD(shmem_swaplist); |
diff --git a/mm/swap_state.c b/mm/swap_state.c index 5c8cfabbc9bc..46680461785b 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -24,12 +24,10 @@ | |||
24 | 24 | ||
25 | /* | 25 | /* |
26 | * swapper_space is a fiction, retained to simplify the path through | 26 | * swapper_space is a fiction, retained to simplify the path through |
27 | * vmscan's shrink_page_list, to make sync_page look nicer, and to allow | 27 | * vmscan's shrink_page_list. |
28 | * future use of radix_tree tags in the swap cache. | ||
29 | */ | 28 | */ |
30 | static const struct address_space_operations swap_aops = { | 29 | static const struct address_space_operations swap_aops = { |
31 | .writepage = swap_writepage, | 30 | .writepage = swap_writepage, |
32 | .sync_page = block_sync_page, | ||
33 | .set_page_dirty = __set_page_dirty_nobuffers, | 31 | .set_page_dirty = __set_page_dirty_nobuffers, |
34 | .migratepage = migrate_page, | 32 | .migratepage = migrate_page, |
35 | }; | 33 | }; |
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = { | |||
37 | static struct backing_dev_info swap_backing_dev_info = { | 35 | static struct backing_dev_info swap_backing_dev_info = { |
38 | .name = "swap", | 36 | .name = "swap", |
39 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, | 37 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, |
40 | .unplug_io_fn = swap_unplug_io_fn, | ||
41 | }; | 38 | }; |
42 | 39 | ||
43 | struct address_space swapper_space = { | 40 | struct address_space swapper_space = { |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 07a458d72fa8..7ceea78ceb20 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) | |||
95 | } | 95 | } |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * We need this because the bdev->unplug_fn can sleep and we cannot | ||
99 | * hold swap_lock while calling the unplug_fn. And swap_lock | ||
100 | * cannot be turned into a mutex. | ||
101 | */ | ||
102 | static DECLARE_RWSEM(swap_unplug_sem); | ||
103 | |||
104 | void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) | ||
105 | { | ||
106 | swp_entry_t entry; | ||
107 | |||
108 | down_read(&swap_unplug_sem); | ||
109 | entry.val = page_private(page); | ||
110 | if (PageSwapCache(page)) { | ||
111 | struct block_device *bdev = swap_info[swp_type(entry)]->bdev; | ||
112 | struct backing_dev_info *bdi; | ||
113 | |||
114 | /* | ||
115 | * If the page is removed from swapcache from under us (with a | ||
116 | * racy try_to_unuse/swapoff) we need an additional reference | ||
117 | * count to avoid reading garbage from page_private(page) above. | ||
118 | * If the WARN_ON triggers during a swapoff it maybe the race | ||
119 | * condition and it's harmless. However if it triggers without | ||
120 | * swapoff it signals a problem. | ||
121 | */ | ||
122 | WARN_ON(page_count(page) <= 1); | ||
123 | |||
124 | bdi = bdev->bd_inode->i_mapping->backing_dev_info; | ||
125 | blk_run_backing_dev(bdi, page); | ||
126 | } | ||
127 | up_read(&swap_unplug_sem); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * swapon tell device that all the old swap contents can be discarded, | 98 | * swapon tell device that all the old swap contents can be discarded, |
132 | * to allow the swap device to optimize its wear-levelling. | 99 | * to allow the swap device to optimize its wear-levelling. |
133 | */ | 100 | */ |
@@ -1643,10 +1610,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1643 | goto out_dput; | 1610 | goto out_dput; |
1644 | } | 1611 | } |
1645 | 1612 | ||
1646 | /* wait for any unplug function to finish */ | ||
1647 | down_write(&swap_unplug_sem); | ||
1648 | up_write(&swap_unplug_sem); | ||
1649 | |||
1650 | destroy_swap_extents(p); | 1613 | destroy_swap_extents(p); |
1651 | if (p->flags & SWP_CONTINUED) | 1614 | if (p->flags & SWP_CONTINUED) |
1652 | free_swap_count_continuations(p); | 1615 | free_swap_count_continuations(p); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 17497d0cd8b9..251bed73ac03 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi, | |||
358 | static void handle_write_error(struct address_space *mapping, | 358 | static void handle_write_error(struct address_space *mapping, |
359 | struct page *page, int error) | 359 | struct page *page, int error) |
360 | { | 360 | { |
361 | lock_page_nosync(page); | 361 | lock_page(page); |
362 | if (page_mapping(page) == mapping) | 362 | if (page_mapping(page) == mapping) |
363 | mapping_set_error(mapping, error); | 363 | mapping_set_error(mapping, error); |
364 | unlock_page(page); | 364 | unlock_page(page); |