-rw-r--r--  drivers/net/cassini.c    |  12
-rw-r--r--  include/linux/pagemap.h  | 111
-rw-r--r--  mm/filemap.c             |  32
-rw-r--r--  mm/migrate.c             |  20
-rw-r--r--  mm/shmem.c               |   6
-rw-r--r--  mm/swap_state.c          |  17
-rw-r--r--  mm/vmscan.c              |  74
7 files changed, 227 insertions, 45 deletions
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 83768df27806..f1936d51b458 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -576,6 +576,18 @@ static void cas_spare_recover(struct cas *cp, const gfp_t flags)
 	list_for_each_safe(elem, tmp, &list) {
 		cas_page_t *page = list_entry(elem, cas_page_t, list);
 
+		/*
+		 * With the lockless pagecache, cassini buffering scheme gets
+		 * slightly less accurate: we might find that a page has an
+		 * elevated reference count here, due to a speculative ref,
+		 * and skip it as in-use. Ideally we would be able to reclaim
+		 * it. However this would be such a rare case, it doesn't
+		 * matter too much as we should pick it up the next time round.
+		 *
+		 * Importantly, if we find that the page has a refcount of 1
+		 * here (our refcount), then we know it is definitely not inuse
+		 * so we can reuse it.
+		 */
 		if (page_count(page->buffer) > 1)
 			continue;
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ee1ec2c7723c..a81d81890422 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -12,6 +12,7 @@
 #include <asm/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/hardirq.h> /* for in_interrupt() */
 
 /*
  * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
@@ -62,6 +63,98 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 #define page_cache_release(page)	put_page(page)
 void release_pages(struct page **pages, int nr, int cold);
 
+/*
+ * speculatively take a reference to a page.
+ * If the page is free (_count == 0), then _count is untouched, and 0
+ * is returned. Otherwise, _count is incremented by 1 and 1 is returned.
+ *
+ * This function must be called inside the same rcu_read_lock() section as has
+ * been used to lookup the page in the pagecache radix-tree (or page table):
+ * this allows allocators to use a synchronize_rcu() to stabilize _count.
+ *
+ * Unless an RCU grace period has passed, the count of all pages coming out
+ * of the allocator must be considered unstable. page_count may return higher
+ * than expected, and put_page must be able to do the right thing when the
+ * page has been finished with, no matter what it is subsequently allocated
+ * for (because put_page is what is used here to drop an invalid speculative
+ * reference).
+ *
+ * This is the interesting part of the lockless pagecache (and lockless
+ * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
+ * has the following pattern:
+ * 1. find page in radix tree
+ * 2. conditionally increment refcount
+ * 3. check the page is still in pagecache (if no, goto 1)
+ *
+ * Remove-side that cares about stability of _count (eg. reclaim) has the
+ * following (with tree_lock held for write):
+ * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
+ * B. remove page from pagecache
+ * C. free the page
+ *
+ * There are 2 critical interleavings that matter:
+ * - 2 runs before A: in this case, A sees elevated refcount and bails out
+ * - A runs before 2: in this case, 2 sees zero refcount and retries;
+ *   subsequently, B will complete and 1 will find no page, causing the
+ *   lookup to return NULL.
+ *
+ * It is possible that between 1 and 2, the page is removed then the exact same
+ * page is inserted into the same position in pagecache. That's OK: the
+ * old find_get_page using tree_lock could equally have run before or after
+ * such a re-insertion, depending on order that locks are granted.
+ *
+ * Lookups racing against pagecache insertion isn't a big problem: either 1
+ * will find the page or it will not. Likewise, the old find_get_page could run
+ * either before the insertion or afterwards, depending on timing.
+ */
+static inline int page_cache_get_speculative(struct page *page)
+{
+	VM_BUG_ON(in_interrupt());
+
+#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU)
+# ifdef CONFIG_PREEMPT
+	VM_BUG_ON(!in_atomic());
+# endif
+	/*
+	 * Preempt must be disabled here - we rely on rcu_read_lock doing
+	 * this for us.
+	 *
+	 * Pagecache won't be truncated from interrupt context, so if we have
+	 * found a page in the radix tree here, we have pinned its refcount by
+	 * disabling preempt, and hence no need for the "speculative get" that
+	 * SMP requires.
+	 */
+	VM_BUG_ON(page_count(page) == 0);
+	atomic_inc(&page->_count);
+
+#else
+	if (unlikely(!get_page_unless_zero(page))) {
+		/*
+		 * Either the page has been freed, or will be freed.
+		 * In either case, retry here and the caller should
+		 * do the right thing (see comments above).
+		 */
+		return 0;
+	}
+#endif
+	VM_BUG_ON(PageTail(page));
+
+	return 1;
+}
+
+static inline int page_freeze_refs(struct page *page, int count)
+{
+	return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
+}
+
+static inline void page_unfreeze_refs(struct page *page, int count)
+{
+	VM_BUG_ON(page_count(page) != 0);
+	VM_BUG_ON(count == 0);
+
+	atomic_set(&page->_count, count);
+}
+
 #ifdef CONFIG_NUMA
 extern struct page *__page_cache_alloc(gfp_t gfp);
 #else
@@ -133,7 +226,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
 	return read_cache_page(mapping, index, filler, data);
 }
 
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
@@ -141,6 +234,22 @@ extern void remove_from_page_cache(struct page *page);
 extern void __remove_from_page_cache(struct page *page);
 
 /*
+ * Like add_to_page_cache_locked, but used to add newly allocated pages:
+ * the page is new, so we can just run SetPageLocked() against it.
+ */
+static inline int add_to_page_cache(struct page *page,
+		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
+{
+	int error;
+
+	SetPageLocked(page);
+	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
+	if (unlikely(error))
+		ClearPageLocked(page);
+	return error;
+}
+
+/*
  * Return byte-offset into filesystem object for page.
  */
 static inline loff_t page_offset(struct page *page)
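The protocol comment above is abstract, so here is a minimal sketch (illustration only, not part of this patch) of the lookup side: a find_get_page-style helper built on page_cache_get_speculative(). The helper name lockless_lookup and the direct radix_tree_lookup() calls are assumptions for illustration; the actual conversion of find_get_page() is done in a separate patch.

/* Sketch only: lookup-side pattern (steps 1-3 from the comment above). */
static struct page *lockless_lookup(struct address_space *mapping,
				    pgoff_t offset)
{
	struct page *page;

	rcu_read_lock();
repeat:
	/* 1. find page in radix tree */
	page = radix_tree_lookup(&mapping->page_tree, offset);
	if (page) {
		/* 2. conditionally increment refcount */
		if (!page_cache_get_speculative(page))
			goto repeat;	/* page was (being) freed, retry */
		/* 3. check the page is still in pagecache at this offset */
		if (unlikely(page != radix_tree_lookup(&mapping->page_tree,
							offset))) {
			/* raced with removal: drop the speculative ref */
			page_cache_release(page);
			goto repeat;
		}
	}
	rcu_read_unlock();

	return page;
}

Both critical interleavings from the comment are covered: if the speculative get wins, the remove side sees an elevated refcount and bails out; if the remove side wins, the get fails (or the recheck fails) and the lookup retries.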
diff --git a/mm/filemap.c b/mm/filemap.c
index 2d3ec1ffc66e..4e182a9a14c0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -442,39 +442,43 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 }
 
 /**
- * add_to_page_cache - add newly allocated pagecache pages
+ * add_to_page_cache_locked - add a locked page to the pagecache
  * @page:	page to add
  * @mapping:	the page's address_space
  * @offset:	page index
  * @gfp_mask:	page allocation mode
  *
- * This function is used to add newly allocated pagecache pages;
- * the page is new, so we can just run SetPageLocked() against it.
- * The other page state flags were set by rmqueue().
- *
+ * This function is used to add a page to the pagecache. It must be locked.
  * This function does not add the page to the LRU.  The caller must do that.
  */
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
 {
-	int error = mem_cgroup_cache_charge(page, current->mm,
+	int error;
+
+	VM_BUG_ON(!PageLocked(page));
+
+	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & ~__GFP_HIGHMEM);
 	if (error)
 		goto out;
 
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error == 0) {
+		page_cache_get(page);
+		page->mapping = mapping;
+		page->index = offset;
+
 		write_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageLocked(page);
-			page->mapping = mapping;
-			page->index = offset;
+		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
-		} else
+		} else {
+			page->mapping = NULL;
 			mem_cgroup_uncharge_cache_page(page);
+			page_cache_release(page);
+		}
 
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
@@ -483,7 +487,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 out:
 	return error;
 }
-EXPORT_SYMBOL(add_to_page_cache);
+EXPORT_SYMBOL(add_to_page_cache_locked);
 
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
diff --git a/mm/migrate.c b/mm/migrate.c
index d8c65a65c61d..3ca6392e82cc 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 
 	page = migration_entry_to_page(entry);
 
-	get_page(page);
+	/*
+	 * Once radix-tree replacement of page migration started, page_count
+	 * *must* be zero. And, we don't want to call wait_on_page_locked()
+	 * against a page without get_page().
+	 * So, we use get_page_unless_zero(), here. Even failed, page fault
+	 * will occur again.
+	 */
+	if (!get_page_unless_zero(page))
+		goto out;
 	pte_unmap_unlock(ptep, ptl);
 	wait_on_page_locked(page);
 	put_page(page);
@@ -305,6 +313,7 @@ out:
 static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page)
 {
+	int expected_count;
 	void **pslot;
 
 	if (!mapping) {
@@ -319,12 +328,18 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));
 
-	if (page_count(page) != 2 + !!PagePrivate(page) ||
+	expected_count = 2 + !!PagePrivate(page);
+	if (page_count(page) != expected_count ||
 			(struct page *)radix_tree_deref_slot(pslot) != page) {
 		write_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
 
+	if (!page_freeze_refs(page, expected_count)) {
+		write_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
 	/*
 	 * Now we know that no one else is looking at the page.
 	 */
@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	radix_tree_replace_slot(pslot, newpage);
 
+	page_unfreeze_refs(page, expected_count);
 	/*
 	 * Drop cache reference from old page.
 	 * We know this isn't the last reference.
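Taken together, the migrate.c hunks apply the same freeze/unfreeze idea as the remove-side sequence in the pagemap.h comment. A condensed sketch (illustration only; locking of the new page, swap-cache handling and accounting are omitted, and the function name is hypothetical) of what migrate_page_move_mapping() now does:

/* Sketch only: the replace side, run under mapping->tree_lock. */
static int move_mapping_sketch(struct address_space *mapping,
			       struct page *newpage, struct page *page)
{
	int expected_count = 2 + !!PagePrivate(page);
	void **pslot;

	write_lock_irq(&mapping->tree_lock);
	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	/* verify the refcount is as expected and freeze it at zero */
	if (page_count(page) != expected_count ||
			(struct page *)radix_tree_deref_slot(pslot) != page ||
			!page_freeze_refs(page, expected_count)) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;		/* someone else holds a reference */
	}

	/* no lookup can take a new reference while _count is frozen */
	get_page(newpage);		/* pagecache reference for newpage */
	radix_tree_replace_slot(pslot, newpage);

	/* restore the old page's count; the caller drops the cache ref */
	page_unfreeze_refs(page, expected_count);
	write_unlock_irq(&mapping->tree_lock);
	return 0;
}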
diff --git a/mm/shmem.c b/mm/shmem.c
index f92fea94d037..1089092aecaf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -936,7 +936,7 @@ found:
 	spin_lock(&info->lock);
 	ptr = shmem_swp_entry(info, idx, NULL);
 	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache(page, inode->i_mapping,
+		error = add_to_page_cache_locked(page, inode->i_mapping,
 						idx, GFP_NOWAIT);
 		/* does mem_cgroup_uncharge_cache_page on error */
 	} else	/* we must compensate for our precharge above */
@@ -1301,8 +1301,8 @@ repeat:
 			SetPageUptodate(filepage);
 			set_page_dirty(filepage);
 			swap_free(swap);
-		} else if (!(error = add_to_page_cache(
-				swappage, mapping, idx, GFP_NOWAIT))) {
+		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
+					idx, GFP_NOWAIT))) {
 			info->flags |= SHMEM_PAGEIN;
 			shmem_swp_set(info, entry, 0);
 			shmem_swp_unmap(entry);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d8aadaf2a0ba..3e3381d6c7ee 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -64,7 +64,7 @@ void show_swap_cache_info(void)
 }
 
 /*
- * add_to_swap_cache resembles add_to_page_cache on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
 int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
@@ -76,19 +76,26 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	BUG_ON(PagePrivate(page));
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
+		page_cache_get(page);
+		SetPageSwapCache(page);
+		set_page_private(page, entry.val);
+
 		write_lock_irq(&swapper_space.tree_lock);
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageSwapCache(page);
-			set_page_private(page, entry.val);
+		if (likely(!error)) {
 			total_swapcache_pages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
 			INC_CACHE_INFO(add_total);
 		}
 		write_unlock_irq(&swapper_space.tree_lock);
 		radix_tree_preload_end();
+
+		if (unlikely(error)) {
+			set_page_private(page, 0UL);
+			ClearPageSwapCache(page);
+			page_cache_release(page);
+		}
 	}
 	return error;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26672c6cd3ce..0075eac1cd04 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -391,12 +391,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 }
 
 /*
- * Attempt to detach a locked page from its ->mapping. If it is dirty or if
- * someone else has a ref on the page, abort and return 0. If it was
- * successfully detached, return 1. Assumes the caller has a single ref on
- * this page.
+ * Same as remove_mapping, but if the page is removed from the mapping, it
+ * gets returned with a refcount of 0.
  */
-int remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
@@ -427,24 +425,24 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
 	 * and thus under tree_lock, then this ordering is not required.
 	 */
-	if (unlikely(page_count(page) != 2))
+	if (!page_freeze_refs(page, 2))
 		goto cannot_free;
-	smp_rmb();
-	if (unlikely(PageDirty(page)))
+	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+	if (unlikely(PageDirty(page))) {
+		page_unfreeze_refs(page, 2);
 		goto cannot_free;
+	}
 
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
-		__put_page(page);	/* The pagecache ref */
-		return 1;
+	} else {
+		__remove_from_page_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
 	}
 
-	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
-	__put_page(page);
 	return 1;
 
 cannot_free:
@@ -453,6 +451,26 @@ cannot_free:
 }
 
 /*
+ * Attempt to detach a locked page from its ->mapping. If it is dirty or if
+ * someone else has a ref on the page, abort and return 0. If it was
+ * successfully detached, return 1. Assumes the caller has a single ref on
+ * this page.
+ */
+int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (__remove_mapping(mapping, page)) {
+		/*
+		 * Unfreezing the refcount with 1 rather than 2 effectively
+		 * drops the pagecache ref for us without requiring another
+		 * atomic operation.
+		 */
+		page_unfreeze_refs(page, 1);
+		return 1;
+	}
+	return 0;
+}
+
+/*
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
@@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PagePrivate(page)) {
 			if (!try_to_release_page(page, sc->gfp_mask))
 				goto activate_locked;
-			if (!mapping && page_count(page) == 1)
-				goto free_it;
+			if (!mapping && page_count(page) == 1) {
+				unlock_page(page);
+				if (put_page_testzero(page))
+					goto free_it;
+				else {
+					/*
+					 * rare race with speculative reference.
+					 * the speculative reference will free
+					 * this page shortly, so we may
+					 * increment nr_reclaimed here (and
+					 * leave it off the LRU).
+					 */
+					nr_reclaimed++;
+					continue;
+				}
+			}
 		}
 
-		if (!mapping || !remove_mapping(mapping, page))
+		if (!mapping || !__remove_mapping(mapping, page))
 			goto keep_locked;
 
-free_it:
 		unlock_page(page);
+free_it:
 		nr_reclaimed++;
-		if (!pagevec_add(&freed_pvec, page))
-			__pagevec_release_nonlru(&freed_pvec);
+		if (!pagevec_add(&freed_pvec, page)) {
+			__pagevec_free(&freed_pvec);
+			pagevec_reinit(&freed_pvec);
+		}
 		continue;
 
 activate_locked:
@@ -623,7 +657,7 @@ keep:
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
-		__pagevec_release_nonlru(&freed_pvec);
+		__pagevec_free(&freed_pvec);
 	count_vm_events(PGACTIVATE, pgactivate);
 	return nr_reclaimed;
 }
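On the reclaim side, __remove_mapping() freezes the count at 2 (the isolation reference held by the caller plus the pagecache reference), and remove_mapping() then unfreezes it to 1 so the pagecache reference is dropped without a second atomic operation. A rough sketch of the resulting refcount flow from a hypothetical caller (roughly what an invalidate path does; the function name is illustrative, and it assumes the caller's reference and the pagecache reference are the only two):

/* Sketch only: refcount flow seen by a remove_mapping() caller. */
static int drop_pagecache_page(struct address_space *mapping,
			       struct page *page)
{
	int ret;

	/* caller's ref + pagecache ref: page_count(page) == 2 */
	lock_page(page);			/* __remove_mapping needs PageLocked */
	ret = remove_mapping(mapping, page);	/* on success: 2 -> frozen 0 -> 1 */
	unlock_page(page);

	put_page(page);		/* drop the caller's reference */
	/* on success the page is now freed; on failure it stays in pagecache */
	return ret;
}

shrink_page_list() instead calls __remove_mapping() directly, leaves the count frozen at 0, and frees the pages with __pagevec_free() rather than the old __pagevec_release_nonlru() path.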