| field | value | date |
|---|---|---|
| author | Hugh Dickins <hughd@google.com> | 2011-08-03 19:21:23 -0400 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-03 20:25:23 -0400 |
| commit | 46f65ec15c6878a2b4a49f6e01b20b201b46a9e4 | |
| tree | 3b72804ee87b4a5c7ec86b5059e20ddea82d3c2c /mm/shmem.c | |
| parent | 7a5d0fbb29936fad7f17b1cb001b0c33a5f13328 | |
tmpfs: convert shmem_unuse_inode to radix-swap
Convert shmem_unuse_inode() to use a lockless gang lookup of the radix
tree, searching for matching swap.
This is somewhat slower than the old method: partly because of repeated radix
tree descents and partly because of copying entries up, but probably most
because the old method noted once a vector page had been cleared of swap and
skipped it thereafter. Perhaps we can devise a use of radix tree tagging to
achieve that later.
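
As a rough sketch of what that tagging might look like (purely hypothetical, not part of this patch): tag a slot whenever a swap entry is stored in it, and let the tag bits, which propagate up to the root, tell the unuse scan whether a mapping contains any swap at all. `SHMEM_TAG_SWAP` below is an invented name; a real version would have to claim one of the few tag bits the radix tree reserves per slot.

```c
#include <linux/fs.h>
#include <linux/radix-tree.h>

#define SHMEM_TAG_SWAP	2	/* hypothetical: would need a spare tag bit */

/* Tag a slot that now holds a swap entry; caller holds mapping->tree_lock. */
static void shmem_tag_swap(struct address_space *mapping, pgoff_t index)
{
	radix_tree_tag_set(&mapping->page_tree, index, SHMEM_TAG_SWAP);
}

/*
 * Tag bits propagate to the root, so a mapping with no swap left could be
 * skipped without descending into its tree at all.
 */
static bool shmem_mapping_has_swap(struct address_space *mapping)
{
	return radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_SWAP);
}
```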
shmem_add_to_page_cache() uses shmem_radix_tree_replace() to compensate for
the lockless lookup by checking, under lock, that the expected entry is still
in place. It is not very satisfactory to be copying this much from
add_to_page_cache_locked(), but I think it is easier to sell than insisting
that every caller of add_to_page_cache*() go through the extras.
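
For reference, the expected-entry check itself lives in shmem_radix_tree_replace(), which was introduced by the parent commit (7a5d0fbb2993); this patch only calls it. The sketch below is an approximation of that helper's shape, included so the check-under-lock step reads concretely; the parent diff has the authoritative body.

```c
/*
 * Approximate sketch of the parent commit's helper: with tree_lock held,
 * confirm the slot still holds what the lockless lookup saw, then replace.
 */
static int shmem_radix_tree_replace(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	void **pslot;
	void *item = NULL;

	VM_BUG_ON(!expected);
	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
	if (pslot)
		item = radix_tree_deref_slot_protected(pslot,
						&mapping->tree_lock);
	if (item != expected)
		return -ENOENT;		/* raced: expected entry has gone */
	if (replacement)
		radix_tree_replace_slot(pslot, replacement);
	else
		radix_tree_delete(&mapping->page_tree, index);
	return 0;
}
```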
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/shmem.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/shmem.c | 133 |

1 file changed, 107 insertions(+), 26 deletions(-)
```diff
diff --git a/mm/shmem.c b/mm/shmem.c
index 4439b7d55819..174f97188e8a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -262,6 +262,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 }
 
 /*
+ * Like add_to_page_cache_locked, but error if expected item has gone.
+ */
+static int shmem_add_to_page_cache(struct page *page,
+                                   struct address_space *mapping,
+                                   pgoff_t index, gfp_t gfp, void *expected)
+{
+        int error;
+
+        VM_BUG_ON(!PageLocked(page));
+        VM_BUG_ON(!PageSwapBacked(page));
+
+        error = mem_cgroup_cache_charge(page, current->mm,
+                                        gfp & GFP_RECLAIM_MASK);
+        if (error)
+                goto out;
+        if (!expected)
+                error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+        if (!error) {
+                page_cache_get(page);
+                page->mapping = mapping;
+                page->index = index;
+
+                spin_lock_irq(&mapping->tree_lock);
+                if (!expected)
+                        error = radix_tree_insert(&mapping->page_tree,
+                                                  index, page);
+                else
+                        error = shmem_radix_tree_replace(mapping, index,
+                                                         expected, page);
+                if (!error) {
+                        mapping->nrpages++;
+                        __inc_zone_page_state(page, NR_FILE_PAGES);
+                        __inc_zone_page_state(page, NR_SHMEM);
+                        spin_unlock_irq(&mapping->tree_lock);
+                } else {
+                        page->mapping = NULL;
+                        spin_unlock_irq(&mapping->tree_lock);
+                        page_cache_release(page);
+                }
+                if (!expected)
+                        radix_tree_preload_end();
+        }
+        if (error)
+                mem_cgroup_uncharge_cache_page(page);
+out:
+        return error;
+}
+
+/*
  * Like find_get_pages, but collecting swap entries as well as pages.
  */
 static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
@@ -309,6 +358,42 @@ export:
 }
 
 /*
+ * Lockless lookup of swap entry in radix tree, avoiding refcount on pages.
+ */
+static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap)
+{
+        void **slots[PAGEVEC_SIZE];
+        pgoff_t indices[PAGEVEC_SIZE];
+        unsigned int nr_found;
+
+restart:
+        nr_found = 1;
+        indices[0] = -1;
+        while (nr_found) {
+                pgoff_t index = indices[nr_found - 1] + 1;
+                unsigned int i;
+
+                rcu_read_lock();
+                nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+                                slots, indices, index, PAGEVEC_SIZE);
+                for (i = 0; i < nr_found; i++) {
+                        void *item = radix_tree_deref_slot(slots[i]);
+                        if (radix_tree_deref_retry(item)) {
+                                rcu_read_unlock();
+                                goto restart;
+                        }
+                        if (item == radswap) {
+                                rcu_read_unlock();
+                                return indices[i];
+                        }
+                }
+                rcu_read_unlock();
+                cond_resched();
+        }
+        return -1;
+}
+
+/*
  * Remove swap entry from radix tree, free the swap and its page cache.
  */
 static int shmem_free_swap(struct address_space *mapping,
@@ -515,23 +600,21 @@ static void shmem_evict_inode(struct inode *inode)
         end_writeback(inode);
 }
 
+/*
+ * If swap found in inode, free it and move page from swapcache to filecache.
+ */
 static int shmem_unuse_inode(struct shmem_inode_info *info,
                              swp_entry_t swap, struct page *page)
 {
         struct address_space *mapping = info->vfs_inode.i_mapping;
+        void *radswap;
         pgoff_t index;
         int error;
 
-        for (index = 0; index < SHMEM_NR_DIRECT; index++)
-                if (shmem_get_swap(info, index).val == swap.val)
-                        goto found;
-        return 0;
-found:
-        spin_lock(&info->lock);
-        if (shmem_get_swap(info, index).val != swap.val) {
-                spin_unlock(&info->lock);
+        radswap = swp_to_radix_entry(swap);
+        index = shmem_find_swap(mapping, radswap);
+        if (index == -1)
                 return 0;
-        }
 
         /*
          * Move _head_ to start search for next from here.
@@ -547,23 +630,30 @@ found:
          * but also to hold up shmem_evict_inode(): so inode cannot be freed
          * beneath us (pagelock doesn't help until the page is in pagecache).
          */
-        error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT);
+        error = shmem_add_to_page_cache(page, mapping, index,
+                                        GFP_NOWAIT, radswap);
         /* which does mem_cgroup_uncharge_cache_page on error */
 
         if (error != -ENOMEM) {
+                /*
+                 * Truncation and eviction use free_swap_and_cache(), which
+                 * only does trylock page: if we raced, best clean up here.
+                 */
                 delete_from_swap_cache(page);
                 set_page_dirty(page);
-                shmem_put_swap(info, index, (swp_entry_t){0});
-                info->swapped--;
-                swap_free(swap);
+                if (!error) {
+                        spin_lock(&info->lock);
+                        info->swapped--;
+                        spin_unlock(&info->lock);
+                        swap_free(swap);
+                }
                 error = 1;      /* not an error, but entry was found */
         }
-        spin_unlock(&info->lock);
         return error;
 }
 
 /*
- * shmem_unuse() search for an eventually swapped out shmem page.
+ * Search through swapped inodes to find and replace swap by page.
  */
 int shmem_unuse(swp_entry_t swap, struct page *page)
 {
@@ -576,20 +666,12 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
          * Charge page using GFP_KERNEL while we can wait, before taking
          * the shmem_swaplist_mutex which might hold up shmem_writepage().
          * Charged back to the user (not to caller) when swap account is used.
-         * add_to_page_cache() will be called with GFP_NOWAIT.
+         * shmem_add_to_page_cache() will be called with GFP_NOWAIT.
          */
         error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
         if (error)
                 goto out;
-        /*
-         * Try to preload while we can wait, to not make a habit of
-         * draining atomic reserves; but don't latch on to this cpu,
-         * it's okay if sometimes we get rescheduled after this.
-         */
-        error = radix_tree_preload(GFP_KERNEL);
-        if (error)
-                goto uncharge;
-        radix_tree_preload_end();
+        /* No radix_tree_preload: swap entry keeps a place for page in tree */
 
         mutex_lock(&shmem_swaplist_mutex);
         list_for_each_safe(this, next, &shmem_swaplist) {
@@ -608,7 +690,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
         }
         mutex_unlock(&shmem_swaplist_mutex);
 
-uncharge:
         if (!found)
                 mem_cgroup_uncharge_cache_page(page);
         if (found < 0)
```
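
A note on the radswap value the new code passes around: swap entries are packed into radix-tree "exceptional" entries so they can share slots with page pointers. The helpers below are shown roughly as the companion patch in this series defines them in include/linux/swapops.h; consult that header for the authoritative version.

```c
/*
 * A swap entry is stored in the radix tree with the exceptional bit set,
 * so it can never be mistaken for a struct page pointer.
 */
static inline void *swp_to_radix_entry(swp_entry_t entry)
{
	unsigned long value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;

	return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static inline swp_entry_t radix_to_swp_entry(void *arg)
{
	swp_entry_t entry;

	entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
	return entry;
}
```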