about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
author Hugh Dickins <hughd@google.com> 2011-08-03 19:21:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-08-03 20:25:23 -0400
commit 46f65ec15c6878a2b4a49f6e01b20b201b46a9e4 (patch)
tree 3b72804ee87b4a5c7ec86b5059e20ddea82d3c2c /mm
parent 7a5d0fbb29936fad7f17b1cb001b0c33a5f13328 (diff)
tmpfs: convert shmem_unuse_inode to radix-swap
Convert shmem_unuse_inode() to use a lockless gang lookup of the radix tree, searching for matching swap.

This is somewhat slower than the old method: because of repeated radix tree descents, because of copying entries up, but probably most because the old method noted and skipped once a vector page was cleared of swap. Perhaps we can devise a use of radix tree tagging to achieve that later.

shmem_add_to_page_cache() uses shmem_radix_tree_replace() to compensate for the lockless lookup by checking that the expected entry is in place, under lock. It is not very satisfactory to be copying this much from add_to_page_cache_locked(), but I think easier to sell than insisting that every caller of add_to_page_cache*() go through the extras.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/shmem.c 133
1 files changed, 107 insertions, 26 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 4439b7d55819..174f97188e8a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -262,6 +262,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
262} 262}
263 263
264/* 264/*
265 * Like add_to_page_cache_locked, but error if expected item has gone.
266 */
267static int shmem_add_to_page_cache(struct page *page,
268 struct address_space *mapping,
269 pgoff_t index, gfp_t gfp, void *expected)
270{
271 int error;
272
273 VM_BUG_ON(!PageLocked(page));
274 VM_BUG_ON(!PageSwapBacked(page));
275
276 error = mem_cgroup_cache_charge(page, current->mm,
277 gfp & GFP_RECLAIM_MASK);
278 if (error)
279 goto out;
280 if (!expected)
281 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
282 if (!error) {
283 page_cache_get(page);
284 page->mapping = mapping;
285 page->index = index;
286
287 spin_lock_irq(&mapping->tree_lock);
288 if (!expected)
289 error = radix_tree_insert(&mapping->page_tree,
290 index, page);
291 else
292 error = shmem_radix_tree_replace(mapping, index,
293 expected, page);
294 if (!error) {
295 mapping->nrpages++;
296 __inc_zone_page_state(page, NR_FILE_PAGES);
297 __inc_zone_page_state(page, NR_SHMEM);
298 spin_unlock_irq(&mapping->tree_lock);
299 } else {
300 page->mapping = NULL;
301 spin_unlock_irq(&mapping->tree_lock);
302 page_cache_release(page);
303 }
304 if (!expected)
305 radix_tree_preload_end();
306 }
307 if (error)
308 mem_cgroup_uncharge_cache_page(page);
309out:
310 return error;
311}
312
313/*
265 * Like find_get_pages, but collecting swap entries as well as pages. 314 * Like find_get_pages, but collecting swap entries as well as pages.
266 */ 315 */
267static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, 316static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
@@ -309,6 +358,42 @@ export:
309} 358}
310 359
311/* 360/*
361 * Lockless lookup of swap entry in radix tree, avoiding refcount on pages.
362 */
363static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap)
364{
365 void **slots[PAGEVEC_SIZE];
366 pgoff_t indices[PAGEVEC_SIZE];
367 unsigned int nr_found;
368
369restart:
370 nr_found = 1;
371 indices[0] = -1;
372 while (nr_found) {
373 pgoff_t index = indices[nr_found - 1] + 1;
374 unsigned int i;
375
376 rcu_read_lock();
377 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
378 slots, indices, index, PAGEVEC_SIZE);
379 for (i = 0; i < nr_found; i++) {
380 void *item = radix_tree_deref_slot(slots[i]);
381 if (radix_tree_deref_retry(item)) {
382 rcu_read_unlock();
383 goto restart;
384 }
385 if (item == radswap) {
386 rcu_read_unlock();
387 return indices[i];
388 }
389 }
390 rcu_read_unlock();
391 cond_resched();
392 }
393 return -1;
394}
395
396/*
312 * Remove swap entry from radix tree, free the swap and its page cache. 397 * Remove swap entry from radix tree, free the swap and its page cache.
313 */ 398 */
314static int shmem_free_swap(struct address_space *mapping, 399static int shmem_free_swap(struct address_space *mapping,
@@ -515,23 +600,21 @@ static void shmem_evict_inode(struct inode *inode)
515 end_writeback(inode); 600 end_writeback(inode);
516} 601}
517 602
603/*
604 * If swap found in inode, free it and move page from swapcache to filecache.
605 */
518static int shmem_unuse_inode(struct shmem_inode_info *info, 606static int shmem_unuse_inode(struct shmem_inode_info *info,
519 swp_entry_t swap, struct page *page) 607 swp_entry_t swap, struct page *page)
520{ 608{
521 struct address_space *mapping = info->vfs_inode.i_mapping; 609 struct address_space *mapping = info->vfs_inode.i_mapping;
610 void *radswap;
522 pgoff_t index; 611 pgoff_t index;
523 int error; 612 int error;
524 613
525 for (index = 0; index < SHMEM_NR_DIRECT; index++) 614 radswap = swp_to_radix_entry(swap);
526 if (shmem_get_swap(info, index).val == swap.val) 615 index = shmem_find_swap(mapping, radswap);
527 goto found; 616 if (index == -1)
528 return 0;
529found:
530 spin_lock(&info->lock);
531 if (shmem_get_swap(info, index).val != swap.val) {
532 spin_unlock(&info->lock);
533 return 0; 617 return 0;
534 }
535 618
536 /* 619 /*
537 * Move _head_ to start search for next from here. 620 * Move _head_ to start search for next from here.
@@ -547,23 +630,30 @@ found:
547 * but also to hold up shmem_evict_inode(): so inode cannot be freed 630 * but also to hold up shmem_evict_inode(): so inode cannot be freed
548 * beneath us (pagelock doesn't help until the page is in pagecache). 631 * beneath us (pagelock doesn't help until the page is in pagecache).
549 */ 632 */
550 error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT); 633 error = shmem_add_to_page_cache(page, mapping, index,
634 GFP_NOWAIT, radswap);
551 /* which does mem_cgroup_uncharge_cache_page on error */ 635 /* which does mem_cgroup_uncharge_cache_page on error */
552 636
553 if (error != -ENOMEM) { 637 if (error != -ENOMEM) {
638 /*
639 * Truncation and eviction use free_swap_and_cache(), which
640 * only does trylock page: if we raced, best clean up here.
641 */
554 delete_from_swap_cache(page); 642 delete_from_swap_cache(page);
555 set_page_dirty(page); 643 set_page_dirty(page);
556 shmem_put_swap(info, index, (swp_entry_t){0}); 644 if (!error) {
557 info->swapped--; 645 spin_lock(&info->lock);
558 swap_free(swap); 646 info->swapped--;
647 spin_unlock(&info->lock);
648 swap_free(swap);
649 }
559 error = 1; /* not an error, but entry was found */ 650 error = 1; /* not an error, but entry was found */
560 } 651 }
561 spin_unlock(&info->lock);
562 return error; 652 return error;
563} 653}
564 654
565/* 655/*
566 * shmem_unuse() search for an eventually swapped out shmem page. 656 * Search through swapped inodes to find and replace swap by page.
567 */ 657 */
568int shmem_unuse(swp_entry_t swap, struct page *page) 658int shmem_unuse(swp_entry_t swap, struct page *page)
569{ 659{
@@ -576,20 +666,12 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
576 * Charge page using GFP_KERNEL while we can wait, before taking 666 * Charge page using GFP_KERNEL while we can wait, before taking
577 * the shmem_swaplist_mutex which might hold up shmem_writepage(). 667 * the shmem_swaplist_mutex which might hold up shmem_writepage().
578 * Charged back to the user (not to caller) when swap account is used. 668 * Charged back to the user (not to caller) when swap account is used.
579 * add_to_page_cache() will be called with GFP_NOWAIT. 669 * shmem_add_to_page_cache() will be called with GFP_NOWAIT.
580 */ 670 */
581 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 671 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
582 if (error) 672 if (error)
583 goto out; 673 goto out;
584 /* 674 /* No radix_tree_preload: swap entry keeps a place for page in tree */
585 * Try to preload while we can wait, to not make a habit of
586 * draining atomic reserves; but don't latch on to this cpu,
587 * it's okay if sometimes we get rescheduled after this.
588 */
589 error = radix_tree_preload(GFP_KERNEL);
590 if (error)
591 goto uncharge;
592 radix_tree_preload_end();
593 675
594 mutex_lock(&shmem_swaplist_mutex); 676 mutex_lock(&shmem_swaplist_mutex);
595 list_for_each_safe(this, next, &shmem_swaplist) { 677 list_for_each_safe(this, next, &shmem_swaplist) {
@@ -608,7 +690,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
608 } 690 }
609 mutex_unlock(&shmem_swaplist_mutex); 691 mutex_unlock(&shmem_swaplist_mutex);
610 692
611uncharge:
612 if (!found) 693 if (!found)
613 mem_cgroup_uncharge_cache_page(page); 694 mem_cgroup_uncharge_cache_page(page);
614 if (found < 0) 695 if (found < 0)