about | summary | refs | log | tree | commit | diff | stats
path: root/mm/shmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- mm/shmem.c 132
1 files changed, 100 insertions, 32 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
402/* 402/*
403 * shmem_free_swp - free some swap entries in a directory 403 * shmem_free_swp - free some swap entries in a directory
404 * 404 *
405 * @dir: pointer to the directory 405 * @dir: pointer to the directory
406 * @edir: pointer after last entry of the directory 406 * @edir: pointer after last entry of the directory
407 * @punch_lock: pointer to spinlock when needed for the holepunch case
407 */ 408 */
408static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir) 409static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
410 spinlock_t *punch_lock)
409{ 411{
412 spinlock_t *punch_unlock = NULL;
410 swp_entry_t *ptr; 413 swp_entry_t *ptr;
411 int freed = 0; 414 int freed = 0;
412 415
413 for (ptr = dir; ptr < edir; ptr++) { 416 for (ptr = dir; ptr < edir; ptr++) {
414 if (ptr->val) { 417 if (ptr->val) {
418 if (unlikely(punch_lock)) {
419 punch_unlock = punch_lock;
420 punch_lock = NULL;
421 spin_lock(punch_unlock);
422 if (!ptr->val)
423 continue;
424 }
415 free_swap_and_cache(*ptr); 425 free_swap_and_cache(*ptr);
416 *ptr = (swp_entry_t){0}; 426 *ptr = (swp_entry_t){0};
417 freed++; 427 freed++;
418 } 428 }
419 } 429 }
430 if (punch_unlock)
431 spin_unlock(punch_unlock);
420 return freed; 432 return freed;
421} 433}
422 434
423static int shmem_map_and_free_swp(struct page *subdir, 435static int shmem_map_and_free_swp(struct page *subdir, int offset,
424 int offset, int limit, struct page ***dir) 436 int limit, struct page ***dir, spinlock_t *punch_lock)
425{ 437{
426 swp_entry_t *ptr; 438 swp_entry_t *ptr;
427 int freed = 0; 439 int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
431 int size = limit - offset; 443 int size = limit - offset;
432 if (size > LATENCY_LIMIT) 444 if (size > LATENCY_LIMIT)
433 size = LATENCY_LIMIT; 445 size = LATENCY_LIMIT;
434 freed += shmem_free_swp(ptr+offset, ptr+offset+size); 446 freed += shmem_free_swp(ptr+offset, ptr+offset+size,
447 punch_lock);
435 if (need_resched()) { 448 if (need_resched()) {
436 shmem_swp_unmap(ptr); 449 shmem_swp_unmap(ptr);
437 if (*dir) { 450 if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
481 long nr_swaps_freed = 0; 494 long nr_swaps_freed = 0;
482 int offset; 495 int offset;
483 int freed; 496 int freed;
484 int punch_hole = 0; 497 int punch_hole;
498 spinlock_t *needs_lock;
499 spinlock_t *punch_lock;
500 unsigned long upper_limit;
485 501
486 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 502 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
487 idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 503 idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
492 info->flags |= SHMEM_TRUNCATE; 508 info->flags |= SHMEM_TRUNCATE;
493 if (likely(end == (loff_t) -1)) { 509 if (likely(end == (loff_t) -1)) {
494 limit = info->next_index; 510 limit = info->next_index;
511 upper_limit = SHMEM_MAX_INDEX;
495 info->next_index = idx; 512 info->next_index = idx;
513 needs_lock = NULL;
514 punch_hole = 0;
496 } else { 515 } else {
497 limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 516 if (end + 1 >= inode->i_size) { /* we may free a little more */
498 if (limit > info->next_index) 517 limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
499 limit = info->next_index; 518 PAGE_CACHE_SHIFT;
519 upper_limit = SHMEM_MAX_INDEX;
520 } else {
521 limit = (end + 1) >> PAGE_CACHE_SHIFT;
522 upper_limit = limit;
523 }
524 needs_lock = &info->lock;
500 punch_hole = 1; 525 punch_hole = 1;
501 } 526 }
502 527
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
513 size = limit; 538 size = limit;
514 if (size > SHMEM_NR_DIRECT) 539 if (size > SHMEM_NR_DIRECT)
515 size = SHMEM_NR_DIRECT; 540 size = SHMEM_NR_DIRECT;
516 nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size); 541 nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
517 } 542 }
518 543
519 /* 544 /*
520 * If there are no indirect blocks or we are punching a hole 545 * If there are no indirect blocks or we are punching a hole
521 * below indirect blocks, nothing to be done. 546 * below indirect blocks, nothing to be done.
522 */ 547 */
523 if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT))) 548 if (!topdir || limit <= SHMEM_NR_DIRECT)
524 goto done2; 549 goto done2;
525 550
526 BUG_ON(limit <= SHMEM_NR_DIRECT); 551 /*
552 * The truncation case has already dropped info->lock, and we're safe
553 * because i_size and next_index have already been lowered, preventing
554 * access beyond. But in the punch_hole case, we still need to take
555 * the lock when updating the swap directory, because there might be
556 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
557 * shmem_writepage. However, whenever we find we can remove a whole
558 * directory page (not at the misaligned start or end of the range),
559 * we first NULLify its pointer in the level above, and then have no
560 * need to take the lock when updating its contents: needs_lock and
561 * punch_lock (either pointing to info->lock or NULL) manage this.
562 */
563
564 upper_limit -= SHMEM_NR_DIRECT;
527 limit -= SHMEM_NR_DIRECT; 565 limit -= SHMEM_NR_DIRECT;
528 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; 566 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
529 offset = idx % ENTRIES_PER_PAGE; 567 offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
543 if (*dir) { 581 if (*dir) {
544 diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % 582 diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
545 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; 583 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
546 if (!diroff && !offset) { 584 if (!diroff && !offset && upper_limit >= stage) {
547 *dir = NULL; 585 if (needs_lock) {
586 spin_lock(needs_lock);
587 *dir = NULL;
588 spin_unlock(needs_lock);
589 needs_lock = NULL;
590 } else
591 *dir = NULL;
548 nr_pages_to_free++; 592 nr_pages_to_free++;
549 list_add(&middir->lru, &pages_to_free); 593 list_add(&middir->lru, &pages_to_free);
550 } 594 }
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
570 } 614 }
571 stage = idx + ENTRIES_PER_PAGEPAGE; 615 stage = idx + ENTRIES_PER_PAGEPAGE;
572 middir = *dir; 616 middir = *dir;
573 *dir = NULL; 617 if (punch_hole)
574 nr_pages_to_free++; 618 needs_lock = &info->lock;
575 list_add(&middir->lru, &pages_to_free); 619 if (upper_limit >= stage) {
620 if (needs_lock) {
621 spin_lock(needs_lock);
622 *dir = NULL;
623 spin_unlock(needs_lock);
624 needs_lock = NULL;
625 } else
626 *dir = NULL;
627 nr_pages_to_free++;
628 list_add(&middir->lru, &pages_to_free);
629 }
576 shmem_dir_unmap(dir); 630 shmem_dir_unmap(dir);
577 cond_resched(); 631 cond_resched();
578 dir = shmem_dir_map(middir); 632 dir = shmem_dir_map(middir);
579 diroff = 0; 633 diroff = 0;
580 } 634 }
635 punch_lock = needs_lock;
581 subdir = dir[diroff]; 636 subdir = dir[diroff];
582 if (subdir && page_private(subdir)) { 637 if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
638 if (needs_lock) {
639 spin_lock(needs_lock);
640 dir[diroff] = NULL;
641 spin_unlock(needs_lock);
642 punch_lock = NULL;
643 } else
644 dir[diroff] = NULL;
645 nr_pages_to_free++;
646 list_add(&subdir->lru, &pages_to_free);
647 }
648 if (subdir && page_private(subdir) /* has swap entries */) {
583 size = limit - idx; 649 size = limit - idx;
584 if (size > ENTRIES_PER_PAGE) 650 if (size > ENTRIES_PER_PAGE)
585 size = ENTRIES_PER_PAGE; 651 size = ENTRIES_PER_PAGE;
586 freed = shmem_map_and_free_swp(subdir, 652 freed = shmem_map_and_free_swp(subdir,
587 offset, size, &dir); 653 offset, size, &dir, punch_lock);
588 if (!dir) 654 if (!dir)
589 dir = shmem_dir_map(middir); 655 dir = shmem_dir_map(middir);
590 nr_swaps_freed += freed; 656 nr_swaps_freed += freed;
591 if (offset) 657 if (offset || punch_lock) {
592 spin_lock(&info->lock); 658 spin_lock(&info->lock);
593 set_page_private(subdir, page_private(subdir) - freed); 659 set_page_private(subdir,
594 if (offset) 660 page_private(subdir) - freed);
595 spin_unlock(&info->lock); 661 spin_unlock(&info->lock);
596 if (!punch_hole) 662 } else
597 BUG_ON(page_private(subdir) > offset); 663 BUG_ON(page_private(subdir) != freed);
598 }
599 if (offset)
600 offset = 0;
601 else if (subdir && !page_private(subdir)) {
602 dir[diroff] = NULL;
603 nr_pages_to_free++;
604 list_add(&subdir->lru, &pages_to_free);
605 } 664 }
665 offset = 0;
606 } 666 }
607done1: 667done1:
608 shmem_dir_unmap(dir); 668 shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
614 * generic_delete_inode did it, before we lowered next_index. 674 * generic_delete_inode did it, before we lowered next_index.
615 * Also, though shmem_getpage checks i_size before adding to 675 * Also, though shmem_getpage checks i_size before adding to
616 * cache, no recheck after: so fix the narrow window there too. 676 * cache, no recheck after: so fix the narrow window there too.
677 *
678 * Recalling truncate_inode_pages_range and unmap_mapping_range
679 * every time for punch_hole (which never got a chance to clear
680 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
681 * yet hardly ever necessary: try to optimize them out later.
617 */ 682 */
618 truncate_inode_pages_range(inode->i_mapping, start, end); 683 truncate_inode_pages_range(inode->i_mapping, start, end);
684 if (punch_hole)
685 unmap_mapping_range(inode->i_mapping, start,
686 end - start, 1);
619 } 687 }
620 688
621 spin_lock(&info->lock); 689 spin_lock(&info->lock);