Diffstat (limited to 'mm/shmem.c')

 mm/shmem.c | 132 ++++++++++++++++++++++++++++--------
 1 file changed, 100 insertions(+), 32 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
  * @dir:	pointer to the directory
  * @edir:	pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+						spinlock_t *punch_lock)
 {
+	spinlock_t *punch_unlock = NULL;
 	swp_entry_t *ptr;
 	int freed = 0;
 
 	for (ptr = dir; ptr < edir; ptr++) {
 		if (ptr->val) {
+			if (unlikely(punch_lock)) {
+				punch_unlock = punch_lock;
+				punch_lock = NULL;
+				spin_lock(punch_unlock);
+				if (!ptr->val)
+					continue;
+			}
 			free_swap_and_cache(*ptr);
 			*ptr = (swp_entry_t){0};
 			freed++;
 		}
 	}
+	if (punch_unlock)
+		spin_unlock(punch_unlock);
 	return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-		int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+		int limit, struct page ***dir, spinlock_t *punch_lock)
 {
 	swp_entry_t *ptr;
 	int freed = 0;
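The interesting part of the hunk above is the lock handoff in shmem_free_swp(): take punch_lock at most once, on the first occupied entry, recheck that entry under the lock, then keep the lock held (remembered in punch_unlock) until the walk finishes. Here is a minimal user-space sketch of the same pattern, assuming pthreads and plain unsigned long entries in place of spinlock_t and swp_entry_t; free_entries() and its types are hypothetical stand-ins, not kernel code:

#include <pthread.h>

/*
 * Illustrative analogue of shmem_free_swp()'s punch_lock handoff:
 * lock at most once, on the first nonzero entry; NULL the incoming
 * pointer so later iterations skip the locking branch; remember the
 * lock in punch_unlock so it is dropped exactly once at the end.
 */
static int free_entries(unsigned long *dir, unsigned long *edir,
			pthread_mutex_t *punch_lock)
{
	pthread_mutex_t *punch_unlock = NULL;
	unsigned long *ptr;
	int freed = 0;

	for (ptr = dir; ptr < edir; ptr++) {
		if (*ptr) {
			if (punch_lock) {
				punch_unlock = punch_lock;
				punch_lock = NULL;
				pthread_mutex_lock(punch_unlock);
				if (!*ptr)	/* a racer freed it first */
					continue;
			}
			*ptr = 0;	/* stands in for free_swap_and_cache() */
			freed++;
		}
	}
	if (punch_unlock)
		pthread_mutex_unlock(punch_unlock);
	return freed;
}

The recheck after acquiring the lock matters: a racing user of the directory may have freed the entry between the unlocked test and the lock acquisition, and everything after the first acquisition runs with the lock held.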
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
 		int size = limit - offset;
 		if (size > LATENCY_LIMIT)
 			size = LATENCY_LIMIT;
-		freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+		freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+							punch_lock);
 		if (need_resched()) {
 			shmem_swp_unmap(ptr);
 			if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	long nr_swaps_freed = 0;
 	int offset;
 	int freed;
-	int punch_hole = 0;
+	int punch_hole;
+	spinlock_t *needs_lock;
+	spinlock_t *punch_lock;
+	unsigned long upper_limit;
 
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	info->flags |= SHMEM_TRUNCATE;
 	if (likely(end == (loff_t) -1)) {
 		limit = info->next_index;
+		upper_limit = SHMEM_MAX_INDEX;
 		info->next_index = idx;
+		needs_lock = NULL;
+		punch_hole = 0;
 	} else {
-		limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		if (limit > info->next_index)
-			limit = info->next_index;
+		if (end + 1 >= inode->i_size) {	/* we may free a little more */
+			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+							PAGE_CACHE_SHIFT;
+			upper_limit = SHMEM_MAX_INDEX;
+		} else {
+			limit = (end + 1) >> PAGE_CACHE_SHIFT;
+			upper_limit = limit;
+		}
+		needs_lock = &info->lock;
 		punch_hole = 1;
 	}
 
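To see what the new limit/upper_limit arithmetic does, here is a worked example as a small self-contained C program, assuming 4 KiB pages (PAGE_CACHE_SHIFT of 12); the byte offsets are invented for illustration:

#include <stdio.h>

/* Worked example of the hole-punch index math above, for the case
 * where the hole ends before i_size (so limit == upper_limit). */
int main(void)
{
	unsigned long long start = 5000;	/* first byte to punch */
	unsigned long long end = 12287;		/* last byte to punch */
	unsigned long long page = 4096;

	/* idx: first page index wholly inside the range (round up) */
	unsigned long long idx = (start + page - 1) >> 12;
	/* limit: one past the last wholly-covered page; end is
	 * inclusive, and (end + 1) >> 12 rounds down */
	unsigned long long limit = (end + 1) >> 12;
	/* when not extending to i_size, never free beyond the hole */
	unsigned long long upper_limit = limit;

	printf("idx=%llu limit=%llu upper_limit=%llu\n",
	       idx, limit, upper_limit);
	/* prints: idx=2 limit=3 upper_limit=3 */
	return 0;
}

Rounding idx up and limit down means a hole that starts or ends mid-page leaves the partial pages alone: only page 2 (bytes 8192..12287) lies wholly inside the example range, and idx=2, limit=3 covers exactly that one page.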
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		size = limit;
 		if (size > SHMEM_NR_DIRECT)
 			size = SHMEM_NR_DIRECT;
-		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
 	}
 
 	/*
 	 * If there are no indirect blocks or we are punching a hole
 	 * below indirect blocks, nothing to be done.
 	 */
-	if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+	if (!topdir || limit <= SHMEM_NR_DIRECT)
 		goto done2;
 
-	BUG_ON(limit <= SHMEM_NR_DIRECT);
+	/*
+	 * The truncation case has already dropped info->lock, and we're safe
+	 * because i_size and next_index have already been lowered, preventing
+	 * access beyond.  But in the punch_hole case, we still need to take
+	 * the lock when updating the swap directory, because there might be
+	 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+	 * shmem_writepage.  However, whenever we find we can remove a whole
+	 * directory page (not at the misaligned start or end of the range),
+	 * we first NULLify its pointer in the level above, and then have no
+	 * need to take the lock when updating its contents: needs_lock and
+	 * punch_lock (either pointing to info->lock or NULL) manage this.
+	 */
+
+	upper_limit -= SHMEM_NR_DIRECT;
 	limit -= SHMEM_NR_DIRECT;
 	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
 	offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		if (*dir) {
 			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
 				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-			if (!diroff && !offset) {
-				*dir = NULL;
+			if (!diroff && !offset && upper_limit >= stage) {
+				if (needs_lock) {
+					spin_lock(needs_lock);
+					*dir = NULL;
+					spin_unlock(needs_lock);
+					needs_lock = NULL;
+				} else
+					*dir = NULL;
 				nr_pages_to_free++;
 				list_add(&middir->lru, &pages_to_free);
 			}
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		}
 		stage = idx + ENTRIES_PER_PAGEPAGE;
 		middir = *dir;
-		*dir = NULL;
-		nr_pages_to_free++;
-		list_add(&middir->lru, &pages_to_free);
+		if (punch_hole)
+			needs_lock = &info->lock;
+		if (upper_limit >= stage) {
+			if (needs_lock) {
+				spin_lock(needs_lock);
+				*dir = NULL;
+				spin_unlock(needs_lock);
+				needs_lock = NULL;
+			} else
+				*dir = NULL;
+			nr_pages_to_free++;
+			list_add(&middir->lru, &pages_to_free);
+		}
 		shmem_dir_unmap(dir);
 		cond_resched();
 		dir = shmem_dir_map(middir);
 		diroff = 0;
 	}
+	punch_lock = needs_lock;
 	subdir = dir[diroff];
-	if (subdir && page_private(subdir)) {
+	if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+		if (needs_lock) {
+			spin_lock(needs_lock);
+			dir[diroff] = NULL;
+			spin_unlock(needs_lock);
+			punch_lock = NULL;
+		} else
+			dir[diroff] = NULL;
+		nr_pages_to_free++;
+		list_add(&subdir->lru, &pages_to_free);
+	}
+	if (subdir && page_private(subdir) /* has swap entries */) {
 		size = limit - idx;
 		if (size > ENTRIES_PER_PAGE)
 			size = ENTRIES_PER_PAGE;
 		freed = shmem_map_and_free_swp(subdir,
-				offset, size, &dir);
+				offset, size, &dir, punch_lock);
 		if (!dir)
 			dir = shmem_dir_map(middir);
 		nr_swaps_freed += freed;
-		if (offset)
+		if (offset || punch_lock) {
 			spin_lock(&info->lock);
-		set_page_private(subdir, page_private(subdir) - freed);
-		if (offset)
+			set_page_private(subdir,
+				page_private(subdir) - freed);
 			spin_unlock(&info->lock);
-		if (!punch_hole)
-			BUG_ON(page_private(subdir) > offset);
-	}
-	if (offset)
-		offset = 0;
-	else if (subdir && !page_private(subdir)) {
-		dir[diroff] = NULL;
-		nr_pages_to_free++;
-		list_add(&subdir->lru, &pages_to_free);
+		} else
+			BUG_ON(page_private(subdir) != freed);
 	}
+	offset = 0;
 }
 done1:
 	shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
 		 * generic_delete_inode did it, before we lowered next_index.
 		 * Also, though shmem_getpage checks i_size before adding to
 		 * cache, no recheck after: so fix the narrow window there too.
+		 *
+		 * Recalling truncate_inode_pages_range and unmap_mapping_range
+		 * every time for punch_hole (which never got a chance to clear
+		 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+		 * yet hardly ever necessary: try to optimize them out later.
 		 */
 		truncate_inode_pages_range(inode->i_mapping, start, end);
+		if (punch_hole)
+			unmap_mapping_range(inode->i_mapping, start,
+							end - start, 1);
 	}
 
 	spin_lock(&info->lock);
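Finally, the done2 hunk pairs page-cache truncation with a pte unmap for the hole. A sketch of that calling shape follows, mirroring the arguments used in the diff (byte offset of the hole, byte length of end - start, even_cows of 1); punch_cleanup() is a hypothetical wrapper for illustration, not something the patch adds:

#include <linux/fs.h>
#include <linux/mm.h>

/*
 * Shape of the cleanup added at done2 above: drop the cached pages
 * in [start, end], then, only when punching a hole, zap any ptes
 * still mapping it so a racing fault cannot keep stale pages alive.
 */
static void punch_cleanup(struct address_space *mapping,
			  loff_t start, loff_t end, int punch_hole)
{
	truncate_inode_pages_range(mapping, start, end);
	if (punch_hole)
		unmap_mapping_range(mapping, start, end - start, 1);
}

As the new comment notes, doing both calls on every hole punch is expensive and usually unnecessary; the patch leaves optimizing them out for later.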