author     Hugh Dickins <hugh@veritas.com>     2007-03-29 04:20:36 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>     2007-03-29 11:22:25 -0400
commit     1ae7000630e3c05b6f7e3dfc76472f1bca6c1788 (patch)
tree       805d97820dae82a5141f0b1aefc1383bd794e956
parent     a2646d1e6c8d2239d8054a7d342eb9775a1d273a (diff)
[PATCH] holepunch: fix shmem_truncate_range punch locking
Miklos Szeredi observes that during truncation of shmem page directories,
info->lock is released to improve latency (after lowering i_size and
next_index to exclude races); but this is quite wrong for holepunching,
which receives no such protection from i_size or next_index, and is left
vulnerable to races with shmem_unuse, shmem_getpage and shmem_writepage.

Hold info->lock throughout when holepunching?  No, any user could prevent
rescheduling for far too long.  Instead take info->lock just when needed:
in shmem_free_swp when removing the swap entries, and whenever removing
a directory page from the level above.  But so long as we remove before
scanning, we can safely skip taking the lock at the lower levels, except
at misaligned start and end of the hole.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--   mm/shmem.c   96
1 file changed, 73 insertions(+), 23 deletions(-)
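The commit message describes a deferred-locking pattern: rather than holding info->lock across the whole scan, the scan takes the lock only on meeting a live entry, and then re-checks that entry under the lock because a racer may already have cleared it. Below is a minimal userspace sketch of that pattern; the names (free_entries, struct entry) and the pthread spinlock standing in for the kernel's info->lock are illustrative assumptions, not code from the patch.

#include <pthread.h>

/* Illustrative stand-ins for swp_entry_t and info->lock. */
struct entry { unsigned long val; };

/*
 * Free the non-zero entries in [dir, edir).  If punch_lock is non-NULL
 * (the holepunch case), take it lazily: only once a live entry is found,
 * and re-check that entry under the lock in case a racer cleared it.
 */
static int free_entries(struct entry *dir, struct entry *edir,
                        pthread_spinlock_t *punch_lock)
{
    pthread_spinlock_t *punch_unlock = NULL;
    struct entry *ptr;
    int freed = 0;

    for (ptr = dir; ptr < edir; ptr++) {
        if (ptr->val) {
            if (punch_lock) {
                punch_unlock = punch_lock;   /* remember to unlock at the end */
                punch_lock = NULL;           /* take the lock only once */
                pthread_spin_lock(punch_unlock);
                if (!ptr->val)               /* lost the race: entry already gone */
                    continue;
            }
            ptr->val = 0;                    /* "free" the entry */
            freed++;
        }
    }
    if (punch_unlock)
        pthread_spin_unlock(punch_unlock);
    return freed;
}

int main(void)
{
    pthread_spinlock_t lock;
    struct entry table[4] = { {1}, {0}, {7}, {3} };

    pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
    int freed = free_entries(table, table + 4, &lock);  /* expect 3 */
    pthread_spin_destroy(&lock);
    return freed == 3 ? 0 : 1;
}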
diff --git a/mm/shmem.c b/mm/shmem.c
index 1077b1d903d2..578eceafba4a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
  * @dir:        pointer to the directory
  * @edir:       pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+                                                spinlock_t *punch_lock)
 {
+        spinlock_t *punch_unlock = NULL;
         swp_entry_t *ptr;
         int freed = 0;
 
         for (ptr = dir; ptr < edir; ptr++) {
                 if (ptr->val) {
+                        if (unlikely(punch_lock)) {
+                                punch_unlock = punch_lock;
+                                punch_lock = NULL;
+                                spin_lock(punch_unlock);
+                                if (!ptr->val)
+                                        continue;
+                        }
                         free_swap_and_cache(*ptr);
                         *ptr = (swp_entry_t){0};
                         freed++;
                 }
         }
+        if (punch_unlock)
+                spin_unlock(punch_unlock);
         return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-                int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+                int limit, struct page ***dir, spinlock_t *punch_lock)
 {
         swp_entry_t *ptr;
         int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
                 int size = limit - offset;
                 if (size > LATENCY_LIMIT)
                         size = LATENCY_LIMIT;
-                freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+                freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+                                                        punch_lock);
                 if (need_resched()) {
                         shmem_swp_unmap(ptr);
                         if (*dir) {
@@ -482,6 +495,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         int offset;
         int freed;
         int punch_hole;
+        spinlock_t *needs_lock;
+        spinlock_t *punch_lock;
         unsigned long upper_limit;
 
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -495,6 +510,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 limit = info->next_index;
                 upper_limit = SHMEM_MAX_INDEX;
                 info->next_index = idx;
+                needs_lock = NULL;
                 punch_hole = 0;
         } else {
                 if (end + 1 >= inode->i_size) { /* we may free a little more */
@@ -505,6 +521,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         limit = (end + 1) >> PAGE_CACHE_SHIFT;
                         upper_limit = limit;
                 }
+                needs_lock = &info->lock;
                 punch_hole = 1;
         }
 
@@ -521,7 +538,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 size = limit;
                 if (size > SHMEM_NR_DIRECT)
                         size = SHMEM_NR_DIRECT;
-                nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+                nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
         }
 
         /*
@@ -531,6 +548,19 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         if (!topdir || limit <= SHMEM_NR_DIRECT)
                 goto done2;
 
+        /*
+         * The truncation case has already dropped info->lock, and we're safe
+         * because i_size and next_index have already been lowered, preventing
+         * access beyond.  But in the punch_hole case, we still need to take
+         * the lock when updating the swap directory, because there might be
+         * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+         * shmem_writepage.  However, whenever we find we can remove a whole
+         * directory page (not at the misaligned start or end of the range),
+         * we first NULLify its pointer in the level above, and then have no
+         * need to take the lock when updating its contents: needs_lock and
+         * punch_lock (either pointing to info->lock or NULL) manage this.
+         */
+
         upper_limit -= SHMEM_NR_DIRECT;
         limit -= SHMEM_NR_DIRECT;
         idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
@@ -552,7 +582,13 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
                         ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
                 if (!diroff && !offset && upper_limit >= stage) {
-                        *dir = NULL;
+                        if (needs_lock) {
+                                spin_lock(needs_lock);
+                                *dir = NULL;
+                                spin_unlock(needs_lock);
+                                needs_lock = NULL;
+                        } else
+                                *dir = NULL;
                         nr_pages_to_free++;
                         list_add(&middir->lru, &pages_to_free);
                 }
@@ -578,8 +614,16 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         }
                         stage = idx + ENTRIES_PER_PAGEPAGE;
                         middir = *dir;
+                        if (punch_hole)
+                                needs_lock = &info->lock;
                         if (upper_limit >= stage) {
-                                *dir = NULL;
+                                if (needs_lock) {
+                                        spin_lock(needs_lock);
+                                        *dir = NULL;
+                                        spin_unlock(needs_lock);
+                                        needs_lock = NULL;
+                                } else
+                                        *dir = NULL;
                                 nr_pages_to_free++;
                                 list_add(&middir->lru, &pages_to_free);
                         }
@@ -588,31 +632,37 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         dir = shmem_dir_map(middir);
                         diroff = 0;
                 }
+                punch_lock = needs_lock;
                 subdir = dir[diroff];
-                if (subdir && page_private(subdir)) {
+                if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+                        if (needs_lock) {
+                                spin_lock(needs_lock);
+                                dir[diroff] = NULL;
+                                spin_unlock(needs_lock);
+                                punch_lock = NULL;
+                        } else
+                                dir[diroff] = NULL;
+                        nr_pages_to_free++;
+                        list_add(&subdir->lru, &pages_to_free);
+                }
+                if (subdir && page_private(subdir) /* has swap entries */) {
                         size = limit - idx;
                         if (size > ENTRIES_PER_PAGE)
                                 size = ENTRIES_PER_PAGE;
                         freed = shmem_map_and_free_swp(subdir,
-                                        offset, size, &dir);
+                                        offset, size, &dir, punch_lock);
                         if (!dir)
                                 dir = shmem_dir_map(middir);
                         nr_swaps_freed += freed;
-                        if (offset)
+                        if (offset || punch_lock) {
                                 spin_lock(&info->lock);
-                        set_page_private(subdir, page_private(subdir) - freed);
-                        if (offset)
+                                set_page_private(subdir,
+                                        page_private(subdir) - freed);
                                 spin_unlock(&info->lock);
-                        if (!punch_hole)
-                                BUG_ON(page_private(subdir) > offset);
-                }
-                if (offset)
-                        offset = 0;
-                else if (subdir && upper_limit - idx >= ENTRIES_PER_PAGE) {
-                        dir[diroff] = NULL;
-                        nr_pages_to_free++;
-                        list_add(&subdir->lru, &pages_to_free);
+                        } else
+                                BUG_ON(page_private(subdir) != freed);
                 }
+                offset = 0;
         }
 done1:
         shmem_dir_unmap(dir);
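The other half of the scheme, described in the comment the patch adds above the directory walk, is that a whole directory page is first unlinked from the level above under the lock; once no racer can reach it, its contents can be cleared without taking the lock again. A rough userspace sketch of that "unlink under the lock, then free outside it" idea follows; the names (punch_whole_page, struct dir_page, ENTRIES_PER_PAGE here set to 4) are illustrative and do not appear in the patch.

#include <pthread.h>
#include <stdlib.h>

#define ENTRIES_PER_PAGE 4

/* Illustrative two-level directory: a parent slot points to a child page. */
struct dir_page { unsigned long entry[ENTRIES_PER_PAGE]; };

/*
 * Detach one child page from its parent slot under the lock, then clear
 * its entries with no lock held: once the parent pointer is NULL, racing
 * lookups can no longer reach the child, so its contents are private.
 */
static int punch_whole_page(struct dir_page **parent_slot,
                            pthread_spinlock_t *lock)
{
    struct dir_page *child;
    int freed = 0;

    pthread_spin_lock(lock);
    child = *parent_slot;
    *parent_slot = NULL;          /* unlink first, under the lock */
    pthread_spin_unlock(lock);

    if (!child)
        return 0;
    for (int i = 0; i < ENTRIES_PER_PAGE; i++) {
        if (child->entry[i]) {    /* no lock needed: page is unreachable */
            child->entry[i] = 0;
            freed++;
        }
    }
    free(child);
    return freed;
}

int main(void)
{
    pthread_spinlock_t lock;
    struct dir_page *page = calloc(1, sizeof(*page));

    if (!page)
        return 1;
    page->entry[0] = 5;
    page->entry[2] = 9;

    pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
    int freed = punch_whole_page(&page, &lock);  /* expect 2, page now NULL */
    pthread_spin_destroy(&lock);
    return (freed == 2 && page == NULL) ? 0 : 1;
}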