author		Hugh Dickins <hughd@google.com>	2012-05-29 18:06:42 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-29 19:22:23 -0400
commit		1635f6a74152f1dcd1b888231609d64875f0a81a (patch)
tree		a2919520e0a8fead3930d23136ca30829f0fc64a
parent		e2d12e22c59ce714008aa5266d769f8568d74eac (diff)
tmpfs: undo fallocation on failure
In the previous episode, we left the already-fallocated pages attached to
the file when shmem_fallocate() fails part way through.

Now try to do better, by extending the earlier optimization of !Uptodate
pages (then always under page lock) to !Uptodate pages (outside of page
lock), representing fallocated pages.  And don't waste time clearing them
at the time of fallocate(), leave that until later if necessary.

Adapt shmem_truncate_range() to shmem_undo_range(), so that a failing
fallocate can recognize and remove precisely those !Uptodate allocations
which it added (and were not independently allocated by racing tasks).

But unless we start playing with swapfile.c and memcontrol.c too, once one
of our fallocated pages reaches shmem_writepage(), we do then have to
instantiate it as an ordinarily allocated page, before swapping out.  This
is unsatisfactory, but improved in the next episode.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Cong Wang <amwang@redhat.com>
Cc: Kay Sievers <kay@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/shmem.c	105
1 file changed, 72 insertions(+), 33 deletions(-)
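
Before the diff: a minimal userspace sketch of the situation being fixed.
It assumes /dev/shm is a size-limited tmpfs mount and uses an invented
filename; the point is only that fallocate(2) can fail part way through
(typically ENOSPC), and that before this patch the pages already allocated
stayed attached to the file, which st_blocks would show.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		/* Assumption: /dev/shm is tmpfs; the filename is illustrative. */
		int fd = open("/dev/shm/falloc-demo",
			      O_RDWR | O_CREAT | O_TRUNC, 0600);
		struct stat st;

		if (fd < 0)
			return 1;

		/* Ask for more than the tmpfs size= limit allows, so the
		 * allocation fails part way through. */
		if (fallocate(fd, 0, 0, 1ULL << 40) != 0)
			perror("fallocate");	/* typically ENOSPC */

		/* Before this patch the partially fallocated pages stayed
		 * attached, so st_blocks remained inflated; with the undo,
		 * they are removed again. */
		fstat(fd, &st);
		printf("st_blocks after failed fallocate: %lld\n",
		       (long long)st.st_blocks);

		close(fd);
		unlink("/dev/shm/falloc-demo");
		return 0;
	}
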
diff --git a/mm/shmem.c b/mm/shmem.c
index 9b90d89e54ce..793dcd1bac8b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -89,7 +89,8 @@ enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
 	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
-	SGP_WRITE,	/* may exceed i_size, may allocate page */
+	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
+	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
 };
 
 #ifdef CONFIG_TMPFS
@@ -427,8 +428,10 @@ void shmem_unlock_mapping(struct address_space *mapping)
 
 /*
  * Remove range of pages and swap entries from radix tree, and free them.
+ * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
  */
-void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
+							 bool unfalloc)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -462,6 +465,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			break;
 
 		if (radix_tree_exceptional_entry(page)) {
+			if (unfalloc)
+				continue;
 			nr_swaps_freed += !shmem_free_swap(mapping,
 							index, page);
 			continue;
@@ -469,9 +474,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 
 		if (!trylock_page(page))
 			continue;
-		if (page->mapping == mapping) {
-			VM_BUG_ON(PageWriteback(page));
-			truncate_inode_page(mapping, page);
+		if (!unfalloc || !PageUptodate(page)) {
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
 		}
 		unlock_page(page);
 	}
@@ -517,12 +524,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			min(end - index, (pgoff_t)PAGEVEC_SIZE),
 							pvec.pages, indices);
 		if (!pvec.nr) {
-			if (index == start)
+			if (index == start || unfalloc)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && indices[0] >= end) {
+		if ((index == start || unfalloc) && indices[0] >= end) {
 			shmem_deswap_pagevec(&pvec);
 			pagevec_release(&pvec);
 			break;
@@ -536,15 +543,19 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			break;
 
 		if (radix_tree_exceptional_entry(page)) {
+			if (unfalloc)
+				continue;
 			nr_swaps_freed += !shmem_free_swap(mapping,
 							index, page);
 			continue;
 		}
 
 		lock_page(page);
-		if (page->mapping == mapping) {
-			VM_BUG_ON(PageWriteback(page));
-			truncate_inode_page(mapping, page);
+		if (!unfalloc || !PageUptodate(page)) {
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
 		}
 		unlock_page(page);
 	}
@@ -558,7 +569,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
+}
 
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+	shmem_undo_range(inode, lstart, lend, false);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
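
As an aside, the unfalloc case above is an instance of a general rollback
idiom: mark freshly created objects provisional, and on failure free only
those still provisional, since racing tasks may have promoted some in the
meantime (here promotion is SetPageUptodate under page lock).  A toy
userspace model of that idiom, all names invented for illustration and no
shmem internals implied:

	#include <stdbool.h>
	#include <stdio.h>

	static bool present[8];		/* page exists in the "file" */
	static bool uptodate[8];	/* page has real contents */

	/* Free only pages still provisional (!uptodate) in [first, last):
	 * the shape of shmem_undo_range(..., unfalloc == true). */
	static void undo_range(int first, int last)
	{
		for (int i = first; i < last; i++)
			if (present[i] && !uptodate[i])
				present[i] = false;
	}

	int main(void)
	{
		/* "fallocate" pages 0..5 provisionally... */
		for (int i = 0; i < 6; i++)
			present[i] = true;
		/* ...a racing writer makes page 3 Uptodate before we fail... */
		uptodate[3] = true;
		/* ...so rollback removes every page except the promoted one. */
		undo_range(0, 6);

		for (int i = 0; i < 6; i++)
			printf("page %d: %s\n", i, present[i] ? "kept" : "freed");
		return 0;
	}
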
@@ -771,6 +786,18 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
 		goto redirty;
 	}
+
+	/*
+	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
+	 * value into swapfile.c, the only way we can correctly account for a
+	 * fallocated page arriving here is now to initialize it and write it.
+	 */
+	if (!PageUptodate(page)) {
+		clear_highpage(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+
 	swap = get_swap_page();
 	if (!swap.val)
 		goto redirty;
@@ -994,6 +1021,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	swp_entry_t swap;
 	int error;
 	int once = 0;
+	int alloced = 0;
 
 	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
 		return -EFBIG;
@@ -1005,19 +1033,21 @@ repeat:
 		page = NULL;
 	}
 
-	if (sgp != SGP_WRITE &&
+	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
 		goto failed;
 	}
 
+	/* fallocated page? */
+	if (page && !PageUptodate(page)) {
+		if (sgp != SGP_READ)
+			goto clear;
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+	}
 	if (page || (sgp == SGP_READ && !swap.val)) {
-		/*
-		 * Once we can get the page lock, it must be uptodate:
-		 * if there were an error in reading back from swap,
-		 * the page would not be inserted into the filecache.
-		 */
-		BUG_ON(page && !PageUptodate(page));
 		*pagep = page;
 		return 0;
 	}
@@ -1114,9 +1144,18 @@ repeat:
 		inode->i_blocks += BLOCKS_PER_PAGE;
 		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
+		alloced = true;
 
 		/*
-		 * Let SGP_WRITE caller clear ends if write does not fill page
+		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+		 */
+		if (sgp == SGP_FALLOC)
+			sgp = SGP_WRITE;
+clear:
+		/*
+		 * Let SGP_WRITE caller clear ends if write does not fill page;
+		 * but SGP_FALLOC on a page fallocated earlier must initialize
+		 * it now, lest undo on failure cancel our earlier guarantee.
 		 */
 		if (sgp != SGP_WRITE) {
 			clear_highpage(page);
@@ -1128,10 +1167,13 @@ repeat:
 	}
 
 	/* Perhaps the file has been truncated since we checked */
-	if (sgp != SGP_WRITE &&
+	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
-		goto trunc;
+		if (alloced)
+			goto trunc;
+		else
+			goto failed;
 	}
 	*pagep = page;
 	return 0;
@@ -1140,6 +1182,7 @@ repeat:
 	 * Error recovery.
 	 */
 trunc:
+	info = SHMEM_I(inode);
 	ClearPageDirty(page);
 	delete_from_page_cache(page);
 	spin_lock(&info->lock);
@@ -1147,6 +1190,7 @@ trunc:
 	inode->i_blocks -= BLOCKS_PER_PAGE;
 	spin_unlock(&info->lock);
 decused:
+	sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks)
 		percpu_counter_add(&sbinfo->used_blocks, -1);
 unacct:
@@ -1645,25 +1689,20 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		if (signal_pending(current))
 			error = -EINTR;
 		else
-			error = shmem_getpage(inode, index, &page, SGP_WRITE,
+			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
 									NULL);
 		if (error) {
-			/*
-			 * We really ought to free what we allocated so far,
-			 * but it would be wrong to free pages allocated
-			 * earlier, or already now in use: i_mutex does not
-			 * exclude all cases.  We do not know what to free.
-			 */
+			/* Remove the !PageUptodate pages we added */
+			shmem_undo_range(inode,
+				(loff_t)start << PAGE_CACHE_SHIFT,
+				(loff_t)index << PAGE_CACHE_SHIFT, true);
 			goto ctime;
 		}
 
-		if (!PageUptodate(page)) {
-			clear_highpage(page);
-			flush_dcache_page(page);
-			SetPageUptodate(page);
-		}
 		/*
-		 * set_page_dirty so that memory pressure will swap rather
+		 * If !PageUptodate, leave it that way so that freeable pages
+		 * can be recognized if we need to rollback on error later.
+		 * But set_page_dirty so that memory pressure will swap rather
 		 * than free the pages we are allocating (and SGP_CACHE pages
 		 * might still be clean: we now need to mark those dirty too).
 		 */
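
A note on the rollback range in the last hunk, under the assumption of
4KiB pages (PAGE_CACHE_SHIFT == 12) and hypothetical index values:

	start = 100;  index = 150;		/* loop failed at page 150 */
	lstart = (loff_t)100 << 12;		/* = 409600 */
	lend   = (loff_t)150 << 12;		/* = 614400 */
	/* shmem_undo_range() then drops the still-!Uptodate pages 100..149 */

The failing index itself never joined the file: its partial state is
unwound by shmem_getpage_gfp()'s own trunc/failed error path shown
earlier.  And only pages still !Uptodate are removed, so any page a racing
task has already made Uptodate survives the rollback.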