 mm/shmem.c | 105 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 72 insertions(+), 33 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 9b90d89e54ce..793dcd1bac8b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -89,7 +89,8 @@ enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
 	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
-	SGP_WRITE,	/* may exceed i_size, may allocate page */
+	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
+	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
 };

 #ifdef CONFIG_TMPFS
@@ -427,8 +428,10 @@ void shmem_unlock_mapping(struct address_space *mapping)

 /*
  * Remove range of pages and swap entries from radix tree, and free them.
+ * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
  */
-void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
+							 bool unfalloc)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -462,6 +465,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 				break;

 			if (radix_tree_exceptional_entry(page)) {
+				if (unfalloc)
+					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
 								index, page);
 				continue;
@@ -469,9 +474,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)

 			if (!trylock_page(page))
 				continue;
-			if (page->mapping == mapping) {
-				VM_BUG_ON(PageWriteback(page));
-				truncate_inode_page(mapping, page);
+			if (!unfalloc || !PageUptodate(page)) {
+				if (page->mapping == mapping) {
+					VM_BUG_ON(PageWriteback(page));
+					truncate_inode_page(mapping, page);
+				}
 			}
 			unlock_page(page);
 		}
@@ -517,12 +524,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 							pvec.pages, indices);
 		if (!pvec.nr) {
-			if (index == start)
+			if (index == start || unfalloc)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && indices[0] >= end) {
+		if ((index == start || unfalloc) && indices[0] >= end) {
 			shmem_deswap_pagevec(&pvec);
 			pagevec_release(&pvec);
 			break;
@@ -536,15 +543,19 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 				break;

 			if (radix_tree_exceptional_entry(page)) {
+				if (unfalloc)
+					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
 								index, page);
 				continue;
 			}

 			lock_page(page);
-			if (page->mapping == mapping) {
-				VM_BUG_ON(PageWriteback(page));
-				truncate_inode_page(mapping, page);
+			if (!unfalloc || !PageUptodate(page)) {
+				if (page->mapping == mapping) {
+					VM_BUG_ON(PageWriteback(page));
+					truncate_inode_page(mapping, page);
+				}
 			}
 			unlock_page(page);
 		}
@@ -558,7 +569,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
+}

+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+	shmem_undo_range(inode, lstart, lend, false);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
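
The hunk above completes the refactor: shmem_truncate_range() is now a thin wrapper, and one radix-tree sweep serves two callers distinguished only by the unfalloc flag. Condensed, the two call sites in this patch look as follows (paraphrased kernel fragment, not standalone code):

	/* Normal truncate or hole-punch: remove every page and swap entry. */
	shmem_undo_range(inode, lstart, lend, false);

	/* Rollback of a failed fallocate: remove only the !PageUptodate
	 * pages that the failed call itself added; Uptodate pages and swap
	 * entries predate the fallocate and must survive. */
	shmem_undo_range(inode, lstart, lend, true);
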
@@ -771,6 +786,18 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
 		goto redirty;
 	}
+
+	/*
+	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
+	 * value into swapfile.c, the only way we can correctly account for a
+	 * fallocated page arriving here is now to initialize it and write it.
+	 */
+	if (!PageUptodate(page)) {
+		clear_highpage(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+
 	swap = get_swap_page();
 	if (!swap.val)
 		goto redirty;
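
This shmem_writepage() hunk is the backstop for the lazy-zeroing scheme: a fallocated page may stay !Uptodate until it is either written by the user or, as here, has to be swapped out. The user-visible contract is that never-written fallocated extents read back as zeroes. A minimal userspace check of that contract (editor's sketch, not part of the patch; assumes /dev/shm is a tmpfs mount, and note that posix_fallocate() may fall back to writing zeroes itself on kernels without tmpfs fallocate support):

	#define _GNU_SOURCE
	#include <assert.h>
	#include <fcntl.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		static char buf[4096];
		int fd = open("/dev/shm/falloc-zero-test",
			      O_RDWR | O_CREAT | O_TRUNC, 0600);

		assert(fd >= 0);
		/* Allocate one page without ever writing to it. */
		assert(posix_fallocate(fd, 0, sizeof(buf)) == 0);
		/* Never-written fallocated extents must read back as zeroes. */
		assert(read(fd, buf, sizeof(buf)) == (ssize_t)sizeof(buf));
		for (size_t i = 0; i < sizeof(buf); i++)
			assert(buf[i] == 0);
		unlink("/dev/shm/falloc-zero-test");
		return 0;
	}
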
@@ -994,6 +1021,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	swp_entry_t swap;
 	int error;
 	int once = 0;
+	int alloced = 0;

 	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
 		return -EFBIG;
@@ -1005,19 +1033,21 @@ repeat:
 		page = NULL;
 	}

-	if (sgp != SGP_WRITE &&
+	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
 		goto failed;
 	}

+	/* fallocated page? */
+	if (page && !PageUptodate(page)) {
+		if (sgp != SGP_READ)
+			goto clear;
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+	}
 	if (page || (sgp == SGP_READ && !swap.val)) {
-		/*
-		 * Once we can get the page lock, it must be uptodate:
-		 * if there were an error in reading back from swap,
-		 * the page would not be inserted into the filecache.
-		 */
-		BUG_ON(page && !PageUptodate(page));
 		*pagep = page;
 		return 0;
 	}
@@ -1114,9 +1144,18 @@ repeat:
 		inode->i_blocks += BLOCKS_PER_PAGE;
 		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
+		alloced = true;

 		/*
-		 * Let SGP_WRITE caller clear ends if write does not fill page
+		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+		 */
+		if (sgp == SGP_FALLOC)
+			sgp = SGP_WRITE;
+clear:
+		/*
+		 * Let SGP_WRITE caller clear ends if write does not fill page;
+		 * but SGP_FALLOC on a page fallocated earlier must initialize
+		 * it now, lest undo on failure cancel our earlier guarantee.
 		 */
 		if (sgp != SGP_WRITE) {
 			clear_highpage(page);
@@ -1128,10 +1167,13 @@ repeat:
 	}

 	/* Perhaps the file has been truncated since we checked */
-	if (sgp != SGP_WRITE &&
+	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
-		goto trunc;
+		if (alloced)
+			goto trunc;
+		else
+			goto failed;
 	}
 	*pagep = page;
 	return 0;
@@ -1140,6 +1182,7 @@ repeat:
 	 * Error recovery.
 	 */
 trunc:
+	info = SHMEM_I(inode);
 	ClearPageDirty(page);
 	delete_from_page_cache(page);
 	spin_lock(&info->lock);
@@ -1147,6 +1190,7 @@ trunc:
 	inode->i_blocks -= BLOCKS_PER_PAGE;
 	spin_unlock(&info->lock);
 decused:
+	sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks)
 		percpu_counter_add(&sbinfo->used_blocks, -1);
 unacct:
@@ -1645,25 +1689,20 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		if (signal_pending(current))
 			error = -EINTR;
 		else
-			error = shmem_getpage(inode, index, &page, SGP_WRITE,
+			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
 									NULL);
 		if (error) {
-			/*
-			 * We really ought to free what we allocated so far,
-			 * but it would be wrong to free pages allocated
-			 * earlier, or already now in use: i_mutex does not
-			 * exclude all cases. We do not know what to free.
-			 */
+			/* Remove the !PageUptodate pages we added */
+			shmem_undo_range(inode,
+				(loff_t)start << PAGE_CACHE_SHIFT,
+				(loff_t)index << PAGE_CACHE_SHIFT, true);
 			goto ctime;
 		}

-		if (!PageUptodate(page)) {
-			clear_highpage(page);
-			flush_dcache_page(page);
-			SetPageUptodate(page);
-		}
 		/*
-		 * set_page_dirty so that memory pressure will swap rather
+		 * If !PageUptodate, leave it that way so that freeable pages
+		 * can be recognized if we need to rollback on error later.
+		 * But set_page_dirty so that memory pressure will swap rather
 		 * than free the pages we are allocating (and SGP_CACHE pages
 		 * might still be clean: we now need to mark those dirty too).
 		 */
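
Taken together with the SGP_FALLOC plumbing, this error path gives fallocate() per-call all-or-nothing behaviour: pages provisionally added by a failing call are removed again rather than leaked into the file, replacing the old "we do not know what to free" resignation. A hedged userspace illustration of the rollback (editor's sketch; the mount point, size, and file name are assumptions, not from the patch):

	/* Assumes a deliberately small tmpfs:
	 *	mount -t tmpfs -o size=8m tmpfs /mnt/tiny
	 */
	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/tiny/bigfile",
			      O_RDWR | O_CREAT | O_TRUNC, 0600);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Ask for more than the filesystem can hold. */
		if (fallocate(fd, 0, 0, 16 << 20) != 0 && errno == ENOSPC)
			printf("ENOSPC as expected; partial allocation undone\n");
		/* With this patch, df should show usage back where it started,
		 * not pinned at whatever the failed call briefly consumed. */
		unlink("/mnt/tiny/bigfile");
		close(fd);
		return 0;
	}
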