Diffstat (limited to 'mm/shmem.c')
-rw-r--r--	mm/shmem.c	122
1 file changed, 105 insertions, 17 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index f484c276e994..af68b15a8fc1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt;
 #define SHORT_SYMLINK_LEN 128
 
 /*
- * shmem_fallocate and shmem_writepage communicate via inode->i_private
- * (with i_mutex making sure that it has only one user at a time):
- * we would prefer not to enlarge the shmem inode just for that.
+ * shmem_fallocate communicates with shmem_fault or shmem_writepage via
+ * inode->i_private (with i_mutex making sure that it has only one user at
+ * a time): we would prefer not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
+	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
 	pgoff_t start;		/* start of range currently being fallocated */
 	pgoff_t next;		/* the next page offset to be fallocated */
 	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
@@ -467,23 +468,20 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		return;
 
 	index = start;
-	for ( ; ; ) {
+	while (index < end) {
 		cond_resched();
 
 		pvec.nr = find_get_entries(mapping, index,
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 				pvec.pages, indices);
 		if (!pvec.nr) {
-			if (index == start || unfalloc)
+			/* If all gone or hole-punch or unfalloc, we're done */
+			if (index == start || end != -1)
 				break;
+			/* But if truncating, restart to make sure all gone */
 			index = start;
 			continue;
 		}
-		if ((index == start || unfalloc) && indices[0] >= end) {
-			pagevec_remove_exceptionals(&pvec);
-			pagevec_release(&pvec);
-			break;
-		}
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
@@ -495,8 +493,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (radix_tree_exceptional_entry(page)) {
 				if (unfalloc)
 					continue;
-				nr_swaps_freed += !shmem_free_swap(mapping,
-								index, page);
+				if (shmem_free_swap(mapping, index, page)) {
+					/* Swap was replaced by page: retry */
+					index--;
+					break;
+				}
+				nr_swaps_freed++;
 				continue;
 			}
 
@@ -505,6 +507,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 				if (page->mapping == mapping) {
 					VM_BUG_ON_PAGE(PageWriteback(page), page);
 					truncate_inode_page(mapping, page);
+				} else {
+					/* Page was replaced by swap: retry */
+					unlock_page(page);
+					index--;
+					break;
 				}
 			}
 			unlock_page(page);
@@ -759,6 +766,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		spin_lock(&inode->i_lock);
 		shmem_falloc = inode->i_private;
 		if (shmem_falloc &&
+		    !shmem_falloc->waitq &&
 		    index >= shmem_falloc->start &&
 		    index < shmem_falloc->next)
 			shmem_falloc->nr_unswapped++;
@@ -1027,6 +1035,9 @@ repeat:
 		goto failed;
 	}
 
+	if (page && sgp == SGP_WRITE)
+		mark_page_accessed(page);
+
 	/* fallocated page? */
 	if (page && !PageUptodate(page)) {
 		if (sgp != SGP_READ)
@@ -1108,6 +1119,9 @@ repeat:
 		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
 
+		if (sgp == SGP_WRITE)
+			mark_page_accessed(page);
+
 		delete_from_swap_cache(page);
 		set_page_dirty(page);
 		swap_free(swap);
@@ -1134,6 +1148,9 @@ repeat:
 
 		__SetPageSwapBacked(page);
 		__set_page_locked(page);
+		if (sgp == SGP_WRITE)
+			init_page_accessed(page);
+
 		error = mem_cgroup_charge_file(page, current->mm,
 						gfp & GFP_RECLAIM_MASK);
 		if (error)
@@ -1233,6 +1250,64 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 	int ret = VM_FAULT_LOCKED;
 
+	/*
+	 * Trinity finds that probing a hole which tmpfs is punching can
+	 * prevent the hole-punch from ever completing: which in turn
+	 * locks writers out with its hold on i_mutex.  So refrain from
+	 * faulting pages into the hole while it's being punched.  Although
+	 * shmem_undo_range() does remove the additions, it may be unable to
+	 * keep up, as each new page needs its own unmap_mapping_range() call,
+	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
+	 *
+	 * It does not matter if we sometimes reach this check just before the
+	 * hole-punch begins, so that one fault then races with the punch:
+	 * we just need to make racing faults a rare case.
+	 *
+	 * The implementation below would be much simpler if we just used a
+	 * standard mutex or completion: but we cannot take i_mutex in fault,
+	 * and bloating every shmem inode for this unlikely case would be sad.
+	 */
+	if (unlikely(inode->i_private)) {
+		struct shmem_falloc *shmem_falloc;
+
+		spin_lock(&inode->i_lock);
+		shmem_falloc = inode->i_private;
+		if (shmem_falloc &&
+		    shmem_falloc->waitq &&
+		    vmf->pgoff >= shmem_falloc->start &&
+		    vmf->pgoff < shmem_falloc->next) {
+			wait_queue_head_t *shmem_falloc_waitq;
+			DEFINE_WAIT(shmem_fault_wait);
+
+			ret = VM_FAULT_NOPAGE;
+			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
+			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				/* It's polite to up mmap_sem if we can */
+				up_read(&vma->vm_mm->mmap_sem);
+				ret = VM_FAULT_RETRY;
+			}
+
+			shmem_falloc_waitq = shmem_falloc->waitq;
+			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
+					TASK_UNINTERRUPTIBLE);
+			spin_unlock(&inode->i_lock);
+			schedule();
+
+			/*
+			 * shmem_falloc_waitq points into the shmem_fallocate()
+			 * stack of the hole-punching task: shmem_falloc_waitq
+			 * is usually invalid by the time we reach here, but
+			 * finish_wait() does not dereference it in that case;
+			 * though i_lock needed lest racing with wake_up_all().
+			 */
+			spin_lock(&inode->i_lock);
+			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
+			spin_unlock(&inode->i_lock);
+			return ret;
+		}
+		spin_unlock(&inode->i_lock);
+	}
+
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -1372,13 +1447,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	int ret;
 	struct inode *inode = mapping->host;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
-	if (ret == 0 && *pagep)
-		init_page_accessed(*pagep);
-	return ret;
+	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
 static int
@@ -1724,18 +1795,34 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	pgoff_t start, index, end;
 	int error;
 
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return -EOPNOTSUPP;
+
 	mutex_lock(&inode->i_mutex);
 
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
+
+		shmem_falloc.waitq = &shmem_falloc_waitq;
+		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
+		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
+		spin_lock(&inode->i_lock);
+		inode->i_private = &shmem_falloc;
+		spin_unlock(&inode->i_lock);
 
 		if ((u64)unmap_end > (u64)unmap_start)
 			unmap_mapping_range(mapping, unmap_start,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
+
+		spin_lock(&inode->i_lock);
+		inode->i_private = NULL;
+		wake_up_all(&shmem_falloc_waitq);
+		spin_unlock(&inode->i_lock);
 		error = 0;
 		goto out;
 	}
@@ -1753,6 +1840,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		goto out;
 	}
 
+	shmem_falloc.waitq = NULL;
 	shmem_falloc.start = start;
 	shmem_falloc.next = start;
 	shmem_falloc.nr_falloced = 0;
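For context, the hole-punch synchronization introduced above follows a simple handoff: shmem_fallocate() publishes a struct shmem_falloc carrying an on-stack wait queue through inode->i_private, shmem_fault() sleeps on that queue when a fault lands inside the range being punched, and the puncher clears the pointer and wakes all waiters under i_lock before its stack frame disappears. The sketch below is a minimal userspace analogue of that pattern using pthreads, not kernel code; all names (punch_state, inode_lock, i_private, faulter, puncher) are invented for the example, and the waiter count stands in for the finish_wait()/wake_up_all() handshake that keeps the on-stack queue from going stale.

/* Build with: cc -pthread sketch.c -o sketch */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct punch_state {
	unsigned long start, next;	/* page range being punched */
	pthread_cond_t waitq;		/* faulters wait here */
	int waiters;			/* lets the puncher outlive its waiters */
};

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;
static struct punch_state *i_private;	/* plays the role of inode->i_private */

static void *faulter(void *arg)
{
	unsigned long pgoff = (unsigned long)arg;

	pthread_mutex_lock(&inode_lock);
	if (i_private && pgoff >= i_private->start && pgoff < i_private->next) {
		struct punch_state *ps = i_private;

		ps->waiters++;
		while (i_private == ps)		/* sleep until the punch ends */
			pthread_cond_wait(&ps->waitq, &inode_lock);
		ps->waiters--;
		pthread_cond_broadcast(&ps->waitq);	/* let the puncher reap us */
	}
	pthread_mutex_unlock(&inode_lock);
	printf("fault at %lu may proceed\n", pgoff);
	return NULL;
}

static void puncher(unsigned long start, unsigned long end)
{
	struct punch_state ps = {		/* lives on this stack frame */
		.start = start,
		.next = end,
		.waitq = PTHREAD_COND_INITIALIZER,
	};

	pthread_mutex_lock(&inode_lock);
	i_private = &ps;			/* publish the punch range */
	pthread_mutex_unlock(&inode_lock);

	sleep(1);				/* stand-in for the real hole punch */

	pthread_mutex_lock(&inode_lock);
	i_private = NULL;			/* punch done: wake the faulters */
	pthread_cond_broadcast(&ps.waitq);
	while (ps.waiters)			/* don't let &ps go stale under them */
		pthread_cond_wait(&ps.waitq, &inode_lock);
	pthread_mutex_unlock(&inode_lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, faulter, (void *)5UL);
	puncher(0, 10);
	pthread_join(t, NULL);
	return 0;
}

As in the patch, a faulter that checks just before the puncher publishes its range simply proceeds; that race is tolerated, since the goal is only to make faults into the hole rare enough that the punch can finish.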