aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c182
1 files changed, 174 insertions, 8 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde3fa9e..b3a5213bc73e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
185 185
186 trace_ext4_evict_inode(inode); 186 trace_ext4_evict_inode(inode);
187 187
188 ext4_ioend_wait(inode);
189
190 if (inode->i_nlink) { 188 if (inode->i_nlink) {
191 /* 189 /*
192 * When journalling data dirty buffers are tracked only in the 190 * When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
207 * don't use page cache. 205 * don't use page cache.
208 */ 206 */
209 if (ext4_should_journal_data(inode) && 207 if (ext4_should_journal_data(inode) &&
210 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 208 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
209 inode->i_ino != EXT4_JOURNAL_INO) {
211 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 210 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
212 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 211 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
213 212
@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
216 filemap_write_and_wait(&inode->i_data); 215 filemap_write_and_wait(&inode->i_data);
217 } 216 }
218 truncate_inode_pages(&inode->i_data, 0); 217 truncate_inode_pages(&inode->i_data, 0);
218 ext4_ioend_shutdown(inode);
219 goto no_delete; 219 goto no_delete;
220 } 220 }
221 221
@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
225 if (ext4_should_order_data(inode)) 225 if (ext4_should_order_data(inode))
226 ext4_begin_ordered_truncate(inode, 0); 226 ext4_begin_ordered_truncate(inode, 0);
227 truncate_inode_pages(&inode->i_data, 0); 227 truncate_inode_pages(&inode->i_data, 0);
228 ext4_ioend_shutdown(inode);
228 229
229 if (is_bad_inode(inode)) 230 if (is_bad_inode(inode))
230 goto no_delete; 231 goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
482 return num; 483 return num;
483} 484}
484 485
486#ifdef ES_AGGRESSIVE_TEST
487static void ext4_map_blocks_es_recheck(handle_t *handle,
488 struct inode *inode,
489 struct ext4_map_blocks *es_map,
490 struct ext4_map_blocks *map,
491 int flags)
492{
493 int retval;
494
495 map->m_flags = 0;
496 /*
497 * There is a race window that the result is not the same.
498 * e.g. xfstests #223 when dioread_nolock enables. The reason
499 * is that we lookup a block mapping in extent status tree with
500 * out taking i_data_sem. So at the time the unwritten extent
501 * could be converted.
502 */
503 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
504 down_read((&EXT4_I(inode)->i_data_sem));
505 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
506 retval = ext4_ext_map_blocks(handle, inode, map, flags &
507 EXT4_GET_BLOCKS_KEEP_SIZE);
508 } else {
509 retval = ext4_ind_map_blocks(handle, inode, map, flags &
510 EXT4_GET_BLOCKS_KEEP_SIZE);
511 }
512 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
513 up_read((&EXT4_I(inode)->i_data_sem));
514 /*
515 * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
516 * because it shouldn't be marked in es_map->m_flags.
517 */
518 map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
519
520 /*
521 * We don't check m_len because extent will be collpased in status
522 * tree. So the m_len might not equal.
523 */
524 if (es_map->m_lblk != map->m_lblk ||
525 es_map->m_flags != map->m_flags ||
526 es_map->m_pblk != map->m_pblk) {
527 printk("ES cache assertation failed for inode: %lu "
528 "es_cached ex [%d/%d/%llu/%x] != "
529 "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
530 inode->i_ino, es_map->m_lblk, es_map->m_len,
531 es_map->m_pblk, es_map->m_flags, map->m_lblk,
532 map->m_len, map->m_pblk, map->m_flags,
533 retval, flags);
534 }
535}
536#endif /* ES_AGGRESSIVE_TEST */
537
485/* 538/*
486 * The ext4_map_blocks() function tries to look up the requested blocks, 539 * The ext4_map_blocks() function tries to look up the requested blocks,
487 * and returns if the blocks are already mapped. 540 * and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
509{ 562{
510 struct extent_status es; 563 struct extent_status es;
511 int retval; 564 int retval;
565#ifdef ES_AGGRESSIVE_TEST
566 struct ext4_map_blocks orig_map;
567
568 memcpy(&orig_map, map, sizeof(*map));
569#endif
512 570
513 map->m_flags = 0; 571 map->m_flags = 0;
514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 572 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
531 } else { 589 } else {
532 BUG_ON(1); 590 BUG_ON(1);
533 } 591 }
592#ifdef ES_AGGRESSIVE_TEST
593 ext4_map_blocks_es_recheck(handle, inode, map,
594 &orig_map, flags);
595#endif
534 goto found; 596 goto found;
535 } 597 }
536 598
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
551 int ret; 613 int ret;
552 unsigned long long status; 614 unsigned long long status;
553 615
616#ifdef ES_AGGRESSIVE_TEST
617 if (retval != map->m_len) {
618 printk("ES len assertation failed for inode: %lu "
619 "retval %d != map->m_len %d "
620 "in %s (lookup)\n", inode->i_ino, retval,
621 map->m_len, __func__);
622 }
623#endif
624
554 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 625 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 626 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 627 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ found:
643 int ret; 714 int ret;
644 unsigned long long status; 715 unsigned long long status;
645 716
717#ifdef ES_AGGRESSIVE_TEST
718 if (retval != map->m_len) {
719 printk("ES len assertation failed for inode: %lu "
720 "retval %d != map->m_len %d "
721 "in %s (allocation)\n", inode->i_ino, retval,
722 map->m_len, __func__);
723 }
724#endif
725
726 /*
727 * If the extent has been zeroed out, we don't need to update
728 * extent status tree.
729 */
730 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
731 ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
732 if (ext4_es_is_written(&es))
733 goto has_zeroout;
734 }
646 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 735 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 736 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
648 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 737 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ found:
655 retval = ret; 744 retval = ret;
656 } 745 }
657 746
747has_zeroout:
658 up_write((&EXT4_I(inode)->i_data_sem)); 748 up_write((&EXT4_I(inode)->i_data_sem));
659 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 749 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
660 int ret = check_block_validity(inode, map); 750 int ret = check_block_validity(inode, map);
@@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file,
1216} 1306}
1217 1307
1218/* 1308/*
1309 * Reserve a metadata for a single block located at lblock
1310 */
1311static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1312{
1313 int retries = 0;
1314 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1315 struct ext4_inode_info *ei = EXT4_I(inode);
1316 unsigned int md_needed;
1317 ext4_lblk_t save_last_lblock;
1318 int save_len;
1319
1320 /*
1321 * recalculate the amount of metadata blocks to reserve
1322 * in order to allocate nrblocks
1323 * worse case is one extent per block
1324 */
1325repeat:
1326 spin_lock(&ei->i_block_reservation_lock);
1327 /*
1328 * ext4_calc_metadata_amount() has side effects, which we have
1329 * to be prepared undo if we fail to claim space.
1330 */
1331 save_len = ei->i_da_metadata_calc_len;
1332 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1333 md_needed = EXT4_NUM_B2C(sbi,
1334 ext4_calc_metadata_amount(inode, lblock));
1335 trace_ext4_da_reserve_space(inode, md_needed);
1336
1337 /*
1338 * We do still charge estimated metadata to the sb though;
1339 * we cannot afford to run out of free blocks.
1340 */
1341 if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1342 ei->i_da_metadata_calc_len = save_len;
1343 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1344 spin_unlock(&ei->i_block_reservation_lock);
1345 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1346 cond_resched();
1347 goto repeat;
1348 }
1349 return -ENOSPC;
1350 }
1351 ei->i_reserved_meta_blocks += md_needed;
1352 spin_unlock(&ei->i_block_reservation_lock);
1353
1354 return 0; /* success */
1355}
1356
1357/*
1219 * Reserve a single cluster located at lblock 1358 * Reserve a single cluster located at lblock
1220 */ 1359 */
1221static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1360static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1263,7 +1402,7 @@ repeat:
1263 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1402 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1264 spin_unlock(&ei->i_block_reservation_lock); 1403 spin_unlock(&ei->i_block_reservation_lock);
1265 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1404 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1266 yield(); 1405 cond_resched();
1267 goto repeat; 1406 goto repeat;
1268 } 1407 }
1269 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1408 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1768 struct extent_status es; 1907 struct extent_status es;
1769 int retval; 1908 int retval;
1770 sector_t invalid_block = ~((sector_t) 0xffff); 1909 sector_t invalid_block = ~((sector_t) 0xffff);
1910#ifdef ES_AGGRESSIVE_TEST
1911 struct ext4_map_blocks orig_map;
1912
1913 memcpy(&orig_map, map, sizeof(*map));
1914#endif
1771 1915
1772 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) 1916 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1773 invalid_block = ~0; 1917 invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1809 else 1953 else
1810 BUG_ON(1); 1954 BUG_ON(1);
1811 1955
1956#ifdef ES_AGGRESSIVE_TEST
1957 ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
1958#endif
1812 return retval; 1959 return retval;
1813 } 1960 }
1814 1961
@@ -1843,8 +1990,11 @@ add_delayed:
1843 * XXX: __block_prepare_write() unmaps passed block, 1990 * XXX: __block_prepare_write() unmaps passed block,
1844 * is it OK? 1991 * is it OK?
1845 */ 1992 */
1846 /* If the block was allocated from previously allocated cluster, 1993 /*
1847 * then we dont need to reserve it again. */ 1994 * If the block was allocated from previously allocated cluster,
1995 * then we don't need to reserve it again. However we still need
1996 * to reserve metadata for every block we're going to write.
1997 */
1848 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { 1998 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1849 ret = ext4_da_reserve_space(inode, iblock); 1999 ret = ext4_da_reserve_space(inode, iblock);
1850 if (ret) { 2000 if (ret) {
@@ -1852,6 +2002,13 @@ add_delayed:
1852 retval = ret; 2002 retval = ret;
1853 goto out_unlock; 2003 goto out_unlock;
1854 } 2004 }
2005 } else {
2006 ret = ext4_da_reserve_metadata(inode, iblock);
2007 if (ret) {
2008 /* not enough space to reserve */
2009 retval = ret;
2010 goto out_unlock;
2011 }
1855 } 2012 }
1856 2013
1857 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 2014 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ add_delayed:
1873 int ret; 2030 int ret;
1874 unsigned long long status; 2031 unsigned long long status;
1875 2032
2033#ifdef ES_AGGRESSIVE_TEST
2034 if (retval != map->m_len) {
2035 printk("ES len assertation failed for inode: %lu "
2036 "retval %d != map->m_len %d "
2037 "in %s (lookup)\n", inode->i_ino, retval,
2038 map->m_len, __func__);
2039 }
2040#endif
2041
1876 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 2042 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 2043 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 2044 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
2908 3074
2909 trace_ext4_releasepage(page); 3075 trace_ext4_releasepage(page);
2910 3076
2911 WARN_ON(PageChecked(page)); 3077 /* Page has dirty journalled data -> cannot release */
2912 if (!page_has_buffers(page)) 3078 if (PageChecked(page))
2913 return 0; 3079 return 0;
2914 if (journal) 3080 if (journal)
2915 return jbd2_journal_try_to_free_buffers(journal, page, wait); 3081 return jbd2_journal_try_to_free_buffers(journal, page, wait);