Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--  fs/ext4/inode.c  182
1 file changed, 174 insertions, 8 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde3fa9e..b3a5213bc73e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
 
         trace_ext4_evict_inode(inode);
 
-        ext4_ioend_wait(inode);
-
         if (inode->i_nlink) {
                 /*
                  * When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
                  * don't use page cache.
                  */
                 if (ext4_should_journal_data(inode) &&
-                    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+                    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+                    inode->i_ino != EXT4_JOURNAL_INO) {
                         journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
                         tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
                         filemap_write_and_wait(&inode->i_data);
                 }
                 truncate_inode_pages(&inode->i_data, 0);
+                ext4_ioend_shutdown(inode);
                 goto no_delete;
         }
 
@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
         if (ext4_should_order_data(inode))
                 ext4_begin_ordered_truncate(inode, 0);
         truncate_inode_pages(&inode->i_data, 0);
+        ext4_ioend_shutdown(inode);
 
         if (is_bad_inode(inode))
                 goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
         return num;
 }
 
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_map_blocks_es_recheck(handle_t *handle,
+                                       struct inode *inode,
+                                       struct ext4_map_blocks *es_map,
+                                       struct ext4_map_blocks *map,
+                                       int flags)
+{
+        int retval;
+
+        map->m_flags = 0;
+        /*
+         * There is a race window that the result is not the same.
+         * e.g. xfstests #223 when dioread_nolock enables.  The reason
+         * is that we lookup a block mapping in extent status tree with
+         * out taking i_data_sem.  So at the time the unwritten extent
+         * could be converted.
+         */
+        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+                down_read((&EXT4_I(inode)->i_data_sem));
+        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+                retval = ext4_ext_map_blocks(handle, inode, map, flags &
+                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+        } else {
+                retval = ext4_ind_map_blocks(handle, inode, map, flags &
+                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+        }
+        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+                up_read((&EXT4_I(inode)->i_data_sem));
+        /*
+         * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
+         * because it shouldn't be marked in es_map->m_flags.
+         */
+        map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
+
+        /*
+         * We don't check m_len because extent will be collpased in status
+         * tree.  So the m_len might not equal.
+         */
+        if (es_map->m_lblk != map->m_lblk ||
+            es_map->m_flags != map->m_flags ||
+            es_map->m_pblk != map->m_pblk) {
+                printk("ES cache assertation failed for inode: %lu "
+                       "es_cached ex [%d/%d/%llu/%x] != "
+                       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
+                       inode->i_ino, es_map->m_lblk, es_map->m_len,
+                       es_map->m_pblk, es_map->m_flags, map->m_lblk,
+                       map->m_len, map->m_pblk, map->m_flags,
+                       retval, flags);
+        }
+}
+#endif /* ES_AGGRESSIVE_TEST */
+
 /*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 {
         struct extent_status es;
         int retval;
+#ifdef ES_AGGRESSIVE_TEST
+        struct ext4_map_blocks orig_map;
+
+        memcpy(&orig_map, map, sizeof(*map));
+#endif
 
         map->m_flags = 0;
         ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                 } else {
                         BUG_ON(1);
                 }
+#ifdef ES_AGGRESSIVE_TEST
+                ext4_map_blocks_es_recheck(handle, inode, map,
+                                           &orig_map, flags);
+#endif
                 goto found;
         }
 
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                 int ret;
                 unsigned long long status;
 
+#ifdef ES_AGGRESSIVE_TEST
+                if (retval != map->m_len) {
+                        printk("ES len assertation failed for inode: %lu "
+                               "retval %d != map->m_len %d "
+                               "in %s (lookup)\n", inode->i_ino, retval,
+                               map->m_len, __func__);
+                }
+#endif
+
                 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                         EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ found:
                 int ret;
                 unsigned long long status;
 
+#ifdef ES_AGGRESSIVE_TEST
+                if (retval != map->m_len) {
+                        printk("ES len assertation failed for inode: %lu "
+                               "retval %d != map->m_len %d "
+                               "in %s (allocation)\n", inode->i_ino, retval,
+                               map->m_len, __func__);
+                }
+#endif
+
+                /*
+                 * If the extent has been zeroed out, we don't need to update
+                 * extent status tree.
+                 */
+                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
+                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+                        if (ext4_es_is_written(&es))
+                                goto has_zeroout;
+                }
                 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                         EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ found:
                         retval = ret;
         }
 
+has_zeroout:
         up_write((&EXT4_I(inode)->i_data_sem));
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                 int ret = check_block_validity(inode, map);
@@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
+ * Reserve a metadata for a single block located at lblock
+ */
+static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
+{
+        int retries = 0;
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        unsigned int md_needed;
+        ext4_lblk_t save_last_lblock;
+        int save_len;
+
+        /*
+         * recalculate the amount of metadata blocks to reserve
+         * in order to allocate nrblocks
+         * worse case is one extent per block
+         */
+repeat:
+        spin_lock(&ei->i_block_reservation_lock);
+        /*
+         * ext4_calc_metadata_amount() has side effects, which we have
+         * to be prepared undo if we fail to claim space.
+         */
+        save_len = ei->i_da_metadata_calc_len;
+        save_last_lblock = ei->i_da_metadata_calc_last_lblock;
+        md_needed = EXT4_NUM_B2C(sbi,
+                                 ext4_calc_metadata_amount(inode, lblock));
+        trace_ext4_da_reserve_space(inode, md_needed);
+
+        /*
+         * We do still charge estimated metadata to the sb though;
+         * we cannot afford to run out of free blocks.
+         */
+        if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
+                ei->i_da_metadata_calc_len = save_len;
+                ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+                spin_unlock(&ei->i_block_reservation_lock);
+                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+                        cond_resched();
+                        goto repeat;
+                }
+                return -ENOSPC;
+        }
+        ei->i_reserved_meta_blocks += md_needed;
+        spin_unlock(&ei->i_block_reservation_lock);
+
+        return 0;       /* success */
+}
+
+/*
  * Reserve a single cluster located at lblock
  */
 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1263,7 +1402,7 @@ repeat:
                 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
                 spin_unlock(&ei->i_block_reservation_lock);
                 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-                        yield();
+                        cond_resched();
                         goto repeat;
                 }
                 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
         struct extent_status es;
         int retval;
         sector_t invalid_block = ~((sector_t) 0xffff);
+#ifdef ES_AGGRESSIVE_TEST
+        struct ext4_map_blocks orig_map;
+
+        memcpy(&orig_map, map, sizeof(*map));
+#endif
 
         if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
                 invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                 else
                         BUG_ON(1);
 
+#ifdef ES_AGGRESSIVE_TEST
+                ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
+#endif
                 return retval;
         }
 
@@ -1843,8 +1990,11 @@ add_delayed:
                  * XXX: __block_prepare_write() unmaps passed block,
                  * is it OK?
                  */
-                /* If the block was allocated from previously allocated cluster,
-                 * then we dont need to reserve it again. */
+                /*
+                 * If the block was allocated from previously allocated cluster,
+                 * then we don't need to reserve it again. However we still need
+                 * to reserve metadata for every block we're going to write.
+                 */
                 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
                         ret = ext4_da_reserve_space(inode, iblock);
                         if (ret) {
@@ -1852,6 +2002,13 @@ add_delayed:
                                 retval = ret;
                                 goto out_unlock;
                         }
+                } else {
+                        ret = ext4_da_reserve_metadata(inode, iblock);
+                        if (ret) {
+                                /* not enough space to reserve */
+                                retval = ret;
+                                goto out_unlock;
+                        }
                 }
 
                 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ add_delayed:
                 int ret;
                 unsigned long long status;
 
+#ifdef ES_AGGRESSIVE_TEST
+                if (retval != map->m_len) {
+                        printk("ES len assertation failed for inode: %lu "
+                               "retval %d != map->m_len %d "
+                               "in %s (lookup)\n", inode->i_ino, retval,
+                               map->m_len, __func__);
+                }
+#endif
+
                 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                         EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 
         trace_ext4_releasepage(page);
 
-        WARN_ON(PageChecked(page));
-        if (!page_has_buffers(page))
+        /* Page has dirty journalled data -> cannot release */
+        if (PageChecked(page))
                 return 0;
         if (journal)
                 return jbd2_journal_try_to_free_buffers(journal, page, wait);
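
Note on the ES_AGGRESSIVE_TEST hunks above: they all follow one pattern — keep a copy of the mapping answered from the extent status cache, redo the lookup against the on-disk extent or indirect-block trees, and complain if the two disagree (lengths excepted, since cached extents may have been collapsed/merged in the status tree). A minimal userspace sketch of that consistency-check pattern, using a hypothetical block_map struct and slow_lookup() stand-in rather than any kernel API, could look like this:

    #include <stdio.h>

    /* Hypothetical stand-in for struct ext4_map_blocks: a logical range
     * plus the physical block and state flags it maps to. */
    struct block_map {
            unsigned long long lblk;   /* first logical block */
            unsigned long long pblk;   /* first physical block */
            unsigned int       len;    /* number of blocks */
            unsigned int       flags;  /* mapping state bits */
    };

    /* Hypothetical slow path standing in for the on-disk tree walk;
     * here it simply fabricates a deterministic answer. */
    static int slow_lookup(unsigned long long lblk, struct block_map *map)
    {
            map->lblk  = lblk;
            map->pblk  = 4096 + lblk;
            map->len   = 8;
            map->flags = 0x1;
            return (int)map->len;
    }

    /* Compare a cached answer against the slow path, in the spirit of
     * ext4_map_blocks_es_recheck(): lengths may differ, but the start
     * block, physical block and flags must agree. */
    static void recheck(const struct block_map *cached)
    {
            struct block_map fresh;
            int retval = slow_lookup(cached->lblk, &fresh);

            if (cached->lblk != fresh.lblk ||
                cached->pblk != fresh.pblk ||
                cached->flags != fresh.flags)
                    fprintf(stderr,
                            "cache mismatch: cached [%llu/%u/%llu/%x] != "
                            "found [%llu/%u/%llu/%x] retval %d\n",
                            cached->lblk, cached->len, cached->pblk,
                            cached->flags, fresh.lblk, fresh.len,
                            fresh.pblk, fresh.flags, retval);
    }

    int main(void)
    {
            struct block_map cached = { .lblk = 0, .pblk = 4096,
                                        .len = 16, .flags = 0x1 };

            recheck(&cached);  /* silent: lengths differ, but that is allowed */
            return 0;
    }

As in the kernel hunks, the cached answer is trusted only when the cheap lookup and the authoritative lookup agree on everything except the extent length.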