diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-01 13:57:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-01 13:57:49 -0400 |
commit | 395d73413c5656c6d7706ae91dcb441f9b7e3074 (patch) | |
tree | 7fadabe996f70d7918583fa2312d4fad19397fcb /fs/ext4/inode.c | |
parent | c226fd659fa7b6a7b038df5ae6856a68514bacde (diff) | |
parent | 06705bff9114531a997a7d0c2520bea0f2927410 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (33 commits)
ext4: Regularize mount options
ext4: fix locking typo in mballoc which could cause soft lockup hangs
ext4: fix typo which causes a memory leak on error path
jbd2: Update locking coments
ext4: Rename pa_linear to pa_type
ext4: add checks of block references for non-extent inodes
ext4: Check for an valid i_mode when reading the inode from disk
ext4: Use WRITE_SYNC for commits which are caused by fsync()
ext4: Add auto_da_alloc mount option
ext4: Use struct flex_groups to calculate get_orlov_stats()
ext4: Use atomic_t's in struct flex_groups
ext4: remove /proc tuning knobs
ext4: Add sysfs support
ext4: Track lifetime disk writes
ext4: Fix discard of inode prealloc space with delayed allocation.
ext4: Automatically allocate delay allocated blocks on rename
ext4: Automatically allocate delay allocated blocks on close
ext4: add EXT4_IOC_ALLOC_DA_BLKS ioctl
ext4: Simplify delalloc code by removing mpage_da_writepages()
ext4: Save stack space by removing fake buffer heads
...
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 424 |
1 files changed, 263 insertions, 161 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c | |||
index dd82ff39006..a2e7952bc5f 100644 | |||
--- a/fs/ext4/inode.c | |||
+++ b/fs/ext4/inode.c | |||
@@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode, | |||
371 | return n; | 371 | return n; |
372 | } | 372 | } |
373 | 373 | ||
374 | static int __ext4_check_blockref(const char *function, struct inode *inode, | ||
375 | unsigned int *p, unsigned int max) { | ||
376 | |||
377 | unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); | ||
378 | unsigned int *bref = p; | ||
379 | while (bref < p+max) { | ||
380 | if (unlikely(*bref >= maxblocks)) { | ||
381 | ext4_error(inode->i_sb, function, | ||
382 | "block reference %u >= max (%u) " | ||
383 | "in inode #%lu, offset=%d", | ||
384 | *bref, maxblocks, | ||
385 | inode->i_ino, (int)(bref-p)); | ||
386 | return -EIO; | ||
387 | } | ||
388 | bref++; | ||
389 | } | ||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | |||
394 | #define ext4_check_indirect_blockref(inode, bh) \ | ||
395 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ | ||
396 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | ||
397 | |||
398 | #define ext4_check_inode_blockref(inode) \ | ||
399 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ | ||
400 | EXT4_NDIR_BLOCKS) | ||
401 | |||
374 | /** | 402 | /** |
375 | * ext4_get_branch - read the chain of indirect blocks leading to data | 403 | * ext4_get_branch - read the chain of indirect blocks leading to data |
376 | * @inode: inode in question | 404 | * @inode: inode in question |
@@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
415 | if (!p->key) | 443 | if (!p->key) |
416 | goto no_block; | 444 | goto no_block; |
417 | while (--depth) { | 445 | while (--depth) { |
418 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 446 | bh = sb_getblk(sb, le32_to_cpu(p->key)); |
419 | if (!bh) | 447 | if (unlikely(!bh)) |
420 | goto failure; | 448 | goto failure; |
449 | |||
450 | if (!bh_uptodate_or_lock(bh)) { | ||
451 | if (bh_submit_read(bh) < 0) { | ||
452 | put_bh(bh); | ||
453 | goto failure; | ||
454 | } | ||
455 | /* validate block references */ | ||
456 | if (ext4_check_indirect_blockref(inode, bh)) { | ||
457 | put_bh(bh); | ||
458 | goto failure; | ||
459 | } | ||
460 | } | ||
461 | |||
421 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); | 462 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
422 | /* Reader: end */ | 463 | /* Reader: end */ |
423 | if (!p->key) | 464 | if (!p->key) |
@@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
459 | ext4_fsblk_t bg_start; | 500 | ext4_fsblk_t bg_start; |
460 | ext4_fsblk_t last_block; | 501 | ext4_fsblk_t last_block; |
461 | ext4_grpblk_t colour; | 502 | ext4_grpblk_t colour; |
503 | ext4_group_t block_group; | ||
504 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
462 | 505 | ||
463 | /* Try to find previous block */ | 506 | /* Try to find previous block */ |
464 | for (p = ind->p - 1; p >= start; p--) { | 507 | for (p = ind->p - 1; p >= start; p--) { |
@@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
474 | * It is going to be referred to from the inode itself? OK, just put it | 517 | * It is going to be referred to from the inode itself? OK, just put it |
475 | * into the same cylinder group then. | 518 | * into the same cylinder group then. |
476 | */ | 519 | */ |
477 | bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); | 520 | block_group = ei->i_block_group; |
521 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
522 | block_group &= ~(flex_size-1); | ||
523 | if (S_ISREG(inode->i_mode)) | ||
524 | block_group++; | ||
525 | } | ||
526 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | ||
478 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | 527 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; |
479 | 528 | ||
529 | /* | ||
530 | * If we are doing delayed allocation, we don't need to take | ||
531 | * colour into account. | ||
532 | */ | ||
533 | if (test_opt(inode->i_sb, DELALLOC)) | ||
534 | return bg_start; | ||
535 | |||
480 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | 536 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) |
481 | colour = (current->pid % 16) * | 537 | colour = (current->pid % 16) * |
482 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | 538 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); |
@@ -1052,9 +1108,16 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1052 | /* | 1108 | /* |
1053 | * free those over-booking quota for metadata blocks | 1109 | * free those over-booking quota for metadata blocks |
1054 | */ | 1110 | */ |
1055 | |||
1056 | if (mdb_free) | 1111 | if (mdb_free) |
1057 | vfs_dq_release_reservation_block(inode, mdb_free); | 1112 | vfs_dq_release_reservation_block(inode, mdb_free); |
1113 | |||
1114 | /* | ||
1115 | * If we have done all the pending block allocations and if | ||
1116 | * there aren't any writers on the inode, we can discard the | ||
1117 | * inode's preallocations. | ||
1118 | */ | ||
1119 | if (!total && (atomic_read(&inode->i_writecount) == 0)) | ||
1120 | ext4_discard_preallocations(inode); | ||
1058 | } | 1121 | } |
1059 | 1122 | ||
1060 | /* | 1123 | /* |
@@ -1688,9 +1751,10 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1688 | 1751 | ||
1689 | struct mpage_da_data { | 1752 | struct mpage_da_data { |
1690 | struct inode *inode; | 1753 | struct inode *inode; |
1691 | struct buffer_head lbh; /* extent of blocks */ | 1754 | sector_t b_blocknr; /* start block number of extent */ |
1755 | size_t b_size; /* size of extent */ | ||
1756 | unsigned long b_state; /* state of the extent */ | ||
1692 | unsigned long first_page, next_page; /* extent of pages */ | 1757 | unsigned long first_page, next_page; /* extent of pages */ |
1693 | get_block_t *get_block; | ||
1694 | struct writeback_control *wbc; | 1758 | struct writeback_control *wbc; |
1695 | int io_done; | 1759 | int io_done; |
1696 | int pages_written; | 1760 | int pages_written; |
@@ -1704,7 +1768,6 @@ struct mpage_da_data { | |||
1704 | * @mpd->inode: inode | 1768 | * @mpd->inode: inode |
1705 | * @mpd->first_page: first page of the extent | 1769 | * @mpd->first_page: first page of the extent |
1706 | * @mpd->next_page: page after the last page of the extent | 1770 | * @mpd->next_page: page after the last page of the extent |
1707 | * @mpd->get_block: the filesystem's block mapper function | ||
1708 | * | 1771 | * |
1709 | * By the time mpage_da_submit_io() is called we expect all blocks | 1772 | * By the time mpage_da_submit_io() is called we expect all blocks |
1710 | * to be allocated. this may be wrong if allocation failed. | 1773 | * to be allocated. this may be wrong if allocation failed. |
@@ -1724,7 +1787,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1724 | /* | 1787 | /* |
1725 | * We need to start from the first_page to the next_page - 1 | 1788 | * We need to start from the first_page to the next_page - 1 |
1726 | * to make sure we also write the mapped dirty buffer_heads. | 1789 | * to make sure we also write the mapped dirty buffer_heads. |
1727 | * If we look at mpd->lbh.b_blocknr we would only be looking | 1790 | * If we look at mpd->b_blocknr we would only be looking |
1728 | * at the currently mapped buffer_heads. | 1791 | * at the currently mapped buffer_heads. |
1729 | */ | 1792 | */ |
1730 | index = mpd->first_page; | 1793 | index = mpd->first_page; |
@@ -1914,68 +1977,111 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1914 | return; | 1977 | return; |
1915 | } | 1978 | } |
1916 | 1979 | ||
1980 | #define EXT4_DELALLOC_RSVED 1 | ||
1981 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
1982 | struct buffer_head *bh_result, int create) | ||
1983 | { | ||
1984 | int ret; | ||
1985 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1986 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
1987 | handle_t *handle = NULL; | ||
1988 | |||
1989 | handle = ext4_journal_current_handle(); | ||
1990 | BUG_ON(!handle); | ||
1991 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
1992 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
1993 | if (ret <= 0) | ||
1994 | return ret; | ||
1995 | |||
1996 | bh_result->b_size = (ret << inode->i_blkbits); | ||
1997 | |||
1998 | if (ext4_should_order_data(inode)) { | ||
1999 | int retval; | ||
2000 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2001 | if (retval) | ||
2002 | /* | ||
2003 | * Failed to add inode for ordered mode. Don't | ||
2004 | * update file size | ||
2005 | */ | ||
2006 | return retval; | ||
2007 | } | ||
2008 | |||
2009 | /* | ||
2010 | * Update on-disk size along with block allocation we don't | ||
2011 | * use 'extend_disksize' as size may change within already | ||
2012 | * allocated block -bzzz | ||
2013 | */ | ||
2014 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
2015 | if (disksize > i_size_read(inode)) | ||
2016 | disksize = i_size_read(inode); | ||
2017 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2018 | ext4_update_i_disksize(inode, disksize); | ||
2019 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2020 | return ret; | ||
2021 | } | ||
2022 | return 0; | ||
2023 | } | ||
2024 | |||
1917 | /* | 2025 | /* |
1918 | * mpage_da_map_blocks - go through given space | 2026 | * mpage_da_map_blocks - go through given space |
1919 | * | 2027 | * |
1920 | * @mpd->lbh - bh describing space | 2028 | * @mpd - bh describing space |
1921 | * @mpd->get_block - the filesystem's block mapper function | ||
1922 | * | 2029 | * |
1923 | * The function skips space we know is already mapped to disk blocks. | 2030 | * The function skips space we know is already mapped to disk blocks. |
1924 | * | 2031 | * |
1925 | */ | 2032 | */ |
1926 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2033 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
1927 | { | 2034 | { |
1928 | int err = 0; | 2035 | int err = 0; |
1929 | struct buffer_head new; | 2036 | struct buffer_head new; |
1930 | struct buffer_head *lbh = &mpd->lbh; | ||
1931 | sector_t next; | 2037 | sector_t next; |
1932 | 2038 | ||
1933 | /* | 2039 | /* |
1934 | * We consider only non-mapped and non-allocated blocks | 2040 | * We consider only non-mapped and non-allocated blocks |
1935 | */ | 2041 | */ |
1936 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 2042 | if ((mpd->b_state & (1 << BH_Mapped)) && |
2043 | !(mpd->b_state & (1 << BH_Delay))) | ||
1937 | return 0; | 2044 | return 0; |
1938 | new.b_state = lbh->b_state; | 2045 | new.b_state = mpd->b_state; |
1939 | new.b_blocknr = 0; | 2046 | new.b_blocknr = 0; |
1940 | new.b_size = lbh->b_size; | 2047 | new.b_size = mpd->b_size; |
1941 | next = lbh->b_blocknr; | 2048 | next = mpd->b_blocknr; |
1942 | /* | 2049 | /* |
1943 | * If we didn't accumulate anything | 2050 | * If we didn't accumulate anything |
1944 | * to write simply return | 2051 | * to write simply return |
1945 | */ | 2052 | */ |
1946 | if (!new.b_size) | 2053 | if (!new.b_size) |
1947 | return 0; | 2054 | return 0; |
1948 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1949 | if (err) { | ||
1950 | 2055 | ||
1951 | /* If get block returns with error | 2056 | err = ext4_da_get_block_write(mpd->inode, next, &new, 1); |
1952 | * we simply return. Later writepage | 2057 | if (err) { |
1953 | * will redirty the page and writepages | 2058 | /* |
1954 | * will find the dirty page again | 2059 | * If get block returns with error we simply |
2060 | * return. Later writepage will redirty the page and | ||
2061 | * writepages will find the dirty page again | ||
1955 | */ | 2062 | */ |
1956 | if (err == -EAGAIN) | 2063 | if (err == -EAGAIN) |
1957 | return 0; | 2064 | return 0; |
1958 | 2065 | ||
1959 | if (err == -ENOSPC && | 2066 | if (err == -ENOSPC && |
1960 | ext4_count_free_blocks(mpd->inode->i_sb)) { | 2067 | ext4_count_free_blocks(mpd->inode->i_sb)) { |
1961 | mpd->retval = err; | 2068 | mpd->retval = err; |
1962 | return 0; | 2069 | return 0; |
1963 | } | 2070 | } |
1964 | 2071 | ||
1965 | /* | 2072 | /* |
1966 | * get block failure will cause us | 2073 | * get block failure will cause us to loop in |
1967 | * to loop in writepages. Because | 2074 | * writepages, because a_ops->writepage won't be able |
1968 | * a_ops->writepage won't be able to | 2075 | * to make progress. The page will be redirtied by |
1969 | * make progress. The page will be redirtied | 2076 | * writepage and writepages will again try to write |
1970 | * by writepage and writepages will again | 2077 | * the same. |
1971 | * try to write the same. | ||
1972 | */ | 2078 | */ |
1973 | printk(KERN_EMERG "%s block allocation failed for inode %lu " | 2079 | printk(KERN_EMERG "%s block allocation failed for inode %lu " |
1974 | "at logical offset %llu with max blocks " | 2080 | "at logical offset %llu with max blocks " |
1975 | "%zd with error %d\n", | 2081 | "%zd with error %d\n", |
1976 | __func__, mpd->inode->i_ino, | 2082 | __func__, mpd->inode->i_ino, |
1977 | (unsigned long long)next, | 2083 | (unsigned long long)next, |
1978 | lbh->b_size >> mpd->inode->i_blkbits, err); | 2084 | mpd->b_size >> mpd->inode->i_blkbits, err); |
1979 | printk(KERN_EMERG "This should not happen.!! " | 2085 | printk(KERN_EMERG "This should not happen.!! " |
1980 | "Data will be lost\n"); | 2086 | "Data will be lost\n"); |
1981 | if (err == -ENOSPC) { | 2087 | if (err == -ENOSPC) { |
@@ -1983,7 +2089,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1983 | } | 2089 | } |
1984 | /* invalidate all the pages */ | 2090 | /* invalidate all the pages */ |
1985 | ext4_da_block_invalidatepages(mpd, next, | 2091 | ext4_da_block_invalidatepages(mpd, next, |
1986 | lbh->b_size >> mpd->inode->i_blkbits); | 2092 | mpd->b_size >> mpd->inode->i_blkbits); |
1987 | return err; | 2093 | return err; |
1988 | } | 2094 | } |
1989 | BUG_ON(new.b_size == 0); | 2095 | BUG_ON(new.b_size == 0); |
@@ -1995,7 +2101,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1995 | * If blocks are delayed marked, we need to | 2101 | * If blocks are delayed marked, we need to |
1996 | * put actual blocknr and drop delayed bit | 2102 | * put actual blocknr and drop delayed bit |
1997 | */ | 2103 | */ |
1998 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | 2104 | if ((mpd->b_state & (1 << BH_Delay)) || |
2105 | (mpd->b_state & (1 << BH_Unwritten))) | ||
1999 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2106 | mpage_put_bnr_to_bhs(mpd, next, &new); |
2000 | 2107 | ||
2001 | return 0; | 2108 | return 0; |
@@ -2014,12 +2121,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2014 | * the function is used to collect contig. blocks in same state | 2121 | * the function is used to collect contig. blocks in same state |
2015 | */ | 2122 | */ |
2016 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 2123 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
2017 | sector_t logical, struct buffer_head *bh) | 2124 | sector_t logical, size_t b_size, |
2125 | unsigned long b_state) | ||
2018 | { | 2126 | { |
2019 | sector_t next; | 2127 | sector_t next; |
2020 | size_t b_size = bh->b_size; | 2128 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; |
2021 | struct buffer_head *lbh = &mpd->lbh; | ||
2022 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
2023 | 2129 | ||
2024 | /* check if the reserved journal credits might overflow */ | 2130 | /* check if the reserved journal credits might overflow */ |
2025 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | 2131 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { |
@@ -2046,19 +2152,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2046 | /* | 2152 | /* |
2047 | * First block in the extent | 2153 | * First block in the extent |
2048 | */ | 2154 | */ |
2049 | if (lbh->b_size == 0) { | 2155 | if (mpd->b_size == 0) { |
2050 | lbh->b_blocknr = logical; | 2156 | mpd->b_blocknr = logical; |
2051 | lbh->b_size = b_size; | 2157 | mpd->b_size = b_size; |
2052 | lbh->b_state = bh->b_state & BH_FLAGS; | 2158 | mpd->b_state = b_state & BH_FLAGS; |
2053 | return; | 2159 | return; |
2054 | } | 2160 | } |
2055 | 2161 | ||
2056 | next = lbh->b_blocknr + nrblocks; | 2162 | next = mpd->b_blocknr + nrblocks; |
2057 | /* | 2163 | /* |
2058 | * Can we merge the block to our big extent? | 2164 | * Can we merge the block to our big extent? |
2059 | */ | 2165 | */ |
2060 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 2166 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { |
2061 | lbh->b_size += b_size; | 2167 | mpd->b_size += b_size; |
2062 | return; | 2168 | return; |
2063 | } | 2169 | } |
2064 | 2170 | ||
@@ -2087,7 +2193,7 @@ static int __mpage_da_writepage(struct page *page, | |||
2087 | { | 2193 | { |
2088 | struct mpage_da_data *mpd = data; | 2194 | struct mpage_da_data *mpd = data; |
2089 | struct inode *inode = mpd->inode; | 2195 | struct inode *inode = mpd->inode; |
2090 | struct buffer_head *bh, *head, fake; | 2196 | struct buffer_head *bh, *head; |
2091 | sector_t logical; | 2197 | sector_t logical; |
2092 | 2198 | ||
2093 | if (mpd->io_done) { | 2199 | if (mpd->io_done) { |
@@ -2129,9 +2235,9 @@ static int __mpage_da_writepage(struct page *page, | |||
2129 | /* | 2235 | /* |
2130 | * ... and blocks | 2236 | * ... and blocks |
2131 | */ | 2237 | */ |
2132 | mpd->lbh.b_size = 0; | 2238 | mpd->b_size = 0; |
2133 | mpd->lbh.b_state = 0; | 2239 | mpd->b_state = 0; |
2134 | mpd->lbh.b_blocknr = 0; | 2240 | mpd->b_blocknr = 0; |
2135 | } | 2241 | } |
2136 | 2242 | ||
2137 | mpd->next_page = page->index + 1; | 2243 | mpd->next_page = page->index + 1; |
@@ -2139,16 +2245,8 @@ static int __mpage_da_writepage(struct page *page, | |||
2139 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2245 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2140 | 2246 | ||
2141 | if (!page_has_buffers(page)) { | 2247 | if (!page_has_buffers(page)) { |
2142 | /* | 2248 | mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE, |
2143 | * There is no attached buffer heads yet (mmap?) | 2249 | (1 << BH_Dirty) | (1 << BH_Uptodate)); |
2144 | * we treat the page asfull of dirty blocks | ||
2145 | */ | ||
2146 | bh = &fake; | ||
2147 | bh->b_size = PAGE_CACHE_SIZE; | ||
2148 | bh->b_state = 0; | ||
2149 | set_buffer_dirty(bh); | ||
2150 | set_buffer_uptodate(bh); | ||
2151 | mpage_add_bh_to_extent(mpd, logical, bh); | ||
2152 | if (mpd->io_done) | 2250 | if (mpd->io_done) |
2153 | return MPAGE_DA_EXTENT_TAIL; | 2251 | return MPAGE_DA_EXTENT_TAIL; |
2154 | } else { | 2252 | } else { |
@@ -2166,8 +2264,10 @@ static int __mpage_da_writepage(struct page *page, | |||
2166 | * with the page in ext4_da_writepage | 2264 | * with the page in ext4_da_writepage |
2167 | */ | 2265 | */ |
2168 | if (buffer_dirty(bh) && | 2266 | if (buffer_dirty(bh) && |
2169 | (!buffer_mapped(bh) || buffer_delay(bh))) { | 2267 | (!buffer_mapped(bh) || buffer_delay(bh))) { |
2170 | mpage_add_bh_to_extent(mpd, logical, bh); | 2268 | mpage_add_bh_to_extent(mpd, logical, |
2269 | bh->b_size, | ||
2270 | bh->b_state); | ||
2171 | if (mpd->io_done) | 2271 | if (mpd->io_done) |
2172 | return MPAGE_DA_EXTENT_TAIL; | 2272 | return MPAGE_DA_EXTENT_TAIL; |
2173 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | 2273 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { |
@@ -2179,9 +2279,8 @@ static int __mpage_da_writepage(struct page *page, | |||
2179 | * unmapped buffer_head later we need to | 2279 | * unmapped buffer_head later we need to |
2180 | * use the b_state flag of that buffer_head. | 2280 | * use the b_state flag of that buffer_head. |
2181 | */ | 2281 | */ |
2182 | if (mpd->lbh.b_size == 0) | 2282 | if (mpd->b_size == 0) |
2183 | mpd->lbh.b_state = | 2283 | mpd->b_state = bh->b_state & BH_FLAGS; |
2184 | bh->b_state & BH_FLAGS; | ||
2185 | } | 2284 | } |
2186 | logical++; | 2285 | logical++; |
2187 | } while ((bh = bh->b_this_page) != head); | 2286 | } while ((bh = bh->b_this_page) != head); |
@@ -2191,51 +2290,6 @@ static int __mpage_da_writepage(struct page *page, | |||
2191 | } | 2290 | } |
2192 | 2291 | ||
2193 | /* | 2292 | /* |
2194 | * mpage_da_writepages - walk the list of dirty pages of the given | ||
2195 | * address space, allocates non-allocated blocks, maps newly-allocated | ||
2196 | * blocks to existing bhs and issue IO them | ||
2197 | * | ||
2198 | * @mapping: address space structure to write | ||
2199 | * @wbc: subtract the number of written pages from *@wbc->nr_to_write | ||
2200 | * @get_block: the filesystem's block mapper function. | ||
2201 | * | ||
2202 | * This is a library function, which implements the writepages() | ||
2203 | * address_space_operation. | ||
2204 | */ | ||
2205 | static int mpage_da_writepages(struct address_space *mapping, | ||
2206 | struct writeback_control *wbc, | ||
2207 | struct mpage_da_data *mpd) | ||
2208 | { | ||
2209 | int ret; | ||
2210 | |||
2211 | if (!mpd->get_block) | ||
2212 | return generic_writepages(mapping, wbc); | ||
2213 | |||
2214 | mpd->lbh.b_size = 0; | ||
2215 | mpd->lbh.b_state = 0; | ||
2216 | mpd->lbh.b_blocknr = 0; | ||
2217 | mpd->first_page = 0; | ||
2218 | mpd->next_page = 0; | ||
2219 | mpd->io_done = 0; | ||
2220 | mpd->pages_written = 0; | ||
2221 | mpd->retval = 0; | ||
2222 | |||
2223 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); | ||
2224 | /* | ||
2225 | * Handle last extent of pages | ||
2226 | */ | ||
2227 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { | ||
2228 | if (mpage_da_map_blocks(mpd) == 0) | ||
2229 | mpage_da_submit_io(mpd); | ||
2230 | |||
2231 | mpd->io_done = 1; | ||
2232 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2233 | } | ||
2234 | wbc->nr_to_write -= mpd->pages_written; | ||
2235 | return ret; | ||
2236 | } | ||
2237 | |||
2238 | /* | ||
2239 | * this is a special callback for ->write_begin() only | 2293 | * this is a special callback for ->write_begin() only |
2240 | * it's intention is to return mapped block or reserve space | 2294 | * it's intention is to return mapped block or reserve space |
2241 | */ | 2295 | */ |
@@ -2274,51 +2328,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2274 | 2328 | ||
2275 | return ret; | 2329 | return ret; |
2276 | } | 2330 | } |
2277 | #define EXT4_DELALLOC_RSVED 1 | ||
2278 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
2279 | struct buffer_head *bh_result, int create) | ||
2280 | { | ||
2281 | int ret; | ||
2282 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
2283 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
2284 | handle_t *handle = NULL; | ||
2285 | |||
2286 | handle = ext4_journal_current_handle(); | ||
2287 | BUG_ON(!handle); | ||
2288 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2289 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2290 | if (ret > 0) { | ||
2291 | |||
2292 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2293 | |||
2294 | if (ext4_should_order_data(inode)) { | ||
2295 | int retval; | ||
2296 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2297 | if (retval) | ||
2298 | /* | ||
2299 | * Failed to add inode for ordered | ||
2300 | * mode. Don't update file size | ||
2301 | */ | ||
2302 | return retval; | ||
2303 | } | ||
2304 | |||
2305 | /* | ||
2306 | * Update on-disk size along with block allocation | ||
2307 | * we don't use 'extend_disksize' as size may change | ||
2308 | * within already allocated block -bzzz | ||
2309 | */ | ||
2310 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
2311 | if (disksize > i_size_read(inode)) | ||
2312 | disksize = i_size_read(inode); | ||
2313 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2314 | ext4_update_i_disksize(inode, disksize); | ||
2315 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2316 | return ret; | ||
2317 | } | ||
2318 | ret = 0; | ||
2319 | } | ||
2320 | return ret; | ||
2321 | } | ||
2322 | 2331 | ||
2323 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | 2332 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) |
2324 | { | 2333 | { |
@@ -2569,8 +2578,38 @@ retry: | |||
2569 | dump_stack(); | 2578 | dump_stack(); |
2570 | goto out_writepages; | 2579 | goto out_writepages; |
2571 | } | 2580 | } |
2572 | mpd.get_block = ext4_da_get_block_write; | 2581 | |
2573 | ret = mpage_da_writepages(mapping, wbc, &mpd); | 2582 | /* |
2583 | * Now call __mpage_da_writepage to find the next | ||
2584 | * contiguous region of logical blocks that need | ||
2585 | * blocks to be allocated by ext4. We don't actually | ||
2586 | * submit the blocks for I/O here, even though | ||
2587 | * write_cache_pages thinks it will, and will set the | ||
2588 | * pages as clean for write before calling | ||
2589 | * __mpage_da_writepage(). | ||
2590 | */ | ||
2591 | mpd.b_size = 0; | ||
2592 | mpd.b_state = 0; | ||
2593 | mpd.b_blocknr = 0; | ||
2594 | mpd.first_page = 0; | ||
2595 | mpd.next_page = 0; | ||
2596 | mpd.io_done = 0; | ||
2597 | mpd.pages_written = 0; | ||
2598 | mpd.retval = 0; | ||
2599 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | ||
2600 | &mpd); | ||
2601 | /* | ||
2602 | * If we have a contiguous extent of pages and we | ||
2603 | * haven't done the I/O yet, map the blocks and submit | ||
2604 | * them for I/O. | ||
2605 | */ | ||
2606 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | ||
2607 | if (mpage_da_map_blocks(&mpd) == 0) | ||
2608 | mpage_da_submit_io(&mpd); | ||
2609 | mpd.io_done = 1; | ||
2610 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2611 | } | ||
2612 | wbc->nr_to_write -= mpd.pages_written; | ||
2574 | 2613 | ||
2575 | ext4_journal_stop(handle); | 2614 | ext4_journal_stop(handle); |
2576 | 2615 | ||
@@ -2846,6 +2885,48 @@ out: | |||
2846 | return; | 2885 | return; |
2847 | } | 2886 | } |
2848 | 2887 | ||
2888 | /* | ||
2889 | * Force all delayed allocation blocks to be allocated for a given inode. | ||
2890 | */ | ||
2891 | int ext4_alloc_da_blocks(struct inode *inode) | ||
2892 | { | ||
2893 | if (!EXT4_I(inode)->i_reserved_data_blocks && | ||
2894 | !EXT4_I(inode)->i_reserved_meta_blocks) | ||
2895 | return 0; | ||
2896 | |||
2897 | /* | ||
2898 | * We do something simple for now. The filemap_flush() will | ||
2899 | * also start triggering a write of the data blocks, which is | ||
2900 | * not strictly speaking necessary (and for users of | ||
2901 | * laptop_mode, not even desirable). However, to do otherwise | ||
2902 | * would require replicating code paths in: | ||
2903 | * | ||
2904 | * ext4_da_writepages() -> | ||
2905 | * write_cache_pages() ---> (via passed in callback function) | ||
2906 | * __mpage_da_writepage() --> | ||
2907 | * mpage_add_bh_to_extent() | ||
2908 | * mpage_da_map_blocks() | ||
2909 | * | ||
2910 | * The problem is that write_cache_pages(), located in | ||
2911 | * mm/page-writeback.c, marks pages clean in preparation for | ||
2912 | * doing I/O, which is not desirable if we're not planning on | ||
2913 | * doing I/O at all. | ||
2914 | * | ||
2915 | * We could call write_cache_pages(), and then redirty all of | ||
2916 | * the pages by calling redirty_page_for_writeback() but that | ||
2917 | * would be ugly in the extreme. So instead we would need to | ||
2918 | * replicate parts of the code in the above functions, | ||
2919 | * simplifying them because we wouldn't actually intend to | ||
2920 | * write out the pages, but rather only collect contiguous | ||
2921 | * logical block extents, call the multi-block allocator, and | ||
2922 | * then update the buffer heads with the block allocations. | ||
2923 | * | ||
2924 | * For now, though, we'll cheat by calling filemap_flush(), | ||
2925 | * which will map the blocks, and start the I/O, but not | ||
2926 | * actually wait for the I/O to complete. | ||
2927 | */ | ||
2928 | return filemap_flush(inode->i_mapping); | ||
2929 | } | ||
2849 | 2930 | ||
2850 | /* | 2931 | /* |
2851 | * bmap() is special. It gets used by applications such as lilo and by | 2932 | * bmap() is special. It gets used by applications such as lilo and by |
@@ -3868,6 +3949,9 @@ void ext4_truncate(struct inode *inode) | |||
3868 | if (!ext4_can_truncate(inode)) | 3949 | if (!ext4_can_truncate(inode)) |
3869 | return; | 3950 | return; |
3870 | 3951 | ||
3952 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3953 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | ||
3954 | |||
3871 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3955 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
3872 | ext4_ext_truncate(inode); | 3956 | ext4_ext_truncate(inode); |
3873 | return; | 3957 | return; |
@@ -4110,12 +4194,7 @@ make_io: | |||
4110 | unsigned num; | 4194 | unsigned num; |
4111 | 4195 | ||
4112 | table = ext4_inode_table(sb, gdp); | 4196 | table = ext4_inode_table(sb, gdp); |
4113 | /* Make sure s_inode_readahead_blks is a power of 2 */ | 4197 | /* s_inode_readahead_blks is always a power of 2 */ |
4114 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
4115 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
4116 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
4117 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
4118 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
4119 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | 4198 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); |
4120 | if (table > b) | 4199 | if (table > b) |
4121 | b = table; | 4200 | b = table; |
@@ -4287,6 +4366,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4287 | ei->i_disksize = inode->i_size; | 4366 | ei->i_disksize = inode->i_size; |
4288 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 4367 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
4289 | ei->i_block_group = iloc.block_group; | 4368 | ei->i_block_group = iloc.block_group; |
4369 | ei->i_last_alloc_group = ~0; | ||
4290 | /* | 4370 | /* |
4291 | * NOTE! The in-memory inode i_data array is in little-endian order | 4371 | * NOTE! The in-memory inode i_data array is in little-endian order |
4292 | * even on big-endian machines: we do NOT byteswap the block numbers! | 4372 | * even on big-endian machines: we do NOT byteswap the block numbers! |
@@ -4329,6 +4409,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4329 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | 4409 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; |
4330 | } | 4410 | } |
4331 | 4411 | ||
4412 | if (ei->i_flags & EXT4_EXTENTS_FL) { | ||
4413 | /* Validate extent which is part of inode */ | ||
4414 | ret = ext4_ext_check_inode(inode); | ||
4415 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
4416 | (S_ISLNK(inode->i_mode) && | ||
4417 | !ext4_inode_is_fast_symlink(inode))) { | ||
4418 | /* Validate block references which are part of inode */ | ||
4419 | ret = ext4_check_inode_blockref(inode); | ||
4420 | } | ||
4421 | if (ret) { | ||
4422 | brelse(bh); | ||
4423 | goto bad_inode; | ||
4424 | } | ||
4425 | |||
4332 | if (S_ISREG(inode->i_mode)) { | 4426 | if (S_ISREG(inode->i_mode)) { |
4333 | inode->i_op = &ext4_file_inode_operations; | 4427 | inode->i_op = &ext4_file_inode_operations; |
4334 | inode->i_fop = &ext4_file_operations; | 4428 | inode->i_fop = &ext4_file_operations; |
@@ -4345,7 +4439,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4345 | inode->i_op = &ext4_symlink_inode_operations; | 4439 | inode->i_op = &ext4_symlink_inode_operations; |
4346 | ext4_set_aops(inode); | 4440 | ext4_set_aops(inode); |
4347 | } | 4441 | } |
4348 | } else { | 4442 | } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || |
4443 | S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { | ||
4349 | inode->i_op = &ext4_special_inode_operations; | 4444 | inode->i_op = &ext4_special_inode_operations; |
4350 | if (raw_inode->i_block[0]) | 4445 | if (raw_inode->i_block[0]) |
4351 | init_special_inode(inode, inode->i_mode, | 4446 | init_special_inode(inode, inode->i_mode, |
@@ -4353,6 +4448,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4353 | else | 4448 | else |
4354 | init_special_inode(inode, inode->i_mode, | 4449 | init_special_inode(inode, inode->i_mode, |
4355 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4450 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4451 | } else { | ||
4452 | brelse(bh); | ||
4453 | ret = -EIO; | ||
4454 | ext4_error(inode->i_sb, __func__, | ||
4455 | "bogus i_mode (%o) for inode=%lu", | ||
4456 | inode->i_mode, inode->i_ino); | ||
4457 | goto bad_inode; | ||
4356 | } | 4458 | } |
4357 | brelse(iloc.bh); | 4459 | brelse(iloc.bh); |
4358 | ext4_set_inode_flags(inode); | 4460 | ext4_set_inode_flags(inode); |