Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--  fs/ext4/inode.c  225
1 files changed, 126 insertions, 99 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,77 +1009,88 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
         return &EXT4_I(inode)->i_reserved_quota;
 }
 #endif
+
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
  */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+                                              sector_t lblock)
 {
-        int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-        int ind_blks, dind_blks, tind_blks;
-
-        /* number of new indirect blocks needed */
-        ind_blks = (blocks + icap - 1) / icap;
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+        int blk_bits;
 
-        dind_blks = (ind_blks + icap - 1) / icap;
+        if (lblock < EXT4_NDIR_BLOCKS)
+                return 0;
 
-        tind_blks = 1;
+        lblock -= EXT4_NDIR_BLOCKS;
 
-        return ind_blks + dind_blks + tind_blks;
+        if (ei->i_da_metadata_calc_len &&
+            (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+                ei->i_da_metadata_calc_len++;
+                return 0;
+        }
+        ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+        ei->i_da_metadata_calc_len = 1;
+        blk_bits = roundup_pow_of_two(lblock + 1);
+        return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
 }
 
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
  */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-        if (!blocks)
-                return 0;
-
         if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-                return ext4_ext_calc_metadata_amount(inode, blocks);
+                return ext4_ext_calc_metadata_amount(inode, lblock);
 
-        return ext4_indirect_calc_metadata_amount(inode, blocks);
+        return ext4_indirect_calc_metadata_amount(inode, lblock);
 }
 
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-        int total, mdb, mdb_free, mdb_claim = 0;
-
-        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-        /* recalculate the number of metablocks still need to be reserved */
-        total = EXT4_I(inode)->i_reserved_data_blocks - used;
-        mdb = ext4_calc_metadata_amount(inode, total);
-
-        /* figure out how many metablocks to release */
-        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-        mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-        if (mdb_free) {
-                /* Account for allocated meta_blocks */
-                mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
-                BUG_ON(mdb_free < mdb_claim);
-                mdb_free -= mdb_claim;
-
-                /* update fs dirty blocks counter */
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        int mdb_free = 0;
+
+        spin_lock(&ei->i_block_reservation_lock);
+        if (unlikely(used > ei->i_reserved_data_blocks)) {
+                ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+                         "with only %d reserved data blocks\n",
+                         __func__, inode->i_ino, used,
+                         ei->i_reserved_data_blocks);
+                WARN_ON(1);
+                used = ei->i_reserved_data_blocks;
+        }
+
+        /* Update per-inode reservations */
+        ei->i_reserved_data_blocks -= used;
+        used += ei->i_allocated_meta_blocks;
+        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+        ei->i_allocated_meta_blocks = 0;
+        percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+        if (ei->i_reserved_data_blocks == 0) {
+                /*
+                 * We can release all of the reserved metadata blocks
+                 * only when we have written all of the delayed
+                 * allocation blocks.
+                 */
+                mdb_free = ei->i_reserved_meta_blocks;
+                ei->i_reserved_meta_blocks = 0;
+                ei->i_da_metadata_calc_len = 0;
                 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
-                EXT4_I(inode)->i_allocated_meta_blocks = 0;
-                EXT4_I(inode)->i_reserved_meta_blocks = mdb;
         }
-
-        /* update per-inode reservations */
-        BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
-        EXT4_I(inode)->i_reserved_data_blocks -= used;
-        percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
         spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-        vfs_dq_claim_block(inode, used + mdb_claim);
-
-        /*
-         * free those over-booking quota for metadata blocks
-         */
+        /* Update quota subsystem */
+        vfs_dq_claim_block(inode, used);
         if (mdb_free)
                 vfs_dq_release_reservation_block(inode, mdb_free);
 
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
          * there aren't any writers on the inode, we can discard the
          * inode's preallocations.
          */
-        if (!total && (atomic_read(&inode->i_writecount) == 0))
+        if ((ei->i_reserved_data_blocks == 0) &&
+            (atomic_read(&inode->i_writecount) == 0))
                 ext4_discard_preallocations(inode);
 }
 
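The rewritten ext4_da_update_reserve_space() above drops the old recalculate-and-compare logic in favour of plain per-inode counter arithmetic: the data reservation shrinks by the number of blocks actually written, metadata blocks that were allocated along the way move from reserved to claimed, and whatever metadata reservation remains is only dropped once every delayed data block has been written out. The following user-space sketch models that bookkeeping with plain integers; the struct and helper names are illustrative stand-ins, not kernel APIs.

#include <assert.h>
#include <stdio.h>

/* Toy model of the per-inode delalloc counters this patch manipulates. */
struct toy_inode {
        int reserved_data_blocks;   /* i_reserved_data_blocks */
        int reserved_meta_blocks;   /* i_reserved_meta_blocks */
        int allocated_meta_blocks;  /* i_allocated_meta_blocks */
};

/*
 * Mirrors the flow of the new ext4_da_update_reserve_space(): returns the
 * total number of blocks that would be subtracted from the fs dirty-block
 * counter for this call (claimed data + meta, plus any released meta).
 */
static int toy_update_reserve_space(struct toy_inode *ei, int used)
{
        int claimed, freed = 0;

        if (used > ei->reserved_data_blocks)
                used = ei->reserved_data_blocks;  /* the kernel also WARN_ON()s here */

        ei->reserved_data_blocks -= used;
        claimed = used + ei->allocated_meta_blocks;  /* data + meta actually allocated */
        ei->reserved_meta_blocks -= ei->allocated_meta_blocks;
        ei->allocated_meta_blocks = 0;

        if (ei->reserved_data_blocks == 0) {
                /* all delayed data written: the rest of the meta reservation goes too */
                freed = ei->reserved_meta_blocks;
                ei->reserved_meta_blocks = 0;
        }
        return claimed + freed;
}

int main(void)
{
        struct toy_inode ei = { .reserved_data_blocks = 4,
                                .reserved_meta_blocks = 3,
                                .allocated_meta_blocks = 1 };

        printf("released %d\n", toy_update_reserve_space(&ei, 2)); /* 3: 2 data + 1 meta */
        printf("released %d\n", toy_update_reserve_space(&ei, 2)); /* 4: 2 data + 2 leftover meta */
        assert(ei.reserved_meta_blocks == 0);
        return 0;
}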
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
         return ret ? ret : copied;
 }
 
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 {
         int retries = 0;
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-        unsigned long md_needed, mdblocks, total = 0;
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        unsigned long md_needed, md_reserved;
 
         /*
          * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1825,43 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
          * worse case is one extent per block
          */
 repeat:
-        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-        total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
-        mdblocks = ext4_calc_metadata_amount(inode, total);
-        BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
-        md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
-        total = md_needed + nrblocks;
-        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+        spin_lock(&ei->i_block_reservation_lock);
+        md_reserved = ei->i_reserved_meta_blocks;
+        md_needed = ext4_calc_metadata_amount(inode, lblock);
+        spin_unlock(&ei->i_block_reservation_lock);
 
         /*
          * Make quota reservation here to prevent quota overflow
          * later. Real quota accounting is done at pages writeout
          * time.
          */
-        if (vfs_dq_reserve_block(inode, total))
+        if (vfs_dq_reserve_block(inode, md_needed + 1)) {
+                /*
+                 * We tend to badly over-estimate the amount of
+                 * metadata blocks which are needed, so if we have
+                 * reserved any metadata blocks, try to force out the
+                 * inode and see if we have any better luck.
+                 */
+                if (md_reserved && retries++ <= 3)
+                        goto retry;
                 return -EDQUOT;
+        }
 
-        if (ext4_claim_free_blocks(sbi, total)) {
-                vfs_dq_release_reservation_block(inode, total);
+        if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+                vfs_dq_release_reservation_block(inode, md_needed + 1);
                 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+                retry:
+                        if (md_reserved)
+                                write_inode_now(inode, (retries == 3));
                         yield();
                         goto repeat;
                 }
                 return -ENOSPC;
         }
-        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-        EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-        EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
-        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+        spin_lock(&ei->i_block_reservation_lock);
+        ei->i_reserved_data_blocks++;
+        ei->i_reserved_meta_blocks += md_needed;
+        spin_unlock(&ei->i_block_reservation_lock);
 
         return 0;       /* success */
 }
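After this change ext4_da_reserve_space() is called once per delayed-allocation block: it estimates the metadata needed for that single logical block, reserves quota for md_needed + 1 blocks, then claims the same amount from the free-block counter, rolling the quota reservation back (and possibly retrying) if the claim fails. The sketch below shows just that reserve-then-claim-with-rollback ordering; the helper names are hypothetical stand-ins for the quota and allocator calls, not the real kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for vfs_dq_reserve_block()/ext4_claim_free_blocks(). */
static bool quota_reserve(int nblocks)     { printf("quota reserve %d\n", nblocks); return true; }
static void quota_release(int nblocks)     { printf("quota release %d\n", nblocks); }
static bool claim_free_blocks(int nblocks) { printf("claim %d free blocks\n", nblocks); return true; }

/*
 * Reserve one data block plus md_needed metadata blocks, in the same order
 * as the patched ext4_da_reserve_space(): quota first, then the fs free-block
 * counter, undoing the quota reservation if the second step fails.
 */
static int reserve_one_block(int md_needed)
{
        if (!quota_reserve(md_needed + 1))
                return -1;                      /* -EDQUOT in the kernel */

        if (!claim_free_blocks(md_needed + 1)) {
                quota_release(md_needed + 1);   /* roll back before retrying/failing */
                return -2;                      /* -ENOSPC in the kernel */
        }
        return 0;
}

int main(void)
{
        return reserve_one_block(2);
}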
@@ -1845,49 +1869,46 @@ repeat:
 static void ext4_da_release_space(struct inode *inode, int to_free)
 {
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-        int total, mdb, mdb_free, release;
+        struct ext4_inode_info *ei = EXT4_I(inode);
 
         if (!to_free)
                 return;         /* Nothing to release, exit */
 
         spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
-        if (!EXT4_I(inode)->i_reserved_data_blocks) {
+        if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                 /*
-                 * if there is no reserved blocks, but we try to free some
-                 * then the counter is messed up somewhere.
-                 * but since this function is called from invalidate
-                 * page, it's harmless to return without any action
+                 * if there aren't enough reserved blocks, then the
+                 * counter is messed up somewhere. Since this
+                 * function is called from invalidate page, it's
+                 * harmless to return without any action.
                  */
-                printk(KERN_INFO "ext4 delalloc try to release %d reserved "
-                        "blocks for inode %lu, but there is no reserved "
-                        "data blocks\n", to_free, inode->i_ino);
-                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-                return;
+                ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+                         "ino %lu, to_free %d with only %d reserved "
+                         "data blocks\n", inode->i_ino, to_free,
+                         ei->i_reserved_data_blocks);
+                WARN_ON(1);
+                to_free = ei->i_reserved_data_blocks;
         }
+        ei->i_reserved_data_blocks -= to_free;
 
-        /* recalculate the number of metablocks still need to be reserved */
-        total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
-        mdb = ext4_calc_metadata_amount(inode, total);
-
-        /* figure out how many metablocks to release */
-        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-        mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-        release = to_free + mdb_free;
-
-        /* update fs dirty blocks counter for truncate case */
-        percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+        if (ei->i_reserved_data_blocks == 0) {
+                /*
+                 * We can release all of the reserved metadata blocks
+                 * only when we have written all of the delayed
+                 * allocation blocks.
+                 */
+                to_free += ei->i_reserved_meta_blocks;
+                ei->i_reserved_meta_blocks = 0;
+                ei->i_da_metadata_calc_len = 0;
+        }
 
-        /* update per-inode reservations */
-        BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
-        EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+        /* update fs dirty blocks counter */
+        percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
-        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-        EXT4_I(inode)->i_reserved_meta_blocks = mdb;
         spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-        vfs_dq_release_reservation_block(inode, release);
+        vfs_dq_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -2493,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                  * XXX: __block_prepare_write() unmaps passed block,
                  * is it OK?
                  */
-                ret = ext4_da_reserve_space(inode, 1);
+                ret = ext4_da_reserve_space(inode, iblock);
                 if (ret)
                         /* not enough space to reserve */
                         return ret;
@@ -2967,8 +2988,7 @@ retry:
 out_writepages:
         if (!no_nrwrite_index_update)
                 wbc->no_nrwrite_index_update = 0;
-        if (wbc->nr_to_write > nr_to_writebump)
-                wbc->nr_to_write -= nr_to_writebump;
+        wbc->nr_to_write -= nr_to_writebump;
         wbc->range_start = range_start;
         trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
         return ret;
@@ -2993,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
         if (2 * free_blocks < 3 * dirty_blocks ||
             free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
                 /*
-                 * free block count is less that 150% of dirty blocks
-                 * or free blocks is less that watermark
+                 * free block count is less than 150% of dirty blocks
+                 * or free blocks is less than watermark
                  */
                 return 1;
         }
+        /*
+         * Even if we don't switch but are nearing capacity,
+         * start pushing delalloc when 1/2 of free blocks are dirty.
+         */
+        if (free_blocks < 2 * dirty_blocks)
+                writeback_inodes_sb_if_idle(sb);
+
         return 0;
 }
 
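The final hunk adds an early-writeback nudge to ext4_nonda_switch(): even when the filesystem does not yet have to fall back to non-delayed allocation, writeback is kicked via writeback_inodes_sb_if_idle() once free blocks drop below twice the number of dirty (delalloc-reserved) blocks. A small sketch of the two thresholds in isolation, with the watermark passed in as an assumed parameter:

#include <stdbool.h>
#include <stdio.h>

/*
 * The two thresholds used by ext4_nonda_switch() after this patch.
 * "watermark" stands in for EXT4_FREEBLOCKS_WATERMARK.
 */
static bool switch_to_nondelalloc(long free_blocks, long dirty_blocks, long watermark)
{
        /* free < 150% of dirty, or free below the watermark: stop using delalloc */
        return 2 * free_blocks < 3 * dirty_blocks ||
               free_blocks < dirty_blocks + watermark;
}

static bool should_kick_writeback(long free_blocks, long dirty_blocks)
{
        /* new in this patch: push delalloc out early once free < 2 * dirty */
        return free_blocks < 2 * dirty_blocks;
}

int main(void)
{
        printf("%d %d\n",
               switch_to_nondelalloc(1000, 600, 100),  /* 0: still using delalloc */
               should_kick_writeback(1000, 600));      /* 1: but start writeback now */
        return 0;
}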