author    David S. Miller <davem@davemloft.net>  2010-01-23 01:45:46 -0500
committer David S. Miller <davem@davemloft.net>  2010-01-23 01:45:46 -0500
commit    6be325719b3e54624397e413efd4b33a997e55a3 (patch)
tree      57f321a56794cab2222e179b16731e0d76a4a68a /fs/ext4/inode.c
parent    26d92f9276a56d55511a427fb70bd70886af647a (diff)
parent    92dcffb916d309aa01778bf8963a6932e4014d07 (diff)
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	235
1 file changed, 132 insertions, 103 deletions
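
The ext4 hunks below rework delayed-allocation accounting: ext4_da_reserve_space() now reserves a single data block at a given logical block (plus an estimate of the metadata needed for it), and ext4_da_update_reserve_space()/ext4_da_release_space() hand back the reserved metadata only once the last reserved data block is gone. What follows is a minimal userspace sketch of that bookkeeping, not kernel code: the struct and helper names are invented for illustration, and locking, the quota calls, and the percpu dirty-blocks counter are left out.

#include <assert.h>
#include <stdio.h>

/* Toy stand-in for the three reservation counters in struct ext4_inode_info. */
struct da_counters {
	unsigned int reserved_data_blocks;	/* i_reserved_data_blocks */
	unsigned int reserved_meta_blocks;	/* i_reserved_meta_blocks */
	unsigned int allocated_meta_blocks;	/* i_allocated_meta_blocks */
};

/* Rough analogue of ext4_da_reserve_space(): one data block plus md_needed metadata. */
static void da_reserve(struct da_counters *c, unsigned int md_needed)
{
	c->reserved_data_blocks += 1;
	c->reserved_meta_blocks += md_needed;
}

/*
 * Rough analogue of ext4_da_update_reserve_space(): drop the data blocks that
 * were actually allocated, fold in the metadata allocated along the way, and
 * release the remaining metadata reservation only when no reserved data
 * blocks are left.
 */
static void da_update_after_alloc(struct da_counters *c, unsigned int used)
{
	assert(used <= c->reserved_data_blocks);
	c->reserved_data_blocks -= used;
	c->reserved_meta_blocks -= c->allocated_meta_blocks;
	c->allocated_meta_blocks = 0;
	if (c->reserved_data_blocks == 0)
		c->reserved_meta_blocks = 0;
}

int main(void)
{
	struct da_counters c = { 0, 0, 0 };

	da_reserve(&c, 1);	/* first block of a new indirect group */
	da_reserve(&c, 0);	/* same group: no extra metadata charged */
	da_update_after_alloc(&c, 2);
	printf("data=%u meta=%u\n", c.reserved_data_blocks, c.reserved_meta_blocks);
	return 0;
}

The point of the per-block scheme, as the new comment in ext4_da_reserve_space() notes, is that the metadata estimate is deliberately pessimistic and gets trued up when the blocks are actually written out.
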
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5352db1a3086..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1003,83 +1003,94 @@ out:
 	return err;
 }
 
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
 {
-	unsigned long long total;
-
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	total = EXT4_I(inode)->i_reserved_data_blocks +
-		EXT4_I(inode)->i_reserved_meta_blocks;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
-	return (total << inode->i_blkbits);
+	return &EXT4_I(inode)->i_reserved_quota;
 }
+#endif
+
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
  */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+					      sector_t lblock)
 {
-	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	int ind_blks, dind_blks, tind_blks;
-
-	/* number of new indirect blocks needed */
-	ind_blks = (blocks + icap - 1) / icap;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+	int blk_bits;
 
-	dind_blks = (ind_blks + icap - 1) / icap;
+	if (lblock < EXT4_NDIR_BLOCKS)
+		return 0;
 
-	tind_blks = 1;
+	lblock -= EXT4_NDIR_BLOCKS;
 
-	return ind_blks + dind_blks + tind_blks;
+	if (ei->i_da_metadata_calc_len &&
+	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+		ei->i_da_metadata_calc_len++;
+		return 0;
+	}
+	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+	ei->i_da_metadata_calc_len = 1;
+	blk_bits = roundup_pow_of_two(lblock + 1);
+	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
 }
 
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
  */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-	if (!blocks)
-		return 0;
-
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-		return ext4_ext_calc_metadata_amount(inode, blocks);
+		return ext4_ext_calc_metadata_amount(inode, lblock);
 
-	return ext4_indirect_calc_metadata_amount(inode, blocks);
+	return ext4_indirect_calc_metadata_amount(inode, lblock);
 }
 
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free;
-
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - used;
-	mdb = ext4_calc_metadata_amount(inode, total);
-
-	/* figure out how many metablocks to release */
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-	if (mdb_free) {
-		/* Account for allocated meta_blocks */
-		mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
-		/* update fs dirty blocks counter */
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int mdb_free = 0;
+
+	spin_lock(&ei->i_block_reservation_lock);
+	if (unlikely(used > ei->i_reserved_data_blocks)) {
+		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+			 "with only %d reserved data blocks\n",
+			 __func__, inode->i_ino, used,
+			 ei->i_reserved_data_blocks);
+		WARN_ON(1);
+		used = ei->i_reserved_data_blocks;
+	}
+
+	/* Update per-inode reservations */
+	ei->i_reserved_data_blocks -= used;
+	used += ei->i_allocated_meta_blocks;
+	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+	ei->i_allocated_meta_blocks = 0;
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+	if (ei->i_reserved_data_blocks == 0) {
+		/*
+		 * We can release all of the reserved metadata blocks
+		 * only when we have written all of the delayed
+		 * allocation blocks.
+		 */
+		mdb_free = ei->i_reserved_meta_blocks;
+		ei->i_reserved_meta_blocks = 0;
+		ei->i_da_metadata_calc_len = 0;
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
-		EXT4_I(inode)->i_allocated_meta_blocks = 0;
-		EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	}
-
-	/* update per-inode reservations */
-	BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= used;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	/*
-	 * free those over-booking quota for metadata blocks
-	 */
+	/* Update quota subsystem */
+	vfs_dq_claim_block(inode, used);
 	if (mdb_free)
 		vfs_dq_release_reservation_block(inode, mdb_free);
 
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	 * there aren't any writers on the inode, we can discard the
 	 * inode's preallocations.
 	 */
-	if (!total && (atomic_read(&inode->i_writecount) == 0))
+	if ((ei->i_reserved_data_blocks == 0) &&
+	    (atomic_read(&inode->i_writecount) == 0))
 		ext4_discard_preallocations(inode);
 }
 
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
 	return ret ? ret : copied;
 }
 
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 {
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	unsigned long md_needed, mdblocks, total = 0;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long md_needed, md_reserved;
 
 	/*
 	 * recalculate the amount of metadata blocks to reserve
@@ -1809,86 +1825,90 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 	 * worse case is one extent per block
 	 */
 repeat:
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
-	mdblocks = ext4_calc_metadata_amount(inode, total);
-	BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
-	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
-	total = md_needed + nrblocks;
+	spin_lock(&ei->i_block_reservation_lock);
+	md_reserved = ei->i_reserved_meta_blocks;
+	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	spin_unlock(&ei->i_block_reservation_lock);
 
 	/*
 	 * Make quota reservation here to prevent quota overflow
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, total)) {
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	if (vfs_dq_reserve_block(inode, md_needed + 1)) {
+		/*
+		 * We tend to badly over-estimate the amount of
+		 * metadata blocks which are needed, so if we have
+		 * reserved any metadata blocks, try to force out the
+		 * inode and see if we have any better luck.
+		 */
+		if (md_reserved && retries++ <= 3)
+			goto retry;
 		return -EDQUOT;
 	}
 
-	if (ext4_claim_free_blocks(sbi, total)) {
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		vfs_dq_release_reservation_block(inode, total);
+	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+		vfs_dq_release_reservation_block(inode, md_needed + 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+	retry:
+			if (md_reserved)
+				write_inode_now(inode, (retries == 3));
 			yield();
 			goto repeat;
 		}
 		return -ENOSPC;
 	}
-	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-	EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+	spin_lock(&ei->i_block_reservation_lock);
+	ei->i_reserved_data_blocks++;
+	ei->i_reserved_meta_blocks += md_needed;
+	spin_unlock(&ei->i_block_reservation_lock);
 
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 	return 0;       /* success */
 }
 
 static void ext4_da_release_space(struct inode *inode, int to_free)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free, release;
+	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	if (!to_free)
 		return;		/* Nothing to release, exit */
 
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	if (!EXT4_I(inode)->i_reserved_data_blocks) {
+	if (unlikely(to_free > ei->i_reserved_data_blocks)) {
 		/*
-		 * if there is no reserved blocks, but we try to free some
-		 * then the counter is messed up somewhere.
-		 * but since this function is called from invalidate
-		 * page, it's harmless to return without any action
+		 * if there aren't enough reserved blocks, then the
+		 * counter is messed up somewhere.  Since this
+		 * function is called from invalidate page, it's
+		 * harmless to return without any action.
 		 */
-		printk(KERN_INFO "ext4 delalloc try to release %d reserved "
-			"blocks for inode %lu, but there is no reserved "
-			"data blocks\n", to_free, inode->i_ino);
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		return;
+		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+			 "ino %lu, to_free %d with only %d reserved "
+			 "data blocks\n", inode->i_ino, to_free,
+			 ei->i_reserved_data_blocks);
+		WARN_ON(1);
+		to_free = ei->i_reserved_data_blocks;
 	}
+	ei->i_reserved_data_blocks -= to_free;
 
-	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
-	mdb = ext4_calc_metadata_amount(inode, total);
-
-	/* figure out how many metablocks to release */
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-	release = to_free + mdb_free;
-
-	/* update fs dirty blocks counter for truncate case */
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+	if (ei->i_reserved_data_blocks == 0) {
+		/*
+		 * We can release all of the reserved metadata blocks
+		 * only when we have written all of the delayed
+		 * allocation blocks.
+		 */
+		to_free += ei->i_reserved_meta_blocks;
+		ei->i_reserved_meta_blocks = 0;
+		ei->i_da_metadata_calc_len = 0;
+	}
 
-	/* update per-inode reservations */
-	BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+	/* update fs dirty blocks counter */
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_release_reservation_block(inode, release);
+	vfs_dq_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -2494,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	 * XXX: __block_prepare_write() unmaps passed block,
 	 * is it OK?
 	 */
-	ret = ext4_da_reserve_space(inode, 1);
+	ret = ext4_da_reserve_space(inode, iblock);
 	if (ret)
 		/* not enough space to reserve */
 		return ret;
@@ -2968,8 +2988,7 @@ retry:
 out_writepages:
 	if (!no_nrwrite_index_update)
 		wbc->no_nrwrite_index_update = 0;
-	if (wbc->nr_to_write > nr_to_writebump)
-		wbc->nr_to_write -= nr_to_writebump;
+	wbc->nr_to_write -= nr_to_writebump;
 	wbc->range_start = range_start;
 	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
 	return ret;
@@ -2994,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
 	if (2 * free_blocks < 3 * dirty_blocks ||
 		free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
 		/*
-		 * free block count is less that 150% of dirty blocks
-		 * or free blocks is less that watermark
+		 * free block count is less than 150% of dirty blocks
+		 * or free blocks is less than watermark
 		 */
 		return 1;
 	}
+	/*
+	 * Even if we don't switch but are nearing capacity,
+	 * start pushing delalloc when 1/2 of free blocks are dirty.
+	 */
+	if (free_blocks < 2 * dirty_blocks)
+		writeback_inodes_sb_if_idle(sb);
+
 	return 0;
 }
 
@@ -4794,6 +4820,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
 	inode->i_size = ext4_isize(raw_inode);
 	ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+	ei->i_reserved_quota = 0;
+#endif
 	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
 	ei->i_block_group = iloc.block_group;
 	ei->i_last_alloc_group = ~0;
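
For reference, the ext4_nonda_switch() hunk above keeps the existing fall-back to non-delayed allocation when free space runs low, and additionally kicks off background writeback once half of the free blocks are dirty. A small sketch of those thresholds follows; it is a model, not the kernel function, and the watermark value here merely stands in for EXT4_FREEBLOCKS_WATERMARK.

#include <stdbool.h>
#include <stdio.h>

#define FREEBLOCKS_WATERMARK 1024	/* illustrative stand-in for EXT4_FREEBLOCKS_WATERMARK */

/* Returns true when writes should fall back to non-delayed allocation. */
static bool nonda_switch(long long free_blocks, long long dirty_blocks,
			 bool *start_writeback)
{
	*start_writeback = false;
	/* Free space under 150% of dirty blocks, or under the watermark: switch. */
	if (2 * free_blocks < 3 * dirty_blocks ||
	    free_blocks < dirty_blocks + FREEBLOCKS_WATERMARK)
		return true;
	/* New in this merge: push delalloc out early when half the free blocks are dirty. */
	if (free_blocks < 2 * dirty_blocks)
		*start_writeback = true;
	return false;
}

int main(void)
{
	bool wb;

	printf("%d\n", nonda_switch(10000, 4000, &wb));	/* 0, wb stays false */
	printf("%d\n", nonda_switch(7000, 4000, &wb));	/* 0, wb becomes true */
	printf("%d\n", nonda_switch(5000, 4000, &wb));	/* 1: switch to nonda */
	return 0;
}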