diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-30 16:25:56 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-30 16:25:56 -0500 |
| commit | 1f11abc966b82b9fd0c834707486ef301b2f398d (patch) | |
| tree | 4e9d49b00ab034f1fc7f11cb1ff54e3121a728ae | |
| parent | f8e9766dd1bacd5f32f9ac4322b55fbfd46b778e (diff) | |
| parent | 0637c6f4135f592f094207c7c21e7c0fc5557834 (diff) | |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Patch up how we claim metadata blocks for quota purposes
ext4: Ensure zeroout blocks have no dirty metadata
ext4: return correct wbc.nr_to_write in ext4_da_writepages
ext4: Update documentation to correct the inode_readahead_blks option name
jbd2: don't use __GFP_NOFAIL in journal_init_common()
ext4: flush delalloc blocks when space is low
fs-writeback: Add helper function to start writeback if idle
ext4: Eliminate potential double free on error path
ext4: fix unsigned long long printk warning in super.c
ext4, jbd2: Add barriers for file systems with external journals
ext4: replace BUG() with return -EIO in ext4_ext_get_blocks
ext4: add module aliases for ext2 and ext3
ext4: Don't ask about supporting ext2/3 in ext4 if ext4 is not configured
ext4: remove unused #include <linux/version.h>
| -rw-r--r-- | Documentation/filesystems/ext4.txt | 2 | ||||
| -rw-r--r-- | fs/ext4/Kconfig | 1 | ||||
| -rw-r--r-- | fs/ext4/block_validity.c | 1 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 28 | ||||
| -rw-r--r-- | fs/ext4/fsync.c | 16 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 171 | ||||
| -rw-r--r-- | fs/ext4/mballoc.h | 1 | ||||
| -rw-r--r-- | fs/ext4/super.c | 6 | ||||
| -rw-r--r-- | fs/ext4/xattr.c | 2 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 17 | ||||
| -rw-r--r-- | fs/jbd2/checkpoint.c | 15 | ||||
| -rw-r--r-- | fs/jbd2/commit.c | 19 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 2 | ||||
| -rw-r--r-- | include/linux/jbd2.h | 1 | ||||
| -rw-r--r-- | include/linux/writeback.h | 1 |
15 files changed, 189 insertions, 94 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index af6885c3c821..e1def1786e50 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
| @@ -196,7 +196,7 @@ nobarrier This also requires an IO stack which can support | |||
| 196 | also be used to enable or disable barriers, for | 196 | also be used to enable or disable barriers, for |
| 197 | consistency with other ext4 mount options. | 197 | consistency with other ext4 mount options. |
| 198 | 198 | ||
| 199 | inode_readahead=n This tuning parameter controls the maximum | 199 | inode_readahead_blks=n This tuning parameter controls the maximum |
| 200 | number of inode table blocks that ext4's inode | 200 | number of inode table blocks that ext4's inode |
| 201 | table readahead algorithm will pre-read into | 201 | table readahead algorithm will pre-read into |
| 202 | the buffer cache. The default value is 32 blocks. | 202 | the buffer cache. The default value is 32 blocks. |
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9acf7e808139..9ed1bb1f319f 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
| @@ -28,6 +28,7 @@ config EXT4_FS | |||
| 28 | 28 | ||
| 29 | config EXT4_USE_FOR_EXT23 | 29 | config EXT4_USE_FOR_EXT23 |
| 30 | bool "Use ext4 for ext2/ext3 file systems" | 30 | bool "Use ext4 for ext2/ext3 file systems" |
| 31 | depends on EXT4_FS | ||
| 31 | depends on EXT3_FS=n || EXT2_FS=n | 32 | depends on EXT3_FS=n || EXT2_FS=n |
| 32 | default y | 33 | default y |
| 33 | help | 34 | help |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4df8621ec31c..a60ab9aad57d 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/swap.h> | 17 | #include <linux/swap.h> |
| 18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
| 19 | #include <linux/version.h> | ||
| 20 | #include <linux/blkdev.h> | 19 | #include <linux/blkdev.h> |
| 21 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
| 22 | #include "ext4.h" | 21 | #include "ext4.h" |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3a7928f825e4..91ae46098ea4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -3023,6 +3023,14 @@ out: | |||
| 3023 | return err; | 3023 | return err; |
| 3024 | } | 3024 | } |
| 3025 | 3025 | ||
| 3026 | static void unmap_underlying_metadata_blocks(struct block_device *bdev, | ||
| 3027 | sector_t block, int count) | ||
| 3028 | { | ||
| 3029 | int i; | ||
| 3030 | for (i = 0; i < count; i++) | ||
| 3031 | unmap_underlying_metadata(bdev, block + i); | ||
| 3032 | } | ||
| 3033 | |||
| 3026 | static int | 3034 | static int |
| 3027 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3035 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
| 3028 | ext4_lblk_t iblock, unsigned int max_blocks, | 3036 | ext4_lblk_t iblock, unsigned int max_blocks, |
| @@ -3098,6 +3106,18 @@ out: | |||
| 3098 | } else | 3106 | } else |
| 3099 | allocated = ret; | 3107 | allocated = ret; |
| 3100 | set_buffer_new(bh_result); | 3108 | set_buffer_new(bh_result); |
| 3109 | /* | ||
| 3110 | * if we allocated more blocks than requested | ||
| 3111 | * we need to make sure we unmap the extra block | ||
| 3112 | * allocated. The actual needed block will get | ||
| 3113 | * unmapped later when we find the buffer_head marked | ||
| 3114 | * new. | ||
| 3115 | */ | ||
| 3116 | if (allocated > max_blocks) { | ||
| 3117 | unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, | ||
| 3118 | newblock + max_blocks, | ||
| 3119 | allocated - max_blocks); | ||
| 3120 | } | ||
| 3101 | map_out: | 3121 | map_out: |
| 3102 | set_buffer_mapped(bh_result); | 3122 | set_buffer_mapped(bh_result); |
| 3103 | out1: | 3123 | out1: |
| @@ -3190,7 +3210,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 3190 | * this situation is possible, though, _during_ tree modification; | 3210 | * this situation is possible, though, _during_ tree modification; |
| 3191 | * this is why assert can't be put in ext4_ext_find_extent() | 3211 | * this is why assert can't be put in ext4_ext_find_extent() |
| 3192 | */ | 3212 | */ |
| 3193 | BUG_ON(path[depth].p_ext == NULL && depth != 0); | 3213 | if (path[depth].p_ext == NULL && depth != 0) { |
| 3214 | ext4_error(inode->i_sb, __func__, "bad extent address " | ||
| 3215 | "inode: %lu, iblock: %d, depth: %d", | ||
| 3216 | inode->i_ino, iblock, depth); | ||
| 3217 | err = -EIO; | ||
| 3218 | goto out2; | ||
| 3219 | } | ||
| 3194 | eh = path[depth].p_hdr; | 3220 | eh = path[depth].p_hdr; |
| 3195 | 3221 | ||
| 3196 | ex = path[depth].p_ext; | 3222 | ex = path[depth].p_ext; |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0b22497d92e1..98bd140aad01 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
| @@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 88 | return ext4_force_commit(inode->i_sb); | 88 | return ext4_force_commit(inode->i_sb); |
| 89 | 89 | ||
| 90 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; | 90 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; |
| 91 | if (jbd2_log_start_commit(journal, commit_tid)) | 91 | if (jbd2_log_start_commit(journal, commit_tid)) { |
| 92 | /* | ||
| 93 | * When the journal is on a different device than the | ||
| 94 | * fs data disk, we need to issue the barrier in | ||
| 95 | * writeback mode. (In ordered mode, the jbd2 layer | ||
| 96 | * will take care of issuing the barrier. In | ||
| 97 | * data=journal, all of the data blocks are written to | ||
| 98 | * the journal device.) | ||
| 99 | */ | ||
| 100 | if (ext4_should_writeback_data(inode) && | ||
| 101 | (journal->j_fs_dev != journal->j_dev) && | ||
| 102 | (journal->j_flags & JBD2_BARRIER)) | ||
| 103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
| 92 | jbd2_log_wait_commit(journal, commit_tid); | 104 | jbd2_log_wait_commit(journal, commit_tid); |
| 93 | else if (journal->j_flags & JBD2_BARRIER) | 105 | } else if (journal->j_flags & JBD2_BARRIER) |
| 94 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 106 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); |
| 95 | return ret; | 107 | return ret; |
| 96 | } | 108 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ab807963a614..84eeb8f515a3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -1043,43 +1043,47 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | |||
| 1043 | return ext4_indirect_calc_metadata_amount(inode, blocks); | 1043 | return ext4_indirect_calc_metadata_amount(inode, blocks); |
| 1044 | } | 1044 | } |
| 1045 | 1045 | ||
| 1046 | /* | ||
| 1047 | * Called with i_data_sem down, which is important since we can call | ||
| 1048 | * ext4_discard_preallocations() from here. | ||
| 1049 | */ | ||
| 1046 | static void ext4_da_update_reserve_space(struct inode *inode, int used) | 1050 | static void ext4_da_update_reserve_space(struct inode *inode, int used) |
| 1047 | { | 1051 | { |
| 1048 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1052 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1049 | int total, mdb, mdb_free, mdb_claim = 0; | 1053 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 1050 | 1054 | int mdb_free = 0; | |
| 1051 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1055 | |
| 1052 | /* recalculate the number of metablocks still need to be reserved */ | 1056 | spin_lock(&ei->i_block_reservation_lock); |
| 1053 | total = EXT4_I(inode)->i_reserved_data_blocks - used; | 1057 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
| 1054 | mdb = ext4_calc_metadata_amount(inode, total); | 1058 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
| 1055 | 1059 | "with only %d reserved data blocks\n", | |
| 1056 | /* figure out how many metablocks to release */ | 1060 | __func__, inode->i_ino, used, |
| 1057 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1061 | ei->i_reserved_data_blocks); |
| 1058 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1062 | WARN_ON(1); |
| 1059 | 1063 | used = ei->i_reserved_data_blocks; | |
| 1060 | if (mdb_free) { | 1064 | } |
| 1061 | /* Account for allocated meta_blocks */ | 1065 | |
| 1062 | mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; | 1066 | /* Update per-inode reservations */ |
| 1063 | BUG_ON(mdb_free < mdb_claim); | 1067 | ei->i_reserved_data_blocks -= used; |
| 1064 | mdb_free -= mdb_claim; | 1068 | used += ei->i_allocated_meta_blocks; |
| 1065 | 1069 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | |
| 1066 | /* update fs dirty blocks counter */ | 1070 | ei->i_allocated_meta_blocks = 0; |
| 1071 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); | ||
| 1072 | |||
| 1073 | if (ei->i_reserved_data_blocks == 0) { | ||
| 1074 | /* | ||
| 1075 | * We can release all of the reserved metadata blocks | ||
| 1076 | * only when we have written all of the delayed | ||
| 1077 | * allocation blocks. | ||
| 1078 | */ | ||
| 1079 | mdb_free = ei->i_allocated_meta_blocks; | ||
| 1067 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | 1080 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
| 1068 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | 1081 | ei->i_allocated_meta_blocks = 0; |
| 1069 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
| 1070 | } | 1082 | } |
| 1071 | |||
| 1072 | /* update per-inode reservations */ | ||
| 1073 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | ||
| 1074 | EXT4_I(inode)->i_reserved_data_blocks -= used; | ||
| 1075 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim); | ||
| 1076 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1083 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1077 | 1084 | ||
| 1078 | vfs_dq_claim_block(inode, used + mdb_claim); | 1085 | /* Update quota subsystem */ |
| 1079 | 1086 | vfs_dq_claim_block(inode, used); | |
| 1080 | /* | ||
| 1081 | * free those over-booking quota for metadata blocks | ||
| 1082 | */ | ||
| 1083 | if (mdb_free) | 1087 | if (mdb_free) |
| 1084 | vfs_dq_release_reservation_block(inode, mdb_free); | 1088 | vfs_dq_release_reservation_block(inode, mdb_free); |
| 1085 | 1089 | ||
| @@ -1088,7 +1092,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
| 1088 | * there aren't any writers on the inode, we can discard the | 1092 | * there aren't any writers on the inode, we can discard the |
| 1089 | * inode's preallocations. | 1093 | * inode's preallocations. |
| 1090 | */ | 1094 | */ |
| 1091 | if (!total && (atomic_read(&inode->i_writecount) == 0)) | 1095 | if ((ei->i_reserved_data_blocks == 0) && |
| 1096 | (atomic_read(&inode->i_writecount) == 0)) | ||
| 1092 | ext4_discard_preallocations(inode); | 1097 | ext4_discard_preallocations(inode); |
| 1093 | } | 1098 | } |
| 1094 | 1099 | ||
| @@ -1801,7 +1806,8 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
| 1801 | { | 1806 | { |
| 1802 | int retries = 0; | 1807 | int retries = 0; |
| 1803 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1808 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1804 | unsigned long md_needed, mdblocks, total = 0; | 1809 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 1810 | unsigned long md_needed, md_reserved, total = 0; | ||
| 1805 | 1811 | ||
| 1806 | /* | 1812 | /* |
| 1807 | * recalculate the amount of metadata blocks to reserve | 1813 | * recalculate the amount of metadata blocks to reserve |
| @@ -1809,35 +1815,44 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
| 1809 | * worst case is one extent per block | 1815 | * worst case is one extent per block |
| 1810 | */ | 1816 | */ |
| 1811 | repeat: | 1817 | repeat: |
| 1812 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1818 | spin_lock(&ei->i_block_reservation_lock); |
| 1813 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1819 | md_reserved = ei->i_reserved_meta_blocks; |
| 1814 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1820 | md_needed = ext4_calc_metadata_amount(inode, nrblocks); |
| 1815 | BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1816 | |||
| 1817 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | ||
| 1818 | total = md_needed + nrblocks; | 1821 | total = md_needed + nrblocks; |
| 1819 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1822 | spin_unlock(&ei->i_block_reservation_lock); |
| 1820 | 1823 | ||
| 1821 | /* | 1824 | /* |
| 1822 | * Make quota reservation here to prevent quota overflow | 1825 | * Make quota reservation here to prevent quota overflow |
| 1823 | * later. Real quota accounting is done at pages writeout | 1826 | * later. Real quota accounting is done at pages writeout |
| 1824 | * time. | 1827 | * time. |
| 1825 | */ | 1828 | */ |
| 1826 | if (vfs_dq_reserve_block(inode, total)) | 1829 | if (vfs_dq_reserve_block(inode, total)) { |
| 1830 | /* | ||
| 1831 | * We tend to badly over-estimate the amount of | ||
| 1832 | * metadata blocks which are needed, so if we have | ||
| 1833 | * reserved any metadata blocks, try to force out the | ||
| 1834 | * inode and see if we have any better luck. | ||
| 1835 | */ | ||
| 1836 | if (md_reserved && retries++ <= 3) | ||
| 1837 | goto retry; | ||
| 1827 | return -EDQUOT; | 1838 | return -EDQUOT; |
| 1839 | } | ||
| 1828 | 1840 | ||
| 1829 | if (ext4_claim_free_blocks(sbi, total)) { | 1841 | if (ext4_claim_free_blocks(sbi, total)) { |
| 1830 | vfs_dq_release_reservation_block(inode, total); | 1842 | vfs_dq_release_reservation_block(inode, total); |
| 1831 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1843 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
| 1844 | retry: | ||
| 1845 | if (md_reserved) | ||
| 1846 | write_inode_now(inode, (retries == 3)); | ||
| 1832 | yield(); | 1847 | yield(); |
| 1833 | goto repeat; | 1848 | goto repeat; |
| 1834 | } | 1849 | } |
| 1835 | return -ENOSPC; | 1850 | return -ENOSPC; |
| 1836 | } | 1851 | } |
| 1837 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1852 | spin_lock(&ei->i_block_reservation_lock); |
| 1838 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1853 | ei->i_reserved_data_blocks += nrblocks; |
| 1839 | EXT4_I(inode)->i_reserved_meta_blocks += md_needed; | 1854 | ei->i_reserved_meta_blocks += md_needed; |
| 1840 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1855 | spin_unlock(&ei->i_block_reservation_lock); |
| 1841 | 1856 | ||
| 1842 | return 0; /* success */ | 1857 | return 0; /* success */ |
| 1843 | } | 1858 | } |
| @@ -1845,49 +1860,45 @@ repeat: | |||
| 1845 | static void ext4_da_release_space(struct inode *inode, int to_free) | 1860 | static void ext4_da_release_space(struct inode *inode, int to_free) |
| 1846 | { | 1861 | { |
| 1847 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1862 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1848 | int total, mdb, mdb_free, release; | 1863 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 1849 | 1864 | ||
| 1850 | if (!to_free) | 1865 | if (!to_free) |
| 1851 | return; /* Nothing to release, exit */ | 1866 | return; /* Nothing to release, exit */ |
| 1852 | 1867 | ||
| 1853 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1868 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1854 | 1869 | ||
| 1855 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | 1870 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
| 1856 | /* | 1871 | /* |
| 1857 | * if there is no reserved blocks, but we try to free some | 1872 | * if there aren't enough reserved blocks, then the |
| 1858 | * then the counter is messed up somewhere. | 1873 | * counter is messed up somewhere. Since this |
| 1859 | * but since this function is called from invalidate | 1874 | * function is called from invalidate page, it's |
| 1860 | * page, it's harmless to return without any action | 1875 | * harmless to return without any action. |
| 1861 | */ | 1876 | */ |
| 1862 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | 1877 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " |
| 1863 | "blocks for inode %lu, but there is no reserved " | 1878 | "ino %lu, to_free %d with only %d reserved " |
| 1864 | "data blocks\n", to_free, inode->i_ino); | 1879 | "data blocks\n", inode->i_ino, to_free, |
| 1865 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1880 | ei->i_reserved_data_blocks); |
| 1866 | return; | 1881 | WARN_ON(1); |
| 1882 | to_free = ei->i_reserved_data_blocks; | ||
| 1867 | } | 1883 | } |
| 1884 | ei->i_reserved_data_blocks -= to_free; | ||
| 1868 | 1885 | ||
| 1869 | /* recalculate the number of metablocks still need to be reserved */ | 1886 | if (ei->i_reserved_data_blocks == 0) { |
| 1870 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1887 | /* |
| 1871 | mdb = ext4_calc_metadata_amount(inode, total); | 1888 | * We can release all of the reserved metadata blocks |
| 1872 | 1889 | * only when we have written all of the delayed | |
| 1873 | /* figure out how many metablocks to release */ | 1890 | * allocation blocks. |
| 1874 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1891 | */ |
| 1875 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1892 | to_free += ei->i_allocated_meta_blocks; |
| 1876 | 1893 | ei->i_allocated_meta_blocks = 0; | |
| 1877 | release = to_free + mdb_free; | 1894 | } |
| 1878 | |||
| 1879 | /* update fs dirty blocks counter for truncate case */ | ||
| 1880 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); | ||
| 1881 | 1895 | ||
| 1882 | /* update per-inode reservations */ | 1896 | /* update fs dirty blocks counter */ |
| 1883 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1897 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); |
| 1884 | EXT4_I(inode)->i_reserved_data_blocks -= to_free; | ||
| 1885 | 1898 | ||
| 1886 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1887 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
| 1888 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1899 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1889 | 1900 | ||
| 1890 | vfs_dq_release_reservation_block(inode, release); | 1901 | vfs_dq_release_reservation_block(inode, to_free); |
| 1891 | } | 1902 | } |
| 1892 | 1903 | ||
| 1893 | static void ext4_da_page_release_reservation(struct page *page, | 1904 | static void ext4_da_page_release_reservation(struct page *page, |
| @@ -2967,8 +2978,7 @@ retry: | |||
| 2967 | out_writepages: | 2978 | out_writepages: |
| 2968 | if (!no_nrwrite_index_update) | 2979 | if (!no_nrwrite_index_update) |
| 2969 | wbc->no_nrwrite_index_update = 0; | 2980 | wbc->no_nrwrite_index_update = 0; |
| 2970 | if (wbc->nr_to_write > nr_to_writebump) | 2981 | wbc->nr_to_write -= nr_to_writebump; |
| 2971 | wbc->nr_to_write -= nr_to_writebump; | ||
| 2972 | wbc->range_start = range_start; | 2982 | wbc->range_start = range_start; |
| 2973 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2983 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
| 2974 | return ret; | 2984 | return ret; |
| @@ -2993,11 +3003,18 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
| 2993 | if (2 * free_blocks < 3 * dirty_blocks || | 3003 | if (2 * free_blocks < 3 * dirty_blocks || |
| 2994 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 3004 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { |
| 2995 | /* | 3005 | /* |
| 2996 | * free block count is less that 150% of dirty blocks | 3006 | * free block count is less than 150% of dirty blocks |
| 2997 | * or free blocks is less that watermark | 3007 | * or free blocks is less than watermark |
| 2998 | */ | 3008 | */ |
| 2999 | return 1; | 3009 | return 1; |
| 3000 | } | 3010 | } |
| 3011 | /* | ||
| 3012 | * Even if we don't switch but are nearing capacity, | ||
| 3013 | * start pushing delalloc when 1/2 of free blocks are dirty. | ||
| 3014 | */ | ||
| 3015 | if (free_blocks < 2 * dirty_blocks) | ||
| 3016 | writeback_inodes_sb_if_idle(sb); | ||
| 3017 | |||
| 3001 | return 0; | 3018 | return 0; |
| 3002 | } | 3019 | } |
| 3003 | 3020 | ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 0ca811061bc7..436521cae456 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include <linux/proc_fs.h> | 17 | #include <linux/proc_fs.h> |
| 18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
| 19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
| 20 | #include <linux/version.h> | ||
| 21 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
| 22 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
| 23 | #include "ext4_jbd2.h" | 22 | #include "ext4_jbd2.h" |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6ed9aa91f27d..7cccb35c0f4d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -2174,9 +2174,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | |||
| 2174 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2174 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
| 2175 | 2175 | ||
| 2176 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2176 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
| 2177 | sbi->s_kbytes_written + | 2177 | (unsigned long long)(sbi->s_kbytes_written + |
| 2178 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2178 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
| 2179 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 2179 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); |
| 2180 | } | 2180 | } |
| 2181 | 2181 | ||
| 2182 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | 2182 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, |
| @@ -4005,6 +4005,7 @@ static inline void unregister_as_ext2(void) | |||
| 4005 | { | 4005 | { |
| 4006 | unregister_filesystem(&ext2_fs_type); | 4006 | unregister_filesystem(&ext2_fs_type); |
| 4007 | } | 4007 | } |
| 4008 | MODULE_ALIAS("ext2"); | ||
| 4008 | #else | 4009 | #else |
| 4009 | static inline void register_as_ext2(void) { } | 4010 | static inline void register_as_ext2(void) { } |
| 4010 | static inline void unregister_as_ext2(void) { } | 4011 | static inline void unregister_as_ext2(void) { } |
| @@ -4031,6 +4032,7 @@ static inline void unregister_as_ext3(void) | |||
| 4031 | { | 4032 | { |
| 4032 | unregister_filesystem(&ext3_fs_type); | 4033 | unregister_filesystem(&ext3_fs_type); |
| 4033 | } | 4034 | } |
| 4035 | MODULE_ALIAS("ext3"); | ||
| 4034 | #else | 4036 | #else |
| 4035 | static inline void register_as_ext3(void) { } | 4037 | static inline void register_as_ext3(void) { } |
| 4036 | static inline void unregister_as_ext3(void) { } | 4038 | static inline void unregister_as_ext3(void) { } |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 83218bebbc7c..f3a2f7ed45aa 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -1332,6 +1332,8 @@ retry: | |||
| 1332 | goto cleanup; | 1332 | goto cleanup; |
| 1333 | kfree(b_entry_name); | 1333 | kfree(b_entry_name); |
| 1334 | kfree(buffer); | 1334 | kfree(buffer); |
| 1335 | b_entry_name = NULL; | ||
| 1336 | buffer = NULL; | ||
| 1335 | brelse(is->iloc.bh); | 1337 | brelse(is->iloc.bh); |
| 1336 | kfree(is); | 1338 | kfree(is); |
| 1337 | kfree(bs); | 1339 | kfree(bs); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 49bc1b8e8f19..f6c2155e0026 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -1187,6 +1187,23 @@ void writeback_inodes_sb(struct super_block *sb) | |||
| 1187 | EXPORT_SYMBOL(writeback_inodes_sb); | 1187 | EXPORT_SYMBOL(writeback_inodes_sb); |
| 1188 | 1188 | ||
| 1189 | /** | 1189 | /** |
| 1190 | * writeback_inodes_sb_if_idle - start writeback if none underway | ||
| 1191 | * @sb: the superblock | ||
| 1192 | * | ||
| 1193 | * Invoke writeback_inodes_sb if no writeback is currently underway. | ||
| 1194 | * Returns 1 if writeback was started, 0 if not. | ||
| 1195 | */ | ||
| 1196 | int writeback_inodes_sb_if_idle(struct super_block *sb) | ||
| 1197 | { | ||
| 1198 | if (!writeback_in_progress(sb->s_bdi)) { | ||
| 1199 | writeback_inodes_sb(sb); | ||
| 1200 | return 1; | ||
| 1201 | } else | ||
| 1202 | return 0; | ||
| 1203 | } | ||
| 1204 | EXPORT_SYMBOL(writeback_inodes_sb_if_idle); | ||
| 1205 | |||
| 1206 | /** | ||
| 1190 | * sync_inodes_sb - sync sb inode pages | 1207 | * sync_inodes_sb - sync sb inode pages |
| 1191 | * @sb: the superblock | 1208 | * @sb: the superblock |
| 1192 | * | 1209 | * |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index ca0f5eb62b20..886849370950 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
| 23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
| 24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 25 | #include <linux/blkdev.h> | ||
| 25 | #include <trace/events/jbd2.h> | 26 | #include <trace/events/jbd2.h> |
| 26 | 27 | ||
| 27 | /* | 28 | /* |
| @@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
| 515 | journal->j_tail_sequence = first_tid; | 516 | journal->j_tail_sequence = first_tid; |
| 516 | journal->j_tail = blocknr; | 517 | journal->j_tail = blocknr; |
| 517 | spin_unlock(&journal->j_state_lock); | 518 | spin_unlock(&journal->j_state_lock); |
| 519 | |||
| 520 | /* | ||
| 521 | * If there is an external journal, we need to make sure that | ||
| 522 | * any data blocks that were recently written out --- perhaps | ||
| 523 | * by jbd2_log_do_checkpoint() --- are flushed out before we | ||
| 524 | * drop the transactions from the external journal. It's | ||
| 525 | * unlikely this will be necessary, especially with a | ||
| 526 | * appropriately sized journal, but we need this to guarantee | ||
| 527 | * correctness. Fortunately jbd2_cleanup_journal_tail() | ||
| 528 | * doesn't get called all that often. | ||
| 529 | */ | ||
| 530 | if ((journal->j_fs_dev != journal->j_dev) && | ||
| 531 | (journal->j_flags & JBD2_BARRIER)) | ||
| 532 | blkdev_issue_flush(journal->j_fs_dev, NULL); | ||
| 518 | if (!(journal->j_flags & JBD2_ABORT)) | 533 | if (!(journal->j_flags & JBD2_ABORT)) |
| 519 | jbd2_journal_update_superblock(journal, 1); | 534 | jbd2_journal_update_superblock(journal, 1); |
| 520 | return 0; | 535 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6a10238d2c63..1bc74b6f26d2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
| 259 | ret = err; | 259 | ret = err; |
| 260 | spin_lock(&journal->j_list_lock); | 260 | spin_lock(&journal->j_list_lock); |
| 261 | J_ASSERT(jinode->i_transaction == commit_transaction); | 261 | J_ASSERT(jinode->i_transaction == commit_transaction); |
| 262 | commit_transaction->t_flushed_data_blocks = 1; | ||
| 262 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | 263 | jinode->i_flags &= ~JI_COMMIT_RUNNING; |
| 263 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | 264 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); |
| 264 | } | 265 | } |
| @@ -708,8 +709,17 @@ start_journal_io: | |||
| 708 | } | 709 | } |
| 709 | } | 710 | } |
| 710 | 711 | ||
| 711 | /* Done it all: now write the commit record asynchronously. */ | 712 | /* |
| 713 | * If the journal is not located on the file system device, | ||
| 714 | * then we must flush the file system device before we issue | ||
| 715 | * the commit record | ||
| 716 | */ | ||
| 717 | if (commit_transaction->t_flushed_data_blocks && | ||
| 718 | (journal->j_fs_dev != journal->j_dev) && | ||
| 719 | (journal->j_flags & JBD2_BARRIER)) | ||
| 720 | blkdev_issue_flush(journal->j_fs_dev, NULL); | ||
| 712 | 721 | ||
| 722 | /* Done it all: now write the commit record asynchronously. */ | ||
| 713 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 723 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 714 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 724 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| 715 | err = journal_submit_commit_record(journal, commit_transaction, | 725 | err = journal_submit_commit_record(journal, commit_transaction, |
| @@ -720,13 +730,6 @@ start_journal_io: | |||
| 720 | blkdev_issue_flush(journal->j_dev, NULL); | 730 | blkdev_issue_flush(journal->j_dev, NULL); |
| 721 | } | 731 | } |
| 722 | 732 | ||
| 723 | /* | ||
| 724 | * This is the right place to wait for data buffers both for ASYNC | ||
| 725 | * and !ASYNC commit. If commit is ASYNC, we need to wait only after | ||
| 726 | * the commit block went to disk (which happens above). If commit is | ||
| 727 | * SYNC, we need to wait for data buffers before we start writing | ||
| 728 | * commit block, which happens below in such setting. | ||
| 729 | */ | ||
| 730 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 733 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
| 731 | if (err) { | 734 | if (err) { |
| 732 | printk(KERN_WARNING | 735 | printk(KERN_WARNING |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 17af879e6e9e..ac0d027595d0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -814,7 +814,7 @@ static journal_t * journal_init_common (void) | |||
| 814 | journal_t *journal; | 814 | journal_t *journal; |
| 815 | int err; | 815 | int err; |
| 816 | 816 | ||
| 817 | journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); | 817 | journal = kzalloc(sizeof(*journal), GFP_KERNEL); |
| 818 | if (!journal) | 818 | if (!journal) |
| 819 | goto fail; | 819 | goto fail; |
| 820 | 820 | ||
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7f3d41..638ce4554c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
| @@ -653,6 +653,7 @@ struct transaction_s | |||
| 653 | * waiting for it to finish. | 653 | * waiting for it to finish. |
| 654 | */ | 654 | */ |
| 655 | unsigned int t_synchronous_commit:1; | 655 | unsigned int t_synchronous_commit:1; |
| 656 | unsigned int t_flushed_data_blocks:1; | ||
| 656 | 657 | ||
| 657 | /* | 658 | /* |
| 658 | * For use by the filesystem to store fs-specific data | 659 | * For use by the filesystem to store fs-specific data |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
| @@ -70,6 +70,7 @@ struct writeback_control { | |||
| 70 | struct bdi_writeback; | 70 | struct bdi_writeback; |
| 71 | int inode_wait(void *); | 71 | int inode_wait(void *); |
| 72 | void writeback_inodes_sb(struct super_block *); | 72 | void writeback_inodes_sb(struct super_block *); |
| 73 | int writeback_inodes_sb_if_idle(struct super_block *); | ||
| 73 | void sync_inodes_sb(struct super_block *); | 74 | void sync_inodes_sb(struct super_block *); |
| 74 | void writeback_inodes_wbc(struct writeback_control *wbc); | 75 | void writeback_inodes_wbc(struct writeback_control *wbc); |
| 75 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); | 76 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); |
