aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/ext4.txt2
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/extents.c28
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c171
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c6
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fs-writeback.c17
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--include/linux/jbd2.h1
-rw-r--r--include/linux/writeback.h1
15 files changed, 189 insertions, 94 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index af6885c3c821..e1def1786e50 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -196,7 +196,7 @@ nobarrier This also requires an IO stack which can support
196 also be used to enable or disable barriers, for 196 also be used to enable or disable barriers, for
197 consistency with other ext4 mount options. 197 consistency with other ext4 mount options.
198 198
199inode_readahead=n This tuning parameter controls the maximum 199inode_readahead_blks=n This tuning parameter controls the maximum
200 number of inode table blocks that ext4's inode 200 number of inode table blocks that ext4's inode
201 table readahead algorithm will pre-read into 201 table readahead algorithm will pre-read into
202 the buffer cache. The default value is 32 blocks. 202 the buffer cache. The default value is 32 blocks.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
28 28
29config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
31 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
32 default y 33 default y
33 help 34 help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..91ae46098ea4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3023,6 +3023,14 @@ out:
3023 return err; 3023 return err;
3024} 3024}
3025 3025
3026static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3027 sector_t block, int count)
3028{
3029 int i;
3030 for (i = 0; i < count; i++)
3031 unmap_underlying_metadata(bdev, block + i);
3032}
3033
3026static int 3034static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3035ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3036 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3106,18 @@ out:
3098 } else 3106 } else
3099 allocated = ret; 3107 allocated = ret;
3100 set_buffer_new(bh_result); 3108 set_buffer_new(bh_result);
3109 /*
3110 * if we allocated more blocks than requested
3111 * we need to make sure we unmap the extra block
3112 * allocated. The actual needed block will get
3113 * unmapped later when we find the buffer_head marked
3114 * new.
3115 */
3116 if (allocated > max_blocks) {
3117 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3118 newblock + max_blocks,
3119 allocated - max_blocks);
3120 }
3101map_out: 3121map_out:
3102 set_buffer_mapped(bh_result); 3122 set_buffer_mapped(bh_result);
3103out1: 3123out1:
@@ -3190,7 +3210,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3210 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3211 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3212 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3213 if (path[depth].p_ext == NULL && depth != 0) {
3214 ext4_error(inode->i_sb, __func__, "bad extent address "
3215 "inode: %lu, iblock: %d, depth: %d",
3216 inode->i_ino, iblock, depth);
3217 err = -EIO;
3218 goto out2;
3219 }
3194 eh = path[depth].p_hdr; 3220 eh = path[depth].p_hdr;
3195 3221
3196 ex = path[depth].p_ext; 3222 ex = path[depth].p_ext;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..84eeb8f515a3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1043,43 +1043,47 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1043 return ext4_indirect_calc_metadata_amount(inode, blocks); 1043 return ext4_indirect_calc_metadata_amount(inode, blocks);
1044} 1044}
1045 1045
1046/*
1047 * Called with i_data_sem down, which is important since we can call
1048 * ext4_discard_preallocations() from here.
1049 */
1046static void ext4_da_update_reserve_space(struct inode *inode, int used) 1050static void ext4_da_update_reserve_space(struct inode *inode, int used)
1047{ 1051{
1048 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1052 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1049 int total, mdb, mdb_free, mdb_claim = 0; 1053 struct ext4_inode_info *ei = EXT4_I(inode);
1050 1054 int mdb_free = 0;
1051 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1055
1052 /* recalculate the number of metablocks still need to be reserved */ 1056 spin_lock(&ei->i_block_reservation_lock);
1053 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1057 if (unlikely(used > ei->i_reserved_data_blocks)) {
1054 mdb = ext4_calc_metadata_amount(inode, total); 1058 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1055 1059 "with only %d reserved data blocks\n",
1056 /* figure out how many metablocks to release */ 1060 __func__, inode->i_ino, used,
1057 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1061 ei->i_reserved_data_blocks);
1058 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1062 WARN_ON(1);
1059 1063 used = ei->i_reserved_data_blocks;
1060 if (mdb_free) { 1064 }
1061 /* Account for allocated meta_blocks */ 1065
1062 mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; 1066 /* Update per-inode reservations */
1063 BUG_ON(mdb_free < mdb_claim); 1067 ei->i_reserved_data_blocks -= used;
1064 mdb_free -= mdb_claim; 1068 used += ei->i_allocated_meta_blocks;
1065 1069 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1066 /* update fs dirty blocks counter */ 1070 ei->i_allocated_meta_blocks = 0;
1071 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1072
1073 if (ei->i_reserved_data_blocks == 0) {
1074 /*
1075 * We can release all of the reserved metadata blocks
1076 * only when we have written all of the delayed
1077 * allocation blocks.
1078 */
1079 mdb_free = ei->i_allocated_meta_blocks;
1067 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1080 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1068 EXT4_I(inode)->i_allocated_meta_blocks = 0; 1081 ei->i_allocated_meta_blocks = 0;
1069 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1070 } 1082 }
1071
1072 /* update per-inode reservations */
1073 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1074 EXT4_I(inode)->i_reserved_data_blocks -= used;
1075 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
1076 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1083 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1077 1084
1078 vfs_dq_claim_block(inode, used + mdb_claim); 1085 /* Update quota subsystem */
1079 1086 vfs_dq_claim_block(inode, used);
1080 /*
1081 * free those over-booking quota for metadata blocks
1082 */
1083 if (mdb_free) 1087 if (mdb_free)
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1088 vfs_dq_release_reservation_block(inode, mdb_free);
1085 1089
@@ -1088,7 +1092,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1088 * there aren't any writers on the inode, we can discard the 1092 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1093 * inode's preallocations.
1090 */ 1094 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1095 if ((ei->i_reserved_data_blocks == 0) &&
1096 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1097 ext4_discard_preallocations(inode);
1093} 1098}
1094 1099
@@ -1801,7 +1806,8 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1801{ 1806{
1802 int retries = 0; 1807 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1808 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1809 struct ext4_inode_info *ei = EXT4_I(inode);
1810 unsigned long md_needed, md_reserved, total = 0;
1805 1811
1806 /* 1812 /*
1807 * recalculate the amount of metadata blocks to reserve 1813 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1815,44 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1815 * worse case is one extent per block
1810 */ 1816 */
1811repeat: 1817repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1818 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1819 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1820 md_needed = ext4_calc_metadata_amount(inode, nrblocks);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks; 1821 total = md_needed + nrblocks;
1819 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1822 spin_unlock(&ei->i_block_reservation_lock);
1820 1823
1821 /* 1824 /*
1822 * Make quota reservation here to prevent quota overflow 1825 * Make quota reservation here to prevent quota overflow
1823 * later. Real quota accounting is done at pages writeout 1826 * later. Real quota accounting is done at pages writeout
1824 * time. 1827 * time.
1825 */ 1828 */
1826 if (vfs_dq_reserve_block(inode, total)) 1829 if (vfs_dq_reserve_block(inode, total)) {
1830 /*
1831 * We tend to badly over-estimate the amount of
1832 * metadata blocks which are needed, so if we have
1833 * reserved any metadata blocks, try to force out the
1834 * inode and see if we have any better luck.
1835 */
1836 if (md_reserved && retries++ <= 3)
1837 goto retry;
1827 return -EDQUOT; 1838 return -EDQUOT;
1839 }
1828 1840
1829 if (ext4_claim_free_blocks(sbi, total)) { 1841 if (ext4_claim_free_blocks(sbi, total)) {
1830 vfs_dq_release_reservation_block(inode, total); 1842 vfs_dq_release_reservation_block(inode, total);
1831 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1843 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1844 retry:
1845 if (md_reserved)
1846 write_inode_now(inode, (retries == 3));
1832 yield(); 1847 yield();
1833 goto repeat; 1848 goto repeat;
1834 } 1849 }
1835 return -ENOSPC; 1850 return -ENOSPC;
1836 } 1851 }
1837 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1852 spin_lock(&ei->i_block_reservation_lock);
1838 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1853 ei->i_reserved_data_blocks += nrblocks;
1839 EXT4_I(inode)->i_reserved_meta_blocks += md_needed; 1854 ei->i_reserved_meta_blocks += md_needed;
1840 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1855 spin_unlock(&ei->i_block_reservation_lock);
1841 1856
1842 return 0; /* success */ 1857 return 0; /* success */
1843} 1858}
@@ -1845,49 +1860,45 @@ repeat:
1845static void ext4_da_release_space(struct inode *inode, int to_free) 1860static void ext4_da_release_space(struct inode *inode, int to_free)
1846{ 1861{
1847 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1862 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1848 int total, mdb, mdb_free, release; 1863 struct ext4_inode_info *ei = EXT4_I(inode);
1849 1864
1850 if (!to_free) 1865 if (!to_free)
1851 return; /* Nothing to release, exit */ 1866 return; /* Nothing to release, exit */
1852 1867
1853 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1868 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1854 1869
1855 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1870 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1856 /* 1871 /*
1857 * if there is no reserved blocks, but we try to free some 1872 * if there aren't enough reserved blocks, then the
1858 * then the counter is messed up somewhere. 1873 * counter is messed up somewhere. Since this
1859 * but since this function is called from invalidate 1874 * function is called from invalidate page, it's
1860 * page, it's harmless to return without any action 1875 * harmless to return without any action.
1861 */ 1876 */
1862 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1877 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1863 "blocks for inode %lu, but there is no reserved " 1878 "ino %lu, to_free %d with only %d reserved "
1864 "data blocks\n", to_free, inode->i_ino); 1879 "data blocks\n", inode->i_ino, to_free,
1865 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1880 ei->i_reserved_data_blocks);
1866 return; 1881 WARN_ON(1);
1882 to_free = ei->i_reserved_data_blocks;
1867 } 1883 }
1884 ei->i_reserved_data_blocks -= to_free;
1868 1885
1869 /* recalculate the number of metablocks still need to be reserved */ 1886 if (ei->i_reserved_data_blocks == 0) {
1870 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1887 /*
1871 mdb = ext4_calc_metadata_amount(inode, total); 1888 * We can release all of the reserved metadata blocks
1872 1889 * only when we have written all of the delayed
1873 /* figure out how many metablocks to release */ 1890 * allocation blocks.
1874 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1891 */
1875 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1892 to_free += ei->i_allocated_meta_blocks;
1876 1893 ei->i_allocated_meta_blocks = 0;
1877 release = to_free + mdb_free; 1894 }
1878
1879 /* update fs dirty blocks counter for truncate case */
1880 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1881 1895
1882 /* update per-inode reservations */ 1896 /* update fs dirty blocks counter */
1883 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1897 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1884 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1885 1898
1886 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1887 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1888 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1899 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1889 1900
1890 vfs_dq_release_reservation_block(inode, release); 1901 vfs_dq_release_reservation_block(inode, to_free);
1891} 1902}
1892 1903
1893static void ext4_da_page_release_reservation(struct page *page, 1904static void ext4_da_page_release_reservation(struct page *page,
@@ -2967,8 +2978,7 @@ retry:
2967out_writepages: 2978out_writepages:
2968 if (!no_nrwrite_index_update) 2979 if (!no_nrwrite_index_update)
2969 wbc->no_nrwrite_index_update = 0; 2980 wbc->no_nrwrite_index_update = 0;
2970 if (wbc->nr_to_write > nr_to_writebump) 2981 wbc->nr_to_write -= nr_to_writebump;
2971 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->range_start = range_start; 2982 wbc->range_start = range_start;
2973 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2983 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2974 return ret; 2984 return ret;
@@ -2993,11 +3003,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2993 if (2 * free_blocks < 3 * dirty_blocks || 3003 if (2 * free_blocks < 3 * dirty_blocks ||
2994 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3004 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2995 /* 3005 /*
2996 * free block count is less that 150% of dirty blocks 3006 * free block count is less than 150% of dirty blocks
2997 * or free blocks is less that watermark 3007 * or free blocks is less than watermark
2998 */ 3008 */
2999 return 1; 3009 return 1;
3000 } 3010 }
3011 /*
3012 * Even if we don't switch but are nearing capacity,
3013 * start pushing delalloc when 1/2 of free blocks are dirty.
3014 */
3015 if (free_blocks < 2 * dirty_blocks)
3016 writeback_inodes_sb_if_idle(sb);
3017
3001 return 0; 3018 return 0;
3002} 3019}
3003 3020
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..7cccb35c0f4d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2174,9 +2174,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2174 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2174 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2175 2175
2176 return snprintf(buf, PAGE_SIZE, "%llu\n", 2176 return snprintf(buf, PAGE_SIZE, "%llu\n",
2177 sbi->s_kbytes_written + 2177 (unsigned long long)(sbi->s_kbytes_written +
2178 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2178 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2179 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2179 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2180} 2180}
2181 2181
2182static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2182static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4005,7 @@ static inline void unregister_as_ext2(void)
4005{ 4005{
4006 unregister_filesystem(&ext2_fs_type); 4006 unregister_filesystem(&ext2_fs_type);
4007} 4007}
4008MODULE_ALIAS("ext2");
4008#else 4009#else
4009static inline void register_as_ext2(void) { } 4010static inline void register_as_ext2(void) { }
4010static inline void unregister_as_ext2(void) { } 4011static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4032,7 @@ static inline void unregister_as_ext3(void)
4031{ 4032{
4032 unregister_filesystem(&ext3_fs_type); 4033 unregister_filesystem(&ext3_fs_type);
4033} 4034}
4035MODULE_ALIAS("ext3");
4034#else 4036#else
4035static inline void register_as_ext3(void) { } 4037static inline void register_as_ext3(void) { }
4036static inline void unregister_as_ext3(void) { } 4038static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
1332 goto cleanup; 1332 goto cleanup;
1333 kfree(b_entry_name); 1333 kfree(b_entry_name);
1334 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1335 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1336 kfree(is); 1338 kfree(is);
1337 kfree(bs); 1339 kfree(bs);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..f6c2155e0026 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1187,6 +1187,23 @@ void writeback_inodes_sb(struct super_block *sb)
1187EXPORT_SYMBOL(writeback_inodes_sb); 1187EXPORT_SYMBOL(writeback_inodes_sb);
1188 1188
1189/** 1189/**
1190 * writeback_inodes_sb_if_idle - start writeback if none underway
1191 * @sb: the superblock
1192 *
1193 * Invoke writeback_inodes_sb if no writeback is currently underway.
1194 * Returns 1 if writeback was started, 0 if not.
1195 */
1196int writeback_inodes_sb_if_idle(struct super_block *sb)
1197{
1198 if (!writeback_in_progress(sb->s_bdi)) {
1199 writeback_inodes_sb(sb);
1200 return 1;
1201 } else
1202 return 0;
1203}
1204EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1205
1206/**
1190 * sync_inodes_sb - sync sb inode pages 1207 * sync_inodes_sb - sync sb inode pages
1191 * @sb: the superblock 1208 * @sb: the superblock
1192 * 1209 *
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
25#include <trace/events/jbd2.h> 26#include <trace/events/jbd2.h>
26 27
27/* 28/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
515 journal->j_tail_sequence = first_tid; 516 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 517 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 518 spin_unlock(&journal->j_state_lock);
519
520 /*
521 * If there is an external journal, we need to make sure that
522 * any data blocks that were recently written out --- perhaps
523 * by jbd2_log_do_checkpoint() --- are flushed out before we
524 * drop the transactions from the external journal. It's
525 * unlikely this will be necessary, especially with a
526 * appropriately sized journal, but we need this to guarantee
527 * correctness. Fortunately jbd2_cleanup_journal_tail()
528 * doesn't get called all that often.
529 */
530 if ((journal->j_fs_dev != journal->j_dev) &&
531 (journal->j_flags & JBD2_BARRIER))
532 blkdev_issue_flush(journal->j_fs_dev, NULL);
518 if (!(journal->j_flags & JBD2_ABORT)) 533 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock(journal, 1); 534 jbd2_journal_update_superblock(journal, 1);
520 return 0; 535 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
259 ret = err; 259 ret = err;
260 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 J_ASSERT(jinode->i_transaction == commit_transaction); 261 J_ASSERT(jinode->i_transaction == commit_transaction);
262 commit_transaction->t_flushed_data_blocks = 1;
262 jinode->i_flags &= ~JI_COMMIT_RUNNING; 263 jinode->i_flags &= ~JI_COMMIT_RUNNING;
263 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 264 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
264 } 265 }
@@ -708,8 +709,17 @@ start_journal_io:
708 } 709 }
709 } 710 }
710 711
711 /* Done it all: now write the commit record asynchronously. */ 712 /*
713 * If the journal is not located on the file system device,
714 * then we must flush the file system device before we issue
715 * the commit record
716 */
717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL);
712 721
722 /* Done it all: now write the commit record asynchronously. */
713 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 723 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
714 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 724 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
715 err = journal_submit_commit_record(journal, commit_transaction, 725 err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
720 blkdev_issue_flush(journal->j_dev, NULL); 730 blkdev_issue_flush(journal->j_dev, NULL);
721 } 731 }
722 732
723 /*
724 * This is the right place to wait for data buffers both for ASYNC
725 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
726 * the commit block went to disk (which happens above). If commit is
727 * SYNC, we need to wait for data buffers before we start writing
728 * commit block, which happens below in such setting.
729 */
730 err = journal_finish_inode_data_buffers(journal, commit_transaction); 733 err = journal_finish_inode_data_buffers(journal, commit_transaction);
731 if (err) { 734 if (err) {
732 printk(KERN_WARNING 735 printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 17af879e6e9e..ac0d027595d0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
814 journal_t *journal; 814 journal_t *journal;
815 int err; 815 int err;
816 816
817 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
818 if (!journal) 818 if (!journal)
819 goto fail; 819 goto fail;
820 820
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f1011f7f3d41..638ce4554c76 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -653,6 +653,7 @@ struct transaction_s
653 * waiting for it to finish. 653 * waiting for it to finish.
654 */ 654 */
655 unsigned int t_synchronous_commit:1; 655 unsigned int t_synchronous_commit:1;
656 unsigned int t_flushed_data_blocks:1;
656 657
657 /* 658 /*
658 * For use by the filesystem to store fs-specific data 659 * For use by the filesystem to store fs-specific data
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c18c008f4bbf..76e8903cd204 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -70,6 +70,7 @@ struct writeback_control {
70struct bdi_writeback; 70struct bdi_writeback;
71int inode_wait(void *); 71int inode_wait(void *);
72void writeback_inodes_sb(struct super_block *); 72void writeback_inodes_sb(struct super_block *);
73int writeback_inodes_sb_if_idle(struct super_block *);
73void sync_inodes_sb(struct super_block *); 74void sync_inodes_sb(struct super_block *);
74void writeback_inodes_wbc(struct writeback_control *wbc); 75void writeback_inodes_wbc(struct writeback_control *wbc);
75long wb_do_writeback(struct bdi_writeback *wb, int force_wait); 76long wb_do_writeback(struct bdi_writeback *wb, int force_wait);