author	Linus Torvalds <torvalds@linux-foundation.org>	2009-07-27 15:12:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-07-27 15:12:10 -0400
commit	2bc20d09b03bca6e068e07440812d75b70b1c0b2 (patch)
tree	50711d4320e12efd1439639af8464b69a3b7af6a
parent	c7425eb4814bce40f3d117ff7a7870cc12e350e3 (diff)
parent	f1015c447781729060c415f5133164c638561f25 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: fix race between write_metadata_buffer and get_write_access
  ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle()
  jbd: Fix a race between checkpointing code and journal_get_write_access()
  ext3: Fix truncation of symlinks after failed write
  jbd: Fail to load a journal if it is too short
-rw-r--r--	fs/ext3/dir.c	3
-rw-r--r--	fs/ext3/inode.c	32
-rw-r--r--	fs/jbd/journal.c	26
-rw-r--r--	fs/jbd/transaction.c	68
-rw-r--r--	include/linux/ext3_fs.h	2
5 files changed, 67 insertions, 64 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 3d724a95882f..373fa90c796a 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext3_get_blocks_handle(NULL, inode, blk, 1,
-						&map_bh, 0, 0);
+		err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 				(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5f51fed5c750..b49908a167ae 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -788,7 +788,7 @@ err_out:
 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 		sector_t iblock, unsigned long maxblocks,
 		struct buffer_head *bh_result,
-		int create, int extend_disksize)
+		int create)
 {
 	int err = -EIO;
 	int offsets[4];
@@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	if (!err)
 		err = ext3_splice_branch(handle, inode, iblock,
 					partial, indirect_blks, count);
-	/*
-	 * i_disksize growing is protected by truncate_mutex. Don't forget to
-	 * protect it if you're about to implement concurrent
-	 * ext3_get_block() -bzzz
-	 */
-	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-		ei->i_disksize = inode->i_size;
 	mutex_unlock(&ei->truncate_mutex);
 	if (err)
 		goto cleanup;
@@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
 	}
 
 	ret = ext3_get_blocks_handle(handle, inode, iblock,
-					max_blocks, bh_result, create, 0);
+					max_blocks, bh_result, create);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
 	err = ext3_get_blocks_handle(handle, inode, block, 1,
-				&dummy, create, 1);
+				&dummy, create);
 	/*
 	 * ext3_get_blocks_handle() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
@@ -1193,15 +1186,16 @@ write_begin_failed:
 		 * i_size_read because we hold i_mutex.
 		 *
 		 * Add inode to orphan list in case we crash before truncate
-		 * finishes.
+		 * finishes. Do this only if ext3_can_truncate() agrees so
+		 * that orphan processing code is happy.
 		 */
-		if (pos + len > inode->i_size)
+		if (pos + len > inode->i_size && ext3_can_truncate(inode))
 			ext3_orphan_add(handle, inode);
 		ext3_journal_stop(handle);
 		unlock_page(page);
 		page_cache_release(page);
 		if (pos + len > inode->i_size)
-			vmtruncate(inode, inode->i_size);
+			ext3_truncate(inode);
 	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
@@ -1287,7 +1281,7 @@ static int ext3_ordered_write_end(struct file *file,
 	 * There may be allocated blocks outside of i_size because
 	 * we failed to copy some data. Prepare for truncate.
 	 */
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext3_can_truncate(inode))
 		ext3_orphan_add(handle, inode);
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
@@ -1296,7 +1290,7 @@ static int ext3_ordered_write_end(struct file *file,
 	page_cache_release(page);
 
 	if (pos + len > inode->i_size)
-		vmtruncate(inode, inode->i_size);
+		ext3_truncate(inode);
 	return ret ? ret : copied;
 }
 
@@ -1315,14 +1309,14 @@ static int ext3_writeback_write_end(struct file *file,
 	 * There may be allocated blocks outside of i_size because
 	 * we failed to copy some data. Prepare for truncate.
 	 */
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext3_can_truncate(inode))
 		ext3_orphan_add(handle, inode);
 	ret = ext3_journal_stop(handle);
 	unlock_page(page);
 	page_cache_release(page);
 
 	if (pos + len > inode->i_size)
-		vmtruncate(inode, inode->i_size);
+		ext3_truncate(inode);
 	return ret ? ret : copied;
 }
 
@@ -1358,7 +1352,7 @@ static int ext3_journalled_write_end(struct file *file,
 	 * There may be allocated blocks outside of i_size because
 	 * we failed to copy some data. Prepare for truncate.
 	 */
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext3_can_truncate(inode))
 		ext3_orphan_add(handle, inode);
 	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
@@ -1375,7 +1369,7 @@ static int ext3_journalled_write_end(struct file *file,
 	page_cache_release(page);
 
 	if (pos + len > inode->i_size)
-		vmtruncate(inode, inode->i_size);
+		ext3_truncate(inode);
 	return ret ? ret : copied;
 }
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 737f7246a4b5..f96f85092d1c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 	struct page *new_page;
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
+	journal_t *journal = transaction->t_journal;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
 	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	new_jh = journal_add_journal_head(new_bh); /* This sleeps */
 
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
@@ -361,14 +367,6 @@ repeat:
 		kunmap_atomic(mapped_data, KM_USER0);
 	}
 
-	/* keep subsequent assertions sane */
-	new_bh->b_state = 0;
-	init_buffer(new_bh, NULL, NULL);
-	atomic_set(&new_bh->b_count, 1);
-	jbd_unlock_bh_state(bh_in);
-
-	new_jh = journal_add_journal_head(new_bh); /* This sleeps */
-
 	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
 	new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -385,7 +383,11 @@ repeat:
 	 * copying is moved to the transaction's shadow queue.
 	 */
 	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-	journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_lock(&journal->j_list_lock);
+	__journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh_in);
+
 	JBUFFER_TRACE(new_jh, "file as BJ_IO");
 	journal_file_buffer(new_jh, transaction, BJ_IO);
 
@@ -848,6 +850,12 @@ static int journal_reset(journal_t *journal)
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
+	if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
+		printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
+		       first, last);
+		journal_fail_superblock(journal);
+		return -EINVAL;
+	}
 
 	journal->j_first = first;
 	journal->j_last = last;
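
The journal_reset() hunk above rejects a journal whose block range cannot hold the minimum number of journal blocks. As a rough user-space illustration of just that arithmetic (not kernel code: JBD_MIN_BLOCKS stands in for jbd's JFS_MIN_JOURNAL_BLOCKS, commonly 1024, and the block numbers below are made up):

/*
 * Model of the new sanity check: a journal occupying blocks [first, last]
 * provides last - first + 1 blocks, and loading is refused when that is
 * below the minimum.
 */
#include <stdio.h>

#define JBD_MIN_BLOCKS 1024UL	/* stand-in for JFS_MIN_JOURNAL_BLOCKS */

static int journal_range_ok(unsigned long first, unsigned long last)
{
	/* same condition as the patch: too short to hold the minimum */
	if (first + JBD_MIN_BLOCKS > last + 1) {
		fprintf(stderr, "journal too short (blocks %lu-%lu)\n",
			first, last);
		return 0;
	}
	return 1;
}

int main(void)
{
	/* plausible layout: superblock at block 1, 8192-block journal */
	printf("1..8191: %s\n", journal_range_ok(1, 8191) ? "ok" : "rejected");
	/* truncated journal: only 100 usable blocks, well under the minimum */
	printf("1..100:  %s\n", journal_range_ok(1, 100) ? "ok" : "rejected");
	return 0;
}
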
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 73242ba7c7b1..c03ac11f74be 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -489,34 +489,15 @@ void journal_unlock_updates (journal_t *journal)
 	wake_up(&journal->j_wait_transaction_locked);
 }
 
-/*
- * Report any unexpected dirty buffers which turn up.  Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible.  #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
 {
-	int jlist;
-
-	/* If this buffer is one which might reasonably be dirty
-	 * --- ie. data, or not part of this journal --- then
-	 * we're OK to leave it alone, but otherwise we need to
-	 * move the dirty bit to the journal's own internal
-	 * JBDDirty bit. */
-	jlist = jh->b_jlist;
+	char b[BDEVNAME_SIZE];
 
-	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-	    jlist == BJ_Shadow || jlist == BJ_Forget) {
-		struct buffer_head *bh = jh2bh(jh);
-
-		if (test_clear_buffer_dirty(bh))
-			set_buffer_jbddirty(bh);
-	}
+	printk(KERN_WARNING
+	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+	       "There's a risk of filesystem corruption in case of system "
+	       "crash.\n",
+	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
 /*
@@ -583,14 +564,16 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
+			warn_dirty_buffer(bh);
 		}
 		/*
 		 * In any case we need to clean the dirty flag and we must
 		 * do it under the buffer lock to be sure we don't race
 		 * with running write-out.
 		 */
-		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-		jbd_unexpected_dirty_buffer(jh);
+		JBUFFER_TRACE(jh, "Journalling dirty buffer");
+		clear_buffer_dirty(bh);
+		set_buffer_jbddirty(bh);
 	}
 
 	unlock_buffer(bh);
@@ -826,6 +809,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
 	if (jh->b_transaction == NULL) {
+		/*
+		 * Previous journal_forget() could have left the buffer
+		 * with jbddirty bit set because it was being committed. When
+		 * the commit finished, we've filed the buffer for
+		 * checkpointing and marked it dirty. Now we are reallocating
+		 * the buffer so the transaction freeing it must have
+		 * committed and so it's safe to clear the dirty bit.
+		 */
+		clear_buffer_dirty(jh2bh(jh));
 		jh->b_transaction = transaction;
 
 		/* first access by this transaction */
@@ -1782,8 +1774,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
+		/*
+		 * We don't want to write the buffer anymore, clear the
+		 * bit so that we don't confuse checks in
+		 * __journal_file_buffer
+		 */
+		clear_buffer_dirty(bh);
 		__journal_file_buffer(jh, transaction, BJ_Forget);
-		clear_buffer_jbddirty(bh);
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
@@ -2041,12 +2038,17 @@ void __journal_file_buffer(struct journal_head *jh,
 	if (jh->b_transaction && jh->b_jlist == jlist)
 		return;
 
-	/* The following list of buffer states needs to be consistent
-	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
-	 * state. */
-
 	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		/*
+		 * For metadata buffers, we track dirty bit in buffer_jbddirty
+		 * instead of buffer_dirty. We should not see a dirty bit set
+		 * here because we clear it in do_get_write_access but e.g.
+		 * tune2fs can modify the sb and set the dirty bit at any time
+		 * so we try to gracefully handle that.
+		 */
+		if (buffer_dirty(bh))
+			warn_dirty_buffer(bh);
 		if (test_clear_buffer_dirty(bh) ||
 		    test_clear_buffer_jbddirty(bh))
 			was_dirty = 1;
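
The transaction.c changes converge on one rule for metadata-class buffers: the generic dirty bit is never left set, its state is transferred to the journal-private jbddirty bit, and spotting a dirty bit at all triggers a warning (e.g. tune2fs writing the live superblock). A user-space sketch of that net effect; the flag macros and the bitmask below stand in for buffer_head state bits and are not the kernel API:

/*
 * Toy model of the dirty -> jbddirty handoff performed around
 * __journal_file_buffer() after this patch. Only the net effect of
 * warn_dirty_buffer(), test_clear_buffer_dirty()/test_clear_buffer_jbddirty()
 * and the later set_buffer_jbddirty() is reproduced.
 */
#include <stdio.h>

#define BH_DIRTY	(1u << 0)	/* models buffer_dirty() */
#define BH_JBDDIRTY	(1u << 1)	/* models buffer_jbddirty() */

/* file a metadata-class buffer: warn on an unexpected dirty bit, then
 * move any dirty state over to the journal-private jbddirty bit */
static int file_metadata_buffer(unsigned int *state)
{
	int was_dirty = 0;

	if (*state & BH_DIRTY)
		printf("warning: spotted dirty metadata buffer\n");

	if (*state & (BH_DIRTY | BH_JBDDIRTY)) {
		*state &= ~BH_DIRTY;	/* clear the generic dirty bit */
		*state |= BH_JBDDIRTY;	/* dirty state now tracked by jbd */
		was_dirty = 1;
	}
	return was_dirty;
}

int main(void)
{
	unsigned int clean = 0, dirty = BH_DIRTY;
	int wd_clean, wd_dirty;

	wd_clean = file_metadata_buffer(&clean);
	wd_dirty = file_metadata_buffer(&dirty);
	printf("clean buffer: was_dirty=%d state=%#x\n", wd_clean, clean);
	printf("dirty buffer: was_dirty=%d state=%#x\n", wd_dirty, dirty);
	return 0;
}
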
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 634a5e5aba3e..7499b3667798 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -874,7 +874,7 @@ struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
-	int create, int extend_disksize);
+	int create);
 
 extern struct inode *ext3_iget(struct super_block *, unsigned long);
 extern int ext3_write_inode (struct inode *, int);