Diffstat (limited to 'fs/jbd2')
 -rw-r--r--	fs/jbd2/journal.c	31
 -rw-r--r--	fs/jbd2/transaction.c	68
2 files changed, 52 insertions, 47 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 18bfd5dab642..e378cb383979 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
 	struct jbd2_buffer_trigger_type *triggers;
+	journal_t *journal = transaction->t_journal;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
 	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
 
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
@@ -388,14 +394,6 @@ repeat:
 		kunmap_atomic(mapped_data, KM_USER0);
 	}
 
-	/* keep subsequent assertions sane */
-	new_bh->b_state = 0;
-	init_buffer(new_bh, NULL, NULL);
-	atomic_set(&new_bh->b_count, 1);
-	jbd_unlock_bh_state(bh_in);
-
-	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
-
 	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
 	new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -412,7 +410,11 @@ repeat:
 	 * copying is moved to the transaction's shadow queue.
 	 */
 	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-	jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_lock(&journal->j_list_lock);
+	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh_in);
+
 	JBUFFER_TRACE(new_jh, "file as BJ_IO");
 	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
 
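Taken together, the journal.c hunks above close a race between jbd2_journal_write_metadata_buffer() and do_get_write_access(): previously, jbd_unlock_bh_state(bh_in) was dropped before the buffer was filed as BJ_Shadow, so another thread could fail to see the shadow filing and modify the buffer while the commit code was still relying on its copy-out. The fix hoists the sleeping setup work (alloc_buffer_head() and jbd2_journal_add_journal_head(), which "sleeps") above the locked region, then keeps bh_in state-locked from the copy-out until the BJ_Shadow filing is visible; since plain jbd2_journal_file_buffer() takes the state lock itself, the filing switches to the caller-locked __jbd2_journal_file_buffer() under journal->j_list_lock, which is what the new journal local at line 300 is for. Below is a minimal user-space model of the corrected ordering; the pthread mutexes and the names state_lock, list_lock, commit_thread and writer_thread are illustrative stand-ins, not the kernel API.

/*
 * Minimal user-space model of the new locking order.  Assumed stand-ins
 * (not the kernel API): state_lock ~ jbd_lock_bh_state(bh_in),
 * list_lock ~ journal->j_list_lock, on_shadow_list ~ BJ_Shadow filing.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int data = 42;		/* the metadata block contents */
static int shadow_copy;		/* commit's private copy-out */
static int on_shadow_list;	/* filed as BJ_Shadow? */

/* Commit side: copy-out and shadow filing in one state-lock section. */
static void *commit_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&state_lock);
	shadow_copy = data;			/* copy-out */
	pthread_mutex_lock(&list_lock);
	on_shadow_list = 1;			/* __jbd2_journal_file_buffer() */
	pthread_mutex_unlock(&list_lock);
	pthread_mutex_unlock(&state_lock);	/* only now can writers look */
	return NULL;
}

/* Writer side (do_get_write_access()): under the state lock it sees
 * either "not copied yet" or "already on the shadow list", never the
 * in-between window the old code allowed. */
static void *writer_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&state_lock);
	if (on_shadow_list)
		printf("writer: wait for commit I/O to finish\n");
	else
		data = 43;			/* safe: copy-out not started */
	pthread_mutex_unlock(&state_lock);
	return NULL;
}

int main(void)
{
	pthread_t c, w;

	pthread_create(&c, NULL, commit_thread, NULL);
	pthread_create(&w, NULL, writer_thread, NULL);
	pthread_join(c, NULL);
	pthread_join(w, NULL);
	printf("shadow=%d data=%d\n", shadow_copy, data);
	return 0;
}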
@@ -2410,6 +2412,7 @@ const char *jbd2_dev_to_name(dev_t device)
 	int i = hash_32(device, CACHE_SIZE_BITS);
 	char *ret;
 	struct block_device *bd;
+	static struct devname_cache *new_dev;
 
 	rcu_read_lock();
 	if (devcache[i] && devcache[i]->device == device) {
@@ -2419,20 +2422,20 @@ const char *jbd2_dev_to_name(dev_t device)
 	}
 	rcu_read_unlock();
 
+	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+	if (!new_dev)
+		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
 	spin_lock(&devname_cache_lock);
 	if (devcache[i]) {
 		if (devcache[i]->device == device) {
+			kfree(new_dev);
 			ret = devcache[i]->devname;
 			spin_unlock(&devname_cache_lock);
 			return ret;
 		}
 		call_rcu(&devcache[i]->rcu, free_devcache);
 	}
-	devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
-	if (!devcache[i]) {
-		spin_unlock(&devname_cache_lock);
-		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
-	}
+	devcache[i] = new_dev;
 	devcache[i]->device = device;
 	bd = bdget(device);
 	if (bd) {
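The jbd2_dev_to_name() hunks fix a different bug: kmalloc(GFP_KERNEL) may sleep and therefore must not be called with the devname_cache_lock spinlock held, as the old code did. The new code preallocates new_dev before taking the lock, rechecks the cache slot once inside the critical section, and simply kfree()s the preallocation when another CPU has refilled the slot in the meantime. Below is a compilable user-space sketch of that preallocate/recheck/free-on-lost-race pattern; malloc/free and a pthread mutex stand in for kmalloc, kfree/call_rcu and the spinlock, and all names are hypothetical.

/*
 * User-space sketch of the preallocate/recheck pattern used above.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int key;
	char name[32];
};

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *cache;	/* one slot, like devcache[i] */

static const char *lookup(int key)
{
	struct entry *new_entry;
	const char *ret;

	/* The possibly-sleeping allocation happens before the lock... */
	new_entry = malloc(sizeof(*new_entry));
	if (!new_entry)
		return "NOMEM";

	pthread_mutex_lock(&cache_lock);
	if (cache && cache->key == key) {
		/* ...so if someone refilled the slot meanwhile, drop our
		 * preallocation and use theirs, as the kfree() above does. */
		free(new_entry);
		ret = cache->name;
		pthread_mutex_unlock(&cache_lock);
		return ret;
	}
	free(cache);		/* the kernel defers this via call_rcu() */
	cache = new_entry;
	cache->key = key;
	snprintf(cache->name, sizeof(cache->name), "dev-%d", key);
	ret = cache->name;
	pthread_mutex_unlock(&cache_lock);
	return ret;
}

int main(void)
{
	puts(lookup(8));	/* fills the slot */
	puts(lookup(8));	/* hits the cache */
	return 0;
}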
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 494501edba6b..6213ac728f30 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
 	wake_up(&journal->j_wait_transaction_locked);
 }
 
-/*
- * Report any unexpected dirty buffers which turn up. Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible. #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
 {
-	int jlist;
-
-	/* If this buffer is one which might reasonably be dirty
-	 * --- ie. data, or not part of this journal --- then
-	 * we're OK to leave it alone, but otherwise we need to
-	 * move the dirty bit to the journal's own internal
-	 * JBDDirty bit. */
-	jlist = jh->b_jlist;
+	char b[BDEVNAME_SIZE];
 
-	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-	    jlist == BJ_Shadow || jlist == BJ_Forget) {
-		struct buffer_head *bh = jh2bh(jh);
-
-		if (test_clear_buffer_dirty(bh))
-			set_buffer_jbddirty(bh);
-	}
+	printk(KERN_WARNING
+	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+	       "There's a risk of filesystem corruption in case of system "
+	       "crash.\n",
+	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
 /*
@@ -593,14 +574,16 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
+			warn_dirty_buffer(bh);
 		}
 		/*
 		 * In any case we need to clean the dirty flag and we must
 		 * do it under the buffer lock to be sure we don't race
 		 * with running write-out.
 		 */
-		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-		jbd_unexpected_dirty_buffer(jh);
+		JBUFFER_TRACE(jh, "Journalling dirty buffer");
+		clear_buffer_dirty(bh);
+		set_buffer_jbddirty(bh);
 	}
 
 	unlock_buffer(bh);
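With warn_dirty_buffer() available, do_get_write_access() stops routing through the old jbd_unexpected_dirty_buffer() helper: under the buffer lock it now warns when a buffer that already belongs to a transaction is found dirty, and in any case moves the dirty state into the journal's private jbddirty bit so that ordinary writeback cannot push half-journaled metadata to disk. A plain-C sketch of that handover follows; the two bools stand in for the buffer-head bit operations, the journaled flag approximates "attached to a transaction", and the caller is assumed to hold the buffer lock.

/*
 * Plain-C sketch of the dirty-bit handover done above; fields are
 * stand-ins for buffer_dirty()/buffer_jbddirty() and jh->b_transaction.
 */
#include <stdbool.h>
#include <stdio.h>

struct buf {
	bool dirty;		/* buffer_dirty() */
	bool jbddirty;		/* buffer_jbddirty() */
	bool journaled;		/* already attached to a transaction? */
};

static void journal_dirty_handover(struct buf *b)
{
	/* Corresponds to the new warn_dirty_buffer() call: a buffer that
	 * is already journaled should never carry the plain dirty bit. */
	if (b->dirty && b->journaled)
		fprintf(stderr, "warning: dirty metadata buffer\n");

	/* "In any case" move the dirty state to the journal's private
	 * bit so writeback cannot push stale metadata to disk. */
	b->dirty = false;
	b->jbddirty = true;
}

int main(void)
{
	struct buf b = { .dirty = true, .journaled = true };

	journal_dirty_handover(&b);
	printf("dirty=%d jbddirty=%d\n", b.dirty, b.jbddirty);
	return 0;
}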
@@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
 	if (jh->b_transaction == NULL) {
+		/*
+		 * Previous jbd2_journal_forget() could have left the buffer
+		 * with jbddirty bit set because it was being committed. When
+		 * the commit finished, we've filed the buffer for
+		 * checkpointing and marked it dirty. Now we are reallocating
+		 * the buffer so the transaction freeing it must have
+		 * committed and so it's safe to clear the dirty bit.
+		 */
+		clear_buffer_dirty(jh2bh(jh));
 		jh->b_transaction = transaction;
 
 		/* first access by this transaction */
@@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
+		/*
+		 * We don't want to write the buffer anymore, clear the
+		 * bit so that we don't confuse checks in
+		 * __journal_file_buffer
+		 */
+		clear_buffer_dirty(bh);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
-		clear_buffer_jbddirty(bh);
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
@@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 	if (jh->b_transaction && jh->b_jlist == jlist)
 		return;
 
-	/* The following list of buffer states needs to be consistent
-	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
-	 * state. */
-
 	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		/*
+		 * For metadata buffers, we track dirty bit in buffer_jbddirty
+		 * instead of buffer_dirty. We should not see a dirty bit set
+		 * here because we clear it in do_get_write_access but e.g.
+		 * tune2fs can modify the sb and set the dirty bit at any time
+		 * so we try to gracefully handle that.
+		 */
+		if (buffer_dirty(bh))
+			warn_dirty_buffer(bh);
 		if (test_clear_buffer_dirty(bh) ||
 		    test_clear_buffer_jbddirty(bh))
 			was_dirty = 1;
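After this last hunk, __jbd2_journal_file_buffer() is the one place that still tolerates a stray dirty bit on the metadata-class lists (set by, e.g., tune2fs writing the superblock directly): it warns and then folds both dirty bits into was_dirty, which later in the function (outside this hunk) causes the buffer to be re-marked jbddirty. The sketch below shows the test-and-clear semantics that fold relies on, using C11 stdatomic as a portable stand-in for the kernel's atomic buffer-head bitops.

/*
 * Sketch of the test-and-clear semantics behind was_dirty; stdatomic
 * is a stand-in for the kernel's atomic buffer-head bitops.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BIT_DIRTY	(1u << 0)	/* buffer_dirty */
#define BIT_JBDDIRTY	(1u << 1)	/* buffer_jbddirty */

/* Atomically clear one bit and report whether it had been set. */
static bool test_clear_bit(atomic_uint *state, unsigned int bit)
{
	return atomic_fetch_and(state, ~bit) & bit;
}

/* Mirrors the hunk: dirty in either sense makes was_dirty true, and
 * the || short-circuits exactly as the kernel code does, so jbddirty
 * is left untouched when the plain dirty bit was already set. */
static bool fold_dirty_bits(atomic_uint *state)
{
	bool was_dirty = false;

	if (test_clear_bit(state, BIT_DIRTY) ||
	    test_clear_bit(state, BIT_JBDDIRTY))
		was_dirty = true;
	return was_dirty;
}

int main(void)
{
	atomic_uint state = BIT_DIRTY | BIT_JBDDIRTY;

	printf("was_dirty=%d, state afterwards=%u\n",
	       fold_dirty_bits(&state), atomic_load(&state));
	return 0;
}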
