GFS2: Umount recovery race fix

This patch fixes a race condition where we can receive recovery requests partway through processing a umount. This was causing problems since the recovery thread had already gone away.

Looking in more detail at the recovery code, it was really trying to implement a slight variation on a work queue, and that happens to align nicely with the recently introduced slow-work subsystem. As a result I've updated the code to use slow-work, rather than its own home-grown variety of work queue.

When using the wait_on_bit() function, I noticed that the wait function that was supplied as an argument was appearing in the WCHAN field, so I've updated the function names in order to produce more meaningful output.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
author: Steven Whitehouse <swhiteho@redhat.com> 2009-05-19 05:01:18 -0400
committer: Steven Whitehouse <swhiteho@redhat.com> 2009-05-19 05:01:18 -0400
commit: fe64d517df0970a68417184a12fcd4ba0589cc28 (patch)
tree: d977f214fdf6ba96254cfbf6683e8583ecebe504 /fs/gfs2/recovery.c
parent: 9582d41135c0d362f04ed6bf3dc8d693a7eafee2 (diff)
1 files changed, 33 insertions, 69 deletions
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 247e8f7d6b3d..59d2695509d3 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,8 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <linux/kthread.h>
+#include <linux/slow-work.h>
-#include <linux/freezer.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
        kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 }
-/**
+static int gfs2_recover_get_ref(struct slow_work *work)
- * gfs2_recover_journal - recover a given journal
+{
- * @jd: the struct gfs2_jdesc describing the journal
+        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
- *
+        if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
- * Acquire the journal's lock, check to see if the journal is clean, and
+                return -EBUSY;
- * do recovery if necessary.
+        return 0;
- *
+}
- * Returns: errno
- */
-int gfs2_recover_journal(struct gfs2_jdesc *jd)
+static void gfs2_recover_put_ref(struct slow_work *work)
+{
+        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+        clear_bit(JDF_RECOVERY, &jd->jd_flags);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
+}
+static void gfs2_recover_work(struct slow_work *work)
 {
+        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
        struct gfs2_log_header_host head;
@@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
                gfs2_glock_dq_uninit(&j_gh);
        fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
-        return 0;
+        return;
 fail_gunlock_tr:
        gfs2_glock_dq_uninit(&t_gh);
@@ -584,70 +590,28 @@ fail_gunlock_j:
 fail:
        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
-        return error;
 }
-static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
+struct slow_work_ops gfs2_recover_ops = {
-{
+        .get_ref = gfs2_recover_get_ref,
-        struct gfs2_jdesc *jd;
+        .put_ref = gfs2_recover_put_ref,
-        int found = 0;
+        .execute = gfs2_recover_work,
+};
-        spin_lock(&sdp->sd_jindex_spin);
-        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-                if (jd->jd_dirty) {
-                        jd->jd_dirty = 0;
-                        found = 1;
-                        break;
-                }
-        }
-        spin_unlock(&sdp->sd_jindex_spin);
-        if (!found)
-                jd = NULL;
-        return jd;
+static int gfs2_recovery_wait(void *word)
-}
-/**
- * gfs2_check_journals - Recover any dirty journals
- * @sdp: the filesystem
- *
- */
-static void gfs2_check_journals(struct gfs2_sbd *sdp)
 {
-        struct gfs2_jdesc *jd;
+        schedule();
+        return 0;
-        for (;;) {
-                jd = gfs2_jdesc_find_dirty(sdp);
-                if (!jd)
-                        break;
-                if (jd != sdp->sd_jdesc)
-                        gfs2_recover_journal(jd);
-        }
 }
-/**
+int gfs2_recover_journal(struct gfs2_jdesc *jd)
- * gfs2_recoverd - Recover dead machine's journals
- * @sdp: Pointer to GFS2 superblock
- *
- */
-int gfs2_recoverd(void *data)
 {
-        struct gfs2_sbd *sdp = data;
+        int rv;
-        unsigned long t;
+        rv = slow_work_enqueue(&jd->jd_work);
+        if (rv)
-        while (!kthread_should_stop()) {
+                return rv;
-                gfs2_check_journals(sdp);
+        wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
-                t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
-                if (freezing(current))
-                        refrigerator();
-                schedule_timeout_interruptible(t);
-        }
        return 0;
 }
author	Steven Whitehouse <swhiteho@redhat.com>	2009-05-19 05:01:18 -0400
committer	Steven Whitehouse <swhiteho@redhat.com>	2009-05-19 05:01:18 -0400
commit	fe64d517df0970a68417184a12fcd4ba0589cc28 (patch)
tree	d977f214fdf6ba96254cfbf6683e8583ecebe504 /fs/gfs2/recovery.c
parent	9582d41135c0d362f04ed6bf3dc8d693a7eafee2 (diff)

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 247e8f7d6b3d..59d2695509d3 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,8 +13,7 @@
13	#include <linux/buffer_head.h>	13	#include <linux/buffer_head.h>
14	#include <linux/gfs2_ondisk.h>	14	#include <linux/gfs2_ondisk.h>
15	#include <linux/crc32.h>	15	#include <linux/crc32.h>
16	#include <linux/kthread.h>	16	#include <linux/slow-work.h>
17	#include <linux/freezer.h>
18		17
19	#include "gfs2.h"	18	#include "gfs2.h"
20	#include "incore.h"	19	#include "incore.h"
@@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
441	kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);	440	kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
442	}	441	}
443		442
444	/**	443	static int gfs2_recover_get_ref(struct slow_work *work)
445	* gfs2_recover_journal - recover a given journal	444	{
446	* @jd: the struct gfs2_jdesc describing the journal	445	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
447	*	446	if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
448	* Acquire the journal's lock, check to see if the journal is clean, and	447	return -EBUSY;
449	* do recovery if necessary.	448	return 0;
450	*	449	}
451	* Returns: errno
452	*/
453		450
454	int gfs2_recover_journal(struct gfs2_jdesc *jd)	451	static void gfs2_recover_put_ref(struct slow_work *work)
		452	{
		453	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
		454	clear_bit(JDF_RECOVERY, &jd->jd_flags);
		455	smp_mb__after_clear_bit();
		456	wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
		457	}
		458
		459	static void gfs2_recover_work(struct slow_work *work)
455	{	460	{
		461	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
456	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);	462	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
457	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);	463	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
458	struct gfs2_log_header_host head;	464	struct gfs2_log_header_host head;
@@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
569	gfs2_glock_dq_uninit(&j_gh);	575	gfs2_glock_dq_uninit(&j_gh);
570		576
571	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);	577	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
572	return 0;	578	return;
573		579
574	fail_gunlock_tr:	580	fail_gunlock_tr:
575	gfs2_glock_dq_uninit(&t_gh);	581	gfs2_glock_dq_uninit(&t_gh);
@@ -584,70 +590,28 @@ fail_gunlock_j:
584		590
585	fail:	591	fail:
586	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);	592	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
587	return error;
588	}	593	}
589		594
590	static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)	595	struct slow_work_ops gfs2_recover_ops = {
591	{	596	.get_ref = gfs2_recover_get_ref,
592	struct gfs2_jdesc *jd;	597	.put_ref = gfs2_recover_put_ref,
593	int found = 0;	598	.execute = gfs2_recover_work,
594		599	};
595	spin_lock(&sdp->sd_jindex_spin);
596		600
597	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
598	if (jd->jd_dirty) {
599	jd->jd_dirty = 0;
600	found = 1;
601	break;
602	}
603	}
604	spin_unlock(&sdp->sd_jindex_spin);
605
606	if (!found)
607	jd = NULL;
608		601
609	return jd;	602	static int gfs2_recovery_wait(void *word)
610	}
611
612	/**
613	* gfs2_check_journals - Recover any dirty journals
614	* @sdp: the filesystem
615	*
616	*/
617
618	static void gfs2_check_journals(struct gfs2_sbd *sdp)
619	{	603	{
620	struct gfs2_jdesc *jd;	604	schedule();
621		605	return 0;
622	for (;;) {
623	jd = gfs2_jdesc_find_dirty(sdp);
624	if (!jd)
625	break;
626
627	if (jd != sdp->sd_jdesc)
628	gfs2_recover_journal(jd);
629	}
630	}	606	}
631		607
632	/**	608	int gfs2_recover_journal(struct gfs2_jdesc *jd)
633	* gfs2_recoverd - Recover dead machine's journals
634	* @sdp: Pointer to GFS2 superblock
635	*
636	*/
637
638	int gfs2_recoverd(void *data)
639	{	609	{
640	struct gfs2_sbd *sdp = data;	610	int rv;
641	unsigned long t;	611	rv = slow_work_enqueue(&jd->jd_work);
642		612	if (rv)
643	while (!kthread_should_stop()) {	613	return rv;
644	gfs2_check_journals(sdp);	614	wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
645	t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
646	if (freezing(current))
647	refrigerator();
648	schedule_timeout_interruptible(t);
649	}
650
651	return 0;	615	return 0;
652	}	616	}
653		617