diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2009-05-19 05:01:18 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2009-05-19 05:01:18 -0400 |
commit | fe64d517df0970a68417184a12fcd4ba0589cc28 (patch) | |
tree | d977f214fdf6ba96254cfbf6683e8583ecebe504 /fs/gfs2/recovery.c | |
parent | 9582d41135c0d362f04ed6bf3dc8d693a7eafee2 (diff) |
GFS2: Umount recovery race fix
This patch fixes a race condition where we can receive recovery
requests part way through processing a umount. This was causing
problems since the recovery thread had already gone away.
Looking in more detail at the recovery code, it was really trying
to implement a slight variation on a work queue, and that happens to
align nicely with the recently introduced slow-work subsystem. As a
result, I've updated the code to use slow-work rather than its own
home-grown variety of work queue.
When using the wait_on_bit() function, I noticed that the wait function
that was supplied as an argument was appearing in the WCHAN field, so
I've updated the function names in order to produce more meaningful
output.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/recovery.c')
-rw-r--r-- | fs/gfs2/recovery.c | 102 |
1 files changed, 33 insertions, 69 deletions
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 247e8f7d6b3d..59d2695509d3 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -13,8 +13,7 @@ | |||
13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
14 | #include <linux/gfs2_ondisk.h> | 14 | #include <linux/gfs2_ondisk.h> |
15 | #include <linux/crc32.h> | 15 | #include <linux/crc32.h> |
16 | #include <linux/kthread.h> | 16 | #include <linux/slow-work.h> |
17 | #include <linux/freezer.h> | ||
18 | 17 | ||
19 | #include "gfs2.h" | 18 | #include "gfs2.h" |
20 | #include "incore.h" | 19 | #include "incore.h" |
@@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | |||
441 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); | 440 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); |
442 | } | 441 | } |
443 | 442 | ||
444 | /** | 443 | static int gfs2_recover_get_ref(struct slow_work *work) |
445 | * gfs2_recover_journal - recover a given journal | 444 | { |
446 | * @jd: the struct gfs2_jdesc describing the journal | 445 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); |
447 | * | 446 | if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) |
448 | * Acquire the journal's lock, check to see if the journal is clean, and | 447 | return -EBUSY; |
449 | * do recovery if necessary. | 448 | return 0; |
450 | * | 449 | } |
451 | * Returns: errno | ||
452 | */ | ||
453 | 450 | ||
454 | int gfs2_recover_journal(struct gfs2_jdesc *jd) | 451 | static void gfs2_recover_put_ref(struct slow_work *work) |
452 | { | ||
453 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | ||
454 | clear_bit(JDF_RECOVERY, &jd->jd_flags); | ||
455 | smp_mb__after_clear_bit(); | ||
456 | wake_up_bit(&jd->jd_flags, JDF_RECOVERY); | ||
457 | } | ||
458 | |||
459 | static void gfs2_recover_work(struct slow_work *work) | ||
455 | { | 460 | { |
461 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | ||
456 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 462 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
457 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 463 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
458 | struct gfs2_log_header_host head; | 464 | struct gfs2_log_header_host head; |
@@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) | |||
569 | gfs2_glock_dq_uninit(&j_gh); | 575 | gfs2_glock_dq_uninit(&j_gh); |
570 | 576 | ||
571 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 577 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
572 | return 0; | 578 | return; |
573 | 579 | ||
574 | fail_gunlock_tr: | 580 | fail_gunlock_tr: |
575 | gfs2_glock_dq_uninit(&t_gh); | 581 | gfs2_glock_dq_uninit(&t_gh); |
@@ -584,70 +590,28 @@ fail_gunlock_j: | |||
584 | 590 | ||
585 | fail: | 591 | fail: |
586 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); | 592 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); |
587 | return error; | ||
588 | } | 593 | } |
589 | 594 | ||
590 | static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) | 595 | struct slow_work_ops gfs2_recover_ops = { |
591 | { | 596 | .get_ref = gfs2_recover_get_ref, |
592 | struct gfs2_jdesc *jd; | 597 | .put_ref = gfs2_recover_put_ref, |
593 | int found = 0; | 598 | .execute = gfs2_recover_work, |
594 | 599 | }; | |
595 | spin_lock(&sdp->sd_jindex_spin); | ||
596 | 600 | ||
597 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | ||
598 | if (jd->jd_dirty) { | ||
599 | jd->jd_dirty = 0; | ||
600 | found = 1; | ||
601 | break; | ||
602 | } | ||
603 | } | ||
604 | spin_unlock(&sdp->sd_jindex_spin); | ||
605 | |||
606 | if (!found) | ||
607 | jd = NULL; | ||
608 | 601 | ||
609 | return jd; | 602 | static int gfs2_recovery_wait(void *word) |
610 | } | ||
611 | |||
612 | /** | ||
613 | * gfs2_check_journals - Recover any dirty journals | ||
614 | * @sdp: the filesystem | ||
615 | * | ||
616 | */ | ||
617 | |||
618 | static void gfs2_check_journals(struct gfs2_sbd *sdp) | ||
619 | { | 603 | { |
620 | struct gfs2_jdesc *jd; | 604 | schedule(); |
621 | 605 | return 0; | |
622 | for (;;) { | ||
623 | jd = gfs2_jdesc_find_dirty(sdp); | ||
624 | if (!jd) | ||
625 | break; | ||
626 | |||
627 | if (jd != sdp->sd_jdesc) | ||
628 | gfs2_recover_journal(jd); | ||
629 | } | ||
630 | } | 606 | } |
631 | 607 | ||
632 | /** | 608 | int gfs2_recover_journal(struct gfs2_jdesc *jd) |
633 | * gfs2_recoverd - Recover dead machine's journals | ||
634 | * @sdp: Pointer to GFS2 superblock | ||
635 | * | ||
636 | */ | ||
637 | |||
638 | int gfs2_recoverd(void *data) | ||
639 | { | 609 | { |
640 | struct gfs2_sbd *sdp = data; | 610 | int rv; |
641 | unsigned long t; | 611 | rv = slow_work_enqueue(&jd->jd_work); |
642 | 612 | if (rv) | |
643 | while (!kthread_should_stop()) { | 613 | return rv; |
644 | gfs2_check_journals(sdp); | 614 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); |
645 | t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; | ||
646 | if (freezing(current)) | ||
647 | refrigerator(); | ||
648 | schedule_timeout_interruptible(t); | ||
649 | } | ||
650 | |||
651 | return 0; | 615 | return 0; |
652 | } | 616 | } |
653 | 617 | ||