1 files changed, 762 insertions, 0 deletions
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
new file mode 100644
index 000000000000..b036ee7dcb32
--- /dev/null
+++ b/fs/dlm/recover.c
@@ -0,0 +1,762 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "dir.h"
+#include "config.h"
+#include "ast.h"
+#include "memory.h"
+#include "rcom.h"
+#include "lock.h"
+#include "lowcomms.h"
+#include "member.h"
+#include "recover.h"
+/*
+ * Recovery waiting routines: these functions wait for a particular reply from
+ * a remote node, or for the remote node to report a certain status.  They need
+ * to abort if the lockspace is stopped indicating a node has failed (perhaps
+ * the one being waited for).
+ */
+/*
+ * Wait until given function returns non-zero or lockspace is stopped
+ * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
+ * function thinks it could have completed the waited-on task, they should wake
+ * up ls_wait_general to get an immediate response rather than waiting for the
+ * timer to detect the result.  A timer wakes us up periodically while waiting
+ * to see if we should abort due to a node failure.  This should only be called
+ * by the dlm_recoverd thread.
+ */
+/*
+ * Timer callback installed by dlm_wait_function().  Re-arms ls_timer for
+ * another dlm_config.recover_timer * HZ jiffies and wakes anything sleeping
+ * on ls_wait_general so the wait condition gets re-evaluated.
+ */
+static void dlm_wait_timer_fn(unsigned long data)
+{
+        struct dlm_ls *lockspace = (struct dlm_ls *) data;
+        unsigned long next_expiry = jiffies + (dlm_config.recover_timer * HZ);
+
+        mod_timer(&lockspace->ls_timer, next_expiry);
+        wake_up(&lockspace->ls_wait_general);
+}
+/*
+ * Sleep on ls_wait_general until testfn(ls) returns non-zero or
+ * dlm_recovery_stopped(ls) becomes true.  ls_timer is armed for the duration
+ * of the wait so the condition is re-checked periodically.
+ *
+ * Returns 0, or -EINTR if recovery is found to be stopped after the wait.
+ */
+int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
+{
+        /* arm the periodic wake-up timer */
+        init_timer(&ls->ls_timer);
+        ls->ls_timer.function = dlm_wait_timer_fn;
+        ls->ls_timer.data = (long) ls;
+        ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
+        add_timer(&ls->ls_timer);
+
+        wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
+
+        /* the timer re-arms itself, so wait for any running callback too */
+        del_timer_sync(&ls->ls_timer);
+
+        if (!dlm_recovery_stopped(ls))
+                return 0;
+
+        log_debug(ls, "dlm_wait_function aborted");
+        return -EINTR;
+}
+/*
+ * An efficient way for all nodes to wait for all others to have a certain
+ * status.  The node with the lowest nodeid polls all the others for their
+ * status (wait_status_all) and all the others poll the node with the low id
+ * for its accumulated result (wait_status_low).  When all nodes have set
+ * status flag X, then status flag X_ALL will be set on the low nodeid.
+ */
+/* Return a snapshot of ls_recover_status, read under ls_recover_lock. */
+uint32_t dlm_recover_status(struct dlm_ls *ls)
+{
+        uint32_t snapshot;
+
+        spin_lock(&ls->ls_recover_lock);
+        snapshot = ls->ls_recover_status;
+        spin_unlock(&ls->ls_recover_lock);
+
+        return snapshot;
+}
+/*
+ * OR the given status bits into ls_recover_status under ls_recover_lock.
+ * Bits that are already set are left untouched; nothing is cleared here.
+ */
+void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
+{
+        spin_lock(&ls->ls_recover_lock);
+        ls->ls_recover_status = ls->ls_recover_status | status;
+        spin_unlock(&ls->ls_recover_lock);
+}
+/*
+ * Poll every member on ls_nodes, one at a time, until its reported status
+ * contains a bit of wait_status.  The sleep between polls of one node grows
+ * by 20 per retry up to 1000 (msleep units) and restarts for the next node.
+ *
+ * NOTE(review): rc_result is read from ls_recover_buf, so dlm_rcom_status()
+ * presumably leaves the reply there -- confirm in rcom.c.
+ *
+ * Returns 0 when all members reported the status, -EINTR if recovery was
+ * stopped, or the error returned by dlm_rcom_status().
+ */
+static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
+{
+        struct dlm_rcom *reply = (struct dlm_rcom *) ls->ls_recover_buf;
+        struct dlm_member *node;
+        int backoff;
+        int rv = 0;
+
+        list_for_each_entry(node, &ls->ls_nodes, list) {
+                backoff = 0;
+                while (1) {
+                        if (dlm_recovery_stopped(ls))
+                                return -EINTR;
+
+                        rv = dlm_rcom_status(ls, node->nodeid);
+                        if (rv)
+                                return rv;
+
+                        if (reply->rc_result & wait_status)
+                                break;
+
+                        if (backoff < 1000)
+                                backoff += 20;
+                        msleep(backoff);
+                }
+        }
+        return rv;
+}
+/*
+ * Poll the node with the lowest nodeid (ls_low_nodeid) until its reported
+ * status contains a bit of wait_status.  The sleep between polls grows by 20
+ * per retry up to 1000 (msleep units).
+ *
+ * Returns 0 once the status is seen, -EINTR if recovery was stopped, or the
+ * error returned by dlm_rcom_status().
+ */
+static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
+{
+        struct dlm_rcom *reply = (struct dlm_rcom *) ls->ls_recover_buf;
+        int low_nodeid = ls->ls_low_nodeid;
+        int backoff = 0;
+        int rv;
+
+        while (1) {
+                if (dlm_recovery_stopped(ls))
+                        return -EINTR;
+
+                rv = dlm_rcom_status(ls, low_nodeid);
+                if (rv)
+                        return rv;
+
+                if (reply->rc_result & wait_status)
+                        return 0;
+
+                if (backoff < 1000)
+                        backoff += 20;
+                msleep(backoff);
+        }
+}
+/*
+ * Wait for all nodes to reach the given status.  The "all nodes" variant of a
+ * status flag is taken to be the flag shifted left by one bit.
+ *
+ * If we are the low nodeid we poll everyone for `status` and, on success, set
+ * the shifted flag locally; otherwise we poll the low nodeid for the shifted
+ * flag.
+ */
+static int wait_status(struct dlm_ls *ls, uint32_t status)
+{
+        uint32_t status_all = status << 1;
+        int error;
+
+        if (ls->ls_low_nodeid != dlm_our_nodeid())
+                return wait_status_low(ls, status_all);
+
+        error = wait_status_all(ls, status);
+        if (error)
+                return error;
+
+        dlm_set_recover_status(ls, status_all);
+        return 0;
+}
+/* Wait, via wait_status(), for the DLM_RS_NODES status; returns its result. */
+int dlm_recover_members_wait(struct dlm_ls *ls)
+{
+        return wait_status(ls, DLM_RS_NODES);
+}
+/* Wait, via wait_status(), for the DLM_RS_DIR status; returns its result. */
+int dlm_recover_directory_wait(struct dlm_ls *ls)
+{
+        return wait_status(ls, DLM_RS_DIR);
+}
+/* Wait, via wait_status(), for the DLM_RS_LOCKS status; returns its result. */
+int dlm_recover_locks_wait(struct dlm_ls *ls)
+{
+        return wait_status(ls, DLM_RS_LOCKS);
+}
+/* Wait, via wait_status(), for the DLM_RS_DONE status; returns its result. */
+int dlm_recover_done_wait(struct dlm_ls *ls)
+{
+        return wait_status(ls, DLM_RS_DONE);
+}
+/*
+ * The recover_list contains all the rsb's for which we've requested the new
+ * master nodeid.  As replies are returned from the resource directories the
+ * rsb's are removed from the list.  When the list is empty we're done.
+ *
+ * The recover_list is later similarly used for all rsb's for which we've sent
+ * new lkb's and need to receive new corresponding lkid's.
+ *
+ * We use the address of the rsb struct as a simple local identifier for the
+ * rsb so we can match an rcom reply with the rsb it was sent for.
+ */
+/*
+ * Return non-zero if ls_recover_list is empty, checked under
+ * ls_recover_list_lock.  Also used as the test function handed to
+ * dlm_wait_function().
+ */
+static int recover_list_empty(struct dlm_ls *ls)
+{
+        int is_empty;
+
+        spin_lock(&ls->ls_recover_list_lock);
+        is_empty = list_empty(&ls->ls_recover_list);
+        spin_unlock(&ls->ls_recover_list_lock);
+
+        return is_empty;
+}
+/*
+ * Put the rsb on its lockspace's recover_list unless it is already linked
+ * somewhere through res_recover_list.  A newly added rsb bumps
+ * ls_recover_list_count and gets an extra reference via dlm_hold_rsb().
+ */
+static void recover_list_add(struct dlm_rsb *r)
+{
+        struct dlm_ls *ls = r->res_ls;
+        int already_queued;
+
+        spin_lock(&ls->ls_recover_list_lock);
+        already_queued = !list_empty(&r->res_recover_list);
+        if (!already_queued) {
+                list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
+                ls->ls_recover_list_count++;
+                dlm_hold_rsb(r);
+        }
+        spin_unlock(&ls->ls_recover_list_lock);
+}
+/*
+ * Unlink the rsb from the recover_list, decrement ls_recover_list_count and
+ * drop a reference with dlm_put_rsb().  The put happens after the list lock
+ * has been released.
+ */
+static void recover_list_del(struct dlm_rsb *r)
+{
+        struct dlm_ls *ls = r->res_ls;
+
+        spin_lock(&ls->ls_recover_list_lock);
+        list_del_init(&r->res_recover_list);
+        ls->ls_recover_list_count -= 1;
+        spin_unlock(&ls->ls_recover_list_lock);
+
+        dlm_put_rsb(r);
+}
+/*
+ * Look up an rsb on the recover_list by id, where the id is the address of
+ * the rsb struct itself.  Returns the matching rsb or NULL.  No extra
+ * reference is taken on the returned rsb.
+ */
+static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
+{
+        struct dlm_rsb *cursor;
+        struct dlm_rsb *match = NULL;
+
+        spin_lock(&ls->ls_recover_list_lock);
+        list_for_each_entry(cursor, &ls->ls_recover_list, res_recover_list) {
+                if (id == (unsigned long) cursor) {
+                        match = cursor;
+                        break;
+                }
+        }
+        spin_unlock(&ls->ls_recover_list_lock);
+
+        return match;
+}
+/*
+ * Empty the recover_list, dropping the reference held for each entry.  If the
+ * counter does not end up at zero the mismatch is logged and the counter is
+ * reset.
+ */
+static void recover_list_clear(struct dlm_ls *ls)
+{
+        struct dlm_rsb *entry, *next;
+
+        spin_lock(&ls->ls_recover_list_lock);
+
+        list_for_each_entry_safe(entry, next, &ls->ls_recover_list,
+                                 res_recover_list) {
+                list_del_init(&entry->res_recover_list);
+                dlm_put_rsb(entry);
+                ls->ls_recover_list_count--;
+        }
+
+        if (ls->ls_recover_list_count) {
+                log_error(ls, "warning: recover_list_count %d",
+                          ls->ls_recover_list_count);
+                ls->ls_recover_list_count = 0;
+        }
+
+        spin_unlock(&ls->ls_recover_list_lock);
+}
+/* Master recovery: find new master node for rsb's that were
+   mastered on nodes that have been removed.
+   dlm_recover_masters
+   recover_master
+   dlm_send_rcom_lookup            ->  receive_rcom_lookup
+                                       dlm_dir_lookup
+   receive_rcom_lookup_reply       <-
+   dlm_recover_master_reply
+   set_new_master
+   set_master_lkbs
+   set_lock_master
+*/
+/*
+ * Point every lkb on one lock queue at the given master nodeid.
+ * Locks flagged DLM_IFL_MSTCPY are skipped: if we are the new master of
+ * the rsb, other nodes may already have sent us new master-copy locks,
+ * and those must keep the nodeid they arrived with.
+ */
+static void set_lock_master(struct list_head *queue, int nodeid)
+{
+        struct dlm_lkb *lkb;
+
+        list_for_each_entry(lkb, queue, lkb_statequeue) {
+                if (lkb->lkb_flags & DLM_IFL_MSTCPY)
+                        continue;
+                lkb->lkb_nodeid = nodeid;
+        }
+}
+/*
+ * Apply the rsb's current res_nodeid to the locks on its grant, convert and
+ * wait queues (see set_lock_master() for which locks are skipped).
+ */
+static void set_master_lkbs(struct dlm_rsb *r)
+{
+        int master = r->res_nodeid;
+
+        set_lock_master(&r->res_grantqueue, master);
+        set_lock_master(&r->res_convertqueue, master);
+        set_lock_master(&r->res_waitqueue, master);
+}
+/*
+ * Record a new master nodeid on the rsb and propagate it to the rsb's locks,
+ * all under the rsb lock.
+ * RSB_NEW_MASTER marks the rsb for dlm_recover_locks() to consider.
+ * RSB_NEW_MASTER2 marks the rsb for recover_lvb() to consider.
+ */
+static void set_new_master(struct dlm_rsb *r, int nodeid)
+{
+        lock_rsb(r);
+
+        r->res_nodeid = nodeid;
+        set_master_lkbs(r);
+
+        rsb_set_flag(r, RSB_NEW_MASTER);
+        rsb_set_flag(r, RSB_NEW_MASTER2);
+
+        unlock_rsb(r);
+}
+/*
+ * We do async lookups on rsb's that need new masters.  The rsb's
+ * waiting for a lookup reply are kept on the recover_list.
+ *
+ * When this node is the directory node for the rsb, the lookup is done
+ * locally with dlm_dir_lookup() and the result is applied right away (a
+ * result equal to our own nodeid is stored as 0).  Otherwise the rsb is
+ * put on the recover_list and a lookup request is sent to the directory
+ * node.
+ *
+ * Returns 0 on success, or the error from dlm_dir_lookup() or
+ * dlm_send_rcom_lookup().
+ */
+static int recover_master(struct dlm_rsb *r)
+{
+        struct dlm_ls *ls = r->res_ls;
+        int error, dir_nodeid, ret_nodeid = 0, our_nodeid = dlm_our_nodeid();
+
+        dir_nodeid = dlm_dir_nodeid(r);
+
+        if (dir_nodeid == our_nodeid) {
+                error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
+                                       r->res_length, &ret_nodeid);
+                if (error) {
+                        /*
+                         * Nothing here guarantees ret_nodeid was written by
+                         * a failed lookup, so do not install it as the new
+                         * master; report the failure instead.
+                         */
+                        log_error(ls, "recover dir lookup error %d", error);
+                        return error;
+                }
+
+                if (ret_nodeid == our_nodeid)
+                        ret_nodeid = 0;
+                set_new_master(r, ret_nodeid);
+        } else {
+                /* list the rsb before sending so a reply can find it */
+                recover_list_add(r);
+                error = dlm_send_rcom_lookup(r, dir_nodeid);
+        }
+
+        return error;
+}
+/*
+ * When not using a directory, most resource names will hash to a new static
+ * master nodeid and the resource will need to be remastered.
+ *
+ * Returns 1 if the rsb was given a new master, 0 if its master is unchanged.
+ * If we were the master before the change, our master-copy locks are purged
+ * first.
+ */
+static int recover_master_static(struct dlm_rsb *r)
+{
+        int new_master = dlm_dir_nodeid(r);
+
+        /* a master nodeid of 0 stands for the local node */
+        if (new_master == dlm_our_nodeid())
+                new_master = 0;
+
+        if (r->res_nodeid == new_master)
+                return 0;
+
+        if (is_master(r))
+                dlm_purge_mstcpy_locks(r);
+        set_new_master(r, new_master);
+        return 1;
+}
+/*
+ * Go through local root resources and for each rsb which has a master which
+ * has departed, get the new master nodeid from the directory.  The dir will
+ * assign mastery to the first node to look up the new master.  That means
+ * we'll discover in this lookup if we're the new master of any rsb's.
+ *
+ * We fire off all the dir lookup requests individually and asynchronously to
+ * the correct dir node.
+ *
+ * Returns 0 once every outstanding lookup has been answered, -EINTR if
+ * recovery was stopped, or the error from recover_master().  On any error
+ * the recover_list is cleared.
+ */
+int dlm_recover_masters(struct dlm_ls *ls)
+{
+        struct dlm_rsb *r;
+        int error = 0, count = 0;
+
+        log_debug(ls, "dlm_recover_masters");
+
+        down_read(&ls->ls_root_sem);
+        list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+                if (dlm_recovery_stopped(ls)) {
+                        up_read(&ls->ls_root_sem);
+                        error = -EINTR;
+                        goto out;
+                }
+
+                if (dlm_no_directory(ls))
+                        count += recover_master_static(r);
+                else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) {
+                        /*
+                         * A failed lookup must not be ignored: no reply
+                         * would ever arrive for it, and the wait below
+                         * would only end when recovery is stopped.
+                         */
+                        error = recover_master(r);
+                        if (error) {
+                                up_read(&ls->ls_root_sem);
+                                goto out;
+                        }
+                        count++;
+                }
+
+                schedule();
+        }
+        up_read(&ls->ls_root_sem);
+
+        log_debug(ls, "dlm_recover_masters %d resources", count);
+
+        /* replies remove rsb's from the recover_list; wait until it drains */
+        error = dlm_wait_function(ls, &recover_list_empty);
+ out:
+        if (error)
+                recover_list_clear(ls);
+        return error;
+}
+/*
+ * Handle the reply to a master lookup sent by recover_master().  rc_id
+ * identifies the rsb on the recover_list and rc_result carries the new master
+ * nodeid (our own nodeid is stored as 0).  The rsb is taken off the
+ * recover_list, and the waiter on ls_wait_general is woken once the list is
+ * empty.
+ *
+ * Always returns 0; a reply whose id matches nothing on the list is logged
+ * and otherwise ignored.
+ */
+int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+        struct dlm_rsb *r;
+        int nodeid;
+
+        r = recover_list_find(ls, rc->rc_id);
+        if (!r) {
+                /* %llx needs an unsigned long long whatever u64 maps to */
+                log_error(ls, "dlm_recover_master_reply no id %llx",
+                          (unsigned long long) rc->rc_id);
+                goto out;
+        }
+
+        nodeid = rc->rc_result;
+        if (nodeid == dlm_our_nodeid())
+                nodeid = 0;
+
+        set_new_master(r, nodeid);
+        recover_list_del(r);
+
+        if (recover_list_empty(ls))
+                wake_up(&ls->ls_wait_general);
+ out:
+        return 0;
+}
+/* Lock recovery: rebuild the process-copy locks we hold on a
+   remastered rsb on the new rsb master.
+   dlm_recover_locks
+   recover_locks
+   recover_locks_queue
+   dlm_send_rcom_lock              ->  receive_rcom_lock
+                                       dlm_recover_master_copy
+   receive_rcom_lock_reply         <-
+   dlm_recover_process_copy
+*/
+/*
+ * Send every lkb on one queue to the new master, counting each successful
+ * send in res_recover_locks_count; when an equal number of replies has come
+ * back, recovery for the rsb is done.  Stops at, and returns, the first send
+ * error; returns 0 otherwise.
+ */
+static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
+{
+        struct dlm_lkb *lkb;
+        int rv;
+
+        list_for_each_entry(lkb, head, lkb_statequeue) {
+                rv = dlm_send_rcom_lock(r, lkb);
+                if (rv)
+                        return rv;
+                r->res_recover_locks_count++;
+        }
+        return 0;
+}
+/* Return 1 if the rsb's grant, convert and wait queues are all empty, else 0 */
+static int all_queues_empty(struct dlm_rsb *r)
+{
+        return list_empty(&r->res_grantqueue) &&
+               list_empty(&r->res_convertqueue) &&
+               list_empty(&r->res_waitqueue);
+}
+/*
+ * Send all locks held on the rsb (grant, convert, then wait queue) to the new
+ * master, under the rsb lock.  If any were sent the rsb goes on the
+ * recover_list to await the replies; if none were, RSB_NEW_MASTER is cleared.
+ * An rsb with no locks at all is left untouched.
+ *
+ * Returns 0, or the first error from recover_locks_queue().
+ */
+static int recover_locks(struct dlm_rsb *r)
+{
+        struct list_head *queues[3];
+        int i, error = 0;
+
+        lock_rsb(r);
+
+        if (all_queues_empty(r))
+                goto out;
+
+        DLM_ASSERT(!r->res_recover_locks_count, dlm_print_rsb(r););
+
+        queues[0] = &r->res_grantqueue;
+        queues[1] = &r->res_convertqueue;
+        queues[2] = &r->res_waitqueue;
+
+        for (i = 0; i < 3; i++) {
+                error = recover_locks_queue(r, queues[i]);
+                if (error)
+                        goto out;
+        }
+
+        if (r->res_recover_locks_count)
+                recover_list_add(r);
+        else
+                rsb_clear_flag(r, RSB_NEW_MASTER);
+ out:
+        unlock_rsb(r);
+        return error;
+}
+/*
+ * Walk the root list and, for every rsb flagged RSB_NEW_MASTER that we do not
+ * master ourselves, send our locks to the new master.  Then wait until all
+ * replies have arrived (recover_list empty).
+ *
+ * On success DLM_RS_LOCKS is set in the recover status and 0 is returned.
+ * On failure (-EINTR when recovery was stopped, or an error from
+ * recover_locks()) the recover_list is cleared and the error is returned.
+ */
+int dlm_recover_locks(struct dlm_ls *ls)
+{
+        struct dlm_rsb *r;
+        int error = 0, count = 0;
+
+        log_debug(ls, "dlm_recover_locks");
+
+        down_read(&ls->ls_root_sem);
+        list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+                /* nothing to send for rsb's we master ourselves */
+                if (is_master(r)) {
+                        rsb_clear_flag(r, RSB_NEW_MASTER);
+                        continue;
+                }
+
+                if (!rsb_flag(r, RSB_NEW_MASTER))
+                        continue;
+
+                if (dlm_recovery_stopped(ls)) {
+                        error = -EINTR;
+                        break;
+                }
+
+                error = recover_locks(r);
+                if (error)
+                        break;
+
+                count += r->res_recover_locks_count;
+        }
+        up_read(&ls->ls_root_sem);
+
+        if (!error) {
+                log_debug(ls, "dlm_recover_locks %d locks", count);
+                error = dlm_wait_function(ls, &recover_list_empty);
+        }
+
+        if (error)
+                recover_list_clear(ls);
+        else
+                dlm_set_recover_status(ls, DLM_RS_LOCKS);
+        return error;
+}
+/*
+ * Account for one recovered lock on the rsb.  When the outstanding count
+ * reaches zero the rsb's RSB_NEW_MASTER flag is cleared and it leaves the
+ * recover_list; if that leaves the list empty, the waiter on ls_wait_general
+ * is woken.
+ */
+void dlm_recovered_lock(struct dlm_rsb *r)
+{
+        struct dlm_ls *ls = r->res_ls;
+
+        DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_print_rsb(r););
+
+        r->res_recover_locks_count--;
+        if (r->res_recover_locks_count == 0) {
+                rsb_clear_flag(r, RSB_NEW_MASTER);
+                recover_list_del(r);
+        }
+
+        if (recover_list_empty(ls))
+                wake_up(&ls->ls_wait_general);
+}
+/*
+ * The lvb needs to be recovered on all master rsb's.  This includes setting
+ * the VALNOTVALID flag if necessary, and determining the correct lvb contents
+ * based on the lvb's of the locks held on the rsb.
+ *
+ * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb.  If it
+ * was already set prior to recovery, it's not cleared, regardless of locks.
+ *
+ * The LVB contents are only considered for changing when this is a new master
+ * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
+ * mode > CR.  If no lkb's exist with mode above CR, the lvb contents are taken
+ * from the lkb with the largest lvb sequence number.
+ *
+ * Only locks with DLM_LKF_VALBLK on the grant and convert queues are
+ * examined.  Sequence numbers are compared with wrap-around in mind: the
+ * difference is computed in unsigned arithmetic and then viewed as signed,
+ * which avoids signed integer overflow.
+ */
+static void recover_lvb(struct dlm_rsb *r)
+{
+        struct dlm_lkb *lkb, *high_lkb = NULL;
+        uint32_t high_seq = 0;
+        int lock_lvb_exists = 0;
+        int big_lock_exists = 0;
+        int lvblen = r->res_ls->ls_lvblen;
+
+        list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+                if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+                        continue;
+
+                lock_lvb_exists = 1;
+
+                if (lkb->lkb_grmode > DLM_LOCK_CR) {
+                        big_lock_exists = 1;
+                        goto setflag;
+                }
+
+                if ((int)((uint32_t)lkb->lkb_lvbseq - high_seq) >= 0) {
+                        high_lkb = lkb;
+                        high_seq = lkb->lkb_lvbseq;
+                }
+        }
+
+        list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+                if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+                        continue;
+
+                lock_lvb_exists = 1;
+
+                if (lkb->lkb_grmode > DLM_LOCK_CR) {
+                        big_lock_exists = 1;
+                        goto setflag;
+                }
+
+                if ((int)((uint32_t)lkb->lkb_lvbseq - high_seq) >= 0) {
+                        high_lkb = lkb;
+                        high_seq = lkb->lkb_lvbseq;
+                }
+        }
+
+ setflag:
+        if (!lock_lvb_exists)
+                goto out;
+
+        if (!big_lock_exists)
+                rsb_set_flag(r, RSB_VALNOTVALID);
+
+        /* don't mess with the lvb unless we're the new master */
+        if (!rsb_flag(r, RSB_NEW_MASTER2))
+                goto out;
+
+        if (!r->res_lvbptr) {
+                r->res_lvbptr = allocate_lvb(r->res_ls);
+                if (!r->res_lvbptr)
+                        goto out;
+        }
+
+        if (big_lock_exists) {
+                /* lkb still points at the lock with mode > CR found above */
+                r->res_lvbseq = lkb->lkb_lvbseq;
+                memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
+        } else if (high_lkb) {
+                r->res_lvbseq = high_lkb->lkb_lvbseq;
+                memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
+        } else {
+                r->res_lvbseq = 0;
+                memset(r->res_lvbptr, 0, lvblen);
+        }
+ out:
+        return;
+}
+/*
+ * All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
+ * converting PR->CW or CW->PR need to have their lkb_grmode set.
+ *
+ * Every lock on the convert queue whose granted mode is DLM_LOCK_IV takes the
+ * mode of the first PR or CW lock found on the grant queue; if there is no
+ * such lock it takes its own requested mode.
+ */
+static void recover_conversion(struct dlm_rsb *r)
+{
+        struct dlm_lkb *lkb;
+        int grmode = -1;
+
+        list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+                if (lkb->lkb_grmode != DLM_LOCK_PR &&
+                    lkb->lkb_grmode != DLM_LOCK_CW)
+                        continue;
+                grmode = lkb->lkb_grmode;
+                break;
+        }
+
+        list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+                if (lkb->lkb_grmode == DLM_LOCK_IV)
+                        lkb->lkb_grmode = (grmode == -1) ? lkb->lkb_rqmode
+                                                         : grmode;
+        }
+}
+/*
+ * Final per-rsb pass over the root list.  For each rsb we master, fix up
+ * conversions (when flagged RSB_RECOVER_CONVERT) and recover the lvb.  The
+ * RSB_RECOVER_CONVERT flag is cleared on every rsb, mastered or not.
+ */
+void dlm_recover_rsbs(struct dlm_ls *ls)
+{
+        struct dlm_rsb *r;
+        int nr_masters = 0;
+
+        log_debug(ls, "dlm_recover_rsbs");
+
+        down_read(&ls->ls_root_sem);
+        list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+                lock_rsb(r);
+
+                if (is_master(r)) {
+                        nr_masters++;
+                        if (rsb_flag(r, RSB_RECOVER_CONVERT))
+                                recover_conversion(r);
+                        recover_lvb(r);
+                }
+
+                rsb_clear_flag(r, RSB_RECOVER_CONVERT);
+                unlock_rsb(r);
+        }
+        up_read(&ls->ls_root_sem);
+
+        log_debug(ls, "dlm_recover_rsbs %d rsbs", nr_masters);
+}
+/*
+ * Create a single list of all root rsb's to be used during recovery.
+ *
+ * Every rsb on the hash table's active lists is linked onto ls_root_list and
+ * gets an extra reference.  Returns 0, or -EINVAL if the root list was not
+ * empty to begin with.
+ */
+int dlm_create_root_list(struct dlm_ls *ls)
+{
+        struct dlm_rsb *r;
+        int bucket, error = 0;
+
+        down_write(&ls->ls_root_sem);
+
+        if (list_empty(&ls->ls_root_list)) {
+                for (bucket = 0; bucket < ls->ls_rsbtbl_size; bucket++) {
+                        read_lock(&ls->ls_rsbtbl[bucket].lock);
+                        list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list,
+                                            res_hashchain) {
+                                list_add(&r->res_root_list, &ls->ls_root_list);
+                                dlm_hold_rsb(r);
+                        }
+                        read_unlock(&ls->ls_rsbtbl[bucket].lock);
+                }
+        } else {
+                log_error(ls, "root list not empty");
+                error = -EINVAL;
+        }
+
+        up_write(&ls->ls_root_sem);
+        return error;
+}
+/*
+ * Undo dlm_create_root_list(): unlink every rsb from ls_root_list and drop
+ * the reference that was taken when it was added.
+ */
+void dlm_release_root_list(struct dlm_ls *ls)
+{
+        struct dlm_rsb *entry, *next;
+
+        down_write(&ls->ls_root_sem);
+
+        list_for_each_entry_safe(entry, next, &ls->ls_root_list,
+                                 res_root_list) {
+                list_del_init(&entry->res_root_list);
+                dlm_put_rsb(entry);
+        }
+
+        up_write(&ls->ls_root_sem);
+}
+/*
+ * Free every rsb sitting on the toss list of each hash table bucket.  Each
+ * bucket is processed under its write lock.
+ */
+void dlm_clear_toss_list(struct dlm_ls *ls)
+{
+        struct dlm_rsb *entry, *next;
+        int bucket;
+
+        for (bucket = 0; bucket < ls->ls_rsbtbl_size; bucket++) {
+                write_lock(&ls->ls_rsbtbl[bucket].lock);
+
+                list_for_each_entry_safe(entry, next,
+                                         &ls->ls_rsbtbl[bucket].toss,
+                                         res_hashchain) {
+                        list_del(&entry->res_hashchain);
+                        free_rsb(entry);
+                }
+
+                write_unlock(&ls->ls_rsbtbl[bucket].lock);
+        }
+}

diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c new file mode 100644 index 000000000000..b036ee7dcb32 --- /dev/null +++ b/fs/dlm/recover.c
@@ -0,0 +1,762 @@
	1	/******************************************************************************
	2	*******************************************************************************
	3	**
	4	** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
	5	** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
	6	**
	7	** This copyrighted material is made available to anyone wishing to use,
	8	** modify, copy, or redistribute it subject to the terms and conditions
	9	** of the GNU General Public License v.2.
	10	**
	11	*******************************************************************************
	12	******************************************************************************/
	13
	14	#include "dlm_internal.h"
	15	#include "lockspace.h"
	16	#include "dir.h"
	17	#include "config.h"
	18	#include "ast.h"
	19	#include "memory.h"
	20	#include "rcom.h"
	21	#include "lock.h"
	22	#include "lowcomms.h"
	23	#include "member.h"
	24	#include "recover.h"
	25
	26
	27	/*
	28	* Recovery waiting routines: these functions wait for a particular reply from
	29	* a remote node, or for the remote node to report a certain status. They need
	30	* to abort if the lockspace is stopped indicating a node has failed (perhaps
	31	* the one being waited for).
	32	*/
	33
	34	/*
	35	* Wait until given function returns non-zero or lockspace is stopped
	36	* (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another
	37	* function thinks it could have completed the waited-on task, they should wake
	38	* up ls_wait_general to get an immediate response rather than waiting for the
	39	* timer to detect the result. A timer wakes us up periodically while waiting
	40	* to see if we should abort due to a node failure. This should only be called
	41	* by the dlm_recoverd thread.
	42	*/
	43
	44	static void dlm_wait_timer_fn(unsigned long data)
	45	{
	46	struct dlm_ls *ls = (struct dlm_ls *) data;
	47	mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ));
	48	wake_up(&ls->ls_wait_general);
	49	}
	50
	51	int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
	52	{
	53	int error = 0;
	54
	55	init_timer(&ls->ls_timer);
	56	ls->ls_timer.function = dlm_wait_timer_fn;
	57	ls->ls_timer.data = (long) ls;
	58	ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
	59	add_timer(&ls->ls_timer);
	60
	61	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
	62	del_timer_sync(&ls->ls_timer);
	63
	64	if (dlm_recovery_stopped(ls)) {
	65	log_debug(ls, "dlm_wait_function aborted");
	66	error = -EINTR;
	67	}
	68	return error;
	69	}
	70
	71	/*
	72	* An efficient way for all nodes to wait for all others to have a certain
	73	* status. The node with the lowest nodeid polls all the others for their
	74	* status (wait_status_all) and all the others poll the node with the low id
	75	* for its accumulated result (wait_status_low). When all nodes have set
	76	* status flag X, then status flag X_ALL will be set on the low nodeid.
	77	*/
	78
	79	uint32_t dlm_recover_status(struct dlm_ls *ls)
	80	{
	81	uint32_t status;
	82	spin_lock(&ls->ls_recover_lock);
	83	status = ls->ls_recover_status;
	84	spin_unlock(&ls->ls_recover_lock);
	85	return status;
	86	}
	87
	88	void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
	89	{
	90	spin_lock(&ls->ls_recover_lock);
	91	ls->ls_recover_status |= status;
	92	spin_unlock(&ls->ls_recover_lock);
	93	}
	94
	95	static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
	96	{
	97	struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf;
	98	struct dlm_member *memb;
	99	int error = 0, delay;
	100
	101	list_for_each_entry(memb, &ls->ls_nodes, list) {
	102	delay = 0;
	103	for (;;) {
	104	if (dlm_recovery_stopped(ls)) {
	105	error = -EINTR;
	106	goto out;
	107	}
	108
	109	error = dlm_rcom_status(ls, memb->nodeid);
	110	if (error)
	111	goto out;
	112
	113	if (rc->rc_result & wait_status)
	114	break;
	115	if (delay < 1000)
	116	delay += 20;
	117	msleep(delay);
	118	}
	119	}
	120	out:
	121	return error;
	122	}
	123
	124	static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
	125	{
	126	struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf;
	127	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
	128
	129	for (;;) {
	130	if (dlm_recovery_stopped(ls)) {
	131	error = -EINTR;
	132	goto out;
	133	}
	134
	135	error = dlm_rcom_status(ls, nodeid);
	136	if (error)
	137	break;
	138
	139	if (rc->rc_result & wait_status)
	140	break;
	141	if (delay < 1000)
	142	delay += 20;
	143	msleep(delay);
	144	}
	145	out:
	146	return error;
	147	}
	148
	149	static int wait_status(struct dlm_ls *ls, uint32_t status)
	150	{
	151	uint32_t status_all = status << 1;
	152	int error;
	153
	154	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
	155	error = wait_status_all(ls, status);
	156	if (!error)
	157	dlm_set_recover_status(ls, status_all);
	158	} else
	159	error = wait_status_low(ls, status_all);
	160
	161	return error;
	162	}
	163
	164	int dlm_recover_members_wait(struct dlm_ls *ls)
	165	{
	166	return wait_status(ls, DLM_RS_NODES);
	167	}
	168
	169	int dlm_recover_directory_wait(struct dlm_ls *ls)
	170	{
	171	return wait_status(ls, DLM_RS_DIR);
	172	}
	173
	174	int dlm_recover_locks_wait(struct dlm_ls *ls)
	175	{
	176	return wait_status(ls, DLM_RS_LOCKS);
	177	}
	178
	179	int dlm_recover_done_wait(struct dlm_ls *ls)
	180	{
	181	return wait_status(ls, DLM_RS_DONE);
	182	}
	183
	184	/*
	185	* The recover_list contains all the rsb's for which we've requested the new
	186	* master nodeid. As replies are returned from the resource directories the
	187	* rsb's are removed from the list. When the list is empty we're done.
	188	*
	189	* The recover_list is later similarly used for all rsb's for which we've sent
	190	* new lkb's and need to receive new corresponding lkid's.
	191	*
	192	* We use the address of the rsb struct as a simple local identifier for the
	193	* rsb so we can match an rcom reply with the rsb it was sent for.
	194	*/
	195
	196	static int recover_list_empty(struct dlm_ls *ls)
	197	{
	198	int empty;
	199
	200	spin_lock(&ls->ls_recover_list_lock);
	201	empty = list_empty(&ls->ls_recover_list);
	202	spin_unlock(&ls->ls_recover_list_lock);
	203
	204	return empty;
	205	}
	206
	207	static void recover_list_add(struct dlm_rsb *r)
	208	{
	209	struct dlm_ls *ls = r->res_ls;
	210
	211	spin_lock(&ls->ls_recover_list_lock);
	212	if (list_empty(&r->res_recover_list)) {
	213	list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
	214	ls->ls_recover_list_count++;
	215	dlm_hold_rsb(r);
	216	}
	217	spin_unlock(&ls->ls_recover_list_lock);
	218	}
	219
	220	static void recover_list_del(struct dlm_rsb *r)
	221	{
	222	struct dlm_ls *ls = r->res_ls;
	223
	224	spin_lock(&ls->ls_recover_list_lock);
	225	list_del_init(&r->res_recover_list);
	226	ls->ls_recover_list_count--;
	227	spin_unlock(&ls->ls_recover_list_lock);
	228
	229	dlm_put_rsb(r);
	230	}
	231
	232	static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
	233	{
	234	struct dlm_rsb *r = NULL;
	235
	236	spin_lock(&ls->ls_recover_list_lock);
	237
	238	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
	239	if (id == (unsigned long) r)
	240	goto out;
	241	}
	242	r = NULL;
	243	out:
	244	spin_unlock(&ls->ls_recover_list_lock);
	245	return r;
	246	}
	247
	248	static void recover_list_clear(struct dlm_ls *ls)
	249	{
	250	struct dlm_rsb *r, *s;
	251
	252	spin_lock(&ls->ls_recover_list_lock);
	253	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
	254	list_del_init(&r->res_recover_list);
	255	dlm_put_rsb(r);
	256	ls->ls_recover_list_count--;
	257	}
	258
	259	if (ls->ls_recover_list_count != 0) {
	260	log_error(ls, "warning: recover_list_count %d",
	261	ls->ls_recover_list_count);
	262	ls->ls_recover_list_count = 0;
	263	}
	264	spin_unlock(&ls->ls_recover_list_lock);
	265	}
	266
	267
	268	/* Master recovery: find new master node for rsb's that were
	269	mastered on nodes that have been removed.
	270
	271	dlm_recover_masters
	272	recover_master
	273	dlm_send_rcom_lookup -> receive_rcom_lookup
	274	dlm_dir_lookup
	275	receive_rcom_lookup_reply <-
	276	dlm_recover_master_reply
	277	set_new_master
	278	set_master_lkbs
	279	set_lock_master
	280	*/
	281
	282	/*
	283	* Set the lock master for all LKBs in a lock queue
	284	* If we are the new master of the rsb, we may have received new
	285	* MSTCPY locks from other nodes already which we need to ignore
	286	* when setting the new nodeid.
	287	*/
	288
	289	static void set_lock_master(struct list_head *queue, int nodeid)
	290	{
	291	struct dlm_lkb *lkb;
	292
	293	list_for_each_entry(lkb, queue, lkb_statequeue)
	294	if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
	295	lkb->lkb_nodeid = nodeid;
	296	}
	297
	298	static void set_master_lkbs(struct dlm_rsb *r)
	299	{
	300	set_lock_master(&r->res_grantqueue, r->res_nodeid);
	301	set_lock_master(&r->res_convertqueue, r->res_nodeid);
	302	set_lock_master(&r->res_waitqueue, r->res_nodeid);
	303	}
	304
	305	/*
	306	* Propogate the new master nodeid to locks
	307	* The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
	308	* The NEW_MASTER2 flag tells recover_lvb() which rsb's to consider.
	309	*/
	310
	311	static void set_new_master(struct dlm_rsb *r, int nodeid)
	312	{
	313	lock_rsb(r);
	314	r->res_nodeid = nodeid;
	315	set_master_lkbs(r);
	316	rsb_set_flag(r, RSB_NEW_MASTER);
	317	rsb_set_flag(r, RSB_NEW_MASTER2);
	318	unlock_rsb(r);
	319	}
	320
	321	/*
	322	* We do async lookups on rsb's that need new masters. The rsb's
	323	* waiting for a lookup reply are kept on the recover_list.
	324	*/
	325
	326	static int recover_master(struct dlm_rsb *r)
	327	{
	328	struct dlm_ls *ls = r->res_ls;
	329	int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
	330
	331	dir_nodeid = dlm_dir_nodeid(r);
	332
	333	if (dir_nodeid == our_nodeid) {
	334	error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
	335	r->res_length, &ret_nodeid);
	336	if (error)
	337	log_error(ls, "recover dir lookup error %d", error);
	338
	339	if (ret_nodeid == our_nodeid)
	340	ret_nodeid = 0;
	341	set_new_master(r, ret_nodeid);
	342	} else {
	343	recover_list_add(r);
	344	error = dlm_send_rcom_lookup(r, dir_nodeid);
	345	}
	346
	347	return error;
	348	}
	349
	350	/*
	351	* When not using a directory, most resource names will hash to a new static
	352	* master nodeid and the resource will need to be remastered.
	353	*/
	354
	355	static int recover_master_static(struct dlm_rsb *r)
	356	{
	357	int master = dlm_dir_nodeid(r);
	358
	359	if (master == dlm_our_nodeid())
	360	master = 0;
	361
	362	if (r->res_nodeid != master) {
	363	if (is_master(r))
	364	dlm_purge_mstcpy_locks(r);
	365	set_new_master(r, master);
	366	return 1;
	367	}
	368	return 0;
	369	}
	370
	371	/*
	372	* Go through local root resources and for each rsb which has a master which
	373	* has departed, get the new master nodeid from the directory. The dir will
	374	* assign mastery to the first node to look up the new master. That means
	375	* we'll discover in this lookup if we're the new master of any rsb's.
	376	*
	377	* We fire off all the dir lookup requests individually and asynchronously to
	378	* the correct dir node.
	379	*/
	380
	381	int dlm_recover_masters(struct dlm_ls *ls)
	382	{
	383	struct dlm_rsb *r;
	384	int error = 0, count = 0;
	385
	386	log_debug(ls, "dlm_recover_masters");
	387
	388	down_read(&ls->ls_root_sem);
	389	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
	390	if (dlm_recovery_stopped(ls)) {
	391	up_read(&ls->ls_root_sem);
	392	error = -EINTR;
	393	goto out;
	394	}
	395
	396	if (dlm_no_directory(ls))
	397	count += recover_master_static(r);
	398	else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) {
	399	recover_master(r);
	400	count++;
	401	}
	402
	403	schedule();
	404	}
	405	up_read(&ls->ls_root_sem);
	406
	407	log_debug(ls, "dlm_recover_masters %d resources", count);
	408
	409	error = dlm_wait_function(ls, &recover_list_empty);
	410	out:
	411	if (error)
	412	recover_list_clear(ls);
	413	return error;
	414	}
	415
	416	int dlm_recover_master_reply(struct dlm_ls ls, struct dlm_rcom rc)
	417	{
	418	struct dlm_rsb *r;
	419	int nodeid;
	420
	421	r = recover_list_find(ls, rc->rc_id);
	422	if (!r) {
	423	log_error(ls, "dlm_recover_master_reply no id %llx",
	424	rc->rc_id);
	425	goto out;
	426	}
	427
	428	nodeid = rc->rc_result;
	429	if (nodeid == dlm_our_nodeid())
	430	nodeid = 0;
	431
	432	set_new_master(r, nodeid);
	433	recover_list_del(r);
	434
	435	if (recover_list_empty(ls))
	436	wake_up(&ls->ls_wait_general);
	437	out:
	438	return 0;
	439	}
	440
	441
/* Lock recovery: rebuild the process-copy locks we hold on a
   remastered rsb on the new rsb master.

   (left column: this node; right column: the new master node)

   dlm_recover_locks
   recover_locks
   recover_locks_queue
   dlm_send_rcom_lock              ->  receive_rcom_lock
                                       dlm_recover_master_copy
   receive_rcom_lock_reply         <-
   dlm_recover_process_copy
*/
	453
	454
	455	/*
	456	* keep a count of the number of lkb's we send to the new master; when we get
	457	* an equal number of replies then recovery for the rsb is done
	458	*/
	459
	460	static int recover_locks_queue(struct dlm_rsb r, struct list_head head)
	461	{
	462	struct dlm_lkb *lkb;
	463	int error = 0;
	464
	465	list_for_each_entry(lkb, head, lkb_statequeue) {
	466	error = dlm_send_rcom_lock(r, lkb);
	467	if (error)
	468	break;
	469	r->res_recover_locks_count++;
	470	}
	471
	472	return error;
	473	}
	474
	475	static int all_queues_empty(struct dlm_rsb *r)
	476	{
	477	if (!list_empty(&r->res_grantqueue) \|\|
	478	!list_empty(&r->res_convertqueue) \|\|
	479	!list_empty(&r->res_waitqueue))
	480	return 0;
	481	return 1;
	482	}
	483
	484	static int recover_locks(struct dlm_rsb *r)
	485	{
	486	int error = 0;
	487
	488	lock_rsb(r);
	489	if (all_queues_empty(r))
	490	goto out;
	491
	492	DLM_ASSERT(!r->res_recover_locks_count, dlm_print_rsb(r););
	493
	494	error = recover_locks_queue(r, &r->res_grantqueue);
	495	if (error)
	496	goto out;
	497	error = recover_locks_queue(r, &r->res_convertqueue);
	498	if (error)
	499	goto out;
	500	error = recover_locks_queue(r, &r->res_waitqueue);
	501	if (error)
	502	goto out;
	503
	504	if (r->res_recover_locks_count)
	505	recover_list_add(r);
	506	else
	507	rsb_clear_flag(r, RSB_NEW_MASTER);
	508	out:
	509	unlock_rsb(r);
	510	return error;
	511	}
	512
	513	int dlm_recover_locks(struct dlm_ls *ls)
	514	{
	515	struct dlm_rsb *r;
	516	int error, count = 0;
	517
	518	log_debug(ls, "dlm_recover_locks");
	519
	520	down_read(&ls->ls_root_sem);
	521	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
	522	if (is_master(r)) {
	523	rsb_clear_flag(r, RSB_NEW_MASTER);
	524	continue;
	525	}
	526
	527	if (!rsb_flag(r, RSB_NEW_MASTER))
	528	continue;
	529
	530	if (dlm_recovery_stopped(ls)) {
	531	error = -EINTR;
	532	up_read(&ls->ls_root_sem);
	533	goto out;
	534	}
	535
	536	error = recover_locks(r);
	537	if (error) {
	538	up_read(&ls->ls_root_sem);
	539	goto out;
	540	}
	541
	542	count += r->res_recover_locks_count;
	543	}
	544	up_read(&ls->ls_root_sem);
	545
	546	log_debug(ls, "dlm_recover_locks %d locks", count);
	547
	548	error = dlm_wait_function(ls, &recover_list_empty);
	549	out:
	550	if (error)
	551	recover_list_clear(ls);
	552	else
	553	dlm_set_recover_status(ls, DLM_RS_LOCKS);
	554	return error;
	555	}
	556
	557	void dlm_recovered_lock(struct dlm_rsb *r)
	558	{
	559	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_print_rsb(r););
	560
	561	r->res_recover_locks_count--;
	562	if (!r->res_recover_locks_count) {
	563	rsb_clear_flag(r, RSB_NEW_MASTER);
	564	recover_list_del(r);
	565	}
	566
	567	if (recover_list_empty(r->res_ls))
	568	wake_up(&r->res_ls->ls_wait_general);
	569	}
	570
	571	/*
	572	* The lvb needs to be recovered on all master rsb's. This includes setting
	573	* the VALNOTVALID flag if necessary, and determining the correct lvb contents
	574	* based on the lvb's of the locks held on the rsb.
	575	*
	576	* RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it
	577	* was already set prior to recovery, it's not cleared, regardless of locks.
	578	*
	579	* The LVB contents are only considered for changing when this is a new master
	580	* of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with
	581	* mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken
	582	* from the lkb with the largest lvb sequence number.
	583	*/
	584
	585	static void recover_lvb(struct dlm_rsb *r)
	586	{
	587	struct dlm_lkb lkb, high_lkb = NULL;
	588	uint32_t high_seq = 0;
	589	int lock_lvb_exists = 0;
	590	int big_lock_exists = 0;
	591	int lvblen = r->res_ls->ls_lvblen;
	592
	593	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
	594	if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
	595	continue;
	596
	597	lock_lvb_exists = 1;
	598
	599	if (lkb->lkb_grmode > DLM_LOCK_CR) {
	600	big_lock_exists = 1;
	601	goto setflag;
	602	}
	603
	604	if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
	605	high_lkb = lkb;
	606	high_seq = lkb->lkb_lvbseq;
	607	}
	608	}
	609
	610	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
	611	if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
	612	continue;
	613
	614	lock_lvb_exists = 1;
	615
	616	if (lkb->lkb_grmode > DLM_LOCK_CR) {
	617	big_lock_exists = 1;
	618	goto setflag;
	619	}
	620
	621	if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
	622	high_lkb = lkb;
	623	high_seq = lkb->lkb_lvbseq;
	624	}
	625	}
	626
	627	setflag:
	628	if (!lock_lvb_exists)
	629	goto out;
	630
	631	if (!big_lock_exists)
	632	rsb_set_flag(r, RSB_VALNOTVALID);
	633
	634	/* don't mess with the lvb unless we're the new master */
	635	if (!rsb_flag(r, RSB_NEW_MASTER2))
	636	goto out;
	637
	638	if (!r->res_lvbptr) {
	639	r->res_lvbptr = allocate_lvb(r->res_ls);
	640	if (!r->res_lvbptr)
	641	goto out;
	642	}
	643
	644	if (big_lock_exists) {
	645	r->res_lvbseq = lkb->lkb_lvbseq;
	646	memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
	647	} else if (high_lkb) {
	648	r->res_lvbseq = high_lkb->lkb_lvbseq;
	649	memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
	650	} else {
	651	r->res_lvbseq = 0;
	652	memset(r->res_lvbptr, 0, lvblen);
	653	}
	654	out:
	655	return;
	656	}
	657
	658	/* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks
	659	converting PR->CW or CW->PR need to have their lkb_grmode set. */
	660
	661	static void recover_conversion(struct dlm_rsb *r)
	662	{
	663	struct dlm_lkb *lkb;
	664	int grmode = -1;
	665
	666	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
	667	if (lkb->lkb_grmode == DLM_LOCK_PR \|\|
	668	lkb->lkb_grmode == DLM_LOCK_CW) {
	669	grmode = lkb->lkb_grmode;
	670	break;
	671	}
	672	}
	673
	674	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
	675	if (lkb->lkb_grmode != DLM_LOCK_IV)
	676	continue;
	677	if (grmode == -1)
	678	lkb->lkb_grmode = lkb->lkb_rqmode;
	679	else
	680	lkb->lkb_grmode = grmode;
	681	}
	682	}
	683
	684	void dlm_recover_rsbs(struct dlm_ls *ls)
	685	{
	686	struct dlm_rsb *r;
	687	int count = 0;
	688
	689	log_debug(ls, "dlm_recover_rsbs");
	690
	691	down_read(&ls->ls_root_sem);
	692	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
	693	lock_rsb(r);
	694	if (is_master(r)) {
	695	if (rsb_flag(r, RSB_RECOVER_CONVERT))
	696	recover_conversion(r);
	697	recover_lvb(r);
	698	count++;
	699	}
	700	rsb_clear_flag(r, RSB_RECOVER_CONVERT);
	701	unlock_rsb(r);
	702	}
	703	up_read(&ls->ls_root_sem);
	704
	705	log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
	706	}
	707
	708	/* Create a single list of all root rsb's to be used during recovery */
	709
	710	int dlm_create_root_list(struct dlm_ls *ls)
	711	{
	712	struct dlm_rsb *r;
	713	int i, error = 0;
	714
	715	down_write(&ls->ls_root_sem);
	716	if (!list_empty(&ls->ls_root_list)) {
	717	log_error(ls, "root list not empty");
	718	error = -EINVAL;
	719	goto out;
	720	}
	721
	722	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
	723	read_lock(&ls->ls_rsbtbl[i].lock);
	724	list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) {
	725	list_add(&r->res_root_list, &ls->ls_root_list);
	726	dlm_hold_rsb(r);
	727	}
	728	read_unlock(&ls->ls_rsbtbl[i].lock);
	729	}
	730	out:
	731	up_write(&ls->ls_root_sem);
	732	return error;
	733	}
	734
	735	void dlm_release_root_list(struct dlm_ls *ls)
	736	{
	737	struct dlm_rsb r, safe;
	738
	739	down_write(&ls->ls_root_sem);
	740	list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
	741	list_del_init(&r->res_root_list);
	742	dlm_put_rsb(r);
	743	}
	744	up_write(&ls->ls_root_sem);
	745	}
	746
	747	void dlm_clear_toss_list(struct dlm_ls *ls)
	748	{
	749	struct dlm_rsb r, safe;
	750	int i;
	751
	752	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
	753	write_lock(&ls->ls_rsbtbl[i].lock);
	754	list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss,
	755	res_hashchain) {
	756	list_del(&r->res_hashchain);
	757	free_rsb(r);
	758	}
	759	write_unlock(&ls->ls_rsbtbl[i].lock);
	760	}
	761	}
	762