From 24ef1815e5e13e50196eb1ab8ddc0d783443bdf8 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 29 Jan 2008 17:37:32 -0800 Subject: ocfs2: Separate out dlm lock functions. This is the first in a series of patches to isolate ocfs2 from the underlying cluster stack. Here we wrap the dlm locking functions with ocfs2-specific calls. Because ocfs2 always uses the same dlm lock status callbacks, we can eliminate the callbacks from the filesystem visible functions. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 fs/ocfs2/stackglue.c (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c new file mode 100644 index 000000000000..4f44f23795f0 --- /dev/null +++ b/fs/ocfs2/stackglue.c @@ -0,0 +1,65 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * stackglue.c + * + * Code which implements an OCFS2 specific interface to underlying + * cluster stacks. + * + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include +#include + +#include "dlm/dlmapi.h" + +#include "stackglue.h" + +static struct ocfs2_locking_protocol *lproto; + +enum dlm_status ocfs2_dlm_lock(struct dlm_ctxt *dlm, + int mode, + struct dlm_lockstatus *lksb, + u32 flags, + void *name, + unsigned int namelen, + void *astarg) +{ + BUG_ON(lproto == NULL); + return dlmlock(dlm, mode, lksb, flags, name, namelen, + lproto->lp_lock_ast, astarg, + lproto->lp_blocking_ast); +} + +enum dlm_status ocfs2_dlm_unlock(struct dlm_ctxt *dlm, + struct dlm_lockstatus *lksb, + u32 flags, + void *astarg) +{ + BUG_ON(lproto == NULL); + + return dlmunlock(dlm, lksb, flags, lproto->lp_unlock_ast, astarg); +} + + +void o2cb_get_stack(struct ocfs2_locking_protocol *proto) +{ + BUG_ON(proto == NULL); + + lproto = proto; +} + +void o2cb_put_stack(void) +{ + lproto = NULL; +} -- cgit v1.2.2 From bd3e76105d4478ab89951a52d1a35250d24a9f16 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 12:14:57 -0800 Subject: ocfs2: Use global DLM_ constants in generic code. The ocfs2 generic code should use the values in <linux/dlmconstants.h>. stackglue.c will convert them to o2dlm values. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 4f44f23795f0..99538043fc17 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -18,15 +18,65 @@ * General Public License for more details. 
*/ -#include -#include - -#include "dlm/dlmapi.h" - #include "stackglue.h" static struct ocfs2_locking_protocol *lproto; +/* These should be identical */ +#if (DLM_LOCK_IV != LKM_IVMODE) +# error Lock modes do not match +#endif +#if (DLM_LOCK_NL != LKM_NLMODE) +# error Lock modes do not match +#endif +#if (DLM_LOCK_CR != LKM_CRMODE) +# error Lock modes do not match +#endif +#if (DLM_LOCK_CW != LKM_CWMODE) +# error Lock modes do not match +#endif +#if (DLM_LOCK_PR != LKM_PRMODE) +# error Lock modes do not match +#endif +#if (DLM_LOCK_PW != LKM_PWMODE) +# error Lock modes do not match +#endif +#if (DLM_LOCK_EX != LKM_EXMODE) +# error Lock modes do not match +#endif +static inline int mode_to_o2dlm(int mode) +{ + BUG_ON(mode > LKM_MAXMODE); + + return mode; +} + +#define map_flag(_generic, _o2dlm) \ + if (flags & (_generic)) { \ + flags &= ~(_generic); \ + o2dlm_flags |= (_o2dlm); \ + } +static int flags_to_o2dlm(u32 flags) +{ + int o2dlm_flags = 0; + + map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE); + map_flag(DLM_LKF_CANCEL, LKM_CANCEL); + map_flag(DLM_LKF_CONVERT, LKM_CONVERT); + map_flag(DLM_LKF_VALBLK, LKM_VALBLK); + map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK); + map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN); + map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE); + map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT); + map_flag(DLM_LKF_LOCAL, LKM_LOCAL); + + /* map_flag() should have cleared every flag passed in */ + BUG_ON(flags != 0); + + return o2dlm_flags; +} +#undef map_flag + enum dlm_status ocfs2_dlm_lock(struct dlm_ctxt *dlm, int mode, struct dlm_lockstatus *lksb, @@ -35,8 +85,12 @@ enum dlm_status ocfs2_dlm_lock(struct dlm_ctxt *dlm, unsigned int namelen, void *astarg) { + int o2dlm_mode = mode_to_o2dlm(mode); + int o2dlm_flags = flags_to_o2dlm(flags); + BUG_ON(lproto == NULL); - return dlmlock(dlm, mode, lksb, flags, name, namelen, + + return dlmlock(dlm, o2dlm_mode, lksb, o2dlm_flags, name, namelen, lproto->lp_lock_ast, astarg, lproto->lp_blocking_ast); } @@ -46,9 +100,12 @@ enum dlm_status 
ocfs2_dlm_unlock(struct dlm_ctxt *dlm, u32 flags, void *astarg) { + int o2dlm_flags = flags_to_o2dlm(flags); + BUG_ON(lproto == NULL); - return dlmunlock(dlm, lksb, flags, lproto->lp_unlock_ast, astarg); + return dlmunlock(dlm, lksb, o2dlm_flags, + lproto->lp_unlock_ast, astarg); } -- cgit v1.2.2 From 7431cd7e8dd0e46e9b12bd6a1ac1286f4b420371 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 12:15:37 -0800 Subject: ocfs2: Use -errno instead of dlm_status for ocfs2_dlm_lock/unlock() API. Change the ocfs2_dlm_lock/unlock() functions to return -errno values. This is the first step towards eliminating dlm_status in fs/ocfs2/dlmglue.c. The change also passes -errno values to ->unlock_ast(). [ Fix a return code in dlmglue.c and change the error translation table into an array of ints. --Mark ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 135 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 99538043fc17..0aec2fcf2175 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -18,6 +18,7 @@ * General Public License for more details. */ +#include "cluster/masklog.h" #include "stackglue.h" static struct ocfs2_locking_protocol *lproto; @@ -77,7 +78,126 @@ static int flags_to_o2dlm(u32 flags) } #undef map_flag -enum dlm_status ocfs2_dlm_lock(struct dlm_ctxt *dlm, +/* + * Map an o2dlm status to standard errno values. + * + * o2dlm only uses a handful of these, and returns even fewer to the + * caller. Still, we try to assign sane values to each error. + * + * The following value pairs have special meanings to dlmglue, thus + * the right hand side needs to stay unique - never duplicate the + * mapping elsewhere in the table! 
+ * + * DLM_NORMAL: 0 + * DLM_NOTQUEUED: -EAGAIN + * DLM_CANCELGRANT: -DLM_ECANCEL + * DLM_CANCEL: -DLM_EUNLOCK + */ +/* Keep in sync with dlmapi.h */ +static int status_map[] = { + [DLM_NORMAL] = 0, /* Success */ + [DLM_GRANTED] = -EINVAL, + [DLM_DENIED] = -EACCES, + [DLM_DENIED_NOLOCKS] = -EACCES, + [DLM_WORKING] = -EBUSY, + [DLM_BLOCKED] = -EINVAL, + [DLM_BLOCKED_ORPHAN] = -EINVAL, + [DLM_DENIED_GRACE_PERIOD] = -EACCES, + [DLM_SYSERR] = -ENOMEM, /* It is what it is */ + [DLM_NOSUPPORT] = -EPROTO, + [DLM_CANCELGRANT] = -DLM_ECANCEL, /* Cancel after grant */ + [DLM_IVLOCKID] = -EINVAL, + [DLM_SYNC] = -EINVAL, + [DLM_BADTYPE] = -EINVAL, + [DLM_BADRESOURCE] = -EINVAL, + [DLM_MAXHANDLES] = -ENOMEM, + [DLM_NOCLINFO] = -EINVAL, + [DLM_NOLOCKMGR] = -EINVAL, + [DLM_NOPURGED] = -EINVAL, + [DLM_BADARGS] = -EINVAL, + [DLM_VOID] = -EINVAL, + [DLM_NOTQUEUED] = -EAGAIN, /* Trylock failed */ + [DLM_IVBUFLEN] = -EINVAL, + [DLM_CVTUNGRANT] = -EPERM, + [DLM_BADPARAM] = -EINVAL, + [DLM_VALNOTVALID] = -EINVAL, + [DLM_REJECTED] = -EPERM, + [DLM_ABORT] = -EINVAL, + [DLM_CANCEL] = -DLM_EUNLOCK, /* Successful cancel */ + [DLM_IVRESHANDLE] = -EINVAL, + [DLM_DEADLOCK] = -EDEADLK, + [DLM_DENIED_NOASTS] = -EINVAL, + [DLM_FORWARD] = -EINVAL, + [DLM_TIMEOUT] = -ETIMEDOUT, + [DLM_IVGROUPID] = -EINVAL, + [DLM_VERS_CONFLICT] = -EOPNOTSUPP, + [DLM_BAD_DEVICE_PATH] = -ENOENT, + [DLM_NO_DEVICE_PERMISSION] = -EPERM, + [DLM_NO_CONTROL_DEVICE] = -ENOENT, + [DLM_RECOVERING] = -ENOTCONN, + [DLM_MIGRATING] = -ERESTART, + [DLM_MAXSTATS] = -EINVAL, +}; +static int dlm_status_to_errno(enum dlm_status status) +{ + BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); + + return status_map[status]; +} + +static void o2dlm_lock_ast_wrapper(void *astarg) +{ + BUG_ON(lproto == NULL); + + lproto->lp_lock_ast(astarg); +} + +static void o2dlm_blocking_ast_wrapper(void *astarg, int level) +{ + BUG_ON(lproto == NULL); + + lproto->lp_blocking_ast(astarg, level); +} + +static void o2dlm_unlock_ast_wrapper(void 
*astarg, enum dlm_status status) +{ + int error; + + BUG_ON(lproto == NULL); + + /* + * XXX: CANCEL values are sketchy. + * + * Currently we have preserved the o2dlm paradigm. You can get + * unlock_ast() whether the cancel succeded or not. + * + * First, we're going to pass DLM_EUNLOCK just like fs/dlm does for + * successful unlocks. That is a clean behavior. + * + * In o2dlm, you can get both the lock_ast() for the lock being + * granted and the unlock_ast() for the CANCEL failing. A + * successful cancel sends DLM_NORMAL here. If the + * lock grant happened before the cancel arrived, you get + * DLM_CANCELGRANT. For now, we'll use DLM_ECANCEL to signify + * CANCELGRANT - the CANCEL was supposed to happen but didn't. We + * can then use DLM_EUNLOCK to signify a successful CANCEL - + * effectively, the CANCEL caused the lock to roll back. + * + * In the future, we will likely move the o2dlm to send only one + * ast - either unlock_ast() for a successful CANCEL or lock_ast() + * when the grant succeeds. At that point, we'll send DLM_ECANCEL + * for all cancel results (CANCELGRANT will no longer exist). 
+ */ + error = dlm_status_to_errno(status); + + /* Successful unlock is DLM_EUNLOCK */ + if (!error) + error = -DLM_EUNLOCK; + + lproto->lp_unlock_ast(astarg, error); +} + +int ocfs2_dlm_lock(struct dlm_ctxt *dlm, int mode, struct dlm_lockstatus *lksb, u32 flags, @@ -85,27 +205,35 @@ enum dlm_status ocfs2_dlm_lock(struct dlm_ctxt *dlm, unsigned int namelen, void *astarg) { + enum dlm_status status; int o2dlm_mode = mode_to_o2dlm(mode); int o2dlm_flags = flags_to_o2dlm(flags); + int ret; BUG_ON(lproto == NULL); - return dlmlock(dlm, o2dlm_mode, lksb, o2dlm_flags, name, namelen, - lproto->lp_lock_ast, astarg, - lproto->lp_blocking_ast); + status = dlmlock(dlm, o2dlm_mode, lksb, o2dlm_flags, name, namelen, + o2dlm_lock_ast_wrapper, astarg, + o2dlm_blocking_ast_wrapper); + ret = dlm_status_to_errno(status); + return ret; } -enum dlm_status ocfs2_dlm_unlock(struct dlm_ctxt *dlm, +int ocfs2_dlm_unlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb, u32 flags, void *astarg) { + enum dlm_status status; int o2dlm_flags = flags_to_o2dlm(flags); + int ret; BUG_ON(lproto == NULL); - return dlmunlock(dlm, lksb, o2dlm_flags, - lproto->lp_unlock_ast, astarg); + status = dlmunlock(dlm, lksb, o2dlm_flags, + o2dlm_unlock_ast_wrapper, astarg); + ret = dlm_status_to_errno(status); + return ret; } -- cgit v1.2.2 From 8f2c9c1b16bf6ed0903b29c49d56fa0109a390e4 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 12:16:57 -0800 Subject: ocfs2: Create the lock status block union. Wrap the lock status block (lksb) in a union. Later we will add a union element for the fs/dlm lksb. Create accessors for the status and lvb fields. Other than a debugging function, dlmglue.c does not directly reference the o2dlm locking path anymore. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 0aec2fcf2175..eb88854cb976 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -199,7 +199,7 @@ static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) int ocfs2_dlm_lock(struct dlm_ctxt *dlm, int mode, - struct dlm_lockstatus *lksb, + union ocfs2_dlm_lksb *lksb, u32 flags, void *name, unsigned int namelen, @@ -212,15 +212,16 @@ int ocfs2_dlm_lock(struct dlm_ctxt *dlm, BUG_ON(lproto == NULL); - status = dlmlock(dlm, o2dlm_mode, lksb, o2dlm_flags, name, namelen, - o2dlm_lock_ast_wrapper, astarg, - o2dlm_blocking_ast_wrapper); + status = dlmlock(dlm, o2dlm_mode, &lksb->lksb_o2dlm, o2dlm_flags, + name, namelen, + o2dlm_lock_ast_wrapper, astarg, + o2dlm_blocking_ast_wrapper); ret = dlm_status_to_errno(status); return ret; } int ocfs2_dlm_unlock(struct dlm_ctxt *dlm, - struct dlm_lockstatus *lksb, + union ocfs2_dlm_lksb *lksb, u32 flags, void *astarg) { @@ -230,12 +231,26 @@ int ocfs2_dlm_unlock(struct dlm_ctxt *dlm, BUG_ON(lproto == NULL); - status = dlmunlock(dlm, lksb, o2dlm_flags, - o2dlm_unlock_ast_wrapper, astarg); + status = dlmunlock(dlm, &lksb->lksb_o2dlm, o2dlm_flags, + o2dlm_unlock_ast_wrapper, astarg); ret = dlm_status_to_errno(status); return ret; } +int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) +{ + return dlm_status_to_errno(lksb->lksb_o2dlm.status); +} + +/* + * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we + * don't cast at the glue level. The real answer is that the header + * ordering is nigh impossible. 
+ */ +void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) +{ + return (void *)(lksb->lksb_o2dlm.lvb); +} void o2cb_get_stack(struct ocfs2_locking_protocol *proto) { -- cgit v1.2.2 From 4670c46ded9a18268d1265417ff4ac72145a7917 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 14:39:35 -0800 Subject: ocfs2: Introduce the new ocfs2_cluster_connect/disconnect() API. This step introduces a cluster stack agnostic API for initializing and exiting. fs/ocfs2/dlmglue.c no longer uses o2cb/o2dlm knowledge to connect to the stack. It is all handled in stackglue.c. heartbeat.c no longer needs to know how it gets called. ocfs2_do_node_down() is now a clean recovery trigger. The big gotcha is the ordering of initializations and de-initializations done underneath ocfs2_cluster_connect(). ocfs2_dlm_init() used to do all o2dlm initialization in one block. Thus, the o2dlm functionality of ocfs2_cluster_connect() is very straightforward. ocfs2_dlm_shutdown(), however, did a few things between de-registration of the eviction callback and actually shutting down the domain. Now de-registration and shutdown of the domain are wrapped within the single ocfs2_cluster_disconnect() call. I've checked the code paths to make sure we can safely tear down things in ocfs2_dlm_shutdown() before calling ocfs2_cluster_disconnect(). The filesystem has already set itself to ignore the callback. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 125 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index eb88854cb976..f6f309a08344 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -18,11 +18,21 @@ * General Public License for more details. 
*/ +#include +#include + +/* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ +#include + #include "cluster/masklog.h" #include "stackglue.h" static struct ocfs2_locking_protocol *lproto; +struct o2dlm_private { + struct dlm_eviction_cb op_eviction_cb; +}; + /* These should be identical */ #if (DLM_LOCK_IV != LKM_IVMODE) # error Lock modes do not match @@ -197,7 +207,7 @@ static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) lproto->lp_unlock_ast(astarg, error); } -int ocfs2_dlm_lock(struct dlm_ctxt *dlm, +int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, int mode, union ocfs2_dlm_lksb *lksb, u32 flags, @@ -212,15 +222,15 @@ int ocfs2_dlm_lock(struct dlm_ctxt *dlm, BUG_ON(lproto == NULL); - status = dlmlock(dlm, o2dlm_mode, &lksb->lksb_o2dlm, o2dlm_flags, - name, namelen, + status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, + o2dlm_flags, name, namelen, o2dlm_lock_ast_wrapper, astarg, o2dlm_blocking_ast_wrapper); ret = dlm_status_to_errno(status); return ret; } -int ocfs2_dlm_unlock(struct dlm_ctxt *dlm, +int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, union ocfs2_dlm_lksb *lksb, u32 flags, void *astarg) @@ -231,8 +241,8 @@ int ocfs2_dlm_unlock(struct dlm_ctxt *dlm, BUG_ON(lproto == NULL); - status = dlmunlock(dlm, &lksb->lksb_o2dlm, o2dlm_flags, - o2dlm_unlock_ast_wrapper, astarg); + status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, + o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); ret = dlm_status_to_errno(status); return ret; } @@ -252,6 +262,115 @@ void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) return (void *)(lksb->lksb_o2dlm.lvb); } +/* + * Called from the dlm when it's about to evict a node. This is how the + * classic stack signals node death. 
+ */ +static void o2dlm_eviction_cb(int node_num, void *data) +{ + struct ocfs2_cluster_connection *conn = data; + + mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", + node_num, conn->cc_namelen, conn->cc_name); + + conn->cc_recovery_handler(node_num, conn->cc_recovery_data); +} + +int ocfs2_cluster_connect(const char *group, + int grouplen, + void (*recovery_handler)(int node_num, + void *recovery_data), + void *recovery_data, + struct ocfs2_cluster_connection **conn) +{ + int rc = 0; + struct ocfs2_cluster_connection *new_conn; + u32 dlm_key; + struct dlm_ctxt *dlm; + struct o2dlm_private *priv; + struct dlm_protocol_version dlm_version; + + BUG_ON(group == NULL); + BUG_ON(conn == NULL); + BUG_ON(recovery_handler == NULL); + + if (grouplen > GROUP_NAME_MAX) { + rc = -EINVAL; + goto out; + } + + new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), + GFP_KERNEL); + if (!new_conn) { + rc = -ENOMEM; + goto out; + } + + memcpy(new_conn->cc_name, group, grouplen); + new_conn->cc_namelen = grouplen; + new_conn->cc_recovery_handler = recovery_handler; + new_conn->cc_recovery_data = recovery_data; + + /* Start the new connection at our maximum compatibility level */ + new_conn->cc_version = lproto->lp_max_version; + + priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); + if (!priv) { + rc = -ENOMEM; + goto out_free; + } + + /* This just fills the structure in. It is safe to use new_conn. */ + dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, + new_conn); + + new_conn->cc_private = priv; + + /* used by the dlm code to make message headers unique, each + * node in this domain must agree on this. 
*/ + dlm_key = crc32_le(0, group, grouplen); + dlm_version.pv_major = new_conn->cc_version.pv_major; + dlm_version.pv_minor = new_conn->cc_version.pv_minor; + + dlm = dlm_register_domain(group, dlm_key, &dlm_version); + if (IS_ERR(dlm)) { + rc = PTR_ERR(dlm); + mlog_errno(rc); + goto out_free; + } + + new_conn->cc_version.pv_major = dlm_version.pv_major; + new_conn->cc_version.pv_minor = dlm_version.pv_minor; + new_conn->cc_lockspace = dlm; + + dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); + + *conn = new_conn; + +out_free: + if (rc) { + kfree(new_conn->cc_private); + kfree(new_conn); + } + +out: + return rc; +} + +int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) +{ + struct dlm_ctxt *dlm = conn->cc_lockspace; + struct o2dlm_private *priv = conn->cc_private; + + dlm_unregister_eviction_cb(&priv->op_eviction_cb); + dlm_unregister_domain(dlm); + + kfree(priv); + kfree(conn); + + return 0; +} + void o2cb_get_stack(struct ocfs2_locking_protocol *proto) { BUG_ON(proto == NULL); -- cgit v1.2.2 From 19fdb624dc8ccb663f6e48b3a3a3fa4e4e567fc1 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 30 Jan 2008 15:38:24 -0800 Subject: ocfs2: Abstract out node number queries. ocfs2 asks the cluster stack for the local node's node number for two reasons; to fill the slot map and to print it. While the slot map isn't necessary for userspace cluster stacks, the printing is very nice for debugging. Thus we add ocfs2_cluster_this_node() as a generic API to get this value. It is anticipated that the slot map will not be used under a userspace cluster stack, so validity checks of the node num only need to exist in the slot map code. Otherwise, it just gets used and printed as an opaque value. [ Fixed up some "int" versus "unsigned int" issues and made osb->node_num truly opaque. 
--Mark ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index f6f309a08344..814686356cc6 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -25,6 +25,8 @@ #include #include "cluster/masklog.h" +#include "cluster/nodemanager.h" + #include "stackglue.h" static struct ocfs2_locking_protocol *lproto; @@ -371,6 +373,21 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) return 0; } +int ocfs2_cluster_this_node(unsigned int *node) +{ + int node_num; + + node_num = o2nm_this_node(); + if (node_num == O2NM_INVALID_NODE_NUM) + return -ENOENT; + + if (node_num >= O2NM_MAX_NODES) + return -EOVERFLOW; + + *node = node_num; + return 0; +} + void o2cb_get_stack(struct ocfs2_locking_protocol *proto) { BUG_ON(proto == NULL); -- cgit v1.2.2 From 6953b4c008628b945bfe0cee97f6e78a98773859 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 29 Jan 2008 16:59:56 -0800 Subject: ocfs2: Move o2hb functionality into the stack glue. The last bit of classic stack used directly in ocfs2 code is o2hb. Specifically, the check for heartbeat during mount and the call to ocfs2_hb_ctl during unmount. We create an extra API, ocfs2_cluster_hangup(), to encapsulate the call to ocfs2_hb_ctl. Other stacks will just leave hangup() empty. The check for heartbeat is moved into ocfs2_cluster_connect(). It will be matched by a similar check for other stacks. With this change, only stackglue.c includes cluster/ headers. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 814686356cc6..670fa945c212 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -20,12 +20,14 @@ #include #include +#include /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ #include #include "cluster/masklog.h" #include "cluster/nodemanager.h" +#include "cluster/heartbeat.h" #include "stackglue.h" @@ -301,6 +303,13 @@ int ocfs2_cluster_connect(const char *group, goto out; } + /* for now we only have one cluster/node, make sure we see it + * in the heartbeat universe */ + if (!o2hb_check_local_node_heartbeating()) { + rc = -EINVAL; + goto out; + } + new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), GFP_KERNEL); if (!new_conn) { @@ -359,6 +368,7 @@ out: return rc; } + int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) { struct dlm_ctxt *dlm = conn->cc_lockspace; @@ -373,6 +383,46 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) return 0; } +static void o2hb_stop(const char *group) +{ + int ret; + char *argv[5], *envp[3]; + + argv[0] = (char *)o2nm_get_hb_ctl_path(); + argv[1] = "-K"; + argv[2] = "-u"; + argv[3] = (char *)group; + argv[4] = NULL; + + mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); + + /* minimal command environment taken from cpu_run_sbin_hotplug */ + envp[0] = "HOME=/"; + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + if (ret < 0) + mlog_errno(ret); +} + +/* + * Hangup is a hack for tools compatibility. Older ocfs2-tools software + * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This + * happens regardless of whether the DLM got started, so we can't do it + * in ocfs2_cluster_disconnect(). 
We bring the o2hb_stop() function into + * the glue and provide a "hangup" API for super.c to call. + * + * Other stacks will eventually provide a NULL ->hangup() pointer. + */ +void ocfs2_cluster_hangup(const char *group, int grouplen) +{ + BUG_ON(group == NULL); + BUG_ON(group[grouplen] != '\0'); + + o2hb_stop(group); +} + int ocfs2_cluster_this_node(unsigned int *node) { int node_num; -- cgit v1.2.2 From de551246e7bc5558371c3427889a8db1b8cc60f4 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 14:45:08 -0800 Subject: ocfs2: Remove CANCELGRANT from the view of dlmglue. o2dlm has the non-standard behavior of providing a cancel callback (unlock_ast) even when the cancel has failed (the locking operation succeeded without canceling). This is called CANCELGRANT after the status code sent to the callback. fs/dlm does not provide this callback, so dlmglue must be changed to live without it. o2dlm_unlock_ast_wrapper() in stackglue now ignores CANCELGRANT calls. Because dlmglue no longer sees CANCELGRANT, ocfs2_unlock_ast() no longer needs to check for it. ocfs2_locking_ast() must catch that a cancel was tried and clear the cancel state. Making these changes opens up a locking race. dlmglue uses the OCFS2_LOCK_BUSY flag to ensure only one thread is calling the dlm at any one time. But dlmglue must unlock the lockres before calling into the dlm. In the small window of time between unlocking the lockres and calling the dlm, the downconvert thread can try to cancel the lock. The downconvert thread is checking the OCFS2_LOCK_BUSY flag - it doesn't know that ocfs2_dlm_lock() has not yet been called. Because ocfs2_dlm_lock() has not yet been called, the cancel operation will just be a no-op. There's nothing to cancel. With CANCELGRANT, dlmglue uses the CANCELGRANT callback to clear up the cancel state. When it comes around again, it will retry the cancel. 
Eventually, the first thread will have called into ocfs2_dlm_lock(), and either the lock or the cancel will succeed. The downconvert thread can then do its downconvert. Without CANCELGRANT, there is nothing to clean up the cancellation state. The downconvert thread does not know to retry its operations. More importantly, the original lock may be blocking on the other node that is trying to cancel us. With neither able to make progress, the ast is never called and the cancellation state is never cleaned up that way. dlmglue is deadlocked. The OCFS2_LOCK_PENDING flag is introduced to remedy this window. It is set at the same time OCFS2_LOCK_BUSY is. Thus, the downconvert thread can check whether the lock is cancelable. If not, it just loops around to try again. Once ocfs2_dlm_lock() is called, the thread then clears OCFS2_LOCK_PENDING and wakes the downconvert thread. Now, if the downconvert thread finds the lock BUSY, it can safely try to cancel it. Whether the cancel works or not, the state will be properly set and the lock processing can continue. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 670fa945c212..abdb9f6f4cc9 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -104,8 +104,8 @@ static int flags_to_o2dlm(u32 flags) * * DLM_NORMAL: 0 * DLM_NOTQUEUED: -EAGAIN - * DLM_CANCELGRANT: -DLM_ECANCEL - * DLM_CANCEL: -DLM_EUNLOCK + * DLM_CANCELGRANT: -EBUSY + * DLM_CANCEL: -DLM_ECANCEL */ /* Keep in sync with dlmapi.h */ static int status_map[] = { @@ -113,13 +113,13 @@ static int status_map[] = { [DLM_GRANTED] = -EINVAL, [DLM_DENIED] = -EACCES, [DLM_DENIED_NOLOCKS] = -EACCES, - [DLM_WORKING] = -EBUSY, + [DLM_WORKING] = -EACCES, [DLM_BLOCKED] = -EINVAL, [DLM_BLOCKED_ORPHAN] = -EINVAL, [DLM_DENIED_GRACE_PERIOD] = -EACCES, [DLM_SYSERR] = -ENOMEM, /* It is what it is */ [DLM_NOSUPPORT] = -EPROTO, - [DLM_CANCELGRANT] = -DLM_ECANCEL, /* Cancel after grant */ + [DLM_CANCELGRANT] = -EBUSY, /* Cancel after grant */ [DLM_IVLOCKID] = -EINVAL, [DLM_SYNC] = -EINVAL, [DLM_BADTYPE] = -EINVAL, @@ -137,7 +137,7 @@ static int status_map[] = { [DLM_VALNOTVALID] = -EINVAL, [DLM_REJECTED] = -EPERM, [DLM_ABORT] = -EINVAL, - [DLM_CANCEL] = -DLM_EUNLOCK, /* Successful cancel */ + [DLM_CANCEL] = -DLM_ECANCEL, /* Successful cancel */ [DLM_IVRESHANDLE] = -EINVAL, [DLM_DEADLOCK] = -EDEADLK, [DLM_DENIED_NOASTS] = -EINVAL, @@ -152,6 +152,7 @@ static int status_map[] = { [DLM_MIGRATING] = -ERESTART, [DLM_MAXSTATS] = -EINVAL, }; + static int dlm_status_to_errno(enum dlm_status status) { BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); @@ -175,38 +176,23 @@ static void o2dlm_blocking_ast_wrapper(void *astarg, int level) static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) { - int error; + int error = dlm_status_to_errno(status); BUG_ON(lproto == NULL); /* - * XXX: CANCEL 
values are sketchy. - * - * Currently we have preserved the o2dlm paradigm. You can get - * unlock_ast() whether the cancel succeded or not. - * - * First, we're going to pass DLM_EUNLOCK just like fs/dlm does for - * successful unlocks. That is a clean behavior. - * * In o2dlm, you can get both the lock_ast() for the lock being * granted and the unlock_ast() for the CANCEL failing. A * successful cancel sends DLM_NORMAL here. If the * lock grant happened before the cancel arrived, you get - * DLM_CANCELGRANT. For now, we'll use DLM_ECANCEL to signify - * CANCELGRANT - the CANCEL was supposed to happen but didn't. We - * can then use DLM_EUNLOCK to signify a successful CANCEL - - * effectively, the CANCEL caused the lock to roll back. + * DLM_CANCELGRANT. * - * In the future, we will likely move the o2dlm to send only one - * ast - either unlock_ast() for a successful CANCEL or lock_ast() - * when the grant succeeds. At that point, we'll send DLM_ECANCEL - * for all cancel results (CANCELGRANT will no longer exist). + * There's no need for the double-ast. If we see DLM_CANCELGRANT, + * we just ignore it. We expect the lock_ast() to handle the + * granted lock. */ - error = dlm_status_to_errno(status); - - /* Successful unlock is DLM_EUNLOCK */ - if (!error) - error = -DLM_EUNLOCK; + if (status == DLM_CANCELGRANT) + return; lproto->lp_unlock_ast(astarg, error); } -- cgit v1.2.2 From cf0acdcd640e9466059e69951c557e90b4bee45a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 29 Jan 2008 16:59:55 -0800 Subject: ocfs2: Abstract out a debugging function for underlying dlms. dlmglue.c was still referencing a raw o2dlm lksb in one instance. Let's create a generic ocfs2_dlm_dump_lksb() function. This allows underlying DLMs to print whatever they want about their lock. We then move the o2dlm dump into stackglue.c where it belongs. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index abdb9f6f4cc9..bd805411a856 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -252,6 +252,11 @@ void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) return (void *)(lksb->lksb_o2dlm.lvb); } +void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) +{ + dlm_print_one_lock(lksb->lksb_o2dlm.lockid); +} + /* * Called from the dlm when it's about to evict a node. This is how the * classic stack signals node death. -- cgit v1.2.2 From 63e0c48ae6986a5bbb8e8dd9210c0e6ca79f2e50 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 30 Jan 2008 16:58:36 -0800 Subject: ocfs2: Clean up stackglue initialization The stack glue initialization function needs a better name so that it can be used cleanly when stackglue becomes a module. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index bd805411a856..51c2546b328d 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -429,14 +429,10 @@ int ocfs2_cluster_this_node(unsigned int *node) return 0; } -void o2cb_get_stack(struct ocfs2_locking_protocol *proto) +void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) { - BUG_ON(proto == NULL); + BUG_ON(proto != NULL); lproto = proto; } -void o2cb_put_stack(void) -{ - lproto = NULL; -} -- cgit v1.2.2 From 553aa7e408eac402c00b67ddfa7aec13fe1f3a33 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 14:51:03 -0800 Subject: ocfs2: Split o2cb code from generic stack functions. Split off the o2cb-specific functionality from the generic stack glue calls. This is a precursor to wrapping the o2cb functionality in an operations vector. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 209 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 144 insertions(+), 65 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 51c2546b328d..e35dde6217f5 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -197,21 +197,19 @@ static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) lproto->lp_unlock_ast(astarg, error); } -int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, - int mode, - union ocfs2_dlm_lksb *lksb, - u32 flags, - void *name, - unsigned int namelen, - void *astarg) +static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, + int mode, + union ocfs2_dlm_lksb *lksb, + u32 flags, + void *name, + unsigned int namelen, + void *astarg) { enum dlm_status status; int o2dlm_mode = mode_to_o2dlm(mode); int o2dlm_flags = flags_to_o2dlm(flags); int ret; - BUG_ON(lproto == NULL); - status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, o2dlm_flags, name, namelen, o2dlm_lock_ast_wrapper, astarg, @@ -220,43 +218,80 @@ int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, return ret; } -int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, - union ocfs2_dlm_lksb *lksb, - u32 flags, - void *astarg) +int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, + int mode, + union ocfs2_dlm_lksb *lksb, + u32 flags, + void *name, + unsigned int namelen, + void *astarg) +{ + BUG_ON(lproto == NULL); + + return o2cb_dlm_lock(conn, mode, lksb, flags, + name, namelen, astarg); +} + +static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, + union ocfs2_dlm_lksb *lksb, + u32 flags, + void *astarg) { enum dlm_status status; int o2dlm_flags = flags_to_o2dlm(flags); int ret; - BUG_ON(lproto == NULL); - status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); ret = dlm_status_to_errno(status); return ret; 
} -int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) +int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, + union ocfs2_dlm_lksb *lksb, + u32 flags, + void *astarg) +{ + BUG_ON(lproto == NULL); + + return o2cb_dlm_unlock(conn, lksb, flags, astarg); +} + +static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) { return dlm_status_to_errno(lksb->lksb_o2dlm.status); } +int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) +{ + return o2cb_dlm_lock_status(lksb); +} + /* * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we * don't cast at the glue level. The real answer is that the header * ordering is nigh impossible. */ -void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) +static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return (void *)(lksb->lksb_o2dlm.lvb); } -void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) +void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) +{ + return o2cb_dlm_lvb(lksb); +} + +static void o2cb_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) { dlm_print_one_lock(lksb->lksb_o2dlm.lockid); } +void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) +{ + o2cb_dlm_dump_lksb(lksb); +} + /* * Called from the dlm when it's about to evict a node. This is how the * classic stack signals node death. @@ -271,6 +306,62 @@ static void o2dlm_eviction_cb(int node_num, void *data) conn->cc_recovery_handler(node_num, conn->cc_recovery_data); } +static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) +{ + int rc = 0; + u32 dlm_key; + struct dlm_ctxt *dlm; + struct o2dlm_private *priv; + struct dlm_protocol_version dlm_version; + + BUG_ON(conn == NULL); + + /* for now we only have one cluster/node, make sure we see it + * in the heartbeat universe */ + if (!o2hb_check_local_node_heartbeating()) { + rc = -EINVAL; + goto out; + } + + priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); + if (!priv) { + rc = -ENOMEM; + goto out_free; + } + + /* This just fills the structure in. It is safe to pass conn. 
*/ + dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, + conn); + + conn->cc_private = priv; + + /* used by the dlm code to make message headers unique, each + * node in this domain must agree on this. */ + dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); + dlm_version.pv_major = conn->cc_version.pv_major; + dlm_version.pv_minor = conn->cc_version.pv_minor; + + dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); + if (IS_ERR(dlm)) { + rc = PTR_ERR(dlm); + mlog_errno(rc); + goto out_free; + } + + conn->cc_version.pv_major = dlm_version.pv_major; + conn->cc_version.pv_minor = dlm_version.pv_minor; + conn->cc_lockspace = dlm; + + dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); + +out_free: + if (rc && conn->cc_private) + kfree(conn->cc_private); + +out: + return rc; +} + int ocfs2_cluster_connect(const char *group, int grouplen, void (*recovery_handler)(int node_num, @@ -280,10 +371,6 @@ int ocfs2_cluster_connect(const char *group, { int rc = 0; struct ocfs2_cluster_connection *new_conn; - u32 dlm_key; - struct dlm_ctxt *dlm; - struct o2dlm_private *priv; - struct dlm_protocol_version dlm_version; BUG_ON(group == NULL); BUG_ON(conn == NULL); @@ -294,13 +381,6 @@ int ocfs2_cluster_connect(const char *group, goto out; } - /* for now we only have one cluster/node, make sure we see it - * in the heartbeat universe */ - if (!o2hb_check_local_node_heartbeating()) { - rc = -EINVAL; - goto out; - } - new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), GFP_KERNEL); if (!new_conn) { @@ -316,64 +396,53 @@ int ocfs2_cluster_connect(const char *group, /* Start the new connection at our maximum compatibility level */ new_conn->cc_version = lproto->lp_max_version; - priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); - if (!priv) { - rc = -ENOMEM; - goto out_free; - } - - /* This just fills the structure in. It is safe to use new_conn. 
*/ - dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, - new_conn); - - new_conn->cc_private = priv; - - /* used by the dlm code to make message headers unique, each - * node in this domain must agree on this. */ - dlm_key = crc32_le(0, group, grouplen); - dlm_version.pv_major = new_conn->cc_version.pv_major; - dlm_version.pv_minor = new_conn->cc_version.pv_minor; - - dlm = dlm_register_domain(group, dlm_key, &dlm_version); - if (IS_ERR(dlm)) { - rc = PTR_ERR(dlm); + rc = o2cb_cluster_connect(new_conn); + if (rc) { mlog_errno(rc); goto out_free; } - new_conn->cc_version.pv_major = dlm_version.pv_major; - new_conn->cc_version.pv_minor = dlm_version.pv_minor; - new_conn->cc_lockspace = dlm; - - dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); - *conn = new_conn; out_free: - if (rc) { - kfree(new_conn->cc_private); + if (rc) kfree(new_conn); - } out: return rc; } -int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) +static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) { struct dlm_ctxt *dlm = conn->cc_lockspace; struct o2dlm_private *priv = conn->cc_private; dlm_unregister_eviction_cb(&priv->op_eviction_cb); - dlm_unregister_domain(dlm); - + conn->cc_private = NULL; kfree(priv); - kfree(conn); + + dlm_unregister_domain(dlm); + conn->cc_lockspace = NULL; return 0; } +int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) +{ + int ret; + + BUG_ON(conn == NULL); + + ret = o2cb_cluster_disconnect(conn); + + /* XXX Should we free it anyway? */ + if (!ret) + kfree(conn); + + return ret; +} + static void o2hb_stop(const char *group) { int ret; @@ -406,15 +475,20 @@ static void o2hb_stop(const char *group) * * Other stacks will eventually provide a NULL ->hangup() pointer. 
*/ +static void o2cb_cluster_hangup(const char *group, int grouplen) +{ + o2hb_stop(group); +} + void ocfs2_cluster_hangup(const char *group, int grouplen) { BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - o2hb_stop(group); + o2cb_cluster_hangup(group, grouplen); } -int ocfs2_cluster_this_node(unsigned int *node) +static int o2cb_cluster_this_node(unsigned int *node) { int node_num; @@ -429,6 +503,11 @@ int ocfs2_cluster_this_node(unsigned int *node) return 0; } +int ocfs2_cluster_this_node(unsigned int *node) +{ + return o2cb_cluster_this_node(node); +} + void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) { BUG_ON(proto != NULL); -- cgit v1.2.2 From e3dad42bf993a0f24eb6e46152356c9b119c15e8 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 15:02:36 -0800 Subject: ocfs2: Create ocfs2_stack_operations and split out the o2cb stack. Define the ocfs2_stack_operations structure. Build o2cb_stack_ops from all of the o2cb-specific stack functions. Change the generic stack glue functions to call the stack_ops instead of the o2cb functions directly. The o2cb functions are moved to stack_o2cb.c. The headers are cleaned up to where only needed headers are included. In this code, stackglue.c and stack_o2cb.c refer to some shared extern variables. When they become modules, that will change. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 385 ++------------------------------------------------- 1 file changed, 15 insertions(+), 370 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index e35dde6217f5..e197367b6bd6 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -19,204 +19,17 @@ */ #include -#include #include /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ #include #include "cluster/masklog.h" -#include "cluster/nodemanager.h" -#include "cluster/heartbeat.h" #include "stackglue.h" -static struct ocfs2_locking_protocol *lproto; - -struct o2dlm_private { - struct dlm_eviction_cb op_eviction_cb; -}; - -/* These should be identical */ -#if (DLM_LOCK_IV != LKM_IVMODE) -# error Lock modes do not match -#endif -#if (DLM_LOCK_NL != LKM_NLMODE) -# error Lock modes do not match -#endif -#if (DLM_LOCK_CR != LKM_CRMODE) -# error Lock modes do not match -#endif -#if (DLM_LOCK_CW != LKM_CWMODE) -# error Lock modes do not match -#endif -#if (DLM_LOCK_PR != LKM_PRMODE) -# error Lock modes do not match -#endif -#if (DLM_LOCK_PW != LKM_PWMODE) -# error Lock modes do not match -#endif -#if (DLM_LOCK_EX != LKM_EXMODE) -# error Lock modes do not match -#endif -static inline int mode_to_o2dlm(int mode) -{ - BUG_ON(mode > LKM_MAXMODE); - - return mode; -} - -#define map_flag(_generic, _o2dlm) \ - if (flags & (_generic)) { \ - flags &= ~(_generic); \ - o2dlm_flags |= (_o2dlm); \ - } -static int flags_to_o2dlm(u32 flags) -{ - int o2dlm_flags = 0; - - map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE); - map_flag(DLM_LKF_CANCEL, LKM_CANCEL); - map_flag(DLM_LKF_CONVERT, LKM_CONVERT); - map_flag(DLM_LKF_VALBLK, LKM_VALBLK); - map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK); - map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN); - map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE); - map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT); - map_flag(DLM_LKF_LOCAL, LKM_LOCAL); - - /* map_flag() should have cleared every flag passed in 
*/ - BUG_ON(flags != 0); - - return o2dlm_flags; -} -#undef map_flag - -/* - * Map an o2dlm status to standard errno values. - * - * o2dlm only uses a handful of these, and returns even fewer to the - * caller. Still, we try to assign sane values to each error. - * - * The following value pairs have special meanings to dlmglue, thus - * the right hand side needs to stay unique - never duplicate the - * mapping elsewhere in the table! - * - * DLM_NORMAL: 0 - * DLM_NOTQUEUED: -EAGAIN - * DLM_CANCELGRANT: -EBUSY - * DLM_CANCEL: -DLM_ECANCEL - */ -/* Keep in sync with dlmapi.h */ -static int status_map[] = { - [DLM_NORMAL] = 0, /* Success */ - [DLM_GRANTED] = -EINVAL, - [DLM_DENIED] = -EACCES, - [DLM_DENIED_NOLOCKS] = -EACCES, - [DLM_WORKING] = -EACCES, - [DLM_BLOCKED] = -EINVAL, - [DLM_BLOCKED_ORPHAN] = -EINVAL, - [DLM_DENIED_GRACE_PERIOD] = -EACCES, - [DLM_SYSERR] = -ENOMEM, /* It is what it is */ - [DLM_NOSUPPORT] = -EPROTO, - [DLM_CANCELGRANT] = -EBUSY, /* Cancel after grant */ - [DLM_IVLOCKID] = -EINVAL, - [DLM_SYNC] = -EINVAL, - [DLM_BADTYPE] = -EINVAL, - [DLM_BADRESOURCE] = -EINVAL, - [DLM_MAXHANDLES] = -ENOMEM, - [DLM_NOCLINFO] = -EINVAL, - [DLM_NOLOCKMGR] = -EINVAL, - [DLM_NOPURGED] = -EINVAL, - [DLM_BADARGS] = -EINVAL, - [DLM_VOID] = -EINVAL, - [DLM_NOTQUEUED] = -EAGAIN, /* Trylock failed */ - [DLM_IVBUFLEN] = -EINVAL, - [DLM_CVTUNGRANT] = -EPERM, - [DLM_BADPARAM] = -EINVAL, - [DLM_VALNOTVALID] = -EINVAL, - [DLM_REJECTED] = -EPERM, - [DLM_ABORT] = -EINVAL, - [DLM_CANCEL] = -DLM_ECANCEL, /* Successful cancel */ - [DLM_IVRESHANDLE] = -EINVAL, - [DLM_DEADLOCK] = -EDEADLK, - [DLM_DENIED_NOASTS] = -EINVAL, - [DLM_FORWARD] = -EINVAL, - [DLM_TIMEOUT] = -ETIMEDOUT, - [DLM_IVGROUPID] = -EINVAL, - [DLM_VERS_CONFLICT] = -EOPNOTSUPP, - [DLM_BAD_DEVICE_PATH] = -ENOENT, - [DLM_NO_DEVICE_PERMISSION] = -EPERM, - [DLM_NO_CONTROL_DEVICE] = -ENOENT, - [DLM_RECOVERING] = -ENOTCONN, - [DLM_MIGRATING] = -ERESTART, - [DLM_MAXSTATS] = -EINVAL, -}; - -static int 
dlm_status_to_errno(enum dlm_status status) -{ - BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); +struct ocfs2_locking_protocol *stack_glue_lproto; - return status_map[status]; -} - -static void o2dlm_lock_ast_wrapper(void *astarg) -{ - BUG_ON(lproto == NULL); - - lproto->lp_lock_ast(astarg); -} - -static void o2dlm_blocking_ast_wrapper(void *astarg, int level) -{ - BUG_ON(lproto == NULL); - - lproto->lp_blocking_ast(astarg, level); -} - -static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) -{ - int error = dlm_status_to_errno(status); - - BUG_ON(lproto == NULL); - - /* - * In o2dlm, you can get both the lock_ast() for the lock being - * granted and the unlock_ast() for the CANCEL failing. A - * successful cancel sends DLM_NORMAL here. If the - * lock grant happened before the cancel arrived, you get - * DLM_CANCELGRANT. - * - * There's no need for the double-ast. If we see DLM_CANCELGRANT, - * we just ignore it. We expect the lock_ast() to handle the - * granted lock. 
- */ - if (status == DLM_CANCELGRANT) - return; - - lproto->lp_unlock_ast(astarg, error); -} - -static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, - int mode, - union ocfs2_dlm_lksb *lksb, - u32 flags, - void *name, - unsigned int namelen, - void *astarg) -{ - enum dlm_status status; - int o2dlm_mode = mode_to_o2dlm(mode); - int o2dlm_flags = flags_to_o2dlm(flags); - int ret; - - status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, - o2dlm_flags, name, namelen, - o2dlm_lock_ast_wrapper, astarg, - o2dlm_blocking_ast_wrapper); - ret = dlm_status_to_errno(status); - return ret; -} int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, int mode, @@ -226,25 +39,10 @@ int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, unsigned int namelen, void *astarg) { - BUG_ON(lproto == NULL); - - return o2cb_dlm_lock(conn, mode, lksb, flags, - name, namelen, astarg); -} - -static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, - union ocfs2_dlm_lksb *lksb, - u32 flags, - void *astarg) -{ - enum dlm_status status; - int o2dlm_flags = flags_to_o2dlm(flags); - int ret; + BUG_ON(stack_glue_lproto == NULL); - status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, - o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); - ret = dlm_status_to_errno(status); - return ret; + return o2cb_stack_ops.dlm_lock(conn, mode, lksb, flags, + name, namelen, astarg); } int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, @@ -252,19 +50,14 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, u32 flags, void *astarg) { - BUG_ON(lproto == NULL); + BUG_ON(stack_glue_lproto == NULL); - return o2cb_dlm_unlock(conn, lksb, flags, astarg); -} - -static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) -{ - return dlm_status_to_errno(lksb->lksb_o2dlm.status); + return o2cb_stack_ops.dlm_unlock(conn, lksb, flags, astarg); } int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) { - return o2cb_dlm_lock_status(lksb); + return 
o2cb_stack_ops.lock_status(lksb); } /* @@ -272,94 +65,14 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) * don't cast at the glue level. The real answer is that the header * ordering is nigh impossible. */ -static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) -{ - return (void *)(lksb->lksb_o2dlm.lvb); -} - void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) { - return o2cb_dlm_lvb(lksb); -} - -static void o2cb_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) -{ - dlm_print_one_lock(lksb->lksb_o2dlm.lockid); + return o2cb_stack_ops.lock_lvb(lksb); } void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) { - o2cb_dlm_dump_lksb(lksb); -} - -/* - * Called from the dlm when it's about to evict a node. This is how the - * classic stack signals node death. - */ -static void o2dlm_eviction_cb(int node_num, void *data) -{ - struct ocfs2_cluster_connection *conn = data; - - mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", - node_num, conn->cc_namelen, conn->cc_name); - - conn->cc_recovery_handler(node_num, conn->cc_recovery_data); -} - -static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) -{ - int rc = 0; - u32 dlm_key; - struct dlm_ctxt *dlm; - struct o2dlm_private *priv; - struct dlm_protocol_version dlm_version; - - BUG_ON(conn == NULL); - - /* for now we only have one cluster/node, make sure we see it - * in the heartbeat universe */ - if (!o2hb_check_local_node_heartbeating()) { - rc = -EINVAL; - goto out; - } - - priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); - if (!priv) { - rc = -ENOMEM; - goto out_free; - } - - /* This just fills the structure in. It is safe to pass conn. */ - dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, - conn); - - conn->cc_private = priv; - - /* used by the dlm code to make message headers unique, each - * node in this domain must agree on this. 
*/ - dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); - dlm_version.pv_major = conn->cc_version.pv_major; - dlm_version.pv_minor = conn->cc_version.pv_minor; - - dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); - if (IS_ERR(dlm)) { - rc = PTR_ERR(dlm); - mlog_errno(rc); - goto out_free; - } - - conn->cc_version.pv_major = dlm_version.pv_major; - conn->cc_version.pv_minor = dlm_version.pv_minor; - conn->cc_lockspace = dlm; - - dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); - -out_free: - if (rc && conn->cc_private) - kfree(conn->cc_private); - -out: - return rc; + o2cb_stack_ops.dump_lksb(lksb); } int ocfs2_cluster_connect(const char *group, @@ -394,9 +107,9 @@ int ocfs2_cluster_connect(const char *group, new_conn->cc_recovery_data = recovery_data; /* Start the new connection at our maximum compatibility level */ - new_conn->cc_version = lproto->lp_max_version; + new_conn->cc_version = stack_glue_lproto->lp_max_version; - rc = o2cb_cluster_connect(new_conn); + rc = o2cb_stack_ops.connect(new_conn); if (rc) { mlog_errno(rc); goto out_free; @@ -412,29 +125,13 @@ out: return rc; } - -static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) -{ - struct dlm_ctxt *dlm = conn->cc_lockspace; - struct o2dlm_private *priv = conn->cc_private; - - dlm_unregister_eviction_cb(&priv->op_eviction_cb); - conn->cc_private = NULL; - kfree(priv); - - dlm_unregister_domain(dlm); - conn->cc_lockspace = NULL; - - return 0; -} - int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) { int ret; BUG_ON(conn == NULL); - ret = o2cb_cluster_disconnect(conn); + ret = o2cb_stack_ops.disconnect(conn); /* XXX Should we free it anyway? 
*/ if (!ret) @@ -443,75 +140,23 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) return ret; } -static void o2hb_stop(const char *group) -{ - int ret; - char *argv[5], *envp[3]; - - argv[0] = (char *)o2nm_get_hb_ctl_path(); - argv[1] = "-K"; - argv[2] = "-u"; - argv[3] = (char *)group; - argv[4] = NULL; - - mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); - - /* minimal command environment taken from cpu_run_sbin_hotplug */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - if (ret < 0) - mlog_errno(ret); -} - -/* - * Hangup is a hack for tools compatibility. Older ocfs2-tools software - * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This - * happens regardless of whether the DLM got started, so we can't do it - * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into - * the glue and provide a "hangup" API for super.c to call. - * - * Other stacks will eventually provide a NULL ->hangup() pointer. 
- */ -static void o2cb_cluster_hangup(const char *group, int grouplen) -{ - o2hb_stop(group); -} - void ocfs2_cluster_hangup(const char *group, int grouplen) { BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - o2cb_cluster_hangup(group, grouplen); -} - -static int o2cb_cluster_this_node(unsigned int *node) -{ - int node_num; - - node_num = o2nm_this_node(); - if (node_num == O2NM_INVALID_NODE_NUM) - return -ENOENT; - - if (node_num >= O2NM_MAX_NODES) - return -EOVERFLOW; - - *node = node_num; - return 0; + o2cb_stack_ops.hangup(group, grouplen); } int ocfs2_cluster_this_node(unsigned int *node) { - return o2cb_cluster_this_node(node); + return o2cb_stack_ops.this_node(node); } void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) { BUG_ON(proto != NULL); - lproto = proto; + stack_glue_lproto = proto; } -- cgit v1.2.2 From 286eaa95c5c5915a6b72cc3f0a2534161fd7928b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 15:03:57 -0800 Subject: ocfs2: Break out stackglue into modules. We define the ocfs2_stack_plugin structure to represent a stack driver. The o2cb stack code is split into stack_o2cb.c. This becomes the ocfs2_stack_o2cb.ko module. The stackglue generic functions are similarly split into the ocfs2_stackglue.ko module. This module now provides an interface to register drivers. The ocfs2_stack_o2cb driver registers itself. As part of this interface, ocfs2_stackglue can load drivers on demand. This is accomplished in ocfs2_cluster_connect(). ocfs2_cluster_disconnect() is now notified when a _hangup() is pending. If a hangup is pending, it will not release the driver module and will let _hangup() do that. 
Signed-off-by: Joel Becker --- fs/ocfs2/stackglue.c | 238 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 214 insertions(+), 24 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index e197367b6bd6..1978c9cff0e9 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -18,17 +18,176 @@ * General Public License for more details. */ +#include +#include +#include #include #include -/* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ -#include +#include "stackglue.h" -#include "cluster/masklog.h" +static struct ocfs2_locking_protocol *lproto; +static DEFINE_SPINLOCK(ocfs2_stack_lock); +static LIST_HEAD(ocfs2_stack_list); -#include "stackglue.h" +/* + * The stack currently in use. If not null, active_stack->sp_count > 0, + * the module is pinned, and the locking protocol cannot be changed. + */ +static struct ocfs2_stack_plugin *active_stack; + +static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) +{ + struct ocfs2_stack_plugin *p; + + assert_spin_locked(&ocfs2_stack_lock); + + list_for_each_entry(p, &ocfs2_stack_list, sp_list) { + if (!strcmp(p->sp_name, name)) + return p; + } + + return NULL; +} + +static int ocfs2_stack_driver_request(const char *name) +{ + int rc; + struct ocfs2_stack_plugin *p; + + spin_lock(&ocfs2_stack_lock); + + if (active_stack) { + /* + * If the active stack isn't the one we want, it cannot + * be selected right now. + */ + if (!strcmp(active_stack->sp_name, name)) + rc = 0; + else + rc = -EBUSY; + goto out; + } + + p = ocfs2_stack_lookup(name); + if (!p || !try_module_get(p->sp_owner)) { + rc = -ENOENT; + goto out; + } + + /* Ok, the stack is pinned */ + p->sp_count++; + active_stack = p; + + rc = 0; + +out: + spin_unlock(&ocfs2_stack_lock); + return rc; +} + +/* + * This function looks up the appropriate stack and makes it active. If + * there is no stack, it tries to load it. It will fail if the stack still + * cannot be found. 
It will also fail if a different stack is in use. + */ +static int ocfs2_stack_driver_get(const char *name) +{ + int rc; + + rc = ocfs2_stack_driver_request(name); + if (rc == -ENOENT) { + request_module("ocfs2_stack_%s", name); + rc = ocfs2_stack_driver_request(name); + } + + if (rc == -ENOENT) { + printk(KERN_ERR + "ocfs2: Cluster stack driver \"%s\" cannot be found\n", + name); + } else if (rc == -EBUSY) { + printk(KERN_ERR + "ocfs2: A different cluster stack driver is in use\n"); + } + + return rc; +} -struct ocfs2_locking_protocol *stack_glue_lproto; +static void ocfs2_stack_driver_put(void) +{ + spin_lock(&ocfs2_stack_lock); + BUG_ON(active_stack == NULL); + BUG_ON(active_stack->sp_count == 0); + + active_stack->sp_count--; + if (!active_stack->sp_count) { + module_put(active_stack->sp_owner); + active_stack = NULL; + } + spin_unlock(&ocfs2_stack_lock); +} + +int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) +{ + int rc; + + spin_lock(&ocfs2_stack_lock); + if (!ocfs2_stack_lookup(plugin->sp_name)) { + plugin->sp_count = 0; + plugin->sp_proto = lproto; + list_add(&plugin->sp_list, &ocfs2_stack_list); + printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", + plugin->sp_name); + rc = 0; + } else { + printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", + plugin->sp_name); + rc = -EEXIST; + } + spin_unlock(&ocfs2_stack_lock); + + return rc; +} +EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); + +void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) +{ + struct ocfs2_stack_plugin *p; + + spin_lock(&ocfs2_stack_lock); + p = ocfs2_stack_lookup(plugin->sp_name); + if (p) { + BUG_ON(p != plugin); + BUG_ON(plugin == active_stack); + BUG_ON(plugin->sp_count != 0); + list_del_init(&plugin->sp_list); + printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", + plugin->sp_name); + } else { + printk(KERN_ERR "Stack \"%s\" is not registered\n", + plugin->sp_name); + } + spin_unlock(&ocfs2_stack_lock); +} 
+EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); + +void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) +{ + struct ocfs2_stack_plugin *p; + + BUG_ON(proto == NULL); + + spin_lock(&ocfs2_stack_lock); + BUG_ON(active_stack != NULL); + + lproto = proto; + list_for_each_entry(p, &ocfs2_stack_list, sp_list) { + p->sp_proto = lproto; + } + + spin_unlock(&ocfs2_stack_lock); +} +EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, @@ -39,26 +198,29 @@ int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, unsigned int namelen, void *astarg) { - BUG_ON(stack_glue_lproto == NULL); + BUG_ON(lproto == NULL); - return o2cb_stack_ops.dlm_lock(conn, mode, lksb, flags, - name, namelen, astarg); + return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, + name, namelen, astarg); } +EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, union ocfs2_dlm_lksb *lksb, u32 flags, void *astarg) { - BUG_ON(stack_glue_lproto == NULL); + BUG_ON(lproto == NULL); - return o2cb_stack_ops.dlm_unlock(conn, lksb, flags, astarg); + return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); } +EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) { - return o2cb_stack_ops.lock_status(lksb); + return active_stack->sp_ops->lock_status(lksb); } +EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); /* * Why don't we cast to ocfs2_meta_lvb? 
The "clean" answer is that we @@ -67,13 +229,15 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) */ void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) { - return o2cb_stack_ops.lock_lvb(lksb); + return active_stack->sp_ops->lock_lvb(lksb); } +EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) { - o2cb_stack_ops.dump_lksb(lksb); + active_stack->sp_ops->dump_lksb(lksb); } +EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); int ocfs2_cluster_connect(const char *group, int grouplen, @@ -107,11 +271,16 @@ int ocfs2_cluster_connect(const char *group, new_conn->cc_recovery_data = recovery_data; /* Start the new connection at our maximum compatibility level */ - new_conn->cc_version = stack_glue_lproto->lp_max_version; + new_conn->cc_version = lproto->lp_max_version; + + /* This will pin the stack driver if successful */ + rc = ocfs2_stack_driver_get("o2cb"); + if (rc) + goto out_free; - rc = o2cb_stack_ops.connect(new_conn); + rc = active_stack->sp_ops->connect(new_conn); if (rc) { - mlog_errno(rc); + ocfs2_stack_driver_put(); goto out_free; } @@ -124,39 +293,60 @@ out_free: out: return rc; } +EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); -int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn) +/* If hangup_pending is 0, the stack driver will be dropped */ +int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, + int hangup_pending) { int ret; BUG_ON(conn == NULL); - ret = o2cb_stack_ops.disconnect(conn); + ret = active_stack->sp_ops->disconnect(conn, hangup_pending); /* XXX Should we free it anyway? 
*/ - if (!ret) + if (!ret) { kfree(conn); + if (!hangup_pending) + ocfs2_stack_driver_put(); + } return ret; } +EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); void ocfs2_cluster_hangup(const char *group, int grouplen) { BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - o2cb_stack_ops.hangup(group, grouplen); + active_stack->sp_ops->hangup(group, grouplen); + + /* cluster_disconnect() was called with hangup_pending==1 */ + ocfs2_stack_driver_put(); } +EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); int ocfs2_cluster_this_node(unsigned int *node) { - return o2cb_stack_ops.this_node(node); + return active_stack->sp_ops->this_node(node); } +EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); -void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) + +static int __init ocfs2_stack_glue_init(void) { - BUG_ON(proto != NULL); + return 0; +} - stack_glue_lproto = proto; +static void __exit ocfs2_stack_glue_exit(void) +{ + lproto = NULL; } +MODULE_AUTHOR("Oracle"); +MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); +MODULE_LICENSE("GPL"); +module_init(ocfs2_stack_glue_init); +module_exit(ocfs2_stack_glue_exit); -- cgit v1.2.2 From 74ae4e104dfc57017783fc07d5f2f9129062207f Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 31 Jan 2008 23:56:17 -0800 Subject: ocfs2: Create stack glue sysfs files. Introduce a set of sysfs files that describe the current stack glue state. The files live under /sys/fs/ocfs2. The locking_protocol file displays the version of ocfs2's locking code. The loaded_cluster_plugins file displays all of the currently loaded stack plugins. When filesystems are mounted, the active_cluster_plugin file will display the plugin in use. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1978c9cff0e9..76ae4fcebcbd 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include #include "stackglue.h" @@ -335,14 +338,130 @@ int ocfs2_cluster_this_node(unsigned int *node) EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); -static int __init ocfs2_stack_glue_init(void) +/* + * Sysfs bits + */ + +static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + ssize_t ret = 0; + + spin_lock(&ocfs2_stack_lock); + if (lproto) + ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", + lproto->lp_max_version.pv_major, + lproto->lp_max_version.pv_minor); + spin_unlock(&ocfs2_stack_lock); + + return ret; +} + +static struct kobj_attribute ocfs2_attr_max_locking_protocol = + __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, + ocfs2_max_locking_protocol_show, NULL); + +static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) { + ssize_t ret = 0, total = 0, remain = PAGE_SIZE; + struct ocfs2_stack_plugin *p; + + spin_lock(&ocfs2_stack_lock); + list_for_each_entry(p, &ocfs2_stack_list, sp_list) { + ret = snprintf(buf, remain, "%s\n", + p->sp_name); + if (ret < 0) { + total = ret; + break; + } + if (ret == remain) { + /* snprintf() didn't fit */ + total = -E2BIG; + break; + } + total += ret; + remain -= ret; + } + spin_unlock(&ocfs2_stack_lock); + + return total; +} + +static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = + __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, + ocfs2_loaded_cluster_plugins_show, NULL); + +static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, + struct kobj_attribute 
*attr, + char *buf) +{ + ssize_t ret = 0; + + spin_lock(&ocfs2_stack_lock); + if (active_stack) { + ret = snprintf(buf, PAGE_SIZE, "%s\n", + active_stack->sp_name); + if (ret == PAGE_SIZE) + ret = -E2BIG; + } + spin_unlock(&ocfs2_stack_lock); + + return ret; +} + +static struct kobj_attribute ocfs2_attr_active_cluster_plugin = + __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, + ocfs2_active_cluster_plugin_show, NULL); + +static struct attribute *ocfs2_attrs[] = { + &ocfs2_attr_max_locking_protocol.attr, + &ocfs2_attr_loaded_cluster_plugins.attr, + &ocfs2_attr_active_cluster_plugin.attr, + NULL, +}; + +static struct attribute_group ocfs2_attr_group = { + .attrs = ocfs2_attrs, +}; + +static struct kset *ocfs2_kset; + +static void ocfs2_sysfs_exit(void) +{ + kset_unregister(ocfs2_kset); +} + +static int ocfs2_sysfs_init(void) +{ + int ret; + + ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); + if (!ocfs2_kset) + return -ENOMEM; + + ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); + if (ret) + goto error; + return 0; + +error: + kset_unregister(ocfs2_kset); + return ret; +} + +static int __init ocfs2_stack_glue_init(void) +{ + return ocfs2_sysfs_init(); } static void __exit ocfs2_stack_glue_exit(void) { lproto = NULL; + ocfs2_sysfs_exit(); } MODULE_AUTHOR("Oracle"); -- cgit v1.2.2 From 9c6c877c04ce17d76a35d2173d3a3840d6b796a2 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 15:17:30 -0800 Subject: ocfs2: Add the 'cluster_stack' sysfs file. Userspace can now query and specify the cluster stack in use via the /sys/fs/ocfs2/cluster_stack file. By default, it is 'o2cb', which is the classic stack. Thus, old tools that do not know how to modify this file will work just fine. The stack cannot be modified if there is a live filesystem. ocfs2_cluster_connect() now takes the expected cluster stack as an argument. This way, the filesystem and the stack glue ensure they are speaking to the same backend. 
If the stack is 'o2cb', the o2cb stack plugin is used. For any other value, the fsdlm stack plugin is selected. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 11 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 76ae4fcebcbd..bf45d9bff8a7 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -27,11 +27,17 @@ #include #include +#include "ocfs2_fs.h" + #include "stackglue.h" +#define OCFS2_STACK_PLUGIN_O2CB "o2cb" +#define OCFS2_STACK_PLUGIN_USER "user" + static struct ocfs2_locking_protocol *lproto; static DEFINE_SPINLOCK(ocfs2_stack_lock); static LIST_HEAD(ocfs2_stack_list); +static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; /* * The stack currently in use. If not null, active_stack->sp_count > 0, @@ -53,26 +59,36 @@ static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) return NULL; } -static int ocfs2_stack_driver_request(const char *name) +static int ocfs2_stack_driver_request(const char *stack_name, + const char *plugin_name) { int rc; struct ocfs2_stack_plugin *p; spin_lock(&ocfs2_stack_lock); + /* + * If the stack passed by the filesystem isn't the selected one, + * we can't continue. + */ + if (strcmp(stack_name, cluster_stack_name)) { + rc = -EBUSY; + goto out; + } + if (active_stack) { /* * If the active stack isn't the one we want, it cannot * be selected right now. */ - if (!strcmp(active_stack->sp_name, name)) + if (!strcmp(active_stack->sp_name, plugin_name)) rc = 0; else rc = -EBUSY; goto out; } - p = ocfs2_stack_lookup(name); + p = ocfs2_stack_lookup(plugin_name); if (!p || !try_module_get(p->sp_owner)) { rc = -ENOENT; goto out; @@ -94,23 +110,42 @@ out: * there is no stack, it tries to load it. It will fail if the stack still * cannot be found. It will also fail if a different stack is in use. 
*/ -static int ocfs2_stack_driver_get(const char *name) +static int ocfs2_stack_driver_get(const char *stack_name) { int rc; + char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; + + /* + * Classic stack does not pass in a stack name. This is + * compatible with older tools as well. + */ + if (!stack_name || !*stack_name) + stack_name = OCFS2_STACK_PLUGIN_O2CB; + + if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { + printk(KERN_ERR + "ocfs2 passed an invalid cluster stack label: \"%s\"\n", + stack_name); + return -EINVAL; + } - rc = ocfs2_stack_driver_request(name); + /* Anything that isn't the classic stack is a user stack */ + if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) + plugin_name = OCFS2_STACK_PLUGIN_USER; + + rc = ocfs2_stack_driver_request(stack_name, plugin_name); if (rc == -ENOENT) { - request_module("ocfs2_stack_%s", name); - rc = ocfs2_stack_driver_request(name); + request_module("ocfs2_stack_%s", plugin_name); + rc = ocfs2_stack_driver_request(stack_name, plugin_name); } if (rc == -ENOENT) { printk(KERN_ERR "ocfs2: Cluster stack driver \"%s\" cannot be found\n", - name); + plugin_name); } else if (rc == -EBUSY) { printk(KERN_ERR - "ocfs2: A different cluster stack driver is in use\n"); + "ocfs2: A different cluster stack is in use\n"); } return rc; @@ -242,7 +277,8 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) } EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); -int ocfs2_cluster_connect(const char *group, +int ocfs2_cluster_connect(const char *stack_name, + const char *group, int grouplen, void (*recovery_handler)(int node_num, void *recovery_data), @@ -277,7 +313,7 @@ int ocfs2_cluster_connect(const char *group, new_conn->cc_version = lproto->lp_max_version; /* This will pin the stack driver if successful */ - rc = ocfs2_stack_driver_get("o2cb"); + rc = ocfs2_stack_driver_get(stack_name); if (rc) goto out_free; @@ -416,10 +452,61 @@ static struct kobj_attribute ocfs2_attr_active_cluster_plugin = __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 
ocfs2_active_cluster_plugin_show, NULL); +static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + ssize_t ret; + spin_lock(&ocfs2_stack_lock); + ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); + spin_unlock(&ocfs2_stack_lock); + + return ret; +} + +static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + size_t len = count; + ssize_t ret; + + if (len == 0) + return len; + + if (buf[len - 1] == '\n') + len--; + + if ((len != OCFS2_STACK_LABEL_LEN) || + (strnlen(buf, len) != len)) + return -EINVAL; + + spin_lock(&ocfs2_stack_lock); + if (active_stack) { + if (!strncmp(buf, cluster_stack_name, len)) + ret = count; + else + ret = -EBUSY; + } else { + memcpy(cluster_stack_name, buf, len); + ret = count; + } + spin_unlock(&ocfs2_stack_lock); + + return ret; +} + + +static struct kobj_attribute ocfs2_attr_cluster_stack = + __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, + ocfs2_cluster_stack_show, + ocfs2_cluster_stack_store); + static struct attribute *ocfs2_attrs[] = { &ocfs2_attr_max_locking_protocol.attr, &ocfs2_attr_loaded_cluster_plugins.attr, &ocfs2_attr_active_cluster_plugin.attr, + &ocfs2_attr_cluster_stack.attr, NULL, }; @@ -455,6 +542,8 @@ error: static int __init ocfs2_stack_glue_init(void) { + strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); + return ocfs2_sysfs_init(); } -- cgit v1.2.2 From cf4d8d75d8aba537a19b313a9364fd08ddbd5622 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 20 Feb 2008 14:29:27 -0800 Subject: ocfs2: add fsdlm to stackglue Add code to use fs/dlm. 
[ Modified to be part of the stack_user module -- Joel ] Signed-off-by: David Teigland Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2/stackglue.c') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index bf45d9bff8a7..119f60cea9cc 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -228,13 +228,20 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); +/* + * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take + * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the + * underlying stack plugins need to pilfer the lksb off of the lock_res. + * If some other structure needs to be passed as an astarg, the plugins + * will need to be given a different avenue to the lksb. + */ int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, int mode, union ocfs2_dlm_lksb *lksb, u32 flags, void *name, unsigned int namelen, - void *astarg) + struct ocfs2_lock_res *astarg) { BUG_ON(lproto == NULL); @@ -246,7 +253,7 @@ EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, union ocfs2_dlm_lksb *lksb, u32 flags, - void *astarg) + struct ocfs2_lock_res *astarg) { BUG_ON(lproto == NULL); @@ -360,7 +367,8 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - active_stack->sp_ops->hangup(group, grouplen); + if (active_stack->sp_ops->hangup) + active_stack->sp_ops->hangup(group, grouplen); /* cluster_disconnect() was called with hangup_pending==1 */ ocfs2_stack_driver_put(); -- cgit v1.2.2