aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2005-12-15 17:31:23 -0500
committerJoel Becker <joel.becker@oracle.com>2006-01-03 14:45:47 -0500
commit8df08c89c668e1bd922a053fdb5ba1fadbecbb38 (patch)
treeab1febb732c01a66c0a9bfe9c8952ba2bb66fba8
parent6714d8e86bf443f6f7af50f9d432025649f091f5 (diff)
[PATCH] OCFS2: The Second Oracle Cluster Filesystem
dlmfs: A minimal dlm userspace interface implemented via a virtual file system. Most of the OCFS2 tools make use of this to take cluster locks when doing operations on the file system. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
-rw-r--r--Documentation/filesystems/00-INDEX2
-rw-r--r--Documentation/filesystems/dlmfs.txt130
-rw-r--r--fs/ocfs2/dlm/Makefile4
-rw-r--r--fs/ocfs2/dlm/dlmfs.c640
-rw-r--r--fs/ocfs2/dlm/dlmfsver.c42
-rw-r--r--fs/ocfs2/dlm/dlmfsver.h31
-rw-r--r--fs/ocfs2/dlm/userdlm.c658
-rw-r--r--fs/ocfs2/dlm/userdlm.h111
8 files changed, 1617 insertions, 1 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 628f8a7adb85..d9b0a0691866 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -18,6 +18,8 @@ cramfs.txt
18 - info on the cram filesystem for small storage (ROMs etc) 18 - info on the cram filesystem for small storage (ROMs etc)
19devfs/ 19devfs/
20 - directory containing devfs documentation. 20 - directory containing devfs documentation.
21dlmfs.txt
22 - info on the userspace interface to the OCFS2 DLM.
21ext2.txt 23ext2.txt
22 - info, mount options and specifications for the Ext2 filesystem. 24 - info, mount options and specifications for the Ext2 filesystem.
23fat_cvf.txt 25fat_cvf.txt
diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt
new file mode 100644
index 000000000000..9afab845a906
--- /dev/null
+++ b/Documentation/filesystems/dlmfs.txt
@@ -0,0 +1,130 @@
1dlmfs
2==================
3A minimal DLM userspace interface implemented via a virtual file
4system.
5
6dlmfs is built with OCFS2 as it requires most of its infrastructure.
7
8Project web page: http://oss.oracle.com/projects/ocfs2
9Tools web page: http://oss.oracle.com/projects/ocfs2-tools
10OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
11
12All code copyright 2005 Oracle except when otherwise noted.
13
14CREDITS
15=======
16
17Some code taken from ramfs which is Copyright (C) 2000 Linus Torvalds
18and Transmeta Corp.
19
20Mark Fasheh <mark.fasheh@oracle.com>
21
22Caveats
23=======
24- Right now it only works with the OCFS2 DLM, though support for other
25 DLM implementations should not be a major issue.
26
27Mount options
28=============
29None
30
31Usage
32=====
33
34If you're just interested in OCFS2, then please see ocfs2.txt. The
35rest of this document will be geared towards those who want to use
36dlmfs for easy to setup and easy to use clustered locking in
37userspace.
38
39Setup
40=====
41
42dlmfs requires that the OCFS2 cluster infrastructure be in
43place. Please download ocfs2-tools from the above url and configure a
44cluster.
45
46You'll want to start heartbeating on a volume which all the nodes in
47your lockspace can access. The easiest way to do this is via
48ocfs2_hb_ctl (distributed with ocfs2-tools). Right now it requires
49that an OCFS2 file system be in place so that it can automatically
50find its heartbeat area, though it will eventually support heartbeat
51against raw disks.
52
53Please see the ocfs2_hb_ctl and mkfs.ocfs2 manual pages distributed
54with ocfs2-tools.
55
56Once you're heartbeating, DLM lock 'domains' can be easily created /
57destroyed and locks within them accessed.
58
59Locking
60=======
61
62Users may access dlmfs via standard file system calls, or they can use
63'libo2dlm' (distributed with ocfs2-tools) which abstracts the file
64system calls and presents a more traditional locking api.
65
66dlmfs handles lock caching automatically for the user, so a lock
67request for an already acquired lock will not generate another DLM
68call. Userspace programs are assumed to handle their own local
69locking.
70
71Two levels of locks are supported - Shared Read, and Exclusive.
72Also supported is a Trylock operation.
73
74For information on the libo2dlm interface, please see o2dlm.h,
75distributed with ocfs2-tools.
76
77Lock value blocks can be read and written to a resource via read(2)
78and write(2) against the fd obtained via your open(2) call. The
79maximum currently supported LVB length is 64 bytes (though that is an
80OCFS2 DLM limitation). Through this mechanism, users of dlmfs can share
81small amounts of data amongst their nodes.
82
83mkdir(2) signals dlmfs to join a domain (which will have the same name
84as the resulting directory)
85
86rmdir(2) signals dlmfs to leave the domain
87
88Locks for a given domain are represented by regular inodes inside the
89domain directory. Locking against them is done via the open(2) system
90call.
91
92The open(2) call will not return until your lock has been granted or
93an error has occurred, unless it has been instructed to do a trylock
94operation. If the lock succeeds, you'll get an fd.
95
96Use open(2) with O_CREAT to ensure the resource inode is created - dlmfs does
97not automatically create inodes for existing lock resources.
98
99Open Flag Lock Request Type
100--------- -----------------
101O_RDONLY Shared Read
102O_RDWR Exclusive
103
104Open Flag Resulting Locking Behavior
105--------- --------------------------
106O_NONBLOCK Trylock operation
107
108You must provide exactly one of O_RDONLY or O_RDWR.
109
110If O_NONBLOCK is also provided and the trylock operation was valid but
111could not lock the resource then open(2) will return ETXTBSY.
112
113close(2) drops the lock associated with your fd.
114
115Modes passed to mkdir(2) or open(2) are adhered to locally. Chown is
116supported locally as well. This means you can use them to restrict
117access to the resources via dlmfs on your local node only.
118
119The resource LVB may be read from the fd in either Shared Read or
120Exclusive modes via the read(2) system call. It can be written via
121write(2) only when open in Exclusive mode.
122
123Once written, an LVB will be visible to other nodes who obtain Shared
124Read or higher level locks on the resource.
125
126See Also
127========
128http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
129
130For more information on the VMS distributed locking API.
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index 2a5274bcc8bb..ce3f7c29d270 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,6 +1,8 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1EXTRA_CFLAGS += -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o 3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o
4 4
5ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ 5ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
6 dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o 6 dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
7
8ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
new file mode 100644
index 000000000000..dd2d24dc25e0
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -0,0 +1,640 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmfs.c
5 *
6 * Code which implements the kernel side of a minimal userspace
7 * interface to our DLM. This file handles the virtual file system
8 * used for communication with userspace. Credit should go to ramfs,
9 * which was a template for the fs side of this module.
10 *
11 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public
24 * License along with this program; if not, write to the
25 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 * Boston, MA 02111-1307, USA.
27 */
28
29/* Simple VFS hooks based on: */
30/*
31 * Resizable simple ram filesystem for Linux.
32 *
33 * Copyright (C) 2000 Linus Torvalds.
34 * 2000 Transmeta Corp.
35 */
36
37#include <linux/module.h>
38#include <linux/fs.h>
39#include <linux/pagemap.h>
40#include <linux/types.h>
41#include <linux/slab.h>
42#include <linux/highmem.h>
43#include <linux/init.h>
44#include <linux/string.h>
45#include <linux/smp_lock.h>
46#include <linux/backing-dev.h>
47
48#include <asm/uaccess.h>
49
50
51#include "cluster/nodemanager.h"
52#include "cluster/heartbeat.h"
53#include "cluster/tcp.h"
54
55#include "dlmapi.h"
56
57#include "userdlm.h"
58
59#include "dlmfsver.h"
60
61#define MLOG_MASK_PREFIX ML_DLMFS
62#include "cluster/masklog.h"
63
64static struct super_operations dlmfs_ops;
65static struct file_operations dlmfs_file_operations;
66static struct inode_operations dlmfs_dir_inode_operations;
67static struct inode_operations dlmfs_root_inode_operations;
68static struct inode_operations dlmfs_file_inode_operations;
69static kmem_cache_t *dlmfs_inode_cache;
70
71struct workqueue_struct *user_dlm_worker;
72
73/*
74 * decodes a set of open flags into a valid lock level and a set of flags.
75 * returns < 0 if we have invalid flags
76 * flags which mean something to us:
77 * O_RDONLY -> PRMODE level
78 * O_WRONLY -> EXMODE level
79 *
80 * O_NONBLOCK -> LKM_NOQUEUE
81 */
82static int dlmfs_decode_open_flags(int open_flags,
83 int *level,
84 int *flags)
85{
86 if (open_flags & (O_WRONLY|O_RDWR))
87 *level = LKM_EXMODE;
88 else
89 *level = LKM_PRMODE;
90
91 *flags = 0;
92 if (open_flags & O_NONBLOCK)
93 *flags |= LKM_NOQUEUE;
94
95 return 0;
96}
97
98static int dlmfs_file_open(struct inode *inode,
99 struct file *file)
100{
101 int status, level, flags;
102 struct dlmfs_filp_private *fp = NULL;
103 struct dlmfs_inode_private *ip;
104
105 if (S_ISDIR(inode->i_mode))
106 BUG();
107
108 mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
109 file->f_flags);
110
111 status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
112 if (status < 0)
113 goto bail;
114
115 /* We don't want to honor O_APPEND at read/write time as it
116 * doesn't make sense for LVB writes. */
117 file->f_flags &= ~O_APPEND;
118
119 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
120 if (!fp) {
121 status = -ENOMEM;
122 goto bail;
123 }
124 fp->fp_lock_level = level;
125
126 ip = DLMFS_I(inode);
127
128 status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
129 if (status < 0) {
130 /* this is a strange error to return here but I want
131 * to be able userspace to be able to distinguish a
132 * valid lock request from one that simply couldn't be
133 * granted. */
134 if (flags & LKM_NOQUEUE && status == -EAGAIN)
135 status = -ETXTBSY;
136 kfree(fp);
137 goto bail;
138 }
139
140 file->private_data = fp;
141bail:
142 return status;
143}
144
145static int dlmfs_file_release(struct inode *inode,
146 struct file *file)
147{
148 int level, status;
149 struct dlmfs_inode_private *ip = DLMFS_I(inode);
150 struct dlmfs_filp_private *fp =
151 (struct dlmfs_filp_private *) file->private_data;
152
153 if (S_ISDIR(inode->i_mode))
154 BUG();
155
156 mlog(0, "close called on inode %lu\n", inode->i_ino);
157
158 status = 0;
159 if (fp) {
160 level = fp->fp_lock_level;
161 if (level != LKM_IVMODE)
162 user_dlm_cluster_unlock(&ip->ip_lockres, level);
163
164 kfree(fp);
165 file->private_data = NULL;
166 }
167
168 return 0;
169}
170
171static ssize_t dlmfs_file_read(struct file *filp,
172 char __user *buf,
173 size_t count,
174 loff_t *ppos)
175{
176 int bytes_left;
177 ssize_t readlen;
178 char *lvb_buf;
179 struct inode *inode = filp->f_dentry->d_inode;
180
181 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
182 inode->i_ino, count, *ppos);
183
184 if (*ppos >= i_size_read(inode))
185 return 0;
186
187 if (!count)
188 return 0;
189
190 if (!access_ok(VERIFY_WRITE, buf, count))
191 return -EFAULT;
192
193 /* don't read past the lvb */
194 if ((count + *ppos) > i_size_read(inode))
195 readlen = i_size_read(inode) - *ppos;
196 else
197 readlen = count - *ppos;
198
199 lvb_buf = kmalloc(readlen, GFP_KERNEL);
200 if (!lvb_buf)
201 return -ENOMEM;
202
203 user_dlm_read_lvb(inode, lvb_buf, readlen);
204 bytes_left = __copy_to_user(buf, lvb_buf, readlen);
205 readlen -= bytes_left;
206
207 kfree(lvb_buf);
208
209 *ppos = *ppos + readlen;
210
211 mlog(0, "read %zd bytes\n", readlen);
212 return readlen;
213}
214
215static ssize_t dlmfs_file_write(struct file *filp,
216 const char __user *buf,
217 size_t count,
218 loff_t *ppos)
219{
220 int bytes_left;
221 ssize_t writelen;
222 char *lvb_buf;
223 struct inode *inode = filp->f_dentry->d_inode;
224
225 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
226 inode->i_ino, count, *ppos);
227
228 if (*ppos >= i_size_read(inode))
229 return -ENOSPC;
230
231 if (!count)
232 return 0;
233
234 if (!access_ok(VERIFY_READ, buf, count))
235 return -EFAULT;
236
237 /* don't write past the lvb */
238 if ((count + *ppos) > i_size_read(inode))
239 writelen = i_size_read(inode) - *ppos;
240 else
241 writelen = count - *ppos;
242
243 lvb_buf = kmalloc(writelen, GFP_KERNEL);
244 if (!lvb_buf)
245 return -ENOMEM;
246
247 bytes_left = copy_from_user(lvb_buf, buf, writelen);
248 writelen -= bytes_left;
249 if (writelen)
250 user_dlm_write_lvb(inode, lvb_buf, writelen);
251
252 kfree(lvb_buf);
253
254 *ppos = *ppos + writelen;
255 mlog(0, "wrote %zd bytes\n", writelen);
256 return writelen;
257}
258
259static void dlmfs_init_once(void *foo,
260 kmem_cache_t *cachep,
261 unsigned long flags)
262{
263 struct dlmfs_inode_private *ip =
264 (struct dlmfs_inode_private *) foo;
265
266 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
267 SLAB_CTOR_CONSTRUCTOR) {
268 ip->ip_dlm = NULL;
269 ip->ip_parent = NULL;
270
271 inode_init_once(&ip->ip_vfs_inode);
272 }
273}
274
275static struct inode *dlmfs_alloc_inode(struct super_block *sb)
276{
277 struct dlmfs_inode_private *ip;
278
279 ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS);
280 if (!ip)
281 return NULL;
282
283 return &ip->ip_vfs_inode;
284}
285
286static void dlmfs_destroy_inode(struct inode *inode)
287{
288 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
289}
290
291static void dlmfs_clear_inode(struct inode *inode)
292{
293 int status;
294 struct dlmfs_inode_private *ip;
295
296 if (!inode)
297 return;
298
299 mlog(0, "inode %lu\n", inode->i_ino);
300
301 ip = DLMFS_I(inode);
302
303 if (S_ISREG(inode->i_mode)) {
304 status = user_dlm_destroy_lock(&ip->ip_lockres);
305 if (status < 0)
306 mlog_errno(status);
307 iput(ip->ip_parent);
308 goto clear_fields;
309 }
310
311 mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm);
312 /* we must be a directory. If required, lets unregister the
313 * dlm context now. */
314 if (ip->ip_dlm)
315 user_dlm_unregister_context(ip->ip_dlm);
316clear_fields:
317 ip->ip_parent = NULL;
318 ip->ip_dlm = NULL;
319}
320
321static struct backing_dev_info dlmfs_backing_dev_info = {
322 .ra_pages = 0, /* No readahead */
323 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
324};
325
326static struct inode *dlmfs_get_root_inode(struct super_block *sb)
327{
328 struct inode *inode = new_inode(sb);
329 int mode = S_IFDIR | 0755;
330 struct dlmfs_inode_private *ip;
331
332 if (inode) {
333 ip = DLMFS_I(inode);
334
335 inode->i_mode = mode;
336 inode->i_uid = current->fsuid;
337 inode->i_gid = current->fsgid;
338 inode->i_blksize = PAGE_CACHE_SIZE;
339 inode->i_blocks = 0;
340 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
341 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
342 inode->i_nlink++;
343
344 inode->i_fop = &simple_dir_operations;
345 inode->i_op = &dlmfs_root_inode_operations;
346 }
347
348 return inode;
349}
350
351static struct inode *dlmfs_get_inode(struct inode *parent,
352 struct dentry *dentry,
353 int mode)
354{
355 struct super_block *sb = parent->i_sb;
356 struct inode * inode = new_inode(sb);
357 struct dlmfs_inode_private *ip;
358
359 if (!inode)
360 return NULL;
361
362 inode->i_mode = mode;
363 inode->i_uid = current->fsuid;
364 inode->i_gid = current->fsgid;
365 inode->i_blksize = PAGE_CACHE_SIZE;
366 inode->i_blocks = 0;
367 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
368 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
369
370 ip = DLMFS_I(inode);
371 ip->ip_dlm = DLMFS_I(parent)->ip_dlm;
372
373 switch (mode & S_IFMT) {
374 default:
375 /* for now we don't support anything other than
376 * directories and regular files. */
377 BUG();
378 break;
379 case S_IFREG:
380 inode->i_op = &dlmfs_file_inode_operations;
381 inode->i_fop = &dlmfs_file_operations;
382
383 i_size_write(inode, DLM_LVB_LEN);
384
385 user_dlm_lock_res_init(&ip->ip_lockres, dentry);
386
387 /* released at clear_inode time, this insures that we
388 * get to drop the dlm reference on each lock *before*
389 * we call the unregister code for releasing parent
390 * directories. */
391 ip->ip_parent = igrab(parent);
392 BUG_ON(!ip->ip_parent);
393 break;
394 case S_IFDIR:
395 inode->i_op = &dlmfs_dir_inode_operations;
396 inode->i_fop = &simple_dir_operations;
397
398 /* directory inodes start off with i_nlink ==
399 * 2 (for "." entry) */
400 inode->i_nlink++;
401 break;
402 }
403
404 if (parent->i_mode & S_ISGID) {
405 inode->i_gid = parent->i_gid;
406 if (S_ISDIR(mode))
407 inode->i_mode |= S_ISGID;
408 }
409
410 return inode;
411}
412
413/*
414 * File creation. Allocate an inode, and we're done..
415 */
416/* SMP-safe */
417static int dlmfs_mkdir(struct inode * dir,
418 struct dentry * dentry,
419 int mode)
420{
421 int status;
422 struct inode *inode = NULL;
423 struct qstr *domain = &dentry->d_name;
424 struct dlmfs_inode_private *ip;
425 struct dlm_ctxt *dlm;
426
427 mlog(0, "mkdir %.*s\n", domain->len, domain->name);
428
429 /* verify that we have a proper domain */
430 if (domain->len >= O2NM_MAX_NAME_LEN) {
431 status = -EINVAL;
432 mlog(ML_ERROR, "invalid domain name for directory.\n");
433 goto bail;
434 }
435
436 inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR);
437 if (!inode) {
438 status = -ENOMEM;
439 mlog_errno(status);
440 goto bail;
441 }
442
443 ip = DLMFS_I(inode);
444
445 dlm = user_dlm_register_context(domain);
446 if (IS_ERR(dlm)) {
447 status = PTR_ERR(dlm);
448 mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
449 status, domain->len, domain->name);
450 goto bail;
451 }
452 ip->ip_dlm = dlm;
453
454 dir->i_nlink++;
455 d_instantiate(dentry, inode);
456 dget(dentry); /* Extra count - pin the dentry in core */
457
458 status = 0;
459bail:
460 if (status < 0)
461 iput(inode);
462 return status;
463}
464
465static int dlmfs_create(struct inode *dir,
466 struct dentry *dentry,
467 int mode,
468 struct nameidata *nd)
469{
470 int status = 0;
471 struct inode *inode;
472 struct qstr *name = &dentry->d_name;
473
474 mlog(0, "create %.*s\n", name->len, name->name);
475
476 /* verify name is valid and doesn't contain any dlm reserved
477 * characters */
478 if (name->len >= USER_DLM_LOCK_ID_MAX_LEN ||
479 name->name[0] == '$') {
480 status = -EINVAL;
481 mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
482 name->name);
483 goto bail;
484 }
485
486 inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG);
487 if (!inode) {
488 status = -ENOMEM;
489 mlog_errno(status);
490 goto bail;
491 }
492
493 d_instantiate(dentry, inode);
494 dget(dentry); /* Extra count - pin the dentry in core */
495bail:
496 return status;
497}
498
499static int dlmfs_unlink(struct inode *dir,
500 struct dentry *dentry)
501{
502 int status;
503 struct inode *inode = dentry->d_inode;
504
505 mlog(0, "unlink inode %lu\n", inode->i_ino);
506
507 /* if there are no current holders, or none that are waiting
508 * to acquire a lock, this basically destroys our lockres. */
509 status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
510 if (status < 0) {
511 mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n",
512 dentry->d_name.len, dentry->d_name.name, status);
513 goto bail;
514 }
515 status = simple_unlink(dir, dentry);
516bail:
517 return status;
518}
519
520static int dlmfs_fill_super(struct super_block * sb,
521 void * data,
522 int silent)
523{
524 struct inode * inode;
525 struct dentry * root;
526
527 sb->s_maxbytes = MAX_LFS_FILESIZE;
528 sb->s_blocksize = PAGE_CACHE_SIZE;
529 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
530 sb->s_magic = DLMFS_MAGIC;
531 sb->s_op = &dlmfs_ops;
532 inode = dlmfs_get_root_inode(sb);
533 if (!inode)
534 return -ENOMEM;
535
536 root = d_alloc_root(inode);
537 if (!root) {
538 iput(inode);
539 return -ENOMEM;
540 }
541 sb->s_root = root;
542 return 0;
543}
544
545static struct file_operations dlmfs_file_operations = {
546 .open = dlmfs_file_open,
547 .release = dlmfs_file_release,
548 .read = dlmfs_file_read,
549 .write = dlmfs_file_write,
550};
551
552static struct inode_operations dlmfs_dir_inode_operations = {
553 .create = dlmfs_create,
554 .lookup = simple_lookup,
555 .unlink = dlmfs_unlink,
556};
557
558/* this way we can restrict mkdir to only the toplevel of the fs. */
559static struct inode_operations dlmfs_root_inode_operations = {
560 .lookup = simple_lookup,
561 .mkdir = dlmfs_mkdir,
562 .rmdir = simple_rmdir,
563};
564
565static struct super_operations dlmfs_ops = {
566 .statfs = simple_statfs,
567 .alloc_inode = dlmfs_alloc_inode,
568 .destroy_inode = dlmfs_destroy_inode,
569 .clear_inode = dlmfs_clear_inode,
570 .drop_inode = generic_delete_inode,
571};
572
573static struct inode_operations dlmfs_file_inode_operations = {
574 .getattr = simple_getattr,
575};
576
577static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type,
578 int flags, const char *dev_name, void *data)
579{
580 return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super);
581}
582
583static struct file_system_type dlmfs_fs_type = {
584 .owner = THIS_MODULE,
585 .name = "ocfs2_dlmfs",
586 .get_sb = dlmfs_get_sb,
587 .kill_sb = kill_litter_super,
588};
589
590static int __init init_dlmfs_fs(void)
591{
592 int status;
593 int cleanup_inode = 0, cleanup_worker = 0;
594
595 dlmfs_print_version();
596
597 dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
598 sizeof(struct dlmfs_inode_private),
599 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
600 dlmfs_init_once, NULL);
601 if (!dlmfs_inode_cache)
602 return -ENOMEM;
603 cleanup_inode = 1;
604
605 user_dlm_worker = create_singlethread_workqueue("user_dlm");
606 if (!user_dlm_worker) {
607 status = -ENOMEM;
608 goto bail;
609 }
610 cleanup_worker = 1;
611
612 status = register_filesystem(&dlmfs_fs_type);
613bail:
614 if (status) {
615 if (cleanup_inode)
616 kmem_cache_destroy(dlmfs_inode_cache);
617 if (cleanup_worker)
618 destroy_workqueue(user_dlm_worker);
619 } else
620 printk("OCFS2 User DLM kernel interface loaded\n");
621 return status;
622}
623
624static void __exit exit_dlmfs_fs(void)
625{
626 unregister_filesystem(&dlmfs_fs_type);
627
628 flush_workqueue(user_dlm_worker);
629 destroy_workqueue(user_dlm_worker);
630
631 if (kmem_cache_destroy(dlmfs_inode_cache))
632 printk(KERN_INFO "dlmfs_inode_cache: not all structures "
633 "were freed\n");
634}
635
636MODULE_AUTHOR("Oracle");
637MODULE_LICENSE("GPL");
638
639module_init(init_dlmfs_fs)
640module_exit(exit_dlmfs_fs)
diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlm/dlmfsver.c
new file mode 100644
index 000000000000..d2be3ad841f9
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmfsver.c
@@ -0,0 +1,42 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmfsver.c
5 *
6 * version string
7 *
8 * Copyright (C) 2002, 2005 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26#include <linux/module.h>
27#include <linux/kernel.h>
28
29#include "dlmfsver.h"
30
31#define DLM_BUILD_VERSION "1.3.3"
32
33#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
34
35void dlmfs_print_version(void)
36{
37 printk(KERN_INFO "%s\n", VERSION_STR);
38}
39
40MODULE_DESCRIPTION(VERSION_STR);
41
42MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlm/dlmfsver.h b/fs/ocfs2/dlm/dlmfsver.h
new file mode 100644
index 000000000000..f35eadbed25c
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmfsver.h
@@ -0,0 +1,31 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmfsver.h
5 *
6 * Function prototypes
7 *
8 * Copyright (C) 2005 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26#ifndef DLMFS_VER_H
27#define DLMFS_VER_H
28
29void dlmfs_print_version(void);
30
31#endif /* DLMFS_VER_H */
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
new file mode 100644
index 000000000000..e1fdd288796e
--- /dev/null
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -0,0 +1,658 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * userdlm.c
5 *
6 * Code which implements the kernel side of a minimal userspace
7 * interface to our DLM.
8 *
9 * Many of the functions here are pared down versions of dlmglue.c
10 * functions.
11 *
12 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public
16 * License as published by the Free Software Foundation; either
17 * version 2 of the License, or (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public
25 * License along with this program; if not, write to the
26 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 * Boston, MA 02111-1307, USA.
28 */
29
30#include <asm/signal.h>
31
32#include <linux/module.h>
33#include <linux/fs.h>
34#include <linux/types.h>
35#include <linux/crc32.h>
36
37
38#include "cluster/nodemanager.h"
39#include "cluster/heartbeat.h"
40#include "cluster/tcp.h"
41
42#include "dlmapi.h"
43
44#include "userdlm.h"
45
46#define MLOG_MASK_PREFIX ML_DLMFS
47#include "cluster/masklog.h"
48
49static inline int user_check_wait_flag(struct user_lock_res *lockres,
50 int flag)
51{
52 int ret;
53
54 spin_lock(&lockres->l_lock);
55 ret = lockres->l_flags & flag;
56 spin_unlock(&lockres->l_lock);
57
58 return ret;
59}
60
61static inline void user_wait_on_busy_lock(struct user_lock_res *lockres)
62
63{
64 wait_event(lockres->l_event,
65 !user_check_wait_flag(lockres, USER_LOCK_BUSY));
66}
67
68static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres)
69
70{
71 wait_event(lockres->l_event,
72 !user_check_wait_flag(lockres, USER_LOCK_BLOCKED));
73}
74
75/* I heart container_of... */
76static inline struct dlm_ctxt *
77dlm_ctxt_from_user_lockres(struct user_lock_res *lockres)
78{
79 struct dlmfs_inode_private *ip;
80
81 ip = container_of(lockres,
82 struct dlmfs_inode_private,
83 ip_lockres);
84 return ip->ip_dlm;
85}
86
87static struct inode *
88user_dlm_inode_from_user_lockres(struct user_lock_res *lockres)
89{
90 struct dlmfs_inode_private *ip;
91
92 ip = container_of(lockres,
93 struct dlmfs_inode_private,
94 ip_lockres);
95 return &ip->ip_vfs_inode;
96}
97
98static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
99{
100 spin_lock(&lockres->l_lock);
101 lockres->l_flags &= ~USER_LOCK_BUSY;
102 spin_unlock(&lockres->l_lock);
103}
104
/*
 * Log a dlm failure at ML_ERROR level.
 *
 *   _func    - name of the dlm call that failed (string)
 *   _stat    - the dlm status it returned; evaluated twice, so pass a
 *              plain variable with no side effects
 *   _lockres - pointer to the struct user_lock_res involved
 *
 * Macro arguments are parenthesized so that any pointer expression
 * can safely be passed as _lockres.
 */
#define user_log_dlm_error(_func, _stat, _lockres) do {			\
	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
	     "resource %s: %s\n", dlm_errname(_stat), (_func),		\
	     (_lockres)->l_name, dlm_errmsg(_stat));			\
} while (0)
110
111/* WARNING: This function lives in a world where the only three lock
112 * levels are EX, PR, and NL. It *will* have to be adjusted when more
113 * lock types are added. */
114static inline int user_highest_compat_lock_level(int level)
115{
116 int new_level = LKM_EXMODE;
117
118 if (level == LKM_EXMODE)
119 new_level = LKM_NLMODE;
120 else if (level == LKM_PRMODE)
121 new_level = LKM_PRMODE;
122 return new_level;
123}
124
125static void user_ast(void *opaque)
126{
127 struct user_lock_res *lockres = opaque;
128 struct dlm_lockstatus *lksb;
129
130 mlog(0, "AST fired for lockres %s\n", lockres->l_name);
131
132 spin_lock(&lockres->l_lock);
133
134 lksb = &(lockres->l_lksb);
135 if (lksb->status != DLM_NORMAL) {
136 mlog(ML_ERROR, "lksb status value of %u on lockres %s\n",
137 lksb->status, lockres->l_name);
138 spin_unlock(&lockres->l_lock);
139 return;
140 }
141
142 /* we're downconverting. */
143 if (lockres->l_requested < lockres->l_level) {
144 if (lockres->l_requested <=
145 user_highest_compat_lock_level(lockres->l_blocking)) {
146 lockres->l_blocking = LKM_NLMODE;
147 lockres->l_flags &= ~USER_LOCK_BLOCKED;
148 }
149 }
150
151 lockres->l_level = lockres->l_requested;
152 lockres->l_requested = LKM_IVMODE;
153 lockres->l_flags |= USER_LOCK_ATTACHED;
154 lockres->l_flags &= ~USER_LOCK_BUSY;
155
156 spin_unlock(&lockres->l_lock);
157
158 wake_up(&lockres->l_event);
159}
160
161static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
162{
163 struct inode *inode;
164 inode = user_dlm_inode_from_user_lockres(lockres);
165 if (!igrab(inode))
166 BUG();
167}
168
169static void user_dlm_unblock_lock(void *opaque);
170
171static void __user_dlm_queue_lockres(struct user_lock_res *lockres)
172{
173 if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
174 user_dlm_grab_inode_ref(lockres);
175
176 INIT_WORK(&lockres->l_work, user_dlm_unblock_lock,
177 lockres);
178
179 queue_work(user_dlm_worker, &lockres->l_work);
180 lockres->l_flags |= USER_LOCK_QUEUED;
181 }
182}
183
184static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres)
185{
186 int queue = 0;
187
188 if (!(lockres->l_flags & USER_LOCK_BLOCKED))
189 return;
190
191 switch (lockres->l_blocking) {
192 case LKM_EXMODE:
193 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
194 queue = 1;
195 break;
196 case LKM_PRMODE:
197 if (!lockres->l_ex_holders)
198 queue = 1;
199 break;
200 default:
201 BUG();
202 }
203
204 if (queue)
205 __user_dlm_queue_lockres(lockres);
206}
207
208static void user_bast(void *opaque, int level)
209{
210 struct user_lock_res *lockres = opaque;
211
212 mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n",
213 lockres->l_name, level);
214
215 spin_lock(&lockres->l_lock);
216 lockres->l_flags |= USER_LOCK_BLOCKED;
217 if (level > lockres->l_blocking)
218 lockres->l_blocking = level;
219
220 __user_dlm_queue_lockres(lockres);
221 spin_unlock(&lockres->l_lock);
222
223 wake_up(&lockres->l_event);
224}
225
226static void user_unlock_ast(void *opaque, enum dlm_status status)
227{
228 struct user_lock_res *lockres = opaque;
229
230 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
231
232 if (status != DLM_NORMAL)
233 mlog(ML_ERROR, "Dlm returns status %d\n", status);
234
235 spin_lock(&lockres->l_lock);
236 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN)
237 lockres->l_level = LKM_IVMODE;
238 else {
239 lockres->l_requested = LKM_IVMODE; /* cancel an
240 * upconvert
241 * request. */
242 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
243 /* we want the unblock thread to look at it again
244 * now. */
245 __user_dlm_queue_lockres(lockres);
246 }
247
248 lockres->l_flags &= ~USER_LOCK_BUSY;
249 spin_unlock(&lockres->l_lock);
250
251 wake_up(&lockres->l_event);
252}
253
/*
 * Release (iput) one reference on the inode that embeds 'lockres'.
 */
static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
{
	iput(user_dlm_inode_from_user_lockres(lockres));
}
260
261static void user_dlm_unblock_lock(void *opaque)
262{
263 int new_level, status;
264 struct user_lock_res *lockres = (struct user_lock_res *) opaque;
265 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
266
267 mlog(0, "processing lockres %s\n", lockres->l_name);
268
269 spin_lock(&lockres->l_lock);
270
271 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
272 BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED));
273
274 /* notice that we don't clear USER_LOCK_BLOCKED here. That's
275 * for user_ast to do. */
276 lockres->l_flags &= ~USER_LOCK_QUEUED;
277
278 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
279 mlog(0, "lock is in teardown so we do nothing\n");
280 spin_unlock(&lockres->l_lock);
281 goto drop_ref;
282 }
283
284 if (lockres->l_flags & USER_LOCK_BUSY) {
285 mlog(0, "BUSY flag detected...\n");
286 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
287 spin_unlock(&lockres->l_lock);
288 goto drop_ref;
289 }
290
291 lockres->l_flags |= USER_LOCK_IN_CANCEL;
292 spin_unlock(&lockres->l_lock);
293
294 status = dlmunlock(dlm,
295 &lockres->l_lksb,
296 LKM_CANCEL,
297 user_unlock_ast,
298 lockres);
299 if (status == DLM_CANCELGRANT) {
300 /* If we got this, then the ast was fired
301 * before we could cancel. We cleanup our
302 * state, and restart the function. */
303 spin_lock(&lockres->l_lock);
304 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
305 spin_unlock(&lockres->l_lock);
306 } else if (status != DLM_NORMAL)
307 user_log_dlm_error("dlmunlock", status, lockres);
308 goto drop_ref;
309 }
310
311 /* If there are still incompat holders, we can exit safely
312 * without worrying about re-queueing this lock as that will
313 * happen on the last call to user_cluster_unlock. */
314 if ((lockres->l_blocking == LKM_EXMODE)
315 && (lockres->l_ex_holders || lockres->l_ro_holders)) {
316 spin_unlock(&lockres->l_lock);
317 mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n",
318 lockres->l_ro_holders, lockres->l_ex_holders);
319 goto drop_ref;
320 }
321
322 if ((lockres->l_blocking == LKM_PRMODE)
323 && lockres->l_ex_holders) {
324 spin_unlock(&lockres->l_lock);
325 mlog(0, "can't downconvert for pr: ex = %u\n",
326 lockres->l_ex_holders);
327 goto drop_ref;
328 }
329
330 /* yay, we can downconvert now. */
331 new_level = user_highest_compat_lock_level(lockres->l_blocking);
332 lockres->l_requested = new_level;
333 lockres->l_flags |= USER_LOCK_BUSY;
334 mlog(0, "Downconvert lock from %d to %d\n",
335 lockres->l_level, new_level);
336 spin_unlock(&lockres->l_lock);
337
338 /* need lock downconvert request now... */
339 status = dlmlock(dlm,
340 new_level,
341 &lockres->l_lksb,
342 LKM_CONVERT|LKM_VALBLK,
343 lockres->l_name,
344 user_ast,
345 lockres,
346 user_bast);
347 if (status != DLM_NORMAL) {
348 user_log_dlm_error("dlmlock", status, lockres);
349 user_recover_from_dlm_error(lockres);
350 }
351
352drop_ref:
353 user_dlm_drop_inode_ref(lockres);
354}
355
356static inline void user_dlm_inc_holders(struct user_lock_res *lockres,
357 int level)
358{
359 switch(level) {
360 case LKM_EXMODE:
361 lockres->l_ex_holders++;
362 break;
363 case LKM_PRMODE:
364 lockres->l_ro_holders++;
365 break;
366 default:
367 BUG();
368 }
369}
370
371/* predict what lock level we'll be dropping down to on behalf
372 * of another node, and return true if the currently wanted
373 * level will be compatible with it. */
374static inline int
375user_may_continue_on_blocked_lock(struct user_lock_res *lockres,
376 int wanted)
377{
378 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
379
380 return wanted <= user_highest_compat_lock_level(lockres->l_blocking);
381}
382
383int user_dlm_cluster_lock(struct user_lock_res *lockres,
384 int level,
385 int lkm_flags)
386{
387 int status, local_flags;
388 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
389
390 if (level != LKM_EXMODE &&
391 level != LKM_PRMODE) {
392 mlog(ML_ERROR, "lockres %s: invalid request!\n",
393 lockres->l_name);
394 status = -EINVAL;
395 goto bail;
396 }
397
398 mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n",
399 lockres->l_name,
400 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
401 lkm_flags);
402
403again:
404 if (signal_pending(current)) {
405 status = -ERESTARTSYS;
406 goto bail;
407 }
408
409 spin_lock(&lockres->l_lock);
410
411 /* We only compare against the currently granted level
412 * here. If the lock is blocked waiting on a downconvert,
413 * we'll get caught below. */
414 if ((lockres->l_flags & USER_LOCK_BUSY) &&
415 (level > lockres->l_level)) {
416 /* is someone sitting in dlm_lock? If so, wait on
417 * them. */
418 spin_unlock(&lockres->l_lock);
419
420 user_wait_on_busy_lock(lockres);
421 goto again;
422 }
423
424 if ((lockres->l_flags & USER_LOCK_BLOCKED) &&
425 (!user_may_continue_on_blocked_lock(lockres, level))) {
426 /* is the lock is currently blocked on behalf of
427 * another node */
428 spin_unlock(&lockres->l_lock);
429
430 user_wait_on_blocked_lock(lockres);
431 goto again;
432 }
433
434 if (level > lockres->l_level) {
435 local_flags = lkm_flags | LKM_VALBLK;
436 if (lockres->l_level != LKM_IVMODE)
437 local_flags |= LKM_CONVERT;
438
439 lockres->l_requested = level;
440 lockres->l_flags |= USER_LOCK_BUSY;
441 spin_unlock(&lockres->l_lock);
442
443 BUG_ON(level == LKM_IVMODE);
444 BUG_ON(level == LKM_NLMODE);
445
446 mlog(0, "lock %s, get lock from %d to level = %d\n",
447 lockres->l_name, lockres->l_level, level);
448
449 /* call dlm_lock to upgrade lock now */
450 status = dlmlock(dlm,
451 level,
452 &lockres->l_lksb,
453 local_flags,
454 lockres->l_name,
455 user_ast,
456 lockres,
457 user_bast);
458 if (status != DLM_NORMAL) {
459 if ((lkm_flags & LKM_NOQUEUE) &&
460 (status == DLM_NOTQUEUED))
461 status = -EAGAIN;
462 else {
463 user_log_dlm_error("dlmlock", status, lockres);
464 status = -EINVAL;
465 }
466 user_recover_from_dlm_error(lockres);
467 goto bail;
468 }
469
470 mlog(0, "lock %s, successfull return from dlmlock\n",
471 lockres->l_name);
472
473 user_wait_on_busy_lock(lockres);
474 goto again;
475 }
476
477 user_dlm_inc_holders(lockres, level);
478 spin_unlock(&lockres->l_lock);
479
480 mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
481 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
482
483 status = 0;
484bail:
485 return status;
486}
487
488static inline void user_dlm_dec_holders(struct user_lock_res *lockres,
489 int level)
490{
491 switch(level) {
492 case LKM_EXMODE:
493 BUG_ON(!lockres->l_ex_holders);
494 lockres->l_ex_holders--;
495 break;
496 case LKM_PRMODE:
497 BUG_ON(!lockres->l_ro_holders);
498 lockres->l_ro_holders--;
499 break;
500 default:
501 BUG();
502 }
503}
504
505void user_dlm_cluster_unlock(struct user_lock_res *lockres,
506 int level)
507{
508 if (level != LKM_EXMODE &&
509 level != LKM_PRMODE) {
510 mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name);
511 return;
512 }
513
514 mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
515 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
516
517 spin_lock(&lockres->l_lock);
518 user_dlm_dec_holders(lockres, level);
519 __user_dlm_cond_queue_lockres(lockres);
520 spin_unlock(&lockres->l_lock);
521}
522
523void user_dlm_write_lvb(struct inode *inode,
524 const char *val,
525 unsigned int len)
526{
527 struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
528 char *lvb = lockres->l_lksb.lvb;
529
530 BUG_ON(len > DLM_LVB_LEN);
531
532 spin_lock(&lockres->l_lock);
533
534 BUG_ON(lockres->l_level < LKM_EXMODE);
535 memcpy(lvb, val, len);
536
537 spin_unlock(&lockres->l_lock);
538}
539
540void user_dlm_read_lvb(struct inode *inode,
541 char *val,
542 unsigned int len)
543{
544 struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
545 char *lvb = lockres->l_lksb.lvb;
546
547 BUG_ON(len > DLM_LVB_LEN);
548
549 spin_lock(&lockres->l_lock);
550
551 BUG_ON(lockres->l_level < LKM_PRMODE);
552 memcpy(val, lvb, len);
553
554 spin_unlock(&lockres->l_lock);
555}
556
557void user_dlm_lock_res_init(struct user_lock_res *lockres,
558 struct dentry *dentry)
559{
560 memset(lockres, 0, sizeof(*lockres));
561
562 spin_lock_init(&lockres->l_lock);
563 init_waitqueue_head(&lockres->l_event);
564 lockres->l_level = LKM_IVMODE;
565 lockres->l_requested = LKM_IVMODE;
566 lockres->l_blocking = LKM_IVMODE;
567
568 /* should have been checked before getting here. */
569 BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN);
570
571 memcpy(lockres->l_name,
572 dentry->d_name.name,
573 dentry->d_name.len);
574}
575
576int user_dlm_destroy_lock(struct user_lock_res *lockres)
577{
578 int status = -EBUSY;
579 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
580
581 mlog(0, "asked to destroy %s\n", lockres->l_name);
582
583 spin_lock(&lockres->l_lock);
584 while (lockres->l_flags & USER_LOCK_BUSY) {
585 spin_unlock(&lockres->l_lock);
586
587 mlog(0, "lock %s is busy\n", lockres->l_name);
588
589 user_wait_on_busy_lock(lockres);
590
591 spin_lock(&lockres->l_lock);
592 }
593
594 if (lockres->l_ro_holders || lockres->l_ex_holders) {
595 spin_unlock(&lockres->l_lock);
596 mlog(0, "lock %s has holders\n", lockres->l_name);
597 goto bail;
598 }
599
600 status = 0;
601 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
602 spin_unlock(&lockres->l_lock);
603 mlog(0, "lock %s is not attached\n", lockres->l_name);
604 goto bail;
605 }
606
607 lockres->l_flags &= ~USER_LOCK_ATTACHED;
608 lockres->l_flags |= USER_LOCK_BUSY;
609 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
610 spin_unlock(&lockres->l_lock);
611
612 mlog(0, "unlocking lockres %s\n", lockres->l_name);
613 status = dlmunlock(dlm,
614 &lockres->l_lksb,
615 LKM_VALBLK,
616 user_unlock_ast,
617 lockres);
618 if (status != DLM_NORMAL) {
619 user_log_dlm_error("dlmunlock", status, lockres);
620 status = -EINVAL;
621 goto bail;
622 }
623
624 user_wait_on_busy_lock(lockres);
625
626 status = 0;
627bail:
628 return status;
629}
630
631struct dlm_ctxt *user_dlm_register_context(struct qstr *name)
632{
633 struct dlm_ctxt *dlm;
634 u32 dlm_key;
635 char *domain;
636
637 domain = kmalloc(name->len + 1, GFP_KERNEL);
638 if (!domain) {
639 mlog_errno(-ENOMEM);
640 return ERR_PTR(-ENOMEM);
641 }
642
643 dlm_key = crc32_le(0, name->name, name->len);
644
645 snprintf(domain, name->len + 1, "%.*s", name->len, name->name);
646
647 dlm = dlm_register_domain(domain, dlm_key);
648 if (IS_ERR(dlm))
649 mlog_errno(PTR_ERR(dlm));
650
651 kfree(domain);
652 return dlm;
653}
654
/*
 * Counterpart of user_dlm_register_context(): hands 'dlm' straight to
 * dlm_unregister_domain().
 */
void user_dlm_unregister_context(struct dlm_ctxt *dlm)
{
	dlm_unregister_domain(dlm);
}
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
new file mode 100644
index 000000000000..04178bc40b76
--- /dev/null
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -0,0 +1,111 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * userdlm.h
5 *
6 * Userspace dlm defines
7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26
27#ifndef USERDLM_H
28#define USERDLM_H
29
30#include <linux/module.h>
31#include <linux/fs.h>
32#include <linux/types.h>
33#include <linux/workqueue.h>
34
/*
 * user_lock_res->l_flags flags (one bit each).
 *
 * ATTACHED    - have we initialized the lvb; set by the ast once a
 *               request has been granted
 * BUSY        - we are currently in dlm_lock (a request is in flight)
 * BLOCKED     - blocked waiting to downconvert
 * IN_TEARDOWN - we're currently destroying this lock
 * QUEUED      - lock is on the workqueue
 * IN_CANCEL   - a cancel of the in-flight request has been issued
 */
#define USER_LOCK_ATTACHED	(1 << 0)
#define USER_LOCK_BUSY		(1 << 1)
#define USER_LOCK_BLOCKED	(1 << 2)
#define USER_LOCK_IN_TEARDOWN	(1 << 3)
#define USER_LOCK_QUEUED	(1 << 4)
#define USER_LOCK_IN_CANCEL	(1 << 5)
48
49struct user_lock_res {
50 spinlock_t l_lock;
51
52 int l_flags;
53
54#define USER_DLM_LOCK_ID_MAX_LEN 32
55 char l_name[USER_DLM_LOCK_ID_MAX_LEN];
56 int l_level;
57 unsigned int l_ro_holders;
58 unsigned int l_ex_holders;
59 struct dlm_lockstatus l_lksb;
60
61 int l_requested;
62 int l_blocking;
63
64 wait_queue_head_t l_event;
65
66 struct work_struct l_work;
67};
68
/* Workqueue on which blocked lock resources are processed. */
extern struct workqueue_struct *user_dlm_worker;

/* Lock resource lifetime. */
void user_dlm_lock_res_init(struct user_lock_res *lockres,
			    struct dentry *dentry);
int user_dlm_destroy_lock(struct user_lock_res *lockres);

/* Taking and dropping the cluster lock; level is LKM_EXMODE or
 * LKM_PRMODE. */
int user_dlm_cluster_lock(struct user_lock_res *lockres,
			  int level, int lkm_flags);
void user_dlm_cluster_unlock(struct user_lock_res *lockres, int level);

/* Lock value block access; len must not exceed DLM_LVB_LEN. */
void user_dlm_write_lvb(struct inode *inode,
			const char *val, unsigned int len);
void user_dlm_read_lvb(struct inode *inode,
		       char *val, unsigned int len);

/* dlm domain registration. */
struct dlm_ctxt *user_dlm_register_context(struct qstr *name);
void user_dlm_unregister_context(struct dlm_ctxt *dlm);
87
88struct dlmfs_inode_private {
89 struct dlm_ctxt *ip_dlm;
90
91 struct user_lock_res ip_lockres; /* unused for directories. */
92 struct inode *ip_parent;
93
94 struct inode ip_vfs_inode;
95};
96
97static inline struct dlmfs_inode_private *
98DLMFS_I(struct inode *inode)
99{
100 return container_of(inode,
101 struct dlmfs_inode_private,
102 ip_vfs_inode);
103}
104
/*
 * dlmfs per-open-file data.
 * NOTE(review): fp_lock_level presumably records the level at which
 * the file's lock was taken; confirm against dlmfs.c.
 */
struct dlmfs_filp_private {
	int	fp_lock_level;
};
108
/* NOTE(review): presumably the dlmfs superblock magic; confirm in dlmfs.c. */
#define DLMFS_MAGIC	0x76a9f425
110
111#endif /* USERDLM_H */