| author | Mark Fasheh <mark.fasheh@oracle.com> | 2005-12-15 17:31:23 -0500 |
|---|---|---|
| committer | Joel Becker <joel.becker@oracle.com> | 2006-01-03 14:45:47 -0500 |
| commit | 8df08c89c668e1bd922a053fdb5ba1fadbecbb38 (patch) | |
| tree | ab1febb732c01a66c0a9bfe9c8952ba2bb66fba8 /fs/ocfs2 | |
| parent | 6714d8e86bf443f6f7af50f9d432025649f091f5 (diff) | |
[PATCH] OCFS2: The Second Oracle Cluster Filesystem
dlmfs: A minimal dlm userspace interface implemented via a virtual
file system.
Most of the OCFS2 tools make use of this to take cluster locks when
doing operations on the file system.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
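
To make the commit message concrete, here is a minimal userspace sketch (not part of the patch) of taking a shared cluster lock through dlmfs. Everything concrete in it is an assumption: it presumes ocfs2_dlmfs is already mounted at /dlm, and the domain name "mydomain" and lock name "mylock" are invented for the example. The semantics it relies on — mkdir creates a DLM domain, open with O_RDONLY takes a PR lock, read returns the lock value block, close drops the lock — come from dlmfs.c below.

```
/*
 * Hypothetical userspace sketch; /dlm, "mydomain" and "mylock" are
 * assumptions, not values mandated by this patch.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <errno.h>

int main(void)
{
	char lvb[64];	/* the file's size is the DLM lock value block */
	ssize_t n;
	int fd;

	/* Each directory under the mount point is a DLM domain. */
	if (mkdir("/dlm/mydomain", 0755) < 0 && errno != EEXIST) {
		perror("mkdir /dlm/mydomain");
		return 1;
	}

	/* O_RDONLY asks for a shared (PR) lock; open blocks until granted. */
	fd = open("/dlm/mydomain/mylock", O_RDONLY | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Under a PR lock, read() returns the current lock value block. */
	n = read(fd, lvb, sizeof(lvb));
	if (n < 0)
		perror("read lvb");
	else
		printf("read %zd LVB bytes\n", n);

	close(fd);	/* dropping the last open reference releases the lock */
	return 0;
}
```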
Diffstat (limited to 'fs/ocfs2')
| -rw-r--r-- | fs/ocfs2/dlm/Makefile | 4 |
| -rw-r--r-- | fs/ocfs2/dlm/dlmfs.c | 640 |
| -rw-r--r-- | fs/ocfs2/dlm/dlmfsver.c | 42 |
| -rw-r--r-- | fs/ocfs2/dlm/dlmfsver.h | 31 |
| -rw-r--r-- | fs/ocfs2/dlm/userdlm.c | 658 |
| -rw-r--r-- | fs/ocfs2/dlm/userdlm.h | 111 |
6 files changed, 1485 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index 2a5274bcc8bb..ce3f7c29d270 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,6 +1,8 @@
 EXTRA_CFLAGS += -Ifs/ocfs2
 
-obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o
+obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o
 
 ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
 	dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
+
+ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
new file mode 100644
index 000000000000..dd2d24dc25e0
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -0,0 +1,640 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfs.c | ||
5 | * | ||
6 | * Code which implements the kernel side of a minimal userspace | ||
7 | * interface to our DLM. This file handles the virtual file system | ||
8 | * used for communication with userspace. Credit should go to ramfs, | ||
9 | * which was a template for the fs side of this module. | ||
10 | * | ||
11 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public | ||
15 | * License as published by the Free Software Foundation; either | ||
16 | * version 2 of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
21 | * General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public | ||
24 | * License along with this program; if not, write to the | ||
25 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
26 | * Boston, MA 021110-1307, USA. | ||
27 | */ | ||
28 | |||
29 | /* Simple VFS hooks based on: */ | ||
30 | /* | ||
31 | * Resizable simple ram filesystem for Linux. | ||
32 | * | ||
33 | * Copyright (C) 2000 Linus Torvalds. | ||
34 | * 2000 Transmeta Corp. | ||
35 | */ | ||
36 | |||
37 | #include <linux/module.h> | ||
38 | #include <linux/fs.h> | ||
39 | #include <linux/pagemap.h> | ||
40 | #include <linux/types.h> | ||
41 | #include <linux/slab.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/string.h> | ||
45 | #include <linux/smp_lock.h> | ||
46 | #include <linux/backing-dev.h> | ||
47 | |||
48 | #include <asm/uaccess.h> | ||
49 | |||
50 | |||
51 | #include "cluster/nodemanager.h" | ||
52 | #include "cluster/heartbeat.h" | ||
53 | #include "cluster/tcp.h" | ||
54 | |||
55 | #include "dlmapi.h" | ||
56 | |||
57 | #include "userdlm.h" | ||
58 | |||
59 | #include "dlmfsver.h" | ||
60 | |||
61 | #define MLOG_MASK_PREFIX ML_DLMFS | ||
62 | #include "cluster/masklog.h" | ||
63 | |||
64 | static struct super_operations dlmfs_ops; | ||
65 | static struct file_operations dlmfs_file_operations; | ||
66 | static struct inode_operations dlmfs_dir_inode_operations; | ||
67 | static struct inode_operations dlmfs_root_inode_operations; | ||
68 | static struct inode_operations dlmfs_file_inode_operations; | ||
69 | static kmem_cache_t *dlmfs_inode_cache; | ||
70 | |||
71 | struct workqueue_struct *user_dlm_worker; | ||
72 | |||
73 | /* | ||
74 | * decodes a set of open flags into a valid lock level and a set of flags. | ||
75 | * returns < 0 if we have invalid flags | ||
76 | * flags which mean something to us: | ||
77 | * O_RDONLY -> PRMODE level | ||
78 | * O_WRONLY -> EXMODE level | ||
79 | * | ||
80 | * O_NONBLOCK -> LKM_NOQUEUE | ||
81 | */ | ||
82 | static int dlmfs_decode_open_flags(int open_flags, | ||
83 | int *level, | ||
84 | int *flags) | ||
85 | { | ||
86 | if (open_flags & (O_WRONLY|O_RDWR)) | ||
87 | *level = LKM_EXMODE; | ||
88 | else | ||
89 | *level = LKM_PRMODE; | ||
90 | |||
91 | *flags = 0; | ||
92 | if (open_flags & O_NONBLOCK) | ||
93 | *flags |= LKM_NOQUEUE; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int dlmfs_file_open(struct inode *inode, | ||
99 | struct file *file) | ||
100 | { | ||
101 | int status, level, flags; | ||
102 | struct dlmfs_filp_private *fp = NULL; | ||
103 | struct dlmfs_inode_private *ip; | ||
104 | |||
105 | if (S_ISDIR(inode->i_mode)) | ||
106 | BUG(); | ||
107 | |||
108 | mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, | ||
109 | file->f_flags); | ||
110 | |||
111 | status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); | ||
112 | if (status < 0) | ||
113 | goto bail; | ||
114 | |||
115 | /* We don't want to honor O_APPEND at read/write time as it | ||
116 | * doesn't make sense for LVB writes. */ | ||
117 | file->f_flags &= ~O_APPEND; | ||
118 | |||
119 | fp = kmalloc(sizeof(*fp), GFP_KERNEL); | ||
120 | if (!fp) { | ||
121 | status = -ENOMEM; | ||
122 | goto bail; | ||
123 | } | ||
124 | fp->fp_lock_level = level; | ||
125 | |||
126 | ip = DLMFS_I(inode); | ||
127 | |||
128 | status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); | ||
129 | if (status < 0) { | ||
130 | /* this is a strange error to return here but I want | ||
131 | * userspace to be able to distinguish a valid lock | ||
132 | * request from one that simply couldn't be | ||
133 | * granted. */ | ||
134 | if (flags & LKM_NOQUEUE && status == -EAGAIN) | ||
135 | status = -ETXTBSY; | ||
136 | kfree(fp); | ||
137 | goto bail; | ||
138 | } | ||
139 | |||
140 | file->private_data = fp; | ||
141 | bail: | ||
142 | return status; | ||
143 | } | ||
144 | |||
145 | static int dlmfs_file_release(struct inode *inode, | ||
146 | struct file *file) | ||
147 | { | ||
148 | int level, status; | ||
149 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | ||
150 | struct dlmfs_filp_private *fp = | ||
151 | (struct dlmfs_filp_private *) file->private_data; | ||
152 | |||
153 | if (S_ISDIR(inode->i_mode)) | ||
154 | BUG(); | ||
155 | |||
156 | mlog(0, "close called on inode %lu\n", inode->i_ino); | ||
157 | |||
158 | status = 0; | ||
159 | if (fp) { | ||
160 | level = fp->fp_lock_level; | ||
161 | if (level != LKM_IVMODE) | ||
162 | user_dlm_cluster_unlock(&ip->ip_lockres, level); | ||
163 | |||
164 | kfree(fp); | ||
165 | file->private_data = NULL; | ||
166 | } | ||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static ssize_t dlmfs_file_read(struct file *filp, | ||
172 | char __user *buf, | ||
173 | size_t count, | ||
174 | loff_t *ppos) | ||
175 | { | ||
176 | int bytes_left; | ||
177 | ssize_t readlen; | ||
178 | char *lvb_buf; | ||
179 | struct inode *inode = filp->f_dentry->d_inode; | ||
180 | |||
181 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | ||
182 | inode->i_ino, count, *ppos); | ||
183 | |||
184 | if (*ppos >= i_size_read(inode)) | ||
185 | return 0; | ||
186 | |||
187 | if (!count) | ||
188 | return 0; | ||
189 | |||
190 | if (!access_ok(VERIFY_WRITE, buf, count)) | ||
191 | return -EFAULT; | ||
192 | |||
193 | /* don't read past the lvb */ | ||
194 | if ((count + *ppos) > i_size_read(inode)) | ||
195 | readlen = i_size_read(inode) - *ppos; | ||
196 | else | ||
197 | readlen = count - *ppos; | ||
198 | |||
199 | lvb_buf = kmalloc(readlen, GFP_KERNEL); | ||
200 | if (!lvb_buf) | ||
201 | return -ENOMEM; | ||
202 | |||
203 | user_dlm_read_lvb(inode, lvb_buf, readlen); | ||
204 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); | ||
205 | readlen -= bytes_left; | ||
206 | |||
207 | kfree(lvb_buf); | ||
208 | |||
209 | *ppos = *ppos + readlen; | ||
210 | |||
211 | mlog(0, "read %zd bytes\n", readlen); | ||
212 | return readlen; | ||
213 | } | ||
214 | |||
215 | static ssize_t dlmfs_file_write(struct file *filp, | ||
216 | const char __user *buf, | ||
217 | size_t count, | ||
218 | loff_t *ppos) | ||
219 | { | ||
220 | int bytes_left; | ||
221 | ssize_t writelen; | ||
222 | char *lvb_buf; | ||
223 | struct inode *inode = filp->f_dentry->d_inode; | ||
224 | |||
225 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | ||
226 | inode->i_ino, count, *ppos); | ||
227 | |||
228 | if (*ppos >= i_size_read(inode)) | ||
229 | return -ENOSPC; | ||
230 | |||
231 | if (!count) | ||
232 | return 0; | ||
233 | |||
234 | if (!access_ok(VERIFY_READ, buf, count)) | ||
235 | return -EFAULT; | ||
236 | |||
237 | /* don't write past the lvb */ | ||
238 | if ((count + *ppos) > i_size_read(inode)) | ||
239 | writelen = i_size_read(inode) - *ppos; | ||
240 | else | ||
241 | writelen = count - *ppos; | ||
242 | |||
243 | lvb_buf = kmalloc(writelen, GFP_KERNEL); | ||
244 | if (!lvb_buf) | ||
245 | return -ENOMEM; | ||
246 | |||
247 | bytes_left = copy_from_user(lvb_buf, buf, writelen); | ||
248 | writelen -= bytes_left; | ||
249 | if (writelen) | ||
250 | user_dlm_write_lvb(inode, lvb_buf, writelen); | ||
251 | |||
252 | kfree(lvb_buf); | ||
253 | |||
254 | *ppos = *ppos + writelen; | ||
255 | mlog(0, "wrote %zd bytes\n", writelen); | ||
256 | return writelen; | ||
257 | } | ||
258 | |||
259 | static void dlmfs_init_once(void *foo, | ||
260 | kmem_cache_t *cachep, | ||
261 | unsigned long flags) | ||
262 | { | ||
263 | struct dlmfs_inode_private *ip = | ||
264 | (struct dlmfs_inode_private *) foo; | ||
265 | |||
266 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
267 | SLAB_CTOR_CONSTRUCTOR) { | ||
268 | ip->ip_dlm = NULL; | ||
269 | ip->ip_parent = NULL; | ||
270 | |||
271 | inode_init_once(&ip->ip_vfs_inode); | ||
272 | } | ||
273 | } | ||
274 | |||
275 | static struct inode *dlmfs_alloc_inode(struct super_block *sb) | ||
276 | { | ||
277 | struct dlmfs_inode_private *ip; | ||
278 | |||
279 | ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS); | ||
280 | if (!ip) | ||
281 | return NULL; | ||
282 | |||
283 | return &ip->ip_vfs_inode; | ||
284 | } | ||
285 | |||
286 | static void dlmfs_destroy_inode(struct inode *inode) | ||
287 | { | ||
288 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); | ||
289 | } | ||
290 | |||
291 | static void dlmfs_clear_inode(struct inode *inode) | ||
292 | { | ||
293 | int status; | ||
294 | struct dlmfs_inode_private *ip; | ||
295 | |||
296 | if (!inode) | ||
297 | return; | ||
298 | |||
299 | mlog(0, "inode %lu\n", inode->i_ino); | ||
300 | |||
301 | ip = DLMFS_I(inode); | ||
302 | |||
303 | if (S_ISREG(inode->i_mode)) { | ||
304 | status = user_dlm_destroy_lock(&ip->ip_lockres); | ||
305 | if (status < 0) | ||
306 | mlog_errno(status); | ||
307 | iput(ip->ip_parent); | ||
308 | goto clear_fields; | ||
309 | } | ||
310 | |||
311 | mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); | ||
312 | /* we must be a directory. If required, let's unregister the | ||
313 | * dlm context now. */ | ||
314 | if (ip->ip_dlm) | ||
315 | user_dlm_unregister_context(ip->ip_dlm); | ||
316 | clear_fields: | ||
317 | ip->ip_parent = NULL; | ||
318 | ip->ip_dlm = NULL; | ||
319 | } | ||
320 | |||
321 | static struct backing_dev_info dlmfs_backing_dev_info = { | ||
322 | .ra_pages = 0, /* No readahead */ | ||
323 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, | ||
324 | }; | ||
325 | |||
326 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) | ||
327 | { | ||
328 | struct inode *inode = new_inode(sb); | ||
329 | int mode = S_IFDIR | 0755; | ||
330 | struct dlmfs_inode_private *ip; | ||
331 | |||
332 | if (inode) { | ||
333 | ip = DLMFS_I(inode); | ||
334 | |||
335 | inode->i_mode = mode; | ||
336 | inode->i_uid = current->fsuid; | ||
337 | inode->i_gid = current->fsgid; | ||
338 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
339 | inode->i_blocks = 0; | ||
340 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
341 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
342 | inode->i_nlink++; | ||
343 | |||
344 | inode->i_fop = &simple_dir_operations; | ||
345 | inode->i_op = &dlmfs_root_inode_operations; | ||
346 | } | ||
347 | |||
348 | return inode; | ||
349 | } | ||
350 | |||
351 | static struct inode *dlmfs_get_inode(struct inode *parent, | ||
352 | struct dentry *dentry, | ||
353 | int mode) | ||
354 | { | ||
355 | struct super_block *sb = parent->i_sb; | ||
356 | struct inode * inode = new_inode(sb); | ||
357 | struct dlmfs_inode_private *ip; | ||
358 | |||
359 | if (!inode) | ||
360 | return NULL; | ||
361 | |||
362 | inode->i_mode = mode; | ||
363 | inode->i_uid = current->fsuid; | ||
364 | inode->i_gid = current->fsgid; | ||
365 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
366 | inode->i_blocks = 0; | ||
367 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
368 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
369 | |||
370 | ip = DLMFS_I(inode); | ||
371 | ip->ip_dlm = DLMFS_I(parent)->ip_dlm; | ||
372 | |||
373 | switch (mode & S_IFMT) { | ||
374 | default: | ||
375 | /* for now we don't support anything other than | ||
376 | * directories and regular files. */ | ||
377 | BUG(); | ||
378 | break; | ||
379 | case S_IFREG: | ||
380 | inode->i_op = &dlmfs_file_inode_operations; | ||
381 | inode->i_fop = &dlmfs_file_operations; | ||
382 | |||
383 | i_size_write(inode, DLM_LVB_LEN); | ||
384 | |||
385 | user_dlm_lock_res_init(&ip->ip_lockres, dentry); | ||
386 | |||
387 | /* released at clear_inode time, this ensures that we | ||
388 | * get to drop the dlm reference on each lock *before* | ||
389 | * we call the unregister code for releasing parent | ||
390 | * directories. */ | ||
391 | ip->ip_parent = igrab(parent); | ||
392 | BUG_ON(!ip->ip_parent); | ||
393 | break; | ||
394 | case S_IFDIR: | ||
395 | inode->i_op = &dlmfs_dir_inode_operations; | ||
396 | inode->i_fop = &simple_dir_operations; | ||
397 | |||
398 | /* directory inodes start off with i_nlink == | ||
399 | * 2 (for "." entry) */ | ||
400 | inode->i_nlink++; | ||
401 | break; | ||
402 | } | ||
403 | |||
404 | if (parent->i_mode & S_ISGID) { | ||
405 | inode->i_gid = parent->i_gid; | ||
406 | if (S_ISDIR(mode)) | ||
407 | inode->i_mode |= S_ISGID; | ||
408 | } | ||
409 | |||
410 | return inode; | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * File creation. Allocate an inode, and we're done. | ||
415 | */ | ||
416 | /* SMP-safe */ | ||
417 | static int dlmfs_mkdir(struct inode * dir, | ||
418 | struct dentry * dentry, | ||
419 | int mode) | ||
420 | { | ||
421 | int status; | ||
422 | struct inode *inode = NULL; | ||
423 | struct qstr *domain = &dentry->d_name; | ||
424 | struct dlmfs_inode_private *ip; | ||
425 | struct dlm_ctxt *dlm; | ||
426 | |||
427 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); | ||
428 | |||
429 | /* verify that we have a proper domain */ | ||
430 | if (domain->len >= O2NM_MAX_NAME_LEN) { | ||
431 | status = -EINVAL; | ||
432 | mlog(ML_ERROR, "invalid domain name for directory.\n"); | ||
433 | goto bail; | ||
434 | } | ||
435 | |||
436 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); | ||
437 | if (!inode) { | ||
438 | status = -ENOMEM; | ||
439 | mlog_errno(status); | ||
440 | goto bail; | ||
441 | } | ||
442 | |||
443 | ip = DLMFS_I(inode); | ||
444 | |||
445 | dlm = user_dlm_register_context(domain); | ||
446 | if (IS_ERR(dlm)) { | ||
447 | status = PTR_ERR(dlm); | ||
448 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", | ||
449 | status, domain->len, domain->name); | ||
450 | goto bail; | ||
451 | } | ||
452 | ip->ip_dlm = dlm; | ||
453 | |||
454 | dir->i_nlink++; | ||
455 | d_instantiate(dentry, inode); | ||
456 | dget(dentry); /* Extra count - pin the dentry in core */ | ||
457 | |||
458 | status = 0; | ||
459 | bail: | ||
460 | if (status < 0) | ||
461 | iput(inode); | ||
462 | return status; | ||
463 | } | ||
464 | |||
465 | static int dlmfs_create(struct inode *dir, | ||
466 | struct dentry *dentry, | ||
467 | int mode, | ||
468 | struct nameidata *nd) | ||
469 | { | ||
470 | int status = 0; | ||
471 | struct inode *inode; | ||
472 | struct qstr *name = &dentry->d_name; | ||
473 | |||
474 | mlog(0, "create %.*s\n", name->len, name->name); | ||
475 | |||
476 | /* verify name is valid and doesn't contain any dlm reserved | ||
477 | * characters */ | ||
478 | if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || | ||
479 | name->name[0] == '$') { | ||
480 | status = -EINVAL; | ||
481 | mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, | ||
482 | name->name); | ||
483 | goto bail; | ||
484 | } | ||
485 | |||
486 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); | ||
487 | if (!inode) { | ||
488 | status = -ENOMEM; | ||
489 | mlog_errno(status); | ||
490 | goto bail; | ||
491 | } | ||
492 | |||
493 | d_instantiate(dentry, inode); | ||
494 | dget(dentry); /* Extra count - pin the dentry in core */ | ||
495 | bail: | ||
496 | return status; | ||
497 | } | ||
498 | |||
499 | static int dlmfs_unlink(struct inode *dir, | ||
500 | struct dentry *dentry) | ||
501 | { | ||
502 | int status; | ||
503 | struct inode *inode = dentry->d_inode; | ||
504 | |||
505 | mlog(0, "unlink inode %lu\n", inode->i_ino); | ||
506 | |||
507 | /* if there are no current holders and none waiting | ||
508 | * to acquire a lock, this basically destroys our lockres. */ | ||
509 | status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); | ||
510 | if (status < 0) { | ||
511 | mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", | ||
512 | dentry->d_name.len, dentry->d_name.name, status); | ||
513 | goto bail; | ||
514 | } | ||
515 | status = simple_unlink(dir, dentry); | ||
516 | bail: | ||
517 | return status; | ||
518 | } | ||
519 | |||
520 | static int dlmfs_fill_super(struct super_block * sb, | ||
521 | void * data, | ||
522 | int silent) | ||
523 | { | ||
524 | struct inode * inode; | ||
525 | struct dentry * root; | ||
526 | |||
527 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
528 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
529 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
530 | sb->s_magic = DLMFS_MAGIC; | ||
531 | sb->s_op = &dlmfs_ops; | ||
532 | inode = dlmfs_get_root_inode(sb); | ||
533 | if (!inode) | ||
534 | return -ENOMEM; | ||
535 | |||
536 | root = d_alloc_root(inode); | ||
537 | if (!root) { | ||
538 | iput(inode); | ||
539 | return -ENOMEM; | ||
540 | } | ||
541 | sb->s_root = root; | ||
542 | return 0; | ||
543 | } | ||
544 | |||
545 | static struct file_operations dlmfs_file_operations = { | ||
546 | .open = dlmfs_file_open, | ||
547 | .release = dlmfs_file_release, | ||
548 | .read = dlmfs_file_read, | ||
549 | .write = dlmfs_file_write, | ||
550 | }; | ||
551 | |||
552 | static struct inode_operations dlmfs_dir_inode_operations = { | ||
553 | .create = dlmfs_create, | ||
554 | .lookup = simple_lookup, | ||
555 | .unlink = dlmfs_unlink, | ||
556 | }; | ||
557 | |||
558 | /* this way we can restrict mkdir to only the toplevel of the fs. */ | ||
559 | static struct inode_operations dlmfs_root_inode_operations = { | ||
560 | .lookup = simple_lookup, | ||
561 | .mkdir = dlmfs_mkdir, | ||
562 | .rmdir = simple_rmdir, | ||
563 | }; | ||
564 | |||
565 | static struct super_operations dlmfs_ops = { | ||
566 | .statfs = simple_statfs, | ||
567 | .alloc_inode = dlmfs_alloc_inode, | ||
568 | .destroy_inode = dlmfs_destroy_inode, | ||
569 | .clear_inode = dlmfs_clear_inode, | ||
570 | .drop_inode = generic_delete_inode, | ||
571 | }; | ||
572 | |||
573 | static struct inode_operations dlmfs_file_inode_operations = { | ||
574 | .getattr = simple_getattr, | ||
575 | }; | ||
576 | |||
577 | static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type, | ||
578 | int flags, const char *dev_name, void *data) | ||
579 | { | ||
580 | return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super); | ||
581 | } | ||
582 | |||
583 | static struct file_system_type dlmfs_fs_type = { | ||
584 | .owner = THIS_MODULE, | ||
585 | .name = "ocfs2_dlmfs", | ||
586 | .get_sb = dlmfs_get_sb, | ||
587 | .kill_sb = kill_litter_super, | ||
588 | }; | ||
589 | |||
590 | static int __init init_dlmfs_fs(void) | ||
591 | { | ||
592 | int status; | ||
593 | int cleanup_inode = 0, cleanup_worker = 0; | ||
594 | |||
595 | dlmfs_print_version(); | ||
596 | |||
597 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", | ||
598 | sizeof(struct dlmfs_inode_private), | ||
599 | 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, | ||
600 | dlmfs_init_once, NULL); | ||
601 | if (!dlmfs_inode_cache) | ||
602 | return -ENOMEM; | ||
603 | cleanup_inode = 1; | ||
604 | |||
605 | user_dlm_worker = create_singlethread_workqueue("user_dlm"); | ||
606 | if (!user_dlm_worker) { | ||
607 | status = -ENOMEM; | ||
608 | goto bail; | ||
609 | } | ||
610 | cleanup_worker = 1; | ||
611 | |||
612 | status = register_filesystem(&dlmfs_fs_type); | ||
613 | bail: | ||
614 | if (status) { | ||
615 | if (cleanup_inode) | ||
616 | kmem_cache_destroy(dlmfs_inode_cache); | ||
617 | if (cleanup_worker) | ||
618 | destroy_workqueue(user_dlm_worker); | ||
619 | } else | ||
620 | printk("OCFS2 User DLM kernel interface loaded\n"); | ||
621 | return status; | ||
622 | } | ||
623 | |||
624 | static void __exit exit_dlmfs_fs(void) | ||
625 | { | ||
626 | unregister_filesystem(&dlmfs_fs_type); | ||
627 | |||
628 | flush_workqueue(user_dlm_worker); | ||
629 | destroy_workqueue(user_dlm_worker); | ||
630 | |||
631 | if (kmem_cache_destroy(dlmfs_inode_cache)) | ||
632 | printk(KERN_INFO "dlmfs_inode_cache: not all structures " | ||
633 | "were freed\n"); | ||
634 | } | ||
635 | |||
636 | MODULE_AUTHOR("Oracle"); | ||
637 | MODULE_LICENSE("GPL"); | ||
638 | |||
639 | module_init(init_dlmfs_fs) | ||
640 | module_exit(exit_dlmfs_fs) | ||
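
dlmfs_file_open() above translates O_NONBLOCK into LKM_NOQUEUE and reports a refused trylock as -ETXTBSY, and writes to a lock file update the lock value block, which requires the exclusive level taken by O_WRONLY/O_RDWR. A rough userspace sketch of that path follows; it is illustrative only, and again assumes a /dlm mount point and invented domain/lock names.

```
/* Hypothetical trylock sketch; /dlm, "mydomain" and "mylock" are assumptions. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

int main(void)
{
	const char msg[] = "owned by node 3";
	int fd;

	/* O_RDWR asks for an exclusive (EX) lock; O_NONBLOCK becomes
	 * LKM_NOQUEUE, so a contended lock fails immediately. */
	fd = open("/dlm/mydomain/mylock", O_RDWR | O_NONBLOCK | O_CREAT, 0644);
	if (fd < 0) {
		if (errno == ETXTBSY)	/* dlmfs_file_open() maps -EAGAIN to this */
			fprintf(stderr, "lock is held elsewhere, try again later\n");
		else
			perror("open");
		return 1;
	}

	/* Holding EX, a write() replaces the start of the lock value block. */
	if (write(fd, msg, sizeof(msg)) < 0)
		perror("write lvb");

	close(fd);	/* releases the EX lock */
	return 0;
}
```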
diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlm/dlmfsver.c
new file mode 100644
index 000000000000..d2be3ad841f9
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmfsver.c
@@ -0,0 +1,42 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | |||
29 | #include "dlmfsver.h" | ||
30 | |||
31 | #define DLM_BUILD_VERSION "1.3.3" | ||
32 | |||
33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | ||
34 | |||
35 | void dlmfs_print_version(void) | ||
36 | { | ||
37 | printk(KERN_INFO "%s\n", VERSION_STR); | ||
38 | } | ||
39 | |||
40 | MODULE_DESCRIPTION(VERSION_STR); | ||
41 | |||
42 | MODULE_VERSION(DLM_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/dlm/dlmfsver.h b/fs/ocfs2/dlm/dlmfsver.h
new file mode 100644
index 000000000000..f35eadbed25c
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmfsver.h
@@ -0,0 +1,31 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef DLMFS_VER_H | ||
27 | #define DLMFS_VER_H | ||
28 | |||
29 | void dlmfs_print_version(void); | ||
30 | |||
31 | #endif /* DLMFS_VER_H */ | ||
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
new file mode 100644
index 000000000000..e1fdd288796e
--- /dev/null
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -0,0 +1,658 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * userdlm.c | ||
5 | * | ||
6 | * Code which implements the kernel side of a minimal userspace | ||
7 | * interface to our DLM. | ||
8 | * | ||
9 | * Many of the functions here are pared down versions of dlmglue.c | ||
10 | * functions. | ||
11 | * | ||
12 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public | ||
16 | * License as published by the Free Software Foundation; either | ||
17 | * version 2 of the License, or (at your option) any later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, | ||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
22 | * General Public License for more details. | ||
23 | * | ||
24 | * You should have received a copy of the GNU General Public | ||
25 | * License along with this program; if not, write to the | ||
26 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
27 | * Boston, MA 021110-1307, USA. | ||
28 | */ | ||
29 | |||
30 | #include <asm/signal.h> | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/fs.h> | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/crc32.h> | ||
36 | |||
37 | |||
38 | #include "cluster/nodemanager.h" | ||
39 | #include "cluster/heartbeat.h" | ||
40 | #include "cluster/tcp.h" | ||
41 | |||
42 | #include "dlmapi.h" | ||
43 | |||
44 | #include "userdlm.h" | ||
45 | |||
46 | #define MLOG_MASK_PREFIX ML_DLMFS | ||
47 | #include "cluster/masklog.h" | ||
48 | |||
49 | static inline int user_check_wait_flag(struct user_lock_res *lockres, | ||
50 | int flag) | ||
51 | { | ||
52 | int ret; | ||
53 | |||
54 | spin_lock(&lockres->l_lock); | ||
55 | ret = lockres->l_flags & flag; | ||
56 | spin_unlock(&lockres->l_lock); | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static inline void user_wait_on_busy_lock(struct user_lock_res *lockres) | ||
62 | |||
63 | { | ||
64 | wait_event(lockres->l_event, | ||
65 | !user_check_wait_flag(lockres, USER_LOCK_BUSY)); | ||
66 | } | ||
67 | |||
68 | static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres) | ||
69 | |||
70 | { | ||
71 | wait_event(lockres->l_event, | ||
72 | !user_check_wait_flag(lockres, USER_LOCK_BLOCKED)); | ||
73 | } | ||
74 | |||
75 | /* I heart container_of... */ | ||
76 | static inline struct dlm_ctxt * | ||
77 | dlm_ctxt_from_user_lockres(struct user_lock_res *lockres) | ||
78 | { | ||
79 | struct dlmfs_inode_private *ip; | ||
80 | |||
81 | ip = container_of(lockres, | ||
82 | struct dlmfs_inode_private, | ||
83 | ip_lockres); | ||
84 | return ip->ip_dlm; | ||
85 | } | ||
86 | |||
87 | static struct inode * | ||
88 | user_dlm_inode_from_user_lockres(struct user_lock_res *lockres) | ||
89 | { | ||
90 | struct dlmfs_inode_private *ip; | ||
91 | |||
92 | ip = container_of(lockres, | ||
93 | struct dlmfs_inode_private, | ||
94 | ip_lockres); | ||
95 | return &ip->ip_vfs_inode; | ||
96 | } | ||
97 | |||
98 | static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | ||
99 | { | ||
100 | spin_lock(&lockres->l_lock); | ||
101 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
102 | spin_unlock(&lockres->l_lock); | ||
103 | } | ||
104 | |||
105 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ | ||
106 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | ||
107 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | ||
108 | _lockres->l_name, dlm_errmsg(_stat)); \ | ||
109 | } while (0) | ||
110 | |||
111 | /* WARNING: This function lives in a world where the only three lock | ||
112 | * levels are EX, PR, and NL. It *will* have to be adjusted when more | ||
113 | * lock types are added. */ | ||
114 | static inline int user_highest_compat_lock_level(int level) | ||
115 | { | ||
116 | int new_level = LKM_EXMODE; | ||
117 | |||
118 | if (level == LKM_EXMODE) | ||
119 | new_level = LKM_NLMODE; | ||
120 | else if (level == LKM_PRMODE) | ||
121 | new_level = LKM_PRMODE; | ||
122 | return new_level; | ||
123 | } | ||
124 | |||
125 | static void user_ast(void *opaque) | ||
126 | { | ||
127 | struct user_lock_res *lockres = opaque; | ||
128 | struct dlm_lockstatus *lksb; | ||
129 | |||
130 | mlog(0, "AST fired for lockres %s\n", lockres->l_name); | ||
131 | |||
132 | spin_lock(&lockres->l_lock); | ||
133 | |||
134 | lksb = &(lockres->l_lksb); | ||
135 | if (lksb->status != DLM_NORMAL) { | ||
136 | mlog(ML_ERROR, "lksb status value of %u on lockres %s\n", | ||
137 | lksb->status, lockres->l_name); | ||
138 | spin_unlock(&lockres->l_lock); | ||
139 | return; | ||
140 | } | ||
141 | |||
142 | /* we're downconverting. */ | ||
143 | if (lockres->l_requested < lockres->l_level) { | ||
144 | if (lockres->l_requested <= | ||
145 | user_highest_compat_lock_level(lockres->l_blocking)) { | ||
146 | lockres->l_blocking = LKM_NLMODE; | ||
147 | lockres->l_flags &= ~USER_LOCK_BLOCKED; | ||
148 | } | ||
149 | } | ||
150 | |||
151 | lockres->l_level = lockres->l_requested; | ||
152 | lockres->l_requested = LKM_IVMODE; | ||
153 | lockres->l_flags |= USER_LOCK_ATTACHED; | ||
154 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
155 | |||
156 | spin_unlock(&lockres->l_lock); | ||
157 | |||
158 | wake_up(&lockres->l_event); | ||
159 | } | ||
160 | |||
161 | static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres) | ||
162 | { | ||
163 | struct inode *inode; | ||
164 | inode = user_dlm_inode_from_user_lockres(lockres); | ||
165 | if (!igrab(inode)) | ||
166 | BUG(); | ||
167 | } | ||
168 | |||
169 | static void user_dlm_unblock_lock(void *opaque); | ||
170 | |||
171 | static void __user_dlm_queue_lockres(struct user_lock_res *lockres) | ||
172 | { | ||
173 | if (!(lockres->l_flags & USER_LOCK_QUEUED)) { | ||
174 | user_dlm_grab_inode_ref(lockres); | ||
175 | |||
176 | INIT_WORK(&lockres->l_work, user_dlm_unblock_lock, | ||
177 | lockres); | ||
178 | |||
179 | queue_work(user_dlm_worker, &lockres->l_work); | ||
180 | lockres->l_flags |= USER_LOCK_QUEUED; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres) | ||
185 | { | ||
186 | int queue = 0; | ||
187 | |||
188 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) | ||
189 | return; | ||
190 | |||
191 | switch (lockres->l_blocking) { | ||
192 | case LKM_EXMODE: | ||
193 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | ||
194 | queue = 1; | ||
195 | break; | ||
196 | case LKM_PRMODE: | ||
197 | if (!lockres->l_ex_holders) | ||
198 | queue = 1; | ||
199 | break; | ||
200 | default: | ||
201 | BUG(); | ||
202 | } | ||
203 | |||
204 | if (queue) | ||
205 | __user_dlm_queue_lockres(lockres); | ||
206 | } | ||
207 | |||
208 | static void user_bast(void *opaque, int level) | ||
209 | { | ||
210 | struct user_lock_res *lockres = opaque; | ||
211 | |||
212 | mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n", | ||
213 | lockres->l_name, level); | ||
214 | |||
215 | spin_lock(&lockres->l_lock); | ||
216 | lockres->l_flags |= USER_LOCK_BLOCKED; | ||
217 | if (level > lockres->l_blocking) | ||
218 | lockres->l_blocking = level; | ||
219 | |||
220 | __user_dlm_queue_lockres(lockres); | ||
221 | spin_unlock(&lockres->l_lock); | ||
222 | |||
223 | wake_up(&lockres->l_event); | ||
224 | } | ||
225 | |||
226 | static void user_unlock_ast(void *opaque, enum dlm_status status) | ||
227 | { | ||
228 | struct user_lock_res *lockres = opaque; | ||
229 | |||
230 | mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); | ||
231 | |||
232 | if (status != DLM_NORMAL) | ||
233 | mlog(ML_ERROR, "Dlm returns status %d\n", status); | ||
234 | |||
235 | spin_lock(&lockres->l_lock); | ||
236 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) | ||
237 | lockres->l_level = LKM_IVMODE; | ||
238 | else { | ||
239 | lockres->l_requested = LKM_IVMODE; /* cancel an | ||
240 | * upconvert | ||
241 | * request. */ | ||
242 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | ||
243 | /* we want the unblock thread to look at it again | ||
244 | * now. */ | ||
245 | __user_dlm_queue_lockres(lockres); | ||
246 | } | ||
247 | |||
248 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
249 | spin_unlock(&lockres->l_lock); | ||
250 | |||
251 | wake_up(&lockres->l_event); | ||
252 | } | ||
253 | |||
254 | static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres) | ||
255 | { | ||
256 | struct inode *inode; | ||
257 | inode = user_dlm_inode_from_user_lockres(lockres); | ||
258 | iput(inode); | ||
259 | } | ||
260 | |||
261 | static void user_dlm_unblock_lock(void *opaque) | ||
262 | { | ||
263 | int new_level, status; | ||
264 | struct user_lock_res *lockres = (struct user_lock_res *) opaque; | ||
265 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | ||
266 | |||
267 | mlog(0, "processing lockres %s\n", lockres->l_name); | ||
268 | |||
269 | spin_lock(&lockres->l_lock); | ||
270 | |||
271 | BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); | ||
272 | BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED)); | ||
273 | |||
274 | /* notice that we don't clear USER_LOCK_BLOCKED here. That's | ||
275 | * for user_ast to do. */ | ||
276 | lockres->l_flags &= ~USER_LOCK_QUEUED; | ||
277 | |||
278 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | ||
279 | mlog(0, "lock is in teardown so we do nothing\n"); | ||
280 | spin_unlock(&lockres->l_lock); | ||
281 | goto drop_ref; | ||
282 | } | ||
283 | |||
284 | if (lockres->l_flags & USER_LOCK_BUSY) { | ||
285 | mlog(0, "BUSY flag detected...\n"); | ||
286 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { | ||
287 | spin_unlock(&lockres->l_lock); | ||
288 | goto drop_ref; | ||
289 | } | ||
290 | |||
291 | lockres->l_flags |= USER_LOCK_IN_CANCEL; | ||
292 | spin_unlock(&lockres->l_lock); | ||
293 | |||
294 | status = dlmunlock(dlm, | ||
295 | &lockres->l_lksb, | ||
296 | LKM_CANCEL, | ||
297 | user_unlock_ast, | ||
298 | lockres); | ||
299 | if (status == DLM_CANCELGRANT) { | ||
300 | /* If we got this, then the ast was fired | ||
301 | * before we could cancel. We clean up our | ||
302 | * state, and restart the function. */ | ||
303 | spin_lock(&lockres->l_lock); | ||
304 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | ||
305 | spin_unlock(&lockres->l_lock); | ||
306 | } else if (status != DLM_NORMAL) | ||
307 | user_log_dlm_error("dlmunlock", status, lockres); | ||
308 | goto drop_ref; | ||
309 | } | ||
310 | |||
311 | /* If there are still incompatible holders, we can exit safely | ||
312 | * without worrying about re-queueing this lock, as that will | ||
313 | * happen on the last call to user_dlm_cluster_unlock. */ | ||
314 | if ((lockres->l_blocking == LKM_EXMODE) | ||
315 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | ||
316 | spin_unlock(&lockres->l_lock); | ||
317 | mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n", | ||
318 | lockres->l_ro_holders, lockres->l_ex_holders); | ||
319 | goto drop_ref; | ||
320 | } | ||
321 | |||
322 | if ((lockres->l_blocking == LKM_PRMODE) | ||
323 | && lockres->l_ex_holders) { | ||
324 | spin_unlock(&lockres->l_lock); | ||
325 | mlog(0, "can't downconvert for pr: ex = %u\n", | ||
326 | lockres->l_ex_holders); | ||
327 | goto drop_ref; | ||
328 | } | ||
329 | |||
330 | /* yay, we can downconvert now. */ | ||
331 | new_level = user_highest_compat_lock_level(lockres->l_blocking); | ||
332 | lockres->l_requested = new_level; | ||
333 | lockres->l_flags |= USER_LOCK_BUSY; | ||
334 | mlog(0, "Downconvert lock from %d to %d\n", | ||
335 | lockres->l_level, new_level); | ||
336 | spin_unlock(&lockres->l_lock); | ||
337 | |||
338 | /* need lock downconvert request now... */ | ||
339 | status = dlmlock(dlm, | ||
340 | new_level, | ||
341 | &lockres->l_lksb, | ||
342 | LKM_CONVERT|LKM_VALBLK, | ||
343 | lockres->l_name, | ||
344 | user_ast, | ||
345 | lockres, | ||
346 | user_bast); | ||
347 | if (status != DLM_NORMAL) { | ||
348 | user_log_dlm_error("dlmlock", status, lockres); | ||
349 | user_recover_from_dlm_error(lockres); | ||
350 | } | ||
351 | |||
352 | drop_ref: | ||
353 | user_dlm_drop_inode_ref(lockres); | ||
354 | } | ||
355 | |||
356 | static inline void user_dlm_inc_holders(struct user_lock_res *lockres, | ||
357 | int level) | ||
358 | { | ||
359 | switch(level) { | ||
360 | case LKM_EXMODE: | ||
361 | lockres->l_ex_holders++; | ||
362 | break; | ||
363 | case LKM_PRMODE: | ||
364 | lockres->l_ro_holders++; | ||
365 | break; | ||
366 | default: | ||
367 | BUG(); | ||
368 | } | ||
369 | } | ||
370 | |||
371 | /* predict what lock level we'll be dropping down to on behalf | ||
372 | * of another node, and return true if the currently wanted | ||
373 | * level will be compatible with it. */ | ||
374 | static inline int | ||
375 | user_may_continue_on_blocked_lock(struct user_lock_res *lockres, | ||
376 | int wanted) | ||
377 | { | ||
378 | BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); | ||
379 | |||
380 | return wanted <= user_highest_compat_lock_level(lockres->l_blocking); | ||
381 | } | ||
382 | |||
383 | int user_dlm_cluster_lock(struct user_lock_res *lockres, | ||
384 | int level, | ||
385 | int lkm_flags) | ||
386 | { | ||
387 | int status, local_flags; | ||
388 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | ||
389 | |||
390 | if (level != LKM_EXMODE && | ||
391 | level != LKM_PRMODE) { | ||
392 | mlog(ML_ERROR, "lockres %s: invalid request!\n", | ||
393 | lockres->l_name); | ||
394 | status = -EINVAL; | ||
395 | goto bail; | ||
396 | } | ||
397 | |||
398 | mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n", | ||
399 | lockres->l_name, | ||
400 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", | ||
401 | lkm_flags); | ||
402 | |||
403 | again: | ||
404 | if (signal_pending(current)) { | ||
405 | status = -ERESTARTSYS; | ||
406 | goto bail; | ||
407 | } | ||
408 | |||
409 | spin_lock(&lockres->l_lock); | ||
410 | |||
411 | /* We only compare against the currently granted level | ||
412 | * here. If the lock is blocked waiting on a downconvert, | ||
413 | * we'll get caught below. */ | ||
414 | if ((lockres->l_flags & USER_LOCK_BUSY) && | ||
415 | (level > lockres->l_level)) { | ||
416 | /* is someone sitting in dlm_lock? If so, wait on | ||
417 | * them. */ | ||
418 | spin_unlock(&lockres->l_lock); | ||
419 | |||
420 | user_wait_on_busy_lock(lockres); | ||
421 | goto again; | ||
422 | } | ||
423 | |||
424 | if ((lockres->l_flags & USER_LOCK_BLOCKED) && | ||
425 | (!user_may_continue_on_blocked_lock(lockres, level))) { | ||
426 | /* the lock is currently blocked on behalf of | ||
427 | * another node */ | ||
428 | spin_unlock(&lockres->l_lock); | ||
429 | |||
430 | user_wait_on_blocked_lock(lockres); | ||
431 | goto again; | ||
432 | } | ||
433 | |||
434 | if (level > lockres->l_level) { | ||
435 | local_flags = lkm_flags | LKM_VALBLK; | ||
436 | if (lockres->l_level != LKM_IVMODE) | ||
437 | local_flags |= LKM_CONVERT; | ||
438 | |||
439 | lockres->l_requested = level; | ||
440 | lockres->l_flags |= USER_LOCK_BUSY; | ||
441 | spin_unlock(&lockres->l_lock); | ||
442 | |||
443 | BUG_ON(level == LKM_IVMODE); | ||
444 | BUG_ON(level == LKM_NLMODE); | ||
445 | |||
446 | mlog(0, "lock %s, get lock from %d to level = %d\n", | ||
447 | lockres->l_name, lockres->l_level, level); | ||
448 | |||
449 | /* call dlm_lock to upgrade lock now */ | ||
450 | status = dlmlock(dlm, | ||
451 | level, | ||
452 | &lockres->l_lksb, | ||
453 | local_flags, | ||
454 | lockres->l_name, | ||
455 | user_ast, | ||
456 | lockres, | ||
457 | user_bast); | ||
458 | if (status != DLM_NORMAL) { | ||
459 | if ((lkm_flags & LKM_NOQUEUE) && | ||
460 | (status == DLM_NOTQUEUED)) | ||
461 | status = -EAGAIN; | ||
462 | else { | ||
463 | user_log_dlm_error("dlmlock", status, lockres); | ||
464 | status = -EINVAL; | ||
465 | } | ||
466 | user_recover_from_dlm_error(lockres); | ||
467 | goto bail; | ||
468 | } | ||
469 | |||
470 | mlog(0, "lock %s, successful return from dlmlock\n", | ||
471 | lockres->l_name); | ||
472 | |||
473 | user_wait_on_busy_lock(lockres); | ||
474 | goto again; | ||
475 | } | ||
476 | |||
477 | user_dlm_inc_holders(lockres, level); | ||
478 | spin_unlock(&lockres->l_lock); | ||
479 | |||
480 | mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name, | ||
481 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE"); | ||
482 | |||
483 | status = 0; | ||
484 | bail: | ||
485 | return status; | ||
486 | } | ||
487 | |||
488 | static inline void user_dlm_dec_holders(struct user_lock_res *lockres, | ||
489 | int level) | ||
490 | { | ||
491 | switch(level) { | ||
492 | case LKM_EXMODE: | ||
493 | BUG_ON(!lockres->l_ex_holders); | ||
494 | lockres->l_ex_holders--; | ||
495 | break; | ||
496 | case LKM_PRMODE: | ||
497 | BUG_ON(!lockres->l_ro_holders); | ||
498 | lockres->l_ro_holders--; | ||
499 | break; | ||
500 | default: | ||
501 | BUG(); | ||
502 | } | ||
503 | } | ||
504 | |||
505 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, | ||
506 | int level) | ||
507 | { | ||
508 | if (level != LKM_EXMODE && | ||
509 | level != LKM_PRMODE) { | ||
510 | mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name); | ||
511 | return; | ||
512 | } | ||
513 | |||
514 | mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name, | ||
515 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE"); | ||
516 | |||
517 | spin_lock(&lockres->l_lock); | ||
518 | user_dlm_dec_holders(lockres, level); | ||
519 | __user_dlm_cond_queue_lockres(lockres); | ||
520 | spin_unlock(&lockres->l_lock); | ||
521 | } | ||
522 | |||
523 | void user_dlm_write_lvb(struct inode *inode, | ||
524 | const char *val, | ||
525 | unsigned int len) | ||
526 | { | ||
527 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | ||
528 | char *lvb = lockres->l_lksb.lvb; | ||
529 | |||
530 | BUG_ON(len > DLM_LVB_LEN); | ||
531 | |||
532 | spin_lock(&lockres->l_lock); | ||
533 | |||
534 | BUG_ON(lockres->l_level < LKM_EXMODE); | ||
535 | memcpy(lvb, val, len); | ||
536 | |||
537 | spin_unlock(&lockres->l_lock); | ||
538 | } | ||
539 | |||
540 | void user_dlm_read_lvb(struct inode *inode, | ||
541 | char *val, | ||
542 | unsigned int len) | ||
543 | { | ||
544 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | ||
545 | char *lvb = lockres->l_lksb.lvb; | ||
546 | |||
547 | BUG_ON(len > DLM_LVB_LEN); | ||
548 | |||
549 | spin_lock(&lockres->l_lock); | ||
550 | |||
551 | BUG_ON(lockres->l_level < LKM_PRMODE); | ||
552 | memcpy(val, lvb, len); | ||
553 | |||
554 | spin_unlock(&lockres->l_lock); | ||
555 | } | ||
556 | |||
557 | void user_dlm_lock_res_init(struct user_lock_res *lockres, | ||
558 | struct dentry *dentry) | ||
559 | { | ||
560 | memset(lockres, 0, sizeof(*lockres)); | ||
561 | |||
562 | spin_lock_init(&lockres->l_lock); | ||
563 | init_waitqueue_head(&lockres->l_event); | ||
564 | lockres->l_level = LKM_IVMODE; | ||
565 | lockres->l_requested = LKM_IVMODE; | ||
566 | lockres->l_blocking = LKM_IVMODE; | ||
567 | |||
568 | /* should have been checked before getting here. */ | ||
569 | BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN); | ||
570 | |||
571 | memcpy(lockres->l_name, | ||
572 | dentry->d_name.name, | ||
573 | dentry->d_name.len); | ||
574 | } | ||
575 | |||
576 | int user_dlm_destroy_lock(struct user_lock_res *lockres) | ||
577 | { | ||
578 | int status = -EBUSY; | ||
579 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | ||
580 | |||
581 | mlog(0, "asked to destroy %s\n", lockres->l_name); | ||
582 | |||
583 | spin_lock(&lockres->l_lock); | ||
584 | while (lockres->l_flags & USER_LOCK_BUSY) { | ||
585 | spin_unlock(&lockres->l_lock); | ||
586 | |||
587 | mlog(0, "lock %s is busy\n", lockres->l_name); | ||
588 | |||
589 | user_wait_on_busy_lock(lockres); | ||
590 | |||
591 | spin_lock(&lockres->l_lock); | ||
592 | } | ||
593 | |||
594 | if (lockres->l_ro_holders || lockres->l_ex_holders) { | ||
595 | spin_unlock(&lockres->l_lock); | ||
596 | mlog(0, "lock %s has holders\n", lockres->l_name); | ||
597 | goto bail; | ||
598 | } | ||
599 | |||
600 | status = 0; | ||
601 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { | ||
602 | spin_unlock(&lockres->l_lock); | ||
603 | mlog(0, "lock %s is not attached\n", lockres->l_name); | ||
604 | goto bail; | ||
605 | } | ||
606 | |||
607 | lockres->l_flags &= ~USER_LOCK_ATTACHED; | ||
608 | lockres->l_flags |= USER_LOCK_BUSY; | ||
609 | lockres->l_flags |= USER_LOCK_IN_TEARDOWN; | ||
610 | spin_unlock(&lockres->l_lock); | ||
611 | |||
612 | mlog(0, "unlocking lockres %s\n", lockres->l_name); | ||
613 | status = dlmunlock(dlm, | ||
614 | &lockres->l_lksb, | ||
615 | LKM_VALBLK, | ||
616 | user_unlock_ast, | ||
617 | lockres); | ||
618 | if (status != DLM_NORMAL) { | ||
619 | user_log_dlm_error("dlmunlock", status, lockres); | ||
620 | status = -EINVAL; | ||
621 | goto bail; | ||
622 | } | ||
623 | |||
624 | user_wait_on_busy_lock(lockres); | ||
625 | |||
626 | status = 0; | ||
627 | bail: | ||
628 | return status; | ||
629 | } | ||
630 | |||
631 | struct dlm_ctxt *user_dlm_register_context(struct qstr *name) | ||
632 | { | ||
633 | struct dlm_ctxt *dlm; | ||
634 | u32 dlm_key; | ||
635 | char *domain; | ||
636 | |||
637 | domain = kmalloc(name->len + 1, GFP_KERNEL); | ||
638 | if (!domain) { | ||
639 | mlog_errno(-ENOMEM); | ||
640 | return ERR_PTR(-ENOMEM); | ||
641 | } | ||
642 | |||
643 | dlm_key = crc32_le(0, name->name, name->len); | ||
644 | |||
645 | snprintf(domain, name->len + 1, "%.*s", name->len, name->name); | ||
646 | |||
647 | dlm = dlm_register_domain(domain, dlm_key); | ||
648 | if (IS_ERR(dlm)) | ||
649 | mlog_errno(PTR_ERR(dlm)); | ||
650 | |||
651 | kfree(domain); | ||
652 | return dlm; | ||
653 | } | ||
654 | |||
655 | void user_dlm_unregister_context(struct dlm_ctxt *dlm) | ||
656 | { | ||
657 | dlm_unregister_domain(dlm); | ||
658 | } | ||
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
new file mode 100644
index 000000000000..04178bc40b76
--- /dev/null
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -0,0 +1,111 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * userdlm.h | ||
5 | * | ||
6 | * Userspace dlm defines | ||
7 | * | ||
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | |||
27 | #ifndef USERDLM_H | ||
28 | #define USERDLM_H | ||
29 | |||
30 | #include <linux/module.h> | ||
31 | #include <linux/fs.h> | ||
32 | #include <linux/types.h> | ||
33 | #include <linux/workqueue.h> | ||
34 | |||
35 | /* user_lock_res->l_flags flags. */ | ||
36 | #define USER_LOCK_ATTACHED (0x00000001) /* have we initialized | ||
37 | * the lvb */ | ||
38 | #define USER_LOCK_BUSY (0x00000002) /* we are currently in | ||
39 | * dlm_lock */ | ||
40 | #define USER_LOCK_BLOCKED (0x00000004) /* blocked waiting to | ||
41 | * downconvert*/ | ||
42 | #define USER_LOCK_IN_TEARDOWN (0x00000008) /* we're currently | ||
43 | * destroying this | ||
44 | * lock. */ | ||
45 | #define USER_LOCK_QUEUED (0x00000010) /* lock is on the | ||
46 | * workqueue */ | ||
47 | #define USER_LOCK_IN_CANCEL (0x00000020) | ||
48 | |||
49 | struct user_lock_res { | ||
50 | spinlock_t l_lock; | ||
51 | |||
52 | int l_flags; | ||
53 | |||
54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 | ||
55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; | ||
56 | int l_level; | ||
57 | unsigned int l_ro_holders; | ||
58 | unsigned int l_ex_holders; | ||
59 | struct dlm_lockstatus l_lksb; | ||
60 | |||
61 | int l_requested; | ||
62 | int l_blocking; | ||
63 | |||
64 | wait_queue_head_t l_event; | ||
65 | |||
66 | struct work_struct l_work; | ||
67 | }; | ||
68 | |||
69 | extern struct workqueue_struct *user_dlm_worker; | ||
70 | |||
71 | void user_dlm_lock_res_init(struct user_lock_res *lockres, | ||
72 | struct dentry *dentry); | ||
73 | int user_dlm_destroy_lock(struct user_lock_res *lockres); | ||
74 | int user_dlm_cluster_lock(struct user_lock_res *lockres, | ||
75 | int level, | ||
76 | int lkm_flags); | ||
77 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, | ||
78 | int level); | ||
79 | void user_dlm_write_lvb(struct inode *inode, | ||
80 | const char *val, | ||
81 | unsigned int len); | ||
82 | void user_dlm_read_lvb(struct inode *inode, | ||
83 | char *val, | ||
84 | unsigned int len); | ||
85 | struct dlm_ctxt *user_dlm_register_context(struct qstr *name); | ||
86 | void user_dlm_unregister_context(struct dlm_ctxt *dlm); | ||
87 | |||
88 | struct dlmfs_inode_private { | ||
89 | struct dlm_ctxt *ip_dlm; | ||
90 | |||
91 | struct user_lock_res ip_lockres; /* unused for directories. */ | ||
92 | struct inode *ip_parent; | ||
93 | |||
94 | struct inode ip_vfs_inode; | ||
95 | }; | ||
96 | |||
97 | static inline struct dlmfs_inode_private * | ||
98 | DLMFS_I(struct inode *inode) | ||
99 | { | ||
100 | return container_of(inode, | ||
101 | struct dlmfs_inode_private, | ||
102 | ip_vfs_inode); | ||
103 | } | ||
104 | |||
105 | struct dlmfs_filp_private { | ||
106 | int fp_lock_level; | ||
107 | }; | ||
108 | |||
109 | #define DLMFS_MAGIC 0x76a9f425 | ||
110 | |||
111 | #endif /* USERDLM_H */ | ||