diff options
Diffstat (limited to 'fs/ocfs2/dlmfs')
-rw-r--r-- | fs/ocfs2/dlmfs/Makefile | 5 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/dlmfs.c | 710 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/dlmfsver.c | 42 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/dlmfsver.h | 31 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/userdlm.c | 676 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/userdlm.h | 113 |
6 files changed, 1577 insertions, 0 deletions
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile new file mode 100644 index 000000000000..df69b4856d0d --- /dev/null +++ b/fs/ocfs2/dlmfs/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | EXTRA_CFLAGS += -Ifs/ocfs2 | ||
2 | |||
3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o | ||
4 | |||
5 | ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o | ||
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c new file mode 100644 index 000000000000..e21ce0e5fc42 --- /dev/null +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -0,0 +1,710 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfs.c | ||
5 | * | ||
6 | * Code which implements the kernel side of a minimal userspace | ||
7 | * interface to our DLM. This file handles the virtual file system | ||
8 | * used for communication with userspace. Credit should go to ramfs, | ||
9 | * which was a template for the fs side of this module. | ||
10 | * | ||
11 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public | ||
15 | * License as published by the Free Software Foundation; either | ||
16 | * version 2 of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
21 | * General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public | ||
24 | * License along with this program; if not, write to the | ||
25 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
26 | * Boston, MA 021110-1307, USA. | ||
27 | */ | ||
28 | |||
29 | /* Simple VFS hooks based on: */ | ||
30 | /* | ||
31 | * Resizable simple ram filesystem for Linux. | ||
32 | * | ||
33 | * Copyright (C) 2000 Linus Torvalds. | ||
34 | * 2000 Transmeta Corp. | ||
35 | */ | ||
36 | |||
37 | #include <linux/module.h> | ||
38 | #include <linux/fs.h> | ||
39 | #include <linux/pagemap.h> | ||
40 | #include <linux/types.h> | ||
41 | #include <linux/slab.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/string.h> | ||
45 | #include <linux/backing-dev.h> | ||
46 | #include <linux/poll.h> | ||
47 | |||
48 | #include <asm/uaccess.h> | ||
49 | |||
50 | |||
51 | #include "cluster/nodemanager.h" | ||
52 | #include "cluster/heartbeat.h" | ||
53 | #include "cluster/tcp.h" | ||
54 | |||
55 | #include "dlm/dlmapi.h" | ||
56 | |||
57 | #include "userdlm.h" | ||
58 | |||
59 | #include "dlmfsver.h" | ||
60 | |||
61 | #define MLOG_MASK_PREFIX ML_DLMFS | ||
62 | #include "cluster/masklog.h" | ||
63 | |||
64 | #include "ocfs2_lockingver.h" | ||
65 | |||
66 | static const struct super_operations dlmfs_ops; | ||
67 | static const struct file_operations dlmfs_file_operations; | ||
68 | static const struct inode_operations dlmfs_dir_inode_operations; | ||
69 | static const struct inode_operations dlmfs_root_inode_operations; | ||
70 | static const struct inode_operations dlmfs_file_inode_operations; | ||
71 | static struct kmem_cache *dlmfs_inode_cache; | ||
72 | |||
73 | struct workqueue_struct *user_dlm_worker; | ||
74 | |||
75 | /* | ||
76 | * This is the userdlmfs locking protocol version. | ||
77 | * | ||
78 | * See fs/ocfs2/dlmglue.c for more details on locking versions. | ||
79 | */ | ||
80 | static const struct dlm_protocol_version user_locking_protocol = { | ||
81 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
82 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
83 | }; | ||
84 | |||
85 | |||
86 | /* | ||
87 | * These are the ABI capabilities of dlmfs. | ||
88 | * | ||
89 | * Over time, dlmfs has added some features that were not part of the | ||
90 | * initial ABI. Unfortunately, some of these features are not detectable | ||
91 | * via standard usage. For example, Linux's default poll always returns | ||
92 | * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs | ||
93 | * added poll support. Instead, we provide this list of new capabilities. | ||
94 | * | ||
95 | * Capabilities is a read-only attribute. We do it as a module parameter | ||
96 | * so we can discover it whether dlmfs is built in, loaded, or even not | ||
97 | * loaded. | ||
98 | * | ||
99 | * The ABI features are local to this machine's dlmfs mount. This is | ||
100 | * distinct from the locking protocol, which is concerned with inter-node | ||
101 | * interaction. | ||
102 | * | ||
103 | * Capabilities: | ||
104 | * - bast : POLLIN against the file descriptor of a held lock | ||
105 | * signifies a bast fired on the lock. | ||
106 | */ | ||
107 | #define DLMFS_CAPABILITIES "bast" | ||
108 | extern int param_set_dlmfs_capabilities(const char *val, | ||
109 | struct kernel_param *kp) | ||
110 | { | ||
111 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); | ||
112 | return -EINVAL; | ||
113 | } | ||
114 | static int param_get_dlmfs_capabilities(char *buffer, | ||
115 | struct kernel_param *kp) | ||
116 | { | ||
117 | return strlcpy(buffer, DLMFS_CAPABILITIES, | ||
118 | strlen(DLMFS_CAPABILITIES) + 1); | ||
119 | } | ||
120 | module_param_call(capabilities, param_set_dlmfs_capabilities, | ||
121 | param_get_dlmfs_capabilities, NULL, 0444); | ||
122 | MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); | ||
123 | |||
124 | |||
125 | /* | ||
126 | * decodes a set of open flags into a valid lock level and a set of flags. | ||
127 | * returns < 0 if we have invalid flags (no combination is currently rejected) | ||
128 | * flags which mean something to us: | ||
129 | * O_RDONLY -> PRMODE level | ||
130 | * O_WRONLY or O_RDWR -> EXMODE level | ||
131 | * | ||
132 | * O_NONBLOCK -> LKM_NOQUEUE | ||
133 | */ | ||
134 | static int dlmfs_decode_open_flags(int open_flags, | ||
135 | int *level, | ||
136 | int *flags) | ||
137 | { | ||
138 | if (open_flags & (O_WRONLY|O_RDWR)) | ||
139 | *level = LKM_EXMODE; | ||
140 | else | ||
141 | *level = LKM_PRMODE; | ||
142 | |||
143 | *flags = 0; | ||
144 | if (open_flags & O_NONBLOCK) | ||
145 | *flags |= LKM_NOQUEUE; | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static int dlmfs_file_open(struct inode *inode, | ||
151 | struct file *file) | ||
152 | { | ||
153 | int status, level, flags; | ||
154 | struct dlmfs_filp_private *fp = NULL; | ||
155 | struct dlmfs_inode_private *ip; | ||
156 | |||
157 | if (S_ISDIR(inode->i_mode)) | ||
158 | BUG(); | ||
159 | |||
160 | mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, | ||
161 | file->f_flags); | ||
162 | |||
163 | status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); | ||
164 | if (status < 0) | ||
165 | goto bail; | ||
166 | |||
167 | /* We don't want to honor O_APPEND at read/write time as it | ||
168 | * doesn't make sense for LVB writes. */ | ||
169 | file->f_flags &= ~O_APPEND; | ||
170 | |||
171 | fp = kmalloc(sizeof(*fp), GFP_NOFS); | ||
172 | if (!fp) { | ||
173 | status = -ENOMEM; | ||
174 | goto bail; | ||
175 | } | ||
176 | fp->fp_lock_level = level; | ||
177 | |||
178 | ip = DLMFS_I(inode); | ||
179 | |||
180 | status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); | ||
181 | if (status < 0) { | ||
182 | /* this is a strange error to return here but I want | ||
183 | * to be able userspace to be able to distinguish a | ||
184 | * valid lock request from one that simply couldn't be | ||
185 | * granted. */ | ||
186 | if (flags & LKM_NOQUEUE && status == -EAGAIN) | ||
187 | status = -ETXTBSY; | ||
188 | kfree(fp); | ||
189 | goto bail; | ||
190 | } | ||
191 | |||
192 | file->private_data = fp; | ||
193 | bail: | ||
194 | return status; | ||
195 | } | ||
196 | |||
197 | static int dlmfs_file_release(struct inode *inode, | ||
198 | struct file *file) | ||
199 | { | ||
200 | int level, status; | ||
201 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | ||
202 | struct dlmfs_filp_private *fp = | ||
203 | (struct dlmfs_filp_private *) file->private_data; | ||
204 | |||
205 | if (S_ISDIR(inode->i_mode)) | ||
206 | BUG(); | ||
207 | |||
208 | mlog(0, "close called on inode %lu\n", inode->i_ino); | ||
209 | |||
210 | status = 0; | ||
211 | if (fp) { | ||
212 | level = fp->fp_lock_level; | ||
213 | if (level != LKM_IVMODE) | ||
214 | user_dlm_cluster_unlock(&ip->ip_lockres, level); | ||
215 | |||
216 | kfree(fp); | ||
217 | file->private_data = NULL; | ||
218 | } | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) | ||
224 | { | ||
225 | int event = 0; | ||
226 | struct inode *inode = file->f_path.dentry->d_inode; | ||
227 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | ||
228 | |||
229 | poll_wait(file, &ip->ip_lockres.l_event, wait); | ||
230 | |||
231 | spin_lock(&ip->ip_lockres.l_lock); | ||
232 | if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED) | ||
233 | event = POLLIN | POLLRDNORM; | ||
234 | spin_unlock(&ip->ip_lockres.l_lock); | ||
235 | |||
236 | return event; | ||
237 | } | ||
238 | |||
239 | static ssize_t dlmfs_file_read(struct file *filp, | ||
240 | char __user *buf, | ||
241 | size_t count, | ||
242 | loff_t *ppos) | ||
243 | { | ||
244 | int bytes_left; | ||
245 | ssize_t readlen; | ||
246 | char *lvb_buf; | ||
247 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
248 | |||
249 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | ||
250 | inode->i_ino, count, *ppos); | ||
251 | |||
252 | if (*ppos >= i_size_read(inode)) | ||
253 | return 0; | ||
254 | |||
255 | if (!count) | ||
256 | return 0; | ||
257 | |||
258 | if (!access_ok(VERIFY_WRITE, buf, count)) | ||
259 | return -EFAULT; | ||
260 | |||
261 | /* don't read past the lvb */ | ||
262 | if ((count + *ppos) > i_size_read(inode)) | ||
263 | readlen = i_size_read(inode) - *ppos; | ||
264 | else | ||
265 | readlen = count - *ppos; | ||
266 | |||
267 | lvb_buf = kmalloc(readlen, GFP_NOFS); | ||
268 | if (!lvb_buf) | ||
269 | return -ENOMEM; | ||
270 | |||
271 | user_dlm_read_lvb(inode, lvb_buf, readlen); | ||
272 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); | ||
273 | readlen -= bytes_left; | ||
274 | |||
275 | kfree(lvb_buf); | ||
276 | |||
277 | *ppos = *ppos + readlen; | ||
278 | |||
279 | mlog(0, "read %zd bytes\n", readlen); | ||
280 | return readlen; | ||
281 | } | ||
282 | |||
283 | static ssize_t dlmfs_file_write(struct file *filp, | ||
284 | const char __user *buf, | ||
285 | size_t count, | ||
286 | loff_t *ppos) | ||
287 | { | ||
288 | int bytes_left; | ||
289 | ssize_t writelen; | ||
290 | char *lvb_buf; | ||
291 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
292 | |||
293 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | ||
294 | inode->i_ino, count, *ppos); | ||
295 | |||
296 | if (*ppos >= i_size_read(inode)) | ||
297 | return -ENOSPC; | ||
298 | |||
299 | if (!count) | ||
300 | return 0; | ||
301 | |||
302 | if (!access_ok(VERIFY_READ, buf, count)) | ||
303 | return -EFAULT; | ||
304 | |||
305 | /* don't write past the lvb */ | ||
306 | if ((count + *ppos) > i_size_read(inode)) | ||
307 | writelen = i_size_read(inode) - *ppos; | ||
308 | else | ||
309 | writelen = count - *ppos; | ||
310 | |||
311 | lvb_buf = kmalloc(writelen, GFP_NOFS); | ||
312 | if (!lvb_buf) | ||
313 | return -ENOMEM; | ||
314 | |||
315 | bytes_left = copy_from_user(lvb_buf, buf, writelen); | ||
316 | writelen -= bytes_left; | ||
317 | if (writelen) | ||
318 | user_dlm_write_lvb(inode, lvb_buf, writelen); | ||
319 | |||
320 | kfree(lvb_buf); | ||
321 | |||
322 | *ppos = *ppos + writelen; | ||
323 | mlog(0, "wrote %zd bytes\n", writelen); | ||
324 | return writelen; | ||
325 | } | ||
326 | |||
327 | static void dlmfs_init_once(void *foo) | ||
328 | { | ||
329 | struct dlmfs_inode_private *ip = | ||
330 | (struct dlmfs_inode_private *) foo; | ||
331 | |||
332 | ip->ip_dlm = NULL; | ||
333 | ip->ip_parent = NULL; | ||
334 | |||
335 | inode_init_once(&ip->ip_vfs_inode); | ||
336 | } | ||
337 | |||
338 | static struct inode *dlmfs_alloc_inode(struct super_block *sb) | ||
339 | { | ||
340 | struct dlmfs_inode_private *ip; | ||
341 | |||
342 | ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); | ||
343 | if (!ip) | ||
344 | return NULL; | ||
345 | |||
346 | return &ip->ip_vfs_inode; | ||
347 | } | ||
348 | |||
349 | static void dlmfs_destroy_inode(struct inode *inode) | ||
350 | { | ||
351 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); | ||
352 | } | ||
353 | |||
354 | static void dlmfs_clear_inode(struct inode *inode) | ||
355 | { | ||
356 | int status; | ||
357 | struct dlmfs_inode_private *ip; | ||
358 | |||
359 | if (!inode) | ||
360 | return; | ||
361 | |||
362 | mlog(0, "inode %lu\n", inode->i_ino); | ||
363 | |||
364 | ip = DLMFS_I(inode); | ||
365 | |||
366 | if (S_ISREG(inode->i_mode)) { | ||
367 | status = user_dlm_destroy_lock(&ip->ip_lockres); | ||
368 | if (status < 0) | ||
369 | mlog_errno(status); | ||
370 | iput(ip->ip_parent); | ||
371 | goto clear_fields; | ||
372 | } | ||
373 | |||
374 | mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); | ||
375 | /* we must be a directory. If required, lets unregister the | ||
376 | * dlm context now. */ | ||
377 | if (ip->ip_dlm) | ||
378 | user_dlm_unregister_context(ip->ip_dlm); | ||
379 | clear_fields: | ||
380 | ip->ip_parent = NULL; | ||
381 | ip->ip_dlm = NULL; | ||
382 | } | ||
383 | |||
384 | static struct backing_dev_info dlmfs_backing_dev_info = { | ||
385 | .name = "ocfs2-dlmfs", | ||
386 | .ra_pages = 0, /* No readahead */ | ||
387 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
388 | }; | ||
389 | |||
390 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) | ||
391 | { | ||
392 | struct inode *inode = new_inode(sb); | ||
393 | int mode = S_IFDIR | 0755; | ||
394 | struct dlmfs_inode_private *ip; | ||
395 | |||
396 | if (inode) { | ||
397 | ip = DLMFS_I(inode); | ||
398 | |||
399 | inode->i_mode = mode; | ||
400 | inode->i_uid = current_fsuid(); | ||
401 | inode->i_gid = current_fsgid(); | ||
402 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
403 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
404 | inc_nlink(inode); | ||
405 | |||
406 | inode->i_fop = &simple_dir_operations; | ||
407 | inode->i_op = &dlmfs_root_inode_operations; | ||
408 | } | ||
409 | |||
410 | return inode; | ||
411 | } | ||
412 | |||
413 | static struct inode *dlmfs_get_inode(struct inode *parent, | ||
414 | struct dentry *dentry, | ||
415 | int mode) | ||
416 | { | ||
417 | struct super_block *sb = parent->i_sb; | ||
418 | struct inode * inode = new_inode(sb); | ||
419 | struct dlmfs_inode_private *ip; | ||
420 | |||
421 | if (!inode) | ||
422 | return NULL; | ||
423 | |||
424 | inode->i_mode = mode; | ||
425 | inode->i_uid = current_fsuid(); | ||
426 | inode->i_gid = current_fsgid(); | ||
427 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
428 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
429 | |||
430 | ip = DLMFS_I(inode); | ||
431 | ip->ip_dlm = DLMFS_I(parent)->ip_dlm; | ||
432 | |||
433 | switch (mode & S_IFMT) { | ||
434 | default: | ||
435 | /* for now we don't support anything other than | ||
436 | * directories and regular files. */ | ||
437 | BUG(); | ||
438 | break; | ||
439 | case S_IFREG: | ||
440 | inode->i_op = &dlmfs_file_inode_operations; | ||
441 | inode->i_fop = &dlmfs_file_operations; | ||
442 | |||
443 | i_size_write(inode, DLM_LVB_LEN); | ||
444 | |||
445 | user_dlm_lock_res_init(&ip->ip_lockres, dentry); | ||
446 | |||
447 | /* released at clear_inode time, this insures that we | ||
448 | * get to drop the dlm reference on each lock *before* | ||
449 | * we call the unregister code for releasing parent | ||
450 | * directories. */ | ||
451 | ip->ip_parent = igrab(parent); | ||
452 | BUG_ON(!ip->ip_parent); | ||
453 | break; | ||
454 | case S_IFDIR: | ||
455 | inode->i_op = &dlmfs_dir_inode_operations; | ||
456 | inode->i_fop = &simple_dir_operations; | ||
457 | |||
458 | /* directory inodes start off with i_nlink == | ||
459 | * 2 (for "." entry) */ | ||
460 | inc_nlink(inode); | ||
461 | break; | ||
462 | } | ||
463 | |||
464 | if (parent->i_mode & S_ISGID) { | ||
465 | inode->i_gid = parent->i_gid; | ||
466 | if (S_ISDIR(mode)) | ||
467 | inode->i_mode |= S_ISGID; | ||
468 | } | ||
469 | |||
470 | return inode; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * File creation. Allocate an inode, and we're done.. | ||
475 | */ | ||
476 | /* SMP-safe */ | ||
477 | static int dlmfs_mkdir(struct inode * dir, | ||
478 | struct dentry * dentry, | ||
479 | int mode) | ||
480 | { | ||
481 | int status; | ||
482 | struct inode *inode = NULL; | ||
483 | struct qstr *domain = &dentry->d_name; | ||
484 | struct dlmfs_inode_private *ip; | ||
485 | struct dlm_ctxt *dlm; | ||
486 | struct dlm_protocol_version proto = user_locking_protocol; | ||
487 | |||
488 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); | ||
489 | |||
490 | /* verify that we have a proper domain */ | ||
491 | if (domain->len >= O2NM_MAX_NAME_LEN) { | ||
492 | status = -EINVAL; | ||
493 | mlog(ML_ERROR, "invalid domain name for directory.\n"); | ||
494 | goto bail; | ||
495 | } | ||
496 | |||
497 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); | ||
498 | if (!inode) { | ||
499 | status = -ENOMEM; | ||
500 | mlog_errno(status); | ||
501 | goto bail; | ||
502 | } | ||
503 | |||
504 | ip = DLMFS_I(inode); | ||
505 | |||
506 | dlm = user_dlm_register_context(domain, &proto); | ||
507 | if (IS_ERR(dlm)) { | ||
508 | status = PTR_ERR(dlm); | ||
509 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", | ||
510 | status, domain->len, domain->name); | ||
511 | goto bail; | ||
512 | } | ||
513 | ip->ip_dlm = dlm; | ||
514 | |||
515 | inc_nlink(dir); | ||
516 | d_instantiate(dentry, inode); | ||
517 | dget(dentry); /* Extra count - pin the dentry in core */ | ||
518 | |||
519 | status = 0; | ||
520 | bail: | ||
521 | if (status < 0) | ||
522 | iput(inode); | ||
523 | return status; | ||
524 | } | ||
525 | |||
526 | static int dlmfs_create(struct inode *dir, | ||
527 | struct dentry *dentry, | ||
528 | int mode, | ||
529 | struct nameidata *nd) | ||
530 | { | ||
531 | int status = 0; | ||
532 | struct inode *inode; | ||
533 | struct qstr *name = &dentry->d_name; | ||
534 | |||
535 | mlog(0, "create %.*s\n", name->len, name->name); | ||
536 | |||
537 | /* verify name is valid and doesn't contain any dlm reserved | ||
538 | * characters */ | ||
539 | if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || | ||
540 | name->name[0] == '$') { | ||
541 | status = -EINVAL; | ||
542 | mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, | ||
543 | name->name); | ||
544 | goto bail; | ||
545 | } | ||
546 | |||
547 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); | ||
548 | if (!inode) { | ||
549 | status = -ENOMEM; | ||
550 | mlog_errno(status); | ||
551 | goto bail; | ||
552 | } | ||
553 | |||
554 | d_instantiate(dentry, inode); | ||
555 | dget(dentry); /* Extra count - pin the dentry in core */ | ||
556 | bail: | ||
557 | return status; | ||
558 | } | ||
559 | |||
560 | static int dlmfs_unlink(struct inode *dir, | ||
561 | struct dentry *dentry) | ||
562 | { | ||
563 | int status; | ||
564 | struct inode *inode = dentry->d_inode; | ||
565 | |||
566 | mlog(0, "unlink inode %lu\n", inode->i_ino); | ||
567 | |||
568 | /* if there are no current holders, or none that are waiting | ||
569 | * to acquire a lock, this basically destroys our lockres. */ | ||
570 | status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); | ||
571 | if (status < 0) { | ||
572 | mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", | ||
573 | dentry->d_name.len, dentry->d_name.name, status); | ||
574 | goto bail; | ||
575 | } | ||
576 | status = simple_unlink(dir, dentry); | ||
577 | bail: | ||
578 | return status; | ||
579 | } | ||
580 | |||
581 | static int dlmfs_fill_super(struct super_block * sb, | ||
582 | void * data, | ||
583 | int silent) | ||
584 | { | ||
585 | struct inode * inode; | ||
586 | struct dentry * root; | ||
587 | |||
588 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
589 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
590 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
591 | sb->s_magic = DLMFS_MAGIC; | ||
592 | sb->s_op = &dlmfs_ops; | ||
593 | inode = dlmfs_get_root_inode(sb); | ||
594 | if (!inode) | ||
595 | return -ENOMEM; | ||
596 | |||
597 | root = d_alloc_root(inode); | ||
598 | if (!root) { | ||
599 | iput(inode); | ||
600 | return -ENOMEM; | ||
601 | } | ||
602 | sb->s_root = root; | ||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | static const struct file_operations dlmfs_file_operations = { | ||
607 | .open = dlmfs_file_open, | ||
608 | .release = dlmfs_file_release, | ||
609 | .poll = dlmfs_file_poll, | ||
610 | .read = dlmfs_file_read, | ||
611 | .write = dlmfs_file_write, | ||
612 | }; | ||
613 | |||
614 | static const struct inode_operations dlmfs_dir_inode_operations = { | ||
615 | .create = dlmfs_create, | ||
616 | .lookup = simple_lookup, | ||
617 | .unlink = dlmfs_unlink, | ||
618 | }; | ||
619 | |||
620 | /* this way we can restrict mkdir to only the toplevel of the fs. */ | ||
621 | static const struct inode_operations dlmfs_root_inode_operations = { | ||
622 | .lookup = simple_lookup, | ||
623 | .mkdir = dlmfs_mkdir, | ||
624 | .rmdir = simple_rmdir, | ||
625 | }; | ||
626 | |||
627 | static const struct super_operations dlmfs_ops = { | ||
628 | .statfs = simple_statfs, | ||
629 | .alloc_inode = dlmfs_alloc_inode, | ||
630 | .destroy_inode = dlmfs_destroy_inode, | ||
631 | .clear_inode = dlmfs_clear_inode, | ||
632 | .drop_inode = generic_delete_inode, | ||
633 | }; | ||
634 | |||
635 | static const struct inode_operations dlmfs_file_inode_operations = { | ||
636 | .getattr = simple_getattr, | ||
637 | }; | ||
638 | |||
/*
 * file_system_type.get_sb hook: defer to get_sb_nodev(), with
 * dlmfs_fill_super() as the superblock fill callback.  @dev_name is not
 * used.
 */
static int dlmfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
}
644 | |||
645 | static struct file_system_type dlmfs_fs_type = { | ||
646 | .owner = THIS_MODULE, | ||
647 | .name = "ocfs2_dlmfs", | ||
648 | .get_sb = dlmfs_get_sb, | ||
649 | .kill_sb = kill_litter_super, | ||
650 | }; | ||
651 | |||
652 | static int __init init_dlmfs_fs(void) | ||
653 | { | ||
654 | int status; | ||
655 | int cleanup_inode = 0, cleanup_worker = 0; | ||
656 | |||
657 | dlmfs_print_version(); | ||
658 | |||
659 | status = bdi_init(&dlmfs_backing_dev_info); | ||
660 | if (status) | ||
661 | return status; | ||
662 | |||
663 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", | ||
664 | sizeof(struct dlmfs_inode_private), | ||
665 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | ||
666 | SLAB_MEM_SPREAD), | ||
667 | dlmfs_init_once); | ||
668 | if (!dlmfs_inode_cache) { | ||
669 | status = -ENOMEM; | ||
670 | goto bail; | ||
671 | } | ||
672 | cleanup_inode = 1; | ||
673 | |||
674 | user_dlm_worker = create_singlethread_workqueue("user_dlm"); | ||
675 | if (!user_dlm_worker) { | ||
676 | status = -ENOMEM; | ||
677 | goto bail; | ||
678 | } | ||
679 | cleanup_worker = 1; | ||
680 | |||
681 | status = register_filesystem(&dlmfs_fs_type); | ||
682 | bail: | ||
683 | if (status) { | ||
684 | if (cleanup_inode) | ||
685 | kmem_cache_destroy(dlmfs_inode_cache); | ||
686 | if (cleanup_worker) | ||
687 | destroy_workqueue(user_dlm_worker); | ||
688 | bdi_destroy(&dlmfs_backing_dev_info); | ||
689 | } else | ||
690 | printk("OCFS2 User DLM kernel interface loaded\n"); | ||
691 | return status; | ||
692 | } | ||
693 | |||
/*
 * Module exit: unregister the filesystem type, then flush and destroy
 * the user_dlm workqueue, destroy the inode cache and finally the
 * backing_dev_info.
 */
static void __exit exit_dlmfs_fs(void)
{
	unregister_filesystem(&dlmfs_fs_type);

	/* let queued work finish before the queue goes away */
	flush_workqueue(user_dlm_worker);
	destroy_workqueue(user_dlm_worker);

	kmem_cache_destroy(dlmfs_inode_cache);

	bdi_destroy(&dlmfs_backing_dev_info);
}
705 | |||
706 | MODULE_AUTHOR("Oracle"); | ||
707 | MODULE_LICENSE("GPL"); | ||
708 | |||
709 | module_init(init_dlmfs_fs) | ||
710 | module_exit(exit_dlmfs_fs) | ||
diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c new file mode 100644 index 000000000000..a733b3321f83 --- /dev/null +++ b/fs/ocfs2/dlmfs/dlmfsver.c | |||
@@ -0,0 +1,42 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | |||
29 | #include "dlmfsver.h" | ||
30 | |||
31 | #define DLM_BUILD_VERSION "1.5.0" | ||
32 | |||
33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | ||
34 | |||
/* Log VERSION_STR ("OCFS2 DLMFS " plus DLM_BUILD_VERSION) at KERN_INFO. */
void dlmfs_print_version(void)
{
	printk(KERN_INFO "%s\n", VERSION_STR);
}
39 | |||
40 | MODULE_DESCRIPTION(VERSION_STR); | ||
41 | |||
42 | MODULE_VERSION(DLM_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h new file mode 100644 index 000000000000..f35eadbed25c --- /dev/null +++ b/fs/ocfs2/dlmfs/dlmfsver.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef DLMFS_VER_H | ||
27 | #define DLMFS_VER_H | ||
28 | |||
29 | void dlmfs_print_version(void); | ||
30 | |||
31 | #endif /* DLMFS_VER_H */ | ||
diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c new file mode 100644 index 000000000000..6adae70cee8e --- /dev/null +++ b/fs/ocfs2/dlmfs/userdlm.c | |||
@@ -0,0 +1,676 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * userdlm.c | ||
5 | * | ||
6 | * Code which implements the kernel side of a minimal userspace | ||
7 | * interface to our DLM. | ||
8 | * | ||
9 | * Many of the functions here are pared down versions of dlmglue.c | ||
10 | * functions. | ||
11 | * | ||
12 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public | ||
16 | * License as published by the Free Software Foundation; either | ||
17 | * version 2 of the License, or (at your option) any later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, | ||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
22 | * General Public License for more details. | ||
23 | * | ||
24 | * You should have received a copy of the GNU General Public | ||
25 | * License along with this program; if not, write to the | ||
26 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
27 | * Boston, MA 02111-1307, USA. | ||
28 | */ | ||
29 | |||
30 | #include <linux/signal.h> | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/fs.h> | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/crc32.h> | ||
36 | |||
37 | |||
38 | #include "cluster/nodemanager.h" | ||
39 | #include "cluster/heartbeat.h" | ||
40 | #include "cluster/tcp.h" | ||
41 | |||
42 | #include "dlm/dlmapi.h" | ||
43 | |||
44 | #include "userdlm.h" | ||
45 | |||
46 | #define MLOG_MASK_PREFIX ML_DLMFS | ||
47 | #include "cluster/masklog.h" | ||
48 | |||
49 | static inline int user_check_wait_flag(struct user_lock_res *lockres, | ||
50 | int flag) | ||
51 | { | ||
52 | int ret; | ||
53 | |||
54 | spin_lock(&lockres->l_lock); | ||
55 | ret = lockres->l_flags & flag; | ||
56 | spin_unlock(&lockres->l_lock); | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static inline void user_wait_on_busy_lock(struct user_lock_res *lockres) | ||
62 | |||
63 | { | ||
64 | wait_event(lockres->l_event, | ||
65 | !user_check_wait_flag(lockres, USER_LOCK_BUSY)); | ||
66 | } | ||
67 | |||
68 | static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres) | ||
69 | |||
70 | { | ||
71 | wait_event(lockres->l_event, | ||
72 | !user_check_wait_flag(lockres, USER_LOCK_BLOCKED)); | ||
73 | } | ||
74 | |||
75 | /* I heart container_of... */ | ||
76 | static inline struct dlm_ctxt * | ||
77 | dlm_ctxt_from_user_lockres(struct user_lock_res *lockres) | ||
78 | { | ||
79 | struct dlmfs_inode_private *ip; | ||
80 | |||
81 | ip = container_of(lockres, | ||
82 | struct dlmfs_inode_private, | ||
83 | ip_lockres); | ||
84 | return ip->ip_dlm; | ||
85 | } | ||
86 | |||
87 | static struct inode * | ||
88 | user_dlm_inode_from_user_lockres(struct user_lock_res *lockres) | ||
89 | { | ||
90 | struct dlmfs_inode_private *ip; | ||
91 | |||
92 | ip = container_of(lockres, | ||
93 | struct dlmfs_inode_private, | ||
94 | ip_lockres); | ||
95 | return &ip->ip_vfs_inode; | ||
96 | } | ||
97 | |||
98 | static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | ||
99 | { | ||
100 | spin_lock(&lockres->l_lock); | ||
101 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
102 | spin_unlock(&lockres->l_lock); | ||
103 | } | ||
104 | |||
/*
 * Log a DLM failure at ML_ERROR level.
 *
 * @_func:    name of the DLM call that failed (string)
 * @_stat:    the dlm status it returned (evaluated twice)
 * @_lockres: pointer to the struct user_lock_res involved
 *
 * Arguments are parenthesized so that non-trivial expressions (for
 * example "&res" or "a ? b : c") expand with the intended precedence.
 */
#define user_log_dlm_error(_func, _stat, _lockres) do {			\
	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
	     "resource %.*s: %s\n", dlm_errname(_stat), (_func),	\
	     (_lockres)->l_namelen, (_lockres)->l_name,			\
	     dlm_errmsg(_stat));					\
} while (0)
110 | |||
111 | /* WARNING: This function lives in a world where the only three lock | ||
112 | * levels are EX, PR, and NL. It *will* have to be adjusted when more | ||
113 | * lock types are added. */ | ||
114 | static inline int user_highest_compat_lock_level(int level) | ||
115 | { | ||
116 | int new_level = LKM_EXMODE; | ||
117 | |||
118 | if (level == LKM_EXMODE) | ||
119 | new_level = LKM_NLMODE; | ||
120 | else if (level == LKM_PRMODE) | ||
121 | new_level = LKM_PRMODE; | ||
122 | return new_level; | ||
123 | } | ||
124 | |||
125 | static void user_ast(void *opaque) | ||
126 | { | ||
127 | struct user_lock_res *lockres = opaque; | ||
128 | struct dlm_lockstatus *lksb; | ||
129 | |||
130 | mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen, | ||
131 | lockres->l_name); | ||
132 | |||
133 | spin_lock(&lockres->l_lock); | ||
134 | |||
135 | lksb = &(lockres->l_lksb); | ||
136 | if (lksb->status != DLM_NORMAL) { | ||
137 | mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", | ||
138 | lksb->status, lockres->l_namelen, lockres->l_name); | ||
139 | spin_unlock(&lockres->l_lock); | ||
140 | return; | ||
141 | } | ||
142 | |||
143 | mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, | ||
144 | "Lockres %.*s, requested ivmode. flags 0x%x\n", | ||
145 | lockres->l_namelen, lockres->l_name, lockres->l_flags); | ||
146 | |||
147 | /* we're downconverting. */ | ||
148 | if (lockres->l_requested < lockres->l_level) { | ||
149 | if (lockres->l_requested <= | ||
150 | user_highest_compat_lock_level(lockres->l_blocking)) { | ||
151 | lockres->l_blocking = LKM_NLMODE; | ||
152 | lockres->l_flags &= ~USER_LOCK_BLOCKED; | ||
153 | } | ||
154 | } | ||
155 | |||
156 | lockres->l_level = lockres->l_requested; | ||
157 | lockres->l_requested = LKM_IVMODE; | ||
158 | lockres->l_flags |= USER_LOCK_ATTACHED; | ||
159 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
160 | |||
161 | spin_unlock(&lockres->l_lock); | ||
162 | |||
163 | wake_up(&lockres->l_event); | ||
164 | } | ||
165 | |||
/*
 * Take a reference on the inode that embeds @lockres.  BUGs if igrab()
 * fails to return the inode.
 */
static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
{
	if (!igrab(user_dlm_inode_from_user_lockres(lockres)))
		BUG();
}
173 | |||
174 | static void user_dlm_unblock_lock(struct work_struct *work); | ||
175 | |||
176 | static void __user_dlm_queue_lockres(struct user_lock_res *lockres) | ||
177 | { | ||
178 | if (!(lockres->l_flags & USER_LOCK_QUEUED)) { | ||
179 | user_dlm_grab_inode_ref(lockres); | ||
180 | |||
181 | INIT_WORK(&lockres->l_work, user_dlm_unblock_lock); | ||
182 | |||
183 | queue_work(user_dlm_worker, &lockres->l_work); | ||
184 | lockres->l_flags |= USER_LOCK_QUEUED; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres) | ||
189 | { | ||
190 | int queue = 0; | ||
191 | |||
192 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) | ||
193 | return; | ||
194 | |||
195 | switch (lockres->l_blocking) { | ||
196 | case LKM_EXMODE: | ||
197 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | ||
198 | queue = 1; | ||
199 | break; | ||
200 | case LKM_PRMODE: | ||
201 | if (!lockres->l_ex_holders) | ||
202 | queue = 1; | ||
203 | break; | ||
204 | default: | ||
205 | BUG(); | ||
206 | } | ||
207 | |||
208 | if (queue) | ||
209 | __user_dlm_queue_lockres(lockres); | ||
210 | } | ||
211 | |||
212 | static void user_bast(void *opaque, int level) | ||
213 | { | ||
214 | struct user_lock_res *lockres = opaque; | ||
215 | |||
216 | mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n", | ||
217 | lockres->l_namelen, lockres->l_name, level); | ||
218 | |||
219 | spin_lock(&lockres->l_lock); | ||
220 | lockres->l_flags |= USER_LOCK_BLOCKED; | ||
221 | if (level > lockres->l_blocking) | ||
222 | lockres->l_blocking = level; | ||
223 | |||
224 | __user_dlm_queue_lockres(lockres); | ||
225 | spin_unlock(&lockres->l_lock); | ||
226 | |||
227 | wake_up(&lockres->l_event); | ||
228 | } | ||
229 | |||
230 | static void user_unlock_ast(void *opaque, enum dlm_status status) | ||
231 | { | ||
232 | struct user_lock_res *lockres = opaque; | ||
233 | |||
234 | mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen, | ||
235 | lockres->l_name); | ||
236 | |||
237 | if (status != DLM_NORMAL && status != DLM_CANCELGRANT) | ||
238 | mlog(ML_ERROR, "Dlm returns status %d\n", status); | ||
239 | |||
240 | spin_lock(&lockres->l_lock); | ||
241 | /* The teardown flag gets set early during the unlock process, | ||
242 | * so test the cancel flag to make sure that this ast isn't | ||
243 | * for a concurrent cancel. */ | ||
244 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN | ||
245 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { | ||
246 | lockres->l_level = LKM_IVMODE; | ||
247 | } else if (status == DLM_CANCELGRANT) { | ||
248 | /* We tried to cancel a convert request, but it was | ||
249 | * already granted. Don't clear the busy flag - the | ||
250 | * ast should've done this already. */ | ||
251 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | ||
252 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | ||
253 | goto out_noclear; | ||
254 | } else { | ||
255 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | ||
256 | /* Cancel succeeded, we want to re-queue */ | ||
257 | lockres->l_requested = LKM_IVMODE; /* cancel an | ||
258 | * upconvert | ||
259 | * request. */ | ||
260 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | ||
261 | /* we want the unblock thread to look at it again | ||
262 | * now. */ | ||
263 | if (lockres->l_flags & USER_LOCK_BLOCKED) | ||
264 | __user_dlm_queue_lockres(lockres); | ||
265 | } | ||
266 | |||
267 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
268 | out_noclear: | ||
269 | spin_unlock(&lockres->l_lock); | ||
270 | |||
271 | wake_up(&lockres->l_event); | ||
272 | } | ||
273 | |||
/*
 * Release one reference (iput) on the inode that embeds @lockres.
 */
static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
{
	iput(user_dlm_inode_from_user_lockres(lockres));
}
280 | |||
281 | static void user_dlm_unblock_lock(struct work_struct *work) | ||
282 | { | ||
283 | int new_level, status; | ||
284 | struct user_lock_res *lockres = | ||
285 | container_of(work, struct user_lock_res, l_work); | ||
286 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | ||
287 | |||
288 | mlog(0, "processing lockres %.*s\n", lockres->l_namelen, | ||
289 | lockres->l_name); | ||
290 | |||
291 | spin_lock(&lockres->l_lock); | ||
292 | |||
293 | mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), | ||
294 | "Lockres %.*s, flags 0x%x\n", | ||
295 | lockres->l_namelen, lockres->l_name, lockres->l_flags); | ||
296 | |||
297 | /* notice that we don't clear USER_LOCK_BLOCKED here. If it's | ||
298 | * set, we want user_ast clear it. */ | ||
299 | lockres->l_flags &= ~USER_LOCK_QUEUED; | ||
300 | |||
301 | /* It's valid to get here and no longer be blocked - if we get | ||
302 | * several basts in a row, we might be queued by the first | ||
303 | * one, the unblock thread might run and clear the queued | ||
304 | * flag, and finally we might get another bast which re-queues | ||
305 | * us before our ast for the downconvert is called. */ | ||
306 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { | ||
307 | spin_unlock(&lockres->l_lock); | ||
308 | goto drop_ref; | ||
309 | } | ||
310 | |||
311 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | ||
312 | spin_unlock(&lockres->l_lock); | ||
313 | goto drop_ref; | ||
314 | } | ||
315 | |||
316 | if (lockres->l_flags & USER_LOCK_BUSY) { | ||
317 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { | ||
318 | spin_unlock(&lockres->l_lock); | ||
319 | goto drop_ref; | ||
320 | } | ||
321 | |||
322 | lockres->l_flags |= USER_LOCK_IN_CANCEL; | ||
323 | spin_unlock(&lockres->l_lock); | ||
324 | |||
325 | status = dlmunlock(dlm, | ||
326 | &lockres->l_lksb, | ||
327 | LKM_CANCEL, | ||
328 | user_unlock_ast, | ||
329 | lockres); | ||
330 | if (status != DLM_NORMAL) | ||
331 | user_log_dlm_error("dlmunlock", status, lockres); | ||
332 | goto drop_ref; | ||
333 | } | ||
334 | |||
335 | /* If there are still incompat holders, we can exit safely | ||
336 | * without worrying about re-queueing this lock as that will | ||
337 | * happen on the last call to user_cluster_unlock. */ | ||
338 | if ((lockres->l_blocking == LKM_EXMODE) | ||
339 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | ||
340 | spin_unlock(&lockres->l_lock); | ||
341 | mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n", | ||
342 | lockres->l_ro_holders, lockres->l_ex_holders); | ||
343 | goto drop_ref; | ||
344 | } | ||
345 | |||
346 | if ((lockres->l_blocking == LKM_PRMODE) | ||
347 | && lockres->l_ex_holders) { | ||
348 | spin_unlock(&lockres->l_lock); | ||
349 | mlog(0, "can't downconvert for pr: ex = %u\n", | ||
350 | lockres->l_ex_holders); | ||
351 | goto drop_ref; | ||
352 | } | ||
353 | |||
354 | /* yay, we can downconvert now. */ | ||
355 | new_level = user_highest_compat_lock_level(lockres->l_blocking); | ||
356 | lockres->l_requested = new_level; | ||
357 | lockres->l_flags |= USER_LOCK_BUSY; | ||
358 | mlog(0, "Downconvert lock from %d to %d\n", | ||
359 | lockres->l_level, new_level); | ||
360 | spin_unlock(&lockres->l_lock); | ||
361 | |||
362 | /* need lock downconvert request now... */ | ||
363 | status = dlmlock(dlm, | ||
364 | new_level, | ||
365 | &lockres->l_lksb, | ||
366 | LKM_CONVERT|LKM_VALBLK, | ||
367 | lockres->l_name, | ||
368 | lockres->l_namelen, | ||
369 | user_ast, | ||
370 | lockres, | ||
371 | user_bast); | ||
372 | if (status != DLM_NORMAL) { | ||
373 | user_log_dlm_error("dlmlock", status, lockres); | ||
374 | user_recover_from_dlm_error(lockres); | ||
375 | } | ||
376 | |||
377 | drop_ref: | ||
378 | user_dlm_drop_inode_ref(lockres); | ||
379 | } | ||
380 | |||
381 | static inline void user_dlm_inc_holders(struct user_lock_res *lockres, | ||
382 | int level) | ||
383 | { | ||
384 | switch(level) { | ||
385 | case LKM_EXMODE: | ||
386 | lockres->l_ex_holders++; | ||
387 | break; | ||
388 | case LKM_PRMODE: | ||
389 | lockres->l_ro_holders++; | ||
390 | break; | ||
391 | default: | ||
392 | BUG(); | ||
393 | } | ||
394 | } | ||
395 | |||
396 | /* predict what lock level we'll be dropping down to on behalf | ||
397 | * of another node, and return true if the currently wanted | ||
398 | * level will be compatible with it. */ | ||
399 | static inline int | ||
400 | user_may_continue_on_blocked_lock(struct user_lock_res *lockres, | ||
401 | int wanted) | ||
402 | { | ||
403 | BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); | ||
404 | |||
405 | return wanted <= user_highest_compat_lock_level(lockres->l_blocking); | ||
406 | } | ||
407 | |||
408 | int user_dlm_cluster_lock(struct user_lock_res *lockres, | ||
409 | int level, | ||
410 | int lkm_flags) | ||
411 | { | ||
412 | int status, local_flags; | ||
413 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | ||
414 | |||
415 | if (level != LKM_EXMODE && | ||
416 | level != LKM_PRMODE) { | ||
417 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", | ||
418 | lockres->l_namelen, lockres->l_name); | ||
419 | status = -EINVAL; | ||
420 | goto bail; | ||
421 | } | ||
422 | |||
423 | mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n", | ||
424 | lockres->l_namelen, lockres->l_name, | ||
425 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", | ||
426 | lkm_flags); | ||
427 | |||
428 | again: | ||
429 | if (signal_pending(current)) { | ||
430 | status = -ERESTARTSYS; | ||
431 | goto bail; | ||
432 | } | ||
433 | |||
434 | spin_lock(&lockres->l_lock); | ||
435 | |||
436 | /* We only compare against the currently granted level | ||
437 | * here. If the lock is blocked waiting on a downconvert, | ||
438 | * we'll get caught below. */ | ||
439 | if ((lockres->l_flags & USER_LOCK_BUSY) && | ||
440 | (level > lockres->l_level)) { | ||
441 | /* is someone sitting in dlm_lock? If so, wait on | ||
442 | * them. */ | ||
443 | spin_unlock(&lockres->l_lock); | ||
444 | |||
445 | user_wait_on_busy_lock(lockres); | ||
446 | goto again; | ||
447 | } | ||
448 | |||
449 | if ((lockres->l_flags & USER_LOCK_BLOCKED) && | ||
450 | (!user_may_continue_on_blocked_lock(lockres, level))) { | ||
451 | /* is the lock is currently blocked on behalf of | ||
452 | * another node */ | ||
453 | spin_unlock(&lockres->l_lock); | ||
454 | |||
455 | user_wait_on_blocked_lock(lockres); | ||
456 | goto again; | ||
457 | } | ||
458 | |||
459 | if (level > lockres->l_level) { | ||
460 | local_flags = lkm_flags | LKM_VALBLK; | ||
461 | if (lockres->l_level != LKM_IVMODE) | ||
462 | local_flags |= LKM_CONVERT; | ||
463 | |||
464 | lockres->l_requested = level; | ||
465 | lockres->l_flags |= USER_LOCK_BUSY; | ||
466 | spin_unlock(&lockres->l_lock); | ||
467 | |||
468 | BUG_ON(level == LKM_IVMODE); | ||
469 | BUG_ON(level == LKM_NLMODE); | ||
470 | |||
471 | /* call dlm_lock to upgrade lock now */ | ||
472 | status = dlmlock(dlm, | ||
473 | level, | ||
474 | &lockres->l_lksb, | ||
475 | local_flags, | ||
476 | lockres->l_name, | ||
477 | lockres->l_namelen, | ||
478 | user_ast, | ||
479 | lockres, | ||
480 | user_bast); | ||
481 | if (status != DLM_NORMAL) { | ||
482 | if ((lkm_flags & LKM_NOQUEUE) && | ||
483 | (status == DLM_NOTQUEUED)) | ||
484 | status = -EAGAIN; | ||
485 | else { | ||
486 | user_log_dlm_error("dlmlock", status, lockres); | ||
487 | status = -EINVAL; | ||
488 | } | ||
489 | user_recover_from_dlm_error(lockres); | ||
490 | goto bail; | ||
491 | } | ||
492 | |||
493 | user_wait_on_busy_lock(lockres); | ||
494 | goto again; | ||
495 | } | ||
496 | |||
497 | user_dlm_inc_holders(lockres, level); | ||
498 | spin_unlock(&lockres->l_lock); | ||
499 | |||
500 | status = 0; | ||
501 | bail: | ||
502 | return status; | ||
503 | } | ||
504 | |||
505 | static inline void user_dlm_dec_holders(struct user_lock_res *lockres, | ||
506 | int level) | ||
507 | { | ||
508 | switch(level) { | ||
509 | case LKM_EXMODE: | ||
510 | BUG_ON(!lockres->l_ex_holders); | ||
511 | lockres->l_ex_holders--; | ||
512 | break; | ||
513 | case LKM_PRMODE: | ||
514 | BUG_ON(!lockres->l_ro_holders); | ||
515 | lockres->l_ro_holders--; | ||
516 | break; | ||
517 | default: | ||
518 | BUG(); | ||
519 | } | ||
520 | } | ||
521 | |||
522 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, | ||
523 | int level) | ||
524 | { | ||
525 | if (level != LKM_EXMODE && | ||
526 | level != LKM_PRMODE) { | ||
527 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", | ||
528 | lockres->l_namelen, lockres->l_name); | ||
529 | return; | ||
530 | } | ||
531 | |||
532 | spin_lock(&lockres->l_lock); | ||
533 | user_dlm_dec_holders(lockres, level); | ||
534 | __user_dlm_cond_queue_lockres(lockres); | ||
535 | spin_unlock(&lockres->l_lock); | ||
536 | } | ||
537 | |||
538 | void user_dlm_write_lvb(struct inode *inode, | ||
539 | const char *val, | ||
540 | unsigned int len) | ||
541 | { | ||
542 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | ||
543 | char *lvb = lockres->l_lksb.lvb; | ||
544 | |||
545 | BUG_ON(len > DLM_LVB_LEN); | ||
546 | |||
547 | spin_lock(&lockres->l_lock); | ||
548 | |||
549 | BUG_ON(lockres->l_level < LKM_EXMODE); | ||
550 | memcpy(lvb, val, len); | ||
551 | |||
552 | spin_unlock(&lockres->l_lock); | ||
553 | } | ||
554 | |||
555 | void user_dlm_read_lvb(struct inode *inode, | ||
556 | char *val, | ||
557 | unsigned int len) | ||
558 | { | ||
559 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | ||
560 | char *lvb = lockres->l_lksb.lvb; | ||
561 | |||
562 | BUG_ON(len > DLM_LVB_LEN); | ||
563 | |||
564 | spin_lock(&lockres->l_lock); | ||
565 | |||
566 | BUG_ON(lockres->l_level < LKM_PRMODE); | ||
567 | memcpy(val, lvb, len); | ||
568 | |||
569 | spin_unlock(&lockres->l_lock); | ||
570 | } | ||
571 | |||
572 | void user_dlm_lock_res_init(struct user_lock_res *lockres, | ||
573 | struct dentry *dentry) | ||
574 | { | ||
575 | memset(lockres, 0, sizeof(*lockres)); | ||
576 | |||
577 | spin_lock_init(&lockres->l_lock); | ||
578 | init_waitqueue_head(&lockres->l_event); | ||
579 | lockres->l_level = LKM_IVMODE; | ||
580 | lockres->l_requested = LKM_IVMODE; | ||
581 | lockres->l_blocking = LKM_IVMODE; | ||
582 | |||
583 | /* should have been checked before getting here. */ | ||
584 | BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN); | ||
585 | |||
586 | memcpy(lockres->l_name, | ||
587 | dentry->d_name.name, | ||
588 | dentry->d_name.len); | ||
589 | lockres->l_namelen = dentry->d_name.len; | ||
590 | } | ||
591 | |||
592 | int user_dlm_destroy_lock(struct user_lock_res *lockres) | ||
593 | { | ||
594 | int status = -EBUSY; | ||
595 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | ||
596 | |||
597 | mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name); | ||
598 | |||
599 | spin_lock(&lockres->l_lock); | ||
600 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | ||
601 | spin_unlock(&lockres->l_lock); | ||
602 | return 0; | ||
603 | } | ||
604 | |||
605 | lockres->l_flags |= USER_LOCK_IN_TEARDOWN; | ||
606 | |||
607 | while (lockres->l_flags & USER_LOCK_BUSY) { | ||
608 | spin_unlock(&lockres->l_lock); | ||
609 | |||
610 | user_wait_on_busy_lock(lockres); | ||
611 | |||
612 | spin_lock(&lockres->l_lock); | ||
613 | } | ||
614 | |||
615 | if (lockres->l_ro_holders || lockres->l_ex_holders) { | ||
616 | spin_unlock(&lockres->l_lock); | ||
617 | goto bail; | ||
618 | } | ||
619 | |||
620 | status = 0; | ||
621 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { | ||
622 | spin_unlock(&lockres->l_lock); | ||
623 | goto bail; | ||
624 | } | ||
625 | |||
626 | lockres->l_flags &= ~USER_LOCK_ATTACHED; | ||
627 | lockres->l_flags |= USER_LOCK_BUSY; | ||
628 | spin_unlock(&lockres->l_lock); | ||
629 | |||
630 | status = dlmunlock(dlm, | ||
631 | &lockres->l_lksb, | ||
632 | LKM_VALBLK, | ||
633 | user_unlock_ast, | ||
634 | lockres); | ||
635 | if (status != DLM_NORMAL) { | ||
636 | user_log_dlm_error("dlmunlock", status, lockres); | ||
637 | status = -EINVAL; | ||
638 | goto bail; | ||
639 | } | ||
640 | |||
641 | user_wait_on_busy_lock(lockres); | ||
642 | |||
643 | status = 0; | ||
644 | bail: | ||
645 | return status; | ||
646 | } | ||
647 | |||
648 | struct dlm_ctxt *user_dlm_register_context(struct qstr *name, | ||
649 | struct dlm_protocol_version *proto) | ||
650 | { | ||
651 | struct dlm_ctxt *dlm; | ||
652 | u32 dlm_key; | ||
653 | char *domain; | ||
654 | |||
655 | domain = kmalloc(name->len + 1, GFP_NOFS); | ||
656 | if (!domain) { | ||
657 | mlog_errno(-ENOMEM); | ||
658 | return ERR_PTR(-ENOMEM); | ||
659 | } | ||
660 | |||
661 | dlm_key = crc32_le(0, name->name, name->len); | ||
662 | |||
663 | snprintf(domain, name->len + 1, "%.*s", name->len, name->name); | ||
664 | |||
665 | dlm = dlm_register_domain(domain, dlm_key, proto); | ||
666 | if (IS_ERR(dlm)) | ||
667 | mlog_errno(PTR_ERR(dlm)); | ||
668 | |||
669 | kfree(domain); | ||
670 | return dlm; | ||
671 | } | ||
672 | |||
/*
 * Counterpart of user_dlm_register_context(): hand @dlm back to
 * dlm_unregister_domain().
 */
void user_dlm_unregister_context(struct dlm_ctxt *dlm)
{
	dlm_unregister_domain(dlm);
}
diff --git a/fs/ocfs2/dlmfs/userdlm.h b/fs/ocfs2/dlmfs/userdlm.h new file mode 100644 index 000000000000..0c3cc03c61fa --- /dev/null +++ b/fs/ocfs2/dlmfs/userdlm.h | |||
@@ -0,0 +1,113 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * userdlm.h | ||
5 | * | ||
6 | * Userspace dlm defines | ||
7 | * | ||
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | |||
27 | #ifndef USERDLM_H | ||
28 | #define USERDLM_H | ||
29 | |||
30 | #include <linux/module.h> | ||
31 | #include <linux/fs.h> | ||
32 | #include <linux/types.h> | ||
33 | #include <linux/workqueue.h> | ||
34 | |||
/*
 * Bits stored in user_lock_res->l_flags.
 */

/* Set by user_ast() once a request has been granted; a lock without
 * it is not unlocked at the DLM by user_dlm_destroy_lock(). */
#define USER_LOCK_ATTACHED      (0x00000001)

/* A dlmlock/dlmunlock request is in flight for this resource. */
#define USER_LOCK_BUSY          (0x00000002)

/* A blocking ast arrived; we are expected to downconvert. */
#define USER_LOCK_BLOCKED       (0x00000004)

/* user_dlm_destroy_lock() has started destroying this lock. */
#define USER_LOCK_IN_TEARDOWN   (0x00000008)

/* The lock's work item is on the workqueue. */
#define USER_LOCK_QUEUED        (0x00000010)

/* A cancel (dlmunlock with LKM_CANCEL) is pending for the in-flight
 * request; cleared by user_unlock_ast(). */
#define USER_LOCK_IN_CANCEL     (0x00000020)
48 | |||
49 | struct user_lock_res { | ||
50 | spinlock_t l_lock; | ||
51 | |||
52 | int l_flags; | ||
53 | |||
54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 | ||
55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; | ||
56 | int l_namelen; | ||
57 | int l_level; | ||
58 | unsigned int l_ro_holders; | ||
59 | unsigned int l_ex_holders; | ||
60 | struct dlm_lockstatus l_lksb; | ||
61 | |||
62 | int l_requested; | ||
63 | int l_blocking; | ||
64 | |||
65 | wait_queue_head_t l_event; | ||
66 | |||
67 | struct work_struct l_work; | ||
68 | }; | ||
69 | |||
/* Workqueue on which blocked lock resources are queued for unblocking. */
extern struct workqueue_struct *user_dlm_worker;

/* Initialize @lockres and name it after @dentry; the name must be
 * shorter than USER_DLM_LOCK_ID_MAX_LEN. */
void user_dlm_lock_res_init(struct user_lock_res *lockres,
			    struct dentry *dentry);

/* Tear the lock down.  Returns 0 on success, -EBUSY while local
 * holders remain, -EINVAL if the DLM unlock fails. */
int user_dlm_destroy_lock(struct user_lock_res *lockres);

/* Take the lock at @level (LKM_EXMODE or LKM_PRMODE).  Returns 0, or
 * -EINVAL, -ERESTARTSYS or -EAGAIN on failure. */
int user_dlm_cluster_lock(struct user_lock_res *lockres,
			  int level,
			  int lkm_flags);

/* Drop one hold previously taken at @level. */
void user_dlm_cluster_unlock(struct user_lock_res *lockres,
			     int level);

/* Copy @len bytes (at most DLM_LVB_LEN) into the lock value block;
 * the lock must be granted at LKM_EXMODE. */
void user_dlm_write_lvb(struct inode *inode,
			const char *val,
			unsigned int len);

/* Copy @len bytes (at most DLM_LVB_LEN) out of the lock value block;
 * the lock must be granted at LKM_PRMODE or above. */
void user_dlm_read_lvb(struct inode *inode,
		       char *val,
		       unsigned int len);

/* Register a DLM domain named @name; returns the context or an
 * ERR_PTR value. */
struct dlm_ctxt *user_dlm_register_context(struct qstr *name,
				struct dlm_protocol_version *proto);

/* Unregister a domain obtained from user_dlm_register_context(). */
void user_dlm_unregister_context(struct dlm_ctxt *dlm);
89 | |||
90 | struct dlmfs_inode_private { | ||
91 | struct dlm_ctxt *ip_dlm; | ||
92 | |||
93 | struct user_lock_res ip_lockres; /* unused for directories. */ | ||
94 | struct inode *ip_parent; | ||
95 | |||
96 | struct inode ip_vfs_inode; | ||
97 | }; | ||
98 | |||
99 | static inline struct dlmfs_inode_private * | ||
100 | DLMFS_I(struct inode *inode) | ||
101 | { | ||
102 | return container_of(inode, | ||
103 | struct dlmfs_inode_private, | ||
104 | ip_vfs_inode); | ||
105 | } | ||
106 | |||
/* dlmfs per-open-file data. */
struct dlmfs_filp_private {
	int                  fp_lock_level;	/* presumably the level this
						 * open holds - confirm in
						 * dlmfs.c */
};

/* Magic number identifying dlmfs. */
#define DLMFS_MAGIC	0x76a9f425
112 | |||
113 | #endif /* USERDLM_H */ | ||