Diffstat (limited to 'fs/ocfs2/dlmfs')
-rw-r--r--   fs/ocfs2/dlmfs/Makefile    |    5
-rw-r--r--   fs/ocfs2/dlmfs/dlmfs.c     |  715
-rw-r--r--   fs/ocfs2/dlmfs/dlmfsver.c  |   42
-rw-r--r--   fs/ocfs2/dlmfs/dlmfsver.h  |   31
-rw-r--r--   fs/ocfs2/dlmfs/userdlm.c   |  688
-rw-r--r--   fs/ocfs2/dlmfs/userdlm.h   |  113
6 files changed, 1594 insertions, 0 deletions
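
For orientation before the diffs themselves: dlmfs exposes one DLM lock per file, and the open/read/write/close semantics implemented in dlmfs.c below map directly onto DLM operations. A minimal userspace sketch follows; the /dlm mount point, the domain name, and the 64-byte LVB size are assumptions taken from the usual dlmfs documentation, not something this diff establishes.

/* Minimal userspace sketch of driving dlmfs; error handling abbreviated.
 * Assumes ocfs2_dlmfs is mounted at /dlm and that /dlm/mydomain was
 * created with mkdir(2) (which registers a DLM domain, see dlmfs_mkdir()
 * below). The 64-byte LVB size is an assumption; the authoritative size
 * is the lock file's i_size (DLM_LVB_LEN). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char lvb[64];
	int fd;

	/* O_RDONLY -> PR (shared), O_WRONLY/O_RDWR -> EX (exclusive),
	 * O_NONBLOCK -> trylock; a refused trylock shows up as ETXTBSY. */
	fd = open("/dlm/mydomain/mylock", O_RDWR | O_CREAT | O_NONBLOCK, 0600);
	if (fd < 0) {
		perror("trylock");
		return 1;
	}

	/* read(2)/write(2) transfer the lock value block, not file data. */
	memset(lvb, 0, sizeof(lvb));
	snprintf(lvb, sizeof(lvb), "held by pid %d", (int)getpid());
	write(fd, lvb, sizeof(lvb));

	close(fd);	/* close(2) drops the cluster lock */
	return 0;
}

Note the error convention: dlmfs_file_open() below deliberately translates a refused O_NONBLOCK trylock (-EAGAIN from the DLM) into -ETXTBSY so callers can tell it apart from an invalid request.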
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
new file mode 100644
index 000000000000..df69b4856d0d
--- /dev/null
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -0,0 +1,5 @@
1 | EXTRA_CFLAGS += -Ifs/ocfs2 | ||
2 | |||
3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o | ||
4 | |||
5 | ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o | ||
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
new file mode 100644
index 000000000000..1b0de157a08c
--- /dev/null
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -0,0 +1,715 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfs.c | ||
5 | * | ||
6 | * Code which implements the kernel side of a minimal userspace | ||
7 | * interface to our DLM. This file handles the virtual file system | ||
8 | * used for communication with userspace. Credit should go to ramfs, | ||
9 | * which was a template for the fs side of this module. | ||
10 | * | ||
11 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public | ||
15 | * License as published by the Free Software Foundation; either | ||
16 | * version 2 of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
21 | * General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public | ||
24 | * License along with this program; if not, write to the | ||
25 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
26 | * Boston, MA 021110-1307, USA. | ||
27 | */ | ||
28 | |||
29 | /* Simple VFS hooks based on: */ | ||
30 | /* | ||
31 | * Resizable simple ram filesystem for Linux. | ||
32 | * | ||
33 | * Copyright (C) 2000 Linus Torvalds. | ||
34 | * 2000 Transmeta Corp. | ||
35 | */ | ||
36 | |||
37 | #include <linux/module.h> | ||
38 | #include <linux/fs.h> | ||
39 | #include <linux/pagemap.h> | ||
40 | #include <linux/types.h> | ||
41 | #include <linux/slab.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/string.h> | ||
45 | #include <linux/backing-dev.h> | ||
46 | #include <linux/poll.h> | ||
47 | |||
48 | #include <asm/uaccess.h> | ||
49 | |||
50 | #include "stackglue.h" | ||
51 | #include "userdlm.h" | ||
52 | #include "dlmfsver.h" | ||
53 | |||
54 | #define MLOG_MASK_PREFIX ML_DLMFS | ||
55 | #include "cluster/masklog.h" | ||
56 | |||
57 | |||
58 | static const struct super_operations dlmfs_ops; | ||
59 | static const struct file_operations dlmfs_file_operations; | ||
60 | static const struct inode_operations dlmfs_dir_inode_operations; | ||
61 | static const struct inode_operations dlmfs_root_inode_operations; | ||
62 | static const struct inode_operations dlmfs_file_inode_operations; | ||
63 | static struct kmem_cache *dlmfs_inode_cache; | ||
64 | |||
65 | struct workqueue_struct *user_dlm_worker; | ||
66 | |||
67 | |||
68 | |||
69 | /* | ||
70 | * These are the ABI capabilities of dlmfs. | ||
71 | * | ||
72 | * Over time, dlmfs has added some features that were not part of the | ||
73 | * initial ABI. Unfortunately, some of these features are not detectable | ||
74 | * via standard usage. For example, Linux's default poll always returns | ||
75 | * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs | ||
76 | * added poll support. Instead, we provide this list of new capabilities. | ||
77 | * | ||
78 | * Capabilities is a read-only attribute. We do it as a module parameter | ||
79 | * so we can discover it whether dlmfs is built in, loaded, or even not | ||
80 | * loaded. | ||
81 | * | ||
82 | * The ABI features are local to this machine's dlmfs mount. This is | ||
83 | * distinct from the locking protocol, which is concerned with inter-node | ||
84 | * interaction. | ||
85 | * | ||
86 | * Capabilities: | ||
87 | * - bast : POLLIN against the file descriptor of a held lock | ||
88 | * signifies a bast fired on the lock. | ||
89 | */ | ||
90 | #define DLMFS_CAPABILITIES "bast stackglue" | ||
91 | extern int param_set_dlmfs_capabilities(const char *val, | ||
92 | struct kernel_param *kp) | ||
93 | { | ||
94 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); | ||
95 | return -EINVAL; | ||
96 | } | ||
97 | static int param_get_dlmfs_capabilities(char *buffer, | ||
98 | struct kernel_param *kp) | ||
99 | { | ||
100 | return strlcpy(buffer, DLMFS_CAPABILITIES, | ||
101 | strlen(DLMFS_CAPABILITIES) + 1); | ||
102 | } | ||
103 | module_param_call(capabilities, param_set_dlmfs_capabilities, | ||
104 | param_get_dlmfs_capabilities, NULL, 0444); | ||
105 | MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); | ||
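
Because the capability list is exported as a read-only module parameter, userspace can probe it through sysfs without touching any lock files. A hedged sketch (the path follows the standard /sys/module/<module>/parameters layout; this program is not part of the diff):

/* Sketch: probe the dlmfs capability list without opening any locks.
 * "bast" in the returned string means the poll(2) support implemented
 * further down in this file is available. */
#include <stdio.h>

int main(void)
{
	char caps[128] = "";
	FILE *f = fopen("/sys/module/ocfs2_dlmfs/parameters/capabilities", "r");

	if (f) {
		fgets(caps, sizeof(caps), f);
		fclose(f);
	}
	printf("dlmfs capabilities: %s\n", caps);
	return 0;
}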
106 | |||
107 | |||
108 | /* | ||
109 | * decodes a set of open flags into a valid lock level and a set of flags. | ||
110 | * returns < 0 if we have invalid flags | ||
111 | * flags which mean something to us: | ||
112 | * O_RDONLY -> PRMODE level | ||
113 | * O_WRONLY -> EXMODE level | ||
114 | * | ||
115 | * O_NONBLOCK -> LKM_NOQUEUE | ||
116 | */ | ||
117 | static int dlmfs_decode_open_flags(int open_flags, | ||
118 | int *level, | ||
119 | int *flags) | ||
120 | { | ||
121 | if (open_flags & (O_WRONLY|O_RDWR)) | ||
122 | *level = LKM_EXMODE; | ||
123 | else | ||
124 | *level = LKM_PRMODE; | ||
125 | |||
126 | *flags = 0; | ||
127 | if (open_flags & O_NONBLOCK) | ||
128 | *flags |= LKM_NOQUEUE; | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static int dlmfs_file_open(struct inode *inode, | ||
134 | struct file *file) | ||
135 | { | ||
136 | int status, level, flags; | ||
137 | struct dlmfs_filp_private *fp = NULL; | ||
138 | struct dlmfs_inode_private *ip; | ||
139 | |||
140 | if (S_ISDIR(inode->i_mode)) | ||
141 | BUG(); | ||
142 | |||
143 | mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, | ||
144 | file->f_flags); | ||
145 | |||
146 | status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); | ||
147 | if (status < 0) | ||
148 | goto bail; | ||
149 | |||
150 | /* We don't want to honor O_APPEND at read/write time as it | ||
151 | * doesn't make sense for LVB writes. */ | ||
152 | file->f_flags &= ~O_APPEND; | ||
153 | |||
154 | fp = kmalloc(sizeof(*fp), GFP_NOFS); | ||
155 | if (!fp) { | ||
156 | status = -ENOMEM; | ||
157 | goto bail; | ||
158 | } | ||
159 | fp->fp_lock_level = level; | ||
160 | |||
161 | ip = DLMFS_I(inode); | ||
162 | |||
163 | status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); | ||
164 | if (status < 0) { | ||
165 | /* This is a strange error to return here, but I want | ||
166 | * userspace to be able to distinguish a valid lock | ||
167 | * request from one that simply couldn't be | ||
168 | * granted. */ | ||
169 | if (flags & LKM_NOQUEUE && status == -EAGAIN) | ||
170 | status = -ETXTBSY; | ||
171 | kfree(fp); | ||
172 | goto bail; | ||
173 | } | ||
174 | |||
175 | file->private_data = fp; | ||
176 | bail: | ||
177 | return status; | ||
178 | } | ||
179 | |||
180 | static int dlmfs_file_release(struct inode *inode, | ||
181 | struct file *file) | ||
182 | { | ||
183 | int level, status; | ||
184 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | ||
185 | struct dlmfs_filp_private *fp = | ||
186 | (struct dlmfs_filp_private *) file->private_data; | ||
187 | |||
188 | if (S_ISDIR(inode->i_mode)) | ||
189 | BUG(); | ||
190 | |||
191 | mlog(0, "close called on inode %lu\n", inode->i_ino); | ||
192 | |||
193 | status = 0; | ||
194 | if (fp) { | ||
195 | level = fp->fp_lock_level; | ||
196 | if (level != LKM_IVMODE) | ||
197 | user_dlm_cluster_unlock(&ip->ip_lockres, level); | ||
198 | |||
199 | kfree(fp); | ||
200 | file->private_data = NULL; | ||
201 | } | ||
202 | |||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * We do ->setattr() just to override size changes. Our size is the size | ||
208 | * of the LVB and nothing else. | ||
209 | */ | ||
210 | static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr) | ||
211 | { | ||
212 | int error; | ||
213 | struct inode *inode = dentry->d_inode; | ||
214 | |||
215 | attr->ia_valid &= ~ATTR_SIZE; | ||
216 | error = inode_change_ok(inode, attr); | ||
217 | if (!error) | ||
218 | error = inode_setattr(inode, attr); | ||
219 | |||
220 | return error; | ||
221 | } | ||
222 | |||
223 | static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) | ||
224 | { | ||
225 | int event = 0; | ||
226 | struct inode *inode = file->f_path.dentry->d_inode; | ||
227 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | ||
228 | |||
229 | poll_wait(file, &ip->ip_lockres.l_event, wait); | ||
230 | |||
231 | spin_lock(&ip->ip_lockres.l_lock); | ||
232 | if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED) | ||
233 | event = POLLIN | POLLRDNORM; | ||
234 | spin_unlock(&ip->ip_lockres.l_lock); | ||
235 | |||
236 | return event; | ||
237 | } | ||
238 | |||
239 | static ssize_t dlmfs_file_read(struct file *filp, | ||
240 | char __user *buf, | ||
241 | size_t count, | ||
242 | loff_t *ppos) | ||
243 | { | ||
244 | int bytes_left; | ||
245 | ssize_t readlen, got; | ||
246 | char *lvb_buf; | ||
247 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
248 | |||
249 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | ||
250 | inode->i_ino, count, *ppos); | ||
251 | |||
252 | if (*ppos >= i_size_read(inode)) | ||
253 | return 0; | ||
254 | |||
255 | if (!count) | ||
256 | return 0; | ||
257 | |||
258 | if (!access_ok(VERIFY_WRITE, buf, count)) | ||
259 | return -EFAULT; | ||
260 | |||
261 | /* don't read past the lvb */ | ||
262 | if ((count + *ppos) > i_size_read(inode)) | ||
263 | readlen = i_size_read(inode) - *ppos; | ||
264 | else | ||
265 | readlen = count; | ||
266 | |||
267 | lvb_buf = kmalloc(readlen, GFP_NOFS); | ||
268 | if (!lvb_buf) | ||
269 | return -ENOMEM; | ||
270 | |||
271 | got = user_dlm_read_lvb(inode, lvb_buf, readlen); | ||
272 | if (got) { | ||
273 | BUG_ON(got != readlen); | ||
274 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); | ||
275 | readlen -= bytes_left; | ||
276 | } else | ||
277 | readlen = 0; | ||
278 | |||
279 | kfree(lvb_buf); | ||
280 | |||
281 | *ppos = *ppos + readlen; | ||
282 | |||
283 | mlog(0, "read %zd bytes\n", readlen); | ||
284 | return readlen; | ||
285 | } | ||
286 | |||
287 | static ssize_t dlmfs_file_write(struct file *filp, | ||
288 | const char __user *buf, | ||
289 | size_t count, | ||
290 | loff_t *ppos) | ||
291 | { | ||
292 | int bytes_left; | ||
293 | ssize_t writelen; | ||
294 | char *lvb_buf; | ||
295 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
296 | |||
297 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | ||
298 | inode->i_ino, count, *ppos); | ||
299 | |||
300 | if (*ppos >= i_size_read(inode)) | ||
301 | return -ENOSPC; | ||
302 | |||
303 | if (!count) | ||
304 | return 0; | ||
305 | |||
306 | if (!access_ok(VERIFY_READ, buf, count)) | ||
307 | return -EFAULT; | ||
308 | |||
309 | /* don't write past the lvb */ | ||
310 | if ((count + *ppos) > i_size_read(inode)) | ||
311 | writelen = i_size_read(inode) - *ppos; | ||
312 | else | ||
313 | writelen = count; | ||
314 | |||
315 | lvb_buf = kmalloc(writelen, GFP_NOFS); | ||
316 | if (!lvb_buf) | ||
317 | return -ENOMEM; | ||
318 | |||
319 | bytes_left = copy_from_user(lvb_buf, buf, writelen); | ||
320 | writelen -= bytes_left; | ||
321 | if (writelen) | ||
322 | user_dlm_write_lvb(inode, lvb_buf, writelen); | ||
323 | |||
324 | kfree(lvb_buf); | ||
325 | |||
326 | *ppos = *ppos + writelen; | ||
327 | mlog(0, "wrote %zd bytes\n", writelen); | ||
328 | return writelen; | ||
329 | } | ||
330 | |||
331 | static void dlmfs_init_once(void *foo) | ||
332 | { | ||
333 | struct dlmfs_inode_private *ip = | ||
334 | (struct dlmfs_inode_private *) foo; | ||
335 | |||
336 | ip->ip_conn = NULL; | ||
337 | ip->ip_parent = NULL; | ||
338 | |||
339 | inode_init_once(&ip->ip_vfs_inode); | ||
340 | } | ||
341 | |||
342 | static struct inode *dlmfs_alloc_inode(struct super_block *sb) | ||
343 | { | ||
344 | struct dlmfs_inode_private *ip; | ||
345 | |||
346 | ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); | ||
347 | if (!ip) | ||
348 | return NULL; | ||
349 | |||
350 | return &ip->ip_vfs_inode; | ||
351 | } | ||
352 | |||
353 | static void dlmfs_destroy_inode(struct inode *inode) | ||
354 | { | ||
355 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); | ||
356 | } | ||
357 | |||
358 | static void dlmfs_clear_inode(struct inode *inode) | ||
359 | { | ||
360 | int status; | ||
361 | struct dlmfs_inode_private *ip; | ||
362 | |||
363 | if (!inode) | ||
364 | return; | ||
365 | |||
366 | mlog(0, "inode %lu\n", inode->i_ino); | ||
367 | |||
368 | ip = DLMFS_I(inode); | ||
369 | |||
370 | if (S_ISREG(inode->i_mode)) { | ||
371 | status = user_dlm_destroy_lock(&ip->ip_lockres); | ||
372 | if (status < 0) | ||
373 | mlog_errno(status); | ||
374 | iput(ip->ip_parent); | ||
375 | goto clear_fields; | ||
376 | } | ||
377 | |||
378 | mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn); | ||
379 | /* we must be a directory. If required, let's unregister the | ||
380 | * dlm context now. */ | ||
381 | if (ip->ip_conn) | ||
382 | user_dlm_unregister(ip->ip_conn); | ||
383 | clear_fields: | ||
384 | ip->ip_parent = NULL; | ||
385 | ip->ip_conn = NULL; | ||
386 | } | ||
387 | |||
388 | static struct backing_dev_info dlmfs_backing_dev_info = { | ||
389 | .name = "ocfs2-dlmfs", | ||
390 | .ra_pages = 0, /* No readahead */ | ||
391 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
392 | }; | ||
393 | |||
394 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) | ||
395 | { | ||
396 | struct inode *inode = new_inode(sb); | ||
397 | int mode = S_IFDIR | 0755; | ||
398 | struct dlmfs_inode_private *ip; | ||
399 | |||
400 | if (inode) { | ||
401 | ip = DLMFS_I(inode); | ||
402 | |||
403 | inode->i_mode = mode; | ||
404 | inode->i_uid = current_fsuid(); | ||
405 | inode->i_gid = current_fsgid(); | ||
406 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
407 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
408 | inc_nlink(inode); | ||
409 | |||
410 | inode->i_fop = &simple_dir_operations; | ||
411 | inode->i_op = &dlmfs_root_inode_operations; | ||
412 | } | ||
413 | |||
414 | return inode; | ||
415 | } | ||
416 | |||
417 | static struct inode *dlmfs_get_inode(struct inode *parent, | ||
418 | struct dentry *dentry, | ||
419 | int mode) | ||
420 | { | ||
421 | struct super_block *sb = parent->i_sb; | ||
422 | struct inode * inode = new_inode(sb); | ||
423 | struct dlmfs_inode_private *ip; | ||
424 | |||
425 | if (!inode) | ||
426 | return NULL; | ||
427 | |||
428 | inode->i_mode = mode; | ||
429 | inode->i_uid = current_fsuid(); | ||
430 | inode->i_gid = current_fsgid(); | ||
431 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
432 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
433 | |||
434 | ip = DLMFS_I(inode); | ||
435 | ip->ip_conn = DLMFS_I(parent)->ip_conn; | ||
436 | |||
437 | switch (mode & S_IFMT) { | ||
438 | default: | ||
439 | /* for now we don't support anything other than | ||
440 | * directories and regular files. */ | ||
441 | BUG(); | ||
442 | break; | ||
443 | case S_IFREG: | ||
444 | inode->i_op = &dlmfs_file_inode_operations; | ||
445 | inode->i_fop = &dlmfs_file_operations; | ||
446 | |||
447 | i_size_write(inode, DLM_LVB_LEN); | ||
448 | |||
449 | user_dlm_lock_res_init(&ip->ip_lockres, dentry); | ||
450 | |||
451 | /* released at clear_inode time, this ensures that we | ||
452 | * get to drop the dlm reference on each lock *before* | ||
453 | * we call the unregister code for releasing parent | ||
454 | * directories. */ | ||
455 | ip->ip_parent = igrab(parent); | ||
456 | BUG_ON(!ip->ip_parent); | ||
457 | break; | ||
458 | case S_IFDIR: | ||
459 | inode->i_op = &dlmfs_dir_inode_operations; | ||
460 | inode->i_fop = &simple_dir_operations; | ||
461 | |||
462 | /* directory inodes start off with i_nlink == | ||
463 | * 2 (for "." entry) */ | ||
464 | inc_nlink(inode); | ||
465 | break; | ||
466 | } | ||
467 | |||
468 | if (parent->i_mode & S_ISGID) { | ||
469 | inode->i_gid = parent->i_gid; | ||
470 | if (S_ISDIR(mode)) | ||
471 | inode->i_mode |= S_ISGID; | ||
472 | } | ||
473 | |||
474 | return inode; | ||
475 | } | ||
476 | |||
477 | /* | ||
478 | * File creation. Allocate an inode, and we're done.. | ||
479 | */ | ||
480 | /* SMP-safe */ | ||
481 | static int dlmfs_mkdir(struct inode * dir, | ||
482 | struct dentry * dentry, | ||
483 | int mode) | ||
484 | { | ||
485 | int status; | ||
486 | struct inode *inode = NULL; | ||
487 | struct qstr *domain = &dentry->d_name; | ||
488 | struct dlmfs_inode_private *ip; | ||
489 | struct ocfs2_cluster_connection *conn; | ||
490 | |||
491 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); | ||
492 | |||
493 | /* verify that we have a proper domain */ | ||
494 | if (domain->len >= GROUP_NAME_MAX) { | ||
495 | status = -EINVAL; | ||
496 | mlog(ML_ERROR, "invalid domain name for directory.\n"); | ||
497 | goto bail; | ||
498 | } | ||
499 | |||
500 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); | ||
501 | if (!inode) { | ||
502 | status = -ENOMEM; | ||
503 | mlog_errno(status); | ||
504 | goto bail; | ||
505 | } | ||
506 | |||
507 | ip = DLMFS_I(inode); | ||
508 | |||
509 | conn = user_dlm_register(domain); | ||
510 | if (IS_ERR(conn)) { | ||
511 | status = PTR_ERR(conn); | ||
512 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", | ||
513 | status, domain->len, domain->name); | ||
514 | goto bail; | ||
515 | } | ||
516 | ip->ip_conn = conn; | ||
517 | |||
518 | inc_nlink(dir); | ||
519 | d_instantiate(dentry, inode); | ||
520 | dget(dentry); /* Extra count - pin the dentry in core */ | ||
521 | |||
522 | status = 0; | ||
523 | bail: | ||
524 | if (status < 0) | ||
525 | iput(inode); | ||
526 | return status; | ||
527 | } | ||
528 | |||
529 | static int dlmfs_create(struct inode *dir, | ||
530 | struct dentry *dentry, | ||
531 | int mode, | ||
532 | struct nameidata *nd) | ||
533 | { | ||
534 | int status = 0; | ||
535 | struct inode *inode; | ||
536 | struct qstr *name = &dentry->d_name; | ||
537 | |||
538 | mlog(0, "create %.*s\n", name->len, name->name); | ||
539 | |||
540 | /* verify name is valid and doesn't contain any dlm reserved | ||
541 | * characters */ | ||
542 | if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || | ||
543 | name->name[0] == '$') { | ||
544 | status = -EINVAL; | ||
545 | mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, | ||
546 | name->name); | ||
547 | goto bail; | ||
548 | } | ||
549 | |||
550 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); | ||
551 | if (!inode) { | ||
552 | status = -ENOMEM; | ||
553 | mlog_errno(status); | ||
554 | goto bail; | ||
555 | } | ||
556 | |||
557 | d_instantiate(dentry, inode); | ||
558 | dget(dentry); /* Extra count - pin the dentry in core */ | ||
559 | bail: | ||
560 | return status; | ||
561 | } | ||
562 | |||
563 | static int dlmfs_unlink(struct inode *dir, | ||
564 | struct dentry *dentry) | ||
565 | { | ||
566 | int status; | ||
567 | struct inode *inode = dentry->d_inode; | ||
568 | |||
569 | mlog(0, "unlink inode %lu\n", inode->i_ino); | ||
570 | |||
571 | /* if there are no current holders, or none that are waiting | ||
572 | * to acquire a lock, this basically destroys our lockres. */ | ||
573 | status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); | ||
574 | if (status < 0) { | ||
575 | mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", | ||
576 | dentry->d_name.len, dentry->d_name.name, status); | ||
577 | goto bail; | ||
578 | } | ||
579 | status = simple_unlink(dir, dentry); | ||
580 | bail: | ||
581 | return status; | ||
582 | } | ||
583 | |||
584 | static int dlmfs_fill_super(struct super_block * sb, | ||
585 | void * data, | ||
586 | int silent) | ||
587 | { | ||
588 | struct inode * inode; | ||
589 | struct dentry * root; | ||
590 | |||
591 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
592 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
593 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
594 | sb->s_magic = DLMFS_MAGIC; | ||
595 | sb->s_op = &dlmfs_ops; | ||
596 | inode = dlmfs_get_root_inode(sb); | ||
597 | if (!inode) | ||
598 | return -ENOMEM; | ||
599 | |||
600 | root = d_alloc_root(inode); | ||
601 | if (!root) { | ||
602 | iput(inode); | ||
603 | return -ENOMEM; | ||
604 | } | ||
605 | sb->s_root = root; | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | static const struct file_operations dlmfs_file_operations = { | ||
610 | .open = dlmfs_file_open, | ||
611 | .release = dlmfs_file_release, | ||
612 | .poll = dlmfs_file_poll, | ||
613 | .read = dlmfs_file_read, | ||
614 | .write = dlmfs_file_write, | ||
615 | }; | ||
616 | |||
617 | static const struct inode_operations dlmfs_dir_inode_operations = { | ||
618 | .create = dlmfs_create, | ||
619 | .lookup = simple_lookup, | ||
620 | .unlink = dlmfs_unlink, | ||
621 | }; | ||
622 | |||
623 | /* this way we can restrict mkdir to only the toplevel of the fs. */ | ||
624 | static const struct inode_operations dlmfs_root_inode_operations = { | ||
625 | .lookup = simple_lookup, | ||
626 | .mkdir = dlmfs_mkdir, | ||
627 | .rmdir = simple_rmdir, | ||
628 | }; | ||
629 | |||
630 | static const struct super_operations dlmfs_ops = { | ||
631 | .statfs = simple_statfs, | ||
632 | .alloc_inode = dlmfs_alloc_inode, | ||
633 | .destroy_inode = dlmfs_destroy_inode, | ||
634 | .clear_inode = dlmfs_clear_inode, | ||
635 | .drop_inode = generic_delete_inode, | ||
636 | }; | ||
637 | |||
638 | static const struct inode_operations dlmfs_file_inode_operations = { | ||
639 | .getattr = simple_getattr, | ||
640 | .setattr = dlmfs_file_setattr, | ||
641 | }; | ||
642 | |||
643 | static int dlmfs_get_sb(struct file_system_type *fs_type, | ||
644 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
645 | { | ||
646 | return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); | ||
647 | } | ||
648 | |||
649 | static struct file_system_type dlmfs_fs_type = { | ||
650 | .owner = THIS_MODULE, | ||
651 | .name = "ocfs2_dlmfs", | ||
652 | .get_sb = dlmfs_get_sb, | ||
653 | .kill_sb = kill_litter_super, | ||
654 | }; | ||
655 | |||
656 | static int __init init_dlmfs_fs(void) | ||
657 | { | ||
658 | int status; | ||
659 | int cleanup_inode = 0, cleanup_worker = 0; | ||
660 | |||
661 | dlmfs_print_version(); | ||
662 | |||
663 | status = bdi_init(&dlmfs_backing_dev_info); | ||
664 | if (status) | ||
665 | return status; | ||
666 | |||
667 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", | ||
668 | sizeof(struct dlmfs_inode_private), | ||
669 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | ||
670 | SLAB_MEM_SPREAD), | ||
671 | dlmfs_init_once); | ||
672 | if (!dlmfs_inode_cache) { | ||
673 | status = -ENOMEM; | ||
674 | goto bail; | ||
675 | } | ||
676 | cleanup_inode = 1; | ||
677 | |||
678 | user_dlm_worker = create_singlethread_workqueue("user_dlm"); | ||
679 | if (!user_dlm_worker) { | ||
680 | status = -ENOMEM; | ||
681 | goto bail; | ||
682 | } | ||
683 | cleanup_worker = 1; | ||
684 | |||
685 | user_dlm_set_locking_protocol(); | ||
686 | status = register_filesystem(&dlmfs_fs_type); | ||
687 | bail: | ||
688 | if (status) { | ||
689 | if (cleanup_inode) | ||
690 | kmem_cache_destroy(dlmfs_inode_cache); | ||
691 | if (cleanup_worker) | ||
692 | destroy_workqueue(user_dlm_worker); | ||
693 | bdi_destroy(&dlmfs_backing_dev_info); | ||
694 | } else | ||
695 | printk("OCFS2 User DLM kernel interface loaded\n"); | ||
696 | return status; | ||
697 | } | ||
698 | |||
699 | static void __exit exit_dlmfs_fs(void) | ||
700 | { | ||
701 | unregister_filesystem(&dlmfs_fs_type); | ||
702 | |||
703 | flush_workqueue(user_dlm_worker); | ||
704 | destroy_workqueue(user_dlm_worker); | ||
705 | |||
706 | kmem_cache_destroy(dlmfs_inode_cache); | ||
707 | |||
708 | bdi_destroy(&dlmfs_backing_dev_info); | ||
709 | } | ||
710 | |||
711 | MODULE_AUTHOR("Oracle"); | ||
712 | MODULE_LICENSE("GPL"); | ||
713 | |||
714 | module_init(init_dlmfs_fs) | ||
715 | module_exit(exit_dlmfs_fs) | ||
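
The "bast" capability advertised above is backed by dlmfs_file_poll(): POLLIN on the file descriptor of a held lock means USER_LOCK_BLOCKED is set, i.e. another node is waiting on the lock. A sketch of how a holder might consume that follows; wait_for_bast() is a hypothetical helper, not part of this diff.

/* Sketch: wait for a BAST on a dlmfs lock we currently hold.
 * POLLIN on the lock's fd means another node is queued behind us
 * and we should release the lock soon. 'fd' is assumed to be an
 * open dlmfs lock file descriptor. */
#include <poll.h>

static int wait_for_bast(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int ret = poll(&pfd, 1, timeout_ms);

	if (ret > 0 && (pfd.revents & POLLIN))
		return 1;	/* contended: another node wants this lock */
	return ret;		/* 0 on timeout, -1 on error (see errno) */
}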
diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c
new file mode 100644
index 000000000000..a733b3321f83
--- /dev/null
+++ b/fs/ocfs2/dlmfs/dlmfsver.c
@@ -0,0 +1,42 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | |||
29 | #include "dlmfsver.h" | ||
30 | |||
31 | #define DLM_BUILD_VERSION "1.5.0" | ||
32 | |||
33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | ||
34 | |||
35 | void dlmfs_print_version(void) | ||
36 | { | ||
37 | printk(KERN_INFO "%s\n", VERSION_STR); | ||
38 | } | ||
39 | |||
40 | MODULE_DESCRIPTION(VERSION_STR); | ||
41 | |||
42 | MODULE_VERSION(DLM_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h
new file mode 100644
index 000000000000..f35eadbed25c
--- /dev/null
+++ b/fs/ocfs2/dlmfs/dlmfsver.h
@@ -0,0 +1,31 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef DLMFS_VER_H | ||
27 | #define DLMFS_VER_H | ||
28 | |||
29 | void dlmfs_print_version(void); | ||
30 | |||
31 | #endif /* DLMFS_VER_H */ | ||
diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c
new file mode 100644
index 000000000000..0499e3fb7bdb
--- /dev/null
+++ b/fs/ocfs2/dlmfs/userdlm.c
@@ -0,0 +1,688 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * userdlm.c | ||
5 | * | ||
6 | * Code which implements the kernel side of a minimal userspace | ||
7 | * interface to our DLM. | ||
8 | * | ||
9 | * Many of the functions here are pared down versions of dlmglue.c | ||
10 | * functions. | ||
11 | * | ||
12 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public | ||
16 | * License as published by the Free Software Foundation; either | ||
17 | * version 2 of the License, or (at your option) any later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, | ||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
22 | * General Public License for more details. | ||
23 | * | ||
24 | * You should have received a copy of the GNU General Public | ||
25 | * License along with this program; if not, write to the | ||
26 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
27 | * Boston, MA 021110-1307, USA. | ||
28 | */ | ||
29 | |||
30 | #include <linux/signal.h> | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/fs.h> | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/crc32.h> | ||
36 | |||
37 | #include "ocfs2_lockingver.h" | ||
38 | #include "stackglue.h" | ||
39 | #include "userdlm.h" | ||
40 | |||
41 | #define MLOG_MASK_PREFIX ML_DLMFS | ||
42 | #include "cluster/masklog.h" | ||
43 | |||
44 | |||
45 | static inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) | ||
46 | { | ||
47 | return container_of(lksb, struct user_lock_res, l_lksb); | ||
48 | } | ||
49 | |||
50 | static inline int user_check_wait_flag(struct user_lock_res *lockres, | ||
51 | int flag) | ||
52 | { | ||
53 | int ret; | ||
54 | |||
55 | spin_lock(&lockres->l_lock); | ||
56 | ret = lockres->l_flags & flag; | ||
57 | spin_unlock(&lockres->l_lock); | ||
58 | |||
59 | return ret; | ||
60 | } | ||
61 | |||
62 | static inline void user_wait_on_busy_lock(struct user_lock_res *lockres) | ||
63 | |||
64 | { | ||
65 | wait_event(lockres->l_event, | ||
66 | !user_check_wait_flag(lockres, USER_LOCK_BUSY)); | ||
67 | } | ||
68 | |||
69 | static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres) | ||
70 | |||
71 | { | ||
72 | wait_event(lockres->l_event, | ||
73 | !user_check_wait_flag(lockres, USER_LOCK_BLOCKED)); | ||
74 | } | ||
75 | |||
76 | /* I heart container_of... */ | ||
77 | static inline struct ocfs2_cluster_connection * | ||
78 | cluster_connection_from_user_lockres(struct user_lock_res *lockres) | ||
79 | { | ||
80 | struct dlmfs_inode_private *ip; | ||
81 | |||
82 | ip = container_of(lockres, | ||
83 | struct dlmfs_inode_private, | ||
84 | ip_lockres); | ||
85 | return ip->ip_conn; | ||
86 | } | ||
87 | |||
88 | static struct inode * | ||
89 | user_dlm_inode_from_user_lockres(struct user_lock_res *lockres) | ||
90 | { | ||
91 | struct dlmfs_inode_private *ip; | ||
92 | |||
93 | ip = container_of(lockres, | ||
94 | struct dlmfs_inode_private, | ||
95 | ip_lockres); | ||
96 | return &ip->ip_vfs_inode; | ||
97 | } | ||
98 | |||
99 | static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | ||
100 | { | ||
101 | spin_lock(&lockres->l_lock); | ||
102 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
103 | spin_unlock(&lockres->l_lock); | ||
104 | } | ||
105 | |||
106 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ | ||
107 | mlog(ML_ERROR, "Dlm error %d while calling %s on " \ | ||
108 | "resource %.*s\n", _stat, _func, \ | ||
109 | _lockres->l_namelen, _lockres->l_name); \ | ||
110 | } while (0) | ||
111 | |||
112 | /* WARNING: This function lives in a world where the only three lock | ||
113 | * levels are EX, PR, and NL. It *will* have to be adjusted when more | ||
114 | * lock types are added. */ | ||
115 | static inline int user_highest_compat_lock_level(int level) | ||
116 | { | ||
117 | int new_level = DLM_LOCK_EX; | ||
118 | |||
119 | if (level == DLM_LOCK_EX) | ||
120 | new_level = DLM_LOCK_NL; | ||
121 | else if (level == DLM_LOCK_PR) | ||
122 | new_level = DLM_LOCK_PR; | ||
123 | return new_level; | ||
124 | } | ||
125 | |||
126 | static void user_ast(struct ocfs2_dlm_lksb *lksb) | ||
127 | { | ||
128 | struct user_lock_res *lockres = user_lksb_to_lock_res(lksb); | ||
129 | int status; | ||
130 | |||
131 | mlog(ML_BASTS, "AST fired for lockres %.*s, level %d => %d\n", | ||
132 | lockres->l_namelen, lockres->l_name, lockres->l_level, | ||
133 | lockres->l_requested); | ||
134 | |||
135 | spin_lock(&lockres->l_lock); | ||
136 | |||
137 | status = ocfs2_dlm_lock_status(&lockres->l_lksb); | ||
138 | if (status) { | ||
139 | mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", | ||
140 | status, lockres->l_namelen, lockres->l_name); | ||
141 | spin_unlock(&lockres->l_lock); | ||
142 | return; | ||
143 | } | ||
144 | |||
145 | mlog_bug_on_msg(lockres->l_requested == DLM_LOCK_IV, | ||
146 | "Lockres %.*s, requested ivmode. flags 0x%x\n", | ||
147 | lockres->l_namelen, lockres->l_name, lockres->l_flags); | ||
148 | |||
149 | /* we're downconverting. */ | ||
150 | if (lockres->l_requested < lockres->l_level) { | ||
151 | if (lockres->l_requested <= | ||
152 | user_highest_compat_lock_level(lockres->l_blocking)) { | ||
153 | lockres->l_blocking = DLM_LOCK_NL; | ||
154 | lockres->l_flags &= ~USER_LOCK_BLOCKED; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | lockres->l_level = lockres->l_requested; | ||
159 | lockres->l_requested = DLM_LOCK_IV; | ||
160 | lockres->l_flags |= USER_LOCK_ATTACHED; | ||
161 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
162 | |||
163 | spin_unlock(&lockres->l_lock); | ||
164 | |||
165 | wake_up(&lockres->l_event); | ||
166 | } | ||
167 | |||
168 | static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres) | ||
169 | { | ||
170 | struct inode *inode; | ||
171 | inode = user_dlm_inode_from_user_lockres(lockres); | ||
172 | if (!igrab(inode)) | ||
173 | BUG(); | ||
174 | } | ||
175 | |||
176 | static void user_dlm_unblock_lock(struct work_struct *work); | ||
177 | |||
178 | static void __user_dlm_queue_lockres(struct user_lock_res *lockres) | ||
179 | { | ||
180 | if (!(lockres->l_flags & USER_LOCK_QUEUED)) { | ||
181 | user_dlm_grab_inode_ref(lockres); | ||
182 | |||
183 | INIT_WORK(&lockres->l_work, user_dlm_unblock_lock); | ||
184 | |||
185 | queue_work(user_dlm_worker, &lockres->l_work); | ||
186 | lockres->l_flags |= USER_LOCK_QUEUED; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres) | ||
191 | { | ||
192 | int queue = 0; | ||
193 | |||
194 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) | ||
195 | return; | ||
196 | |||
197 | switch (lockres->l_blocking) { | ||
198 | case DLM_LOCK_EX: | ||
199 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | ||
200 | queue = 1; | ||
201 | break; | ||
202 | case DLM_LOCK_PR: | ||
203 | if (!lockres->l_ex_holders) | ||
204 | queue = 1; | ||
205 | break; | ||
206 | default: | ||
207 | BUG(); | ||
208 | } | ||
209 | |||
210 | if (queue) | ||
211 | __user_dlm_queue_lockres(lockres); | ||
212 | } | ||
213 | |||
214 | static void user_bast(struct ocfs2_dlm_lksb *lksb, int level) | ||
215 | { | ||
216 | struct user_lock_res *lockres = user_lksb_to_lock_res(lksb); | ||
217 | |||
218 | mlog(ML_BASTS, "BAST fired for lockres %.*s, blocking %d, level %d\n", | ||
219 | lockres->l_namelen, lockres->l_name, level, lockres->l_level); | ||
220 | |||
221 | spin_lock(&lockres->l_lock); | ||
222 | lockres->l_flags |= USER_LOCK_BLOCKED; | ||
223 | if (level > lockres->l_blocking) | ||
224 | lockres->l_blocking = level; | ||
225 | |||
226 | __user_dlm_queue_lockres(lockres); | ||
227 | spin_unlock(&lockres->l_lock); | ||
228 | |||
229 | wake_up(&lockres->l_event); | ||
230 | } | ||
231 | |||
232 | static void user_unlock_ast(struct ocfs2_dlm_lksb *lksb, int status) | ||
233 | { | ||
234 | struct user_lock_res *lockres = user_lksb_to_lock_res(lksb); | ||
235 | |||
236 | mlog(ML_BASTS, "UNLOCK AST fired for lockres %.*s, flags 0x%x\n", | ||
237 | lockres->l_namelen, lockres->l_name, lockres->l_flags); | ||
238 | |||
239 | if (status) | ||
240 | mlog(ML_ERROR, "dlm returns status %d\n", status); | ||
241 | |||
242 | spin_lock(&lockres->l_lock); | ||
243 | /* The teardown flag gets set early during the unlock process, | ||
244 | * so test the cancel flag to make sure that this ast isn't | ||
245 | * for a concurrent cancel. */ | ||
246 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN | ||
247 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { | ||
248 | lockres->l_level = DLM_LOCK_IV; | ||
249 | } else if (status == DLM_CANCELGRANT) { | ||
250 | /* We tried to cancel a convert request, but it was | ||
251 | * already granted. Don't clear the busy flag - the | ||
252 | * ast should've done this already. */ | ||
253 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | ||
254 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | ||
255 | goto out_noclear; | ||
256 | } else { | ||
257 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | ||
258 | /* Cancel succeeded, we want to re-queue */ | ||
259 | lockres->l_requested = DLM_LOCK_IV; /* cancel an | ||
260 | * upconvert | ||
261 | * request. */ | ||
262 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | ||
263 | /* we want the unblock thread to look at it again | ||
264 | * now. */ | ||
265 | if (lockres->l_flags & USER_LOCK_BLOCKED) | ||
266 | __user_dlm_queue_lockres(lockres); | ||
267 | } | ||
268 | |||
269 | lockres->l_flags &= ~USER_LOCK_BUSY; | ||
270 | out_noclear: | ||
271 | spin_unlock(&lockres->l_lock); | ||
272 | |||
273 | wake_up(&lockres->l_event); | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * This is the userdlmfs locking protocol version. | ||
278 | * | ||
279 | * See fs/ocfs2/dlmglue.c for more details on locking versions. | ||
280 | */ | ||
281 | static struct ocfs2_locking_protocol user_dlm_lproto = { | ||
282 | .lp_max_version = { | ||
283 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
284 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
285 | }, | ||
286 | .lp_lock_ast = user_ast, | ||
287 | .lp_blocking_ast = user_bast, | ||
288 | .lp_unlock_ast = user_unlock_ast, | ||
289 | }; | ||
290 | |||
291 | static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres) | ||
292 | { | ||
293 | struct inode *inode; | ||
294 | inode = user_dlm_inode_from_user_lockres(lockres); | ||
295 | iput(inode); | ||
296 | } | ||
297 | |||
298 | static void user_dlm_unblock_lock(struct work_struct *work) | ||
299 | { | ||
300 | int new_level, status; | ||
301 | struct user_lock_res *lockres = | ||
302 | container_of(work, struct user_lock_res, l_work); | ||
303 | struct ocfs2_cluster_connection *conn = | ||
304 | cluster_connection_from_user_lockres(lockres); | ||
305 | |||
306 | mlog(0, "lockres %.*s\n", lockres->l_namelen, lockres->l_name); | ||
307 | |||
308 | spin_lock(&lockres->l_lock); | ||
309 | |||
310 | mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), | ||
311 | "Lockres %.*s, flags 0x%x\n", | ||
312 | lockres->l_namelen, lockres->l_name, lockres->l_flags); | ||
313 | |||
314 | /* notice that we don't clear USER_LOCK_BLOCKED here. If it's | ||
315 | * set, we want user_ast to clear it. */ | ||
316 | lockres->l_flags &= ~USER_LOCK_QUEUED; | ||
317 | |||
318 | /* It's valid to get here and no longer be blocked - if we get | ||
319 | * several basts in a row, we might be queued by the first | ||
320 | * one, the unblock thread might run and clear the queued | ||
321 | * flag, and finally we might get another bast which re-queues | ||
322 | * us before our ast for the downconvert is called. */ | ||
323 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { | ||
324 | mlog(ML_BASTS, "lockres %.*s USER_LOCK_BLOCKED\n", | ||
325 | lockres->l_namelen, lockres->l_name); | ||
326 | spin_unlock(&lockres->l_lock); | ||
327 | goto drop_ref; | ||
328 | } | ||
329 | |||
330 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | ||
331 | mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_TEARDOWN\n", | ||
332 | lockres->l_namelen, lockres->l_name); | ||
333 | spin_unlock(&lockres->l_lock); | ||
334 | goto drop_ref; | ||
335 | } | ||
336 | |||
337 | if (lockres->l_flags & USER_LOCK_BUSY) { | ||
338 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { | ||
339 | mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_CANCEL\n", | ||
340 | lockres->l_namelen, lockres->l_name); | ||
341 | spin_unlock(&lockres->l_lock); | ||
342 | goto drop_ref; | ||
343 | } | ||
344 | |||
345 | lockres->l_flags |= USER_LOCK_IN_CANCEL; | ||
346 | spin_unlock(&lockres->l_lock); | ||
347 | |||
348 | status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, | ||
349 | DLM_LKF_CANCEL); | ||
350 | if (status) | ||
351 | user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); | ||
352 | goto drop_ref; | ||
353 | } | ||
354 | |||
355 | /* If there are still incompat holders, we can exit safely | ||
356 | * without worrying about re-queueing this lock as that will | ||
357 | * happen on the last call to user_cluster_unlock. */ | ||
358 | if ((lockres->l_blocking == DLM_LOCK_EX) | ||
359 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | ||
360 | spin_unlock(&lockres->l_lock); | ||
361 | mlog(ML_BASTS, "lockres %.*s, EX/PR Holders %u,%u\n", | ||
362 | lockres->l_namelen, lockres->l_name, | ||
363 | lockres->l_ex_holders, lockres->l_ro_holders); | ||
364 | goto drop_ref; | ||
365 | } | ||
366 | |||
367 | if ((lockres->l_blocking == DLM_LOCK_PR) | ||
368 | && lockres->l_ex_holders) { | ||
369 | spin_unlock(&lockres->l_lock); | ||
370 | mlog(ML_BASTS, "lockres %.*s, EX Holders %u\n", | ||
371 | lockres->l_namelen, lockres->l_name, | ||
372 | lockres->l_ex_holders); | ||
373 | goto drop_ref; | ||
374 | } | ||
375 | |||
376 | /* yay, we can downconvert now. */ | ||
377 | new_level = user_highest_compat_lock_level(lockres->l_blocking); | ||
378 | lockres->l_requested = new_level; | ||
379 | lockres->l_flags |= USER_LOCK_BUSY; | ||
380 | mlog(ML_BASTS, "lockres %.*s, downconvert %d => %d\n", | ||
381 | lockres->l_namelen, lockres->l_name, lockres->l_level, new_level); | ||
382 | spin_unlock(&lockres->l_lock); | ||
383 | |||
384 | /* need lock downconvert request now... */ | ||
385 | status = ocfs2_dlm_lock(conn, new_level, &lockres->l_lksb, | ||
386 | DLM_LKF_CONVERT|DLM_LKF_VALBLK, | ||
387 | lockres->l_name, | ||
388 | lockres->l_namelen); | ||
389 | if (status) { | ||
390 | user_log_dlm_error("ocfs2_dlm_lock", status, lockres); | ||
391 | user_recover_from_dlm_error(lockres); | ||
392 | } | ||
393 | |||
394 | drop_ref: | ||
395 | user_dlm_drop_inode_ref(lockres); | ||
396 | } | ||
397 | |||
398 | static inline void user_dlm_inc_holders(struct user_lock_res *lockres, | ||
399 | int level) | ||
400 | { | ||
401 | switch(level) { | ||
402 | case DLM_LOCK_EX: | ||
403 | lockres->l_ex_holders++; | ||
404 | break; | ||
405 | case DLM_LOCK_PR: | ||
406 | lockres->l_ro_holders++; | ||
407 | break; | ||
408 | default: | ||
409 | BUG(); | ||
410 | } | ||
411 | } | ||
412 | |||
413 | /* predict what lock level we'll be dropping down to on behalf | ||
414 | * of another node, and return true if the currently wanted | ||
415 | * level will be compatible with it. */ | ||
416 | static inline int | ||
417 | user_may_continue_on_blocked_lock(struct user_lock_res *lockres, | ||
418 | int wanted) | ||
419 | { | ||
420 | BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); | ||
421 | |||
422 | return wanted <= user_highest_compat_lock_level(lockres->l_blocking); | ||
423 | } | ||
424 | |||
425 | int user_dlm_cluster_lock(struct user_lock_res *lockres, | ||
426 | int level, | ||
427 | int lkm_flags) | ||
428 | { | ||
429 | int status, local_flags; | ||
430 | struct ocfs2_cluster_connection *conn = | ||
431 | cluster_connection_from_user_lockres(lockres); | ||
432 | |||
433 | if (level != DLM_LOCK_EX && | ||
434 | level != DLM_LOCK_PR) { | ||
435 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", | ||
436 | lockres->l_namelen, lockres->l_name); | ||
437 | status = -EINVAL; | ||
438 | goto bail; | ||
439 | } | ||
440 | |||
441 | mlog(ML_BASTS, "lockres %.*s, level %d, flags = 0x%x\n", | ||
442 | lockres->l_namelen, lockres->l_name, level, lkm_flags); | ||
443 | |||
444 | again: | ||
445 | if (signal_pending(current)) { | ||
446 | status = -ERESTARTSYS; | ||
447 | goto bail; | ||
448 | } | ||
449 | |||
450 | spin_lock(&lockres->l_lock); | ||
451 | |||
452 | /* We only compare against the currently granted level | ||
453 | * here. If the lock is blocked waiting on a downconvert, | ||
454 | * we'll get caught below. */ | ||
455 | if ((lockres->l_flags & USER_LOCK_BUSY) && | ||
456 | (level > lockres->l_level)) { | ||
457 | /* is someone sitting in dlm_lock? If so, wait on | ||
458 | * them. */ | ||
459 | spin_unlock(&lockres->l_lock); | ||
460 | |||
461 | user_wait_on_busy_lock(lockres); | ||
462 | goto again; | ||
463 | } | ||
464 | |||
465 | if ((lockres->l_flags & USER_LOCK_BLOCKED) && | ||
466 | (!user_may_continue_on_blocked_lock(lockres, level))) { | ||
467 | /* the lock is currently blocked on behalf of | ||
468 | * another node */ | ||
469 | spin_unlock(&lockres->l_lock); | ||
470 | |||
471 | user_wait_on_blocked_lock(lockres); | ||
472 | goto again; | ||
473 | } | ||
474 | |||
475 | if (level > lockres->l_level) { | ||
476 | local_flags = lkm_flags | DLM_LKF_VALBLK; | ||
477 | if (lockres->l_level != DLM_LOCK_IV) | ||
478 | local_flags |= DLM_LKF_CONVERT; | ||
479 | |||
480 | lockres->l_requested = level; | ||
481 | lockres->l_flags |= USER_LOCK_BUSY; | ||
482 | spin_unlock(&lockres->l_lock); | ||
483 | |||
484 | BUG_ON(level == DLM_LOCK_IV); | ||
485 | BUG_ON(level == DLM_LOCK_NL); | ||
486 | |||
487 | /* call dlm_lock to upgrade lock now */ | ||
488 | status = ocfs2_dlm_lock(conn, level, &lockres->l_lksb, | ||
489 | local_flags, lockres->l_name, | ||
490 | lockres->l_namelen); | ||
491 | if (status) { | ||
492 | if ((lkm_flags & DLM_LKF_NOQUEUE) && | ||
493 | (status != -EAGAIN)) | ||
494 | user_log_dlm_error("ocfs2_dlm_lock", | ||
495 | status, lockres); | ||
496 | user_recover_from_dlm_error(lockres); | ||
497 | goto bail; | ||
498 | } | ||
499 | |||
500 | user_wait_on_busy_lock(lockres); | ||
501 | goto again; | ||
502 | } | ||
503 | |||
504 | user_dlm_inc_holders(lockres, level); | ||
505 | spin_unlock(&lockres->l_lock); | ||
506 | |||
507 | status = 0; | ||
508 | bail: | ||
509 | return status; | ||
510 | } | ||
511 | |||
512 | static inline void user_dlm_dec_holders(struct user_lock_res *lockres, | ||
513 | int level) | ||
514 | { | ||
515 | switch(level) { | ||
516 | case DLM_LOCK_EX: | ||
517 | BUG_ON(!lockres->l_ex_holders); | ||
518 | lockres->l_ex_holders--; | ||
519 | break; | ||
520 | case DLM_LOCK_PR: | ||
521 | BUG_ON(!lockres->l_ro_holders); | ||
522 | lockres->l_ro_holders--; | ||
523 | break; | ||
524 | default: | ||
525 | BUG(); | ||
526 | } | ||
527 | } | ||
528 | |||
529 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, | ||
530 | int level) | ||
531 | { | ||
532 | if (level != DLM_LOCK_EX && | ||
533 | level != DLM_LOCK_PR) { | ||
534 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", | ||
535 | lockres->l_namelen, lockres->l_name); | ||
536 | return; | ||
537 | } | ||
538 | |||
539 | spin_lock(&lockres->l_lock); | ||
540 | user_dlm_dec_holders(lockres, level); | ||
541 | __user_dlm_cond_queue_lockres(lockres); | ||
542 | spin_unlock(&lockres->l_lock); | ||
543 | } | ||
544 | |||
545 | void user_dlm_write_lvb(struct inode *inode, | ||
546 | const char *val, | ||
547 | unsigned int len) | ||
548 | { | ||
549 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | ||
550 | char *lvb; | ||
551 | |||
552 | BUG_ON(len > DLM_LVB_LEN); | ||
553 | |||
554 | spin_lock(&lockres->l_lock); | ||
555 | |||
556 | BUG_ON(lockres->l_level < DLM_LOCK_EX); | ||
557 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | ||
558 | memcpy(lvb, val, len); | ||
559 | |||
560 | spin_unlock(&lockres->l_lock); | ||
561 | } | ||
562 | |||
563 | ssize_t user_dlm_read_lvb(struct inode *inode, | ||
564 | char *val, | ||
565 | unsigned int len) | ||
566 | { | ||
567 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | ||
568 | char *lvb; | ||
569 | ssize_t ret = len; | ||
570 | |||
571 | BUG_ON(len > DLM_LVB_LEN); | ||
572 | |||
573 | spin_lock(&lockres->l_lock); | ||
574 | |||
575 | BUG_ON(lockres->l_level < DLM_LOCK_PR); | ||
576 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)) { | ||
577 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | ||
578 | memcpy(val, lvb, len); | ||
579 | } else | ||
580 | ret = 0; | ||
581 | |||
582 | spin_unlock(&lockres->l_lock); | ||
583 | return ret; | ||
584 | } | ||
585 | |||
586 | void user_dlm_lock_res_init(struct user_lock_res *lockres, | ||
587 | struct dentry *dentry) | ||
588 | { | ||
589 | memset(lockres, 0, sizeof(*lockres)); | ||
590 | |||
591 | spin_lock_init(&lockres->l_lock); | ||
592 | init_waitqueue_head(&lockres->l_event); | ||
593 | lockres->l_level = DLM_LOCK_IV; | ||
594 | lockres->l_requested = DLM_LOCK_IV; | ||
595 | lockres->l_blocking = DLM_LOCK_IV; | ||
596 | |||
597 | /* should have been checked before getting here. */ | ||
598 | BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN); | ||
599 | |||
600 | memcpy(lockres->l_name, | ||
601 | dentry->d_name.name, | ||
602 | dentry->d_name.len); | ||
603 | lockres->l_namelen = dentry->d_name.len; | ||
604 | } | ||
605 | |||
606 | int user_dlm_destroy_lock(struct user_lock_res *lockres) | ||
607 | { | ||
608 | int status = -EBUSY; | ||
609 | struct ocfs2_cluster_connection *conn = | ||
610 | cluster_connection_from_user_lockres(lockres); | ||
611 | |||
612 | mlog(ML_BASTS, "lockres %.*s\n", lockres->l_namelen, lockres->l_name); | ||
613 | |||
614 | spin_lock(&lockres->l_lock); | ||
615 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | ||
616 | spin_unlock(&lockres->l_lock); | ||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | lockres->l_flags |= USER_LOCK_IN_TEARDOWN; | ||
621 | |||
622 | while (lockres->l_flags & USER_LOCK_BUSY) { | ||
623 | spin_unlock(&lockres->l_lock); | ||
624 | |||
625 | user_wait_on_busy_lock(lockres); | ||
626 | |||
627 | spin_lock(&lockres->l_lock); | ||
628 | } | ||
629 | |||
630 | if (lockres->l_ro_holders || lockres->l_ex_holders) { | ||
631 | spin_unlock(&lockres->l_lock); | ||
632 | goto bail; | ||
633 | } | ||
634 | |||
635 | status = 0; | ||
636 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { | ||
637 | spin_unlock(&lockres->l_lock); | ||
638 | goto bail; | ||
639 | } | ||
640 | |||
641 | lockres->l_flags &= ~USER_LOCK_ATTACHED; | ||
642 | lockres->l_flags |= USER_LOCK_BUSY; | ||
643 | spin_unlock(&lockres->l_lock); | ||
644 | |||
645 | status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK); | ||
646 | if (status) { | ||
647 | user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); | ||
648 | goto bail; | ||
649 | } | ||
650 | |||
651 | user_wait_on_busy_lock(lockres); | ||
652 | |||
653 | status = 0; | ||
654 | bail: | ||
655 | return status; | ||
656 | } | ||
657 | |||
658 | static void user_dlm_recovery_handler_noop(int node_num, | ||
659 | void *recovery_data) | ||
660 | { | ||
661 | /* We ignore recovery events */ | ||
662 | return; | ||
663 | } | ||
664 | |||
665 | void user_dlm_set_locking_protocol(void) | ||
666 | { | ||
667 | ocfs2_stack_glue_set_max_proto_version(&user_dlm_lproto.lp_max_version); | ||
668 | } | ||
669 | |||
670 | struct ocfs2_cluster_connection *user_dlm_register(struct qstr *name) | ||
671 | { | ||
672 | int rc; | ||
673 | struct ocfs2_cluster_connection *conn; | ||
674 | |||
675 | rc = ocfs2_cluster_connect_agnostic(name->name, name->len, | ||
676 | &user_dlm_lproto, | ||
677 | user_dlm_recovery_handler_noop, | ||
678 | NULL, &conn); | ||
679 | if (rc) | ||
680 | mlog_errno(rc); | ||
681 | |||
682 | return rc ? ERR_PTR(rc) : conn; | ||
683 | } | ||
684 | |||
685 | void user_dlm_unregister(struct ocfs2_cluster_connection *conn) | ||
686 | { | ||
687 | ocfs2_cluster_disconnect(conn, 0); | ||
688 | } | ||
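
As a reading aid for the downconvert policy in user_dlm_unblock_lock() and __user_dlm_cond_queue_lockres() above, here is the same decision logic restated as a tiny standalone sketch (illustrative only; it ignores the BUSY/TEARDOWN/CANCEL handling):

/* Illustrative restatement of the downconvert policy above, using the
 * same three modes (EX > PR > NL). Not part of the diff. */
enum mode { NL, PR, EX };

/* Mirrors user_highest_compat_lock_level(): the highest mode we may
 * keep while another node is blocked at 'blocking'. */
static enum mode compat_level(enum mode blocking)
{
	if (blocking == EX)
		return NL;
	if (blocking == PR)
		return PR;
	return EX;
}

/* Mirrors __user_dlm_cond_queue_lockres(): queue the downconvert work
 * only once every incompatible holder has dropped its reference. */
static int should_queue_downconvert(enum mode blocking,
				    unsigned int ro_holders,
				    unsigned int ex_holders)
{
	if (blocking == EX)
		return !ro_holders && !ex_holders;
	if (blocking == PR)
		return !ex_holders;
	return 0;
}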
diff --git a/fs/ocfs2/dlmfs/userdlm.h b/fs/ocfs2/dlmfs/userdlm.h
new file mode 100644
index 000000000000..3b42d79531d7
--- /dev/null
+++ b/fs/ocfs2/dlmfs/userdlm.h
@@ -0,0 +1,113 @@
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * userdlm.h | ||
5 | * | ||
6 | * Userspace dlm defines | ||
7 | * | ||
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | |||
27 | #ifndef USERDLM_H | ||
28 | #define USERDLM_H | ||
29 | |||
30 | #include <linux/module.h> | ||
31 | #include <linux/fs.h> | ||
32 | #include <linux/types.h> | ||
33 | #include <linux/workqueue.h> | ||
34 | |||
35 | /* user_lock_res->l_flags flags. */ | ||
36 | #define USER_LOCK_ATTACHED (0x00000001) /* we have initialized | ||
37 | * the lvb */ | ||
38 | #define USER_LOCK_BUSY (0x00000002) /* we are currently in | ||
39 | * dlm_lock */ | ||
40 | #define USER_LOCK_BLOCKED (0x00000004) /* blocked waiting to | ||
41 | * downconvert*/ | ||
42 | #define USER_LOCK_IN_TEARDOWN (0x00000008) /* we're currently | ||
43 | * destroying this | ||
44 | * lock. */ | ||
45 | #define USER_LOCK_QUEUED (0x00000010) /* lock is on the | ||
46 | * workqueue */ | ||
47 | #define USER_LOCK_IN_CANCEL (0x00000020) | ||
48 | |||
49 | struct user_lock_res { | ||
50 | spinlock_t l_lock; | ||
51 | |||
52 | int l_flags; | ||
53 | |||
54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 | ||
55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; | ||
56 | int l_namelen; | ||
57 | int l_level; | ||
58 | unsigned int l_ro_holders; | ||
59 | unsigned int l_ex_holders; | ||
60 | struct ocfs2_dlm_lksb l_lksb; | ||
61 | |||
62 | int l_requested; | ||
63 | int l_blocking; | ||
64 | |||
65 | wait_queue_head_t l_event; | ||
66 | |||
67 | struct work_struct l_work; | ||
68 | }; | ||
69 | |||
70 | extern struct workqueue_struct *user_dlm_worker; | ||
71 | |||
72 | void user_dlm_lock_res_init(struct user_lock_res *lockres, | ||
73 | struct dentry *dentry); | ||
74 | int user_dlm_destroy_lock(struct user_lock_res *lockres); | ||
75 | int user_dlm_cluster_lock(struct user_lock_res *lockres, | ||
76 | int level, | ||
77 | int lkm_flags); | ||
78 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, | ||
79 | int level); | ||
80 | void user_dlm_write_lvb(struct inode *inode, | ||
81 | const char *val, | ||
82 | unsigned int len); | ||
83 | ssize_t user_dlm_read_lvb(struct inode *inode, | ||
84 | char *val, | ||
85 | unsigned int len); | ||
86 | struct ocfs2_cluster_connection *user_dlm_register(struct qstr *name); | ||
87 | void user_dlm_unregister(struct ocfs2_cluster_connection *conn); | ||
88 | void user_dlm_set_locking_protocol(void); | ||
89 | |||
90 | struct dlmfs_inode_private { | ||
91 | struct ocfs2_cluster_connection *ip_conn; | ||
92 | |||
93 | struct user_lock_res ip_lockres; /* unused for directories. */ | ||
94 | struct inode *ip_parent; | ||
95 | |||
96 | struct inode ip_vfs_inode; | ||
97 | }; | ||
98 | |||
99 | static inline struct dlmfs_inode_private * | ||
100 | DLMFS_I(struct inode *inode) | ||
101 | { | ||
102 | return container_of(inode, | ||
103 | struct dlmfs_inode_private, | ||
104 | ip_vfs_inode); | ||
105 | } | ||
106 | |||
107 | struct dlmfs_filp_private { | ||
108 | int fp_lock_level; | ||
109 | }; | ||
110 | |||
111 | #define DLMFS_MAGIC 0x76a9f425 | ||
112 | |||
113 | #endif /* USERDLM_H */ | ||