diff options
Diffstat (limited to 'fs/dlm/device.c')
-rw-r--r-- | fs/dlm/device.c | 1084 |
1 files changed, 1084 insertions, 0 deletions
diff --git a/fs/dlm/device.c b/fs/dlm/device.c new file mode 100644 index 000000000000..a8bf600ed13d --- /dev/null +++ b/fs/dlm/device.c | |||
@@ -0,0 +1,1084 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | /* | ||
15 | * device.c | ||
16 | * | ||
17 | * This is the userland interface to the DLM. | ||
18 | * | ||
19 | * The locking is done via a misc char device (find the | ||
20 | * registered minor number in /proc/misc). | ||
21 | * | ||
22 | * User code should not use this interface directly but | ||
23 | * call the library routines in libdlm.a instead. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/miscdevice.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/wait.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/file.h> | ||
32 | #include <linux/fs.h> | ||
33 | #include <linux/poll.h> | ||
34 | #include <linux/signal.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/idr.h> | ||
37 | |||
38 | #include <linux/dlm.h> | ||
39 | #include <linux/dlm_device.h> | ||
40 | |||
41 | #include "lvb_table.h" | ||
42 | |||
43 | static struct file_operations _dlm_fops; | ||
44 | static const char *name_prefix="dlm"; | ||
45 | static struct list_head user_ls_list; | ||
46 | static struct semaphore user_ls_lock; | ||
47 | |||
48 | /* Lock infos are stored in here indexed by lock ID */ | ||
49 | static DEFINE_IDR(lockinfo_idr); | ||
50 | static rwlock_t lockinfo_lock; | ||
51 | |||
52 | /* Flags in li_flags */ | ||
53 | #define LI_FLAG_COMPLETE 1 | ||
54 | #define LI_FLAG_FIRSTLOCK 2 | ||
55 | #define LI_FLAG_PERSISTENT 3 | ||
56 | |||
57 | /* flags in ls_flags*/ | ||
58 | #define LS_FLAG_DELETED 1 | ||
59 | #define LS_FLAG_AUTOFREE 2 | ||
60 | |||
61 | |||
62 | #define LOCKINFO_MAGIC 0x53595324 | ||
63 | |||
64 | struct lock_info { | ||
65 | uint32_t li_magic; | ||
66 | uint8_t li_cmd; | ||
67 | int8_t li_grmode; | ||
68 | int8_t li_rqmode; | ||
69 | struct dlm_lksb li_lksb; | ||
70 | wait_queue_head_t li_waitq; | ||
71 | unsigned long li_flags; | ||
72 | void __user *li_castparam; | ||
73 | void __user *li_castaddr; | ||
74 | void __user *li_bastparam; | ||
75 | void __user *li_bastaddr; | ||
76 | void __user *li_pend_bastparam; | ||
77 | void __user *li_pend_bastaddr; | ||
78 | struct list_head li_ownerqueue; | ||
79 | struct file_info *li_file; | ||
80 | struct dlm_lksb __user *li_user_lksb; | ||
81 | struct semaphore li_firstlock; | ||
82 | }; | ||
83 | |||
84 | /* A queued AST no less */ | ||
85 | struct ast_info { | ||
86 | struct dlm_lock_result result; | ||
87 | struct list_head list; | ||
88 | uint32_t lvb_updated; | ||
89 | uint32_t progress; /* How much has been read */ | ||
90 | }; | ||
91 | |||
92 | /* One of these per userland lockspace */ | ||
93 | struct user_ls { | ||
94 | void *ls_lockspace; | ||
95 | atomic_t ls_refcnt; | ||
96 | long ls_flags; | ||
97 | |||
98 | /* Passed into misc_register() */ | ||
99 | struct miscdevice ls_miscinfo; | ||
100 | struct list_head ls_list; | ||
101 | }; | ||
102 | |||
103 | /* misc_device info for the control device */ | ||
104 | static struct miscdevice ctl_device; | ||
105 | |||
106 | /* | ||
107 | * Stuff we hang off the file struct. | ||
108 | * The first two are to cope with unlocking all the | ||
109 | * locks help by a process when it dies. | ||
110 | */ | ||
111 | struct file_info { | ||
112 | struct list_head fi_li_list; /* List of active lock_infos */ | ||
113 | spinlock_t fi_li_lock; | ||
114 | struct list_head fi_ast_list; /* Queue of ASTs to be delivered */ | ||
115 | spinlock_t fi_ast_lock; | ||
116 | wait_queue_head_t fi_wait; | ||
117 | struct user_ls *fi_ls; | ||
118 | atomic_t fi_refcnt; /* Number of users */ | ||
119 | unsigned long fi_flags; /* Bit 1 means the device is open */ | ||
120 | }; | ||
121 | |||
122 | |||
123 | /* get and put ops for file_info. | ||
124 | Actually I don't really like "get" and "put", but everyone | ||
125 | else seems to use them and I can't think of anything | ||
126 | nicer at the moment */ | ||
127 | static void get_file_info(struct file_info *f) | ||
128 | { | ||
129 | atomic_inc(&f->fi_refcnt); | ||
130 | } | ||
131 | |||
132 | static void put_file_info(struct file_info *f) | ||
133 | { | ||
134 | if (atomic_dec_and_test(&f->fi_refcnt)) | ||
135 | kfree(f); | ||
136 | } | ||
137 | |||
138 | static void release_lockinfo(struct lock_info *li) | ||
139 | { | ||
140 | put_file_info(li->li_file); | ||
141 | |||
142 | write_lock(&lockinfo_lock); | ||
143 | idr_remove(&lockinfo_idr, li->li_lksb.sb_lkid); | ||
144 | write_unlock(&lockinfo_lock); | ||
145 | |||
146 | if (li->li_lksb.sb_lvbptr) | ||
147 | kfree(li->li_lksb.sb_lvbptr); | ||
148 | kfree(li); | ||
149 | |||
150 | module_put(THIS_MODULE); | ||
151 | } | ||
152 | |||
153 | static struct lock_info *get_lockinfo(uint32_t lockid) | ||
154 | { | ||
155 | struct lock_info *li; | ||
156 | |||
157 | read_lock(&lockinfo_lock); | ||
158 | li = idr_find(&lockinfo_idr, lockid); | ||
159 | read_unlock(&lockinfo_lock); | ||
160 | |||
161 | return li; | ||
162 | } | ||
163 | |||
164 | static int add_lockinfo(struct lock_info *li) | ||
165 | { | ||
166 | int n; | ||
167 | int r; | ||
168 | int ret = -EINVAL; | ||
169 | |||
170 | write_lock(&lockinfo_lock); | ||
171 | |||
172 | if (idr_find(&lockinfo_idr, li->li_lksb.sb_lkid)) | ||
173 | goto out_up; | ||
174 | |||
175 | ret = -ENOMEM; | ||
176 | r = idr_pre_get(&lockinfo_idr, GFP_KERNEL); | ||
177 | if (!r) | ||
178 | goto out_up; | ||
179 | |||
180 | r = idr_get_new_above(&lockinfo_idr, li, li->li_lksb.sb_lkid, &n); | ||
181 | if (r) | ||
182 | goto out_up; | ||
183 | |||
184 | if (n != li->li_lksb.sb_lkid) { | ||
185 | idr_remove(&lockinfo_idr, n); | ||
186 | goto out_up; | ||
187 | } | ||
188 | |||
189 | ret = 0; | ||
190 | |||
191 | out_up: | ||
192 | write_unlock(&lockinfo_lock); | ||
193 | |||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | |||
198 | static struct user_ls *__find_lockspace(int minor) | ||
199 | { | ||
200 | struct user_ls *lsinfo; | ||
201 | |||
202 | list_for_each_entry(lsinfo, &user_ls_list, ls_list) { | ||
203 | if (lsinfo->ls_miscinfo.minor == minor) | ||
204 | return lsinfo; | ||
205 | } | ||
206 | return NULL; | ||
207 | } | ||
208 | |||
209 | /* Find a lockspace struct given the device minor number */ | ||
210 | static struct user_ls *find_lockspace(int minor) | ||
211 | { | ||
212 | struct user_ls *lsinfo; | ||
213 | |||
214 | down(&user_ls_lock); | ||
215 | lsinfo = __find_lockspace(minor); | ||
216 | up(&user_ls_lock); | ||
217 | |||
218 | return lsinfo; | ||
219 | } | ||
220 | |||
221 | static void add_lockspace_to_list(struct user_ls *lsinfo) | ||
222 | { | ||
223 | down(&user_ls_lock); | ||
224 | list_add(&lsinfo->ls_list, &user_ls_list); | ||
225 | up(&user_ls_lock); | ||
226 | } | ||
227 | |||
228 | /* Register a lockspace with the DLM and create a misc | ||
229 | device for userland to access it */ | ||
230 | static int register_lockspace(char *name, struct user_ls **ls, int flags) | ||
231 | { | ||
232 | struct user_ls *newls; | ||
233 | int status; | ||
234 | int namelen; | ||
235 | |||
236 | namelen = strlen(name)+strlen(name_prefix)+2; | ||
237 | |||
238 | newls = kmalloc(sizeof(struct user_ls), GFP_KERNEL); | ||
239 | if (!newls) | ||
240 | return -ENOMEM; | ||
241 | memset(newls, 0, sizeof(struct user_ls)); | ||
242 | |||
243 | newls->ls_miscinfo.name = kmalloc(namelen, GFP_KERNEL); | ||
244 | if (!newls->ls_miscinfo.name) { | ||
245 | kfree(newls); | ||
246 | return -ENOMEM; | ||
247 | } | ||
248 | |||
249 | status = dlm_new_lockspace(name, strlen(name), &newls->ls_lockspace, 0, | ||
250 | DLM_USER_LVB_LEN); | ||
251 | if (status != 0) { | ||
252 | kfree(newls->ls_miscinfo.name); | ||
253 | kfree(newls); | ||
254 | return status; | ||
255 | } | ||
256 | |||
257 | snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s", | ||
258 | name_prefix, name); | ||
259 | |||
260 | newls->ls_miscinfo.fops = &_dlm_fops; | ||
261 | newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR; | ||
262 | |||
263 | status = misc_register(&newls->ls_miscinfo); | ||
264 | if (status) { | ||
265 | printk(KERN_ERR "dlm: misc register failed for %s\n", name); | ||
266 | dlm_release_lockspace(newls->ls_lockspace, 0); | ||
267 | kfree(newls->ls_miscinfo.name); | ||
268 | kfree(newls); | ||
269 | return status; | ||
270 | } | ||
271 | |||
272 | if (flags & DLM_USER_LSFLG_AUTOFREE) | ||
273 | set_bit(LS_FLAG_AUTOFREE, &newls->ls_flags); | ||
274 | |||
275 | add_lockspace_to_list(newls); | ||
276 | *ls = newls; | ||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | /* Called with the user_ls_lock semaphore held */ | ||
281 | static int unregister_lockspace(struct user_ls *lsinfo, int force) | ||
282 | { | ||
283 | int status; | ||
284 | |||
285 | status = dlm_release_lockspace(lsinfo->ls_lockspace, force); | ||
286 | if (status) | ||
287 | return status; | ||
288 | |||
289 | status = misc_deregister(&lsinfo->ls_miscinfo); | ||
290 | if (status) | ||
291 | return status; | ||
292 | |||
293 | list_del(&lsinfo->ls_list); | ||
294 | set_bit(LS_FLAG_DELETED, &lsinfo->ls_flags); | ||
295 | lsinfo->ls_lockspace = NULL; | ||
296 | if (atomic_read(&lsinfo->ls_refcnt) == 0) { | ||
297 | kfree(lsinfo->ls_miscinfo.name); | ||
298 | kfree(lsinfo); | ||
299 | } | ||
300 | |||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | /* Add it to userland's AST queue */ | ||
305 | static void add_to_astqueue(struct lock_info *li, void *astaddr, void *astparam, | ||
306 | int lvb_updated) | ||
307 | { | ||
308 | struct ast_info *ast = kmalloc(sizeof(struct ast_info), GFP_KERNEL); | ||
309 | if (!ast) | ||
310 | return; | ||
311 | |||
312 | memset(ast, 0, sizeof(*ast)); | ||
313 | ast->result.user_astparam = astparam; | ||
314 | ast->result.user_astaddr = astaddr; | ||
315 | ast->result.user_lksb = li->li_user_lksb; | ||
316 | memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb)); | ||
317 | ast->lvb_updated = lvb_updated; | ||
318 | |||
319 | spin_lock(&li->li_file->fi_ast_lock); | ||
320 | list_add_tail(&ast->list, &li->li_file->fi_ast_list); | ||
321 | spin_unlock(&li->li_file->fi_ast_lock); | ||
322 | wake_up_interruptible(&li->li_file->fi_wait); | ||
323 | } | ||
324 | |||
325 | static void bast_routine(void *param, int mode) | ||
326 | { | ||
327 | struct lock_info *li = param; | ||
328 | |||
329 | if (li && li->li_bastaddr) | ||
330 | add_to_astqueue(li, li->li_bastaddr, li->li_bastparam, 0); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * This is the kernel's AST routine. | ||
335 | * All lock, unlock & query operations complete here. | ||
336 | * The only syncronous ops are those done during device close. | ||
337 | */ | ||
338 | static void ast_routine(void *param) | ||
339 | { | ||
340 | struct lock_info *li = param; | ||
341 | |||
342 | /* Param may be NULL if a persistent lock is unlocked by someone else */ | ||
343 | if (!li) | ||
344 | return; | ||
345 | |||
346 | /* If this is a succesful conversion then activate the blocking ast | ||
347 | * args from the conversion request */ | ||
348 | if (!test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && | ||
349 | li->li_lksb.sb_status == 0) { | ||
350 | |||
351 | li->li_bastparam = li->li_pend_bastparam; | ||
352 | li->li_bastaddr = li->li_pend_bastaddr; | ||
353 | li->li_pend_bastaddr = NULL; | ||
354 | } | ||
355 | |||
356 | /* If it's an async request then post data to the user's AST queue. */ | ||
357 | if (li->li_castaddr) { | ||
358 | int lvb_updated = 0; | ||
359 | |||
360 | /* See if the lvb has been updated */ | ||
361 | if (dlm_lvb_operations[li->li_grmode+1][li->li_rqmode+1] == 1) | ||
362 | lvb_updated = 1; | ||
363 | |||
364 | if (li->li_lksb.sb_status == 0) | ||
365 | li->li_grmode = li->li_rqmode; | ||
366 | |||
367 | /* Only queue AST if the device is still open */ | ||
368 | if (test_bit(1, &li->li_file->fi_flags)) | ||
369 | add_to_astqueue(li, li->li_castaddr, li->li_castparam, | ||
370 | lvb_updated); | ||
371 | |||
372 | /* If it's a new lock operation that failed, then | ||
373 | * remove it from the owner queue and free the | ||
374 | * lock_info. | ||
375 | */ | ||
376 | if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && | ||
377 | li->li_lksb.sb_status != 0) { | ||
378 | |||
379 | /* Wait till dlm_lock() has finished */ | ||
380 | down(&li->li_firstlock); | ||
381 | up(&li->li_firstlock); | ||
382 | |||
383 | spin_lock(&li->li_file->fi_li_lock); | ||
384 | list_del(&li->li_ownerqueue); | ||
385 | spin_unlock(&li->li_file->fi_li_lock); | ||
386 | release_lockinfo(li); | ||
387 | return; | ||
388 | } | ||
389 | /* Free unlocks & queries */ | ||
390 | if (li->li_lksb.sb_status == -DLM_EUNLOCK || | ||
391 | li->li_cmd == DLM_USER_QUERY) { | ||
392 | release_lockinfo(li); | ||
393 | } | ||
394 | } else { | ||
395 | /* Synchronous request, just wake up the caller */ | ||
396 | set_bit(LI_FLAG_COMPLETE, &li->li_flags); | ||
397 | wake_up_interruptible(&li->li_waitq); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * Wait for the lock op to complete and return the status. | ||
403 | */ | ||
404 | static int wait_for_ast(struct lock_info *li) | ||
405 | { | ||
406 | /* Wait for the AST routine to complete */ | ||
407 | set_task_state(current, TASK_INTERRUPTIBLE); | ||
408 | while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags)) | ||
409 | schedule(); | ||
410 | |||
411 | set_task_state(current, TASK_RUNNING); | ||
412 | |||
413 | return li->li_lksb.sb_status; | ||
414 | } | ||
415 | |||
416 | |||
417 | /* Open on control device */ | ||
418 | static int dlm_ctl_open(struct inode *inode, struct file *file) | ||
419 | { | ||
420 | file->private_data = NULL; | ||
421 | return 0; | ||
422 | } | ||
423 | |||
/* release() on the control device: nothing to clean up, always returns 0 */
static int dlm_ctl_close(struct inode *inode, struct file *file)
{
	return 0;
}
429 | |||
430 | /* Open on lockspace device */ | ||
431 | static int dlm_open(struct inode *inode, struct file *file) | ||
432 | { | ||
433 | struct file_info *f; | ||
434 | struct user_ls *lsinfo; | ||
435 | |||
436 | lsinfo = find_lockspace(iminor(inode)); | ||
437 | if (!lsinfo) | ||
438 | return -ENOENT; | ||
439 | |||
440 | f = kmalloc(sizeof(struct file_info), GFP_KERNEL); | ||
441 | if (!f) | ||
442 | return -ENOMEM; | ||
443 | |||
444 | atomic_inc(&lsinfo->ls_refcnt); | ||
445 | INIT_LIST_HEAD(&f->fi_li_list); | ||
446 | INIT_LIST_HEAD(&f->fi_ast_list); | ||
447 | spin_lock_init(&f->fi_li_lock); | ||
448 | spin_lock_init(&f->fi_ast_lock); | ||
449 | init_waitqueue_head(&f->fi_wait); | ||
450 | f->fi_ls = lsinfo; | ||
451 | f->fi_flags = 0; | ||
452 | get_file_info(f); | ||
453 | set_bit(1, &f->fi_flags); | ||
454 | |||
455 | file->private_data = f; | ||
456 | |||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | /* Check the user's version matches ours */ | ||
461 | static int check_version(struct dlm_write_request *req) | ||
462 | { | ||
463 | if (req->version[0] != DLM_DEVICE_VERSION_MAJOR || | ||
464 | (req->version[0] == DLM_DEVICE_VERSION_MAJOR && | ||
465 | req->version[1] > DLM_DEVICE_VERSION_MINOR)) { | ||
466 | |||
467 | printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " | ||
468 | "user (%d.%d.%d) kernel (%d.%d.%d)\n", | ||
469 | current->comm, | ||
470 | current->pid, | ||
471 | req->version[0], | ||
472 | req->version[1], | ||
473 | req->version[2], | ||
474 | DLM_DEVICE_VERSION_MAJOR, | ||
475 | DLM_DEVICE_VERSION_MINOR, | ||
476 | DLM_DEVICE_VERSION_PATCH); | ||
477 | return -EINVAL; | ||
478 | } | ||
479 | return 0; | ||
480 | } | ||
481 | |||
482 | /* Close on lockspace device */ | ||
483 | static int dlm_close(struct inode *inode, struct file *file) | ||
484 | { | ||
485 | struct file_info *f = file->private_data; | ||
486 | struct lock_info li; | ||
487 | struct lock_info *old_li, *safe; | ||
488 | sigset_t tmpsig; | ||
489 | sigset_t allsigs; | ||
490 | struct user_ls *lsinfo; | ||
491 | DECLARE_WAITQUEUE(wq, current); | ||
492 | |||
493 | lsinfo = find_lockspace(iminor(inode)); | ||
494 | if (!lsinfo) | ||
495 | return -ENOENT; | ||
496 | |||
497 | /* Mark this closed so that ASTs will not be delivered any more */ | ||
498 | clear_bit(1, &f->fi_flags); | ||
499 | |||
500 | /* Block signals while we are doing this */ | ||
501 | sigfillset(&allsigs); | ||
502 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | ||
503 | |||
504 | /* We use our own lock_info struct here, so that any | ||
505 | * outstanding "real" ASTs will be delivered with the | ||
506 | * corresponding "real" params, thus freeing the lock_info | ||
507 | * that belongs the lock. This catches the corner case where | ||
508 | * a lock is BUSY when we try to unlock it here | ||
509 | */ | ||
510 | memset(&li, 0, sizeof(li)); | ||
511 | clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | ||
512 | init_waitqueue_head(&li.li_waitq); | ||
513 | add_wait_queue(&li.li_waitq, &wq); | ||
514 | |||
515 | /* | ||
516 | * Free any outstanding locks, they are on the | ||
517 | * list in LIFO order so there should be no problems | ||
518 | * about unlocking parents before children. | ||
519 | */ | ||
520 | list_for_each_entry_safe(old_li, safe, &f->fi_li_list, li_ownerqueue) { | ||
521 | int status; | ||
522 | int flags = 0; | ||
523 | |||
524 | /* Don't unlock persistent locks, just mark them orphaned */ | ||
525 | if (test_bit(LI_FLAG_PERSISTENT, &old_li->li_flags)) { | ||
526 | list_del(&old_li->li_ownerqueue); | ||
527 | |||
528 | /* Update master copy */ | ||
529 | /* TODO: Check locking core updates the local and | ||
530 | remote ORPHAN flags */ | ||
531 | li.li_lksb.sb_lkid = old_li->li_lksb.sb_lkid; | ||
532 | status = dlm_lock(f->fi_ls->ls_lockspace, | ||
533 | old_li->li_grmode, &li.li_lksb, | ||
534 | DLM_LKF_CONVERT|DLM_LKF_ORPHAN, | ||
535 | NULL, 0, 0, ast_routine, NULL, | ||
536 | NULL, NULL); | ||
537 | if (status != 0) | ||
538 | printk("dlm: Error orphaning lock %x: %d\n", | ||
539 | old_li->li_lksb.sb_lkid, status); | ||
540 | |||
541 | /* But tidy our references in it */ | ||
542 | release_lockinfo(old_li); | ||
543 | continue; | ||
544 | } | ||
545 | |||
546 | clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | ||
547 | |||
548 | flags = DLM_LKF_FORCEUNLOCK; | ||
549 | if (old_li->li_grmode >= DLM_LOCK_PW) | ||
550 | flags |= DLM_LKF_IVVALBLK; | ||
551 | |||
552 | status = dlm_unlock(f->fi_ls->ls_lockspace, | ||
553 | old_li->li_lksb.sb_lkid, flags, | ||
554 | &li.li_lksb, &li); | ||
555 | |||
556 | /* Must wait for it to complete as the next lock could be its | ||
557 | * parent */ | ||
558 | if (status == 0) | ||
559 | wait_for_ast(&li); | ||
560 | |||
561 | /* Unlock suceeded, free the lock_info struct. */ | ||
562 | if (status == 0) | ||
563 | release_lockinfo(old_li); | ||
564 | } | ||
565 | |||
566 | remove_wait_queue(&li.li_waitq, &wq); | ||
567 | |||
568 | /* | ||
569 | * If this is the last reference to the lockspace | ||
570 | * then free the struct. If it's an AUTOFREE lockspace | ||
571 | * then free the whole thing. | ||
572 | */ | ||
573 | down(&user_ls_lock); | ||
574 | if (atomic_dec_and_test(&lsinfo->ls_refcnt)) { | ||
575 | |||
576 | if (lsinfo->ls_lockspace) { | ||
577 | if (test_bit(LS_FLAG_AUTOFREE, &lsinfo->ls_flags)) { | ||
578 | unregister_lockspace(lsinfo, 1); | ||
579 | } | ||
580 | } else { | ||
581 | kfree(lsinfo->ls_miscinfo.name); | ||
582 | kfree(lsinfo); | ||
583 | } | ||
584 | } | ||
585 | up(&user_ls_lock); | ||
586 | put_file_info(f); | ||
587 | |||
588 | /* Restore signals */ | ||
589 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | ||
590 | recalc_sigpending(); | ||
591 | |||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | static int do_user_create_lockspace(struct file_info *fi, uint8_t cmd, | ||
596 | struct dlm_lspace_params *kparams) | ||
597 | { | ||
598 | int status; | ||
599 | struct user_ls *lsinfo; | ||
600 | |||
601 | if (!capable(CAP_SYS_ADMIN)) | ||
602 | return -EPERM; | ||
603 | |||
604 | status = register_lockspace(kparams->name, &lsinfo, kparams->flags); | ||
605 | |||
606 | /* If it succeeded then return the minor number */ | ||
607 | if (status == 0) | ||
608 | status = lsinfo->ls_miscinfo.minor; | ||
609 | |||
610 | return status; | ||
611 | } | ||
612 | |||
613 | static int do_user_remove_lockspace(struct file_info *fi, uint8_t cmd, | ||
614 | struct dlm_lspace_params *kparams) | ||
615 | { | ||
616 | int status; | ||
617 | int force = 1; | ||
618 | struct user_ls *lsinfo; | ||
619 | |||
620 | if (!capable(CAP_SYS_ADMIN)) | ||
621 | return -EPERM; | ||
622 | |||
623 | down(&user_ls_lock); | ||
624 | lsinfo = __find_lockspace(kparams->minor); | ||
625 | if (!lsinfo) { | ||
626 | up(&user_ls_lock); | ||
627 | return -EINVAL; | ||
628 | } | ||
629 | |||
630 | if (kparams->flags & DLM_USER_LSFLG_FORCEFREE) | ||
631 | force = 2; | ||
632 | |||
633 | status = unregister_lockspace(lsinfo, force); | ||
634 | up(&user_ls_lock); | ||
635 | |||
636 | return status; | ||
637 | } | ||
638 | |||
639 | /* Read call, might block if no ASTs are waiting. | ||
640 | * It will only ever return one message at a time, regardless | ||
641 | * of how many are pending. | ||
642 | */ | ||
643 | static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count, | ||
644 | loff_t *ppos) | ||
645 | { | ||
646 | struct file_info *fi = file->private_data; | ||
647 | struct ast_info *ast; | ||
648 | int data_size; | ||
649 | int offset; | ||
650 | DECLARE_WAITQUEUE(wait, current); | ||
651 | |||
652 | if (count < sizeof(struct dlm_lock_result)) | ||
653 | return -EINVAL; | ||
654 | |||
655 | spin_lock(&fi->fi_ast_lock); | ||
656 | if (list_empty(&fi->fi_ast_list)) { | ||
657 | |||
658 | /* No waiting ASTs. | ||
659 | * Return EOF if the lockspace been deleted. | ||
660 | */ | ||
661 | if (test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags)) | ||
662 | return 0; | ||
663 | |||
664 | if (file->f_flags & O_NONBLOCK) { | ||
665 | spin_unlock(&fi->fi_ast_lock); | ||
666 | return -EAGAIN; | ||
667 | } | ||
668 | |||
669 | add_wait_queue(&fi->fi_wait, &wait); | ||
670 | |||
671 | repeat: | ||
672 | set_current_state(TASK_INTERRUPTIBLE); | ||
673 | if (list_empty(&fi->fi_ast_list) && | ||
674 | !signal_pending(current)) { | ||
675 | |||
676 | spin_unlock(&fi->fi_ast_lock); | ||
677 | schedule(); | ||
678 | spin_lock(&fi->fi_ast_lock); | ||
679 | goto repeat; | ||
680 | } | ||
681 | |||
682 | current->state = TASK_RUNNING; | ||
683 | remove_wait_queue(&fi->fi_wait, &wait); | ||
684 | |||
685 | if (signal_pending(current)) { | ||
686 | spin_unlock(&fi->fi_ast_lock); | ||
687 | return -ERESTARTSYS; | ||
688 | } | ||
689 | } | ||
690 | |||
691 | ast = list_entry(fi->fi_ast_list.next, struct ast_info, list); | ||
692 | list_del(&ast->list); | ||
693 | spin_unlock(&fi->fi_ast_lock); | ||
694 | |||
695 | /* Work out the size of the returned data */ | ||
696 | data_size = sizeof(struct dlm_lock_result); | ||
697 | if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) | ||
698 | data_size += DLM_USER_LVB_LEN; | ||
699 | |||
700 | offset = sizeof(struct dlm_lock_result); | ||
701 | |||
702 | /* Room for the extended data ? */ | ||
703 | if (count >= data_size) { | ||
704 | |||
705 | if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) { | ||
706 | if (copy_to_user(buffer+offset, | ||
707 | ast->result.lksb.sb_lvbptr, | ||
708 | DLM_USER_LVB_LEN)) | ||
709 | return -EFAULT; | ||
710 | ast->result.lvb_offset = offset; | ||
711 | offset += DLM_USER_LVB_LEN; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | ast->result.length = data_size; | ||
716 | /* Copy the header now it has all the offsets in it */ | ||
717 | if (copy_to_user(buffer, &ast->result, sizeof(struct dlm_lock_result))) | ||
718 | offset = -EFAULT; | ||
719 | |||
720 | /* If we only returned a header and there's more to come then put it | ||
721 | back on the list */ | ||
722 | if (count < data_size) { | ||
723 | spin_lock(&fi->fi_ast_lock); | ||
724 | list_add(&ast->list, &fi->fi_ast_list); | ||
725 | spin_unlock(&fi->fi_ast_lock); | ||
726 | } else | ||
727 | kfree(ast); | ||
728 | return offset; | ||
729 | } | ||
730 | |||
731 | static unsigned int dlm_poll(struct file *file, poll_table *wait) | ||
732 | { | ||
733 | struct file_info *fi = file->private_data; | ||
734 | |||
735 | poll_wait(file, &fi->fi_wait, wait); | ||
736 | |||
737 | spin_lock(&fi->fi_ast_lock); | ||
738 | if (!list_empty(&fi->fi_ast_list)) { | ||
739 | spin_unlock(&fi->fi_ast_lock); | ||
740 | return POLLIN | POLLRDNORM; | ||
741 | } | ||
742 | |||
743 | spin_unlock(&fi->fi_ast_lock); | ||
744 | return 0; | ||
745 | } | ||
746 | |||
747 | static struct lock_info *allocate_lockinfo(struct file_info *fi, uint8_t cmd, | ||
748 | struct dlm_lock_params *kparams) | ||
749 | { | ||
750 | struct lock_info *li; | ||
751 | |||
752 | if (!try_module_get(THIS_MODULE)) | ||
753 | return NULL; | ||
754 | |||
755 | li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); | ||
756 | if (li) { | ||
757 | li->li_magic = LOCKINFO_MAGIC; | ||
758 | li->li_file = fi; | ||
759 | li->li_cmd = cmd; | ||
760 | li->li_flags = 0; | ||
761 | li->li_grmode = -1; | ||
762 | li->li_rqmode = -1; | ||
763 | li->li_pend_bastparam = NULL; | ||
764 | li->li_pend_bastaddr = NULL; | ||
765 | li->li_castaddr = NULL; | ||
766 | li->li_castparam = NULL; | ||
767 | li->li_lksb.sb_lvbptr = NULL; | ||
768 | li->li_bastaddr = kparams->bastaddr; | ||
769 | li->li_bastparam = kparams->bastparam; | ||
770 | |||
771 | get_file_info(fi); | ||
772 | } | ||
773 | return li; | ||
774 | } | ||
775 | |||
776 | static int do_user_lock(struct file_info *fi, uint8_t cmd, | ||
777 | struct dlm_lock_params *kparams) | ||
778 | { | ||
779 | struct lock_info *li; | ||
780 | int status; | ||
781 | |||
782 | /* | ||
783 | * Validate things that we need to have correct. | ||
784 | */ | ||
785 | if (!kparams->castaddr) | ||
786 | return -EINVAL; | ||
787 | |||
788 | if (!kparams->lksb) | ||
789 | return -EINVAL; | ||
790 | |||
791 | /* Persistent child locks are not available yet */ | ||
792 | if ((kparams->flags & DLM_LKF_PERSISTENT) && kparams->parent) | ||
793 | return -EINVAL; | ||
794 | |||
795 | /* For conversions, there should already be a lockinfo struct, | ||
796 | unless we are adopting an orphaned persistent lock */ | ||
797 | if (kparams->flags & DLM_LKF_CONVERT) { | ||
798 | |||
799 | li = get_lockinfo(kparams->lkid); | ||
800 | |||
801 | /* If this is a persistent lock we will have to create a | ||
802 | lockinfo again */ | ||
803 | if (!li && DLM_LKF_PERSISTENT) { | ||
804 | li = allocate_lockinfo(fi, cmd, kparams); | ||
805 | |||
806 | li->li_lksb.sb_lkid = kparams->lkid; | ||
807 | li->li_castaddr = kparams->castaddr; | ||
808 | li->li_castparam = kparams->castparam; | ||
809 | |||
810 | /* OK, this isn;t exactly a FIRSTLOCK but it is the | ||
811 | first time we've used this lockinfo, and if things | ||
812 | fail we want rid of it */ | ||
813 | init_MUTEX_LOCKED(&li->li_firstlock); | ||
814 | set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | ||
815 | add_lockinfo(li); | ||
816 | |||
817 | /* TODO: do a query to get the current state ?? */ | ||
818 | } | ||
819 | if (!li) | ||
820 | return -EINVAL; | ||
821 | |||
822 | if (li->li_magic != LOCKINFO_MAGIC) | ||
823 | return -EINVAL; | ||
824 | |||
825 | /* For conversions don't overwrite the current blocking AST | ||
826 | info so that: | ||
827 | a) if a blocking AST fires before the conversion is queued | ||
828 | it runs the current handler | ||
829 | b) if the conversion is cancelled, the original blocking AST | ||
830 | declaration is active | ||
831 | The pend_ info is made active when the conversion | ||
832 | completes. | ||
833 | */ | ||
834 | li->li_pend_bastaddr = kparams->bastaddr; | ||
835 | li->li_pend_bastparam = kparams->bastparam; | ||
836 | } else { | ||
837 | li = allocate_lockinfo(fi, cmd, kparams); | ||
838 | if (!li) | ||
839 | return -ENOMEM; | ||
840 | |||
841 | /* semaphore to allow us to complete our work before | ||
842 | the AST routine runs. In fact we only need (and use) this | ||
843 | when the initial lock fails */ | ||
844 | init_MUTEX_LOCKED(&li->li_firstlock); | ||
845 | set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | ||
846 | } | ||
847 | |||
848 | li->li_user_lksb = kparams->lksb; | ||
849 | li->li_castaddr = kparams->castaddr; | ||
850 | li->li_castparam = kparams->castparam; | ||
851 | li->li_lksb.sb_lkid = kparams->lkid; | ||
852 | li->li_rqmode = kparams->mode; | ||
853 | if (kparams->flags & DLM_LKF_PERSISTENT) | ||
854 | set_bit(LI_FLAG_PERSISTENT, &li->li_flags); | ||
855 | |||
856 | /* Copy in the value block */ | ||
857 | if (kparams->flags & DLM_LKF_VALBLK) { | ||
858 | if (!li->li_lksb.sb_lvbptr) { | ||
859 | li->li_lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, | ||
860 | GFP_KERNEL); | ||
861 | if (!li->li_lksb.sb_lvbptr) { | ||
862 | status = -ENOMEM; | ||
863 | goto out_err; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | memcpy(li->li_lksb.sb_lvbptr, kparams->lvb, DLM_USER_LVB_LEN); | ||
868 | } | ||
869 | |||
870 | /* Lock it ... */ | ||
871 | status = dlm_lock(fi->fi_ls->ls_lockspace, | ||
872 | kparams->mode, &li->li_lksb, | ||
873 | kparams->flags, | ||
874 | kparams->name, kparams->namelen, | ||
875 | kparams->parent, | ||
876 | ast_routine, | ||
877 | li, | ||
878 | (li->li_pend_bastaddr || li->li_bastaddr) ? | ||
879 | bast_routine : NULL, | ||
880 | kparams->range.ra_end ? &kparams->range : NULL); | ||
881 | if (status) | ||
882 | goto out_err; | ||
883 | |||
884 | /* If it succeeded (this far) with a new lock then keep track of | ||
885 | it on the file's lockinfo list */ | ||
886 | if (!status && test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) { | ||
887 | |||
888 | spin_lock(&fi->fi_li_lock); | ||
889 | list_add(&li->li_ownerqueue, &fi->fi_li_list); | ||
890 | spin_unlock(&fi->fi_li_lock); | ||
891 | if (add_lockinfo(li)) | ||
892 | printk(KERN_WARNING "Add lockinfo failed\n"); | ||
893 | |||
894 | up(&li->li_firstlock); | ||
895 | } | ||
896 | |||
897 | /* Return the lockid as the user needs it /now/ */ | ||
898 | return li->li_lksb.sb_lkid; | ||
899 | |||
900 | out_err: | ||
901 | if (test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) | ||
902 | release_lockinfo(li); | ||
903 | return status; | ||
904 | |||
905 | } | ||
906 | |||
907 | static int do_user_unlock(struct file_info *fi, uint8_t cmd, | ||
908 | struct dlm_lock_params *kparams) | ||
909 | { | ||
910 | struct lock_info *li; | ||
911 | int status; | ||
912 | int convert_cancel = 0; | ||
913 | |||
914 | li = get_lockinfo(kparams->lkid); | ||
915 | if (!li) { | ||
916 | li = allocate_lockinfo(fi, cmd, kparams); | ||
917 | spin_lock(&fi->fi_li_lock); | ||
918 | list_add(&li->li_ownerqueue, &fi->fi_li_list); | ||
919 | spin_unlock(&fi->fi_li_lock); | ||
920 | } | ||
921 | if (!li) | ||
922 | return -ENOMEM; | ||
923 | |||
924 | if (li->li_magic != LOCKINFO_MAGIC) | ||
925 | return -EINVAL; | ||
926 | |||
927 | li->li_user_lksb = kparams->lksb; | ||
928 | li->li_castparam = kparams->castparam; | ||
929 | li->li_cmd = cmd; | ||
930 | |||
931 | /* Cancelling a conversion doesn't remove the lock...*/ | ||
932 | if (kparams->flags & DLM_LKF_CANCEL && li->li_grmode != -1) | ||
933 | convert_cancel = 1; | ||
934 | |||
935 | /* dlm_unlock() passes a 0 for castaddr which means don't overwrite | ||
936 | the existing li_castaddr as that's the completion routine for | ||
937 | unlocks. dlm_unlock_wait() specifies a new AST routine to be | ||
938 | executed when the unlock completes. */ | ||
939 | if (kparams->castaddr) | ||
940 | li->li_castaddr = kparams->castaddr; | ||
941 | |||
942 | /* Use existing lksb & astparams */ | ||
943 | status = dlm_unlock(fi->fi_ls->ls_lockspace, | ||
944 | kparams->lkid, | ||
945 | kparams->flags, &li->li_lksb, li); | ||
946 | |||
947 | if (!status && !convert_cancel) { | ||
948 | spin_lock(&fi->fi_li_lock); | ||
949 | list_del(&li->li_ownerqueue); | ||
950 | spin_unlock(&fi->fi_li_lock); | ||
951 | } | ||
952 | |||
953 | return status; | ||
954 | } | ||
955 | |||
956 | /* Write call, submit a locking request */ | ||
957 | static ssize_t dlm_write(struct file *file, const char __user *buffer, | ||
958 | size_t count, loff_t *ppos) | ||
959 | { | ||
960 | struct file_info *fi = file->private_data; | ||
961 | struct dlm_write_request *kparams; | ||
962 | sigset_t tmpsig; | ||
963 | sigset_t allsigs; | ||
964 | int status; | ||
965 | |||
966 | /* -1 because lock name is optional */ | ||
967 | if (count < sizeof(struct dlm_write_request)-1) | ||
968 | return -EINVAL; | ||
969 | |||
970 | /* Has the lockspace been deleted */ | ||
971 | if (fi && test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags)) | ||
972 | return -ENOENT; | ||
973 | |||
974 | kparams = kmalloc(count, GFP_KERNEL); | ||
975 | if (!kparams) | ||
976 | return -ENOMEM; | ||
977 | |||
978 | status = -EFAULT; | ||
979 | /* Get the command info */ | ||
980 | if (copy_from_user(kparams, buffer, count)) | ||
981 | goto out_free; | ||
982 | |||
983 | status = -EBADE; | ||
984 | if (check_version(kparams)) | ||
985 | goto out_free; | ||
986 | |||
987 | /* Block signals while we are doing this */ | ||
988 | sigfillset(&allsigs); | ||
989 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | ||
990 | |||
991 | status = -EINVAL; | ||
992 | switch (kparams->cmd) | ||
993 | { | ||
994 | case DLM_USER_LOCK: | ||
995 | if (!fi) goto out_sig; | ||
996 | status = do_user_lock(fi, kparams->cmd, &kparams->i.lock); | ||
997 | break; | ||
998 | |||
999 | case DLM_USER_UNLOCK: | ||
1000 | if (!fi) goto out_sig; | ||
1001 | status = do_user_unlock(fi, kparams->cmd, &kparams->i.lock); | ||
1002 | break; | ||
1003 | |||
1004 | case DLM_USER_CREATE_LOCKSPACE: | ||
1005 | if (fi) goto out_sig; | ||
1006 | status = do_user_create_lockspace(fi, kparams->cmd, | ||
1007 | &kparams->i.lspace); | ||
1008 | break; | ||
1009 | |||
1010 | case DLM_USER_REMOVE_LOCKSPACE: | ||
1011 | if (fi) goto out_sig; | ||
1012 | status = do_user_remove_lockspace(fi, kparams->cmd, | ||
1013 | &kparams->i.lspace); | ||
1014 | break; | ||
1015 | default: | ||
1016 | printk("Unknown command passed to DLM device : %d\n", | ||
1017 | kparams->cmd); | ||
1018 | break; | ||
1019 | } | ||
1020 | |||
1021 | out_sig: | ||
1022 | /* Restore signals */ | ||
1023 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | ||
1024 | recalc_sigpending(); | ||
1025 | |||
1026 | out_free: | ||
1027 | kfree(kparams); | ||
1028 | if (status == 0) | ||
1029 | return count; | ||
1030 | else | ||
1031 | return status; | ||
1032 | } | ||
1033 | |||
1034 | static struct file_operations _dlm_fops = { | ||
1035 | .open = dlm_open, | ||
1036 | .release = dlm_close, | ||
1037 | .read = dlm_read, | ||
1038 | .write = dlm_write, | ||
1039 | .poll = dlm_poll, | ||
1040 | .owner = THIS_MODULE, | ||
1041 | }; | ||
1042 | |||
1043 | static struct file_operations _dlm_ctl_fops = { | ||
1044 | .open = dlm_ctl_open, | ||
1045 | .release = dlm_ctl_close, | ||
1046 | .write = dlm_write, | ||
1047 | .owner = THIS_MODULE, | ||
1048 | }; | ||
1049 | |||
1050 | /* | ||
1051 | * Create control device | ||
1052 | */ | ||
1053 | static int __init dlm_device_init(void) | ||
1054 | { | ||
1055 | int r; | ||
1056 | |||
1057 | INIT_LIST_HEAD(&user_ls_list); | ||
1058 | init_MUTEX(&user_ls_lock); | ||
1059 | rwlock_init(&lockinfo_lock); | ||
1060 | |||
1061 | ctl_device.name = "dlm-control"; | ||
1062 | ctl_device.fops = &_dlm_ctl_fops; | ||
1063 | ctl_device.minor = MISC_DYNAMIC_MINOR; | ||
1064 | |||
1065 | r = misc_register(&ctl_device); | ||
1066 | if (r) { | ||
1067 | printk(KERN_ERR "dlm: misc_register failed for control dev\n"); | ||
1068 | return r; | ||
1069 | } | ||
1070 | |||
1071 | return 0; | ||
1072 | } | ||
1073 | |||
1074 | static void __exit dlm_device_exit(void) | ||
1075 | { | ||
1076 | misc_deregister(&ctl_device); | ||
1077 | } | ||
1078 | |||
1079 | MODULE_DESCRIPTION("Distributed Lock Manager device interface"); | ||
1080 | MODULE_AUTHOR("Red Hat, Inc."); | ||
1081 | MODULE_LICENSE("GPL"); | ||
1082 | |||
1083 | module_init(dlm_device_init); | ||
1084 | module_exit(dlm_device_exit); | ||