diff options
Diffstat (limited to 'fs/dlm/plock.c')
-rw-r--r-- | fs/dlm/plock.c | 439 |
1 files changed, 439 insertions, 0 deletions
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c new file mode 100644 index 000000000000..d6d6e370f89c --- /dev/null +++ b/fs/dlm/plock.c | |||
@@ -0,0 +1,439 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. | ||
3 | * | ||
4 | * This copyrighted material is made available to anyone wishing to use, | ||
5 | * modify, copy, or redistribute it subject to the terms and conditions | ||
6 | * of the GNU General Public License version 2. | ||
7 | */ | ||
8 | |||
9 | #include <linux/fs.h> | ||
10 | #include <linux/miscdevice.h> | ||
11 | #include <linux/poll.h> | ||
12 | #include <linux/dlm.h> | ||
13 | #include <linux/dlm_plock.h> | ||
14 | |||
15 | #include "dlm_internal.h" | ||
16 | #include "lockspace.h" | ||
17 | |||
18 | static spinlock_t ops_lock; | ||
19 | static struct list_head send_list; | ||
20 | static struct list_head recv_list; | ||
21 | static wait_queue_head_t send_wq; | ||
22 | static wait_queue_head_t recv_wq; | ||
23 | |||
24 | struct plock_op { | ||
25 | struct list_head list; | ||
26 | int done; | ||
27 | struct dlm_plock_info info; | ||
28 | }; | ||
29 | |||
30 | struct plock_xop { | ||
31 | struct plock_op xop; | ||
32 | void *callback; | ||
33 | void *fl; | ||
34 | void *file; | ||
35 | struct file_lock flc; | ||
36 | }; | ||
37 | |||
38 | |||
39 | static inline void set_version(struct dlm_plock_info *info) | ||
40 | { | ||
41 | info->version[0] = DLM_PLOCK_VERSION_MAJOR; | ||
42 | info->version[1] = DLM_PLOCK_VERSION_MINOR; | ||
43 | info->version[2] = DLM_PLOCK_VERSION_PATCH; | ||
44 | } | ||
45 | |||
46 | static int check_version(struct dlm_plock_info *info) | ||
47 | { | ||
48 | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || | ||
49 | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { | ||
50 | log_print("plock device version mismatch: " | ||
51 | "kernel (%u.%u.%u), user (%u.%u.%u)", | ||
52 | DLM_PLOCK_VERSION_MAJOR, | ||
53 | DLM_PLOCK_VERSION_MINOR, | ||
54 | DLM_PLOCK_VERSION_PATCH, | ||
55 | info->version[0], | ||
56 | info->version[1], | ||
57 | info->version[2]); | ||
58 | return -EINVAL; | ||
59 | } | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static void send_op(struct plock_op *op) | ||
64 | { | ||
65 | set_version(&op->info); | ||
66 | INIT_LIST_HEAD(&op->list); | ||
67 | spin_lock(&ops_lock); | ||
68 | list_add_tail(&op->list, &send_list); | ||
69 | spin_unlock(&ops_lock); | ||
70 | wake_up(&send_wq); | ||
71 | } | ||
72 | |||
73 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | ||
74 | int cmd, struct file_lock *fl) | ||
75 | { | ||
76 | struct dlm_ls *ls; | ||
77 | struct plock_op *op; | ||
78 | struct plock_xop *xop; | ||
79 | int rv; | ||
80 | |||
81 | ls = dlm_find_lockspace_local(lockspace); | ||
82 | if (!ls) | ||
83 | return -EINVAL; | ||
84 | |||
85 | xop = kzalloc(sizeof(*xop), GFP_KERNEL); | ||
86 | if (!xop) { | ||
87 | rv = -ENOMEM; | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | op = &xop->xop; | ||
92 | op->info.optype = DLM_PLOCK_OP_LOCK; | ||
93 | op->info.pid = fl->fl_pid; | ||
94 | op->info.ex = (fl->fl_type == F_WRLCK); | ||
95 | op->info.wait = IS_SETLKW(cmd); | ||
96 | op->info.fsid = ls->ls_global_id; | ||
97 | op->info.number = number; | ||
98 | op->info.start = fl->fl_start; | ||
99 | op->info.end = fl->fl_end; | ||
100 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { | ||
101 | /* fl_owner is lockd which doesn't distinguish | ||
102 | processes on the nfs client */ | ||
103 | op->info.owner = (__u64) fl->fl_pid; | ||
104 | xop->callback = fl->fl_lmops->fl_grant; | ||
105 | locks_init_lock(&xop->flc); | ||
106 | locks_copy_lock(&xop->flc, fl); | ||
107 | xop->fl = fl; | ||
108 | xop->file = file; | ||
109 | } else { | ||
110 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
111 | xop->callback = NULL; | ||
112 | } | ||
113 | |||
114 | send_op(op); | ||
115 | |||
116 | if (xop->callback == NULL) | ||
117 | wait_event(recv_wq, (op->done != 0)); | ||
118 | else { | ||
119 | rv = -EINPROGRESS; | ||
120 | goto out; | ||
121 | } | ||
122 | |||
123 | spin_lock(&ops_lock); | ||
124 | if (!list_empty(&op->list)) { | ||
125 | log_error(ls, "dlm_posix_lock: op on list %llx", | ||
126 | (unsigned long long)number); | ||
127 | list_del(&op->list); | ||
128 | } | ||
129 | spin_unlock(&ops_lock); | ||
130 | |||
131 | rv = op->info.rv; | ||
132 | |||
133 | if (!rv) { | ||
134 | if (posix_lock_file_wait(file, fl) < 0) | ||
135 | log_error(ls, "dlm_posix_lock: vfs lock error %llx", | ||
136 | (unsigned long long)number); | ||
137 | } | ||
138 | |||
139 | kfree(xop); | ||
140 | out: | ||
141 | dlm_put_lockspace(ls); | ||
142 | return rv; | ||
143 | } | ||
144 | EXPORT_SYMBOL_GPL(dlm_posix_lock); | ||
145 | |||
146 | /* Returns failure iff a succesful lock operation should be canceled */ | ||
147 | static int dlm_plock_callback(struct plock_op *op) | ||
148 | { | ||
149 | struct file *file; | ||
150 | struct file_lock *fl; | ||
151 | struct file_lock *flc; | ||
152 | int (*notify)(void *, void *, int) = NULL; | ||
153 | struct plock_xop *xop = (struct plock_xop *)op; | ||
154 | int rv = 0; | ||
155 | |||
156 | spin_lock(&ops_lock); | ||
157 | if (!list_empty(&op->list)) { | ||
158 | log_print("dlm_plock_callback: op on list %llx", | ||
159 | (unsigned long long)op->info.number); | ||
160 | list_del(&op->list); | ||
161 | } | ||
162 | spin_unlock(&ops_lock); | ||
163 | |||
164 | /* check if the following 2 are still valid or make a copy */ | ||
165 | file = xop->file; | ||
166 | flc = &xop->flc; | ||
167 | fl = xop->fl; | ||
168 | notify = xop->callback; | ||
169 | |||
170 | if (op->info.rv) { | ||
171 | notify(flc, NULL, op->info.rv); | ||
172 | goto out; | ||
173 | } | ||
174 | |||
175 | /* got fs lock; bookkeep locally as well: */ | ||
176 | flc->fl_flags &= ~FL_SLEEP; | ||
177 | if (posix_lock_file(file, flc, NULL)) { | ||
178 | /* | ||
179 | * This can only happen in the case of kmalloc() failure. | ||
180 | * The filesystem's own lock is the authoritative lock, | ||
181 | * so a failure to get the lock locally is not a disaster. | ||
182 | * As long as the fs cannot reliably cancel locks (especially | ||
183 | * in a low-memory situation), we're better off ignoring | ||
184 | * this failure than trying to recover. | ||
185 | */ | ||
186 | log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", | ||
187 | (unsigned long long)op->info.number, file, fl); | ||
188 | } | ||
189 | |||
190 | rv = notify(flc, NULL, 0); | ||
191 | if (rv) { | ||
192 | /* XXX: We need to cancel the fs lock here: */ | ||
193 | log_print("dlm_plock_callback: lock granted after lock request " | ||
194 | "failed; dangling lock!\n"); | ||
195 | goto out; | ||
196 | } | ||
197 | |||
198 | out: | ||
199 | kfree(xop); | ||
200 | return rv; | ||
201 | } | ||
202 | |||
203 | int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | ||
204 | struct file_lock *fl) | ||
205 | { | ||
206 | struct dlm_ls *ls; | ||
207 | struct plock_op *op; | ||
208 | int rv; | ||
209 | |||
210 | ls = dlm_find_lockspace_local(lockspace); | ||
211 | if (!ls) | ||
212 | return -EINVAL; | ||
213 | |||
214 | op = kzalloc(sizeof(*op), GFP_KERNEL); | ||
215 | if (!op) { | ||
216 | rv = -ENOMEM; | ||
217 | goto out; | ||
218 | } | ||
219 | |||
220 | if (posix_lock_file_wait(file, fl) < 0) | ||
221 | log_error(ls, "dlm_posix_unlock: vfs unlock error %llx", | ||
222 | (unsigned long long)number); | ||
223 | |||
224 | op->info.optype = DLM_PLOCK_OP_UNLOCK; | ||
225 | op->info.pid = fl->fl_pid; | ||
226 | op->info.fsid = ls->ls_global_id; | ||
227 | op->info.number = number; | ||
228 | op->info.start = fl->fl_start; | ||
229 | op->info.end = fl->fl_end; | ||
230 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | ||
231 | op->info.owner = (__u64) fl->fl_pid; | ||
232 | else | ||
233 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
234 | |||
235 | send_op(op); | ||
236 | wait_event(recv_wq, (op->done != 0)); | ||
237 | |||
238 | spin_lock(&ops_lock); | ||
239 | if (!list_empty(&op->list)) { | ||
240 | log_error(ls, "dlm_posix_unlock: op on list %llx", | ||
241 | (unsigned long long)number); | ||
242 | list_del(&op->list); | ||
243 | } | ||
244 | spin_unlock(&ops_lock); | ||
245 | |||
246 | rv = op->info.rv; | ||
247 | |||
248 | if (rv == -ENOENT) | ||
249 | rv = 0; | ||
250 | |||
251 | kfree(op); | ||
252 | out: | ||
253 | dlm_put_lockspace(ls); | ||
254 | return rv; | ||
255 | } | ||
256 | EXPORT_SYMBOL_GPL(dlm_posix_unlock); | ||
257 | |||
258 | int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, | ||
259 | struct file_lock *fl) | ||
260 | { | ||
261 | struct dlm_ls *ls; | ||
262 | struct plock_op *op; | ||
263 | int rv; | ||
264 | |||
265 | ls = dlm_find_lockspace_local(lockspace); | ||
266 | if (!ls) | ||
267 | return -EINVAL; | ||
268 | |||
269 | op = kzalloc(sizeof(*op), GFP_KERNEL); | ||
270 | if (!op) { | ||
271 | rv = -ENOMEM; | ||
272 | goto out; | ||
273 | } | ||
274 | |||
275 | op->info.optype = DLM_PLOCK_OP_GET; | ||
276 | op->info.pid = fl->fl_pid; | ||
277 | op->info.ex = (fl->fl_type == F_WRLCK); | ||
278 | op->info.fsid = ls->ls_global_id; | ||
279 | op->info.number = number; | ||
280 | op->info.start = fl->fl_start; | ||
281 | op->info.end = fl->fl_end; | ||
282 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | ||
283 | op->info.owner = (__u64) fl->fl_pid; | ||
284 | else | ||
285 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
286 | |||
287 | send_op(op); | ||
288 | wait_event(recv_wq, (op->done != 0)); | ||
289 | |||
290 | spin_lock(&ops_lock); | ||
291 | if (!list_empty(&op->list)) { | ||
292 | log_error(ls, "dlm_posix_get: op on list %llx", | ||
293 | (unsigned long long)number); | ||
294 | list_del(&op->list); | ||
295 | } | ||
296 | spin_unlock(&ops_lock); | ||
297 | |||
298 | /* info.rv from userspace is 1 for conflict, 0 for no-conflict, | ||
299 | -ENOENT if there are no locks on the file */ | ||
300 | |||
301 | rv = op->info.rv; | ||
302 | |||
303 | fl->fl_type = F_UNLCK; | ||
304 | if (rv == -ENOENT) | ||
305 | rv = 0; | ||
306 | else if (rv > 0) { | ||
307 | fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; | ||
308 | fl->fl_pid = op->info.pid; | ||
309 | fl->fl_start = op->info.start; | ||
310 | fl->fl_end = op->info.end; | ||
311 | rv = 0; | ||
312 | } | ||
313 | |||
314 | kfree(op); | ||
315 | out: | ||
316 | dlm_put_lockspace(ls); | ||
317 | return rv; | ||
318 | } | ||
319 | EXPORT_SYMBOL_GPL(dlm_posix_get); | ||
320 | |||
321 | /* a read copies out one plock request from the send list */ | ||
322 | static ssize_t dev_read(struct file *file, char __user *u, size_t count, | ||
323 | loff_t *ppos) | ||
324 | { | ||
325 | struct dlm_plock_info info; | ||
326 | struct plock_op *op = NULL; | ||
327 | |||
328 | if (count < sizeof(info)) | ||
329 | return -EINVAL; | ||
330 | |||
331 | spin_lock(&ops_lock); | ||
332 | if (!list_empty(&send_list)) { | ||
333 | op = list_entry(send_list.next, struct plock_op, list); | ||
334 | list_move(&op->list, &recv_list); | ||
335 | memcpy(&info, &op->info, sizeof(info)); | ||
336 | } | ||
337 | spin_unlock(&ops_lock); | ||
338 | |||
339 | if (!op) | ||
340 | return -EAGAIN; | ||
341 | |||
342 | if (copy_to_user(u, &info, sizeof(info))) | ||
343 | return -EFAULT; | ||
344 | return sizeof(info); | ||
345 | } | ||
346 | |||
347 | /* a write copies in one plock result that should match a plock_op | ||
348 | on the recv list */ | ||
349 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | ||
350 | loff_t *ppos) | ||
351 | { | ||
352 | struct dlm_plock_info info; | ||
353 | struct plock_op *op; | ||
354 | int found = 0; | ||
355 | |||
356 | if (count != sizeof(info)) | ||
357 | return -EINVAL; | ||
358 | |||
359 | if (copy_from_user(&info, u, sizeof(info))) | ||
360 | return -EFAULT; | ||
361 | |||
362 | if (check_version(&info)) | ||
363 | return -EINVAL; | ||
364 | |||
365 | spin_lock(&ops_lock); | ||
366 | list_for_each_entry(op, &recv_list, list) { | ||
367 | if (op->info.fsid == info.fsid && op->info.number == info.number && | ||
368 | op->info.owner == info.owner) { | ||
369 | list_del_init(&op->list); | ||
370 | found = 1; | ||
371 | op->done = 1; | ||
372 | memcpy(&op->info, &info, sizeof(info)); | ||
373 | break; | ||
374 | } | ||
375 | } | ||
376 | spin_unlock(&ops_lock); | ||
377 | |||
378 | if (found) { | ||
379 | struct plock_xop *xop; | ||
380 | xop = (struct plock_xop *)op; | ||
381 | if (xop->callback) | ||
382 | count = dlm_plock_callback(op); | ||
383 | else | ||
384 | wake_up(&recv_wq); | ||
385 | } else | ||
386 | log_print("dev_write no op %x %llx", info.fsid, | ||
387 | (unsigned long long)info.number); | ||
388 | return count; | ||
389 | } | ||
390 | |||
391 | static unsigned int dev_poll(struct file *file, poll_table *wait) | ||
392 | { | ||
393 | unsigned int mask = 0; | ||
394 | |||
395 | poll_wait(file, &send_wq, wait); | ||
396 | |||
397 | spin_lock(&ops_lock); | ||
398 | if (!list_empty(&send_list)) | ||
399 | mask = POLLIN | POLLRDNORM; | ||
400 | spin_unlock(&ops_lock); | ||
401 | |||
402 | return mask; | ||
403 | } | ||
404 | |||
405 | static const struct file_operations dev_fops = { | ||
406 | .read = dev_read, | ||
407 | .write = dev_write, | ||
408 | .poll = dev_poll, | ||
409 | .owner = THIS_MODULE | ||
410 | }; | ||
411 | |||
412 | static struct miscdevice plock_dev_misc = { | ||
413 | .minor = MISC_DYNAMIC_MINOR, | ||
414 | .name = DLM_PLOCK_MISC_NAME, | ||
415 | .fops = &dev_fops | ||
416 | }; | ||
417 | |||
418 | int dlm_plock_init(void) | ||
419 | { | ||
420 | int rv; | ||
421 | |||
422 | spin_lock_init(&ops_lock); | ||
423 | INIT_LIST_HEAD(&send_list); | ||
424 | INIT_LIST_HEAD(&recv_list); | ||
425 | init_waitqueue_head(&send_wq); | ||
426 | init_waitqueue_head(&recv_wq); | ||
427 | |||
428 | rv = misc_register(&plock_dev_misc); | ||
429 | if (rv) | ||
430 | log_print("dlm_plock_init: misc_register failed %d", rv); | ||
431 | return rv; | ||
432 | } | ||
433 | |||
434 | void dlm_plock_exit(void) | ||
435 | { | ||
436 | if (misc_deregister(&plock_dev_misc) < 0) | ||
437 | log_print("dlm_plock_exit: misc_deregister failed"); | ||
438 | } | ||
439 | |||