diff options
Diffstat (limited to 'fs/orangefs/devorangefs-req.c')
-rw-r--r-- | fs/orangefs/devorangefs-req.c | 943 |
1 files changed, 943 insertions, 0 deletions
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c new file mode 100644 index 000000000000..db170beba797 --- /dev/null +++ b/fs/orangefs/devorangefs-req.c | |||
@@ -0,0 +1,943 @@ | |||
1 | /* | ||
2 | * (C) 2001 Clemson University and The University of Chicago | ||
3 | * | ||
4 | * Changes by Acxiom Corporation to add protocol version to kernel | ||
5 | * communication, Copyright Acxiom Corporation, 2005. | ||
6 | * | ||
7 | * See COPYING in top-level directory. | ||
8 | */ | ||
9 | |||
10 | #include "protocol.h" | ||
11 | #include "orangefs-kernel.h" | ||
12 | #include "orangefs-dev-proto.h" | ||
13 | #include "orangefs-bufmap.h" | ||
14 | |||
15 | #include <linux/debugfs.h> | ||
16 | #include <linux/slab.h> | ||
17 | |||
18 | /* this file implements the /dev/pvfs2-req device node */ | ||
19 | |||
/*
 * Open state of the request device: 0 when nobody holds it, 1 once
 * orangefs_devreq_open() has succeeded, and -1 for the duration of
 * orangefs_devreq_release().
 */
static int open_access_count;

/*
 * Emit the multi-line "device already open" diagnostic, including the
 * current value of open_access_count.
 */
#define DUMP_DEVICE_ERROR()						\
do {									\
	gossip_err("*****************************************************\n"); \
	gossip_err("ORANGEFS Device Error: You cannot open the device file "); \
	gossip_err("\n/dev/%s more than once. Please make sure that\nthere are no ", \
		   ORANGEFS_REQDEVICE_NAME);				\
	gossip_err("instances of a program using this device\ncurrently running. (You must verify this!)\n"); \
	gossip_err("For example, you can use the lsof program as follows:\n"); \
	gossip_err("'lsof | grep %s' (run this as root)\n",		\
		   ORANGEFS_REQDEVICE_NAME);				\
	gossip_err(" open_access_count = %d\n", open_access_count);	\
	gossip_err("*****************************************************\n"); \
} while (0)
36 | |||
37 | static int hash_func(__u64 tag, int table_size) | ||
38 | { | ||
39 | return do_div(tag, (unsigned int)table_size); | ||
40 | } | ||
41 | |||
42 | static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op) | ||
43 | { | ||
44 | int index = hash_func(op->tag, hash_table_size); | ||
45 | |||
46 | list_add_tail(&op->list, &htable_ops_in_progress[index]); | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * find the op with this tag and remove it from the in progress | ||
51 | * hash table. | ||
52 | */ | ||
53 | static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) | ||
54 | { | ||
55 | struct orangefs_kernel_op_s *op, *next; | ||
56 | int index; | ||
57 | |||
58 | index = hash_func(tag, hash_table_size); | ||
59 | |||
60 | spin_lock(&htable_ops_in_progress_lock); | ||
61 | list_for_each_entry_safe(op, | ||
62 | next, | ||
63 | &htable_ops_in_progress[index], | ||
64 | list) { | ||
65 | if (op->tag == tag && !op_state_purged(op) && | ||
66 | !op_state_given_up(op)) { | ||
67 | list_del_init(&op->list); | ||
68 | spin_unlock(&htable_ops_in_progress_lock); | ||
69 | return op; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | spin_unlock(&htable_ops_in_progress_lock); | ||
74 | return NULL; | ||
75 | } | ||
76 | |||
77 | /* Returns whether any FS are still pending remounted */ | ||
78 | static int mark_all_pending_mounts(void) | ||
79 | { | ||
80 | int unmounted = 1; | ||
81 | struct orangefs_sb_info_s *orangefs_sb = NULL; | ||
82 | |||
83 | spin_lock(&orangefs_superblocks_lock); | ||
84 | list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { | ||
85 | /* All of these file system require a remount */ | ||
86 | orangefs_sb->mount_pending = 1; | ||
87 | unmounted = 0; | ||
88 | } | ||
89 | spin_unlock(&orangefs_superblocks_lock); | ||
90 | return unmounted; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Determine if a given file system needs to be remounted or not | ||
95 | * Returns -1 on error | ||
96 | * 0 if already mounted | ||
97 | * 1 if needs remount | ||
98 | */ | ||
99 | static int fs_mount_pending(__s32 fsid) | ||
100 | { | ||
101 | int mount_pending = -1; | ||
102 | struct orangefs_sb_info_s *orangefs_sb = NULL; | ||
103 | |||
104 | spin_lock(&orangefs_superblocks_lock); | ||
105 | list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { | ||
106 | if (orangefs_sb->fs_id == fsid) { | ||
107 | mount_pending = orangefs_sb->mount_pending; | ||
108 | break; | ||
109 | } | ||
110 | } | ||
111 | spin_unlock(&orangefs_superblocks_lock); | ||
112 | return mount_pending; | ||
113 | } | ||
114 | |||
115 | static int orangefs_devreq_open(struct inode *inode, struct file *file) | ||
116 | { | ||
117 | int ret = -EINVAL; | ||
118 | |||
119 | if (!(file->f_flags & O_NONBLOCK)) { | ||
120 | gossip_err("%s: device cannot be opened in blocking mode\n", | ||
121 | __func__); | ||
122 | goto out; | ||
123 | } | ||
124 | ret = -EACCES; | ||
125 | gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n"); | ||
126 | mutex_lock(&devreq_mutex); | ||
127 | |||
128 | if (open_access_count == 0) { | ||
129 | open_access_count = 1; | ||
130 | ret = 0; | ||
131 | } else { | ||
132 | DUMP_DEVICE_ERROR(); | ||
133 | } | ||
134 | mutex_unlock(&devreq_mutex); | ||
135 | |||
136 | out: | ||
137 | |||
138 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
139 | "pvfs2-client-core: open device complete (ret = %d)\n", | ||
140 | ret); | ||
141 | return ret; | ||
142 | } | ||
143 | |||
144 | /* Function for read() callers into the device */ | ||
145 | static ssize_t orangefs_devreq_read(struct file *file, | ||
146 | char __user *buf, | ||
147 | size_t count, loff_t *offset) | ||
148 | { | ||
149 | struct orangefs_kernel_op_s *op, *temp; | ||
150 | __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; | ||
151 | static __s32 magic = ORANGEFS_DEVREQ_MAGIC; | ||
152 | struct orangefs_kernel_op_s *cur_op = NULL; | ||
153 | unsigned long ret; | ||
154 | |||
155 | /* We do not support blocking IO. */ | ||
156 | if (!(file->f_flags & O_NONBLOCK)) { | ||
157 | gossip_err("%s: blocking read from client-core.\n", | ||
158 | __func__); | ||
159 | return -EINVAL; | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then | ||
164 | * always read with that size buffer. | ||
165 | */ | ||
166 | if (count != MAX_DEV_REQ_UPSIZE) { | ||
167 | gossip_err("orangefs: client-core tried to read wrong size\n"); | ||
168 | return -EINVAL; | ||
169 | } | ||
170 | |||
171 | restart: | ||
172 | /* Get next op (if any) from top of list. */ | ||
173 | spin_lock(&orangefs_request_list_lock); | ||
174 | list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { | ||
175 | __s32 fsid; | ||
176 | /* This lock is held past the end of the loop when we break. */ | ||
177 | spin_lock(&op->lock); | ||
178 | if (unlikely(op_state_purged(op) || op_state_given_up(op))) { | ||
179 | spin_unlock(&op->lock); | ||
180 | continue; | ||
181 | } | ||
182 | |||
183 | fsid = fsid_of_op(op); | ||
184 | if (fsid != ORANGEFS_FS_ID_NULL) { | ||
185 | int ret; | ||
186 | /* Skip ops whose filesystem needs to be mounted. */ | ||
187 | ret = fs_mount_pending(fsid); | ||
188 | if (ret == 1) { | ||
189 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
190 | "%s: mount pending, skipping op tag " | ||
191 | "%llu %s\n", | ||
192 | __func__, | ||
193 | llu(op->tag), | ||
194 | get_opname_string(op)); | ||
195 | spin_unlock(&op->lock); | ||
196 | continue; | ||
197 | /* | ||
198 | * Skip ops whose filesystem we don't know about unless | ||
199 | * it is being mounted. | ||
200 | */ | ||
201 | /* XXX: is there a better way to detect this? */ | ||
202 | } else if (ret == -1 && | ||
203 | !(op->upcall.type == | ||
204 | ORANGEFS_VFS_OP_FS_MOUNT || | ||
205 | op->upcall.type == | ||
206 | ORANGEFS_VFS_OP_GETATTR)) { | ||
207 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
208 | "orangefs: skipping op tag %llu %s\n", | ||
209 | llu(op->tag), get_opname_string(op)); | ||
210 | gossip_err( | ||
211 | "orangefs: ERROR: fs_mount_pending %d\n", | ||
212 | fsid); | ||
213 | spin_unlock(&op->lock); | ||
214 | continue; | ||
215 | } | ||
216 | } | ||
217 | /* | ||
218 | * Either this op does not pertain to a filesystem, is mounting | ||
219 | * a filesystem, or pertains to a mounted filesystem. Let it | ||
220 | * through. | ||
221 | */ | ||
222 | cur_op = op; | ||
223 | break; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * At this point we either have a valid op and can continue or have not | ||
228 | * found an op and must ask the client to try again later. | ||
229 | */ | ||
230 | if (!cur_op) { | ||
231 | spin_unlock(&orangefs_request_list_lock); | ||
232 | return -EAGAIN; | ||
233 | } | ||
234 | |||
235 | gossip_debug(GOSSIP_DEV_DEBUG, "%s: reading op tag %llu %s\n", | ||
236 | __func__, | ||
237 | llu(cur_op->tag), | ||
238 | get_opname_string(cur_op)); | ||
239 | |||
240 | /* | ||
241 | * Such an op should never be on the list in the first place. If so, we | ||
242 | * will abort. | ||
243 | */ | ||
244 | if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { | ||
245 | gossip_err("orangefs: ERROR: Current op already queued.\n"); | ||
246 | list_del_init(&cur_op->list); | ||
247 | spin_unlock(&cur_op->lock); | ||
248 | spin_unlock(&orangefs_request_list_lock); | ||
249 | return -EAGAIN; | ||
250 | } | ||
251 | |||
252 | list_del_init(&cur_op->list); | ||
253 | spin_unlock(&orangefs_request_list_lock); | ||
254 | |||
255 | spin_unlock(&cur_op->lock); | ||
256 | |||
257 | /* Push the upcall out. */ | ||
258 | ret = copy_to_user(buf, &proto_ver, sizeof(__s32)); | ||
259 | if (ret != 0) | ||
260 | goto error; | ||
261 | ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32)); | ||
262 | if (ret != 0) | ||
263 | goto error; | ||
264 | ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64)); | ||
265 | if (ret != 0) | ||
266 | goto error; | ||
267 | ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall, | ||
268 | sizeof(struct orangefs_upcall_s)); | ||
269 | if (ret != 0) | ||
270 | goto error; | ||
271 | |||
272 | spin_lock(&htable_ops_in_progress_lock); | ||
273 | spin_lock(&cur_op->lock); | ||
274 | if (unlikely(op_state_given_up(cur_op))) { | ||
275 | spin_unlock(&cur_op->lock); | ||
276 | spin_unlock(&htable_ops_in_progress_lock); | ||
277 | complete(&cur_op->waitq); | ||
278 | goto restart; | ||
279 | } | ||
280 | |||
281 | /* | ||
282 | * Set the operation to be in progress and move it between lists since | ||
283 | * it has been sent to the client. | ||
284 | */ | ||
285 | set_op_state_inprogress(cur_op); | ||
286 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
287 | "%s: 1 op:%s: op_state:%d: process:%s:\n", | ||
288 | __func__, | ||
289 | get_opname_string(cur_op), | ||
290 | cur_op->op_state, | ||
291 | current->comm); | ||
292 | orangefs_devreq_add_op(cur_op); | ||
293 | spin_unlock(&cur_op->lock); | ||
294 | spin_unlock(&htable_ops_in_progress_lock); | ||
295 | |||
296 | /* The client only asks to read one size buffer. */ | ||
297 | return MAX_DEV_REQ_UPSIZE; | ||
298 | error: | ||
299 | /* | ||
300 | * We were unable to copy the op data to the client. Put the op back in | ||
301 | * list. If client has crashed, the op will be purged later when the | ||
302 | * device is released. | ||
303 | */ | ||
304 | gossip_err("orangefs: Failed to copy data to user space\n"); | ||
305 | spin_lock(&orangefs_request_list_lock); | ||
306 | spin_lock(&cur_op->lock); | ||
307 | if (likely(!op_state_given_up(cur_op))) { | ||
308 | set_op_state_waiting(cur_op); | ||
309 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
310 | "%s: 2 op:%s: op_state:%d: process:%s:\n", | ||
311 | __func__, | ||
312 | get_opname_string(cur_op), | ||
313 | cur_op->op_state, | ||
314 | current->comm); | ||
315 | list_add(&cur_op->list, &orangefs_request_list); | ||
316 | spin_unlock(&cur_op->lock); | ||
317 | } else { | ||
318 | spin_unlock(&cur_op->lock); | ||
319 | complete(&cur_op->waitq); | ||
320 | } | ||
321 | spin_unlock(&orangefs_request_list_lock); | ||
322 | return -EFAULT; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Function for writev() callers into the device. | ||
327 | * | ||
328 | * Userspace should have written: | ||
329 | * - __u32 version | ||
330 | * - __u32 magic | ||
331 | * - __u64 tag | ||
332 | * - struct orangefs_downcall_s | ||
333 | * - trailer buffer (in the case of READDIR operations) | ||
334 | */ | ||
335 | static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, | ||
336 | struct iov_iter *iter) | ||
337 | { | ||
338 | ssize_t ret; | ||
339 | struct orangefs_kernel_op_s *op = NULL; | ||
340 | struct { | ||
341 | __u32 version; | ||
342 | __u32 magic; | ||
343 | __u64 tag; | ||
344 | } head; | ||
345 | int total = ret = iov_iter_count(iter); | ||
346 | int n; | ||
347 | int downcall_size = sizeof(struct orangefs_downcall_s); | ||
348 | int head_size = sizeof(head); | ||
349 | |||
350 | gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n", | ||
351 | __func__, | ||
352 | total, | ||
353 | ret); | ||
354 | |||
355 | if (total < MAX_DEV_REQ_DOWNSIZE) { | ||
356 | gossip_err("%s: total:%d: must be at least:%u:\n", | ||
357 | __func__, | ||
358 | total, | ||
359 | (unsigned int) MAX_DEV_REQ_DOWNSIZE); | ||
360 | return -EFAULT; | ||
361 | } | ||
362 | |||
363 | n = copy_from_iter(&head, head_size, iter); | ||
364 | if (n < head_size) { | ||
365 | gossip_err("%s: failed to copy head.\n", __func__); | ||
366 | return -EFAULT; | ||
367 | } | ||
368 | |||
369 | if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) { | ||
370 | gossip_err("%s: userspace claims version" | ||
371 | "%d, minimum version required: %d.\n", | ||
372 | __func__, | ||
373 | head.version, | ||
374 | ORANGEFS_MINIMUM_USERSPACE_VERSION); | ||
375 | return -EPROTO; | ||
376 | } | ||
377 | |||
378 | if (head.magic != ORANGEFS_DEVREQ_MAGIC) { | ||
379 | gossip_err("Error: Device magic number does not match.\n"); | ||
380 | return -EPROTO; | ||
381 | } | ||
382 | |||
383 | /* remove the op from the in progress hash table */ | ||
384 | op = orangefs_devreq_remove_op(head.tag); | ||
385 | if (!op) { | ||
386 | gossip_err("WARNING: No one's waiting for tag %llu\n", | ||
387 | llu(head.tag)); | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | n = copy_from_iter(&op->downcall, downcall_size, iter); | ||
392 | if (n != downcall_size) { | ||
393 | gossip_err("%s: failed to copy downcall.\n", __func__); | ||
394 | goto Efault; | ||
395 | } | ||
396 | |||
397 | if (op->downcall.status) | ||
398 | goto wakeup; | ||
399 | |||
400 | /* | ||
401 | * We've successfully peeled off the head and the downcall. | ||
402 | * Something has gone awry if total doesn't equal the | ||
403 | * sum of head_size, downcall_size and trailer_size. | ||
404 | */ | ||
405 | if ((head_size + downcall_size + op->downcall.trailer_size) != total) { | ||
406 | gossip_err("%s: funky write, head_size:%d" | ||
407 | ": downcall_size:%d: trailer_size:%lld" | ||
408 | ": total size:%d:\n", | ||
409 | __func__, | ||
410 | head_size, | ||
411 | downcall_size, | ||
412 | op->downcall.trailer_size, | ||
413 | total); | ||
414 | goto Efault; | ||
415 | } | ||
416 | |||
417 | /* Only READDIR operations should have trailers. */ | ||
418 | if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) && | ||
419 | (op->downcall.trailer_size != 0)) { | ||
420 | gossip_err("%s: %x operation with trailer.", | ||
421 | __func__, | ||
422 | op->downcall.type); | ||
423 | goto Efault; | ||
424 | } | ||
425 | |||
426 | /* READDIR operations should always have trailers. */ | ||
427 | if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) && | ||
428 | (op->downcall.trailer_size == 0)) { | ||
429 | gossip_err("%s: %x operation with no trailer.", | ||
430 | __func__, | ||
431 | op->downcall.type); | ||
432 | goto Efault; | ||
433 | } | ||
434 | |||
435 | if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) | ||
436 | goto wakeup; | ||
437 | |||
438 | op->downcall.trailer_buf = | ||
439 | vmalloc(op->downcall.trailer_size); | ||
440 | if (op->downcall.trailer_buf == NULL) { | ||
441 | gossip_err("%s: failed trailer vmalloc.\n", | ||
442 | __func__); | ||
443 | goto Enomem; | ||
444 | } | ||
445 | memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); | ||
446 | n = copy_from_iter(op->downcall.trailer_buf, | ||
447 | op->downcall.trailer_size, | ||
448 | iter); | ||
449 | if (n != op->downcall.trailer_size) { | ||
450 | gossip_err("%s: failed to copy trailer.\n", __func__); | ||
451 | vfree(op->downcall.trailer_buf); | ||
452 | goto Efault; | ||
453 | } | ||
454 | |||
455 | wakeup: | ||
456 | /* | ||
457 | * Return to vfs waitqueue, and back to service_operation | ||
458 | * through wait_for_matching_downcall. | ||
459 | */ | ||
460 | spin_lock(&op->lock); | ||
461 | if (unlikely(op_is_cancel(op))) { | ||
462 | spin_unlock(&op->lock); | ||
463 | put_cancel(op); | ||
464 | } else if (unlikely(op_state_given_up(op))) { | ||
465 | spin_unlock(&op->lock); | ||
466 | complete(&op->waitq); | ||
467 | } else { | ||
468 | set_op_state_serviced(op); | ||
469 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
470 | "%s: op:%s: op_state:%d: process:%s:\n", | ||
471 | __func__, | ||
472 | get_opname_string(op), | ||
473 | op->op_state, | ||
474 | current->comm); | ||
475 | spin_unlock(&op->lock); | ||
476 | } | ||
477 | return ret; | ||
478 | |||
479 | Efault: | ||
480 | op->downcall.status = -(ORANGEFS_ERROR_BIT | 9); | ||
481 | ret = -EFAULT; | ||
482 | goto wakeup; | ||
483 | |||
484 | Enomem: | ||
485 | op->downcall.status = -(ORANGEFS_ERROR_BIT | 8); | ||
486 | ret = -ENOMEM; | ||
487 | goto wakeup; | ||
488 | } | ||
489 | |||
490 | /* | ||
491 | * NOTE: gets called when the last reference to this device is dropped. | ||
492 | * Using the open_access_count variable, we enforce a reference count | ||
493 | * on this file so that it can be opened by only one process at a time. | ||
494 | * the devreq_mutex is used to make sure all i/o has completed | ||
495 | * before we call orangefs_bufmap_finalize, and similar such tricky | ||
496 | * situations | ||
497 | */ | ||
498 | static int orangefs_devreq_release(struct inode *inode, struct file *file) | ||
499 | { | ||
500 | int unmounted = 0; | ||
501 | |||
502 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
503 | "%s:pvfs2-client-core: exiting, closing device\n", | ||
504 | __func__); | ||
505 | |||
506 | mutex_lock(&devreq_mutex); | ||
507 | orangefs_bufmap_finalize(); | ||
508 | |||
509 | open_access_count = -1; | ||
510 | |||
511 | unmounted = mark_all_pending_mounts(); | ||
512 | gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n", | ||
513 | (unmounted ? "UNMOUNTED" : "MOUNTED")); | ||
514 | |||
515 | purge_waiting_ops(); | ||
516 | purge_inprogress_ops(); | ||
517 | |||
518 | orangefs_bufmap_run_down(); | ||
519 | |||
520 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
521 | "pvfs2-client-core: device close complete\n"); | ||
522 | open_access_count = 0; | ||
523 | mutex_unlock(&devreq_mutex); | ||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | int is_daemon_in_service(void) | ||
528 | { | ||
529 | int in_service; | ||
530 | |||
531 | /* | ||
532 | * What this function does is checks if client-core is alive | ||
533 | * based on the access count we maintain on the device. | ||
534 | */ | ||
535 | mutex_lock(&devreq_mutex); | ||
536 | in_service = open_access_count == 1 ? 0 : -EIO; | ||
537 | mutex_unlock(&devreq_mutex); | ||
538 | return in_service; | ||
539 | } | ||
540 | |||
541 | bool __is_daemon_in_service(void) | ||
542 | { | ||
543 | return open_access_count == 1; | ||
544 | } | ||
545 | |||
546 | static inline long check_ioctl_command(unsigned int command) | ||
547 | { | ||
548 | /* Check for valid ioctl codes */ | ||
549 | if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) { | ||
550 | gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n", | ||
551 | command, | ||
552 | _IOC_TYPE(command), | ||
553 | ORANGEFS_DEV_MAGIC); | ||
554 | return -EINVAL; | ||
555 | } | ||
556 | /* and valid ioctl commands */ | ||
557 | if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) { | ||
558 | gossip_err("Invalid ioctl command number [%d >= %d]\n", | ||
559 | _IOC_NR(command), ORANGEFS_DEV_MAXNR); | ||
560 | return -ENOIOCTLCMD; | ||
561 | } | ||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | static long dispatch_ioctl_command(unsigned int command, unsigned long arg) | ||
566 | { | ||
567 | static __s32 magic = ORANGEFS_DEVREQ_MAGIC; | ||
568 | static __s32 max_up_size = MAX_DEV_REQ_UPSIZE; | ||
569 | static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE; | ||
570 | struct ORANGEFS_dev_map_desc user_desc; | ||
571 | int ret = 0; | ||
572 | struct dev_mask_info_s mask_info = { 0 }; | ||
573 | struct dev_mask2_info_s mask2_info = { 0, 0 }; | ||
574 | int upstream_kmod = 1; | ||
575 | struct orangefs_sb_info_s *orangefs_sb; | ||
576 | |||
577 | /* mtmoore: add locking here */ | ||
578 | |||
579 | switch (command) { | ||
580 | case ORANGEFS_DEV_GET_MAGIC: | ||
581 | return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ? | ||
582 | -EIO : | ||
583 | 0); | ||
584 | case ORANGEFS_DEV_GET_MAX_UPSIZE: | ||
585 | return ((put_user(max_up_size, | ||
586 | (__s32 __user *) arg) == -EFAULT) ? | ||
587 | -EIO : | ||
588 | 0); | ||
589 | case ORANGEFS_DEV_GET_MAX_DOWNSIZE: | ||
590 | return ((put_user(max_down_size, | ||
591 | (__s32 __user *) arg) == -EFAULT) ? | ||
592 | -EIO : | ||
593 | 0); | ||
594 | case ORANGEFS_DEV_MAP: | ||
595 | ret = copy_from_user(&user_desc, | ||
596 | (struct ORANGEFS_dev_map_desc __user *) | ||
597 | arg, | ||
598 | sizeof(struct ORANGEFS_dev_map_desc)); | ||
599 | /* WTF -EIO and not -EFAULT? */ | ||
600 | return ret ? -EIO : orangefs_bufmap_initialize(&user_desc); | ||
601 | case ORANGEFS_DEV_REMOUNT_ALL: | ||
602 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
603 | "%s: got ORANGEFS_DEV_REMOUNT_ALL\n", | ||
604 | __func__); | ||
605 | |||
606 | /* | ||
607 | * remount all mounted orangefs volumes to regain the lost | ||
608 | * dynamic mount tables (if any) -- NOTE: this is done | ||
609 | * without keeping the superblock list locked due to the | ||
610 | * upcall/downcall waiting. also, the request mutex is | ||
611 | * used to ensure that no operations will be serviced until | ||
612 | * all of the remounts are serviced (to avoid ops between | ||
613 | * mounts to fail) | ||
614 | */ | ||
615 | ret = mutex_lock_interruptible(&request_mutex); | ||
616 | if (ret < 0) | ||
617 | return ret; | ||
618 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
619 | "%s: priority remount in progress\n", | ||
620 | __func__); | ||
621 | spin_lock(&orangefs_superblocks_lock); | ||
622 | list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { | ||
623 | /* | ||
624 | * We have to drop the spinlock, so entries can be | ||
625 | * removed. They can't be freed, though, so we just | ||
626 | * keep the forward pointers and zero the back ones - | ||
627 | * that way we can get to the rest of the list. | ||
628 | */ | ||
629 | if (!orangefs_sb->list.prev) | ||
630 | continue; | ||
631 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
632 | "%s: Remounting SB %p\n", | ||
633 | __func__, | ||
634 | orangefs_sb); | ||
635 | |||
636 | spin_unlock(&orangefs_superblocks_lock); | ||
637 | ret = orangefs_remount(orangefs_sb); | ||
638 | spin_lock(&orangefs_superblocks_lock); | ||
639 | if (ret) { | ||
640 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
641 | "SB %p remount failed\n", | ||
642 | orangefs_sb); | ||
643 | break; | ||
644 | } | ||
645 | } | ||
646 | spin_unlock(&orangefs_superblocks_lock); | ||
647 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
648 | "%s: priority remount complete\n", | ||
649 | __func__); | ||
650 | mutex_unlock(&request_mutex); | ||
651 | return ret; | ||
652 | |||
653 | case ORANGEFS_DEV_UPSTREAM: | ||
654 | ret = copy_to_user((void __user *)arg, | ||
655 | &upstream_kmod, | ||
656 | sizeof(upstream_kmod)); | ||
657 | |||
658 | if (ret != 0) | ||
659 | return -EIO; | ||
660 | else | ||
661 | return ret; | ||
662 | |||
663 | case ORANGEFS_DEV_CLIENT_MASK: | ||
664 | ret = copy_from_user(&mask2_info, | ||
665 | (void __user *)arg, | ||
666 | sizeof(struct dev_mask2_info_s)); | ||
667 | |||
668 | if (ret != 0) | ||
669 | return -EIO; | ||
670 | |||
671 | client_debug_mask.mask1 = mask2_info.mask1_value; | ||
672 | client_debug_mask.mask2 = mask2_info.mask2_value; | ||
673 | |||
674 | pr_info("%s: client debug mask has been been received " | ||
675 | ":%llx: :%llx:\n", | ||
676 | __func__, | ||
677 | (unsigned long long)client_debug_mask.mask1, | ||
678 | (unsigned long long)client_debug_mask.mask2); | ||
679 | |||
680 | return ret; | ||
681 | |||
682 | case ORANGEFS_DEV_CLIENT_STRING: | ||
683 | ret = copy_from_user(&client_debug_array_string, | ||
684 | (void __user *)arg, | ||
685 | ORANGEFS_MAX_DEBUG_STRING_LEN); | ||
686 | /* | ||
687 | * The real client-core makes an effort to ensure | ||
688 | * that actual strings that aren't too long to fit in | ||
689 | * this buffer is what we get here. We're going to use | ||
690 | * string functions on the stuff we got, so we'll make | ||
691 | * this extra effort to try and keep from | ||
692 | * flowing out of this buffer when we use the string | ||
693 | * functions, even if somehow the stuff we end up | ||
694 | * with here is garbage. | ||
695 | */ | ||
696 | client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] = | ||
697 | '\0'; | ||
698 | |||
699 | if (ret != 0) { | ||
700 | pr_info("%s: CLIENT_STRING: copy_from_user failed\n", | ||
701 | __func__); | ||
702 | return -EIO; | ||
703 | } | ||
704 | |||
705 | pr_info("%s: client debug array string has been received.\n", | ||
706 | __func__); | ||
707 | |||
708 | if (!help_string_initialized) { | ||
709 | |||
710 | /* Free the "we don't know yet" default string... */ | ||
711 | kfree(debug_help_string); | ||
712 | |||
713 | /* build a proper debug help string */ | ||
714 | if (orangefs_prepare_debugfs_help_string(0)) { | ||
715 | gossip_err("%s: no debug help string \n", | ||
716 | __func__); | ||
717 | return -EIO; | ||
718 | } | ||
719 | |||
720 | /* Replace the boilerplate boot-time debug-help file. */ | ||
721 | debugfs_remove(help_file_dentry); | ||
722 | |||
723 | help_file_dentry = | ||
724 | debugfs_create_file( | ||
725 | ORANGEFS_KMOD_DEBUG_HELP_FILE, | ||
726 | 0444, | ||
727 | debug_dir, | ||
728 | debug_help_string, | ||
729 | &debug_help_fops); | ||
730 | |||
731 | if (!help_file_dentry) { | ||
732 | gossip_err("%s: debugfs_create_file failed for" | ||
733 | " :%s:!\n", | ||
734 | __func__, | ||
735 | ORANGEFS_KMOD_DEBUG_HELP_FILE); | ||
736 | return -EIO; | ||
737 | } | ||
738 | } | ||
739 | |||
740 | debug_mask_to_string(&client_debug_mask, 1); | ||
741 | |||
742 | debugfs_remove(client_debug_dentry); | ||
743 | |||
744 | orangefs_client_debug_init(); | ||
745 | |||
746 | help_string_initialized++; | ||
747 | |||
748 | return ret; | ||
749 | |||
750 | case ORANGEFS_DEV_DEBUG: | ||
751 | ret = copy_from_user(&mask_info, | ||
752 | (void __user *)arg, | ||
753 | sizeof(mask_info)); | ||
754 | |||
755 | if (ret != 0) | ||
756 | return -EIO; | ||
757 | |||
758 | if (mask_info.mask_type == KERNEL_MASK) { | ||
759 | if ((mask_info.mask_value == 0) | ||
760 | && (kernel_mask_set_mod_init)) { | ||
761 | /* | ||
762 | * the kernel debug mask was set when the | ||
763 | * kernel module was loaded; don't override | ||
764 | * it if the client-core was started without | ||
765 | * a value for ORANGEFS_KMODMASK. | ||
766 | */ | ||
767 | return 0; | ||
768 | } | ||
769 | debug_mask_to_string(&mask_info.mask_value, | ||
770 | mask_info.mask_type); | ||
771 | gossip_debug_mask = mask_info.mask_value; | ||
772 | pr_info("%s: kernel debug mask has been modified to " | ||
773 | ":%s: :%llx:\n", | ||
774 | __func__, | ||
775 | kernel_debug_string, | ||
776 | (unsigned long long)gossip_debug_mask); | ||
777 | } else if (mask_info.mask_type == CLIENT_MASK) { | ||
778 | debug_mask_to_string(&mask_info.mask_value, | ||
779 | mask_info.mask_type); | ||
780 | pr_info("%s: client debug mask has been modified to" | ||
781 | ":%s: :%llx:\n", | ||
782 | __func__, | ||
783 | client_debug_string, | ||
784 | llu(mask_info.mask_value)); | ||
785 | } else { | ||
786 | gossip_lerr("Invalid mask type....\n"); | ||
787 | return -EINVAL; | ||
788 | } | ||
789 | |||
790 | return ret; | ||
791 | |||
792 | default: | ||
793 | return -ENOIOCTLCMD; | ||
794 | } | ||
795 | return -ENOIOCTLCMD; | ||
796 | } | ||
797 | |||
/*
 * unlocked_ioctl handler: validate the command encoding, then dispatch
 * it.  The result is narrowed to int before being returned, as before.
 */
static long orangefs_devreq_ioctl(struct file *file,
			       unsigned int command, unsigned long arg)
{
	long status = check_ioctl_command(command);

	/* Only properly constructed commands are dispatched. */
	if (status >= 0)
		status = dispatch_ioctl_command(command, arg);

	return (int)status;
}
810 | |||
811 | #ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ | ||
812 | |||
/*
 * Compat structure for the ORANGEFS_DEV_MAP ioctl: the layout a 32-bit
 * caller passes in.  translate_dev_map26() copies it field by field
 * into a struct ORANGEFS_dev_map_desc.
 */
struct ORANGEFS_dev_map_desc32 {
	compat_uptr_t ptr;	/* 32-bit user pointer, widened via compat_ptr() */
	__s32 total_size;	/* copied through unchanged */
	__s32 size;		/* copied through unchanged */
	__s32 count;		/* copied through unchanged */
};
820 | |||
821 | static unsigned long translate_dev_map26(unsigned long args, long *error) | ||
822 | { | ||
823 | struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args; | ||
824 | /* | ||
825 | * Depending on the architecture, allocate some space on the | ||
826 | * user-call-stack based on our expected layout. | ||
827 | */ | ||
828 | struct ORANGEFS_dev_map_desc __user *p = | ||
829 | compat_alloc_user_space(sizeof(*p)); | ||
830 | compat_uptr_t addr; | ||
831 | |||
832 | *error = 0; | ||
833 | /* get the ptr from the 32 bit user-space */ | ||
834 | if (get_user(addr, &p32->ptr)) | ||
835 | goto err; | ||
836 | /* try to put that into a 64-bit layout */ | ||
837 | if (put_user(compat_ptr(addr), &p->ptr)) | ||
838 | goto err; | ||
839 | /* copy the remaining fields */ | ||
840 | if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32))) | ||
841 | goto err; | ||
842 | if (copy_in_user(&p->size, &p32->size, sizeof(__s32))) | ||
843 | goto err; | ||
844 | if (copy_in_user(&p->count, &p32->count, sizeof(__s32))) | ||
845 | goto err; | ||
846 | return (unsigned long)p; | ||
847 | err: | ||
848 | *error = -EFAULT; | ||
849 | return 0; | ||
850 | } | ||
851 | |||
852 | /* | ||
853 | * 32 bit user-space apps' ioctl handlers when kernel modules | ||
854 | * is compiled as a 64 bit one | ||
855 | */ | ||
856 | static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, | ||
857 | unsigned long args) | ||
858 | { | ||
859 | long ret; | ||
860 | unsigned long arg = args; | ||
861 | |||
862 | /* Check for properly constructed commands */ | ||
863 | ret = check_ioctl_command(cmd); | ||
864 | if (ret < 0) | ||
865 | return ret; | ||
866 | if (cmd == ORANGEFS_DEV_MAP) { | ||
867 | /* | ||
868 | * convert the arguments to what we expect internally | ||
869 | * in kernel space | ||
870 | */ | ||
871 | arg = translate_dev_map26(args, &ret); | ||
872 | if (ret < 0) { | ||
873 | gossip_err("Could not translate dev map\n"); | ||
874 | return ret; | ||
875 | } | ||
876 | } | ||
877 | /* no other ioctl requires translation */ | ||
878 | return dispatch_ioctl_command(cmd, arg); | ||
879 | } | ||
880 | |||
881 | #endif /* CONFIG_COMPAT is in .config */ | ||
882 | |||
883 | /* the assigned character device major number */ | ||
884 | static int orangefs_dev_major; | ||
885 | |||
886 | /* | ||
887 | * Initialize orangefs device specific state: | ||
888 | * Must be called at module load time only | ||
889 | */ | ||
890 | int orangefs_dev_init(void) | ||
891 | { | ||
892 | /* register orangefs-req device */ | ||
893 | orangefs_dev_major = register_chrdev(0, | ||
894 | ORANGEFS_REQDEVICE_NAME, | ||
895 | &orangefs_devreq_file_operations); | ||
896 | if (orangefs_dev_major < 0) { | ||
897 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
898 | "Failed to register /dev/%s (error %d)\n", | ||
899 | ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); | ||
900 | return orangefs_dev_major; | ||
901 | } | ||
902 | |||
903 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
904 | "*** /dev/%s character device registered ***\n", | ||
905 | ORANGEFS_REQDEVICE_NAME); | ||
906 | gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n", | ||
907 | ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); | ||
908 | return 0; | ||
909 | } | ||
910 | |||
911 | void orangefs_dev_cleanup(void) | ||
912 | { | ||
913 | unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME); | ||
914 | gossip_debug(GOSSIP_DEV_DEBUG, | ||
915 | "*** /dev/%s character device unregistered ***\n", | ||
916 | ORANGEFS_REQDEVICE_NAME); | ||
917 | } | ||
918 | |||
919 | static unsigned int orangefs_devreq_poll(struct file *file, | ||
920 | struct poll_table_struct *poll_table) | ||
921 | { | ||
922 | int poll_revent_mask = 0; | ||
923 | |||
924 | poll_wait(file, &orangefs_request_list_waitq, poll_table); | ||
925 | |||
926 | if (!list_empty(&orangefs_request_list)) | ||
927 | poll_revent_mask |= POLL_IN; | ||
928 | return poll_revent_mask; | ||
929 | } | ||
930 | |||
931 | const struct file_operations orangefs_devreq_file_operations = { | ||
932 | .owner = THIS_MODULE, | ||
933 | .read = orangefs_devreq_read, | ||
934 | .write_iter = orangefs_devreq_write_iter, | ||
935 | .open = orangefs_devreq_open, | ||
936 | .release = orangefs_devreq_release, | ||
937 | .unlocked_ioctl = orangefs_devreq_ioctl, | ||
938 | |||
939 | #ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ | ||
940 | .compat_ioctl = orangefs_devreq_compat_ioctl, | ||
941 | #endif | ||
942 | .poll = orangefs_devreq_poll | ||
943 | }; | ||